package be.ac.vub.bsb.parsers.smashref;

import be.ac.ulb.bigre.pathwayinference.core.io.OneColumnSetParser;
import be.ac.ulb.bigre.pathwayinference.core.io.TwoColumnHashMapParser;
import be.ac.vub.bsb.parsers.util.ParserTools;
import cern.colt.matrix.impl.AbstractFormatter;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import org.apache.log4j.Logger;

/* loaded from: input_file:be/ac/vub/bsb/parsers/smashref/SmashReferenceGenomeFilterer.class */
/**
 * Filters a FASTA-like reference genome file so that only the records whose header
 * names a query organism are copied to an output file.
 *
 * <p>A record starts on a line beginning with {@link #REF_GENOME_DELIMITER}; all following
 * lines up to the next header belong to that record. The output location must be set with
 * {@link #setFilteredSmashRefGenomeFile(String)} before {@link #parse()} is called.
 */
public class SmashReferenceGenomeFilterer {
    private String _smashRefGenomeFile;

    /** Prefix that marks a header line in the reference genome file. */
    public static String REF_GENOME_DELIMITER = ">";

    /** Query organism names; after {@link #parse()} only those NOT seen in the reference file remain. */
    private Set<Object> _queryOrganisms = new HashSet<Object>();

    /** Every genome name encountered in a header line of the reference file. */
    private Set<Object> _refOrganisms = new HashSet<Object>();

    /** Query organisms for which a header was found in the reference file. */
    private Set<Object> _queryOrgsContainedInSMASH = new HashSet<Object>();

    private String _filteredSmashRefGenomeFile = "";
    protected Logger _logger = Logger.getLogger(getClass().getPackage().toString());

    /**
     * Creates a filterer for the given reference genome file.
     *
     * @param str  location of the reference genome file to filter
     * @param str2 location of a two-column file; the values of the map produced by
     *             {@code TwoColumnHashMapParser} (with its default column settings) are used
     *             as the query organism names
     */
    public SmashReferenceGenomeFilterer(String str, String str2) {
        this._smashRefGenomeFile = "";
        ParserTools.checkFileLocation(str);
        ParserTools.checkFileLocation(str2);
        HashMap<?, ?> queryMapping = new TwoColumnHashMapParser(str2).parse();
        // Only the mapped values are needed, so read them directly instead of looking each key up again.
        for (Object value : queryMapping.values()) {
            this._queryOrganisms.add(value.toString());
        }
        this._smashRefGenomeFile = str;
    }

    /**
     * Scans the reference genome file, writes the matching records to the filtered file and
     * then removes every organism found in the reference file from the query set.
     */
    public void parse() {
        goThroughFileLineByLine();
        this._queryOrganisms.removeAll(this._refOrganisms);
        this._logger.info(this._queryOrgsContainedInSMASH.size() + " query organisms are contained in SMASH!");
    }

    /**
     * Exports all genome names seen in the reference file.
     *
     * @param str location of the one-column output file
     */
    public void exportReferenceOrganismsToFile(String str) {
        ParserTools.exportCollectionToOneColumnFile(this._refOrganisms, str);
    }

    /**
     * Exports the query organisms that were found in the reference file.
     *
     * @param str location of the one-column output file
     */
    public void exportQueryOrganismsContainedInSMASH(String str) {
        ParserTools.exportCollectionToOneColumnFile(this._queryOrgsContainedInSMASH, str);
    }

    /**
     * Reads the reference genome file line by line and copies each record whose genome name
     * is a query organism to the filtered file. I/O failures are logged, not thrown.
     */
    private void goThroughFileLineByLine() {
        boolean copyCurrentRecord = false;
        BufferedReader reader = null;
        PrintWriter writer = null;
        try {
            reader = new BufferedReader(new FileReader(this._smashRefGenomeFile));
            writer = new PrintWriter(new BufferedWriter(new FileWriter(getFilteredSmashRefGenomeFile())));
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.startsWith(REF_GENOME_DELIMITER)) {
                    String genomeName = extractGenomeName(line);
                    this._logger.info("Processing genome " + genomeName);
                    this._refOrganisms.add(genomeName);
                    // The header decides whether the sequence lines that follow are copied.
                    copyCurrentRecord = this._queryOrganisms.contains(genomeName);
                    if (copyCurrentRecord) {
                        this._queryOrgsContainedInSMASH.add(genomeName);
                    }
                }
                if (copyCurrentRecord) {
                    writer.print(line + AbstractFormatter.DEFAULT_ROW_SEPARATOR);
                }
            }
        } catch (FileNotFoundException e) {
            this._logger.error("Could not open reference genome file " + this._smashRefGenomeFile
                    + " or output file " + getFilteredSmashRefGenomeFile(), e);
        } catch (IOException e) {
            this._logger.error("Error while filtering reference genome file " + this._smashRefGenomeFile, e);
        } finally {
            if (writer != null) {
                // PrintWriter swallows write failures; checkError() flushes and reports them.
                if (writer.checkError()) {
                    this._logger.error("Error while writing filtered reference genomes to "
                            + getFilteredSmashRefGenomeFile());
                }
                writer.close();
            }
            closeQuietly(reader);
        }
    }

    /**
     * Derives the genome name from a header line: the delimiter is stripped and, when the
     * remainder has at least two dot-separated fields, the second field is used.
     */
    private static String extractGenomeName(String headerLine) {
        String name = headerLine.replace(REF_GENOME_DELIMITER, "");
        if (name.contains(".")) {
            String[] fields = name.split("\\.");
            // split() drops trailing empty fields, so "abc." yields a single element.
            if (fields.length > 1) {
                name = fields[1];
            }
        }
        return name;
    }

    /** Closes the reader if it was opened, logging (not propagating) a failure to close. */
    private void closeQuietly(BufferedReader reader) {
        if (reader == null) {
            return;
        }
        try {
            reader.close();
        } catch (IOException e) {
            this._logger.warn("Could not close reference genome file " + this._smashRefGenomeFile, e);
        }
    }

    /**
     * Sets the location the filtered records are written to.
     *
     * @param str location of the filtered output file
     */
    public void setFilteredSmashRefGenomeFile(String str) {
        this._filteredSmashRefGenomeFile = str;
    }

    /**
     * @return location of the filtered output file (empty string if never set)
     */
    public String getFilteredSmashRefGenomeFile() {
        return this._filteredSmashRefGenomeFile;
    }

    /**
     * Writes the translated entries of a two-column mapping whose keys do not occur in a
     * one-column file.
     *
     * @param str  location of the one-column file with the entries to exclude
     * @param str2 location of the two-column file (column 0 is the key, column 1 the value)
     * @param str3 location of the one-column output file
     */
    public static void listNCBIIdsOfOrganismsNotInSMASH(String str, String str2, String str3) {
        TwoColumnHashMapParser mappingParser = new TwoColumnHashMapParser(str2);
        mappingParser.setKeyColumn(0);
        mappingParser.setValueColumn(1);
        HashMap mapping = mappingParser.parse();
        Set excluded = new OneColumnSetParser(str).parse();
        // keySet() is a live view: removing keys here also removes the entries from the mapping.
        Set remainingKeys = mapping.keySet();
        remainingKeys.removeAll(excluded);
        ParserTools.exportCollectionToOneColumnFile(ParserTools.translateEntities(remainingKeys, mapping), str3);
    }

    /**
     * Runs the filter on hard-coded input locations and writes the result files to the
     * working directory.
     *
     * @param strArr ignored
     */
    public static void main(String[] strArr) {
        SmashReferenceGenomeFilterer filterer = new SmashReferenceGenomeFilterer("/Users/karoline/Documents/Documents_Karoline/BSB_Lab/Data/MetaHIT/ReadMapping/reference_genomes.20100704.fasta", "orgVsGenomeRefSeqNumber.txt");
        filterer.setFilteredSmashRefGenomeFile("filteredSmashFile.txt");
        filterer.parse();
        filterer.exportQueryOrganismsContainedInSMASH("queryOrgsInSMASH.txt");
        listNCBIIdsOfOrganismsNotInSMASH("queryOrgsInSMASH.txt", "orgVsGenomeRefSeqNumber.txt", "RefSeqOrgsNOTInSMASH.txt");
    }
}
