package be.ac.vub.bsb.parsers.hmp;

import be.ac.ulb.bigre.pathwayinference.core.core.PathwayinferenceConstants;
import be.ac.ulb.bigre.pathwayinference.core.io.TwoColumnHashMapParser;
import be.ac.vub.bsb.cooccurrence.measures.Matrix;
import be.ac.vub.bsb.cooccurrence.measures.MatrixToolsProvider;
import cern.colt.matrix.DoubleMatrix1D;
import cern.colt.matrix.impl.AbstractFormatter;
import cern.colt.matrix.impl.DenseDoubleMatrix1D;
import cern.jet.math.Functions;
import java.io.File;
import java.util.ArrayList;
import java.util.Map;
import java.util.TreeMap;
import org.apache.log4j.Logger;

/* loaded from: input_file:be/ac/vub/bsb/parsers/hmp/HMP16SRNAPatSchlossParsingHelper.class */
/**
 * Helper that loads a count table and a taxon lookup table from a data folder
 * and turns them into an output {@link Matrix} whose rows are labelled with
 * taxon names taken from the lookup table.
 *
 * <p>Input files are located as
 * {@code <dataFolder>/<DATA_PREFIX><region><suffix>}, where the suffix depends on
 * whether phylotype or OTU data is requested (see
 * {@link #getCountTableLocation(int, boolean, String)} and
 * {@link #getLookUpTableLocation(int, boolean, String)}).
 */
public class HMP16SRNAPatSchlossParsingHelper {
    /** Default 16S region number inserted into the input file names. */
    public static int DEFAULT_16SREGION = 35;
    /** Default minimum for the number given in parentheses after each classification level. */
    public static double DEFAULT_SEQUENCE_ID_THRESHOLD = 90.0d;
    /** File name prefix that precedes the 16S region number. */
    public static String DATA_PREFIX = "hmp1.v";
    /** File name suffix of the OTU lookup table. */
    public static String OTU_LOOKUP_SUFFIX = ".hq.otu.lookup";
    /** File name suffix of the OTU count table. */
    public static String OTU_COUNT_SUFFIX = ".hq.otu.counts";
    /** File name suffix of the phylotype lookup table. */
    public static String PHYLOTYPE_LOOKUP_SUFFIX = ".lq.phylotype.lookup";
    /** File name suffix of the phylotype count table. */
    public static String PHYLOTYPE_COUNT_SUFFIX = ".lq.phylotype.counts";

    private boolean _phylotype;
    private double _sequenceIdentityThreshold;
    private int _16SRegion;
    private String _dataFolder;
    private boolean _mergeRepetitiveTaxa;
    private boolean _acceptUnclassifiedTaxa;
    /** Maps the taxon identifier (as a string) to its ';'-separated classification. */
    private Map<String, String> _taxonVersusClassificationLookup;
    private Matrix _countMatrix;
    private Matrix _outputMatrix;
    /** Set once {@link #setCountMatrix(Matrix)} was called; suppresses reading the count file. */
    private boolean _countMatrixLoadedExternally;
    private Logger _logger;

    /**
     * Creates a helper with default settings: OTU data, default threshold and
     * region, empty data folder, no merging and no unclassified taxa.
     */
    public HMP16SRNAPatSchlossParsingHelper() {
        this._phylotype = false;
        this._sequenceIdentityThreshold = DEFAULT_SEQUENCE_ID_THRESHOLD;
        this._16SRegion = DEFAULT_16SREGION;
        this._dataFolder = "";
        this._mergeRepetitiveTaxa = false;
        this._acceptUnclassifiedTaxa = false;
        this._taxonVersusClassificationLookup = new TreeMap<String, String>();
        this._countMatrix = new Matrix();
        this._outputMatrix = new Matrix();
        this._countMatrixLoadedExternally = false;
        // Package.toString() yields "package <name> ...", which is not a usable
        // hierarchical logger name; getName() returns the plain package name.
        this._logger = Logger.getLogger(getClass().getPackage().getName());
    }

    /**
     * Creates a helper with default settings for the given data folder and 16S region.
     *
     * @param str folder that contains the lookup and count files
     * @param i   16S region number used in the file names
     */
    public HMP16SRNAPatSchlossParsingHelper(String str, int i) {
        this();
        setDataFolder(str);
        this._16SRegion = i;
    }

    /**
     * Replaces the output matrix by one in which rows sharing the same name are
     * summed element-wise. Rows of the result are ordered by the natural ordering
     * of their names; column names are carried over.
     */
    private void filterOTUMatrix() {
        final Matrix source = getOutputMatrix();
        final boolean debug = HMP16SRNAPatSchlossParser.test;
        final int rowCount = source.getMatrix().rows();

        Map<String, DoubleMatrix1D> summedCounts = new TreeMap<String, DoubleMatrix1D>();
        for (int row = 0; row < rowCount; row++) {
            String taxon = source.getRowName(row);
            DoubleMatrix1D counts = source.getMatrix().viewRow(row);
            if (debug && taxon.equals(HMP16SRNAPatSchlossParser.testOTU)) {
                System.out.println("counts for OTU " + HMP16SRNAPatSchlossParser.testOTU + AbstractFormatter.DEFAULT_COLUMN_SEPARATOR + counts);
            }
            DoubleMatrix1D sum = summedCounts.get(taxon);
            if (sum == null) {
                // Accumulate into a copy: viewRow() is backed by the source matrix,
                // so summing into the view itself would overwrite the source data.
                summedCounts.put(taxon, counts.copy());
            } else {
                sum.assign(counts, Functions.plus);
            }
        }

        Matrix merged = new Matrix(summedCounts.size(), source.getMatrix().columns());
        merged.setColNames(source.getColNames());
        int targetRow = 0;
        for (Map.Entry<String, DoubleMatrix1D> entry : summedCounts.entrySet()) {
            String taxon = entry.getKey();
            merged.setRowName(targetRow, taxon);
            merged.setRow(targetRow, entry.getValue().toArray());
            if (debug && taxon.equals(HMP16SRNAPatSchlossParser.testOTU)) {
                System.out.println("summed counts for OTU " + HMP16SRNAPatSchlossParser.testOTU + AbstractFormatter.DEFAULT_COLUMN_SEPARATOR + merged.getMatrix().viewRow(targetRow));
            }
            targetRow++;
        }
        setOutputMatrix(merged);
    }

    /**
     * Parses the lookup table and, unless a count matrix was supplied through
     * {@link #setCountMatrix(Matrix)}, reads the count table from the data folder.
     *
     * @return {@code false} if no data folder is set (an error is logged and
     *         nothing is loaded), {@code true} otherwise
     */
    private boolean loadData() {
        if (getDataFolder().isEmpty()) {
            this._logger.error("No data folder has been specified! Please specify the location of the data.");
            return false;
        }
        String countTableLocation = getCountTableLocation(get16SRegion(), isPhylotype(), getDataFolder());
        String lookupLocation = getLookUpTableLocation(get16SRegion(), isPhylotype(), getDataFolder());

        TwoColumnHashMapParser lookupParser = new TwoColumnHashMapParser(lookupLocation);
        lookupParser.setHeaderLineNumber(1);
        this._taxonVersusClassificationLookup = lookupParser.parse();
        this._logger.info("Parsed " + this._taxonVersusClassificationLookup.keySet().size() + " taxa.");

        if (!this._countMatrixLoadedExternally) {
            this._countMatrix = new Matrix();
            this._countMatrix.readMatrix(countTableLocation, false);
        }
        this._logger.info("Parsed counts for " + this._countMatrix.getMatrix().rows() + " samples and " + this._countMatrix.getMatrix().columns() + " taxa.");
        return true;
    }

    /**
     * Builds the path of the count table.
     *
     * @param i   16S region number
     * @param z   {@code true} for the phylotype table, {@code false} for the OTU table
     * @param str data folder
     * @return {@code str + File.separator + DATA_PREFIX + i + <count suffix>}
     */
    public static String getCountTableLocation(int i, boolean z, String str) {
        return buildLocation(str, i, z ? PHYLOTYPE_COUNT_SUFFIX : OTU_COUNT_SUFFIX);
    }

    /**
     * Builds the path of the lookup table.
     *
     * @param i   16S region number
     * @param z   {@code true} for the phylotype table, {@code false} for the OTU table
     * @param str data folder
     * @return {@code str + File.separator + DATA_PREFIX + i + <lookup suffix>}
     */
    public static String getLookUpTableLocation(int i, boolean z, String str) {
        return buildLocation(str, i, z ? PHYLOTYPE_LOOKUP_SUFFIX : OTU_LOOKUP_SUFFIX);
    }

    /** Concatenates folder, separator, prefix, region and suffix into a file location. */
    private static String buildLocation(String folder, int region, String suffix) {
        return folder + File.separator + DATA_PREFIX + region + suffix;
    }

    /**
     * Loads the data, removes the row named "collection" from the count matrix,
     * stores the transposed result as output matrix and relabels its rows with
     * names from the lookup table.
     *
     * <p>Row {@code i} is looked up under the key {@code i + 1}. The classification
     * is split on ';' and each level is expected to look like {@code name(number)}.
     * A level qualifies when its number is at least the sequence identity threshold
     * and its name is not {@code HMP16SRNAPatSchlossParser.UNCLASSIFIED} (unless
     * unclassified taxa are accepted). The row is named either with the bare level
     * name (merge mode) or with the taxon id joined to the level name. Rows without
     * a qualifying level or without a lookup entry keep their previous name.
     *
     * <p>If merge mode is on and the data is not phylotype data, rows with equal
     * names are summed afterwards. If no data folder is set, the method returns
     * after the error has been logged and leaves the output matrix untouched.
     */
    public void processCountMatrix() {
        if (!loadData()) {
            // loadData() already logged the reason; continuing would only fail
            // later on the empty lookup table.
            return;
        }
        ArrayList<String> rowsToDrop = new ArrayList<String>();
        rowsToDrop.add("collection");
        this._countMatrix = MatrixToolsProvider.getSubmatrixWithoutRows(this._countMatrix, rowsToDrop);
        setOutputMatrix(MatrixToolsProvider.getTransposedMatrix(this._countMatrix));

        final Matrix output = getOutputMatrix();
        final int rowCount = output.getMatrix().rows();
        for (int row = 0; row < rowCount; row++) {
            String taxonId = Integer.toString(row + 1);
            String classification = this._taxonVersusClassificationLookup.get(taxonId);
            if (classification == null) {
                this._logger.warn("No classification found for taxon " + taxonId + "; keeping its current row name.");
                continue;
            }
            String[] levels = classification.split(";");
            // NOTE(review): levels are visited from the last one down to index 1 and
            // there is no break, so the qualifying level with the LOWEST index ends up
            // as the row name. If the most specific qualifying level was intended, a
            // break is missing here - confirm before changing.
            for (int level = levels.length - 1; level > 0; level--) {
                String[] nameAndScore = levels[level].split("\\(");
                String name = nameAndScore[0];
                int score = Integer.parseInt(nameAndScore[1].replace(")", ""));
                if (score < getSequenceIdentityThreshold()) {
                    continue;
                }
                if (name.equals(HMP16SRNAPatSchlossParser.UNCLASSIFIED) && !isAcceptUnclassifiedTaxa()) {
                    continue;
                }
                String rowName = isMergeRepetitiveTaxa() ? name : taxonId + PathwayinferenceConstants.REACTION_SUBREACTION_JOINER + name;
                output.setRowName(row, rowName);
            }
        }
        if (!isPhylotype() && isMergeRepetitiveTaxa()) {
            filterOTUMatrix();
        }
    }

    /** @param str folder that contains the lookup and count files */
    public void setDataFolder(String str) {
        this._dataFolder = str;
    }

    /** @return folder that contains the lookup and count files */
    public String getDataFolder() {
        return this._dataFolder;
    }

    /** @param i 16S region number used in the input file names */
    public void set16SRegion(int i) {
        this._16SRegion = i;
    }

    /** @return 16S region number used in the input file names */
    public int get16SRegion() {
        return this._16SRegion;
    }

    /** @param d minimum number a classification level must carry to be used as row name */
    public void setSequenceIdentityThreshold(double d) {
        this._sequenceIdentityThreshold = d;
    }

    /** @return minimum number a classification level must carry to be used as row name */
    public double getSequenceIdentityThreshold() {
        return this._sequenceIdentityThreshold;
    }

    /** @param z whether rows are named by level name only and equally named rows are summed */
    public void setMergeRepetitiveTaxa(boolean z) {
        this._mergeRepetitiveTaxa = z;
    }

    /** @return whether rows are named by level name only and equally named rows are summed */
    public boolean isMergeRepetitiveTaxa() {
        return this._mergeRepetitiveTaxa;
    }

    /** @param z {@code true} to read the phylotype files instead of the OTU files */
    public void setPhylotype(boolean z) {
        this._phylotype = z;
    }

    /** @return {@code true} if the phylotype files are read instead of the OTU files */
    public boolean isPhylotype() {
        return this._phylotype;
    }

    /** @param matrix matrix to use as output matrix */
    public void setOutputMatrix(Matrix matrix) {
        this._outputMatrix = matrix;
    }

    /** @return the current output matrix */
    public Matrix getOutputMatrix() {
        return this._outputMatrix;
    }

    /** @param z whether levels named {@code HMP16SRNAPatSchlossParser.UNCLASSIFIED} may be used as row names */
    public void setAcceptUnclassifiedTaxa(boolean z) {
        this._acceptUnclassifiedTaxa = z;
    }

    /** @return whether levels named {@code HMP16SRNAPatSchlossParser.UNCLASSIFIED} may be used as row names */
    public boolean isAcceptUnclassifiedTaxa() {
        return this._acceptUnclassifiedTaxa;
    }

    /**
     * Supplies the count matrix directly; the count file is then no longer read
     * from the data folder (the lookup table still is).
     *
     * @param matrix count matrix to process
     */
    public void setCountMatrix(Matrix matrix) {
        this._countMatrix = matrix;
        this._countMatrixLoadedExternally = true;
    }

    /** Empty entry point; does nothing. */
    public static void main(String[] strArr) {
    }
}
