/*
 * Decompiled with CFR 0.152.
 */
package iptgxdb.executables;

import com.google.common.collect.HashMultimap;
import com.google.common.collect.Sets;
import iptgxdb.utils.CLIUtils;
import iptgxdb.utils.FastaReader;
import iptgxdb.utils.UOBufferedWriter;
import iptgxdb.utils.Utils;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.DefaultParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;

/**
 * Command-line tool that reads a list (or tab-separated table) of protein groups and writes
 * it back out with one extra column holding a protein class.
 *
 * <p>A protein group is the first column of each input line and consists of protein ids
 * separated by {@code ';'}. The class is derived from three lookups built at start-up:
 * the sequence of each protein (from the fasta file), the set of proteins sharing a
 * sequence, and the gene-protein mapping (from the optional model file; without a model
 * file every fasta protein is treated as its own gene). See
 * {@link #classify(HashSet, HashSet, Map, HashMultimap, HashMultimap)} for the exact rules.
 */
public class PeptideClassifierProteinLevel {
    /** Command-line options understood by {@link #main(String[])}. */
    public static Options options = PeptideClassifierProteinLevel.buildOptions();

    /**
     * Builds the option set: the required {@code in}, {@code fasta} and {@code out} options
     * and the optional {@code model} option.
     */
    private static Options buildOptions() {
        Options opts = new Options();
        opts.addOption(CLIUtils.createArgOption("in", "input", "a list of protein-group ids (or a table with protein-group ids in the first column)", true, false));
        opts.addOption(CLIUtils.createArgOption("fasta", "input", "the fasta file used for the search", true, false));
        opts.addOption(CLIUtils.createArgOption("model", "input", "the gene-protein mapping in GENE<TAB>PROTEIN format (one line per protein), if omitted every protein is a gene", false, false));
        opts.addOption(CLIUtils.createArgOption("out", "output", "the input list/table extended by a protein-class column (tsv file)", true, false));
        return opts;
    }

    /** Prints the usage text for {@link #options} and terminates the JVM with exit code 0. */
    public static void printUsageAndExit() {
        new HelpFormatter().printHelp("java -jar PeptideClassifierProteinLevel.jar", "PeptideClassifierProteinLevel by Ulrich Omasits", options, null, true);
        System.exit(0);
    }

    /**
     * Entry point: parses the command line, loads the fasta file and the optional gene model,
     * then writes the classified copy of the input table.
     *
     * @param args command-line arguments; a first argument of {@code "debug"} replaces the
     *             whole argument list with a fixed set of developer paths
     * @throws IOException if reading an input file or writing the output file fails
     */
    public static void main(String[] args) throws IOException {
        // Developer shortcut kept for backward compatibility: "debug" swaps in hard-coded paths.
        if (args.length > 0 && args[0].equals("debug")) {
            args = new String[]{"-in", "C:/Ulrich/Studium/phd/Christian/Hemolymph/proteinsAmbiguous_fdr0.002__.tsv", "-fasta", "C:/Ulrich/Studium/phd/Christian/Hemolymph/bw_droso_2010_pDCR.fasta", "-model", "C:/Ulrich/Studium/phd/Christian/Hemolymph/gene_protein_mapping.tsv", "-out", "C:/Ulrich/Studium/phd/Christian/Hemolymph/proteinsAmbiguous_fdr0.002_classified.tsv"};
        }
        CommandLine cli;
        try {
            cli = new DefaultParser().parse(options, args);
        }
        catch (ParseException e) {
            System.out.println(e.getMessage());
            PeptideClassifierProteinLevel.printUsageAndExit();
            // printUsageAndExit() terminates the JVM; the return only makes that explicit.
            return;
        }
        File proteinFile = CLIUtils.getFileOption(cli, "in", false);
        File fastaFile = CLIUtils.getFileOption(cli, "fasta", false);
        File modelFile = CLIUtils.getFileOption(cli, "model", false);
        File outFile = CLIUtils.getFileOption(cli, "out", true);

        // Protein id -> sequence, and the reverse grouping sequence -> protein ids.
        Map<String, String> seqByProtein = FastaReader.readFile(fastaFile, FastaReader.headerUpToFirstWhitespace);
        HashMultimap<String, String> protsBySeq = HashMultimap.create();
        for (Map.Entry<String, String> e : seqByProtein.entrySet()) {
            protsBySeq.put(e.getValue(), e.getKey());
        }

        HashMap<String, String> geneByProtein = new HashMap<String, String>();
        HashMultimap<String, String> proteinsByGene = HashMultimap.create();
        if (modelFile != null) {
            PeptideClassifierProteinLevel.readGeneModel(modelFile, geneByProtein, proteinsByGene);
        } else {
            // No model given: every fasta protein is its own gene.
            for (String p : seqByProtein.keySet()) {
                geneByProtein.put(p, p);
                proteinsByGene.put(p, p);
            }
        }

        PeptideClassifierProteinLevel.writeClassifiedTable(proteinFile, outFile, seqByProtein, protsBySeq, geneByProtein, proteinsByGene);
    }

    /**
     * Reads a {@code GENE<TAB>PROTEIN} file into the two given lookups.
     *
     * <p>Blank lines are ignored and lines with fewer than two columns are skipped with a
     * warning instead of aborting the run. The reader is closed even if reading fails.
     *
     * @param modelFile      the gene-protein mapping file
     * @param geneByProtein  receives protein -> gene
     * @param proteinsByGene receives gene -> proteins
     * @throws IOException if the file cannot be read
     */
    private static void readGeneModel(File modelFile, Map<String, String> geneByProtein, HashMultimap<String, String> proteinsByGene) throws IOException {
        BufferedReader in = Utils.reader(modelFile);
        try {
            String line;
            int lineNo = 0;
            while ((line = in.readLine()) != null) {
                ++lineNo;
                if (line.trim().isEmpty()) {
                    continue;
                }
                String[] arr = Utils.tabSplit2Array(line);
                if (arr.length < 2) {
                    System.out.println("WARNING: skipping malformed gene-protein mapping line " + lineNo + ": '" + line + "'");
                    continue;
                }
                String gene = arr[0];
                String prot = arr[1];
                geneByProtein.put(prot, gene);
                proteinsByGene.put(gene, prot);
            }
        } finally {
            in.close();
        }
    }

    /**
     * Copies {@code proteinFile} to {@code outFile}, appending the protein class to each line.
     *
     * <p>If the first line contains a tab it is treated as a table header and gets the column
     * title {@code "protein class"} instead of a classification. Groups that cannot be
     * classified get an empty class and are reported on standard output. Both files are
     * closed even if processing fails.
     *
     * @throws IOException if reading or writing fails
     */
    private static void writeClassifiedTable(File proteinFile, File outFile, Map<String, String> seqByProtein, HashMultimap<String, String> protsBySeq, Map<String, String> geneByProtein, HashMultimap<String, String> proteinsByGene) throws IOException {
        BufferedReader in = Utils.reader(proteinFile);
        try {
            UOBufferedWriter out = new UOBufferedWriter(outFile);
            try {
                String line;
                int lineNo = 0;
                while ((line = in.readLine()) != null) {
                    if (++lineNo == 1 && line.indexOf('\t') > -1) {
                        out.writeLine(line + "\t" + "protein class");
                        continue;
                    }
                    String[] arr = Utils.tabSplit2Array(line);
                    String protGroup = arr[0];
                    HashSet<String> protSet = new HashSet<String>(Utils.split2List(protGroup, ';'));
                    HashSet<String> geneSet = new HashSet<String>();
                    for (String prot : protSet) {
                        // NOTE(review): a protein missing from the mapping contributes a null
                        // gene, which is counted like any other gene. Kept as-is to preserve
                        // existing output; confirm whether such groups should be unclassified.
                        geneSet.add(geneByProtein.get(prot));
                    }
                    String protClass = PeptideClassifierProteinLevel.classify(protSet, geneSet, seqByProtein, protsBySeq, proteinsByGene);
                    if (protClass.equals("")) {
                        System.out.println("INFO: could not classify '" + protGroup + "'");
                    }
                    out.writeLine(line + "\t" + protClass);
                }
            } finally {
                out.close();
            }
        } finally {
            in.close();
        }
    }

    /**
     * Determines the protein class of one protein group.
     *
     * <ul>
     *   <li>one gene, one protein: {@code "1a"}</li>
     *   <li>one gene, several proteins that are exactly the proteins sharing the sequence of
     *       one group member: {@code "1b"}</li>
     *   <li>one gene, several proteins that are exactly all proteins of that gene:
     *       {@code "2b"}</li>
     *   <li>one gene, several proteins that are a subset of that gene's proteins:
     *       {@code "2a"}</li>
     *   <li>several genes, proteins exactly the proteins sharing one sequence: {@code "3a"},
     *       otherwise {@code "3b"}</li>
     *   <li>anything else: the empty string</li>
     * </ul>
     *
     * @param protSet        protein ids of the group
     * @param geneSet        genes looked up for {@code protSet}
     * @param seqByProtein   protein -> sequence
     * @param protsBySeq     sequence -> proteins with that sequence
     * @param proteinsByGene gene -> proteins of that gene
     * @return the class label, or {@code ""} if no rule matches
     */
    private static String classify(HashSet<String> protSet, HashSet<String> geneSet, Map<String, String> seqByProtein, HashMultimap<String, String> protsBySeq, HashMultimap<String, String> proteinsByGene) {
        if (geneSet.isEmpty()) {
            return "";
        }
        if (geneSet.size() > 1) {
            return PeptideClassifierProteinLevel.isIdenticalSequenceGroup(protSet, seqByProtein, protsBySeq) ? "3a" : "3b";
        }
        // Exactly one gene from here on.
        if (protSet.size() == 1) {
            return "1a";
        }
        if (protSet.size() > 1) {
            if (PeptideClassifierProteinLevel.isIdenticalSequenceGroup(protSet, seqByProtein, protsBySeq)) {
                return "1b";
            }
            String gene = geneSet.iterator().next();
            if (protSet.equals(proteinsByGene.get(gene))) {
                return "2b";
            }
            // Subset test; equivalent to comparing protSet with its intersection with the gene's proteins.
            if (proteinsByGene.get(gene).containsAll(protSet)) {
                return "2a";
            }
        }
        return "";
    }

    /**
     * Tells whether {@code protSet} equals the set of proteins that share the sequence of
     * one (arbitrary) member of {@code protSet}. Requires a non-empty {@code protSet}.
     */
    private static boolean isIdenticalSequenceGroup(HashSet<String> protSet, Map<String, String> seqByProtein, HashMultimap<String, String> protsBySeq) {
        String sequence = seqByProtein.get(protSet.iterator().next());
        return protSet.equals(protsBySeq.get(sequence));
    }
}

