/*
 * Decompiled with CFR 0.152.
 */
package iptgxdb.executables;

import com.google.common.base.Joiner;
import com.google.common.base.Objects;
import com.google.common.base.Splitter;
import com.google.common.collect.ComparisonChain;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import iptgxdb.utils.CLIUtils;
import iptgxdb.utils.UOBufferedWriter;
import iptgxdb.utils.Utils;
import iptgxdb.utils.Version;
import java.io.BufferedReader;
import java.io.File;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.TreeSet;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.DefaultParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;

/**
 * Command line tool that rewrites the protein assignment columns of tabular PSM files
 * using a protein identifier mapping table.
 *
 * <p>For every PSM row that has protein assignments, the assigned identifiers are grouped
 * by the (chromosome, end position) they map to in the mapping table. One identifier is
 * kept per group, groups are ordered by the hierarchy level of their mapping row, and
 * identifiers that are not present in the mapping table are appended at the end. The
 * per-protein columns (start positions, tryptic termini, n-termini, c-termini) are
 * reordered accordingly and the protein count column is recomputed. The result is written
 * next to each input file under the name {@link #prefix} + input file name.
 */
public class ProteomicsPostProcess {
    /** Prefix put in front of an input PSM file name to build the name of its output file. */
    public static String prefix = "processed.";

    /** Command line options accepted by {@link #main(String[])}. */
    public static Options options = createOptions();

    /**
     * Builds the option set: {@code -mapping}, {@code -psm} and the verbosity flag {@code -v}.
     *
     * @return a freshly populated {@link Options} instance
     */
    private static Options createOptions() {
        Options opts = new Options();
        // NOTE(review): the meaning of the two trailing booleans is defined by
        // CLIUtils.createArgOption, which is not visible here - confirm before changing them.
        opts.addOption(CLIUtils.createArgOption("mapping", "input", "the matching table generated by GffCombiner (tsv file)", true, false));
        opts.addOption(CLIUtils.createArgOption("psm", "input", "the parsed tabular psm files (tsv files) to be processed", true, true));
        opts.addOption("v", false, "be verbose on assigned protein IDs that are not mapped in the mapping table");
        return opts;
    }

    /**
     * Prints the usage text for {@link #options} and terminates the JVM with exit status 0.
     */
    public static void printUsageAndExit() {
        String description = "ProteomicsPostProcess v" + Version.getVersion() + " by Ulrich Omasits";
        new HelpFormatter().printHelp("java -jar ProteomicsPostProcess.jar", description, options, null, true);
        System.exit(0);
    }

    /**
     * Program entry point: parses the command line, loads the mapping table and processes
     * every PSM file given with {@code -psm}.
     *
     * <p>If the first argument is the literal {@code debug}, the arguments are replaced by a
     * hard-coded set of developer test paths.
     *
     * @param args command line arguments
     * @throws Exception if reading or writing a file fails, or if an input file is empty or
     *                   lacks a required column
     */
    public static void main(String[] args) throws Exception {
        if (args.length > 0 && args[0].equals("debug")) {
            args = new String[]{"-mapping", "/home/bioinf/bioinf_data/33_omul/projects/bartonella_henselae/annotations/NC_005956.pDT024.all.tsv", "-psm", "/home/bioinf/bioinf_data/33_omul/projects/bartonella_henselae/proteogenomics/msgf_refseq_all/test/im1_ogepep.psm"};
        }
        CommandLine cli;
        try {
            cli = new DefaultParser().parse(options, args);
        }
        catch (ParseException e) {
            System.out.println(e.getMessage());
            ProteomicsPostProcess.printUsageAndExit();
            // printUsageAndExit() terminates the JVM; the return only satisfies definite assignment.
            return;
        }
        boolean verbose = cli.hasOption("v");
        File inIdMappingFile = CLIUtils.getFileOption(cli, "mapping", false);
        File[] inPsmFiles = CLIUtils.getFileArray(cli, "psm");

        System.out.println("INFO: reading in the protein identifier matching table from  " + inIdMappingFile.getName());
        MappingTable mapping = readMappingTable(inIdMappingFile);

        for (File inPsmFile : inPsmFiles) {
            processPsmFile(inPsmFile, mapping, verbose);
        }
        System.out.println("INFO: done!");
    }

    /**
     * Reads the tab-separated identifier mapping table.
     *
     * <p>The header must contain the columns {@code end} and {@code mapped IDs}; the column
     * {@code chromosome} is optional (an empty chromosome name is used when it is absent).
     * Every header column whose name ends in {@code " ID"} is a hierarchy column; the
     * hierarchy level of a row is the rank (0-based, in header order) of its first non-empty
     * hierarchy column. All cells from {@code mapped IDs} to the end of the row are taken as
     * the row's mapped identifiers.
     *
     * @param mappingFile the mapping table file
     * @return the lookup tables built from the file
     * @throws Exception if the file cannot be read, is empty, or lacks a required column
     */
    private static MappingTable readMappingTable(File mappingFile) throws Exception {
        MappingTable table = new MappingTable();
        BufferedReader in = Utils.reader(mappingFile);
        try {
            String line = in.readLine();
            if (line == null) {
                throw new IllegalArgumentException("mapping table " + mappingFile.getName() + " is empty");
            }
            List<String> header = splitOn('\t', line);
            int chrColumn = header.indexOf("chromosome");
            int endPosColumn = requireColumn(header, "end", mappingFile);
            int idsColumn = requireColumn(header, "mapped IDs", mappingFile);
            List<Integer> hierarchyColumns = new ArrayList<Integer>();
            for (String head : header) {
                if (head.endsWith(" ID")) {
                    hierarchyColumns.add(Integer.valueOf(header.indexOf(head)));
                }
            }

            while ((line = in.readLine()) != null) {
                List<String> cells = splitOn('\t', line);
                int endPos = Integer.parseInt(cells.get(endPosColumn));
                String chr = chrColumn == -1 ? "" : cells.get(chrColumn);
                ChromosomePos pos = new ChromosomePos(chr, endPos);
                List<String> mappedIds = cells.subList(idsColumn, cells.size());
                table.idsByEndPos.put(pos, mappedIds);
                // The first non-empty hierarchy column decides the level of this row.
                for (int level = 0; level < hierarchyColumns.size(); ++level) {
                    int column = hierarchyColumns.get(level).intValue();
                    if (cells.get(column).length() > 0) {
                        table.hierarchyLevelByEndPos.put(pos, Integer.valueOf(level));
                        break;
                    }
                }
                for (String id : mappedIds) {
                    table.endPosById.put(id, pos);
                }
            }
        }
        finally {
            in.close();
        }
        return table;
    }

    /**
     * Processes one PSM file and writes the result to a sibling file named
     * {@link #prefix} + input file name. If that output file already exists, an error is
     * printed to standard error and the input file is skipped.
     *
     * @param inPsmFile the tab-separated PSM file to process
     * @param mapping   lookup tables read from the mapping table
     * @param verbose   whether to report identifiers that are not in the mapping table
     * @throws Exception if reading or writing fails, or if the input file is empty or lacks
     *                   a required column
     */
    private static void processPsmFile(File inPsmFile, MappingTable mapping, boolean verbose) throws Exception {
        System.out.println("INFO: processing " + inPsmFile.getName());
        File outPsmFile = new File(inPsmFile.getParentFile(), prefix + inPsmFile.getName());
        if (outPsmFile.exists()) {
            System.err.println("ERROR: could not create " + outPsmFile.getName() + ", file already exists!");
            return;
        }
        BufferedReader inPsm = Utils.reader(inPsmFile);
        try {
            String line = inPsm.readLine();
            if (line == null) {
                throw new IllegalArgumentException("PSM file " + inPsmFile.getName() + " is empty");
            }
            // Validate the header before the output file is created, so that a malformed
            // input does not leave a partial output file behind that blocks a later re-run.
            PsmColumns columns = new PsmColumns(splitOn('\t', line), inPsmFile);
            UOBufferedWriter outPsm = new UOBufferedWriter(outPsmFile);
            try {
                outPsm.writeLine(line);
                while ((line = inPsm.readLine()) != null) {
                    ArrayList<String> cells = splitOn('\t', line);
                    reorderProteins(cells, columns, mapping, verbose);
                    outPsm.writeTsvLine(cells);
                }
            }
            finally {
                outPsm.close();
            }
        }
        finally {
            inPsm.close();
        }
        System.out.println("INFO: output written to " + outPsmFile.getName());
    }

    /**
     * Rewrites the protein related cells of one PSM row in place. Rows whose protein cell
     * is empty are left untouched.
     *
     * <p>The new protein count is the number of distinct mapped (chromosome, end position)
     * clusters plus the number of identifiers that are not in the mapping table.
     *
     * @param cells   the cells of the row; modified in place
     * @param columns column indices of the PSM file
     * @param mapping lookup tables read from the mapping table
     * @param verbose whether to report identifiers that are not in the mapping table
     */
    private static void reorderProteins(ArrayList<String> cells, PsmColumns columns, MappingTable mapping, boolean verbose) {
        String pepseq = cells.get(columns.sequence);
        if (cells.get(columns.proteins).length() == 0) {
            return;
        }
        List<String> proteinList = splitOn(';', cells.get(columns.proteins));
        List<String> startposList = splitOn(';', cells.get(columns.startPositions));
        List<String> nttList = splitOn(';', cells.get(columns.trypticTermini));
        List<String> ntermsList = splitOn(';', cells.get(columns.nTermini));
        List<String> ctermsList = splitOn(';', cells.get(columns.cTermini));

        // Partition the distinct assigned identifiers into mapped clusters and unmapped identifiers.
        HashSet<String> proteinsSet = new HashSet<String>(proteinList);
        TreeSet<ChromosomePos> clusters = new TreeSet<ChromosomePos>();
        TreeSet<String> unmatchedProteins = new TreeSet<String>();
        for (String prot : proteinsSet) {
            ChromosomePos pos = mapping.endPosById.get(prot);
            if (pos != null) {
                clusters.add(pos);
            } else {
                unmatchedProteins.add(prot);
            }
        }

        // Keep, per cluster, the first identifier of the mapping row that is assigned to this
        // PSM, grouped by hierarchy level (the TreeMap iterates levels in ascending order).
        TreeMap<Integer, List<String>> idsByLevel = new TreeMap<Integer, List<String>>();
        for (ChromosomePos cluster : clusters) {
            Integer level = mapping.hierarchyLevelByEndPos.get(cluster);
            if (level == null) {
                // Mapping rows without any non-empty hierarchy column have no level; a null
                // key would make the TreeMap throw, so such clusters are ordered last.
                level = Integer.valueOf(Integer.MAX_VALUE);
            }
            for (String id : mapping.idsByEndPos.get(cluster)) {
                if (!proteinsSet.contains(id)) {
                    continue;
                }
                List<String> ids = idsByLevel.get(level);
                if (ids == null) {
                    ids = new ArrayList<String>();
                    idsByLevel.put(level, ids);
                }
                ids.add(id);
                break;
            }
        }

        List<String> sortedProteinIds = new ArrayList<String>();
        for (List<String> ids : idsByLevel.values()) {
            sortedProteinIds.addAll(ids);
        }
        sortedProteinIds.addAll(unmatchedProteins);
        if (unmatchedProteins.size() > 0 && verbose) {
            System.out.println("         at peptide " + pepseq + " there are " + unmatchedProteins.size() + " unmatched proteins: " + unmatchedProteins);
        }

        // Carry the per-protein columns along, using each identifier's first position in the
        // original protein list.
        List<String> sortedStartpos = new ArrayList<String>();
        List<String> sortedNtts = new ArrayList<String>();
        List<String> sortedNterms = new ArrayList<String>();
        List<String> sortedCterms = new ArrayList<String>();
        for (String prot : sortedProteinIds) {
            int ind = proteinList.indexOf(prot);
            sortedStartpos.add(startposList.get(ind));
            sortedNtts.add(nttList.get(ind));
            sortedNterms.add(ntermsList.get(ind));
            sortedCterms.add(ctermsList.get(ind));
        }

        int proteinCount = clusters.size() + unmatchedProteins.size();
        cells.set(columns.proteins, Joiner.on(';').join(sortedProteinIds));
        cells.set(columns.countProteins, String.valueOf(proteinCount));
        cells.set(columns.startPositions, Joiner.on(';').join(sortedStartpos));
        cells.set(columns.trypticTermini, Joiner.on(';').join(sortedNtts));
        cells.set(columns.nTermini, Joiner.on(';').join(sortedNterms));
        cells.set(columns.cTermini, Joiner.on(';').join(sortedCterms));
    }

    /** Splits {@code text} on every occurrence of {@code separator}, keeping empty fields. */
    private static ArrayList<String> splitOn(char separator, String text) {
        return Lists.newArrayList(Splitter.on(separator).split(text));
    }

    /**
     * Returns the index of the column called {@code name} in {@code header}.
     *
     * @throws IllegalArgumentException if the header has no such column
     */
    private static int requireColumn(List<String> header, String name, File source) {
        int index = header.indexOf(name);
        if (index < 0) {
            throw new IllegalArgumentException("column '" + name + "' not found in header of " + source.getName());
        }
        return index;
    }

    /** Lookup tables built from the identifier mapping table. */
    private static final class MappingTable {
        /** Mapped identifiers of the (last) mapping row at a given position. */
        final Map<ChromosomePos, List<String>> idsByEndPos = new HashMap<ChromosomePos, List<String>>();
        /** Hierarchy level of the mapping row at a given position; absent if the row has none. */
        final Map<ChromosomePos, Integer> hierarchyLevelByEndPos = new HashMap<ChromosomePos, Integer>();
        /** Position of the (last) mapping row that lists a given identifier. */
        final Map<String, ChromosomePos> endPosById = new HashMap<String, ChromosomePos>();
    }

    /** Indices of the PSM file columns that are read or rewritten. */
    private static final class PsmColumns {
        final int proteins;
        final int countProteins;
        final int startPositions;
        final int trypticTermini;
        final int nTermini;
        final int cTermini;
        final int sequence;

        /**
         * Resolves all required columns from the header.
         *
         * @throws IllegalArgumentException if any required column is missing
         */
        PsmColumns(List<String> header, File source) {
            this.proteins = requireColumn(header, "psm proteins", source);
            this.countProteins = requireColumn(header, "psm count proteins", source);
            this.startPositions = requireColumn(header, "psm start positions", source);
            this.trypticTermini = requireColumn(header, "psm tryptic termini", source);
            this.nTermini = requireColumn(header, "psm n-termini", source);
            this.cTermini = requireColumn(header, "psm c-termini", source);
            this.sequence = requireColumn(header, "psm sequence", source);
        }
    }

    /**
     * A position on a named chromosome. Instances are ordered by chromosome name first and
     * by position second; equality and hash code use both fields.
     */
    public static class ChromosomePos
    implements Comparable<ChromosomePos> {
        int pos;
        String chromosome;

        /**
         * @param chromosome the chromosome name
         * @param pos        the position on that chromosome
         */
        public ChromosomePos(String chromosome, int pos) {
            this.pos = pos;
            this.chromosome = chromosome;
        }

        @Override
        public int compareTo(ChromosomePos that) {
            return ComparisonChain.start()
                    .compare(this.chromosome, that.chromosome)
                    .compare(this.pos, that.pos)
                    .result();
        }

        @Override
        public int hashCode() {
            return Objects.hashCode(this.pos, this.chromosome);
        }

        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            if (obj == null || this.getClass() != obj.getClass()) {
                return false;
            }
            ChromosomePos that = (ChromosomePos)obj;
            return this.pos == that.pos && Objects.equal(this.chromosome, that.chromosome);
        }
    }
}

