/*
 * Decompiled with CFR 0.152.
 */
package iptgxdb.executables;

import com.google.common.base.CharMatcher;
import com.google.common.base.Joiner;
import com.google.common.base.Splitter;
import com.google.common.primitives.Ints;
import iptgxdb.utils.CLIUtils;
import iptgxdb.utils.UOBufferedWriter;
import iptgxdb.utils.Utils;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.DefaultParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.lang3.StringUtils;

public class FastaMan {
    static String protease = "([KR])[^P]";
    static int minPepSize = 1;
    static boolean pepvar = false;
    static boolean var = false;
    static boolean proc = false;
    static boolean mat = false;
    static boolean pep = false;
    static boolean byseq = false;
    static boolean isomery = false;
    public static Options options = new Options(){
        {
            this.addOption(CLIUtils.createArgOption("in", "fasta", "fasta file (can be gzipped)", true, false));
            this.addOption(CLIUtils.createArgOption("out", "output", "output table (tsv format)", true, false));
            this.addOption("byseq", false, "group output tables by sequences");
            this.addOption("isomery", false, "consider indistinguishability of leucine and isoleucine");
            this.addOption(CLIUtils.createArgOption("protease", "cleavage pattern", "enzyme cleavage pattern for 'pep' option  (default: " + protease + ")", false, false));
            this.addOption(CLIUtils.createArgOption("minpep", "size in aa", "minimum required peptide size for peptide output (default: " + minPepSize + ")", false, false));
            this.addOption("mat", false, "use process-tags in PEFF file to extract and process all mature proteins");
            this.addOption("pep", false, "output peptides instead of proteins - see also 'protease' and 'minpep' arguments");
            this.addOption("pepvar", false, "same as -pep option, but also consider variants provided in PEFF sequence headers");
            this.addOption("var", false, "output table of variants provided in PEFF sequence headers");
            this.addOption("proc", false, "output table of processed protein forms provided in PEFF sequence headers");
        }
    };
    static int c_total_prots = 0;
    static UOBufferedWriter out = null;
    static SortedMap<CharSequence, SortedMap<String, String>> bySequence = new TreeMap<CharSequence, SortedMap<String, String>>();

    public static void printUsageAndExit() {
        new HelpFormatter().printHelp("java -jar FastaMan.jar", "FastaMan by Ulrich Omasits", options, null, true);
        System.exit(0);
    }

    public static void main(String[] args) throws Exception {
        if (args.length > 0 && args[0].equals("debug")) {
            args = new String[]{"-in", "P:/33_omul/projects/listeria_maria/scottA database/122015_assemblyScottA.faa", "-out", "P:/33_omul/projects/listeria_maria/scottA database/122015_assemblyScottA.tsv"};
        }
        CommandLine cli = null;
        try {
            cli = new DefaultParser().parse(options, args);
        }
        catch (ParseException e) {
            System.out.println(e.getMessage());
            FastaMan.printUsageAndExit();
        }
        File fastaFile = new File(cli.getOptionValue("in"));
        out = CLIUtils.getUOWriter(cli, "out", true);
        byseq = cli.hasOption("byseq");
        pep = cli.hasOption("pep");
        protease = cli.getOptionValue("protease", protease);
        minPepSize = Integer.parseInt(cli.getOptionValue("minpep", String.valueOf(minPepSize)));
        pepvar = cli.hasOption("pepvar");
        var = cli.hasOption("var");
        proc = cli.hasOption("proc");
        mat = cli.hasOption("mat");
        isomery = cli.hasOption("isomery");
        if ((pep || pepvar || var || proc) && !(pep ^ pepvar ^ var ^ proc)) {
            System.out.println("Only one of the options 'pep', 'pepvar', 'var', 'proc' is allowed!");
            FastaMan.printUsageAndExit();
        }
        if (byseq && (var || proc)) {
            System.out.println("The option 'byseq' is currently only allowed for peptide or protein output!");
            FastaMan.printUsageAndExit();
        }
        BufferedReader in = Utils.reader(fastaFile);
        String line = in.readLine();
        String header = null;
        StringBuilder sequence = new StringBuilder(256);
        while (true) {
            if ((line == null || line.startsWith(">")) && header != null) {
                String finalSequence = sequence.toString().replace("*", "").toUpperCase();
                FastaMan.processEntry(header, finalSequence, true);
            }
            if (line == null) break;
            if (line.startsWith("#")) continue;
            if (line.startsWith(">")) {
                header = line.substring(1);
                sequence.setLength(0);
            } else {
                sequence.append(line);
            }
            line = in.readLine();
        }
        if (c_total_prots == 0) {
            throw new Exception("No FASTA entries found. Check the encoding of the file.");
        }
        System.out.println("INFO: " + c_total_prots + " entries found in '" + fastaFile.getName() + "'");
        FastaMan.finish();
    }

    private static void processEntry(String headerFull, String sequence, boolean newProtein) throws IOException {
        String name;
        if (newProtein) {
            ++c_total_prots;
        }
        String headerProcessed = headerFull;
        if (mat && headerFull.contains("\\Processed=")) {
            String strProc = Utils.substringUpTo(Utils.substringAfter(headerFull, "\\Processed="), " \\");
            boolean hasValidMatureProtein = false;
            boolean hasValidProcessions = false;
            for (String processed : Splitter.on(CharMatcher.anyOf(")(")).omitEmptyStrings().split(strProc)) {
                String[] p = Utils.splitToArray(processed, Splitter.on('|'));
                Integer from = Ints.tryParse(p[0]);
                if (from == null) continue;
                String string = name = p.length == 2 ? p[1] : p[2];
                if (name.equalsIgnoreCase("mature protein")) {
                    hasValidMatureProtein = true;
                    continue;
                }
                hasValidProcessions = true;
            }
            if (hasValidMatureProtein || hasValidProcessions) {
                int count_m = 0;
                String sequenceMature = new String(sequence);
                for (String processed : Splitter.on(CharMatcher.anyOf(")(")).omitEmptyStrings().split(strProc)) {
                    String name2;
                    String[] p = Utils.splitToArray(processed, Splitter.on('|'));
                    Integer from = Ints.tryParse(p[0]);
                    if (from == null) continue;
                    int to = Integer.parseInt(p.length == 2 ? p[0] : p[1]);
                    String string = name2 = p.length == 2 ? p[1] : p[2];
                    if (hasValidMatureProtein) {
                        if (!name2.equalsIgnoreCase("mature protein")) continue;
                        sequenceMature = sequence.substring(from - 1, to);
                        String headerMature = String.valueOf(headerProcessed) + "m" + ++count_m;
                        FastaMan.processEntry(headerMature, sequenceMature, false);
                        continue;
                    }
                    if (!hasValidProcessions || name2.equalsIgnoreCase("mature protein")) continue;
                    sequenceMature = String.valueOf(sequenceMature.substring(0, from - 1)) + StringUtils.repeat('_', to - from + 1) + sequenceMature.substring(to);
                }
                if (!hasValidMatureProtein && hasValidProcessions) {
                    String headerMature = String.valueOf(headerProcessed) + "mm";
                    Object[] seqs = Utils.splitToArray(sequenceMature, Splitter.on('_').omitEmptyStrings());
                    if (seqs.length > 1) {
                        System.out.println("WARN: inconsistent processed forms: " + headerMature + "\t" + sequence + Joiner.on('\t').join(seqs));
                    } else if (seqs.length == 0) {
                        System.out.println("WARN: no mature form for " + headerFull);
                    } else {
                        FastaMan.processEntry(headerMature, seqs[0], false);
                    }
                }
                return;
            }
        }
        String strVars = null;
        if ((var || pepvar) && headerFull.contains("\\Variant=")) {
            strVars = Utils.substringUpTo(Utils.substringAfter(headerFull, "\\Variant="), " ");
        }
        if (pep || pepvar) {
            for (String pep : FastaMan.cleaveSequence(sequence, protease, strVars)) {
                if (pep.length() < minPepSize) continue;
                if (byseq) {
                    String pepIso;
                    String string = pepIso = isomery ? pep.replace('I', 'L') : pep;
                    if (!bySequence.containsKey(pepIso)) {
                        bySequence.put(pepIso, new TreeMap());
                    }
                    ((SortedMap)bySequence.get(pepIso)).put(headerProcessed, pep);
                    continue;
                }
                out.writeLine(String.valueOf(headerProcessed) + "\t" + pep + "\t" + pep.length());
            }
        } else if (var) {
            if (strVars != null) {
                for (String variant : Splitter.on(CharMatcher.anyOf(")(")).omitEmptyStrings().split(strVars)) {
                    String[] v = Utils.splitToArray(variant, Splitter.on('|'));
                    int from = Integer.parseInt(v[0]);
                    int to = Integer.parseInt(v[1]);
                    String var = v[2];
                    String klasse = "";
                    String oldseq = "";
                    if (to > sequence.length()) {
                        klasse = "out of sequence";
                    } else {
                        oldseq = sequence.substring(from - 1, to);
                        if (var.indexOf(42) >= 0) {
                            klasse = "stop gain";
                        } else if (to - from + 1 == var.length()) {
                            klasse = var.equals(oldseq) ? "mutation silent" : "mutation";
                        } else if (to - from + 1 > var.length()) {
                            klasse = "deletion";
                        } else if (to - from + 1 < var.length()) {
                            klasse = "insertion";
                        }
                    }
                    out.writeLine(String.valueOf(headerProcessed) + "\t" + from + "\t" + to + "\t" + oldseq + "\t" + (to - from + 1) + "\t" + var + "\t" + var.length() + "\t" + klasse);
                }
            }
        } else if (proc) {
            if (headerFull.contains("\\Processed=")) {
                String strProc = Utils.substringUpTo(Utils.substringAfter(headerFull, "\\Processed="), " \\");
                for (String processed : Splitter.on(CharMatcher.anyOf(")(")).omitEmptyStrings().split(strProc)) {
                    String[] p = Utils.splitToArray(processed, Splitter.on('|'));
                    Integer from = Ints.tryParse(p[0]);
                    if (from == null) {
                        out.writeLine(String.valueOf(headerProcessed) + "\t" + "\t" + "\t" + "range with ?");
                        continue;
                    }
                    int to = Integer.parseInt(p.length == 2 ? p[0] : p[1]);
                    String string = name = p.length == 2 ? p[1] : p[2];
                    if (from == 1 && to == sequence.length()) {
                        name = String.valueOf(name) + " (complete sequence)";
                    }
                    out.writeLine(String.valueOf(headerProcessed) + "\t" + from + "\t" + to + "\t" + name);
                }
            }
        } else if (byseq) {
            String seqIso;
            String string = seqIso = isomery ? sequence.replace('I', 'L') : sequence;
            if (!bySequence.containsKey(seqIso)) {
                bySequence.put(seqIso, new TreeMap());
            }
            ((SortedMap)bySequence.get(seqIso)).put(headerProcessed, sequence);
        } else {
            out.writeLine(String.valueOf(headerProcessed) + "\t" + sequence.toString() + "\t" + sequence.length());
        }
    }

    private static void finish() throws IOException {
        if (byseq) {
            for (SortedMap<String, String> e : bySequence.values()) {
                Set<String> ids = e.keySet();
                LinkedList<String> seqs = new LinkedList<String>(new LinkedHashSet<String>(e.values()));
                String primarySeq = seqs.removeFirst();
                out.writeLine(String.valueOf(Utils.join(ids, ",")) + "\t" + ids.size() + "\t" + primarySeq + "\t" + primarySeq.length() + "\t" + Utils.join(seqs, ",") + "\t" + (seqs.size() + 1));
            }
        }
        out.close();
        String subject = null;
        subject = byseq ? (pep || pepvar ? "peptide sequences" : "protein sequences") : (pep || pepvar ? "peptides" : (var ? "variants" : (proc ? "processed forms" : "proteins")));
        System.out.println("INFO: " + FastaMan.out.linesWritten + " " + subject + " written to '" + FastaMan.out.file.getName() + "'");
    }

    public static List<String> cleaveSequence(String seq, String cleavagePattern, String variants) {
        if (variants == null) {
            return FastaMan.cleaveSequence(seq, cleavagePattern);
        }
        List<String> origPeps = FastaMan.cleaveSequence(seq, cleavagePattern);
        LinkedList<String> varPeps = new LinkedList<String>();
        for (String variant : Splitter.on(CharMatcher.anyOf(")(")).omitEmptyStrings().split(variants)) {
            String[] v = Utils.splitToArray(variant, Splitter.on('|'));
            int from = Integer.parseInt(v[0]);
            int to = Integer.parseInt(v[1]);
            String var = v[2];
            if (to > seq.length()) {
                System.out.println("WARN: variant (" + variant + ") out of protein sequence (length=" + seq.length() + ")");
                continue;
            }
            if (var.equals("-")) {
                var = "";
            }
            String seqNew = String.valueOf(seq.substring(0, from - 1)) + var + seq.substring(to);
            seqNew = Utils.substringUpTo(seqNew, "*");
            List<String> newPeps = FastaMan.cleaveSequence(seqNew, cleavagePattern);
            int i = 0;
            while (i < origPeps.size() && i < newPeps.size() && origPeps.get(i).equals(newPeps.get(i))) {
                ++i;
            }
            int j = 0;
            while (j < origPeps.size() && j < newPeps.size() && origPeps.get(origPeps.size() - 1 - j).equals(newPeps.get(newPeps.size() - 1 - j))) {
                ++j;
            }
            if (i >= newPeps.size() - j) continue;
            varPeps.addAll(newPeps.subList(i, newPeps.size() - j));
        }
        origPeps.addAll(varPeps);
        return origPeps;
    }

    public static List<String> cleaveSequence(String seq, String cleavagePattern) {
        Pattern pattern = Pattern.compile(cleavagePattern);
        boolean maxMC = false;
        ArrayList<String> peps = new ArrayList<String>();
        if (seq.length() == 0) {
            return peps;
        }
        Matcher m = pattern.matcher(seq);
        LinkedList<Integer> starts = new LinkedList<Integer>();
        starts.addFirst(0);
        while (m.find((Integer)starts.getFirst())) {
            int cleavageSite = m.end();
            if (m.groupCount() == 1) {
                cleavageSite = m.end(1);
            }
            int mc = 0;
            Iterator iterator = starts.iterator();
            while (iterator.hasNext()) {
                int start = (Integer)iterator.next();
                peps.add(seq.substring(start, cleavageSite));
                ++mc;
            }
            if (starts.size() == 1) {
                starts.removeLast();
            }
            starts.addFirst(cleavageSite);
        }
        int mc = 0;
        Iterator iterator = starts.iterator();
        while (iterator.hasNext()) {
            int start = (Integer)iterator.next();
            if (start >= seq.length()) continue;
            peps.add(seq.substring(start));
            ++mc;
        }
        return peps;
    }
}

