/*
 * Decompiled with CFR 0.152.
 */
package iptgxdb.executables;

import iptgxdb.utils.CLIUtils;
import iptgxdb.utils.FastaReader;
import iptgxdb.utils.GenomeFeature;
import iptgxdb.utils.GenomeLocation;
import iptgxdb.utils.GenomicsUtil;
import iptgxdb.utils.UOBufferedWriter;
import iptgxdb.utils.Util;
import iptgxdb.utils.Utils;
import iptgxdb.utils.Version;
import java.awt.Color;
import java.io.File;
import java.io.FileWriter;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.DefaultParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;

/**
 * Command-line tool that scans every sequence of a FASTA file in all six
 * reading frames (three per strand) and reports each open reading frame
 * (start codon up to and including the next in-frame stop codon) as a GFF
 * feature, optionally also as a tab-separated table with sequences.
 */
public class SixFrameORFs {
    /** Codons that terminate an ORF. */
    static List<String> fixStopCodons = Arrays.asList("TAA", "TAG", "TGA");
    /** Main start codons; once one is seen, later in-frame start codons are ignored until the next stop. */
    static List<String> fixStartCodons = Arrays.asList("ATG");
    /** Alternative start codons; replaced by the values of {@code -alt} when given. */
    static List<String> alternativeStartCodons = Arrays.asList("GTG", "TTG", "CTG");
    /** Minimum ORF length in amino acids (stop codon not counted); overridden by {@code -min}. */
    static int minProteinLength = 10;
    /** When set ({@code -laa}), regions without a main start codon keep only their most upstream alternative start. */
    static boolean longestAlternativeAnchorOnly = false;
    /** Command-line options; the help texts embed the defaults declared above. */
    public static Options options = new Options(){
        {
            this.addOption(CLIUtils.createArgOption("seq", "file", "the input sequence", true, false));
            this.addOption(CLIUtils.createArgOption("out", "file", "the output gff file", true, false));
            this.addOption(CLIUtils.createArgOption("alt", "codon(s)", "alternative start codons (default: " + Utils.join(alternativeStartCodons, " ") + ")", false, true));
            this.addOption(CLIUtils.createArgOption("min", "length", "the minimum protein length in aa (default: " + minProteinLength + ")", false, false));
            this.addOption("laa", false, "take only longest alternative anchor for regions without a main start codon");
            this.addOption(CLIUtils.createArgOption("tab", "output", "a tab-separated output file with sequences per entry", false, false));
            this.addOption(CLIUtils.createArgOption("extend", "e", "get extension for all features 3' and 5' by <e> nucleotides in tabular output file", false, false));
        }
    };

    /** Prints the usage text and terminates the JVM with exit status 0. */
    public static void printUsageAndExit() {
        SixFrameORFs.printUsage();
        System.exit(0);
    }

    /** Prints the tool description and the option summary without exiting. */
    private static void printUsage() {
        String description = "SixFrameORF v" + Version.getVersion() + " by Ulrich Omasits";
        new HelpFormatter().printHelp("java -jar SixFrameORFs.jar", description, options, null, true);
    }

    /**
     * Entry point: parses the command line, scans all sequences and writes the results.
     *
     * @param args command-line arguments as described by {@link #options}
     * @throws Exception if reading the input or writing any output fails
     */
    public static void main(String[] args) throws Exception {
        // Developer shortcut: a leading "debug" argument replaces the command line with a fixed local test set-up.
        if (args.length > 0 && args[0].equals("debug")) {
            args = new String[]{"-seq", "/home/bioinf/bioinf_data/33_omul/projects/bartonella_henselae/annotations/NC_005956.fasta", "-out", "/home/bioinf/Desktop/x.gff3", "-min", "18", "-alt", "TTG GTG CTG", "-laa"};
        }
        CommandLine cli;
        try {
            cli = new DefaultParser().parse(options, args);
        }
        catch (ParseException e) {
            // An unusable command line is a failure: report on stderr and exit non-zero.
            System.err.println(e.getMessage());
            SixFrameORFs.printUsage();
            System.exit(1);
            return;
        }
        File fInSeq = new File(cli.getOptionValue("seq"));
        File fOutGFF = new File(cli.getOptionValue("out"));
        if (fOutGFF.exists()) {
            // Never overwrite an existing result file.
            System.err.println("ERROR: " + fOutGFF.getName() + " already exists.");
            System.exit(1);
            return;
        }
        File fTab = CLIUtils.getFileOption(cli, "tab", true);
        int extend = Integer.parseInt(cli.getOptionValue("extend", "0"));
        if (cli.hasOption("alt")) {
            alternativeStartCodons = Arrays.asList(cli.getOptionValues("alt"));
        }
        if (cli.hasOption("min")) {
            minProteinLength = Integer.parseInt(cli.getOptionValue("min"));
        }
        longestAlternativeAnchorOnly = cli.hasOption("laa");

        // The set of accepted start codons is the same for every sequence, strand and frame.
        HashSet<String> allStartCodons = new HashSet<String>(fixStartCodons);
        allStartCodons.addAll(alternativeStartCodons);

        int orfCount = 0;
        int stopCount = 0;
        UOBufferedWriter outGFF = new UOBufferedWriter(new FileWriter(fOutGFF));
        UOBufferedWriter outTab = null;
        try {
            if (fTab != null) {
                outTab = new UOBufferedWriter(fTab);
                outTab.writeTsvLine("id", "chromosome", "from", "to", "strand", "frame", "startCodon", "extension", "sequence", extend + "nt upstream", extend + "nt downstream");
            }
            outGFF.writeLine(GenomicsUtil.createGFFheader("allORFs", Color.DARK_GRAY));
            System.out.println("INFO: Reading sequence from '" + fInSeq.getName() + "'...");
            Map<String, String> fasta = FastaReader.readFile(fInSeq, FastaReader.headerUpToFirstWhitespace);
            System.out.println("INFO: Reading sequence from '" + fInSeq.getName() + "' done!");
            for (Map.Entry<String, String> e : fasta.entrySet()) {
                String seqId = e.getKey();
                StringBuilder seqForward = new StringBuilder(e.getValue());
                System.out.println("INFO: Searching for ORFs in " + seqId + "...");
                int n = seqForward.length();
                // NOTE(review): presumably the reverse complement of the forward sequence - confirm in GenomicsUtil.
                StringBuilder seqReverse = GenomicsUtil.reverseNucleotides(seqForward.toString());
                for (GenomeLocation.Strand strand : Arrays.asList(GenomeLocation.Strand.PLUS, GenomeLocation.Strand.MINUS)) {
                    StringBuilder seq = strand == GenomeLocation.Strand.PLUS ? seqForward : seqReverse;
                    for (int frame = 0; frame < 3; ++frame) {
                        // Start codons seen since the last stop codon, keyed by their 0-based offset in 'seq'.
                        TreeMap<Integer, String> foundStartCodons = new TreeMap<Integer, String>();
                        boolean fixStartCodonFound = false;
                        for (int pos = frame; pos < n - 2; pos += 3) {
                            String codon = seq.substring(pos, pos + 3);
                            if (!fixStartCodonFound && allStartCodons.contains(codon)) {
                                foundStartCodons.put(pos, codon);
                                if (fixStartCodons.contains(codon)) {
                                    fixStartCodonFound = true;
                                }
                            } else if (fixStopCodons.contains(codon)) {
                                int endInSeq = pos + 3;
                                TreeMap<Integer, String> starts = SixFrameORFs.selectReportableStarts(foundStartCodons, fixStartCodonFound, endInSeq);
                                if (!starts.isEmpty()) {
                                    ++stopCount;
                                    // The most downstream start yields the shortest ORF; the others are its extensions.
                                    int shortestStart = starts.lastKey();
                                    for (Map.Entry<Integer, String> start : starts.entrySet()) {
                                        ++orfCount;
                                        GenomeFeature gff = SixFrameORFs.buildFeature(seqId, strand, frame, n, start.getKey(), endInSeq, start.getValue(), shortestStart);
                                        outGFF.writeLine(gff.toString());
                                        if (outTab != null) {
                                            SixFrameORFs.writeTabLine(outTab, gff, seqForward, extend);
                                        }
                                    }
                                }
                                // A stop codon closes the region; start collecting afresh.
                                foundStartCodons.clear();
                                fixStartCodonFound = false;
                            }
                        }
                    }
                }
            }
        }
        finally {
            // Close both writers even when reading or scanning failed.
            try {
                outGFF.close();
            }
            finally {
                if (outTab != null) {
                    outTab.close();
                }
            }
        }
        System.out.println("INFO: Searching for ORFs done! Extracted " + orfCount + " ORFs for " + stopCount + " stop sites to '" + fOutGFF.getName() + "'.");
    }

    /**
     * Selects the start codons of one stop-delimited region that should be reported.
     * Applies the longest-alternative-anchor rule first and the minimum protein length second;
     * the given map is not modified.
     *
     * @param foundStartCodons start codons of the region, keyed by 0-based offset
     * @param fixStartCodonFound whether the region contains a main start codon
     * @param endInSeq exclusive end offset of the region's stop codon
     * @return the starts to report, sorted by offset (possibly empty)
     */
    private static TreeMap<Integer, String> selectReportableStarts(TreeMap<Integer, String> foundStartCodons, boolean fixStartCodonFound, int endInSeq) {
        TreeMap<Integer, String> candidates = foundStartCodons;
        if (longestAlternativeAnchorOnly && !fixStartCodonFound && foundStartCodons.size() > 1) {
            // Only alternative starts were seen: keep the most upstream one, i.e. the longest ORF.
            Map.Entry<Integer, String> longest = foundStartCodons.firstEntry();
            candidates = new TreeMap<Integer, String>();
            candidates.put(longest.getKey(), longest.getValue());
        }
        TreeMap<Integer, String> reportable = new TreeMap<Integer, String>();
        for (Map.Entry<Integer, String> start : candidates.entrySet()) {
            // Length in amino acids, not counting the stop codon.
            int lengthAA = (endInSeq - start.getKey()) / 3 - 1;
            if (lengthAA >= minProteinLength) {
                reportable.put(start.getKey(), start.getValue());
            }
        }
        return reportable;
    }

    /**
     * Builds the GFF feature for one ORF.
     *
     * @param seqId identifier of the scanned sequence
     * @param strand strand the ORF was found on
     * @param frame 0-based reading frame on that strand
     * @param seqLength length of the scanned sequence
     * @param startInSeq 0-based offset of the start codon in the scanned strand
     * @param endInSeq exclusive end offset of the stop codon in the scanned strand
     * @param startCodon the start codon found at {@code startInSeq}
     * @param shortestStart offset of the most downstream reported start of the same region
     * @return the populated feature
     */
    private static GenomeFeature buildFeature(String seqId, GenomeLocation.Strand strand, int frame, int seqLength, int startInSeq, int endInSeq, String startCodon, int shortestStart) {
        GenomeFeature gff = new GenomeFeature();
        gff.seqId = seqId;
        gff.source = "sixFrameORF";
        gff.type = "CDS";
        // Translate strand-local 0-based offsets into 1-based, inclusive forward-strand coordinates.
        boolean plus = strand == GenomeLocation.Strand.PLUS;
        int from = plus ? startInSeq + 1 : seqLength - endInSeq + 1;
        int to = plus ? endInSeq : seqLength - startInSeq;
        gff.location = new GenomeLocation(from, to, strand, seqId);
        String strFrame = strand + String.valueOf(frame + 1);
        String id = "ORF_" + gff.location.from + ".." + gff.location.to + "_" + gff.location.lengthAA() + "aa_" + strFrame;
        gff.setID(id);
        gff.setAtt("frame", strFrame);
        gff.setAtt("startCodon", startCodon);
        if (startInSeq != shortestStart) {
            // Number of amino acids this ORF adds upstream of the region's shortest reported ORF.
            gff.setAtt("extension", Utils.signedString((shortestStart - startInSeq) / 3) + "aa");
        }
        gff.setAtt("pseudo", "false");
        return gff;
    }

    /**
     * Writes one row of the tabular output, including the flanking sequences when {@code extend > 0}.
     * Flanking regions are clamped to the sequence bounds; a flank with no nucleotides left yields an empty column.
     *
     * @param outTab the tabular writer
     * @param gff the feature to write
     * @param seqForward the forward-strand sequence the feature coordinates refer to
     * @param extend number of flanking nucleotides requested on each side
     * @throws Exception if writing fails (the project writer's checked exceptions are not visible here)
     */
    private static void writeTabLine(UOBufferedWriter outTab, GenomeFeature gff, StringBuilder seqForward, int extend) throws Exception {
        GenomeLocation upstream = null;
        GenomeLocation downstream = null;
        if (extend > 0) {
            int seqLength = seqForward.length();
            // Region to the left (lower coordinates) of the feature, clamped at position 1.
            int leftFrom = Math.max(gff.location.from - extend, 1);
            int leftTo = gff.location.from - 1;
            GenomeLocation left = leftFrom <= leftTo ? new GenomeLocation(leftFrom, leftTo, gff.location.strand, gff.location.chromosome) : null;
            // Region to the right (higher coordinates) of the feature, clamped at the sequence end.
            int rightFrom = gff.location.to + 1;
            int rightTo = Math.min(gff.location.to + extend, seqLength);
            GenomeLocation right = rightFrom <= rightTo ? new GenomeLocation(rightFrom, rightTo, gff.location.strand, gff.location.chromosome) : null;
            if (gff.location.strand == GenomeLocation.Strand.PLUS) {
                upstream = left;
                downstream = right;
            } else if (gff.location.strand == GenomeLocation.Strand.MINUS) {
                // On the minus strand, upstream lies at higher forward coordinates.
                upstream = right;
                downstream = left;
            }
        }
        outTab.writeTsvLine(new Object[]{gff.getID(), gff.location.chromosome, gff.location.from, gff.location.to, gff.location.strand, gff.getAtt("frame"), gff.getAtt("startCodon"), gff.getAtt("extension"), gff.location.getSequence(seqForward), upstream != null ? upstream.getSequence(seqForward) : "", downstream != null ? downstream.getSequence(seqForward) : ""});
    }
}

