package decompounder;

import de.uni_leipzig.asv.toolbox.baseforms.Zerleger2;
import decompounder.CompoundDisambiguator.DisambiguatedHypernym;
import de.tuebingen.uni.sfs.germanet.dbapi.GermaNet;
import de.tuebingen.uni.sfs.germanet.dbapi.LexUnit;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;

/**
 * Determines modifier and head of a noun by splitting it with the ASV splitter
 * ({@link Zerleger2}) and looking the resulting constituents up in GermaNet.
 *
 * <p>Only splits into two or three constituents are analysed; everything else
 * is reported as "no compound" or "more than 3 constituents". Decisions are
 * appended to two log files whose names end with the suffix configured via
 * {@link #setFileEnding(String)}.
 *
 * @author Verena Henrich
 */
public class CompoundDeterminerASV {
    /** Base name of the file receiving the SQL strings of accepted compounds. */
    private static final String COMPOUNDS_FILE = "compounds_determined_with_asv";
    /** Base name of the file receiving nouns without an accepted analysis. */
    private static final String NON_COMPOUNDS_FILE = "non_compounds_determined_with_asv";

    /** Reduce file for splitting. */
    private static final String REDUCE_TREE = "src/main/resources/ASV_trees/grfExt.tree";
    /** Forward file. */
    private static final String FORWARD_TREE = "src/main/resources/ASV_trees/kompVVic.tree";
    /** Backward file. */
    private static final String BACKWARD_TREE = "src/main/resources/ASV_trees/kompVHic.tree";

    private final Zerleger2 zerleger;
    private final GermaNet germaNet;
    private String fileEnding = "";

    /**
     * Creates a determiner and initialises the splitter with the ASV tree files.
     *
     * @param germaNet GermaNet instance used for all lexical unit look-ups
     * @throws IOException declared for callers; kept from the original signature
     */
    public CompoundDeterminerASV(GermaNet germaNet) throws IOException {
        this.germaNet = germaNet;

        // The splitter is created exactly once (it used to be built twice).
        zerleger = new Zerleger2();
        zerleger.init(FORWARD_TREE, BACKWARD_TREE, REDUCE_TREE);
    }

    /**
     * Splits {@code noun} and tries to turn the split into a modifier/head analysis.
     *
     * @param id   GermaNet lexical unit id of the noun
     * @param noun orthographic form to analyse
     * @return the accepted analysis, or an entry built from empty strings whose
     *         comment says why no analysis was accepted (empty comment if the
     *         noun is no compound or two analyses could not be told apart)
     * @throws IOException if writing to one of the log files fails
     */
    public CompoundDBEntry determineCompoundWithASV(int id, String noun) throws IOException {
        List<String> splitted = zerleger.kZerlegung(noun);
        normalizeConstituents(splitted);

        int constituentCount = splitted.size();
        if (constituentCount == 2) {
            return determineBinaryCompound(id, noun, splitted);
        }
        if (constituentCount == 3) {
            return determineTernaryCompound(id, noun, splitted);
        }
        if (constituentCount > 3) {
            CompoundDeterminer.writeFile(NON_COMPOUNDS_FILE + fileEnding,
                    noun + " has more than 3 constituents: " + splitted + "\n", true);
            return new CompoundDBEntry("", "", "", id, "ASV: has more than 3 constituents: " + splitted);
        }

        // Fewer than two constituents: the noun is not a compound.
        CompoundDeterminer.writeFile(NON_COMPOUNDS_FILE + fileEnding, noun + ": is no compound\n", true);
        return new CompoundDBEntry("", "", "", id, "");
    }

    /**
     * Cleans up the raw splitter output in place: drops empty constituents and
     * re-attaches parts the splitter separates although they are not treated
     * as constituents of their own here ("schaft", and "bau" + "werk").
     */
    private void normalizeConstituents(List<String> splitted) {
        while (splitted.contains("")) {
            splitted.remove("");
        }

        // A trailing "schaft" is glued back onto the preceding constituent.
        int last = splitted.size() - 1;
        if (splitted.size() > 1 && splitted.get(last).equalsIgnoreCase("schaft")) {
            splitted.remove(last);
            splitted.set(last - 1, splitted.get(last - 1) + "schaft");
        }

        // A trailing "bau" + "werk" pair is merged into the single constituent "bauwerk".
        last = splitted.size() - 1;
        if (splitted.size() > 2
                && splitted.get(last).equalsIgnoreCase("werk")
                && splitted.get(last - 1).equalsIgnoreCase("bau")) {
            splitted.remove(last);
            splitted.set(last - 1, "bauwerk");
        }
    }

    /** Handles a split into exactly two constituents (modifier + head). */
    private CompoundDBEntry determineBinaryCompound(int id, String noun, List<String> splitted)
            throws IOException {
        String modifier = splitted.get(0);
        String head = splitted.get(1);

        // A constituent containing ";" is not used literally; the matching
        // surface part of the noun is taken instead. The indices are clamped
        // because such a constituent can be longer than the noun itself, which
        // used to raise a StringIndexOutOfBoundsException.
        if (modifier.contains(";")) {
            modifier = prefix(noun, noun.length() - head.length());
        }
        if (head.contains(";")) {
            // NOTE(review): the "+ 1" skips one character after the modifier,
            // presumably a linking element - confirm that this is intended.
            head = suffix(noun, modifier.length() + 1);
        }

        CompoundDBEntry compound = createCompoundDBEntry(modifier, head, id);

        if (compound.isHeadInGermaNet()) {
            if (compound.isModifierInGermaNet()) {
                compound.setComment("ASV: 2 constituents, both parts in GN");
            } else {
                compound.setComment("ASV: 2 constituents, head in GN");
            }
            CompoundDeterminer.writeFile(COMPOUNDS_FILE + fileEnding, compound.toSQLString() + "\n", true);
            return compound;
        }

        CompoundDeterminer.writeFile(NON_COMPOUNDS_FILE + fileEnding,
                noun + " is not correctly splitted: " + splitted + "\n", true);
        return new CompoundDBEntry("", "", "", id, "ASV: is not correctly splitted: " + splitted);
    }

    /**
     * Handles a split into exactly three constituents by comparing the two
     * possible binary bracketings: first part as modifier, or last part as head.
     */
    private CompoundDBEntry determineTernaryCompound(int id, String noun, List<String> splitted)
            throws IOException {
        String first = splitted.get(0);
        String second = splitted.get(1);
        String third = splitted.get(2);

        // Candidate 1: the modifier is the first constituent; the head starts
        // where the second constituent (minus its last character) is found in
        // the noun, or directly after the modifier if it cannot be found.
        String secondWithoutLastChar = second.substring(0, second.length() - 1);
        String head1 = suffix(noun, first.length());
        int head1Start = noun.indexOf(secondWithoutLastChar, first.length() - 1);
        if (head1Start != -1) {
            head1 = noun.substring(head1Start);
        }
        CompoundDBEntry compound1 = createCompoundDBEntry(first, head1, id);

        // Candidate 2: the head is the third constituent; the modifier is
        // everything up to the end of the second constituent, or everything
        // before the head if the second constituent does not occur literally.
        String modifier2;
        int secondStart = noun.indexOf(second);
        if (secondStart != -1) {
            modifier2 = noun.substring(0, secondStart + second.length());
        } else {
            modifier2 = prefix(noun, noun.length() - third.length());
        }
        CompoundDBEntry compound2 = createCompoundDBEntry(modifier2, third, id);

        boolean bothParts1 = compound1.isModifierInGermaNet() && compound1.isHeadInGermaNet();
        boolean bothParts2 = compound2.isModifierInGermaNet() && compound2.isHeadInGermaNet();

        if (bothParts1 && bothParts2) {
            // Both candidates are fully covered: prefer the larger head
            // distance, then the larger modifier distance.
            if (compound1.getHeadHypernymDistance() > compound2.getHeadHypernymDistance()) {
                compound1.setComment("ASV: 3 constituents, modifier and head are in GermaNet (headDistance1 > headDistance2)");
                return compound1;
            } else if (compound1.getHeadHypernymDistance() < compound2.getHeadHypernymDistance()) {
                compound2.setComment("ASV: 3 constituents, modifier and head are in GermaNet (headDistance1 < headDistance2)");
                return compound2;
            } else if (compound1.getModifierHypernymDistance() > compound2.getModifierHypernymDistance()) {
                compound1.setComment("ASV: 3 constituents, modifier and head are in GermaNet (modifierDistance1 > modifierDistance2)");
                return compound1;
            } else if (compound1.getModifierHypernymDistance() < compound2.getModifierHypernymDistance()) {
                compound2.setComment("ASV: 3 constituents, modifier and head are in GermaNet (modifierDistance1 < modifierDistance2)");
                return compound2;
            }

            // Undecidable: log both candidates and fall through to the empty entry.
            CompoundDeterminer.twoBinaryCompoundsAreInGN++;
            CompoundDeterminer.writeFile(NON_COMPOUNDS_FILE + fileEnding, noun + ": two possible compounds are in GN\n", true);
            CompoundDeterminer.writeFile(NON_COMPOUNDS_FILE + fileEnding, compound1.toSQLString() + "\n", true);
            CompoundDeterminer.writeFile(NON_COMPOUNDS_FILE + fileEnding, compound2.toSQLString() + "\n", true);
        } else if (bothParts1) {
            // Not a duplicate of the first test: reached only when candidate 2
            // is not fully covered.
            compound1.setComment("ASV: 3 constituents, both parts in GN");
            CompoundDeterminer.writeFile(COMPOUNDS_FILE + fileEnding, compound1.toSQLString() + "\n", true);
            return compound1;
        } else if (bothParts2) {
            compound2.setComment("ASV: 3 constituents, both parts in GN");
            CompoundDeterminer.writeFile(COMPOUNDS_FILE + fileEnding, compound2.toSQLString() + "\n", true);
            return compound2;
        } else if (compound1.isHeadInGermaNet() && compound2.isHeadInGermaNet()) {
            // NOTE(review): unlike the other accepting branches, these two
            // returns neither set a specific comment nor write to the
            // compounds file - confirm whether that is intended.
            if (compound1.getHeadHypernymDistance() > compound2.getHeadHypernymDistance()) {
                return compound1;
            } else if (compound1.getHeadHypernymDistance() < compound2.getHeadHypernymDistance()) {
                return compound2;
            }

            CompoundDeterminer.writeFile(NON_COMPOUNDS_FILE + fileEnding, noun + ": two possible compounds whose heads are in GN\n", true);
            CompoundDeterminer.writeFile(NON_COMPOUNDS_FILE + fileEnding, compound1.toSQLString() + "\n", true);
            CompoundDeterminer.writeFile(NON_COMPOUNDS_FILE + fileEnding, compound2.toSQLString() + "\n", true);
        } else if (compound1.isHeadInGermaNet()) {
            compound1.setComment("ASV: 3 constituents, head in GN");
            CompoundDeterminer.writeFile(COMPOUNDS_FILE + fileEnding, compound1.toSQLString() + "\n", true);
            return compound1;
        } else if (compound2.isHeadInGermaNet()) {
            compound2.setComment("ASV: 3 constituents, head in GN");
            CompoundDeterminer.writeFile(COMPOUNDS_FILE + fileEnding, compound2.toSQLString() + "\n", true);
            return compound2;
        } else {
            CompoundDeterminer.writeFile(NON_COMPOUNDS_FILE + fileEnding, noun + " not correctly splitted: " + splitted + "\n", true);
            return new CompoundDBEntry("", "", "", id, "ASV: not correctly splitted: " + splitted);
        }

        return new CompoundDBEntry("", "", "", id, "");
    }

    /**
     * Looks modifier and head up in GermaNet and bundles the outcome.
     *
     * <p>Each constituent is first passed to
     * {@code CompoundDisambiguator.disambiguateHypernym}; if that yields nothing,
     * a plain lexical unit look-up is used. The modifier is additionally tried
     * with an appended "e" and with a lower-cased first letter; the head is
     * tried with an upper-cased first letter before its given spelling.
     *
     * @param modifier   candidate modifier
     * @param head       candidate head
     * @param compoundId GermaNet lexical unit id of the compound
     * @return entry holding the (possibly re-spelled) constituents, their ids
     *         (-1 when not uniquely determined), the hypernym distances (-1 when
     *         unknown) and whether each constituent was found
     */
    private CompoundDBEntry createCompoundDBEntry(String modifier, String head, int compoundId) {
        String comment = "ASV";
        LexUnit compound = germaNet.getLexUnitByID(compoundId);

        // ----- modifier -----
        int modifierId = -1;
        int modifierHypernymDistance = -1;
        boolean modifierIsInGermaNet = true;

        DisambiguatedHypernym modifierHypernym =
                CompoundDisambiguator.disambiguateHypernym(compound, modifier, germaNet);
        List<LexUnit> lexUnits = new ArrayList<LexUnit>();

        if (modifierHypernym == null) {
            lexUnits = germaNet.getLexUnits(modifier);

            if (lexUnits.isEmpty()) {
                // Try the modifier with an appended "e".
                // NOTE(review): a compositional type (MODIFIER_E) used to be
                // computed here but was never passed on; the unused local was removed.
                String modifierWithE = modifier + "e";
                modifierHypernym = CompoundDisambiguator.disambiguateHypernym(compound, modifierWithE, germaNet);
                if (modifierHypernym == null) {
                    lexUnits = germaNet.getLexUnits(modifierWithE);
                }

                if (modifierHypernym != null || !lexUnits.isEmpty()) {
                    modifier = modifierWithE;
                } else {
                    // Try the modifier with its first letter in lower case.
                    String lowerCased = lowerFirst(modifier);
                    modifierHypernym = CompoundDisambiguator.disambiguateHypernym(compound, lowerCased, germaNet);
                    if (modifierHypernym == null) {
                        lexUnits = germaNet.getLexUnits(lowerCased);
                    }
                    if (modifierHypernym != null || !lexUnits.isEmpty()) {
                        modifier = lowerCased;
                    }
                }
            }
        }

        if (modifierHypernym != null) {
            modifierId = modifierHypernym.getHypernym().getId();
            modifierHypernymDistance = modifierHypernym.getDistance();
        } else if (lexUnits.isEmpty()) {
            modifierIsInGermaNet = false;
        } else if (lexUnits.size() == 1) {
            // Only an unambiguous look-up yields an id.
            modifierId = lexUnits.get(0).getId();
        }

        // ----- head -----
        int headId = -1;
        int headHypernymDistance = -1;
        boolean headIsInGermaNet = true;

        // Try the head with its first character in upper case first.
        String capitalizedHead = upperFirst(head);
        DisambiguatedHypernym headHypernym =
                CompoundDisambiguator.disambiguateHypernym(compound, capitalizedHead, germaNet);
        lexUnits = new ArrayList<LexUnit>();

        if (headHypernym != null) {
            head = capitalizedHead;
        } else {
            lexUnits = germaNet.getLexUnits(capitalizedHead);
            if (!lexUnits.isEmpty()) {
                head = capitalizedHead;
            } else {
                // Fall back to the head exactly as it was passed in.
                headHypernym = CompoundDisambiguator.disambiguateHypernym(compound, head, germaNet);
                if (headHypernym == null) {
                    lexUnits = germaNet.getLexUnits(head);
                }
            }
        }

        if (headHypernym != null) {
            headId = headHypernym.getHypernym().getId();
            headHypernymDistance = headHypernym.getDistance();
        } else if (lexUnits.isEmpty()) {
            headIsInGermaNet = false;
        } else if (lexUnits.size() == 1) {
            headId = lexUnits.get(0).getId();
        }

        return new CompoundDBEntry(compound.getOrthForm(), modifier, modifierId,
                head, headId, compoundId, comment, modifierIsInGermaNet, headIsInGermaNet,
                modifierHypernymDistance, headHypernymDistance);
    }

    /**
     * Sets the suffix appended to both log file names and truncates the two
     * resulting files by writing an empty string in non-append mode.
     *
     * @param fileEnding suffix for the log file names
     * @throws IOException if one of the files cannot be written
     */
    public void setFileEnding(String fileEnding) throws IOException {
        this.fileEnding = fileEnding;
        CompoundDeterminer.writeFile(COMPOUNDS_FILE + fileEnding, "", false);
        CompoundDeterminer.writeFile(NON_COMPOUNDS_FILE + fileEnding, "", false);
    }

    /** Returns {@code text.substring(0, end)} with {@code end} clamped to the valid range. */
    private static String prefix(String text, int end) {
        return text.substring(0, Math.max(0, Math.min(end, text.length())));
    }

    /** Returns {@code text.substring(begin)} with {@code begin} clamped to the valid range. */
    private static String suffix(String text, int begin) {
        return text.substring(Math.max(0, Math.min(begin, text.length())));
    }

    /** Upper-cases the first character locale-independently; an empty string is returned unchanged. */
    private static String upperFirst(String text) {
        if (text.isEmpty()) {
            return text;
        }
        return text.substring(0, 1).toUpperCase(Locale.ROOT) + text.substring(1);
    }

    /** Lower-cases the first character locale-independently; an empty string is returned unchanged. */
    private static String lowerFirst(String text) {
        if (text.isEmpty()) {
            return text;
        }
        return text.substring(0, 1).toLowerCase(Locale.ROOT) + text.substring(1);
    }
}
