package decompounder;

import de.tuebingen.uni.sfs.germanet.dbapi.GermaNet;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;

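/**
 * Determines modifier/head splits for GermaNet nouns by combining the
 * analyses of SMOR, GermaNet and ASV, and writes the results to text files.
 *
 * @author Verena Henrich
 */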
public class CompoundDeterminer {
    // JDBC connection opened in the constructor; used by getNouns and handed
    // to the GN and SMOR determiners.
    private final Connection connection;
    // Database credentials, used both for the GermaNet API objects and for
    // the raw JDBC connection.
    private static final String username = "germanet";
    private static final String password = "germanet";
    // Two GermaNet handles that differ only in the boolean constructor flag
    // (true here, false for germaNet) -- presumably "ignore case"; TODO confirm
    // against the GermaNet API.
    private final GermaNet germaNetCaseInsensitive;
    private final GermaNet germaNet;
//    public static String databaseUrl = "jdbc:postgresql://localhost:5432/germanet";
    // JDBC URL of the PostgreSQL database that is queried.
    public static String databaseUrl = "jdbc:postgresql://localhost:5432/germanet14a";
    // Base names of the output files; fileEnding is appended to each of them
    // before they are passed to writeFile.
    private String compoundsWithoutHyphensFinalFileName = "compounds_without_hyphens_";
    private String compoundsWithoutHyphensToReviewFileName = "compounds_without_hyphens_to_review";
    // Default suffix; processCompoundsWithoutHyphens replaces it with a suffix
    // that encodes the processed lexical unit ID range.
    private String fileEnding = ".txt";
    private String nonCompoundsFinalFile = "non_compounds";
    private String nonCompoundsToReviewFile = "non_compounds_to_review";
    // Public counter; not read or written in the visible part of this file --
    // NOTE(review): presumably updated by one of the determiner classes, verify.
    public static int twoBinaryCompoundsAreInGN = 0;
    // Character set names accepted by writeFile's encoding parameter.
    public static String latin1Encoding = "ISO-8859-1"; // ISO-8859-1 == Latin-1
    // Encoding used by the three-argument writeFile overload.
    public static String utf8Encoding = "UTF-8";
    // The three analyzers whose results are combined; created in the constructor.
    private CompoundDeterminerASV compoundDeterminerASV;
    private CompoundDeterminerGN compoundDeterminerGN;
    private CompoundDeterminerSMOR compoundDeterminerSMOR;
    // Directory prefix (including trailing separator) that writeFile puts in
    // front of every file name.
    public static String resultsFolder = "src/test/resources/results" + File.separatorChar;// + "2013-05-31_all_new_lexunits" + File.separatorChar;

    // Selects orth_form and id of lexical units whose synset has
    // word_category_id = 1 (presumably nouns, going by the constant's name),
    // whose orth_form is longer than 3 characters and contains neither a space
    // nor a hyphen, restricted to an inclusive ID range given by the two
    // parameters, ordered by ID.
    private static final String GET_NOUNS_WITHOUT_HYPHEN_QUERY = "SELECT l.orth_form AS orth_form, l.id AS id "
            + "FROM lex_unit_table l, synset_table s WHERE l.synset_id = s.id "
            + "and s.word_category_id = 1 and length(l.orth_form) > 3 "
            + "and l.orth_form not like '% %' and l.orth_form not like '%-%' "
            + "and l.id between ? and ?"
            + " order by l.id";

    /**
     * Command-line entry point: runs the three processing passes over a
     * hard-coded range of lexical unit IDs.
     *
     * @param args ignored
     * @throws IOException if one of the passes reports an I/O failure
     * @throws Exception if the determiner cannot be set up or a pass fails
     */
    public static void main(String[] args) throws IOException, Exception {
        // To create a new compounds list, adjust the range of lexical units
        // (by their IDs) that should be processed.
        // Upper bounds noted alongside the original values: 125687, 141904.
        final int firstId = 118719;
        final int lastId = 119719;

        final CompoundDeterminer determiner = new CompoundDeterminer();

        determiner.processCompoundsWithHyphens(firstId, lastId);
        determiner.processNonCompoundsWithEmptySpace(firstId, lastId);
        determiner.processCompoundsWithoutHyphens(firstId, lastId);
    }

    public CompoundDeterminer() throws Exception {
        germaNetCaseInsensitive = new GermaNet(username, password, databaseUrl, true);
        germaNet = new GermaNet(username, password, databaseUrl, false);
        try {
            this.connection = DriverManager.getConnection(databaseUrl, username, password);
        } catch (SQLException ex) {
            throw new Exception(ex);
        }

        compoundDeterminerASV = new CompoundDeterminerASV(germaNet);
        compoundDeterminerGN = new CompoundDeterminerGN(germaNet, connection);
        compoundDeterminerSMOR = new CompoundDeterminerSMOR(germaNet, connection);
    }

    /**
     * Writes {@code content} to a file in the results folder using the
     * UTF-8 encoding.
     *
     * @param fileName name of the file, relative to {@code resultsFolder}
     * @param content  text to write
     * @param append   {@code true} to append, {@code false} to overwrite
     * @throws IOException declared by the four-argument overload this delegates to
     */
    public static void writeFile(String fileName, String content, boolean append) throws IOException {
        final String defaultEncoding = utf8Encoding;
        writeFile(fileName, content, append, defaultEncoding);
    }

    /**
     * Writes {@code content} to a file in the results folder using the given
     * encoding.
     *
     * <p>File names containing {@code "compounds_determined_with_"} or
     * {@code "_to_review_"} denote temporary in-between files and are silently
     * skipped while {@code writeTmpFiles} is {@code false}.
     *
     * <p>Writing is best effort: any failure is reported on {@code System.err}
     * and not propagated. The stream is always closed, also when opening the
     * writer or writing the content fails.
     *
     * @param fileName name of the file, relative to {@code resultsFolder}
     * @param content  text to write
     * @param append   {@code true} to append, {@code false} to overwrite
     * @param encoding name of the character set used to encode the text
     * @throws IOException declared for callers; failures are currently only logged
     */
    public static void writeFile(String fileName, String content, boolean append, String encoding) throws IOException {
        // this would produce temporary "in-between" files that are never needed
        boolean writeTmpFiles = false;
        if (!writeTmpFiles &&
                (fileName.contains("compounds_determined_with_") || fileName.contains("_to_review_"))) {
            return;
        }

        FileOutputStream stream = null;
        Writer writer = null;
        try {
            stream = new FileOutputStream(new File(resultsFolder + fileName), append);
            writer = new BufferedWriter(new OutputStreamWriter(stream, encoding));
            writer.write(content);
        } catch (Exception ex) {
            System.err.println(ex.getMessage());
        } finally {
            // Closing the writer also closes the underlying stream; fall back
            // to the raw stream if the writer could not be created (e.g.
            // unsupported encoding) so the file handle is never leaked.
            try {
                if (writer != null) {
                    writer.close();
                } else if (stream != null) {
                    stream.close();
                }
            } catch (IOException closeFailure) {
                System.err.println(closeFailure.getMessage());
            }
        }
    }

    /**
     * Loads the orthographic forms selected by
     * {@code GET_NOUNS_WITHOUT_HYPHEN_QUERY} for an inclusive range of lexical
     * unit IDs.
     *
     * <p>The statement and its result set are always closed, also when the
     * query or the iteration over its rows fails.
     *
     * @param fromLexUnitId lower bound of the ID range (inclusive)
     * @param toLexUnitId   upper bound of the ID range (inclusive)
     * @return map from lexical unit ID to orthographic form, in the order the
     *         rows are returned (the query orders by ID)
     * @throws SQLException if preparing, executing or reading the query fails
     */
    private Map<Integer, String> getNouns(int fromLexUnitId, int toLexUnitId) throws SQLException {
        Map<Integer, String> nouns = new LinkedHashMap<Integer, String>();
        PreparedStatement findNounsStatement = connection.prepareStatement(GET_NOUNS_WITHOUT_HYPHEN_QUERY);
        try {
            findNounsStatement.setInt(1, fromLexUnitId);
            findNounsStatement.setInt(2, toLexUnitId);
            ResultSet results = findNounsStatement.executeQuery();
            try {
                while (results.next()) {
                    nouns.put(results.getInt("id"), results.getString("orth_form"));
                }
            } finally {
                results.close();
            }
        } finally {
            // Release the statement on every path so repeated calls do not
            // accumulate open statements/cursors on the shared connection.
            findNounsStatement.close();
        }

        return nouns;
    }

    private void processCompoundsWithoutHyphens(int fromLexUnitId, int toLexUnitId) throws SQLException, IOException {
//        while (//fromLexUnitId < germaNet.getNumLexUnits() &&
//                toLexUnitId < 125688) {
            Map<Integer, String> nouns = getNouns(fromLexUnitId, toLexUnitId);
            fileEnding = "_from_" + fromLexUnitId + "_to_" + toLexUnitId + ".txt";
            System.out.println("fileEnding=" + fileEnding);

//            writeFile(compoundsWithoutHyphensFinalFileName + fileEnding, CompoundDBEntry.COPY_INTO_QUERY, false);
            writeFile(compoundsWithoutHyphensFinalFileName + fileEnding, "", false);
//            writeFile(compoundsWithoutHyphensToReviewFileName + fileEnding, CompoundDBEntry.COPY_INTO_QUERY, false);
            writeFile(compoundsWithoutHyphensToReviewFileName + fileEnding, "", false);
            writeFile(nonCompoundsFinalFile + fileEnding, "", false);
            writeFile(nonCompoundsToReviewFile + fileEnding, "", false);

            compoundDeterminerSMOR.setFileEnding(fileEnding);
            compoundDeterminerGN.setFileEnding(fileEnding);
            compoundDeterminerASV.setFileEnding(fileEnding);

            // go through all extracted nouns
            Iterator it = nouns.entrySet().iterator();
            while (it.hasNext()) {
                Map.Entry<Integer, String> entry = (Map.Entry)it.next();
                String noun = entry.getValue();
                int id = entry.getKey();

                CompoundDBEntry smorCompound = compoundDeterminerSMOR.determineCompoundWithSMOR(id, noun);
                CompoundDBEntry gnCompound = compoundDeterminerGN.determineCompoundWithGermaNet(id, noun);
                CompoundDBEntry asvCompound = compoundDeterminerASV.determineCompoundWithASV(id, noun);
                CompoundDBEntry smorCompoundClone = new CompoundDBEntry(noun, smorCompound.getModifier(), smorCompound.getHead(), id, smorCompound.getComment());
                CompoundDBEntry gnCompoundClone = new CompoundDBEntry(noun, gnCompound.getModifier(), gnCompound.getHead(), id, gnCompound.getComment());
                CompoundDBEntry asvCompoundClone = new CompoundDBEntry(noun, asvCompound.getModifier(), asvCompound.getHead(), id, asvCompound.getComment());

//                System.out.println(smorCompound.toSQLString());
//                System.out.println(gnCompound.toSQLString());
//                System.out.println(asvCompound.toSQLString());

                HashMap<String, String> changeModifiers = new HashMap<String, String>();
                changeModifiers.put("Neben", "neben");
                changeModifiers.put("Gegen", "gegen");
                changeModifiers.put("Haupt", "haupt");
                changeModifiers.put("General", "general");
                changeModifiers.put("Rück", "rück");
                changeModifiers.put("Elektro", "elektro");
                changeModifiers.put("Ober", "ober");
                changeModifiers.put("Spitz", "spitz");
                changeModifiers.put("Mini", "mini");
                changeModifiers.put("Sonder", "sonder");
                changeModifiers.put("Brutto", "brutto");
                changeModifiers.put("Netto", "netto");
                changeModifiers.put("Bio", "bio");
                changeModifiers.put("Über", "über");
                changeModifiers.put("Audio", "audio");
                changeModifiers.put("Midi", "midi");
                changeModifiers.put("Lokal", "lokal");
                changeModifiers.put("Mikro", "mikro");
                changeModifiers.put("Makro", "makro");
                changeModifiers.put("Mittel", "mittel");
                changeModifiers.put("Meta", "meta");
                changeModifiers.put("Tief", "tief");
                changeModifiers.put("Zwischen", "zwischen");
                changeModifiers.put("Zweit", "zweit");

                changeModifiers.put("einzel", "einzeln");
                changeModifiers.put("Einzel", "einzeln");
                changeModifiers.put("doppel", "doppelt");
                changeModifiers.put("Doppel", "doppelt");
                changeModifiers.put("erst", "erste");
                changeModifiers.put("Putz", "putzen");
                changeModifiers.put("Spann", "spannen");
                changeModifiers.put("Warte", "warten");
                changeModifiers.put("Fernseh", "Fernsehen");

                if (changeModifiers.containsKey(smorCompound.getModifier())) {
                    smorCompound.setModifier(changeModifiers.get(smorCompound.getModifier()));
                }

                if (changeModifiers.containsKey(asvCompound.getModifier())) {
                    asvCompound.setModifier(changeModifiers.get(asvCompound.getModifier()));
                }

                if (changeModifiers.containsKey(gnCompound.getModifier())) {
                    gnCompound.setModifier(changeModifiers.get(gnCompound.getModifier()));
                }

                if (smorCompound.getHead().matches("[a-zäöü].*")) {
//                    System.out.println(noun + " " + id + ": smor-head small case: " + smorCompound.getHead());
                    smorCompound.setHead("");
                } else if (!noun.endsWith(smorCompound.getHead().toLowerCase())) {
//                    System.out.println(noun + " " + id + ": smor-head does not equal noun: " + smorCompound.getHead());
                    smorCompound.setHead("");
                }

                if (gnCompound.getHead().matches("[a-zäöü].*")) {
//                    System.out.println(noun + " " + id + ": gn-head small case: " + gnCompound.getHead());
                    gnCompound.setHead("");
                } else if (!noun.endsWith(gnCompound.getHead().toLowerCase())) {
//                    System.out.println(noun + " " + id + ": gn-head does not equal noun: " + gnCompound.getHead());
                    gnCompound.setHead("");
                }

                if (asvCompound.getHead().matches("[a-zäöü].*")) {
//                    System.out.println(noun + " " + id + ": asv-head small case: " + asvCompound.getHead());
                    asvCompound.setHead("");
                } else if (!noun.endsWith(asvCompound.getHead().toLowerCase())) {
//                    System.out.println(noun + " " + id + ": asv-head does not equal noun: " + asvCompound.getHead());
                    asvCompound.setHead("");
                }

                if (noun.endsWith("machung")) {
//                    System.out.println(noun + ": -machung");
                    writeFile(nonCompoundsFinalFile + fileEnding,
                            "np\t\t" + noun + "\t" + id + "\t\\N\t\t\t\\N\t\t\tnoun ends with \'machung\'; No compound induced\t"
                            + smorCompoundClone.toSmallSQLString() + "\t" + gnCompoundClone.toSmallSQLString() + "\t"
                            + asvCompoundClone.toSmallSQLString() + "\n", true);
                } else if (noun.endsWith("igkeit")
                        && !germaNetCaseInsensitive.getLexUnits(noun.substring(0, noun.lastIndexOf("igkeit"))).isEmpty()) {
//                    System.out.println(noun + ": existing word + igkeit");
                    writeFile(nonCompoundsFinalFile + fileEnding,
                            "np\t\t" + noun + "\t" + id + "\t\\N\t\t\t\\N\t\t\tnoun ends with \'igkeit\'; No compound induced\t"
                            + smorCompoundClone.toSmallSQLString() + "\t" + gnCompoundClone.toSmallSQLString() + "\t"
                            + asvCompoundClone.toSmallSQLString() + "\n", true);
//                        return new CompoundDBEntry("", "", "", id, "GN: existing word + \'igkeit\'");
                } else if (noun.endsWith("keit")
                        && !germaNetCaseInsensitive.getLexUnits(noun.substring(0, noun.lastIndexOf("keit"))).isEmpty()) {
//                    System.out.println(noun + ": existing word + keit");
                    writeFile(nonCompoundsFinalFile + fileEnding,
                            "np\t\t" + noun + "\t" + id + "\t\\N\t\t\t\\N\t\t\tnoun ends with \'-keit\'; No compound induced\t"
                            + smorCompoundClone.toSmallSQLString() + "\t" + gnCompoundClone.toSmallSQLString() + "\t"
                            + asvCompoundClone.toSmallSQLString() + "\n", true);
//                        return new CompoundDBEntry("", "", "", id, "GN: existing word + \'keit\'");
                } else if (noun.endsWith("keit")
                        && !noun.endsWith("fertigkeit")
                        && !noun.endsWith("fähigkeit")
                        && !noun.endsWith("tätigkeit")
                        && !noun.endsWith("geschwindigkeit")
                        && !noun.endsWith("wahrscheinlichkeit")
                        && !noun.endsWith("persinlichkeit")
                        && !noun.endsWith("möglichkeit")) {
//                    System.out.println(noun + ": -keit");
                    writeFile(nonCompoundsFinalFile + fileEnding,
                            "np\t\t" + noun + "\t" + id + "\t\\N\t\t\t\\N\t\t\tnoun ends with \'-keit\'; No compound induced\t"
                            + smorCompoundClone.toSmallSQLString() + "\t" + gnCompoundClone.toSmallSQLString() + "\t"
                            + asvCompoundClone.toSmallSQLString() + "\n", true);
//                        return new CompoundDBEntry("", "", "", id, "GN: ends with \'keit\'");
                } else if (noun.endsWith("heit")
                        && !germaNetCaseInsensitive.getLexUnits(noun.substring(0, noun.lastIndexOf("heit"))).isEmpty()) {
//                    System.out.println(noun + ": existing word + heit");
                    writeFile(nonCompoundsFinalFile + fileEnding,
                            "np\t\t" + noun + "\t" + id + "\t\\N\t\t\t\\N\t\t\tnoun ends with \'-heit\'; No compound induced\t"
                            + smorCompoundClone.toSmallSQLString() + "\t" + gnCompoundClone.toSmallSQLString() + "\t"
                            + asvCompoundClone.toSmallSQLString() + "\n", true);
//                        return new CompoundDBEntry("", "", "", id, "GN: existing word + \'heit\'");
                } else if (smorCompound.getHead().equals("") && gnCompound.getHead().equals("") && asvCompound.getHead().equals("")) {
                    if (noun.endsWith("keit") || noun.endsWith("heit") || noun.endsWith("ität") || noun.endsWith("ung")
                            || noun.endsWith("tum") || noun.endsWith("schaft") || noun.endsWith("tion")) {
                        writeFile(nonCompoundsToReviewFile + fileEnding, id + "\t" + noun + ": SMOR, GN and ASV are null; noun ends with \'-keit\'/\'-heit\'/\'-ität\'/\'-ung\'/\'-tum\'/\'-schaft\'/\'-tion\'\n", true);
                        if (!smorCompound.getComment().equals("")) {
                            writeFile(nonCompoundsToReviewFile + fileEnding, smorCompound.getComment() + "\n", true);
                        }
                        if (!gnCompound.getComment().equals("")) {
                            writeFile(nonCompoundsToReviewFile + fileEnding, gnCompound.getComment() + "\n", true);
                        }
                        if (!asvCompound.getComment().equals("")) {
                            writeFile(nonCompoundsToReviewFile + fileEnding, asvCompound.getComment() + "\n", true);
                        }
                        writeFile(nonCompoundsToReviewFile + fileEnding, "\n", true);
                        writeFile(nonCompoundsFinalFile + fileEnding,
                                "np\t\t" + noun + "\t" + id + "\t\\N\t\t\t\\N\t\t\tSMOR, GN and ASV are null; noun ends with \'-keit\'/\'-heit\'; No compound induced\t"
                                + smorCompoundClone.toSmallSQLString() + "\t" + gnCompoundClone.toSmallSQLString() + "\t"
                                + asvCompoundClone.toSmallSQLString() + "\n", true);
                    } else if ((!smorCompound.getComment().equals("") && !gnCompound.getComment().equals(""))
                            || (!asvCompound.getComment().equals("") && !gnCompound.getComment().equals(""))
                            || !asvCompound.getComment().equals("")
                            || !smorCompound.getComment().equals("")) {
                        writeFile(compoundsWithoutHyphensToReviewFileName + fileEnding, id + "\t" + noun + ": SMOR, GN and ASV are null\n", true);
                        if (!smorCompound.getComment().equals("")) {
                            writeFile(compoundsWithoutHyphensToReviewFileName + fileEnding, smorCompound.getComment() + "\n", true);
                        }
                        if (!gnCompound.getComment().equals("")) {
                            writeFile(compoundsWithoutHyphensToReviewFileName + fileEnding, gnCompound.getComment() + "\n", true);
                        }
                        if (!asvCompound.getComment().equals("")) {
                            writeFile(compoundsWithoutHyphensToReviewFileName + fileEnding, asvCompound.getComment() + "\n", true);
                        }
                        writeFile(compoundsWithoutHyphensToReviewFileName + fileEnding, "\n", true);
//                        writeFile(compoundsWithoutHyphensFinalFileName + fileEnding, noun + "\t" + id + "\n", true);
                        writeFile(compoundsWithoutHyphensFinalFileName + fileEnding,
                                noun + "\t" + id + "\t\\N\t\t\t\\N\t\t\tSMOR, GN and ASV are null; Compound induced, but not splitted\t"
                                + smorCompoundClone.toSmallSQLString() + "\t" + gnCompoundClone.toSmallSQLString() + "\t"
                                + asvCompoundClone.toSmallSQLString() + "\n", true);
                    } else {
                        writeFile(nonCompoundsToReviewFile + fileEnding, id + "\t" + noun + ": SMOR, GN and ASV are null\n", true);
                        if (!smorCompound.getComment().equals("")) {
                            writeFile(nonCompoundsToReviewFile + fileEnding, smorCompound.getComment() + "\n", true);
                        }
                        if (!gnCompound.getComment().equals("")) {
                            writeFile(nonCompoundsToReviewFile + fileEnding, gnCompound.getComment() + "\n", true);
                        }
                        if (!asvCompound.getComment().equals("")) {
                            writeFile(nonCompoundsToReviewFile + fileEnding, asvCompound.getComment() + "\n", true);
                        }
                        writeFile(nonCompoundsToReviewFile + fileEnding, "\n", true);
                        writeFile(nonCompoundsFinalFile + fileEnding,
                                "np\t\t" + noun + "\t" + id + "\t\\N\t\t\t\\N\t\t\tSMOR, GN and ASV are null; No compound induced\t"
                                + smorCompoundClone.toSmallSQLString() + "\t" + gnCompoundClone.toSmallSQLString() + "\t"
                                + asvCompoundClone.toSmallSQLString() + "\n", true);
                    }
                } else if (smorCompound.getHead().equals("") && gnCompound.getHead().equals("")) {
//                    if (!smorCompound.getComment().equals("") || !gnCompound.getComment().equals("")) {
                        writeFile(compoundsWithoutHyphensToReviewFileName + fileEnding, id + "\t" + noun + ": SMOR and GN are null\n", true);
                        writeFile(compoundsWithoutHyphensToReviewFileName + fileEnding, asvCompound.toSQLString() + "\n", true);
                        if (!smorCompound.getComment().equals("")) {
                            writeFile(compoundsWithoutHyphensToReviewFileName + fileEnding, smorCompound.getComment() + "\n", true);
                        }
                        if (!gnCompound.getComment().equals("")) {
                            writeFile(compoundsWithoutHyphensToReviewFileName + fileEnding, gnCompound.getComment() + "\n", true);
                        }
                        writeFile(compoundsWithoutHyphensToReviewFileName + fileEnding, "\n", true);
//                        writeFile(compoundsWithoutHyphensFinalFileName + fileEnding, noun + "\t" + id + "\n", true);
                        writeFile(compoundsWithoutHyphensFinalFileName + fileEnding,
                                noun + "\t" + id + "\t\\N\t\t\t\\N\t\t\tSMOR and GN are null; Compound induced, but not splitted\t"
                                + smorCompoundClone.toSmallSQLString() + "\t" + gnCompoundClone.toSmallSQLString() + "\t"
                                + asvCompoundClone.toSmallSQLString() + "\n", true);
//                    } else {
//                        writeFile(nonCompoundsFile + fileEnding, id + "\t" + noun + ": SMOR and GN are null\n", true);
//                        writeFile(nonCompoundsFile + fileEnding, asvCompound.toSQLString() + "\n", true);
//                        if (!smorCompound.getComment().equals("")) {
//                            writeFile(nonCompoundsFile + fileEnding, smorCompound.getComment() + "\n", true);
//                        }
//                        if (!gnCompound.getComment().equals("")) {
//                            writeFile(nonCompoundsFile + fileEnding, gnCompound.getComment() + "\n", true);
//                        }
//                        writeFile(nonCompoundsFile + fileEnding, "\n", true);
//                    }
                } else if (gnCompound.getHead().equals("") && asvCompound.getHead().equals("")) {
                    CompoundDBEntry compound = smorCompound;
                    compound.setComment("GN and ASV are null; " + smorCompound.getComment());
                    writeFinalResultInCorrectFile(compound, smorCompoundClone, gnCompoundClone, asvCompoundClone);
                } else if (smorCompound.getHead().equals("") && asvCompound.getHead().equals("")) {
                    if (gnCompound.isModifierInGermaNet() && gnCompound.getHeadHypernymDistance() < 5
                                    && gnCompound.getHeadHypernymDistance() > 0) {
                        CompoundDBEntry compound = gnCompound;
                        compound.setComment("SMOR and ASV are null; " + gnCompound.getComment());
                        writeFinalResultInCorrectFile(compound, smorCompoundClone, gnCompoundClone, asvCompoundClone);
                    } else if (!smorCompound.getComment().equals("") || !asvCompound.getComment().equals("")
//                            || gnCompound.getComment().equals("GN: head is hypernym")
//                            || gnCompound.getComment().equals("GN: head is part-whole related")
                            ) {
                        writeFile(compoundsWithoutHyphensToReviewFileName + fileEnding, id + "\t" + noun + ": SMOR and ASV are null\n", true);
                        writeFile(compoundsWithoutHyphensToReviewFileName + fileEnding, gnCompound.toSQLString() + "\n", true);
                        if (!smorCompound.getComment().equals("")) {
                            writeFile(compoundsWithoutHyphensToReviewFileName + fileEnding, smorCompound.getComment() + "\n", true);
                        }
                        if (!asvCompound.getComment().equals("")) {
                            writeFile(compoundsWithoutHyphensToReviewFileName + fileEnding, asvCompound.getComment() + "\n", true);
                        }
                        writeFile(compoundsWithoutHyphensToReviewFileName + fileEnding, "\n", true);
//                        writeFile(compoundsWithoutHyphensFinalFileName + fileEnding, noun + "\t" + id + "\n", true);
                        writeFile(compoundsWithoutHyphensFinalFileName + fileEnding,
                                noun + "\t" + id + "\t\\N\t\t\t\\N\t\t\tSMOR and ASV are null; Compound induced, but not splitted\t"
                                + smorCompoundClone.toSmallSQLString() + "\t" + gnCompoundClone.toSmallSQLString() + "\t"
                                + asvCompoundClone.toSmallSQLString() + "\n", true);
                    } else {
                        writeFile(nonCompoundsToReviewFile + fileEnding, id + "\t" + noun + ": SMOR and ASV are null\n", true);
                        writeFile(nonCompoundsToReviewFile + fileEnding, gnCompound.toSQLString() + "\n", true);
                        if (!smorCompound.getComment().equals("")) {
                            writeFile(nonCompoundsToReviewFile + fileEnding, smorCompound.getComment() + "\n", true);
                        }
                        if (!asvCompound.getComment().equals("")) {
                            writeFile(nonCompoundsToReviewFile + fileEnding, asvCompound.getComment() + "\n", true);
                        }
                        writeFile(nonCompoundsToReviewFile + fileEnding, "\n", true);
                        writeFile(nonCompoundsFinalFile + fileEnding,
                                "np\t\t" + noun + "\t" + id + "\t\\N\t\t\t\\N\t\t\tSMOR and ASV are null; No compound induced\t"
                                + smorCompoundClone.toSmallSQLString() + "\t" + gnCompoundClone.toSmallSQLString() + "\t"
                                + asvCompoundClone.toSmallSQLString() + "\n", true);
                    }
                } else if (asvCompound.getHead().equals("")) {
                    if (smorCompound.equals(gnCompound)) {
                        CompoundDBEntry compound = smorCompound;
                        compound.setComment("SMOR and GN agree, ASV is null; " + smorCompound.getComment());
                        writeFinalResultInCorrectFile(compound, smorCompoundClone, gnCompoundClone, asvCompoundClone);
                    } else {
                        CompoundDBEntry compound = determineIfSmorOutperformsOtherCompound(smorCompound, gnCompound);
                        if (compound != null) {
                            compound.setComment("SMOR and GN do not agree, ASV is null; " + compound.getComment());
                            writeFinalResultInCorrectFile(compound, smorCompoundClone, gnCompoundClone, asvCompoundClone);
                        } else {
                            compound = smorCompound;
                            compound.setComment("SMOR and GN do not agree, ASV is null; " + smorCompound.getComment());
                            writeFinalResultInCorrectFile(compound, smorCompoundClone, gnCompoundClone, asvCompoundClone);
                        }
                    }
                } else if (gnCompound.getHead().equals("")) {
                    if (smorCompound.equals(asvCompound)) {
                        CompoundDBEntry compound = smorCompound;
                        compound.setComment("SMOR and ASV agree, GN is null; " + smorCompound.getComment());
                        writeFinalResultInCorrectFile(compound, smorCompoundClone, gnCompoundClone, asvCompoundClone);
                    } else {
                        CompoundDBEntry compound = determineIfSmorOutperformsOtherCompound(smorCompound, asvCompound);
                        if (compound != null) {
                            compound.setComment("SMOR and ASV do not agree, GN is null; " + compound.getComment());
                            writeFinalResultInCorrectFile(compound, smorCompoundClone, gnCompoundClone, asvCompoundClone);
                        } else {
                            compound = smorCompound;
                            compound.setComment("SMOR and ASV do not agree, GN is null; " + smorCompound.getComment());
                            writeFinalResultInCorrectFile(compound, smorCompoundClone, gnCompoundClone, asvCompoundClone);
                        }
                    }
                } else if (smorCompound.getHead().equals("")) {
                    if (asvCompound.equals(gnCompound)) {
                        CompoundDBEntry compound = gnCompound;
                        compound.setComment("GN and ASV agree, SMOR is null; " + gnCompound.getComment());
                        writeFinalResultInCorrectFile(compound, smorCompoundClone, gnCompoundClone, asvCompoundClone);
                    } else {
                        CompoundDBEntry compound = determineIfOneCompoundOutperformsTheOther(gnCompound, asvCompound);
                        if (compound != null) {
                            compound.setComment("GN and ASV do not agree, SMOR is null; " + compound.getComment());
                            writeFinalResultInCorrectFile(compound, smorCompoundClone, gnCompoundClone, asvCompoundClone);
                        } else {
                            compound = gnCompound;
                            compound.setComment("GN and ASV do not agree, SMOR is null; " + gnCompound.getComment());
                            writeFinalResultInCorrectFile(compound, smorCompoundClone, gnCompoundClone, asvCompoundClone);
//                            writeFile(nonCompoundsFile + fileEnding, id + "\t" + noun + ": GN and ASV do not agree, SMOR is null\n", true);
//                            writeFile(nonCompoundsFile + fileEnding, gnCompound.toSQLString(), true);
//                            writeFile(nonCompoundsFile + fileEnding, asvCompound.toSQLString() + "\n", true);
                        }
                    }
                } else if (smorCompound.equals(gnCompound) && smorCompound.equals(asvCompound)) {
                    CompoundDBEntry compound = smorCompound;
                    compound.setComment("SMOR, GN, and ASV agree; " + smorCompound.getComment());
                    writeFinalResultInCorrectFile(compound, smorCompoundClone, gnCompoundClone, asvCompoundClone);
                } else if (smorCompound.equals(gnCompound) && !smorCompound.equals(asvCompound)) {
                    CompoundDBEntry compound = determineIfSmorOutperformsOtherCompound(smorCompound, asvCompound);
                    if (compound != null) {
//                        System.out.println("1");
                        compound.setComment("SMOR and GN agree, ASV not; " + compound.getComment());
                        writeFinalResultInCorrectFile(compound, smorCompoundClone, gnCompoundClone, asvCompoundClone);
                    } else {
//                        System.out.println("2");
                        compound = smorCompound;
                        compound.setComment("SMOR and GN agree, ASV not; " + smorCompound.getComment());
                        writeFinalResultInCorrectFile(compound, smorCompoundClone, gnCompoundClone, asvCompoundClone);
                    }
                } else if (smorCompound.equals(asvCompound) && !smorCompound.equals(gnCompound)) {
                    CompoundDBEntry compound = determineIfSmorOutperformsOtherCompound(smorCompound, gnCompound);
                    if (compound != null) {
                        compound.setComment("SMOR and ASV agree, GN not; " + compound.getComment());
                        writeFinalResultInCorrectFile(compound, smorCompoundClone, gnCompoundClone, asvCompoundClone);
                    } else {
                        compound = smorCompound;
                        compound.setComment("SMOR and ASV agree, GN not; " + smorCompound.getComment());
                        writeFinalResultInCorrectFile(compound, smorCompoundClone, gnCompoundClone, asvCompoundClone);
                    }
                } else if (gnCompound.equals(asvCompound) && !gnCompound.equals(smorCompound)) {
                    CompoundDBEntry compound = determineIfSmorOutperformsOtherCompound(smorCompound, gnCompound);
                    if (compound != null) {
                        compound.setComment("GN and ASV agree, SMOR not; " + compound.getComment());
                        writeFinalResultInCorrectFile(compound, smorCompoundClone, gnCompoundClone, asvCompoundClone);
                    } else {
                        compound = gnCompound;
                        compound.setComment("GN and ASV agree, SMOR not; " + gnCompound.getComment());
                        writeFinalResultInCorrectFile(compound, smorCompoundClone, gnCompoundClone, asvCompoundClone);
                    }
                } else {
                    CompoundDBEntry compound = determineIfSmorOutperformsOtherCompound(smorCompound, asvCompound);

                    if (compound == null) {
                        compound = determineIfOneCompoundOutperformsTheOther(smorCompound, gnCompound);
                        if (compound == null) {
                            compound = determineIfOneCompoundOutperformsTheOther(gnCompound, asvCompound);
                        }
                    } else if (determineIfOneCompoundOutperformsTheOther(compound, gnCompound) != null) {
                        compound = determineIfOneCompoundOutperformsTheOther(compound, gnCompound);
                    }
                    if (compound != null) {
                        compound.setComment("SMOR, GN, and ASV do not agree; " + compound.getComment());
                        writeFinalResultInCorrectFile(compound, smorCompoundClone, gnCompoundClone, asvCompoundClone);
                    } else {
                        compound = smorCompound;
                        compound.setComment("SMOR, GN, and ASV do not agree; " + smorCompound.getComment());
                        writeFinalResultInCorrectFile(compound, smorCompoundClone, gnCompoundClone, asvCompoundClone);
                    }
                }
            }

//            writeFile(compoundsWithoutHyphensFinalFileName + fileEnding, "\\.\n", true);
//            writeFile(compoundsWithoutHyphensToReviewFileName + fileEnding, "\\.\n", true);
            
//            fromLexUnitId = toLexUnitId + 1;
//            toLexUnitId += 5000;
//        }
    }

    /**
     * Writes the selected analysis of a compound as one line to the results file
     * for compounds without hyphens.
     *
     * The line consists of {@code compound.toSQLString()} followed by a newline and is
     * handed to {@code writeFile} together with the file name
     * {@code compoundsWithoutHyphensFinalFileName + fileEnding}.
     *
     * @param compound     the analysis that was selected as the final result
     * @param smorCompound the SMOR analysis; currently not used by this method
     * @param gnCompound   the GermaNet analysis; currently not used by this method
     * @param asvCompound  the ASV analysis; currently not used by this method
     * @throws IOException declared for the underlying {@code writeFile} call
     */
    private void writeFinalResultInCorrectFile(CompoundDBEntry compound, CompoundDBEntry smorCompound,
            CompoundDBEntry gnCompound, CompoundDBEntry asvCompound) throws IOException {
        // Only the winning analysis is emitted; the three per-analyser entries are
        // accepted for signature compatibility but do not contribute to the output.
        final String targetFileName = compoundsWithoutHyphensFinalFileName + fileEnding;
        final String resultLine = compound.toSQLString() + "\n";
        writeFile(targetFileName, resultLine, true);
    }

    /**
     * Processes compounds that contain hyphens by delegating to
     * {@code compoundDeterminerGN.processCompoundsWithHyphens()}.
     *
     * @throws SQLException propagated from the delegate
     * @throws IOException  propagated from the delegate
     */
    private void processCompoundsWithHyphens() throws SQLException, IOException {
        compoundDeterminerGN.processCompoundsWithHyphens();
    }

    /**
     * Processes compounds that contain hyphens for a range of lexical unit ids by
     * delegating to {@code compoundDeterminerGN.processCompoundsWithHyphens(int, int)}.
     *
     * @param fromLexUnitId start of the lexical unit id range, passed through unchanged
     *                      (NOTE(review): inclusiveness is decided by the delegate - confirm there)
     * @param toLexUnitId   end of the lexical unit id range, passed through unchanged
     * @throws SQLException propagated from the delegate
     * @throws IOException  propagated from the delegate
     */
    private void processCompoundsWithHyphens(int fromLexUnitId, int toLexUnitId) throws SQLException, IOException {
        compoundDeterminerGN.processCompoundsWithHyphens(fromLexUnitId, toLexUnitId);
    }

    /**
     * Processes non-compounds that contain an empty space by delegating to
     * {@code compoundDeterminerGN.processNonCompoundsWithEmptySpace()}.
     *
     * @throws SQLException propagated from the delegate
     * @throws IOException  propagated from the delegate
     */
    private void processNonCompoundsWithEmptySpace() throws SQLException, IOException {
        compoundDeterminerGN.processNonCompoundsWithEmptySpace();
    }

    /**
     * Processes non-compounds that contain an empty space for a range of lexical unit ids
     * by delegating to {@code compoundDeterminerGN.processNonCompoundsWithEmptySpace(int, int)}.
     *
     * @param fromLexUnitId start of the lexical unit id range, passed through unchanged
     *                      (NOTE(review): inclusiveness is decided by the delegate - confirm there)
     * @param toLexUnitId   end of the lexical unit id range, passed through unchanged
     * @throws SQLException propagated from the delegate
     * @throws IOException  propagated from the delegate
     */
    private void processNonCompoundsWithEmptySpace(int fromLexUnitId, int toLexUnitId) throws SQLException, IOException {
        compoundDeterminerGN.processNonCompoundsWithEmptySpace(fromLexUnitId, toLexUnitId);
    }

    /**
     * Decides between the SMOR analysis of a compound and a competing analysis, using
     * the part-of-speech tags found in the SMOR comment before falling back to the
     * generic comparison in {@link #determineIfOneCompoundOutperformsTheOther}.
     *
     * The part of the SMOR comment after its last {@code ":"} is inspected
     * (presumably "modifier&lt;TAG&gt; ... head&lt;TAG&gt;" - TODO confirm the format with the
     * code that produces the comment):
     * <ul>
     *   <li>Same head, different modifier: SMOR wins if its first token is tagged
     *       {@code <V>} or {@code <ADJ>} and the other head starts with an upper-case
     *       letter (including the umlauts).</li>
     *   <li>Same modifier, different head: the other analysis wins if SMOR's last token
     *       is tagged {@code <V>} or {@code <ADJ>}; otherwise the analysis whose head does
     *       not start with a lower-case letter, whose head is in GermaNet, whose head
     *       hypernym distance is larger, or whose head is longer wins - in that order.</li>
     * </ul>
     * If none of these rules decides, the generic comparison is used.
     *
     * @param smorCompound  the analysis produced by SMOR; its comment must not be null
     * @param otherCompound the competing analysis
     * @return {@code smorCompound}, {@code otherCompound}, or whatever the generic
     *         comparison returns (which may be {@code null})
     */
    private CompoundDBEntry determineIfSmorOutperformsOtherCompound(CompoundDBEntry smorCompound, CompoundDBEntry otherCompound) {
        final String fullComment = smorCompound.getComment();
        // Skip past ": " after the last colon; clamp so that a comment that is empty or
        // ends in ':' yields an empty analysis instead of a StringIndexOutOfBoundsException.
        final int analysisStart = Math.min(fullComment.lastIndexOf(":") + 2, fullComment.length());
        String smorComment = fullComment.substring(analysisStart);

        if (smorCompound.getHead().equals(otherCompound.getHead()) && !smorCompound.getModifier().equals(otherCompound.getModifier())) {
            // First space-separated token; equivalent to split(" ")[0] but safe when the
            // analysis consists only of spaces (split would return an empty array).
            final int firstSpace = smorComment.indexOf(' ');
            smorComment = firstSpace < 0 ? smorComment : smorComment.substring(0, firstSpace);

            final String otherHead = otherCompound.getHead();
            // Umlauts are included so that heads such as "Änderung" count as capitalised,
            // consistent with the lower-case checks below that already include them.
            final boolean otherHeadCapitalised = !otherHead.isEmpty()
                    && otherHead.substring(0, 1).matches("[A-ZÄÖÜ]");
            if ((smorComment.contains("<V>") || smorComment.contains("<ADJ>")) && otherHeadCapitalised) {
                return smorCompound;
            }
        } else if (smorCompound.getModifier().equals(otherCompound.getModifier()) && !smorCompound.getHead().equals(otherCompound.getHead())) {
            // Last space-separated token (with its leading space); when there is no space
            // the whole analysis is used instead of calling substring(-1), which throws.
            smorComment = smorComment.substring(Math.max(smorComment.lastIndexOf(" "), 0));

            final boolean smorHeadLowerCase = smorCompound.getHead().matches("[a-zäöü].*");
            final boolean otherHeadLowerCase = otherCompound.getHead().matches("[a-zäöü].*");

            if ((smorComment.contains("<V>") || smorComment.contains("<ADJ>"))) {
                return otherCompound;
            } else if (otherHeadLowerCase && !smorHeadLowerCase) {
                return smorCompound;
            } else if (!otherHeadLowerCase && smorHeadLowerCase) {
                return otherCompound;
            } else if (smorCompound.isHeadInGermaNet() && !otherCompound.isHeadInGermaNet()) {
                return smorCompound;
            } else if (!smorCompound.isHeadInGermaNet() && otherCompound.isHeadInGermaNet()) {
                return otherCompound;
            } else if (smorCompound.getHeadHypernymDistance() > otherCompound.getHeadHypernymDistance()) {
                // The larger head hypernym distance wins, as in the generic comparison.
                return smorCompound;
            } else if (smorCompound.getHeadHypernymDistance() < otherCompound.getHeadHypernymDistance()) {
                return otherCompound;
            } else if (smorCompound.getHead().length() > otherCompound.getHead().length()) {
                // Last resort: prefer the longer head.
                return smorCompound;
            } else if (smorCompound.getHead().length() < otherCompound.getHead().length()) {
                return otherCompound;
            }
        }

        // No SMOR-specific rule applied: fall back to the generic comparison.
        return determineIfOneCompoundOutperformsTheOther(smorCompound, otherCompound);
    }
    
    /**
     * Chooses the more plausible of two competing analyses of the same compound.
     *
     * The criteria are applied in this order; the first one that separates the two
     * analyses decides:
     * <ol>
     *   <li>head and modifier are both in GermaNet for exactly one analysis;</li>
     *   <li>the head is in GermaNet for exactly one analysis;</li>
     *   <li>the LARGER head hypernym distance wins;</li>
     *   <li>exactly one analysis has a positive modifier hypernym distance while the
     *       other one is negative;</li>
     *   <li>the SMALLER modifier hypernym distance wins;</li>
     *   <li>the modifier is in GermaNet for exactly one analysis.</li>
     * </ol>
     *
     * Preferring the larger head hypernym distance is against common sense, but matches
     * the data, e.g.:
     * <pre>
     *   Müll+Verbrennungsanlage   head hypernym distance 1
     *   Müllverbrennung+Anlage    head hypernym distance 3 (correct binary splitting)
     *
     *   leicht+Metallgießerei     head hypernym distance 1
     *   Leichtmetall+Gießerei     head hypernym distance 2 (correct binary splitting)
     * </pre>
     * This is also why no separate check for a head hypernym distance of -1 is needed.
     * For modifiers the opposite direction is used; that choice is intuitive and has
     * not been verified against data.
     *
     * @param compound1 the first analysis
     * @param compound2 the second analysis
     * @return {@code compound1} or {@code compound2}, or {@code null} if no criterion
     *         separates them
     */
    private CompoundDBEntry determineIfOneCompoundOutperformsTheOther(CompoundDBEntry compound1, CompoundDBEntry compound2) {
        final boolean head1InGermaNet = compound1.isHeadInGermaNet();
        final boolean head2InGermaNet = compound2.isHeadInGermaNet();
        final boolean modifier1InGermaNet = compound1.isModifierInGermaNet();
        final boolean modifier2InGermaNet = compound2.isModifierInGermaNet();
        final boolean bothParts1InGermaNet = head1InGermaNet && modifier1InGermaNet;
        final boolean bothParts2InGermaNet = head2InGermaNet && modifier2InGermaNet;

        if (bothParts1InGermaNet != bothParts2InGermaNet) {
            return bothParts1InGermaNet ? compound1 : compound2;
        } else if (head1InGermaNet != head2InGermaNet) {
            return head1InGermaNet ? compound1 : compound2;
        } else if (compound1.getHeadHypernymDistance() > compound2.getHeadHypernymDistance()) {
            return compound1; // larger head distance wins, see the examples in the Javadoc
        } else if (compound1.getHeadHypernymDistance() < compound2.getHeadHypernymDistance()) {
            return compound2; // larger head distance wins, see the examples in the Javadoc
        } else if (compound1.getModifierHypernymDistance() > 0 && compound2.getModifierHypernymDistance() < 0) {
            return compound1;
        } else if (compound1.getModifierHypernymDistance() < 0 && compound2.getModifierHypernymDistance() > 0) {
            return compound2;
        } else if (compound1.getModifierHypernymDistance() > compound2.getModifierHypernymDistance()) {
            return compound2; // smaller modifier distance wins
        } else if (compound1.getModifierHypernymDistance() < compound2.getModifierHypernymDistance()) {
            // Mirror case of the branch above. It previously repeated the '>' condition
            // and was therefore unreachable, so this case fell through undecided.
            return compound1;
        } else if (modifier1InGermaNet != modifier2InGermaNet) {
            // The head-in-GermaNet status is known to be equal at this point (decided
            // above), so only the modifier can still make a difference.
            return modifier1InGermaNet ? compound1 : compound2;
        }
        return null;
    }
}
