package decompounder;

import de.tuebingen.uni.sfs.germanet.dbapi.ConRelType;
import de.tuebingen.uni.sfs.germanet.dbapi.GermaNet;
import de.tuebingen.uni.sfs.germanet.dbapi.LexRelType;
import de.tuebingen.uni.sfs.germanet.dbapi.LexUnit;
import de.tuebingen.uni.sfs.germanet.dbapi.Synset;
import java.util.ArrayList;
import java.util.List;

/**
 * Static helpers that pick, among the lexical units GermaNet relates to a
 * compound, the one whose orthographic form matches a given list of
 * candidate forms.
 *
 * <p>All lookups return {@code null} when no related lexical unit matches.
 *
 * @author Verena Henrich
 */
public class CompoundDisambiguator {

    /**
     * A related lexical unit is only accepted by the non-hypernym lookups if
     * its orthographic form is more than this many characters shorter than
     * the compound's form.
     * NOTE(review): presumably this filters out near-identical forms so that
     * only real constituents of the compound are returned - confirm.
     */
    private static final int MIN_LENGTH_DIFFERENCE = 3;

    /**
     * Part-whole relation types consulted by
     * {@link #disambiguatePWRelation}, in lookup order. The order matters
     * because the first matching lexical unit is returned.
     */
    private static final ConRelType[] PART_WHOLE_RELATIONS = {
        ConRelType.HAS_COMPONENT_HOLONYM,
        ConRelType.HAS_COMPONENT_MERONYM,
        ConRelType.HAS_MEMBER_HOLONYM,
        ConRelType.HAS_MEMBER_MERONYM,
        ConRelType.HAS_PORTION_HOLONYM,
        ConRelType.HAS_PORTION_MERONYM,
        ConRelType.HAS_SUBSTANCE_HOLONYM,
        ConRelType.HAS_SUBSTANCE_MERONYM
    };

    /**
     * Return hypernym of compound, i.e., correct reading of potentialHypernym.
     * Return null if potentialHypernym is no hypernym of compound.
     *
     * <p>Convenience overload that wraps the single form in a list and
     * delegates to
     * {@link #disambiguateHypernym(LexUnit, List, GermaNet)}.
     *
     * @param compound          the compound whose hypernyms are searched
     * @param potentialHypernym orthographic form of the candidate hypernym
     * @param germaNet          the GermaNet instance used for the lookup
     * @return the matching hypernym together with its distance, or
     *         {@code null} if none matches
     */
    public static DisambiguatedHypernym disambiguateHypernym(LexUnit compound,
            String potentialHypernym, GermaNet germaNet) {
        List<String> potentialHypernymList = new ArrayList<String>();
        potentialHypernymList.add(potentialHypernym);
        return disambiguateHypernym(compound, potentialHypernymList, germaNet);
    }

    /**
     * Searches the hypernyms of {@code compound} for a lexical unit whose
     * orthographic form is contained in {@code potentialHypernyms}.
     *
     * <p>The outer list returned by {@code getAllHyperonyms()} is walked in
     * order and its 1-based position is reported as the distance. The search
     * stops after the first position that yields a match; it is not bounded
     * by a maximum distance. If several lexical units match at that
     * position, the one with the longest orthographic form wins (the
     * earliest one on ties). A message is written to {@code System.err}
     * whenever a further match is found in a different synset than the
     * first match.
     *
     * @param compound           the compound whose hypernyms are searched
     * @param potentialHypernyms orthographic forms of the candidate hypernyms
     * @param germaNet           the GermaNet instance used for the lookup
     * @return the matching hypernym together with its distance, or
     *         {@code null} if no hypernym matches or the candidate list is
     *         {@code null} or empty
     */
    public static DisambiguatedHypernym disambiguateHypernym(LexUnit compound,
            List<String> potentialHypernyms, GermaNet germaNet) {
        if (potentialHypernyms == null || potentialHypernyms.isEmpty()) {
            // Nothing can match, so skip the hypernym traversal entirely.
            return null;
        }

        // The compound is looked up again by its ID in the given GermaNet
        // instance rather than using the passed-in object directly.
        List<List<Synset>> allHypernyms =
                germaNet.getLexUnitByID(compound.getId()).getSynset().getAllHyperonyms();

        int level = 1;
        for (List<Synset> synsetsAtLevel : allHypernyms) {
            LexUnit best = null;
            for (Synset synset : synsetsAtLevel) {
                // True only while scanning the synset that produced the
                // very first match; further matches inside that same synset
                // are not reported as ambiguous.
                boolean firstMatchInThisSynset = false;
                for (LexUnit candidate : synset.getLexUnits()) {
                    if (!potentialHypernyms.contains(candidate.getOrthForm())) {
                        continue;
                    }
                    if (best == null) {
                        best = candidate;
                        firstMatchInThisSynset = true;
                        continue;
                    }
                    // Remember the earlier match before possibly replacing
                    // it, so the warning names both competing forms.
                    LexUnit previous = best;
                    if (previous.getOrthForm().length() < candidate.getOrthForm().length()) {
                        best = candidate;
                    }
                    if (!firstMatchInThisSynset) {
                        System.err.println(compound.getOrthForm() + " " + compound.getId()
                                + ": two hypernyms possible (" + previous.getOrthForm()
                                + " and " + candidate.getOrthForm() + ")");
                    }
                }
            }
            if (best != null) {
                return new DisambiguatedHypernym(best, level);
            }
            level++;
        }
        return null;
    }

    /**
     * Result of a hypernym lookup: the matched lexical unit and the 1-based
     * position in the hypernym list at which it was found.
     */
    public static class DisambiguatedHypernym {

        private final LexUnit hypernym;
        private final int distance;

        /**
         * @param head     the matched hypernym
         * @param distance the distance at which the hypernym was found
         */
        public DisambiguatedHypernym(LexUnit head, int distance) {
            this.hypernym = head;
            this.distance = distance;
        }

        /** @return the matched hypernym */
        public LexUnit getHypernym() {
            return hypernym;
        }

        /** @return the distance at which the hypernym was found */
        public int getDistance() {
            return distance;
        }
    }

    /**
     * Looks for a lexical unit that stands in a part-whole relation
     * (component, member, portion or substance; holonym or meronym) to the
     * synset of {@code compound}, whose orthographic form is one of
     * {@code potentialPWRelatedLexUnits} and is more than
     * {@value #MIN_LENGTH_DIFFERENCE} characters shorter than the compound.
     *
     * @param compound                   the compound to disambiguate
     * @param potentialPWRelatedLexUnits candidate orthographic forms
     * @param germaNet                   not used by this method
     * @return the first matching lexical unit, or {@code null} if none
     */
    public static LexUnit disambiguatePWRelation(LexUnit compound,
            List<String> potentialPWRelatedLexUnits, GermaNet germaNet) {
        Synset compoundSynset = compound.getSynset();
        // Accumulate into a fresh list instead of appending to a list
        // returned by the GermaNet API, whose ownership is not known here.
        List<Synset> synsetsInPWR = new ArrayList<Synset>();
        for (ConRelType relation : PART_WHOLE_RELATIONS) {
            synsetsInPWR.addAll(compoundSynset.getRelatedSynsets(relation));
        }
        return findInSynsets(compound, synsetsInPWR, potentialPWRelatedLexUnits);
    }

    /**
     * considers all relations except part-whole relations and hypernyms
     *
     * <p>The conceptual relations (is-entailed-by, is-related-to, causes,
     * entails, has-hyponym) of the compound's synset are checked first, then
     * the compound's own lexical relations (antonym, pertainym). A related
     * lexical unit matches if its orthographic form is one of
     * {@code potentialRelatedLexUnits} and is more than
     * {@value #MIN_LENGTH_DIFFERENCE} characters shorter than the compound.
     *
     * @param compound                 the compound to disambiguate
     * @param potentialRelatedLexUnits candidate orthographic forms
     * @param germaNet                 not used by this method
     * @return the first matching lexical unit, or {@code null} if none
     */
    public static LexUnit disambiguateRelation(LexUnit compound,
            List<String> potentialRelatedLexUnits, GermaNet germaNet) {
        Synset compoundSynset = compound.getSynset();
        // Fresh lists are used so that lists returned by the GermaNet API
        // are never modified.
        List<Synset> relatedSynsets = new ArrayList<Synset>();
        relatedSynsets.addAll(compoundSynset.getRelatedSynsets(ConRelType.IS_ENTAILED_BY));
        relatedSynsets.addAll(compoundSynset.getRelatedSynsets(ConRelType.IS_RELATED_TO));
        relatedSynsets.addAll(compoundSynset.getRelatedSynsets(ConRelType.CAUSES));
        relatedSynsets.addAll(compoundSynset.getRelatedSynsets(ConRelType.ENTAILS));
        relatedSynsets.addAll(compoundSynset.getRelatedSynsets(GermaNet.HAS_HYPONYM));

        LexUnit match = findInSynsets(compound, relatedSynsets, potentialRelatedLexUnits);
        if (match != null) {
            return match;
        }

        List<LexUnit> relatedLexUnits = new ArrayList<LexUnit>();
        relatedLexUnits.addAll(compound.getRelatedLexUnits(LexRelType.HAS_ANTONYM));
        relatedLexUnits.addAll(compound.getRelatedLexUnits(LexRelType.HAS_PERTAINYM));
        return findShorterMatch(compound, relatedLexUnits, potentialRelatedLexUnits);
    }

    /**
     * Looks among the synonyms of {@code compound} for a lexical unit whose
     * orthographic form is one of {@code potentialSynonyms} and is more than
     * {@value #MIN_LENGTH_DIFFERENCE} characters shorter than the compound.
     *
     * @param compound          the compound to disambiguate
     * @param potentialSynonyms candidate orthographic forms
     * @param germaNet          not used by this method
     * @return the first matching synonym, or {@code null} if none
     */
    public static LexUnit disambiguateSynonym(LexUnit compound,
            List<String> potentialSynonyms, GermaNet germaNet) {
        return findShorterMatch(compound, compound.getSynonyms(), potentialSynonyms);
    }

    /** Returns the first match found in the lexical units of the given synsets, in order. */
    private static LexUnit findInSynsets(LexUnit compound, List<Synset> synsets,
            List<String> orthForms) {
        for (Synset synset : synsets) {
            LexUnit match = findShorterMatch(compound, synset.getLexUnits(), orthForms);
            if (match != null) {
                return match;
            }
        }
        return null;
    }

    /**
     * Returns the first candidate whose form is listed in {@code orthForms}
     * and is more than MIN_LENGTH_DIFFERENCE characters shorter than the
     * compound's form.
     */
    private static LexUnit findShorterMatch(LexUnit compound, List<LexUnit> candidates,
            List<String> orthForms) {
        for (LexUnit candidate : candidates) {
            String orthForm = candidate.getOrthForm();
            if (orthForms.contains(orthForm)
                    && orthForm.length() < compound.getOrthForm().length() - MIN_LENGTH_DIFFERENCE) {
                return candidate;
            }
        }
        return null;
    }
}