Commit 38cdccff authored by Wei Qiu
Browse files

Change how models are loaded

parent dc00d935
...@@ -16,4 +16,5 @@ hs_err_pid* ...@@ -16,4 +16,5 @@ hs_err_pid*
*.swp *.swp
target/ target/
.idea/ .idea/
.settings/
...@@ -11,3 +11,6 @@ server: ...@@ -11,3 +11,6 @@ server:
logging: logging:
appenders: appenders:
- type: ${LOG_APPENDER!'console'} - type: ${LOG_APPENDER!'console'}
# possible langs: bg cs de en et fa fr hu ko pl ro sk sl sr sv
langs: ["de", "en", "fr"]
...@@ -27,7 +27,7 @@ public class MarmotApplication extends Application<MarmotConfiguration> { ...@@ -27,7 +27,7 @@ public class MarmotApplication extends Application<MarmotConfiguration> {
@Override @Override
public void run(final MarmotConfiguration configuration, public void run(final MarmotConfiguration configuration,
final Environment environment) { final Environment environment) {
MarmotResource marmotResource = new MarmotResource(); MarmotResource marmotResource = new MarmotResource(configuration.getLangs());
IndexResource indexResource = new IndexResource(); IndexResource indexResource = new IndexResource();
environment.jersey().register(marmotResource); environment.jersey().register(marmotResource);
environment.jersey().register(indexResource); environment.jersey().register(indexResource);
......
...@@ -6,5 +6,16 @@ import org.hibernate.validator.constraints.*; ...@@ -6,5 +6,16 @@ import org.hibernate.validator.constraints.*;
import javax.validation.constraints.*; import javax.validation.constraints.*;
public class MarmotConfiguration extends Configuration { public class MarmotConfiguration extends Configuration {
// TODO: implement service configuration
/** Language codes handed to the tagger at startup; populated from the {@code langs} configuration property. */
@NotNull
private String[] langs;

/**
 * Returns the configured language codes.
 *
 * @return a copy of the configured language codes, or {@code null} if none have been set
 */
public String[] getLangs() {
    // Hand out a copy so callers cannot mutate the configuration's own array.
    return langs == null ? null : langs.clone();
}

/**
 * Sets the language codes.
 *
 * @param langs the language codes; copied, so later changes to the argument are not reflected here
 */
@JsonProperty
public void setLangs(String[] langs) {
    // null is stored as-is and left for the @NotNull constraint on the field to reject.
    this.langs = langs == null ? null : langs.clone();
}
} }
...@@ -8,12 +8,12 @@ import lemming.lemma.Lemmatizer; ...@@ -8,12 +8,12 @@ import lemming.lemma.Lemmatizer;
import marmot.morph.*; import marmot.morph.*;
import marmot.morph.Sentence; import marmot.morph.Sentence;
import marmot.util.FileUtils; import marmot.util.FileUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.InputStream; import java.io.InputStream;
import java.util.ArrayList; import java.io.UnsupportedEncodingException;
import java.util.EnumSet; import java.util.*;
import java.util.List;
/** /**
* @author sunny ha * @author sunny ha
...@@ -21,9 +21,27 @@ import java.util.List; ...@@ -21,9 +21,27 @@ import java.util.List;
*/ */
public class MarmotTool implements TextCorpusProcessor { public class MarmotTool implements TextCorpusProcessor {
private static final Logger LOGGER = LoggerFactory.getLogger(MarmotTool.class);
private static final EnumSet<TextCorpusLayerTag> requiredLayers = private static final EnumSet<TextCorpusLayerTag> requiredLayers =
EnumSet.of(TextCorpusLayerTag.SENTENCES); EnumSet.of(TextCorpusLayerTag.SENTENCES);
private Set<String> supportedLangs = new HashSet<>();
MorphOptions options = new MorphOptions(); MorphOptions options = new MorphOptions();
HashMap<String, MorphTagger> taggerMap = new HashMap<>();
/**
 * Loads one tagger model per requested language from the classpath resource
 * {@code models/<lang>.marmot} and keeps it for reuse.
 *
 * @param langs language codes whose models should be loaded
 * @throws IllegalArgumentException if no model resource exists for one of the languages
 * @throws IllegalStateException if closing a model stream fails
 */
public MarmotTool(String[] langs) {
    ClassLoader classLoader = getClass().getClassLoader();
    for (String lang : langs) {
        LOGGER.info("Loading model for {}", lang);
        String modelPath = "models/" + lang + ".marmot";
        // getResourceAsStream returns null for a missing resource; try-with-resources
        // tolerates a null resource, so the explicit check below reports it clearly.
        try (InputStream modelStream = classLoader.getResourceAsStream(modelPath)) {
            if (modelStream == null) {
                throw new IllegalArgumentException(
                        "No model found on classpath for language '" + lang + "' (expected " + modelPath + ")");
            }
            MorphTagger tagger = FileUtils.loadFromStream(modelStream);
            taggerMap.put(lang, tagger);
            // Only advertise the language once its model has actually been loaded.
            supportedLangs.add(lang);
        } catch (java.io.IOException e) {
            throw new IllegalStateException("Failed to close model stream for language '" + lang + "'", e);
        }
    }
}
@Override @Override
public EnumSet<TextCorpusLayerTag> getRequiredLayers() { public EnumSet<TextCorpusLayerTag> getRequiredLayers() {
...@@ -42,20 +60,18 @@ public class MarmotTool implements TextCorpusProcessor { ...@@ -42,20 +60,18 @@ public class MarmotTool implements TextCorpusProcessor {
//for current purpose we use only german model //for current purpose we use only german model
//get model language of the token //get model language of the token
String lang = textCorpus.getLanguage(); String lang = textCorpus.getLanguage();
if (!supportedLangs.contains(lang)) {
throw new TextCorpusProcessorException("Unsupported language");
}
try {
//load the model of the language accordingly
ClassLoader classLoader = getClass().getClassLoader();
InputStream modelStream = classLoader.getResourceAsStream("models/" + lang + ".marmot");
MorphTagger tagger = FileUtils.loadFromStream(modelStream);
String lemmatizer_file = options.getLemmatizerFile(); String lemmatizer_file = options.getLemmatizerFile();
if (!lemmatizer_file.isEmpty()) { if (!lemmatizer_file.isEmpty()) {
Lemmatizer lemmatizer = FileUtils.loadFromFile(lemmatizer_file); Lemmatizer lemmatizer = FileUtils.loadFromFile(lemmatizer_file);
tagger.setPipeLineLemmatizer(lemmatizer); taggerMap.get(lang).setPipeLineLemmatizer(lemmatizer);
} }
if (!options.getMorphDict().isEmpty()) { if (!options.getMorphDict().isEmpty()) {
MorphWeightVector vector = (MorphWeightVector) tagger.getWeightVector(); MorphWeightVector vector = (MorphWeightVector) taggerMap.get(lang).getWeightVector();
MorphDictionary dict = vector.getMorphDict(); MorphDictionary dict = vector.getMorphDict();
if (dict != null) { if (dict != null) {
dict.addWordsFromFile(options.getMorphDict()); dict.addWordsFromFile(options.getMorphDict());
...@@ -80,7 +96,7 @@ public class MarmotTool implements TextCorpusProcessor { ...@@ -80,7 +96,7 @@ public class MarmotTool implements TextCorpusProcessor {
sentenceList.add(new Word(t.getString())); sentenceList.add(new Word(t.getString()));
} }
Sentence sentence = new Sentence(sentenceList); Sentence sentence = new Sentence(sentenceList);
lemma_tags= tagger.tagWithLemma(sentence); lemma_tags= taggerMap.get(lang).tagWithLemma(sentence);
List<Feature> feats; List<Feature> feats;
String[] morph; String[] morph;
for (int j =0; j<tokens.length; j++){ for (int j =0; j<tokens.length; j++){
...@@ -95,13 +111,6 @@ public class MarmotTool implements TextCorpusProcessor { ...@@ -95,13 +111,6 @@ public class MarmotTool implements TextCorpusProcessor {
} }
} }
//System.out.println(lemma_tags);
//for (int i= 0; i < lemma_tags.size(); i++) {
//System.out.println(String.format("token: %s : posTag %s", tokensLayer.getToken(i), lemma_tags.get(i)));
//}
} finally {
}
} }
} }
......
...@@ -5,6 +5,8 @@ import eu.clarin.weblicht.wlfxb.api.TextCorpusProcessor; ...@@ -5,6 +5,8 @@ import eu.clarin.weblicht.wlfxb.api.TextCorpusProcessor;
import eu.clarin.weblicht.wlfxb.api.TextCorpusProcessorException; import eu.clarin.weblicht.wlfxb.api.TextCorpusProcessorException;
import eu.clarin.weblicht.wlfxb.io.TextCorpusStreamed; import eu.clarin.weblicht.wlfxb.io.TextCorpusStreamed;
import eu.clarin.weblicht.wlfxb.io.WLFormatException; import eu.clarin.weblicht.wlfxb.io.WLFormatException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.ws.rs.*; import javax.ws.rs.*;
import javax.ws.rs.core.MediaType; import javax.ws.rs.core.MediaType;
...@@ -12,21 +14,20 @@ import javax.ws.rs.core.Response; ...@@ -12,21 +14,20 @@ import javax.ws.rs.core.Response;
import javax.ws.rs.core.StreamingOutput; import javax.ws.rs.core.StreamingOutput;
import java.io.*; import java.io.*;
import java.util.logging.Level;
import java.util.logging.Logger;
@Path("annotate") @Path("annotate")
public class MarmotResource { public class MarmotResource {
private static final Logger LOGGER = LoggerFactory.getLogger(MarmotResource.class);
private static final String TEXT_TCF_XML = "text/tcf+xml"; private static final String TEXT_TCF_XML = "text/tcf+xml";
private static final String FALL_BACK_MESSAGE = "Data processing failed"; private static final String FALL_BACK_MESSAGE = "Data processing failed";
private static final String TEMP_FILE_PREFIX = "references-output-temp"; private static final String TEMP_FILE_PREFIX = "references-output-temp";
private static final String TEMP_FILE_SUFFIX = ".xml"; private static final String TEMP_FILE_SUFFIX = ".xml";
private TextCorpusProcessor marmotTool; private TextCorpusProcessor marmotTool;
public MarmotResource() { public MarmotResource(String[] langs) {
marmotTool = new MarmotTool(); marmotTool = new MarmotTool(langs);
} }
@Path("marmot/bytes") @Path("marmot/bytes")
...@@ -118,7 +119,7 @@ public class MarmotResource { ...@@ -118,7 +119,7 @@ public class MarmotResource {
if (message == null) { if (message == null) {
message = FALL_BACK_MESSAGE; message = FALL_BACK_MESSAGE;
} }
Logger.getLogger(this.getClass().getName()).log(Level.SEVERE, message, ex); LOGGER.error("Failed {}", message, ex);
return Response.status(status).entity(message).type(MediaType.TEXT_PLAIN).build(); return Response.status(status).entity(message).type(MediaType.TEXT_PLAIN).build();
} }
} }
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment