Initial commit

0b611765 · Ben Campbell · 0b611765 · 0b611765 · 0b611765 · 0b611765
Commit 0b611765 authored Jan 23, 2019 by Ben Campbell
20 changed files
--- a/src/main/resources/ASV_trees/kompVVic.tree
+++ b/src/main/resources/ASV_trees/kompVVic.tree
--- a/src/main/resources/SMOR/.DS_Store
+++ b/src/main/resources/SMOR/.DS_Store
--- a/src/main/resources/SMOR/lib/smor-ascii.ca
+++ b/src/main/resources/SMOR/lib/smor-ascii.ca
--- a/src/main/resources/SMOR/lib/smor-cap.ca
+++ b/src/main/resources/SMOR/lib/smor-cap.ca
--- a/src/main/resources/SMOR/lib/smor-guesser.ca
+++ b/src/main/resources/SMOR/lib/smor-guesser.ca
--- a/src/main/resources/SMOR/lib/smor-ss.ca
+++ b/src/main/resources/SMOR/lib/smor-ss.ca
--- a/src/main/resources/SMOR/lib/smor-uc.ca
+++ b/src/main/resources/SMOR/lib/smor-uc.ca
--- a/src/main/resources/SMOR/lib/smor.ca
+++ b/src/main/resources/SMOR/lib/smor.ca
--- a/src/main/resources/SMOR/src/Makefile
+++ b/src/main/resources/SMOR/src/Makefile
+
+# Build flavour.  TYPE is one of debug, prof, static, lib, Sun64bit or the
+# empty string; the empty string selects the plain optimised build.
+TYPE =
+
+# Extra define passed to the compiler for locating hash_map.
+# Try a different definition of SGILIB if hash_map is not found.
+#SGILIB=
+SGILIB=-DSGI__gnu_cxx
+#SGILIB=-DSGIext
+
+# Readline support is switched off here.  Uncomment both lines to enable it;
+# leave them commented if readline is not found.
+#READLINE = -DREADLINE
+#LREADLINE = -lreadline -lhistory -lncurses
+
+# Data type used for the encoding of the symbols (char, short, or int);
+# short is the default.
+#CDT=-DCODE_DATA_TYPE=short
+
+CXX = g++
+WARNING = -Wall -Wcast-qual -Wconversion -std=c++98 -Wno-deprecated -ansi
+
+# Flags which depend on TYPE.  The assignments inside the conditionals are
+# indented with spaces only: a tab-indented line is taken as recipe text as
+# soon as it follows a rule, so tabs must not be used here.
+ifeq ($(TYPE),debug)
+  DEBUG = -ggdb
+  OPTIMIZATION = -O0
+else
+  DEBUG = -DNDEBUG
+  ifeq ($(TYPE),prof)
+    OPTIMIZATION = -O2
+    PROFILE = -pg
+  else
+    OPTIMIZATION = -O3
+    ifeq ($(TYPE),static)
+      STATIC = -static
+    else
+      ifeq ($(TYPE),Sun64bit)
+        LOPT = -L/usr/lib/64 -L/usr/local/lib/sparcv9 -R/usr/lib/64 -R/usr/local/lib/sparcv9
+        CXX = g++64
+        BITS= -mcpu=v9
+      else
+        ifeq ($(TYPE),lib)
+          LIB = -fPIC
+        endif
+      endif
+    endif
+  endif
+endif
+
+
+# CL is the command used for linking libsfst.so and the test program.
+CC = g++
+CL = $(CC)
+CFLAGS = $(DEBUG) $(PROFILE) $(LIB) $(OPTIMIZATION) $(WARNING) $(SGILIB) $(CDT) $(BITS) $(READLINE)
+LDFLAGS = $(DEBUG) $(PROFILE) $(STATIC) $(LOPT)
+
+CXXFLAGS = $(CFLAGS)
+LXXFLAGS = $(LDFLAGS)
+
+# Commands used by the install and maninstall rules.
+INSTALL = install
+INSTALL_MANPAGES = install -m 644
+INSTALL_DIR = install -d -m 755
+
+# Files are installed below $(DESTDIR)$(PREFIX).  PREFIX has to end with a
+# slash because the install rules append "bin/..." and "man/..." directly.
+DESTDIR =
+PREFIX = /usr/local/
+
+PROGRAMS = fst-compiler fst-infl fst-generate fst-print fst-compare \
+	fst-compact fst-infl2 fst-lowmem fst-infl3 fst-lattice fst-match \
+	fst-parse fst-parse2 fst-train fst-compiler-utf8 fst-text2bin fst-mor
+ALLPROGRAMS = $(PROGRAMS)
+
+# What "all" builds: the library archive for TYPE=lib, unstripped programs
+# for TYPE=debug, and stripped programs otherwise.
+ifeq ($(TYPE),lib)
+  TARGET = libsfst.tar.gz
+else
+  ifeq ($(TYPE),debug)
+    TARGET = $(ALLPROGRAMS)
+  else
+    TARGET = strip
+  endif
+endif
+
+
+# OBJ is linked into every program built by the generic pattern rule;
+# OBJ2 adds the objects needed by the two compiler programs.
+OBJ = basic.o utf8.o alphabet.o fst.o operators.o determinise.o hopcroft.o
+OBJ2 = $(OBJ) compact.o make-compact.o interface.o
+
+# maninstall creates no file of that name, so it is declared phony as well.
+.PHONY: all clean realclean archive remake install maninstall strip
+
+all: $(TARGET)
+
+# Builds all programs and removes their symbol tables.
+strip: $(ALLPROGRAMS)
+	strip $(ALLPROGRAMS)
+
+fst-api.zip: test.C alphabet.C alphabet.h basic.C basic.h compact.C \
+	compact.h lowmem.C lowmem.h robust.C
+	zip $@ $^
+
+# The following rules have no recipe of their own.  They only add
+# prerequisites; the programs are linked by the generic "%" rule below.
+fst-compact: compact.o make-compact.o
+
+fst-generate: generate.o
+
+fst-match: compact.o
+
+fst-train: compact.o
+
+fst-compiler: $(OBJ2) default-scanner.o
+
+fst-compiler-utf8: $(OBJ2) utf8-scanner.o fst-compiler.o
+	$(CXX) -o $@ $^ $(LDFLAGS)
+
+# fst-mor is the only program linked with the (optional) readline libraries.
+fst-mor: $(OBJ) fst-mor.o
+	$(CXX) -o $@ $^ $(LDFLAGS) $(LREADLINE)
+
+fst-text2bin: $(OBJ) fst-text2bin.o
+	$(CXX) -o $@ $^ $(LDFLAGS)
+
+# Generic link rule: program X is linked from X.o and the common objects.
+%: $(OBJ) %.o
+	$(CXX) -o $@ $^ $(LDFLAGS)
+
+# These programs are linked from an explicit object list without $(OBJ).
+fst-infl2: alphabet.o basic.o compact.o robust.o utf8.o fst-infl2.o
+	$(CXX) -o $@ $^ $(LDFLAGS)
+
+fst-infl2-daemon: alphabet.o basic.o compact.o robust.o utf8.o fst-infl2-daemon.o
+	$(CXX) -o $@ $^ $(LDFLAGS)
+
+fst-infl3: alphabet.o basic.o utf8.o lowmem.o fst-infl3.o
+	$(CXX) -o $@ $^ $(LDFLAGS)
+
+# The linker searches a library at the point where it appears on the command
+# line, so -lsfst has to follow the object files which refer to it.
+test: test.o
+	$(CL) -o $@ $^ -L. -lsfst $(LDFLAGS)
+
+# Parser source generated by bison.  Depending on the bison version the
+# header written by -d is called fst-compiler.C.h or fst-compiler.H; either
+# one is renamed to fst-compiler.h.
+fst-compiler.C: fst.h compact.h make-compact.h interface.h fst-compiler.yy
+	bison -d -o $@ fst-compiler.yy
+	if [ -f $@.h ]; then mv $@.h fst-compiler.h; fi
+	if [ -f fst-compiler.H ]; then mv fst-compiler.H fst-compiler.h; fi
+
+# Scanner sources generated by flex from the variant-specific .ll files.
+default-scanner.C: fst.h interface.h scanner.ll fst-compiler.C default-scanner.ll
+	flex -o$@ $(@:.C=.ll)
+
+utf8-scanner.C: fst.h interface.h scanner.ll fst-compiler.C utf8-scanner.ll
+	flex -o$@ $(@:.C=.ll)
+
+# scanner.ll holds both scanner variants.  Lines prefixed with "utf8>" or
+# "default>" belong to one variant only: the other variant's lines are
+# deleted and the own prefix is stripped.
+default-scanner.ll: scanner.ll
+	sed -e '/^utf8>/d' -e 's/^default> *//' $< > $@
+
+utf8-scanner.ll: scanner.ll
+	sed -e '/^default>/d' -e 's/^utf8> *//' $< > $@
+
+# Archive containing the shared library and the headers listed here.
+libsfst.tar.gz: libsfst.so compact.h alphabet.h basic.h fst.h
+	tar -zcf $@ $^
+
+libsfst.so: alphabet.o basic.o utf8.o compact.o fst.o operators.o determinise.o generate.o hopcroft.o
+	$(CL) -shared -o $@ $^ $(LDFLAGS)
+
+remake: clean $(ALLPROGRAMS)
+
+# Removes object files and editor backups; errors of rm are ignored.
+clean:
+	-rm -f *.o *~ Makefile.bak y.tab.h man1/*~ 2>&- > /dev/null
+
+# Additionally removes the programs which were built.
+realclean: clean
+	-rm -f $(ALLPROGRAMS) test 2>&- > /dev/null
+
+# The target directory is created first so that the installation also works
+# with a fresh DESTDIR.  "|| exit 1" stops at the first program which cannot
+# be installed; otherwise only the status of the last one would be reported.
+install: $(PROGRAMS)
+	$(INSTALL_DIR) $(DESTDIR)$(PREFIX)bin
+	for p in $(PROGRAMS); do $(INSTALL) $$p $(DESTDIR)$(PREFIX)bin/$$p || exit 1; done
+
+# $$m has the form man1/NAME.1, hence the pages end up in .../man/man1/.
+maninstall:
+	$(INSTALL_DIR) $(DESTDIR)$(PREFIX)man/man1
+	for m in man1/*.1; do $(INSTALL_MANPAGES) $$m $(DESTDIR)$(PREFIX)man/$$m || exit 1; done
+
+
+# Source archive named after the current date; -h stores the files which
+# symbolic links point to.
+archive:
+	tar -zhcf VERSION-`date '+%y%m%d'`.tar.gz *.ll *.yy *.[Ch] Makefile man1/*
+
+# Regenerates the dependency list at the end of this file with makedepend.
+# The leading "-" lets make continue if makedepend fails.
+Makefile: *.C *.ll *.yy *.h
+	-makedepend -Y -- $(CFLAGS) -- *.C 2>/dev/null
+
+# Header dependencies of the object files.  The "Makefile" rule above
+# regenerates this list with makedepend.
+# DO NOT DELETE
+
+TransducerTest.o: Transducer.h
+alphabet.o: utf8.h alphabet.h basic.h sgi.h
+basic.o: basic.h
+compact.o: compact.h alphabet.h basic.h sgi.h
+default-scanner.o: interface.h utf8.h fst.h alphabet.h basic.h sgi.h mem.h
+default-scanner.o: fst-compiler.h
+determinise.o: fst.h alphabet.h basic.h sgi.h mem.h
+fst-compact.o: make-compact.h fst.h alphabet.h basic.h sgi.h mem.h compact.h
+fst-compare.o: fst.h alphabet.h basic.h sgi.h mem.h
+fst-compiler.o: make-compact.h fst.h alphabet.h basic.h sgi.h mem.h compact.h
+fst-compiler.o: interface.h utf8.h
+fst-generate.o: fst.h alphabet.h basic.h sgi.h mem.h
+fst-infl.o: fst.h alphabet.h basic.h sgi.h mem.h
+fst-infl2-daemon.o: Socket.h compact.h alphabet.h basic.h sgi.h
+fst-infl2.o: compact.h alphabet.h basic.h sgi.h
+fst-infl3.o: lowmem.h alphabet.h basic.h sgi.h
+fst-lattice.o: fst.h alphabet.h basic.h sgi.h mem.h
+fst-lowmem.o: fst.h alphabet.h basic.h sgi.h mem.h
+fst-match.o: compact.h alphabet.h basic.h sgi.h
+fst-mor.o: fst.h alphabet.h basic.h sgi.h mem.h
+fst-parse.o: fst.h alphabet.h basic.h sgi.h mem.h
+fst-parse2.o: fst.h alphabet.h basic.h sgi.h mem.h
+fst-print.o: fst.h alphabet.h basic.h sgi.h mem.h
+fst-text2bin.o: fst.h alphabet.h basic.h sgi.h mem.h
+fst-train.o: compact.h alphabet.h basic.h sgi.h
+fst.o: fst.h alphabet.h basic.h sgi.h mem.h
+generate.o: fst.h alphabet.h basic.h sgi.h mem.h
+interface.o: interface.h utf8.h fst.h alphabet.h basic.h sgi.h mem.h
+lowmem.o: lowmem.h alphabet.h basic.h sgi.h
+make-compact.o: make-compact.h fst.h alphabet.h basic.h sgi.h mem.h compact.h
+operators.o: fst.h alphabet.h basic.h sgi.h mem.h
+robust.o: compact.h alphabet.h basic.h sgi.h
+test.o: compact.h alphabet.h basic.h sgi.h
+utf8-scanner.o: interface.h utf8.h fst.h alphabet.h basic.h sgi.h mem.h
+utf8-scanner.o: fst-compiler.h
+utf8.o: utf8.h
+hopcroft.o: fst.h alphabet.h basic.h sgi.h mem.h
--- a/src/main/resources/SMOR/src/Socket.h
+++ b/src/main/resources/SMOR/src/Socket.h
+
+/*******************************************************************/
+/*                                                                 */
+/*     File: Socket.h                                              */
+/*   Author: Helmut Schmid                                         */
+/*  Purpose:                                                       */
+/*  Created: Fri Aug 15 14:19:19 2008                              */
+/* Modified: Wed Sep 29 08:44:43 2010 (schmid)                     */
+/*                                                                 */
+/*******************************************************************/
+
+
+namespace SFST {
+
+  /*****************  class Socket  **********************************/
+
+  /* Minimal TCP server socket.  The constructor creates the socket, binds  */
+  /* it to the given port on INADDR_ANY and starts listening; any failure   */
+  /* is reported on stderr and terminates the program with exit status 1.   */
+  /* This header includes nothing itself: the including file has to provide */
+  /* the declarations of the socket functions, fprintf, exit and bzero.     */
+
+  class Socket {
+  
+    int portno;  /* port address */
+    int sockfd;  /* descriptor of the listening socket */
+    struct sockaddr_in serv_addr;  /* local address the socket is bound to */
+    struct sockaddr cli_addr;      /* address of the last accepted client */
+    socklen_t clilen;              /* size argument passed to accept() */
+  
+  public:
+    /* Accepts the next client connection and returns the value delivered */
+    /* by accept(), i.e. the new descriptor or a negative value on error. */
+    int next_client() {
+      /* clilen is a value-result argument of accept(), so it has to be  */
+      /* reset before every call.  The peer address goes to cli_addr;    */
+      /* writing it to serv_addr would clobber the bound server address. */
+      clilen = sizeof(cli_addr);
+      return accept( sockfd, &cli_addr, &clilen);
+    }
+
+  /* port: TCP port to listen on (7070 by default) */
+  Socket( int port=7070 ): portno( port ) {
+      /* create a socket */
+      sockfd = socket(AF_INET, SOCK_STREAM, 0);
+      if (sockfd < 0) {
+        fprintf(stderr, "ERROR opening socket\n");
+        exit(1);
+      }
+    
+      /* initialise serv_addr with zeros */
+      bzero((char *) &serv_addr, sizeof(serv_addr));
+      serv_addr.sin_family = AF_INET;
+    
+      /* convert portno to network byte order and */
+      /* store it in serv_addr.sin_port */
+      serv_addr.sin_port = htons(portno);
+    
+      /* set the host IP address (available in INADDR_ANY) */
+      serv_addr.sin_addr.s_addr = INADDR_ANY;
+    
+      /* bind the socket to a host and port */
+      if (bind(sockfd, (struct sockaddr *) &serv_addr, sizeof(serv_addr)) < 0) {
+        fprintf(stderr, "ERROR on binding\n");
+        exit(1);
+      }
+    
+      /* Listen to the socket with a backlog of 5 pending connections */
+      if (listen(sockfd, 5) < 0) {
+        fprintf(stderr, "ERROR on listening\n");
+        exit(1);
+      }
+      clilen = sizeof(cli_addr);
+    }
+  };
+
+}
--- a/src/main/resources/SMOR/src/Transducer.h
+++ b/src/main/resources/SMOR/src/Transducer.h
+
+/*******************************************************************/
+/*                                                                 */
+/*     File: Transducer.h                                          */
+/*   Author: Helmut Schmid                                         */
+/*                                                                 */
+/*******************************************************************/
+
+#include <stdio.h>
+#include <vector>
+
+namespace SFST {
+
+  /* A single arc of the transducer.  A symbol value of 0 stands for the */
+  /* empty symbol which is written "<>" in the transducer file.          */
+  class Transition {
+  public:
+    char lower;
+    char upper;
+    int  target;  /* number of the state the arc leads to */
+
+    Transition( char l, char u, size_t t ) { lower = l; upper = u; target = (int)t; }
+  };
+
+  /* A state with its outgoing arcs. */
+  class State {
+  public:
+    bool final;
+    std::vector<Transition> transition;
+
+    State() { final = false; }
+  };
+
+  /* Simple transducer which is read from a text file and maps a string */
+  /* matched against the upper symbols to the strings of lower symbols. */
+  /* State 0 is the start state.                                        */
+  class Transducer {
+
+  private:
+    std::vector<State> state;
+
+    /* Makes sure that state number n exists.                      */
+    /* Returns false if n is negative and thus no valid state.     */
+    bool reserve_state( int n ) {
+      if (n < 0)
+        return false;
+      if ((size_t)n >= state.size())
+        state.resize( (size_t)n+1 );
+      return true;
+    }
+
+    /* Adds an arc from state s to state t.  Both states are allocated; */
+    /* otherwise a target state without outgoing arcs would be missing  */
+    /* from the state table and analyze1 would read beyond its end.     */
+    bool add_transition( int s, char l, char u, int t ) {
+      if (!reserve_state(s) || !reserve_state(t))
+        return false;
+      state[s].transition.push_back(Transition(l, u, t));
+      return true;
+    }
+
+    /* Recursive depth-first search.  sn is the current state, s the rest */
+    /* of the input, ana the lower symbols collected so far.  Complete    */
+    /* analyses are appended to analyses.  Arcs with an empty upper       */
+    /* symbol consume no input, so a cycle of such arcs never terminates. */
+    void analyze1( int sn, const char *s, std::vector<char> &ana, 
+                   std::vector<std::vector<char> > &analyses )
+    {
+      if (*s == 0 && state[sn].final)
+        analyses.push_back( ana );
+    
+      std::vector<Transition> &t=state[sn].transition;
+      for( size_t i=0; i<t.size(); i++ ) {
+        if (t[i].upper == 0) {
+          ana.push_back(t[i].lower);
+          analyze1( t[i].target, s, ana, analyses);
+          ana.pop_back();
+        }
+        else if (t[i].upper == *s) {
+          ana.push_back(t[i].lower);
+          analyze1( t[i].target, s+1, ana, analyses);
+          ana.pop_back();
+        }
+      }
+    }
+
+  public:
+    /* Reads the transducer from file.  Each line is either "final: N" or */
+    /* an arc "S L:U T", "S L:<> T", "S <>:U T" or "S L T" (identity).    */
+    /* A line which cannot be parsed or which contains a negative state   */
+    /* number is reported with its 1-based line number and terminates     */
+    /* the program with exit status 1.                                    */
+    Transducer( FILE *file ) {
+      char buffer[1000];
+      for( unsigned int line=1; (fgets(buffer, 1000, file)); line++ ) {
+        int s, t;
+        char u, l;
+        bool ok;
+        if (sscanf( buffer, "final: %d", &s) == 1) {
+          ok = reserve_state( s );
+          if (ok)
+            state[s].final = true;
+        }
+        else if (sscanf( buffer, "%d %c:<> %d", &s, &l, &t) == 3)
+          ok = add_transition( s, l, 0, t );
+        else if (sscanf( buffer, "%d <>:%c %d", &s, &u, &t) == 3)
+          ok = add_transition( s, 0, u, t );
+        else if (sscanf( buffer, "%d %c:%c %d", &s, &l, &u, &t) == 4)
+          ok = add_transition( s, l, u, t );
+        else if (sscanf( buffer, "%d %c %d", &s, &l, &t) == 3)
+          ok = add_transition( s, l, l, t );
+        else
+          ok = false;
+
+        if (!ok) {
+          fprintf(stderr,"Error: in line %u of transducer file at: %s\n",
+                  line, buffer);
+          exit(1);
+        }
+      }
+    }
+
+    /* Appends all analyses of the string s to analyses. */
+    void analyze( const char *s, std::vector<std::vector<char> > &analyses ) {
+      if (state.empty())
+        return;  /* empty transducer file: there is no start state */
+      std::vector<char> ana;
+      analyze1( 0, s, ana, analyses );
+    }
+  };
+
+}
--- a/src/main/resources/SMOR/src/TransducerTest.C
+++ b/src/main/resources/SMOR/src/TransducerTest.C
+#include "Transducer.h"
+
+#include <iostream>
+using std::cerr;
+
+// Reads a transducer from the file named by the first argument and analyses
+// each line of the standard input with it.  Returns 1 if the argument is
+// missing, the file cannot be opened or an error message is thrown.
+int main( int argc, char **argv )
+
+{
+  if (argc < 2) {
+    cerr << "Usage: TransducerTest transducer-file\n";
+    return 1;
+  }
+
+  FILE *file = fopen(argv[1],"rb");  // open the input file
+  if (file == NULL) {
+    cerr << "Error: cannot open file " << argv[1] << "\n";
+    return 1;
+  }
+
+  try {
+    SFST::Transducer transducer(file); // (1)  read the transducer
+    fclose(file);  // the constructor has read the whole file
+      
+    char buffer[1000];
+    while (fgets(buffer, 1000, stdin)) {  // (2) next input line
+      // delete newline character (the length is checked first because
+      // the buffer is empty if the line starts with a NUL character)
+      size_t len=strlen(buffer);
+      if (len > 0 && buffer[len-1] == '\n')
+        buffer[len-1] = '\0';
+      printf("> %s\n", buffer);  // print the input line
+
+      std::vector<std::vector<char> > analyses;
+      transducer.analyze(buffer, analyses);  // (3) analyse the input
+
+      if (analyses.size() == 0)
+        printf( "no result for %s\n", buffer);
+      else
+        for( size_t i=0; i<analyses.size(); i++ ) {
+          for( size_t k=0; k<analyses[i].size(); k++ )
+            fputc(analyses[i][k], stdout);
+          fputc('\n', stdout);
+        }
+    }
+  }
+  catch (const char *p) {
+    cerr << p << "\n";
+    return 1;
+  }
+
+  return 0;
+}
--- a/src/main/resources/SMOR/src/alphabet.C
+++ b/src/main/resources/SMOR/src/alphabet.C
+
+/*******************************************************************/
+/*                                                                 */
+/*  FILE     alphabet.C                                            */
+/*  MODULE   alphabet                                              */
+/*  PROGRAM  SFST                                                  */
+/*  AUTHOR   Helmut Schmid, IMS, University of Stuttgart           */
+/*                                                                 */
+/*  PURPOSE  basic FST functions                                   */
+/*                                                                 */
+/*******************************************************************/
+
+#include <climits>
+#include <cstring>
+
+#include "utf8.h"
+#include "alphabet.h"
+
+namespace SFST {
+
+  using std::vector;
+  using std::ostream;
+
+  const int BUFFER_SIZE=100000;
+
+  char EpsilonString[]="<>";
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  Alphabet::add                                                  */
+  /*                                                                 */
+  /*******************************************************************/
+
+  // Registers symbol under the code c in both lookup tables.  Existing
+  // entries are not checked here.
+  void Alphabet::add( const char *symbol, Character c )
+
+  {
+    // both tables refer to the same private copy of the string
+    char *copy = fst_strdup(symbol);
+    sm[copy] = c;   // symbol -> code
+    cm[c] = copy;   // code -> symbol
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  Alphabet::Alphabet                                             */
+  /*                                                                 */
+  /*******************************************************************/
+
+  // Creates an alphabet which contains only the epsilon symbol "<>" and
+  // which is not flagged as UTF-8.
+  Alphabet::Alphabet()
+
+  { 
+    add(EpsilonString, Label::epsilon);
+    utf8 = false;
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  Alphabet::clear                                                */
+  /*                                                                 */
+  /*******************************************************************/
+
+  // Empties the label set and both symbol tables and releases the symbol
+  // strings.
+  void Alphabet::clear()
+
+  {
+    // collect the string pointers first; they are released only after
+    // all tables which refer to them have been emptied
+    vector<char*> strings;
+    strings.reserve(cm.size());
+    for( CharMap::iterator entry=cm.begin(); entry!=cm.end(); ++entry )
+      strings.push_back(entry->second);
+
+    ls.clear();
+    sm.clear();
+    cm.clear();
+
+    for( size_t k=0; k<strings.size(); k++ )
+      free(strings[k]);
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  Alphabet::print                                                */
+  /*                                                                 */
+  /*******************************************************************/
+
+  // Writes one line "code<TAB>symbol" per table entry to stderr.
+  void Alphabet::print(void)
+
+  {
+    CharMap::iterator entry=cm.begin();
+    while (entry != cm.end()) {
+      fprintf(stderr, "%i\t%s\n", entry->first, entry->second);
+      ++entry;
+    }
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  Alphabet::new_marker                                           */
+  /*                                                                 */
+  /*******************************************************************/
+
+  // Adds a new marker symbol of the form ">N<", where N is the first
+  // unused character code, and returns that code.  An error message is
+  // thrown if all codes are in use.
+  Character Alphabet::new_marker()
+
+  {
+    // try the codes 1, 2, ... until the counter wraps around to 0
+    Character code=1;
+    do {
+      if (cm.find(code) == cm.end()) {
+        // the code is unused: build the identifier string from it
+        char name[100];
+        sprintf(name,">%ld<",(long)code);
+        add(name, code);
+        return code;
+      }
+      code++;
+    } while (code != 0);
+  
+    throw "Error: too many symbols in transducer definition";
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  is_marker_symbol                                               */
+  /*                                                                 */
+  /*******************************************************************/
+
+  // Returns true iff s matches the expression ">[0-9]+<".
+  // A NULL pointer is not a marker symbol.
+  static bool is_marker_symbol( const char *s )
+
+  {
+    if (s == NULL || *s != '>')
+      return false;
+
+    // skip the digits after the leading '>'
+    const char *digits = s+1;
+    const char *p = digits;
+    while (*p >= '0' && *p <= '9')
+      p++;
+
+    // at least one digit, then '<' as the last character
+    return p != digits && p[0] == '<' && p[1] == 0;
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  Alphabet::delete_markers                                       */
+  /*                                                                 */
+  /*******************************************************************/
+
+  // Removes all marker symbols (see is_marker_symbol) and all labels which
+  // contain one.  The alphabet is emptied and then rebuilt from the
+  // remaining symbols and labels.
+  void Alphabet::delete_markers()
+
+  {
+    vector<char*> kept_symbols;
+    vector<Character> kept_codes;
+    vector<Label> kept_labels;
+
+    // save copies of the symbols to keep; clear() releases the originals
+    for( CharMap::const_iterator entry=cm.begin(); entry!=cm.end(); ++entry ) {
+      if (is_marker_symbol(entry->second))
+        continue;
+      kept_symbols.push_back(fst_strdup(entry->second));
+      kept_codes.push_back(entry->first);
+    }
+    
+    // save the labels without marker symbols
+    for( LabelSet::const_iterator lp=begin(); lp!=end(); ++lp ) {
+      Label l=*lp;
+      if (is_marker_symbol(code2symbol(l.upper_char())) ||
+          is_marker_symbol(code2symbol(l.lower_char())))
+        continue;
+      kept_labels.push_back(l);
+    }
+
+    clear();
+
+    // rebuild the alphabet
+    for( size_t k=0; k<kept_symbols.size(); k++ ) {
+      add_symbol(kept_symbols[k], kept_codes[k]);
+      free(kept_symbols[k]);
+    }
+    for( size_t k=0; k<kept_labels.size(); k++ )
+      insert( kept_labels[k] );
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  Alphabet::add_symbol                                           */
+  /*                                                                 */
+  /*******************************************************************/
+
+  // Returns the code of symbol.  An unknown symbol is added with the
+  // first unused code greater than 0; an error message is thrown if no
+  // code is left.
+  Character Alphabet::add_symbol(const char *symbol)
+
+  {
+    const bool known = (sm.find(symbol) != sm.end());
+    if (known)
+      return sm[symbol];
+
+    // search the codes 1, 2, ... until the counter wraps around to 0
+    Character code=1;
+    do {
+      if (cm.find(code) == cm.end()) {
+        add(symbol, code);
+        return code;
+      }
+      code++;
+    } while (code != 0);
+  
+    throw "Error: too many symbols in transducer definition";
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  Alphabet::add_symbol                                           */
+  /*                                                                 */
+  /*******************************************************************/
+
+  // Adds symbol with the given code c.  Nothing happens if this pair is
+  // already defined.  An error message is thrown if the symbol is defined
+  // with another code or if the code belongs to another symbol.
+  void Alphabet::add_symbol( const char *symbol, Character c )
+
+  {
+    // Buffer for the thrown messages.  Every %s below carries a precision,
+    // so the longest possible message (fixed text + 69 + 100 characters
+    // + one number) fits.  The former 100 byte buffers overflowed with
+    // symbols which passed the length checks.
+    static char message[300];
+
+    // check whether the symbol was previously defined
+    int sc=symbol2code(symbol);
+    if (sc != EOF) {
+      if ((Character)sc == c)
+        return;
+
+      if (strlen(symbol) < 60) {
+        sprintf(message, "Error: reinserting symbol '%.59s' in alphabet with incompatible character value %u %u", symbol, (unsigned)sc, (unsigned)c);
+        throw message;
+      }
+      else
+        throw "reinserting symbol in alphabet with incompatible character value";
+    }
+
+    // check whether the character is already in use
+    const char *s=code2symbol(c);
+    if (s == NULL)
+      add(symbol, c);
+    else {
+      if (strcmp(s, symbol) != 0) {
+        if (strlen(symbol) < 70)
+          // the old symbol is cut after 100 characters
+          sprintf(message,"Error: defining symbol %.69s as character %u (previously defined as %.100s)", symbol, (unsigned)c, s);
+        else
+          sprintf(message,"Error: defining a (very long) symbol with previously used character");
+        throw message;
+      }
+    }
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  Alphabet::write_char                                           */
+  /*                                                                 */
+  /*******************************************************************/
+
+  // Appends the printed form of c to buffer at position *pos, advances
+  // *pos and terminates the buffer with \0.  If with_brackets is false,
+  // the enclosing "<" and ">" of a symbol are omitted.  A code without a
+  // symbol is written as the character itself (codes 32..255) or as
+  // "\code".  The size of the buffer is not checked.
+  void Alphabet::write_char( Character c, char *buffer, int *pos, 
+                             bool with_brackets) const
+  {
+    const char *sym = code2symbol(c);
+
+    if (sym == NULL) {
+      const unsigned int code = c;
+      if (code < 32 || code >= 256) {
+        sprintf(buffer+(*pos),"\\%u", code);
+        *pos += (int)strlen(buffer+(*pos));
+      }
+      else
+        buffer[(*pos)++] = (char)c;
+    }
+    else {
+      // first and last are the bounds of the part which is copied
+      int first = 0;
+      int last = (int)strlen(sym)-1;
+      if (!with_brackets && sym[first] == '<' && sym[last] == '>') {
+        first++;
+        last--;
+      }
+      for( int k=first; k<=last; k++ )
+        buffer[(*pos)++] = sym[k];
+    }
+    buffer[*pos] = '\0';
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  Alphabet::write_char                                           */
+  /*                                                                 */
+  /*******************************************************************/
+
+  // Returns the printed form of c.  The result is stored in a static
+  // buffer which is overwritten by the next call.
+  const char *Alphabet::write_char( Character c, bool with_brackets ) const
+
+  {
+    static char result[1000];
+
+    int length=0;
+    write_char( c, result, &length, with_brackets );
+    return result;
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  Alphabet::write_label                                          */
+  /*                                                                 */
+  /*******************************************************************/
+
+  // Appends the printed form of the label l to buffer at position *pos.
+  // The form is "lower:upper", or just the symbol if both are identical.
+  void Alphabet::write_label( Label l, char *buffer, int *pos, 
+                              bool with_brackets ) const
+  {
+    const Character lower=l.lower_char();
+    const Character upper=l.upper_char();
+
+    write_char( lower, buffer, pos, with_brackets );
+    if (lower == upper)
+      return;
+
+    buffer[(*pos)++] = ':';
+    write_char( upper, buffer, pos, with_brackets );
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  Alphabet::write_label                                          */
+  /*                                                                 */
+  /*******************************************************************/
+
+  // Returns the printed form of the label l.  The result is stored in a
+  // static buffer which is overwritten by the next call.
+  const char *Alphabet::write_label( Label l, bool with_brackets  ) const
+
+  {
+    static char result[1000];
+
+    int length=0;
+    write_label( l, result, &length, with_brackets );
+    return result;
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  Alphabet::insert_symbols                                       */
+  /*                                                                 */
+  /*******************************************************************/
+
+  // Adds all symbols of the alphabet a with their codes.
+  void Alphabet::insert_symbols( const Alphabet &a )
+
+  {
+    CharMap::const_iterator entry=a.cm.begin();
+    while (entry != a.cm.end()) {
+      add_symbol(entry->second, entry->first);
+      ++entry;
+    }
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  Alphabet::complement                                           */
+  /*                                                                 */
+  /*******************************************************************/
+
+  // Replaces sym by the codes of the alphabet which are not contained in
+  // sym.  Epsilon is never part of the result.
+  void Alphabet::complement( vector<Character> &sym )
+
+  {
+    vector<Character> rest;
+    for( CharMap::const_iterator entry=cm.begin(); entry!=cm.end(); ++entry ) {
+      const Character c = entry->first;
+      if (c == Label::epsilon)
+        continue;
+
+      // linear search for c in sym
+      bool listed = false;
+      for( size_t k=0; !listed && k<sym.size(); k++ )
+        listed = (sym[k] == c);
+      if (!listed)
+        rest.push_back(c);
+    }
+    sym.swap(rest);
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  Alphabet::copy                                                 */
+  /*                                                                 */
+  /*******************************************************************/
+
+  // Adds the symbols and labels of a to this alphabet and takes over its
+  // UTF-8 flag.
+  void Alphabet::copy( const Alphabet &a )
+
+  {
+    insert_symbols( a );
+    utf8 = a.utf8;
+
+    LabelSet::const_iterator lp=a.begin();
+    while (lp != a.end()) {
+      ls.insert( *lp );
+      ++lp;
+    }
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  Alphabet::compose                                              */
+  /*                                                                 */
+  /*******************************************************************/
+
+  // Adds the symbols of la and ua and the labels which result from
+  // combining them: labels of ua with an epsilon lower symbol and labels
+  // of la with an epsilon upper symbol are taken over as they are, and for
+  // each label x:y of la and y:z of ua the label x:z is inserted.
+  // The UTF-8 flag is taken from la.
+  void Alphabet::compose( const Alphabet &la, const Alphabet &ua )
+
+  {
+    typedef hash_set<Character> CharSet;
+    typedef hash_map<Character, CharSet> TargetMap;
+
+    // insert the symbols
+    insert_symbols(la);
+    insert_symbols(ua);
+    utf8 = la.utf8;
+
+    TargetMap cs;
+
+    // create a hash table for a quick lookup of the target characters
+    for( iterator it=ua.begin(); it!=ua.end(); it++ ) {
+      Character lc=it->lower_char();
+      if (lc == Label::epsilon)
+        insert(*it);
+      else
+        cs[lc].insert(it->upper_char());
+    }
+
+    for( iterator it=la.begin(); it!=la.end(); it++ ) {
+      Character uc=it->upper_char();
+      if (uc == Label::epsilon) {
+        insert(*it);
+        continue;
+      }
+
+      // one lookup only; the set of targets is used via a reference
+      // and not copied for each label
+      TargetMap::iterator hit=cs.find(uc);
+      if (hit == cs.end())
+        continue;
+
+      const CharSet &targets=hit->second;
+      Character lc=it->lower_char();
+      for( CharSet::const_iterator t=targets.begin(); t!=targets.end(); ++t )
+        insert(Label(lc, *t));
+    }
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  operator<<(Alphabet)                                           */
+  /*                                                                 */
+  /*******************************************************************/
+
+  // Prints one line "code -> symbol" per symbol, followed by a line with
+  // all labels separated by blanks.
+  ostream &operator<<( ostream &s, const Alphabet &a )
+
+  {
+    Alphabet::CharMap::const_iterator entry=a.cm.begin();
+    while (entry != a.cm.end()) {
+      s << entry->first << " -> " << entry->second << "\n";
+      ++entry;
+    }
+
+    Alphabet::iterator lp=a.begin();
+    while (lp != a.end()) {
+      s << a.write_label(*lp) << " ";
+      ++lp;
+    }
+    s << "\n";
+    return s;
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  Alphabet::next_mcsym                                           */
+  /*                                                                 */
+  /*  recognizes multi-character symbols which are enclosed with     */
+  /*  angle brackets <...>. If the argument flag insert is true,     */
+  /*  the multi-character symbol must be already in the lexicon in   */
+  /*  order to be recognized.                                        */
+  /*                                                                 */
+  /*******************************************************************/
+
+  // Tries to read a multi-character symbol "<...>" at the start of string.
+  // Only the first '>' after the '<' is considered as the end.  On success
+  // string is moved behind the symbol and its code is returned; otherwise
+  // string is unchanged and EOF is returned.  With insert set, an unknown
+  // symbol is added to the alphabet instead of being rejected.
+  int Alphabet::next_mcsym( char* &string, bool insert )
+
+  {
+    char *start=string;
+
+    if (*start != '<')
+      return EOF;  // no symbol starts here
+
+    char *close=strchr(start+1, '>');
+    if (close == NULL)
+      return EOF;  // no matching angle bracket
+
+    // mark the end of the substring with \0 during the lookup
+    char *end=close+1;
+    const char saved = *end;
+    *end = 0;
+
+    int c;
+    if (insert)
+      c = add_symbol( start );
+    else
+      c = symbol2code(start);
+
+    // restore the original string
+    *end = saved;
+
+    if (c == EOF)
+      return EOF;  // not a complex character
+
+    // symbol found: move behind it and return its code
+    string = end;
+    return (Character)c;
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  Alphabet::next_code                                            */
+  /*                                                                 */
+  /*******************************************************************/
+
+  // Reads the next symbol from string, moves string behind it and returns
+  // the symbol code.  EOF is returned at the end of the string and after
+  // an UTF-8 encoding error.  With extended set, a backslash in front of a
+  // character is removed.  insert is passed on to next_mcsym; single
+  // characters are always added to the alphabet.
+  int Alphabet::next_code( char* &string, bool extended, bool insert )
+
+  {
+    if (*string == 0)
+      return EOF; // finished
+
+    int code = next_mcsym(string, insert);
+    if (code != EOF)
+      return code;
+
+    if (extended && *string == '\\')
+      string++; // remove quotation
+
+    if (utf8) {
+      unsigned int uc = utf8toint( &string );
+      if (uc == 0) {
+        fprintf(stderr, "Error in UTF-8 encoding!\n");
+        return EOF; // error encountered in utf8 character
+      }
+      return (int)add_symbol(int2utf8(uc));
+    }
+
+    // A backslash at the very end of the string quotes nothing.  Without
+    // this check, string would be moved beyond the terminating \0 and the
+    // next call would read outside of the string.
+    if (*string == 0)
+      return EOF;
+
+    char buffer[2];
+    buffer[0] = *string;
+    buffer[1] = 0;
+    string++;
+    return (int)add_symbol(buffer);
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  Alphabet::next_label                                           */
+  /*                                                                 */
+  /*******************************************************************/
+
+  // Reads the next label from string and moves string behind it.  With
+  // extended set, two symbols separated by ':' form one label.  Epsilon
+  // labels are skipped.  A default-constructed Label is returned at the
+  // end of the string.  An error message is thrown if the symbol after
+  // ':' is missing.
+  Label Alphabet::next_label( char* &string, bool extended )
+
+  {
+    // read first character
+    int c = next_code( string, extended );
+    if (c == EOF)
+      return Label(); // end of string reached
+
+    Character lc=(Character)c;
+    if (!extended || *string != ':') { // single character?
+      if (lc == Label::epsilon)
+        return next_label(string, extended); // ignore epsilon
+      return Label(lc);
+    }
+
+    // read second character
+    string++; // jump over ':'
+    c = next_code( string );
+    if (c == EOF) {
+      static char buffer[1000];
+      // the rest of the input is cut after 900 characters because
+      // it is not guaranteed to fit into the buffer
+      sprintf(buffer,"Error: incomplete symbol in input file: %.900s", string);
+      throw buffer;
+    }
+
+    Label l(lc, (Character)c);
+    if (l.is_epsilon())
+      return next_label(string, extended); // ignore epsilon transitions
+    return l;
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  Alphabet::string2symseq                                        */
+  /*                                                                 */
+  /*******************************************************************/
+
+  // Converts the string "s" into a sequence of symbol codes which are
+  // appended to "ch". Symbols are scanned with next_code in non-extended
+  // mode until it reports EOF.
+  void Alphabet::string2symseq( char *s, vector<Character> &ch )
+
+  {
+    for(;;) {
+      const int code = next_code(s, false);
+      if (code == EOF)
+        break; // end of the input string
+      ch.push_back((Character)code);
+    }
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  Alphabet::string2labelseq                                      */
+  /*                                                                 */
+  /*******************************************************************/
+
+  // Converts the string "s" into a sequence of labels which are appended
+  // to "labels". Scanning stops as soon as next_label returns the
+  // epsilon label.
+  void Alphabet::string2labelseq( char *s, vector<Label> &labels )
+
+  {
+    for( Label next=next_label(s); next != Label::epsilon; next=next_label(s) )
+      labels.push_back(next);
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  Alphabet::store                                                */
+  /*                                                                 */
+  /*******************************************************************/
+
+  // Writes the alphabet to "file" in binary form:
+  //   1 byte         utf8 flag (1 or 0)
+  //   Character      number of entries of the symbol mapping
+  //   per entry      symbol code followed by the 0-terminated symbol string
+  //   Character      number of labels
+  //   per label      lower symbol code followed by upper symbol code
+  // Throws a string literal if the stream reports an error afterwards.
+  void Alphabet::store( FILE *file ) const
+
+  {
+    fputc(utf8 ? (char)1 : (char)0, file);
+
+    // symbol mapping: code -> symbol string
+    Character count=(Character)cm.size();
+    fwrite(&count, sizeof(count), 1, file);
+    CharMap::const_iterator entry=cm.begin();
+    while (entry != cm.end()) {
+      const Character code=entry->first;
+      const char *symbol=entry->second;
+      fwrite(&code, sizeof(code), 1, file);
+      fwrite(symbol, sizeof(char), strlen(symbol)+1, file); // incl. final 0
+      entry++;
+    }
+
+    // character pairs (labels)
+    count = (Character)size();
+    fwrite(&count, sizeof(count), 1, file);
+    LabelSet::const_iterator pair=ls.begin();
+    while (pair != ls.end()) {
+      const Character lower_code=pair->lower_char();
+      const Character upper_code=pair->upper_char();
+      fwrite(&lower_code, sizeof(lower_code), 1, file);
+      fwrite(&upper_code, sizeof(upper_code), 1, file);
+      pair++;
+    }
+
+    if (ferror(file))
+      throw "Error encountered while writing alphabet to file\n";
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  Alphabet::read                                                 */
+  /*                                                                 */
+  /*******************************************************************/
+
+  // Reads an alphabet in the binary format written by store():
+  // the utf8 flag, the symbol mapping and the set of character pairs.
+  // Throws a string literal ("Error1".."Error3") on read problems.
+  void Alphabet::read( FILE *file )
+
+  {
+    utf8 = (fgetc(file) != 0);
+
+    // symbol mapping
+    Character count=0;
+    read_num(&count, sizeof(count), file);
+    unsigned done=0;
+    while (done < count) {
+      char symbol[BUFFER_SIZE];
+      Character code;
+      read_num(&code, sizeof(code), file);
+      // the symbol string must be complete and 0-terminated
+      const bool ok = read_string(symbol, BUFFER_SIZE, file) &&
+        !feof(file) && !ferror(file);
+      if (!ok)
+        throw "Error1 occurred while reading alphabet!\n";
+      add_symbol(symbol, code);
+      done++;
+    }
+
+    // character pairs
+    read_num(&count, sizeof(count), file);
+    if (ferror(file))
+      throw "Error2 occurred while reading alphabet!\n";
+    done = 0;
+    while (done < count) {
+      Character lower_code, upper_code;
+      read_num(&lower_code, sizeof(lower_code), file);
+      read_num(&upper_code, sizeof(upper_code), file);
+      insert(Label(lower_code, upper_code));
+      done++;
+    }
+    if (ferror(file))
+      throw "Error3 occurred while reading alphabet!\n";
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  Alphabet::compute_score                                        */
+  /*                                                                 */
+  /*******************************************************************/
+
+  // Computes a score for the analysis "ana"; the score is 0 or negative.
+  // Alphabet::disambiguate keeps the analyses with the highest score.
+  // Only the lower (analysis) symbols of the labels are inspected.
+  //  - If the analysis contains <X> symbols, the score is minus their
+  //    number.
+  //  - Otherwise 1 is subtracted for each all-uppercase tag such as <NN>
+  //    (3 for <PREF>), with the exceptions described below. Counting
+  //    stops at the first tag of the form <+UPPERCASE>.
+  int Alphabet::compute_score( Analysis &ana )
+
+  {
+    // check whether the morpheme boundaries are explicitly marked
+    // with <X> tags
+    int score=0;
+    for( size_t i=0; i<ana.size(); i++ ) {
+
+      // get next symbol
+      const char *sym=write_char(ana[i].lower_char());
+
+      if (strcmp(sym,"<X>") == 0)
+	score--;
+    }
+    if (score <  0)
+      return score;
+
+    // No explicit morpheme boundary markers have been found.
+    // Count the number of part-of-speech and PREF tags.
+    for( size_t i=0; i<ana.size(); i++ ) {
+
+      // get next symbol
+      const char *sym=write_char(ana[i].lower_char());
+
+      // Skip everything which is not a multi-character symbol
+      if (sym[0] != '<' || sym[1] == 0)
+	continue;
+
+      // Is it a POS tag starting with "+" like <+NN>?
+      // If so, the score computed so far is the final result.
+      if (sym[1] == '+') {
+	const char *t=sym+2;
+	for( ; *t >= 'A' && *t <= 'Z'; t++) ;
+	if (t > sym+2 && *t == '>')
+	  return score;
+      }
+
+      // Is it a potential POS tag (i.e. all uppercase)?
+      // Symbols with an empty or not purely uppercase name are skipped.
+      const char *t = sym+1;
+      for( ; *t >= 'A' && *t <= 'Z'; t++) ;
+      if (t == sym+1 || *t != '>')
+	continue;
+
+      // uppercase symbol found
+      if (strcmp(sym,"<SUFF>") == 0 ||
+	  strcmp(sym,"<OLDORTH>") == 0 ||
+	  strcmp(sym,"<NEWORTH>") == 0)
+	continue; // not what we are looking for
+
+      // disprefer nouns with prefixes
+      // (2 extra points in addition to the general decrement below)
+      if (strcmp(sym,"<PREF>") == 0)
+	score-=2;
+
+      if (strcmp(sym,"<V>") == 0 || strcmp(sym,"<ADJ>") == 0) {
+	bool is_verb=(strcmp(sym,"<V>")==0);
+	// get the next non-empty symbol
+	Character c=Label::epsilon;
+	size_t k;
+	for( k=i+1; k<ana.size(); k++ )
+	  if ((c = ana[k].lower_char()) != Label::epsilon)
+	    break;
+	// Is it a participle
+	if (c != Label::epsilon) {
+	  sym = write_char(c);
+	  // skip one intervening <OLDORTH>, <NEWORTH> or <SUFF> symbol
+	  // and look at the non-empty symbol which follows it
+	  if (strcmp(sym,"<OLDORTH>") == 0 || 
+	      strcmp(sym,"<NEWORTH>") == 0 || 
+	      strcmp(sym,"<SUFF>") == 0) {
+	    for( k++; k<ana.size(); k++ )
+	      if ((c = ana[k].lower_char()) != Label::epsilon)
+		break;
+	    if (c != Label::epsilon)
+	      sym = write_char(c);
+	  }
+	  if (is_verb &&
+	      (strcmp(sym,"<PPres>") == 0 || strcmp(sym,"<PPast>") == 0))
+	    continue; // don't consider participles as complex
+	  if (!is_verb &&
+	      (strcmp(sym,"<Sup>") == 0 || strcmp(sym,"<Comp>") == 0))
+	    continue; // same exemption for <ADJ> followed by <Sup> or <Comp>
+	}
+      }
+      score--;
+    }
+    return score;
+  }
+
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  Alphabet::disambiguate                                         */
+  /*                                                                 */
+  /*******************************************************************/
+
+  // Removes all analyses whose score (see compute_score) is lower than
+  // the best score. The remaining analyses keep their relative order.
+  void Alphabet::disambiguate( vector<Analysis> &analyses )
+
+  {
+    // score every analysis and remember the maximum
+    vector<int> score;
+    int bestscore=INT_MIN;
+    for( size_t pos=0; pos<analyses.size(); pos++ ) {
+      const int current=compute_score(analyses[pos]);
+      score.push_back(current);
+      if (current > bestscore)
+        bestscore = current;
+    }
+
+    // move the optimal analyses to the front and cut off the rest
+    size_t kept=0;
+    for( size_t pos=0; pos<analyses.size(); pos++ ) {
+      if (score[pos] != bestscore)
+        continue; // suboptimal analysis
+      analyses[kept] = analyses[pos];
+      kept++;
+    }
+    analyses.resize(kept);
+  }
+
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  Alphabet::print_analysis                                       */
+  /*                                                                 */
+  /*******************************************************************/
+
+  // Converts the analysis "ana" into a 0-terminated string.
+  // If "both_layers" is true, each label is printed with write_label and
+  // a label printed as ":" is preceded by a backslash. Otherwise only
+  // the non-epsilon lower symbols are printed with write_char.
+  // The returned pointer refers to a static buffer which is released and
+  // reallocated by the next call.
+  char *Alphabet::print_analysis( Analysis &ana, bool both_layers )
+
+  {
+    vector<char> ch;
+
+    for( size_t pos=0; pos<ana.size(); pos++ ) {
+      const Label l=ana[pos];
+      const char *text;
+
+      if (!both_layers) {
+        // analysis layer only: empty symbols produce no output
+        if (l.lower_char() == Label::epsilon)
+          continue;
+        text = write_char(l.lower_char());
+      }
+      else {
+        text = write_label(l);
+        if (strcmp(text,":") == 0)
+          ch.push_back('\\'); // quote colons
+      }
+
+      // append the symbol string without its terminating 0
+      for( ; *text; text++ )
+        ch.push_back(*text);
+    }
+    ch.push_back(0); // terminate the string
+
+    // hand the result over to the static buffer
+    static char *result=NULL;
+    delete[] result; // no effect during the first call (NULL pointer)
+    result = new char[ch.size()];
+    memcpy(result, &ch[0], ch.size());
+
+    return result;
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  Alphabet::operator==                                           */
+  /*                                                                 */
+  /*******************************************************************/
+
+  // Two alphabets are equal if their symbol maps contain the same
+  // symbols with the same codes. The label sets are not compared.
+  // (The former implementation returned false when the codes of a
+  // symbol were equal, so that identical non-empty alphabets compared
+  // unequal.)
+  bool Alphabet::operator==(const Alphabet &alpha) const
+
+  {
+    // each symbol of this alphabet must have the same code in "alpha"
+    for ( SymbolMap::const_iterator it = this->sm.begin(); it != this->sm.end(); it++ )
+      {
+        SymbolMap::const_iterator alpha_it = alpha.sm.find(it->first);
+        if ( alpha_it == alpha.sm.end() )
+          return false;
+        if ( alpha_it->second != it->second )
+          return false;
+      }
+    // and vice versa: "alpha" must not contain additional symbols
+    for ( SymbolMap::const_iterator alpha_it = alpha.sm.begin(); alpha_it != alpha.sm.end(); alpha_it++ )
+      {
+        SymbolMap::const_iterator it = this->sm.find(alpha_it->first);
+        if ( it == this->sm.end() )
+          return false;
+        if ( it->second != alpha_it->second )
+          return false;
+      }
+    return true;
+  }
+
+}
--- a/src/main/resources/SMOR/src/alphabet.h
+++ b/src/main/resources/SMOR/src/alphabet.h
+/*******************************************************************/
+/*                                                                 */
+/*  FILE     alphabet.h                                            */
+/*  MODULE   alphabet                                              */
+/*  PROGRAM  SFST                                                  */
+/*  AUTHOR   Helmut Schmid, IMS, University of Stuttgart           */
+/*                                                                 */
+/*  PURPOSE  finite state tools                                    */
+/*                                                                 */
+/*******************************************************************/
+
+#ifndef _ALPHABET_H_
+#define _ALPHABET_H_
+
+#include <stdio.h>
+
+#include "basic.h"
+
+#include <set>
+#include <vector>
+
+#include <iostream>
+
+#include <cstring>
+
+#include "sgi.h"
+
+namespace SFST {
+
+#ifndef CODE_DATA_TYPE
+  typedef unsigned short Character;  // data type of the symbol codes
+#else
+  typedef unsigned CODE_DATA_TYPE Character;
+#endif
+
+  // data type used to indicate whether some action is to be performed
+  // on the analysis level (lower) or the surface level (upper)
+  typedef enum {upper, lower} Level;
+
+
+  /*****************  class Label  ***********************************/
+
+  // A transition label: a pair of symbol codes consisting of a "lower"
+  // (analysis) symbol and an "upper" (surface) symbol.
+  class Label {
+
+  private:
+    // data structure where the two symbols are stored
+    struct {
+      Character lower;
+      Character upper;
+    } label;
+
+  public:
+    static const Character epsilon=0; // code of the empty symbol
+
+    // new label with two identical symbols
+    // (the default argument yields the epsilon label)
+    Label( Character c=epsilon ) { label.lower = label.upper = c; };
+
+    // new label with two different symbols
+    // (c1 is the lower symbol, c2 the upper symbol)
+    Label( Character c1, Character c2 )
+      { label.lower = c1; label.upper = c2; };
+
+    // returns the indicated symbol of the label
+    Character get_char( Level l ) const
+    { return ((l==upper)? label.upper: label.lower); };
+
+    // returns the "upper" symbol of the label (i.e. the surface symbol)
+    Character upper_char() const {  return label.upper; };
+
+    // returns the "lower" symbol of the label (i.e. the analysis symbol)
+    Character lower_char() const {  return label.lower; };
+
+    // replaces symbols in a label
+    // returns a copy in which each symbol equal to c is replaced by nc;
+    // the label itself is not modified
+    Label replace_char( Character c, Character nc ) const {
+      Label l = *this;
+      if (l.label.lower == c)
+	l.label.lower = nc;
+      if (l.label.upper == c)
+	l.label.upper = nc;
+      return l;
+    };
+
+    // operators checking the equality of labels
+    int operator==( Label l ) const
+    { return (label.lower==l.label.lower && label.upper==l.label.upper); };
+    int operator!=( Label l ) const
+    { return !(l == *this); };
+
+    // comparison operator needed for sorting labels in compact.C
+    // (compares the upper symbols first, then the lower symbols; note
+    // that label_cmp below uses the opposite priority)
+    int operator<( Label l ) const { 
+      if (upper_char() < l.upper_char())
+	return true;
+      if (upper_char() > l.upper_char())
+	return false;
+      if (lower_char() < l.lower_char())
+	return true;
+      return false;
+    };
+    int operator>( Label l ) const { 
+      if (upper_char() > l.upper_char())
+	return true;
+      if (upper_char() < l.upper_char())
+	return false;
+      if (lower_char() > l.lower_char())
+	return true;
+      return false;
+    };
+
+    // check whether the label is epsilon (i.e. both symbols are epsilon)
+    // transitions with epsilon labels are epsilon transitions
+    int is_epsilon() const
+    { return (label.upper == epsilon && label.lower == epsilon); };
+
+    // check whether the "upper" symbol is epsilon
+    int upper_is_epsilon() const
+    { return (label.upper == epsilon); };
+
+    // check whether the "lower" symbol is epsilon
+    int lower_is_epsilon() const
+    { return (label.lower == epsilon); };
+
+    // hash function needed to store labels in a hash table
+    // (combines the lower symbol code with the shifted upper symbol code)
+    struct label_hash {
+      size_t operator() ( const Label l ) const {
+	return (size_t)l.lower_char() ^ 
+	  ((size_t)l.upper_char() << 16) ^
+	  ((size_t)l.upper_char() >> 16);
+      }
+    };
+
+    // "less than" comparison needed to store labels in an ordered set
+    // (compares the lower symbols first, then the upper symbols)
+    struct label_cmp {
+      bool operator() ( const Label l1, const Label l2 ) const {
+	return (l1.lower_char() < l2.lower_char() ||
+		(l1.lower_char() == l2.lower_char() && 
+		 l1.upper_char() < l2.upper_char()));
+      }
+    };
+
+    // comparison operator needed to store labels in a hash table
+    struct label_eq {
+      bool operator() ( const Label l1, const Label l2 ) const {
+	return (l1.lower_char() == l2.lower_char() &&
+		l1.upper_char() == l2.upper_char());
+      }
+    };
+  };
+
+  typedef std::vector<Label> Analysis;
+
+
+  /*****************  class Alphabet  *******************************/
+
+  // The alphabet maps symbol strings to symbol codes and back, and
+  // stores the set of labels (symbol pairs) which are known to it.
+  class Alphabet {
+
+    // string comparison operator needed to store strings in a hash table
+    struct eqstr {
+      bool operator()(const char* s1, const char* s2) const {
+	return strcmp(s1, s2) == 0;
+      }
+    };
+
+    // data structure storing labels without repetitions (i.e. as a set)
+    typedef std::set<Label, Label::label_cmp> LabelSet;
+
+    // hash table used to map the symbols to their codes
+    typedef hash_map<const char*, Character, hash<const char*>,eqstr> SymbolMap;
+
+  public: // HFST addition
+    // hash table used to map the codes back to the symbols
+    typedef hash_map<Character, char*> CharMap;
+
+    // HFST addition
+    // compares the symbol-to-code mappings of two alphabets
+    bool operator==(const Alphabet &alpha) const;
+
+  private:
+    SymbolMap sm; // maps symbols to codes
+
+    CharMap  cm; // maps codes to symbols
+    LabelSet ls; // set of labels known to the alphabet
+
+    // add a new symbol with symbol code c
+    void add( const char *symbol, Character c );
+
+  public:
+    // flag which is stored as the first byte by store() and restored by
+    // read(); if it is set, next_code decodes the input as UTF-8
+    bool utf8;
+
+    // iterators over the set of known labels
+    typedef LabelSet::iterator iterator;
+    typedef LabelSet::const_iterator const_iterator;
+    Alphabet();
+    ~Alphabet() { clear(); };
+    const_iterator begin() const { return ls.begin(); };
+    const_iterator end() const { return ls.end(); };
+    // number of labels (not the number of symbols)
+    size_t size() const { return ls.size(); };
+
+    // HFST additions
+    // get_char_map returns a copy of the code-to-symbol map
+    CharMap get_char_map(void) { return cm; };
+    void print(void);
+
+    void clear();
+    // removes all labels
+    void clear_char_pairs() { ls.clear(); };
+
+    // lookup a label in the alphabet
+    iterator find( Label l ) { return ls.find(l); };
+
+    // insert a label in the alphabet
+    // (the epsilon label is never inserted)
+    void insert( Label l ) { if (!l.is_epsilon()) ls.insert(l); };
+
+    // insert the known symbols from another alphabet
+    void insert_symbols( const Alphabet& );
+
+    // insert the labels and known symbols from another alphabet
+    void copy( const Alphabet& );
+
+    // create the alphabet of a transducer obtained by a composition operation
+    void compose( const Alphabet &la, const Alphabet &ua );
+
+    // add a symbol to the alphabet and return its code
+    Character add_symbol(const char *symbol);
+
+    // add a symbol to the alphabet with a given code
+    void add_symbol(const char *symbol, Character c );
+
+    // create a new marker symbol and return its code
+    Character new_marker( void );
+    void delete_markers();
+
+    // compute the complement of a symbol set
+    void complement( std::vector<Character> &sym );
+  
+    // return the code of the argument symbol
+    // (or EOF if the symbol is unknown)
+    int symbol2code( const char *s ) const { 
+      SymbolMap::const_iterator p = sm.find(s);
+      if (p != sm.end()) return p->second;
+      return EOF;
+    };
+
+    // return the symbol for the given symbol code
+    // (or NULL if the code is unknown)
+    const char *code2symbol( Character c ) const {
+      CharMap::const_iterator p=cm.find(c);
+      if (p == cm.end())
+	return NULL;
+      else
+	return p->second;
+    };
+
+    // write the symbol for the given symbol code into a string
+    void write_char( Character c, char *buffer, int *pos,
+		     bool with_brackets=true ) const;
+
+    // write the symbol pair of a given label into a string
+    void write_label( Label l, char *buffer, int *pos,
+		      bool with_brackets=true ) const;
+
+    // write the symbol for the given symbol code into a buffer and return
+    // a pointer to it
+    // the flag "with_brackets" indicates whether the angle brackets
+    // surrounding multi-character symbols are to be printed or not
+    const char *write_char( Character c, bool with_brackets=true ) const;
+
+    // write the symbol pair of a given label into a string
+    // and return a pointer to it
+    const char *write_label( Label l, bool with_brackets=true ) const;
+
+    // scan the next multi-character symbol in the argument string
+    int next_mcsym( char*&, bool insert=true );
+
+    // scan the next symbol in the argument string
+    int next_code( char*&, bool extended=true, bool insert=true );
+
+    // convert a character string into a symbol or label sequence
+    // (the results are appended to the argument vector)
+    void string2symseq( char*, std::vector<Character>& );
+    void string2labelseq( char*, std::vector<Label>& );
+
+    // scan the next label in the argument string
+    // (returns the epsilon label at the end of the string)
+    Label next_label( char*&, bool extended=true );
+
+    // store the alphabet in the argument file (in binary form)
+    void store( FILE* ) const;
+
+    // read the alphabet from the argument file
+    void read( FILE* );
+
+    // disambiguation and printing of analyses
+    // compute_score returns a score which is 0 or negative
+    // disambiguate keeps the analyses with the highest score
+    // print_analysis returns a pointer to a static buffer which is
+    // replaced by the next call
+    int compute_score( Analysis &ana );
+    void disambiguate( std::vector<Analysis> &analyses );
+    char *print_analysis( Analysis &ana, bool both_layers );
+
+    friend std::ostream &operator<<(std::ostream&, const Alphabet&);
+  };
+
+  // write the alphabet to the output stream (in readable form)
+  std::ostream &operator<<(std::ostream&, const Alphabet&);
+}
+
+#endif
--- a/src/main/resources/SMOR/src/alphabet.o
+++ b/src/main/resources/SMOR/src/alphabet.o
--- a/src/main/resources/SMOR/src/basic.C
+++ b/src/main/resources/SMOR/src/basic.C
+
+/*******************************************************************/
+/*                                                                 */
+/*  FILE     basic.C                                               */
+/*  MODULE   basic                                                 */
+/*  PROGRAM  SFST                                                  */
+/*  AUTHOR   Helmut Schmid, IMS, University of Stuttgart           */
+/*                                                                 */
+/*  PURPOSE                                                        */
+/*                                                                 */
+/*******************************************************************/
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "basic.h"
+
+namespace SFST {
+
+  bool Switch_Bytes=false;
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  fst_strdup                                                     */
+  /*                                                                 */
+  /*******************************************************************/
+
+  // Returns a copy of the argument string which was allocated with
+  // malloc. The program is terminated with exit(1) if malloc fails.
+  char* fst_strdup(const char* pString)
+
+  {
+    const size_t bytes = strlen(pString) + 1; // including the final 0
+    char* pStringCopy = (char*)malloc(bytes);
+    if (pStringCopy != NULL) {
+      strcpy(pStringCopy, pString);
+      return pStringCopy;
+    }
+
+    fprintf(stderr, "\nError: out of memory (malloc failed)\naborted.\n");
+    exit(1);
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  read_string                                                    */
+  /*                                                                 */
+  /*******************************************************************/
+
+  // Reads a 0-terminated string from "file" into "buffer" which has
+  // room for "size" characters.
+  // Returns 1 if the terminating 0 byte was read, and 0 if the end of
+  // the file was reached first or if the string did not fit into the
+  // buffer. The buffer is 0-terminated in all three cases.
+  int read_string( char *buffer, int size, FILE *file )
+
+  {
+    int pos=0;
+    while (pos < size) {
+      const int ch=fgetc(file);
+      if (ch == EOF || ch == 0) {
+        buffer[pos] = 0;
+        return (ch == 0) ? 1 : 0;
+      }
+      buffer[pos++] = (char)ch;
+    }
+    // buffer full: overwrite the last character with the terminator
+    buffer[size-1] = 0;
+    return 0;
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  read_num                                                       */
+  /*                                                                 */
+  /*******************************************************************/
+
+  // Reads "n" bytes from "file" into the memory at "p" and reverses the
+  // order of these bytes if the global flag Switch_Bytes is set.
+  // Returns the number of bytes delivered by fread.
+  size_t read_num( void *p, size_t n, FILE *file )
+
+  {
+    char *bytes=(char*)p;
+    const size_t got=fread( bytes, 1, n, file );
+    if (Switch_Bytes && n > 1) {
+      // swap the bytes pairwise from both ends towards the middle
+      char *front=bytes;
+      char *back=bytes+n-1;
+      while (front < back) {
+        const char tmp=*front;
+        *front++ = *back;
+        *back-- = tmp;
+      }
+    }
+    return got;
+  }
+}
--- a/src/main/resources/SMOR/src/basic.h
+++ b/src/main/resources/SMOR/src/basic.h
+
+/*******************************************************************/
+/*                                                                 */
+/*  FILE     basic.h                                               */
+/*  MODULE   basic                                                 */
+/*  PROGRAM  SFST                                                  */
+/*  AUTHOR   Helmut Schmid, IMS, University of Stuttgart           */
+/*                                                                 */
+/*  PURPOSE                                                        */
+/*                                                                 */
+/*******************************************************************/
+
+#ifndef _BASIC_H_
+#define _BASIC_H_
+
+#include <stdio.h>
+
+namespace SFST {
+
+  // if true, read_num reverses the byte order of the data it reads
+  extern bool Switch_Bytes;
+
+  // returns a malloc'ed copy of the string; exits if malloc fails
+  char* fst_strdup(const char* pString);
+
+  // reads a 0-terminated string of at most size-1 characters;
+  // returns 1 if the terminating 0 byte was read and 0 otherwise
+  int read_string( char *buffer, int size, FILE *file );
+
+  // reads "size" bytes into *p (byte-swapped if Switch_Bytes is set)
+  // and returns the number of bytes read
+  size_t read_num( void *p, size_t size, FILE *file );
+
+}
+#endif
--- a/src/main/resources/SMOR/src/basic.o
+++ b/src/main/resources/SMOR/src/basic.o
--- a/src/main/resources/SMOR/src/compact.C
+++ b/src/main/resources/SMOR/src/compact.C
+/*******************************************************************/
+/*                                                                 */
+/*  FILE     compact.C                                             */
+/*  MODULE   compact                                               */
+/*  PROGRAM  SFST                                                  */
+/*  AUTHOR   Helmut Schmid, IMS, University of Stuttgart           */
+/*                                                                 */
+/*  PURPOSE  Code needed for analysing data                        */
+/*                                                                 */
+/*******************************************************************/
+
+#include <stdio.h>
+#include <math.h>
+
+#include <limits.h>
+
+#include "compact.h"
+
+namespace SFST {
+
+  using std::equal_range;
+  using std::vector;
+  using std::pair;
+
+  // Comparison object which orders labels by their upper symbol only.
+  // It is used with equal_range in CompactTransducer::analyze.
+  struct label_less {
+    bool operator()(const Label lhs, const Label rhs) const {
+      const Character left = lhs.upper_char();
+      const Character right = rhs.upper_char();
+      return left < right;
+    }
+  };
+
+  const int BUFFER_SIZE=1000;
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  CompactTransducer::convert                                     */
+  /*                                                                 */
+  /*******************************************************************/
+
+  // Converts a compact analysis (a sequence of transition numbers) into
+  // the sequence of the labels of these transitions.
+  void CompactTransducer::convert( CAnalysis &cana, Analysis &ana )
+
+  {
+    const size_t length = cana.size();
+    ana.resize(length);
+    size_t pos = 0;
+    while (pos < length) {
+      ana[pos] = label[cana[pos]]; // look up the label of the transition
+      ++pos;
+    }
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  CompactTransducer::analyze                                     */
+  /*                                                                 */
+  /*******************************************************************/
+
+  // Recursive depth-first search for all paths which start at node "n"
+  // and whose upper symbols match the input from position "ipos" on.
+  // Each complete path which ends in a final node is appended to
+  // "analyses" as a sequence of transition numbers.
+  void CompactTransducer::analyze(unsigned int n, vector<Character> &input,
+				  size_t ipos, CAnalysis &ca, 
+				  vector<CAnalysis> &analyses )
+  {
+    // "n" is the number of the current transducer node/state
+    // "input" is the sequence of input symbols
+    // "ipos" is the input position currently analysed
+    // "ca" stores the incomplete analysis string
+    // "analyses" stores the analyses found so far
+
+    if (analyses.size() > 10000)
+      return; // limit the maximal number of analyses
+
+    // Is the input string fully analyzed and the current node a final node?
+    if (finalp[n] && ipos == input.size())
+      // store the new analysis
+      analyses.push_back(ca);
+
+    // follow the epsilon transitions
+    // first_arc[n] is the number of the first outgoing transition of node n
+    // first_arc[n+1]-1 is the number of the last outgoing transition of node n
+    // first_arc[n+1] is the number of the first outgoing transition of node n+1
+    // The loop stops at the first transition with a non-epsilon upper
+    // symbol; "i" keeps this position for the search below.
+    unsigned int i;
+    for( i=first_arc[n]; 
+	 i<first_arc[n+1] && label[i].upper_char() == Label::epsilon; 
+	 i++)
+      {
+	ca.push_back(i);
+	analyze(target_node[i], input, ipos, ca, analyses);
+	ca.pop_back();
+      }
+
+    // follow the non-epsilon transitions
+
+    // scan the next input symbol
+    if (ipos < input.size()) {
+      // find the set of arcs with matching upper character in the sorted list
+      // (the search starts at "i", i.e. behind the epsilon transitions,
+      // and compares the upper symbols only, see label_less)
+      pair<Label*,Label*>range = 
+	equal_range(label+i, label+first_arc[n+1], Label(input[ipos]), 
+		    label_less());
+      unsigned int to = (unsigned int)(range.second - label);
+
+      // follow the non-epsilon transitions
+      for( i=(unsigned)(range.first-label); i<to; i++) {
+	ca.push_back(i);
+	analyze(target_node[i], input, ipos+1, ca, analyses);
+	ca.pop_back();
+      }
+    }
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  CompactTransducer::analyze_string                              */
+  /*                                                                 */
+  /*******************************************************************/
+
+  // Analyzes the input string "s" and returns the analyses found in
+  // "analyses" (whose previous content is discarded). A warning is
+  // printed if the limit of 10000 analyses was exceeded. If the flag
+  // simplest_only is set, only the simplest analyses are returned.
+  void CompactTransducer::analyze_string( char *s, vector<CAnalysis> &analyses )
+
+  {
+    // map the input string to a sequence of symbol codes
+    vector<Character> symbols;
+    alphabet.string2symseq( s, symbols );
+
+    // search the transducer starting at node 0 and input position 0
+    analyses.clear();
+    CAnalysis partial; // the current incomplete analysis
+    analyze(0, symbols, 0, partial, analyses);
+
+    const bool limit_exceeded = (analyses.size() > 10000);
+    if (limit_exceeded)
+      fprintf(stderr,"Warning: Only the first 10000 analyses considered for \"%s\"!\n", s);
+
+    if (!simplest_only || analyses.size() <= 1)
+      return;
+    disambiguate( analyses ); // select the simplest analyses
+  }
+
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  CompactTransducer::~CompactTransducer                          */
+  /*                                                                 */
+  /*******************************************************************/
+
+  // Releases the arrays of the transducer.
+  CompactTransducer::~CompactTransducer()
+
+  {
+    // node data
+    delete[] finalp;
+    delete[] first_arc;
+    delete[] final_logprob;
+
+    // transition data
+    delete[] label;
+    delete[] target_node;
+    delete[] arc_logprob;
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  CompactTransducer::CompactTransducer                           */
+  /*                                                                 */
+  /*******************************************************************/
+
+  // Creates an empty transducer: no nodes, no transitions, all arrays
+  // unallocated and both option flags switched off.
+  CompactTransducer::CompactTransducer()
+
+  {
+    // option flags
+    simplest_only = false;
+    both_layers = false;
+
+    // sizes
+    number_of_arcs = 0;
+    number_of_nodes = 0;
+
+    // arrays
+    finalp = NULL;
+    first_arc = NULL;
+    label = NULL;
+    target_node = NULL;
+    final_logprob = (float*)NULL;
+    arc_logprob = (float*)NULL;
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  CompactTransducer::read_finalp                                 */
+  /*                                                                 */
+  /*******************************************************************/
+
+  // Reads one bit per node from "file" (eight nodes per byte, most
+  // significant bit first) and stores it as 1 or 0 in finalp.
+  void CompactTransducer::read_finalp( FILE *file )
+
+  {
+    unsigned char byte=0; // the byte which is currently unpacked
+    int unread=0;         // number of bits of "byte" not yet consumed
+    for( size_t node=0; node<number_of_nodes; node++ ) {
+      if (unread == 0) {
+        // fetch the next byte
+        byte = (unsigned char)fgetc(file);
+        unread = 8;
+      }
+      unread--;
+      finalp[node] = (byte & (1 << unread)) ? 1 : 0;
+    }
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  CompactTransducer::read_first_arcs                             */
+  /*                                                                 */
+  /*******************************************************************/
+
+  // Reads number_of_nodes+1 bit-packed values from "file" into
+  // first_arc. Each value occupies "bits" bits; the values are packed
+  // without gaps into words of type unsigned int which are read with
+  // read_num.
+  void CompactTransducer::read_first_arcs( FILE *file )
+
+  {
+    int k=0;          // number of unread bits left in n
+    unsigned int n=0; // current word; the unread bits are left-aligned
+    // number of bits per value
+    // NOTE(review): bits is 0 if number_of_arcs is 0; the shift by
+    // sizeof(n)*8 - bits below is then a shift by the full width of n,
+    // which is undefined -- confirm that this case cannot occur
+    int bits=(int)ceil(log(number_of_arcs+1)/log(2));
+
+    for( size_t i=0; i<=number_of_nodes; i++ ) {
+      // take the next "bits" bits from the top of n
+      first_arc[i] = n >> (sizeof(n)*8 - bits);
+      n <<= bits;
+      k -= bits;
+      if (k < 0) {
+	// n did not contain enough bits: read the next word and add
+	// the missing -k bits from its top
+	read_num(&n,sizeof(n),file);
+	first_arc[i] |= n >> (sizeof(n)*8 + k);
+	n <<= -k;
+	k += (int)sizeof(n) * 8;
+      }
+    }
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  CompactTransducer::read_target_nodes                           */
+  /*                                                                 */
+  /*******************************************************************/
+
+  // Reads number_of_arcs bit-packed values from "file" into
+  // target_node. Each value occupies "bits" bits; the values are packed
+  // without gaps into words of type unsigned int which are read with
+  // read_num.
+  void CompactTransducer::read_target_nodes( FILE *file )
+
+  {
+    int k=0;          // number of unread bits left in n
+    unsigned int n=0; // current word; the unread bits are left-aligned
+    // number of bits per value
+    // NOTE(review): bits is 0 if number_of_nodes is 1; the shift by
+    // sizeof(n)*8 - bits below is then a shift by the full width of n,
+    // which is undefined -- confirm that this case is harmless
+    int bits=(int)ceil(log(number_of_nodes)/log(2));
+
+    for( size_t i=0; i<number_of_arcs; i++ ) {
+      // take the next "bits" bits from the top of n
+      target_node[i] = n >> (sizeof(n)*8 - bits);
+      n <<= bits;
+      k -= bits;
+      if (k < 0) {
+	// n did not contain enough bits: read the next word and add
+	// the missing -k bits from its top
+	read_num(&n,sizeof(n),file);
+	target_node[i] |= n >> (sizeof(n)*8 + k);
+	n <<= -k;
+	k += (int)sizeof(n) * 8;
+      }
+    }
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  CompactTransducer::read_labels                                 */
+  /*                                                                 */
+  /*******************************************************************/
+
+  void CompactTransducer::read_labels( FILE *file )
+
+  {
+    // Fills label[0..number_of_arcs-1]. The file stores, for every arc, the
+    // index of its label in alphabet iteration order as a fixed-width bit
+    // field (most significant bit first, packed into unsigned int words).
+    // Throws a C string (like the constructor does for format errors) if
+    // the file refers to a label index that does not exist.
+
+    // index -> Label table; heap-allocated instead of a variable-length
+    // stack array, which is not standard C++ and can overflow the stack
+    std::vector<Label> Num2Label;
+    Num2Label.reserve(alphabet.size());
+    for( Alphabet::const_iterator it=alphabet.begin();
+         it != alphabet.end(); it++ )
+      Num2Label.push_back(*it);
+
+    const int word_bits = (int)(sizeof(unsigned int) * 8);
+
+    // The width expression is kept exactly as it was so that files which
+    // were decoded correctly before are still decoded the same way.
+    // Only the conversion to int is clamped (log(0) yields -infinity).
+    const double width = ceil(log((double)alphabet.size())/log(2));
+    const int bits = (width > 0.0) ? (int)width : 0;
+
+    int k=0;           // number of unread bits left in n
+    unsigned int n=0;  // current word, unread bits are left-aligned
+
+    for( size_t i=0; i<number_of_arcs; i++ ) {
+      unsigned int l = 0;
+      if (bits > 0) {
+        // bits == 0 is skipped: a shift by the full word width would be
+        // undefined behaviour and no data is stored for zero-width fields
+        l = n >> (word_bits - bits);
+        n <<= bits;
+        k -= bits;
+        if (k < 0) {
+          // the field continues in the next word: -k bits are still missing
+          read_num(&n,sizeof(n),file);
+          l |= n >> (word_bits + k);
+          n <<= -k;
+          k += word_bits;
+        }
+      }
+      // never index past the table on corrupt or mismatching input
+      if (l >= Num2Label.size())
+        throw "Error: invalid label index in compact transducer file\n";
+      label[i] = Num2Label[l];
+    }
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  CompactTransducer::read_probs                                  */
+  /*                                                                 */
+  /*******************************************************************/
+
+  void CompactTransducer::read_probs( FILE *file )
+
+  {
+    // Reads the stochastic parameters of the transducer from "file".
+    // Layout as read here: two size_t values (node count, arc count),
+    // followed by one float per node (final_logprob) and one float per arc
+    // (arc_logprob). The counts must match the transducer already loaded.
+    // Any mismatch or short read prints an error and terminates the program.
+
+    size_t n,m;
+    if (fread(&n, sizeof(n), 1, file) != 1 ||
+        fread(&m, sizeof(m), 1, file) != 1 ||
+        n != node_count() || m != arc_count())
+      {
+        fprintf(stderr,"Error: incompatible probability file!\n");
+        exit(1);
+      }
+    final_logprob = new float[n];
+    arc_logprob = new float[m];
+    // arc_logprob holds m entries (one per arc), so m values must be read;
+    // both reads are checked so a truncated file is reported
+    if (fread(final_logprob, sizeof(float), n, file) != n ||
+        fread(arc_logprob, sizeof(float), m, file) != m) {
+      fprintf(stderr,"Error: in probability file!\n");
+      exit(1);
+    }
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  CompactTransducer::CompactTransducer                           */
+  /*                                                                 */
+  /*******************************************************************/
+
+  CompactTransducer::CompactTransducer( FILE *file, FILE *pfile )
+
+  {
+    // Reads a compact transducer from "file" and, if "pfile" is not NULL,
+    // its probabilities from "pfile".
+    // Throws a C string if the file does not start with the marker 'c'.
+
+    both_layers = false;
+    simplest_only = false;
+
+    // Give every member a defined value before anything is read: the
+    // arrays are only allocated if the stream is not in an error state,
+    // and the counts are only set if read_num stores a value.
+    number_of_nodes = 0;
+    number_of_arcs = 0;
+    finalp = NULL;
+    first_arc = NULL;
+    label = NULL;
+    target_node = NULL;
+    arc_logprob = final_logprob = (float*)NULL;
+
+    if (fgetc(file) != 'c')
+      throw "Error: wrong file format (not a compact transducer)\n";
+
+    alphabet.read(file);
+
+    read_num(&number_of_nodes,sizeof(number_of_nodes),file);
+    read_num(&number_of_arcs,sizeof(number_of_arcs),file);
+
+    if (!ferror(file)) {
+      // memory allocation
+      finalp = new char[number_of_nodes];
+      first_arc = new unsigned[number_of_nodes+1]; // one extra end marker
+      label = new Label[number_of_arcs];
+      target_node = new unsigned[number_of_arcs];
+
+      // reading the data (the order is the order of the sections read)
+      read_finalp(file);
+      read_first_arcs(file);
+      read_labels(file);
+      read_target_nodes(file);
+    }
+
+    // the probability arrays stay NULL unless a probability file is given
+    if (pfile != NULL)
+      read_probs(pfile);
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  CompactTransducer::longest_match2                              */
+  /*                                                                 */
+  /*******************************************************************/
+
+  void CompactTransducer::longest_match2(unsigned int n, char *string, int l, 
+                                         CAnalysis &ca, int &bl, CAnalysis &ba)
+  {
+    // Recursive search for the longest prefix of "string" that leads to a
+    // final node.
+    //   n      : current transducer node
+    //   string : remaining input
+    //   l      : number of input bytes consumed so far
+    //   ca     : arcs taken so far (current analysis)
+    //   bl     : length of the longest match found so far (updated)
+    //   ba     : arcs of the longest match found so far (updated)
+
+    // a final node reached with more input consumed is the new best match
+    if (finalp[n] && l > bl) {
+      bl = l;
+      ba = ca;
+    }
+
+    const unsigned int arcs_end = first_arc[n+1];
+    unsigned int arc = first_arc[n];
+
+    // leading arcs with an epsilon upper symbol consume no input
+    while (arc < arcs_end && label[arc].upper_char() == Label::epsilon) {
+      ca.push_back(arc);
+      longest_match2(target_node[arc], string, l, ca, bl, ba);
+      ca.pop_back();
+      ++arc;
+    }
+
+    // read the next input symbol; "rest" is advanced behind it
+    char *rest = string;
+    const int code = alphabet.next_code(rest, false, false);
+    l += (int)(rest - string);
+    if (code == EOF)
+      return;
+
+    // among the remaining arcs of this node, locate those whose label
+    // compares equal to the input symbol under label_less
+    pair<Label*,Label*> match =
+      equal_range(label+arc, label+arcs_end, Label((Character)code),
+                  label_less());
+    for( Label *p=match.first; p != match.second; ++p ) {
+      const unsigned int idx = (unsigned int)(p - label);
+      ca.push_back(idx);
+      longest_match2(target_node[idx], rest, l, ca, bl, ba);
+      ca.pop_back();
+    }
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  CompactTransducer::print_analysis                              */
+  /*                                                                 */
+  /*******************************************************************/
+
+  char *CompactTransducer::print_analysis( CAnalysis &cana )
+
+  {
+    // Converts the arc-number sequence into an Analysis and lets the
+    // alphabet render it; both_layers is passed through to the alphabet.
+    Analysis converted;
+    convert(cana, converted);
+    char *text = alphabet.print_analysis( converted, both_layers );
+    return text;
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  CompactTransducer::longest_match                               */
+  /*                                                                 */
+  /*******************************************************************/
+
+  const char *CompactTransducer::longest_match( char* &string )
+
+  {
+    // Finds the longest prefix of "string" accepted by the transducer,
+    // advances "string" behind it and returns the printed analysis.
+    // If nothing matches, a single symbol is consumed from "string" and
+    // the symbol string for its code is returned.
+
+    CAnalysis ca, ba;  // current and best arc sequence
+    int l=0;           // length of the best match, set by longest_match2
+    longest_match2(0, string, 0, ca, l, ba);
+
+    // no match? return the next character
+    if (ba.size() == 0) {
+      int c=alphabet.next_code(string, false, false);
+      return alphabet.code2symbol((Character)c);
+    }
+
+    string += l;
+    return print_analysis( ba );
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  CompactTransducer::disambiguate                                */
+  /*                                                                 */
+  /*******************************************************************/
+
+  void CompactTransducer::disambiguate( vector<CAnalysis> &analyses )
+
+  {
+    // Keeps only the analyses with the highest score as computed by
+    // alphabet.compute_score; the relative order of the survivors is kept.
+
+    const size_t total = analyses.size();
+    vector<int> scores;
+    scores.reserve(total);
+
+    // first pass: score every analysis and remember the maximum
+    Analysis converted;
+    int best = INT_MIN;
+    for( size_t idx=0; idx<total; ++idx ) {
+      convert(analyses[idx], converted);
+      const int s = alphabet.compute_score(converted);
+      scores.push_back(s);
+      if (s > best)
+        best = s;
+    }
+
+    // second pass: compact the best-scoring analyses to the front
+    size_t kept = 0;
+    for( size_t idx=0; idx<total; ++idx ) {
+      if (scores[idx] != best)
+        continue;
+      analyses[kept] = analyses[idx];
+      ++kept;
+    }
+    analyses.resize(kept);
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  CompactTransducer::train2                                      */
+  /*                                                                 */
+  /*******************************************************************/
+
+  bool CompactTransducer::train2( char *s, vector<double> &arcfreq, 
+                                  vector<double> &finalfreq )
+  {
+    // Training on an input that is already a label sequence: follows, from
+    // node 0, the first arc whose label equals each input label in turn.
+    // On success every arc used and the node reached get their frequency
+    // incremented by 1 and true is returned. If some label has no matching
+    // arc or the node reached is not final, a warning is printed and false
+    // is returned without changing the counts.
+
+    vector<Label> input;
+    alphabet.string2labelseq( s, input );
+
+    CAnalysis ca; // data structure where the analysis is stored
+    unsigned int n=0; // current node
+    bool failure=false;
+    for( size_t i=0; i<input.size(); i++ ) {
+      failure = true;
+      for( unsigned int k=first_arc[n]; k<first_arc[n+1]; k++) {
+        if (label[k] == input[i]) {
+          ca.push_back(k);
+          n = target_node[k];
+          failure = false;
+          break;
+        }
+      }
+      if (failure)
+        break;
+    }
+    if (failure || !finalp[n]) {
+      fprintf(stderr,"Warning: The following input is not covered:\n%s\n", s);
+      return false;
+    }
+
+    for( size_t k=0; k<ca.size(); k++ )
+      arcfreq[ca[k]]++;
+    // n is the target of the last arc taken, or node 0 for an empty input;
+    // using it avoids calling back() on an empty arc sequence
+    finalfreq[n]++;
+
+    return true;
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  CompactTransducer::train                                       */
+  /*                                                                 */
+  /*******************************************************************/
+
+  bool CompactTransducer::train( char *s, vector<double> &arcfreq, 
+                                 vector<double> &finalfreq )
+  {
+    // Training on a surface string: analyses "s" and distributes a total
+    // count of 1 evenly over all analyses (or over the simplest ones if
+    // simplest_only is set), adding the share to every arc used and to
+    // the node in which the analysis ends.
+    // Returns false if "s" has no analysis, true otherwise.
+
+    vector<CAnalysis> analyses;
+    vector<Character> input;
+    alphabet.string2symseq( s, input );
+
+    CAnalysis ca; // data structure where the current incomplete analysis
+    // is stored
+    analyze(0, input, 0, ca, analyses); // start the analysis
+
+    if (analyses.size() > 10000)
+      return true; // ignore inputs with more than 10000 analyses
+    else if (analyses.size() == 0)
+      return false;
+  
+    if (simplest_only && analyses.size() > 1)
+      disambiguate( analyses ); // select the simplest analyses
+
+    if (analyses.size() > 0) {
+      double incr = 1.0 / (double)analyses.size();
+
+      for( size_t i=0; i<analyses.size(); i++ ) {
+        CAnalysis &arcs=analyses[i];
+        for( size_t k=0; k<arcs.size(); k++ )
+          arcfreq[arcs[k]] += incr;
+        // the analysis starts in node 0, so an analysis without arcs also
+        // ends there; back() must not be called on an empty vector
+        const unsigned int last = arcs.empty() ? 0 : target_node[arcs.back()];
+        finalfreq[last] += incr;
+      }
+    }
+    return true;
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  CompactTransducer::estimate_probs                              */
+  /*                                                                 */
+  /*******************************************************************/
+
+  void CompactTransducer::estimate_probs( vector<double> &arcfreq, 
+                                          vector<double> &finalfreq )
+  {
+    // Normalises the frequencies in place, node by node: the final
+    // frequency of a node and the frequencies of its outgoing arcs are
+    // divided by their common sum. A sum of zero is replaced by 1 so that
+    // the values of such a node stay 0.
+    const size_t nodes = finalfreq.size();
+    for( size_t node=0; node<nodes; ++node ) {
+      const size_t from = first_arc[node];
+      const size_t to = first_arc[node+1];
+
+      double total = finalfreq[node];
+      for( size_t arc=from; arc<to; ++arc )
+        total += arcfreq[arc];
+      if (total == 0.0)
+        total = 1.0;
+
+      finalfreq[node] /= total;
+      for( size_t arc=from; arc<to; ++arc )
+        arcfreq[arc] /= total;
+    }
+  }
+
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  CompactTransducer::compute_probs                               */
+  /*                                                                 */
+  /*******************************************************************/
+
+  void CompactTransducer::compute_probs( vector<CAnalysis> &analyses, 
+                                         vector<double> &prob )
+  {
+    // Computes the probability of every analysis from arc_logprob and
+    // final_logprob, reorders "analyses" by decreasing probability and
+    // stores the probabilities, normalised to sum to 1, in "prob".
+    // NOTE(review): arc_logprob/final_logprob are NULL when the transducer
+    // was constructed without a probability file - callers must not call
+    // this function in that case.
+
+    prob.resize(analyses.size());
+    double sum=0.0;
+    for( size_t i=0; i<analyses.size(); i++ ) {
+      CAnalysis &a=analyses[i];
+
+      // compute the probability
+      double logprob=0.0;
+      for( size_t k=0; k<a.size(); k++ )
+        logprob += arc_logprob[a[k]];
+      // an analysis without arcs is treated as ending in node 0;
+      // back() must not be called on an empty vector
+      // NOTE(review): assumes analyses start in node 0 - confirm
+      const unsigned int last = a.empty() ? 0 : target_node[a.back()];
+      logprob += final_logprob[last];
+      prob[i] = exp(logprob);
+      sum += prob[i];
+    }
+
+    // sort the analyses: repeatedly pick the first not yet used entry with
+    // the highest probability (used entries are marked with -1)
+    vector<CAnalysis> oldanalyses(analyses);
+    vector<double> oldprob(prob);
+    for( size_t i=0; i<analyses.size(); i++ ) {
+      prob[i] = -1.0;
+      size_t n=0;
+      for( size_t k=0; k<oldanalyses.size(); k++ )
+        if (prob[i] < oldprob[k]) {
+          prob[i] = oldprob[k];
+          n = k;
+        }
+      analyses[i] = oldanalyses[n];
+      oldprob[n] = -1.0;
+      // normalization; skipped if all probabilities underflowed to 0,
+      // which would otherwise produce NaN (0/0)
+      if (sum > 0.0)
+        prob[i] /= sum;
+    }
+  }
+}
--- a/src/main/resources/SMOR/src/compact.h
+++ b/src/main/resources/SMOR/src/compact.h
+/*******************************************************************/
+/*                                                                 */
+/*  FILE     compact.h                                             */
+/*  MODULE   compact                                               */
+/*  PROGRAM  SFST                                                  */
+/*  AUTHOR   Helmut Schmid, IMS, University of Stuttgart           */
+/*                                                                 */
+/*  PURPOSE  finite state tools                                    */
+/*                                                                 */
+/*******************************************************************/
+
+#ifndef _COMPACT_H_
+#define _COMPACT_H_
+
+#include "alphabet.h"
+
+#include <vector>
+
+namespace SFST {
+
+  // an analysis is stored as the sequence of arc numbers that were taken
+  typedef std::vector<unsigned int> CAnalysis;
+    
+  // Transducer stored in flat arrays: the arcs of node i are the arcs
+  // first_arc[i] .. first_arc[i+1]-1, and label/target_node are indexed by
+  // arc number.
+  // NOTE(review): the stream constructor allocates the arrays with new[];
+  // copying is not disabled here - confirm that no caller copies objects
+  // of this class.
+  class CompactTransducer {
+    
+  protected:
+
+    // the following data structures are used to store the nodes
+
+    unsigned int number_of_nodes; // number of nodes in the transducer
+    char *finalp;  // finalp[i] is 1 if node i is final and 0 otherwise
+    unsigned int *first_arc;  // first_arc[i] is the number of the first
+    // arc outgoing from node i
+
+    // the following data structures are used to store the transition arcs
+
+    unsigned int number_of_arcs; // total number of arcs in the transducer
+    Label *label;              // the label (character pair) of arc i
+    unsigned int *target_node; // target node of arc i
+
+    // the following data structures are used to store the stochastic parameters
+    float *final_logprob; // indexed by node number; NULL without probabilities
+    float *arc_logprob;   // indexed by arc number; NULL without probabilities
+
+    // functions needed to read the transducer from a file
+
+    void read_finalp( FILE *file );
+    void read_first_arcs( FILE *file );
+    void read_target_nodes( FILE *file );
+    void read_labels( FILE *file );
+    void read_probs( FILE *file );
+
+    // functions needed to analyze data with the transducer
+
+    void analyze( unsigned int n, std::vector<Character> &ch, size_t ipos,
+                  CAnalysis&, std::vector<CAnalysis>&);
+
+    // function selecting the simplest morphological analysis
+
+    int compute_score( CAnalysis &ana );
+    void disambiguate( std::vector<CAnalysis> &analyses );
+
+    // functions for longest-match analysis of input data
+
+    void longest_match2(unsigned int, char*, int, CAnalysis&, int&, CAnalysis&);
+
+    // turns a sequence of arc numbers into an Analysis
+    void convert( CAnalysis &cana, Analysis &ana );
+  
+  public:
+    // read-only accessors; const so that they can also be called on a
+    // const transducer
+    size_t node_count() const { return number_of_nodes; }
+    size_t arc_count() const { return number_of_arcs; }
+
+    bool both_layers;   // print surface and analysis symbols
+    bool simplest_only; // print only the simplest analyses
+
+    Alphabet alphabet;  // data structure which maps symbols to numeric codes
+    CompactTransducer(); // dummy constructor
+    CompactTransducer( FILE*, FILE *pfile=NULL ); // reads a (stochastic) transducer
+    ~CompactTransducer();  // destroys a transducer
+  
+    // the analysis function returns the set of analyses for the string "s"
+    // in the argument "analyses"
+    void analyze_string( char *s, std::vector<CAnalysis > &analyses );
+
+    // sorts the analyses by decreasing probability and stores the
+    // normalised probabilities in "prob"
+    void compute_probs( std::vector<CAnalysis> &analyses, std::vector<double> &prob );
+    char *print_analysis( CAnalysis &ana );
+  
+    // longest-match analysis; the argument is advanced behind the match
+    const char *longest_match( char*& );
+
+    // EM training
+    bool train2( char *s, std::vector<double> &arcfreq, std::vector<double> &finalfreq );
+    bool train( char *s, std::vector<double> &arcfreq, std::vector<double> &finalfreq );
+    void estimate_probs( std::vector<double> &arcfreq, std::vector<double> &finalfreq );
+
+    // robust analysis
+    float robust_analyze_string( char *string, std::vector<CAnalysis> &analyses,
+                                 float ErrorsAllowed );
+  };
+}
+#endif