Initial commit

0b611765 · Ben Campbell · 0b611765 · 0b611765 · 0b611765 · 0b611765
Commit 0b611765 authored Jan 23, 2019 by Ben Campbell
20 changed files
--- a/src/main/resources/SMOR/src/man1/fst-infl2.1
+++ b/src/main/resources/SMOR/src/man1/fst-infl2.1
+.TH fst-infl 1 "November 2004" "" "fst-infl"
+.SH NAME
+fst-infl fst-infl2 fst-infl3 \- morphological analysers
+.SH SYNOPSIS
+.B fst-infl [ options ]
+.I file
+[
+.I input-file
+[
+.I output-file
+]
+]
+.br
+.B fst-infl2 [ options ]
+.I file
+[
+.I input-file
+[
+.I output-file
+]
+]
+.br
+.B fst-infl3 [ options ]
+.I file
+[
+.I input-file
+[
+.I output-file
+]
+]
+.SH OPTIONS
+.TP
+.B \-t file
+Read an alternative transducer from
+.I file
+and use it if the main transducer fails to find an analysis. By
+iterating this option, a cascade of transducers may be tried to find
+an analysis.
+.TP
+.B \-b
+Print surface and analysis symbols. (fst-infl2 only)
+.TP
+.B \-n
+Print multi-character symbols without the enclosing angle brackets.
+(fst-infl only)
+.TP
+.B \-d
+The analyses are symbolically disambiguated by returning only analyses
+with a minimal number of morphemes. This option requires that morpheme
+boundaries are marked with the tag <X>. If no <X> tag is found in the
+analysis string, then the program (basically) counts the number of
+multi-character symbols consisting entirely of upper-case characters
+and uses this count for disambiguation. The latter heuristic was
+developed for the German SMOR morphology. (This option is only
+available with fst-infl2 and fst-infl3.)
+.TP
+.B \-e n
+If no regular analysis is found, do robust matching and print analyses
+with up to
+.I n
+edit errors. The set of edit operations currently includes
+replacement, insertion and deletion. Each operation has currently a
+fixed error weight of 1. (fst-infl2 only)
+.TP
+.B \-% f
+Disambiguates the analyses statistically and prints the most likely
+analyses with at least f % of the total probability mass of the
+analyses. The transducer weights are read from a file obtained by
+appending
+.I .prob
+to the name of the transducer file. The weight files are created with
+.I fst-train. 
+(fst-infl2 only)
+.TP
+.B \-p
+Print the probability of each analysis. (fst-infl2 only)
+.TP
+.B \-c
+Use this option if the transducer was compiled on a computer with a
+different endianness. If you have a transducer which was compiled
+on a Sparc computer and you want to use it on a Pentium, you need to
+use this option. (fst-infl2 only)
+.TP
+.B \-q
+Suppress status messages.
+.TP
+.B \-h
+Print usage information.
+.SH DESCRIPTION
+.I fst-infl
+is a morphological analyser. The first argument is the name of a file
+which was generated by
+.I fst-compiler.
+The second argument is the name of the input file. The third argument
+is the output file. If the third argument is missing, output is
+directed to 
+.I stdout.
+If the second argument is missing as well, input is read from
+.I stdin.
+
+.I fst-infl2
+is similar to
+.I fst-infl
+but needs a transducer in compact format (see the man pages for
+.I fst-compiler
+and
+.IR fst-compact ).
+fst-infl2 is implemented differently
+from fst-infl and usually much faster.
+
+.I fst-infl3
+is also similar to
+.I fst-infl
+but needs a transducer in lowmem format (see the man pages for
+.I fst-compiler
+and
+.IR fst-lowmem ).
+fst-infl3 accesses the transducer on
+disc rather than reading it into memory. It starts very fast and needs
+very little memory, but is slower than fst-infl2.
+
+.I fst-infl
+reads the transducer which is stored in the argument file. Then it
+reads the input file line by line. Each line is analysed with the
+transducer and all resulting analyses are printed (see also the man
+pages for
+.I fst-mor).
+
+.SH BUGS
+No bugs are known so far.
+.SH "SEE ALSO"
+fst-compiler, fst-mor
+.SH AUTHOR
+Helmut Schmid,
+Institute for Computational Linguistics,
+University of Stuttgart,
+Email: schmid@ims.uni-stuttgart.de,
+This software is available under the GNU Public License.
--- a/src/main/resources/SMOR/src/man1/fst-infl3.1
+++ b/src/main/resources/SMOR/src/man1/fst-infl3.1
+.TH fst-infl 1 "November 2004" "" "fst-infl"
+.SH NAME
+fst-infl fst-infl2 fst-infl3 \- morphological analysers
+.SH SYNOPSIS
+.B fst-infl [ options ]
+.I file
+[
+.I input-file
+[
+.I output-file
+]
+]
+.br
+.B fst-infl2 [ options ]
+.I file
+[
+.I input-file
+[
+.I output-file
+]
+]
+.br
+.B fst-infl3 [ options ]
+.I file
+[
+.I input-file
+[
+.I output-file
+]
+]
+.SH OPTIONS
+.TP
+.B \-t file
+Read an alternative transducer from
+.I file
+and use it if the main transducer fails to find an analysis. By
+iterating this option, a cascade of transducers may be tried to find
+an analysis.
+.TP
+.B \-b
+Print surface and analysis symbols. (fst-infl2 only)
+.TP
+.B \-n
+Print multi-character symbols without the enclosing angle brackets.
+(fst-infl only)
+.TP
+.B \-d
+The analyses are symbolically disambiguated by returning only analyses
+with a minimal number of morphemes. This option requires that morpheme
+boundaries are marked with the tag <X>. If no <X> tag is found in the
+analysis string, then the program (basically) counts the number of
+multi-character symbols consisting entirely of upper-case characters
+and uses this count for disambiguation. The latter heuristic was
+developed for the German SMOR morphology. (This option is only
+available with fst-infl2 and fst-infl3.)
+.TP
+.B \-e n
+If no regular analysis is found, do robust matching and print analyses
+with up to
+.I n
+edit errors. The set of edit operations currently includes
+replacement, insertion and deletion. Each operation has currently a
+fixed error weight of 1. (fst-infl2 only)
+.TP
+.B \-% f
+Disambiguates the analyses statistically and prints the most likely
+analyses with at least f % of the total probability mass of the
+analyses. The transducer weights are read from a file obtained by
+appending
+.I .prob
+to the name of the transducer file. The weight files are created with
+.I fst-train. 
+(fst-infl2 only)
+.TP
+.B \-p
+Print the probability of each analysis. (fst-infl2 only)
+.TP
+.B \-c
+Use this option if the transducer was compiled on a computer with a
+different endianness. If you have a transducer which was compiled
+on a Sparc computer and you want to use it on a Pentium, you need to
+use this option. (fst-infl2 only)
+.TP
+.B \-q
+Suppress status messages.
+.TP
+.B \-h
+Print usage information.
+.SH DESCRIPTION
+.I fst-infl
+is a morphological analyser. The first argument is the name of a file
+which was generated by
+.I fst-compiler.
+The second argument is the name of the input file. The third argument
+is the output file. If the third argument is missing, output is
+directed to 
+.I stdout.
+If the second argument is missing as well, input is read from
+.I stdin.
+
+.I fst-infl2
+is similar to
+.I fst-infl
+but needs a transducer in compact format (see the man pages for
+.I fst-compiler
+and
+.IR fst-compact ).
+fst-infl2 is implemented differently
+from fst-infl and usually much faster.
+
+.I fst-infl3
+is also similar to
+.I fst-infl
+but needs a transducer in lowmem format (see the man pages for
+.I fst-compiler
+and
+.IR fst-lowmem ).
+fst-infl3 accesses the transducer on
+disc rather than reading it into memory. It starts very fast and needs
+very little memory, but is slower than fst-infl2.
+
+.I fst-infl
+reads the transducer which is stored in the argument file. Then it
+reads the input file line by line. Each line is analysed with the
+transducer and all resulting analyses are printed (see also the man
+pages for
+.I fst-mor).
+
+.SH BUGS
+No bugs are known so far.
+.SH "SEE ALSO"
+fst-compiler, fst-mor
+.SH AUTHOR
+Helmut Schmid,
+Institute for Computational Linguistics,
+University of Stuttgart,
+Email: schmid@ims.uni-stuttgart.de,
+This software is available under the GNU Public License.
--- a/src/main/resources/SMOR/src/man1/fst-lattice.1
+++ b/src/main/resources/SMOR/src/man1/fst-lattice.1
+.TH fst-lattice 1 "October 2005" "" "fst-lattice"
+.SH NAME
+fst-lattice \- analyzes the input printing a transducer rather than a
+set of strings
+.SH SYNOPSIS
+.B fst-lattice
+.I file1 [ file [ file ] ]
+.SH OPTIONS
+.TP
+.B \-h
+print usage information.
+.TP
+.B \-q
+quiet mode
+.TP
+.B \-a
+print the analysis characters only (and not the surface characters).
+.SH DESCRIPTION
+.I fst-lattice
+reads a transducer from the first argument file, composes it with each
+line of the input file and prints the resulting transducers.
+.PP
+This program is useful to print the set of possible analyses compactly
+in the form of a transducer/automaton.
+.SH BUGS
+No bugs are known so far.
+.SH "SEE ALSO"
+fst-infl, fst-compiler
+.SH AUTHOR
+Helmut Schmid,
+Institute for Computational Linguistics,
+University of Stuttgart,
+Email: schmid@ims.uni-stuttgart.de,
+This software is available under the GNU Public License.
--- a/src/main/resources/SMOR/src/man1/fst-lowmem.1
+++ b/src/main/resources/SMOR/src/man1/fst-lowmem.1
+.TH fst-lowmem 1 "March 2005" "" "fst-lowmem"
+.SH NAME
+fst-lowmem \- transforms transducers to the lowmem format
+.SH SYNOPSIS
+.B fst-lowmem
+.I [ file [ file ] ]
+.SH OPTIONS
+.TP
+.B \-s
+Switch surface and analysis layer of the transducer. You have to use this
+switch in order to use
+.I fst-infl3
+for generation rather than analysis.
+.SH DESCRIPTION
+.I fst-lowmem
+reads a transducer in standard format from the input and writes it in
+lowmem format to the output. The lowmem format is only supported by the
+fst-infl3 program.
+
+.SH BUGS
+No bugs are known so far.
+.SH "SEE ALSO"
+fst-compiler, fst-infl3
+.SH AUTHOR
+Helmut Schmid,
+Institute for Computational Linguistics,
+University of Stuttgart,
+Email: schmid@ims.uni-stuttgart.de,
+This software is available under the GNU Public License.
--- a/src/main/resources/SMOR/src/man1/fst-match.1
+++ b/src/main/resources/SMOR/src/man1/fst-match.1
+.TH fst-match 1 "March 2005" "" "fst-match"
+.SH NAME
+fst-match \- longest match analyser
+.SH SYNOPSIS
+.B fst-match [ options ]
+.I file
+[
+.I input-file
+[
+.I output-file
+]
+]
+.SH OPTIONS
+.TP
+.B \-q
+Suppress status messages.
+.TP
+.B \-h
+Print usage information.
+.SH DESCRIPTION
+.I fst-match
+is a longest match analyser. The first argument is the name of a
+compact transducer file which was generated by
+.I fst-compiler -c.
+The second argument is the name of the input file. The third argument
+is the output file. If the third argument is missing, output is
+directed to 
+.I stdout.
+If the second argument is missing as well, input is read from
+.I stdin.
+
+.I fst-match
+reads the argument transducer and performs a longest-match analysis of
+the input string. Given the transducer <x>:<>ab+<x>:<>, it will
+analyse the string baabbca as ba<x>abb<x>ca.
+
+.SH BUGS
+If the longest match can be mapped to several target strings, only one
+of them is printed.
+.SH "SEE ALSO"
+fst-compiler, fst-mor, fst-infl, fst-parse
+.SH AUTHOR
+Helmut Schmid,
+Institute for Computational Linguistics,
+University of Stuttgart,
+Email: schmid@ims.uni-stuttgart.de,
+This software is available under the GNU Public License.
--- a/src/main/resources/SMOR/src/man1/fst-mor.1
+++ b/src/main/resources/SMOR/src/man1/fst-mor.1
+.TH fst-mor 1 "February 2002" "" "fst-mor"
+.SH NAME
+fst-mor \- Interactive morphological analyser and generator
+.SH SYNOPSIS
+.B fst-mor [options]
+.I file
+.SH OPTIONS
+.TP
+.B \-n
+Print multi-character symbols without the enclosing angle brackets.
+.TP
+.B \-h
+Print usage information.
+.SH DESCRIPTION
+.B fst-mor
+is an interactive morphological analyser and generator. The argument
+is the name of a file which was generated by
+.I fst-compiler
+(without using option -c).
+
+.I fst-mor
+reads the transducer which is stored in the argument file and prompts
+the user for input. Each input line is processed until the user enters
+"q" which ends the program.
+
+.I fst-mor
+has two modes, an analysis mode (default) and a generation mode.
+Entering an empty input line switches between the two modes.
+
+In order to explain what the program does in generation and analysis,
+consider the following transducer:
+.IP
+a b:x c:<> d
+.PP
+In generation mode, the program will print "axd" if the user enters
+"abcd" and "no result" otherwise. In other words, the program maps the
+left symbols to the right symbols of the transducer.
+
+In analysis mode, the program will print "abcd" if the user enters
+"axd" and "no result" otherwise.
+.SH BUGS
+No bugs are known so far.
+.SH "SEE ALSO"
+fst-compiler
+.SH AUTHOR
+Helmut Schmid,
+Institute for Computational Linguistics,
+University of Stuttgart,
+Email: schmid@ims.uni-stuttgart.de,
+This software is available under the GNU Public License.
--- a/src/main/resources/SMOR/src/man1/fst-parse.1
+++ b/src/main/resources/SMOR/src/man1/fst-parse.1
+.TH fst-parse 1 "October 2003" "" "fst-parse"
+.SH NAME
+fst-parse \- analyzes input with a pipeline of transducers
+.SH SYNOPSIS
+.B fst-parse
+.I file1 [ file [ file ] ]
+.SH OPTIONS
+.TP
+.B \-t file
+add the transducer stored in 
+.I file
+at the end of the pipeline.
+.TP
+.B \-h
+print usage information.
+.TP
+.B \-q
+quiet mode
+.TP
+.B \-d
+debugging mode
+.SH DESCRIPTION
+.I fst-parse
+must be used instead of 
+.I fst-infl
+if several transducers have to be composed online in order to produce
+an analysis. Otherwise,
+.I fst-parse
+is identical to
+.I fst-infl.
+Option -t can be used multiple times.
+
+.SH BUGS
+No bugs are known so far.
+.SH "SEE ALSO"
+fst-infl, fst-compiler
+.SH AUTHOR
+Helmut Schmid,
+Institute for Computational Linguistics,
+University of Stuttgart,
+Email: schmid@ims.uni-stuttgart.de,
+This software is available under the GNU Public License.
--- a/src/main/resources/SMOR/src/man1/fst-parse2.1
+++ b/src/main/resources/SMOR/src/man1/fst-parse2.1
+.TH fst-parse2 1 "October 2003" "" "fst-parse2"
+.SH NAME
+fst-parse2 \- analyses input with a pipeline of transducers
+.SH SYNOPSIS
+.B fst-parse2
+.I file1 [ file [ file ] ]
+.SH OPTIONS
+.TP
+.B \-t file
+add the transducer stored in 
+.I file
+at the end of the pipeline.
+.TP
+.B \-h
+print usage information.
+.TP
+.B \-q
+quiet mode
+.TP
+.B \-d
+debugging mode
+.SH DESCRIPTION
+.I fst-parse2
+is similar to 
+.I fst-parse
+but prints the transducer resulting from the composition of the input
+string with the argument transducer(s) instead of a sequence of
+analysis strings.
+
+.SH BUGS
+No bugs are known so far.
+.SH "SEE ALSO"
+fst-parse
+.SH AUTHOR
+Helmut Schmid,
+Institute for Computational Linguistics,
+University of Stuttgart,
+Email: schmid@ims.uni-stuttgart.de,
+This software is available under the GNU Public License.
--- a/src/main/resources/SMOR/src/man1/fst-print.1
+++ b/src/main/resources/SMOR/src/man1/fst-print.1
+.TH fst-print 1 "February 2002" "" "fst-print"
+.SH NAME
+fst-print \- prints transducers in text form
+.SH SYNOPSIS
+.B fst-print
+.I [ file ]
+.SH DESCRIPTION
+.I fst-print
+prints a transducer in readable form to the terminal. The argument is
+the name of a file which was generated by
+.I fst-compiler
+(without the -c option!). If the argument is missing, input is read
+from
+.I stdin.
+
+.SH BUGS
+No bugs are known so far.
+.SH "SEE ALSO"
+fst-compiler
+.SH AUTHOR
+Helmut Schmid,
+Institute for Computational Linguistics,
+University of Stuttgart,
+Email: schmid@ims.uni-stuttgart.de,
+This software is available under the GNU Public License.
--- a/src/main/resources/SMOR/src/man1/fst-text2bin.1
+++ b/src/main/resources/SMOR/src/man1/fst-text2bin.1
+.TH fst-text2bin 1 "June 2007" "" "fst-text2bin"
+.SH NAME
+fst-text2bin \- converts a transducer from its text form to binary format
+.SH SYNOPSIS
+.B fst-text2bin 
+.I text-file
+.I bin-file
+.SH OPTIONS
+.TP
+.B \-h
+Print usage information.
+.SH DESCRIPTION
+.B fst-text2bin
+is a tool which reads a transducer which was stored in text form from the input file and writes it in the binary format to the output file. The input format is identical to the output format of the
+.I fst-print
+command. The output format is equivalent to the standard output format of the
+.I fst-compiler
+program. The programs
+.I fst-compact
+and
+.I fst-lowmem
+can be used to produce the other binary transducer formats.
+
+.SH BUGS
+No bugs are known so far.
+.SH "SEE ALSO"
+fst-compiler, fst-print, fst-compact, fst-lowmem
+.SH AUTHOR
+Helmut Schmid,
+Institute for Computational Linguistics,
+University of Stuttgart,
+Email: schmid@ims.uni-stuttgart.de,
+This software is available under the GNU Public License.
--- a/src/main/resources/SMOR/src/man1/fst-train.1
+++ b/src/main/resources/SMOR/src/man1/fst-train.1
+.TH fst-train 1 "October 2005" "" "fst-train"
+.SH NAME
+fst-train \- learning transducer weights
+.SH SYNOPSIS
+.B fst-train [ options ]
+.I file
+[
+.I input-file
+]
+.SH OPTIONS
+.TP
+.B \-t file
+use multiple transducers in the same way as 
+.B fst-infl2.
+.TP
+.B \-b
+This option is used for supervised training with disambiguated data.
+.TP
+.B \-d
+Disambiguate the analyses symbolically as described in the man pages
+of 
+.B fst-infl2.
+.TP
+.B \-q
+quiet mode
+.SH DESCRIPTION
+.I fst-train
+is used to learn statistical weights for the transducer's transitions
+based on training data. Training is either unsupervised (default) or
+supervised (option -b).
+.br
+In supervised mode, the input contains fully disambiguated data with
+the surface and the analysis form. The format restrictions are
+identical to those applying for lexicon entries, i.e. all operators
+other than the colon operator (:) are interpreted literally.
+.br
+In unsupervised mode, the input data consists of surface strings. The
+format is identical to the input format of 
+.I fst-infl
+and
+.I fst-infl2.
+.br
+The transducer weights are stored in files whose names are obtained by
+appending 
+.I .prob
+to the names of the transducer files.
+.SH BUGS
+No bugs are known so far.
+.SH "SEE ALSO"
+fst-infl2, fst-compiler
+.SH AUTHOR
+Helmut Schmid,
+Institute for Computational Linguistics,
+University of Stuttgart,
+Email: schmid@ims.uni-stuttgart.de,
+This software is available under the GNU Public License.
--- a/src/main/resources/SMOR/src/mem.h
+++ b/src/main/resources/SMOR/src/mem.h
+/*******************************************************************/
+/*                                                                 */
+/*  FILE     mem.h                                                 */
+/*  MODULE   mem                                                   */
+/*  PROGRAM  SFST                                                  */
+/*  AUTHOR   Helmut Schmid, IMS, University of Stuttgart           */
+/*                                                                 */
+/*  PURPOSE  memory management functions                           */
+/*                                                                 */
+/*******************************************************************/
+
+#ifndef _MEM_H_
+#define _MEM_H_
+
+#include <stdlib.h>
+#include <assert.h>
+
+namespace SFST {
+
+#define MEMBUFFER_SIZE 100000
+
+
+  /*****************  class Mem  *************************************/
+
+  // Arena-style allocator. Memory is handed out from a singly linked list
+  // of malloc'ed buffers of MEMBUFFER_SIZE bytes each; single allocations
+  // are never released individually, only all at once via clear() or the
+  // destructor.
+  // NOTE(review): the implicitly generated copy constructor/assignment
+  // would leave two objects freeing the same buffers -- instances should
+  // not be copied.
+  class Mem {
+
+  private:
+
+    struct MemBuffer {
+      char buffer[MEMBUFFER_SIZE];
+      struct MemBuffer *next;
+    };
+
+    MemBuffer *first_buffer;  // newest buffer; allocations are served from it
+    long pos;                 // offset of the first free byte in first_buffer
+
+    // Prepends a new buffer to the list and makes it the current one.
+    // Throws a string literal (const char*) if malloc fails.
+    void add_buffer() {
+      MemBuffer *mb=static_cast<MemBuffer*>(malloc(sizeof(MemBuffer)));
+      if (mb == NULL)
+        throw "Allocation of memory failed in Mem::add_buffer!";
+      mb->next = first_buffer;
+      first_buffer = mb;
+      pos = 0;
+    }
+
+  public:
+    // Starts out with one empty buffer.
+    Mem() : first_buffer(NULL), pos(0) { add_buffer(); }
+    ~Mem() { clear(); }
+
+    // Frees all buffers. Every pointer previously returned by alloc()
+    // becomes invalid. The object stays usable: the next alloc() call
+    // adds a new buffer.
+    void clear() {
+      while (first_buffer) {
+        MemBuffer *next = first_buffer->next;
+        free(first_buffer);
+        first_buffer = next;
+      }
+      pos = 0;
+    }
+
+    // Returns a pointer to a chunk of at least n bytes inside the current
+    // buffer; n is rounded up to a multiple of 4. A new buffer is added
+    // when the current one cannot hold the chunk.
+    // Throws a string literal (const char*) if the rounded size exceeds
+    // MEMBUFFER_SIZE or if a new buffer cannot be allocated. An oversized
+    // request leaves the allocator state untouched.
+    // NOTE(review): chunks are only 4-byte aligned relative to the start
+    // of the buffer -- confirm this is sufficient for the stored types on
+    // 64-bit targets.
+    void *alloc( size_t n ) {
+      const size_t capacity = MEMBUFFER_SIZE;
+
+      /* do memory alignment to multiples of 4 */
+      // Sizes that are too large anyway are left unrounded so that the
+      // rounding cannot wrap around for n close to the maximum of size_t.
+      if (n <= capacity && n % 4)
+        n += 4 - (n % 4);
+
+      // Check the size before adding a buffer: otherwise every oversized
+      // request would chain a fresh buffer and abandon the free space of
+      // the current one before failing.
+      if (n > capacity)
+        throw "Allocation of memory block larger than MEMBUFFER_SIZE attempted!";
+
+      if (first_buffer == NULL || static_cast<size_t>(pos) + n > capacity)
+        add_buffer();
+
+      void *result = first_buffer->buffer + pos;
+      pos += static_cast<long>(n);
+      return result;
+    }
+
+    //class MemError {};
+
+  };
+
+}
+#endif
--- a/src/main/resources/SMOR/src/operators.C
+++ b/src/main/resources/SMOR/src/operators.C
+
+/*******************************************************************/
+/*                                                                 */
+/*  FILE     operators.C                                           */
+/*  MODULE   operators                                             */
+/*  PROGRAM  SFST                                                  */
+/*  AUTHOR   Helmut Schmid, IMS, University of Stuttgart           */
+/*                                                                 */
+/*******************************************************************/
+
+
+#include "fst.h"
+
+using std::pair;
+using std::cerr;
+
+namespace SFST {
+
+  static void compose_nodes( Node*, Node*, Node*, Transducer*, PairMapping& );
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  check_cyclicity                                                */
+  /*                                                                 */
+  /*******************************************************************/
+
+  // Depth-first search from node that follows only arcs whose label has an
+  // epsilon upper symbol. visited holds the nodes on the current search
+  // path. Returns true as soon as a node is reached that is already in
+  // visited; while the recursion unwinds, the label of each arc on the way
+  // back is written to cerr. Returns false, with node removed from visited
+  // again, if no such node is reached.
+  static bool check_cyclicity( Node *node, NodeHashSet &visited,
+                               const Alphabet &alphabet)
+  {
+    const bool newly_inserted = visited.insert(node).second;
+    if (!newly_inserted)
+      return true; // node is already on the current path
+
+    for( ArcsIter it(node->arcs()); it; it++ ) {
+      Arc *a=it;
+      if (!a->label().upper_is_epsilon())
+        continue;
+      if (check_cyclicity(a->target_node(), visited, alphabet)) {
+        cerr << alphabet.write_label(a->label()) << "\n";
+        return true;
+      }
+    }
+
+    visited.erase(node);
+    return false;
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  Transducer::infinitely_ambiguous_node                          */
+  /*                                                                 */
+  /*******************************************************************/
+
+  // Returns true if check_cyclicity() reports a cycle for node or for any
+  // node reached recursively through its outgoing arcs. A node for which
+  // was_visited(vmark) returns true is not examined again.
+  bool Transducer::infinitely_ambiguous_node( Node *node )
+  {
+    if (node->was_visited( vmark ))
+      return false;
+
+    NodeHashSet path;
+    if (check_cyclicity(node, path, alphabet))
+      return true;
+
+    // descend into the targets of all outgoing arcs
+    for( ArcsIter it(node->arcs()); it; it++ ) {
+      Arc *a=it;
+      if (infinitely_ambiguous_node( a->target_node() ))
+        return true;
+    }
+    return false;
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  Transducer::is_infinitely_ambiguous                            */
+  /*                                                                 */
+  /*******************************************************************/
+
+  // Calls incr_vmark() and then runs infinitely_ambiguous_node() from the
+  // root node, returning its result.
+  bool Transducer::is_infinitely_ambiguous()
+  {
+    incr_vmark();
+    const bool ambiguous = infinitely_ambiguous_node(root_node());
+    return ambiguous;
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  Transducer::is_cyclic_node                                     */
+  /*                                                                 */
+  /*******************************************************************/
+
+  // Depth-first cycle search. previous holds the nodes on the path from
+  // the start of the search to node. Returns true if an arc of node, or of
+  // a node reached recursively from it, leads to a node in previous.
+  // Returns false otherwise; in that case node has been removed from
+  // previous again. A node for which was_visited(vmark) returns true is
+  // not examined again.
+  bool Transducer::is_cyclic_node( Node *node, NodeHashSet &previous )
+
+  {
+    if (node->was_visited( vmark ))
+      return false;
+
+    previous.insert(node);
+
+    // iterate over all outgoing arcs
+    for( ArcsIter p(node->arcs()); p; p++ ) {
+      Arc *arc=p;
+      if (previous.find(arc->target_node()) != previous.end() ||
+          is_cyclic_node( arc->target_node(), previous ))
+        return true;
+    }
+
+    // Erase by key: an iterator obtained from insert() above may have been
+    // invalidated by rehashing caused by insertions in the recursive calls.
+    previous.erase(node);
+    return false;
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  Transducer::is_cyclic                                          */
+  /*                                                                 */
+  /*******************************************************************/
+
+  // Calls incr_vmark() and runs is_cyclic_node() from the root node with
+  // an initially empty path set.
+  bool Transducer::is_cyclic()
+  {
+    NodeHashSet on_path;
+    incr_vmark();
+    return is_cyclic_node(root_node(), on_path);
+  }
+
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  Transducer::is_automaton_node                                  */
+  /*                                                                 */
+  /*******************************************************************/
+
+  // Returns false if an arc of node, or of a node reached recursively
+  // from it, carries a label whose upper and lower characters differ;
+  // returns true otherwise. A node for which was_visited(vmark) returns
+  // true is not examined again.
+  bool Transducer::is_automaton_node( Node *node )
+  {
+    if (node->was_visited( vmark ))
+      return true;
+
+    for( ArcsIter it(node->arcs()); it; it++ ) {
+      Arc *a=it;
+      Label lab=a->label();
+      const bool same_symbol = (lab.upper_char() == lab.lower_char());
+      if (!same_symbol || !is_automaton_node( a->target_node()))
+        return false;
+    }
+    return true;
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  Transducer::is_automaton                                       */
+  /*                                                                 */
+  /*******************************************************************/
+
+  // Calls incr_vmark() and then runs is_automaton_node() from the root
+  // node, returning its result.
+  bool Transducer::is_automaton()
+  {
+    incr_vmark();
+    const bool all_identity = is_automaton_node(root_node());
+    return all_identity;
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  Transducer::is_empty                                           */
+  /*                                                                 */
+  /*******************************************************************/
+
+  // If the minimised flag is set: returns true exactly when the root node
+  // is not final and its arc set is empty. Otherwise the question is
+  // delegated to the transducer returned by minimise(), which is deleted
+  // here afterwards.
+  bool Transducer::is_empty()
+  {
+    if (minimised)
+      return !root_node()->is_final() && root_node()->arcs()->is_empty();
+
+    Transducer *min_copy=&minimise();
+    const bool empty=min_copy->is_empty();
+    delete min_copy;
+    return empty;
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  Transducer::generates_empty_string                             */
+  /*                                                                 */
+  /*******************************************************************/
+
+  // Returns whether the root node is final. If the minimised flag is not
+  // set, the root node of the transducer returned by minimise() is
+  // inspected instead; that transducer is deleted here afterwards.
+  bool Transducer::generates_empty_string()
+  {
+    if (minimised)
+      return root_node()->is_final();
+
+    Transducer *min_copy=&minimise();
+    const bool root_is_final=min_copy->root_node()->is_final();
+    delete min_copy;
+    return root_is_final;
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  Transducer::reverse_node                                       */
+  /*                                                                 */
+  /*******************************************************************/
+
+  // Recursive helper of reverse(). Creates a counterpart of node in na
+  // (stored via set_forward) and, for each arc node -> t, adds an arc with
+  // the same label from t's counterpart to node's counterpart. If node is
+  // final, an arc labelled Label() is added from na's root to node's
+  // counterpart. Does nothing if was_visited(vmark) returns true for node.
+  void Transducer::reverse_node( Node *node, Transducer *na )
+  {
+    if (node->was_visited( vmark ))
+      return;
+
+    // counterpart of this node in the new transducer
+    node->set_forward( na->new_node() );
+
+    // epsilon transition from the new root to the counterpart of a final node
+    if (node->is_final())
+      na->root_node()->add_arc( Label(), node->forward(), na );
+
+    for( ArcsIter it(node->arcs()); it; it++ ) {
+      Arc *a=it;
+
+      // first reverse everything below the target node ...
+      reverse_node( a->target_node(), na );
+      Node *target_image = a->target_node()->forward();
+
+      // ... then add the arc in the opposite direction
+      target_image->add_arc( a->label(), node->forward(), na );
+    }
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  Transducer::reverse                                            */
+  /*                                                                 */
+  /*******************************************************************/
+
+  // Builds a new transducer with a copy of this alphabet and the arcs
+  // reversed by reverse_node(); the counterpart of this transducer's root
+  // node is marked final. The result is allocated with new and returned
+  // by reference.
+  Transducer &Transducer::reverse()
+  {
+    Transducer *result = new Transducer();
+    result->alphabet.copy(alphabet);
+
+    incr_vmark();
+    reverse_node(root_node(), result);
+
+    Node *root_image = root_node()->forward();
+    root_image->set_final(1);
+    return *result;
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  Transducer::recode_label                                       */
+  /*                                                                 */
+  /*******************************************************************/
+
+  // Returns l, optionally transformed in two steps:
+  //  - lswitch: the two characters of the label are exchanged;
+  //  - recode:  the symbols of both characters (looked up in this
+  //             transducer's alphabet) are added to al, a label is built
+  //             from the codes returned by al, and that label is inserted
+  //             into al.
+  Label Transducer::recode_label( Label l, bool lswitch, bool recode,
+                                  Alphabet &al )
+  {
+    if (lswitch) {
+      Label swapped(l.upper_char(), l.lower_char());
+      l = swapped;
+    }
+
+    if (!recode)
+      return l;
+
+    // lower symbol is added before the upper one
+    Character lc = al.add_symbol(alphabet.code2symbol(l.lower_char()));
+    Character uc = al.add_symbol(alphabet.code2symbol(l.upper_char()));
+    l = Label(lc, uc);
+    al.insert(l);
+    return l;
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  Transducer::copy_nodes                                         */
+  /*                                                                 */
+  /*******************************************************************/
+
+  // Recursively copies node and everything reached through its arcs into
+  // transducer a and returns node's counterpart (node->forward()). Arc
+  // labels are passed through recode_label() with a's alphabet. If
+  // was_visited(vmark) returns true for node, nothing is copied and the
+  // existing forward pointer is returned.
+  Node *Transducer::copy_nodes( Node *node, Transducer *a,
+                                bool lswitch, bool recode )
+  {
+    if (node->was_visited(vmark))
+      return node->forward();
+
+    node->set_forward(a->new_node());
+
+    // the counterpart of a final node is final as well
+    if (node->is_final())
+      node->forward()->set_final(1);
+
+    for( ArcsIter it(node->arcs()); it; it++ ) {
+      Arc *arc=it;
+      Node *target_copy = copy_nodes( arc->target_node(), a, lswitch, recode );
+
+      // connect the counterpart of node with the copied target
+      Label new_label=recode_label(arc->label(), lswitch, recode, a->alphabet);
+      node->forward()->add_arc( new_label, target_copy, a );
+    }
+
+    return node->forward();
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  Transducer::copy                                               */
+  /*                                                                 */
+  /*******************************************************************/
+
+  // Returns a newly allocated copy of this transducer.
+  //  - lswitch: labels are passed to recode_label() with lswitch set, and
+  //             the new alphabet receives every label of the source
+  //             alphabet with its two characters exchanged.
+  //  - al:      if not NULL, the new alphabet is built from *al instead
+  //             of this transducer's alphabet and all arc labels are
+  //             recoded via recode_label().
+  Transducer &Transducer::copy( bool lswitch, const Alphabet *al )
+  {
+    const bool recode = (al != NULL);
+    if (!recode)
+      al = &alphabet;
+
+    Transducer *na = new Transducer();
+
+    // set up the alphabet of the copy
+    na->alphabet.utf8 = al->utf8;
+    if (!lswitch)
+      na->alphabet.copy(*al);
+    else {
+      na->alphabet.insert_symbols(*al);
+      for( Alphabet::iterator it=al->begin(); it!=al->end(); it++ ) {
+        Character lc=it->lower_char();
+        Character uc=it->upper_char();
+        na->alphabet.insert(Label(uc,lc));
+      }
+    }
+
+    na->deterministic = deterministic;
+    na->minimised = minimised;
+    na->root_node()->set_final(root_node()->is_final());
+    incr_vmark();
+
+    // The root is mapped onto the root of the copy by hand.
+    // NOTE(review): the result of was_visited() is discarded; presumably
+    // the call is made to mark the root as visited -- confirm in fst.h.
+    root_node()->set_forward(na->root_node());
+    root_node()->was_visited(vmark);
+
+    for( ArcsIter it(root_node()->arcs()); it; it++ ) {
+      Arc *arc=it;
+      Node *target_copy=copy_nodes(arc->target_node(), na, lswitch, recode);
+      Label new_label = recode_label(arc->label(), lswitch, recode, na->alphabet);
+      na->root_node()->add_arc( new_label, target_copy, na);
+    }
+
+    return *na;
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  Transducer::operator |                                         */
+  /*                                                                 */
+  /*******************************************************************/
+
+  Transducer &Transducer::operator|( Transducer &a )
+
+  {
+    // The result is allocated with "new"; its alphabet receives the
+    // alphabets of both operands.
+    Transducer *result = new Transducer();
+    result->alphabet.copy(alphabet);
+    result->alphabet.copy(a.alphabet);
+
+    // clone this transducer and attach the clone to the new root
+    // node with a default-constructed label
+    incr_vmark();
+    Node *left_root = copy_nodes(root_node(), result);
+    result->root_node()->add_arc( Label(), left_root, result);
+
+    // do the same for the second operand
+    a.incr_vmark();
+    Node *right_root = a.copy_nodes(a.root_node(), result);
+    result->root_node()->add_arc( Label(), right_root, result);
+
+    return *result;
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  Transducer::rec_cat_nodes                                      */
+  /*                                                                 */
+  /*******************************************************************/
+
+  void Transducer::rec_cat_nodes( Node *node, Node *node2 )
+
+  {
+    // Walks over the nodes reachable from "node" and replaces the
+    // final status of each final node by an arc to "node2".
+
+    // nothing to do if the node was processed before
+    if (node->was_visited( vmark ))
+      return;
+
+    // the successors are processed first
+    for( ArcsIter arc_it(node->arcs()); arc_it; arc_it++ ) {
+      Arc *out_arc = arc_it;
+      rec_cat_nodes( out_arc->target_node(), node2 );
+    }
+
+    // non-final nodes stay as they are
+    if (!node->is_final())
+      return;
+
+    // final node: clear the flag and link it to node2 with a
+    // default-constructed label
+    node->set_final(0);
+    node->add_arc( Label(), node2, this );
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  Transducer::operator+                                          */
+  /*                                                                 */
+  /*******************************************************************/
+
+  Transducer &Transducer::operator+( Transducer &a )
+
+  {
+    // The result is allocated with "new"; its alphabet receives the
+    // alphabets of both operands.
+    Transducer *result = new Transducer();
+    result->alphabet.copy(alphabet);
+    result->alphabet.copy(a.alphabet);
+
+    // first operand: clone it and attach the clone to the new root
+    incr_vmark();
+    Node *first_copy = copy_nodes(root_node(), result);
+    result->root_node()->add_arc( Label(), first_copy, result);
+
+    // second operand: clone it, but do not attach it yet
+    a.incr_vmark();
+    Node *second_copy = a.copy_nodes(a.root_node(), result);
+
+    // every final node reachable from the new root is redirected
+    // to the clone of the second operand
+    result->incr_vmark();
+    result->rec_cat_nodes(result->root_node(), second_copy);
+
+    return *result;
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  Transducer::kleene_star                                        */
+  /*   (HFST addition: now works for cyclic transducers as well)
+   *
+   *  Returns a reference to a Transducer allocated with "new".
+   *  Steps: copy this transducer, put a one-node transducer with a
+   *  final root in front of the copy (operator+), replace the final
+   *  status of every reachable final node by an arc back to the root
+   *  (rec_cat_nodes) and finally mark the root as final.
+   *  The "deterministic" and "minimised" flags of the result are
+   *  cleared.                                                        */
+  /*                                                                 */
+  /*******************************************************************/
+
+  Transducer &Transducer::kleene_star()
+
+  {
+    Transducer *na = &copy();
+    na->alphabet.copy(alphabet);
+
+    // HFST addition: prepend a transducer consisting of a final root only
+    Transducer eps;
+    eps.root_node()->set_final(1);
+    Transducer *tmp = &(eps + *na); // operator+ allocates a new transducer
+    delete na; // the intermediate copy is no longer needed
+    na = tmp;
+
+    // link back to the start node
+    na->incr_vmark();
+    na->rec_cat_nodes(na->root_node(), na->root_node());
+ 
+    na->root_node()->set_final(1);  // rec_cat_nodes clears the final flag of the nodes it links, so the root is (re-)marked final here
+    na->deterministic = na->minimised = false;
+
+    return *na;
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  Transducer::negate_nodes                                       */
+  /*                                                                 */
+  /*******************************************************************/
+
+  void Transducer::negate_nodes( Node *node, Node *accept )
+
+  {
+    // Inverts the final status of all nodes reachable from "node"
+    // and adds an arc to "accept" for every alphabet label for
+    // which a node has no target node.
+
+    if (node->was_visited(vmark))
+      return;
+
+    // flip the final status
+    const bool was_final = node->is_final();
+    node->set_final( !was_final );
+
+    // process the successors
+    for( ArcsIter arc_it(node->arcs()); arc_it; arc_it++ ) {
+      Arc *out_arc = arc_it;
+      negate_nodes( out_arc->target_node(), accept );
+    }
+
+    // complete the node with arcs for the missing labels
+    for( Alphabet::iterator lab=alphabet.begin(); lab!=alphabet.end(); lab++) {
+      if (node->target_node(*lab))
+        continue;
+      node->add_arc( *lab, accept, this );
+    }
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  Transducer::operator!                                          */
+  /*                                                                 */
+  /*******************************************************************/
+
+  Transducer &Transducer::operator!()
+
+  {
+    // Negation. Throws a string literal if the alphabet is empty.
+    // The result is a new transducer (from copy() or minimise()).
+    if (alphabet.size() == 0)
+      throw "Negation of Transducer with undefined alphabet attempted!";
+
+    // work on a copy if already minimised, else on the minimised form
+    Transducer *result = NULL;
+    if (!minimised)
+      result = &minimise();
+    else
+      result = &copy();
+    result->alphabet.copy(alphabet);
+
+    // final node which loops to itself on every alphabet label
+    Node *sink = result->new_node();
+    sink->set_final(1);
+    for( Alphabet::iterator lab=alphabet.begin(); lab!=alphabet.end(); lab++)
+      sink->add_arc( *lab, sink, result );
+
+    // invert the final status and add the missing arcs
+    result->incr_vmark();
+    result->negate_nodes( result->root_node(), sink );
+    result->deterministic = false;
+    result->minimised = false;
+
+    return *result;
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  conjoin_nodes                                                  */
+  /*                                                                 */
+  /*******************************************************************/
+
+  static void conjoin_nodes( Node *n1, Node *n2, Node *node,
+                             Transducer *a, PairMapping &map )
+
+  {
+    // "node" (in transducer "a") represents the node pair (n1,n2).
+    // It is final only if both members of the pair are final.
+    if (n1->is_final() && n2->is_final())
+      node->set_final(1);
+
+    // follow the arcs of n1 which have a counterpart in n2
+    for( ArcsIter arc_it(n1->arcs()); arc_it; arc_it++ ) {
+      Arc *arc1 = arc_it;
+      Label lab = arc1->label();
+      Node *succ1 = arc1->target_node();
+      Node *succ2 = n2->target_node(lab);
+
+      // n2 has no arc with this label
+      if (!succ2)
+        continue;
+
+      PairMapping::iterator known = map.find(succ1, succ2);
+
+      if (known != map.end()) {
+        // the target pair exists already: just link to its node
+        node->add_arc( lab, known->second, a );
+        continue;
+      }
+
+      // unseen target pair: create its node, register it, link it
+      // and continue recursively from there
+      Node *fresh = a->new_node();
+      map[pair<Node*,Node*>(succ1,succ2)] = fresh;
+      node->add_arc( lab, fresh, a );
+      conjoin_nodes( succ1, succ2, fresh, a, map );
+    }
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  Transducer::operator &                                         */
+  /*                                                                 */
+  /*******************************************************************/
+
+  Transducer &Transducer::operator&( Transducer &a )
+
+  {
+    // Conjunction. Operands which are not flagged as deterministic
+    // are replaced by temporary determinised versions which are
+    // deleted again before returning.
+
+    Transducer *det1 = NULL;
+    if (!deterministic)
+      det1 = &determinise();
+    Node *r1 = (det1 == NULL) ? root_node() : det1->root_node();
+
+    Transducer *det2 = NULL;
+    if (!a.deterministic)
+      det2 = &a.determinise();
+    Node *r2 = (det2 == NULL) ? a.root_node() : det2->root_node();
+
+    PairMapping map;
+
+    // the result is allocated with "new" and gets both alphabets
+    Transducer *result = new Transducer();
+    result->alphabet.copy(alphabet);
+    result->alphabet.copy(a.alphabet);
+
+    // the pair of root nodes corresponds to the new root node
+    Node *new_root = result->root_node();
+    map[pair<Node*,Node*>(r1, r2)] = new_root;
+
+    // build the remaining node pairs recursively
+    conjoin_nodes( r1, r2, new_root, result, map);
+
+    result->deterministic = 1;
+    delete det1;
+    delete det2;
+
+    return *result;
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  add_composed_node                                              */
+  /*                                                                 */
+  /*******************************************************************/
+
+  static void add_composed_node( Label l, Node *n1, Node *n2, Node *node,
+                                 Transducer *a, PairMapping &map )
+
+  {
+    // Adds an arc labelled "l" from "node" to the node which
+    // represents the pair (n1,n2), creating that node if necessary.
+    PairMapping::iterator known = map.find(n1, n2);
+
+    if (known == map.end()) {
+      // unseen pair: create and register its node, link it and
+      // continue the composition from there
+      Node *fresh = a->new_node();
+      map[pair<Node*,Node*>(n1,n2)] = fresh;
+      node->add_arc( l, fresh, a );
+      compose_nodes( n1, n2, fresh, a, map );
+    }
+    else {
+      // known pair: link to the node created earlier
+      node->add_arc( l, known->second, a );
+    }
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  compose_nodes
+   *
+   *  "node" (in transducer "a") represents the node pair (n1,n2).
+   *  Its outgoing arcs are built from three cases:
+   *   - an arc of n1 whose upper character is epsilon is taken
+   *     alone: only the first member of the pair advances;
+   *   - an arc of n1 is combined with every arc of n2 whose lower
+   *     character equals the upper character of the n1 arc; the new
+   *     label is Label(lc1,uc2);
+   *   - an arc of n2 whose lower character is epsilon is taken
+   *     alone: only the second member of the pair advances.
+   *  The target nodes are created/looked up by add_composed_node(),
+   *  which calls this function again for new pairs.                  */
+  /*                                                                 */
+  /*******************************************************************/
+
+  static void compose_nodes( Node *n1, Node *n2, Node *node, 
+			     Transducer *a, PairMapping &map )
+  {
+    // if both input nodes are final, so is the new one
+    if (n1->is_final() && n2->is_final())
+      node->set_final(1);
+
+    // iterate over all outgoing arcs of the first node
+    for( ArcsIter i(n1->arcs()); i; i++ ) {
+      Arc *arc1=i;
+      Node *t1 = arc1->target_node();
+      Label l1=arc1->label();
+      Character uc1=l1.upper_char();
+      Character lc1=l1.lower_char();
+
+      if (uc1 == Label::epsilon)
+	add_composed_node( l1, t1, n2, node, a, map ); // n2 stays where it is
+
+      else {
+	for( ArcsIter k(n2->arcs()); k; k++ ) {
+	  Arc *arc2=k;
+	  Node *t2 = arc2->target_node();
+	  Label l2=arc2->label();
+	  Character lc2=l2.lower_char();
+	  Character uc2=l2.upper_char();
+	
+	  if (uc1 == lc2) // the two arcs fit together
+	    add_composed_node( Label(lc1,uc2), t1, t2, node, a, map );
+	}
+      }
+    }
+
+    // epsilon input characters of the second Transducer
+    for( ArcsIter i(n2->arcs()); i; i++ ) {
+      Arc *arc=i;
+      Node *t = arc->target_node();
+      Label l=arc->label();
+      Character lc=l.lower_char();
+	
+      if (lc == Label::epsilon)
+	add_composed_node( l, n1, t, node, a, map ); // n1 stays where it is
+    }
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  Transducer::operator ||                                        */
+  /*                                                                 */
+  /*******************************************************************/
+
+  Transducer &Transducer::operator||( Transducer &a )
+
+  {
+    // Composition. The result is allocated with "new"; its alphabet
+    // is computed by Alphabet::compose() from both alphabets.
+    PairMapping map;
+
+    Transducer *result = new Transducer();
+    result->alphabet.compose(alphabet, a.alphabet);
+
+    Node *r1 = root_node();
+    Node *r2 = a.root_node();
+    Node *new_root = result->root_node();
+
+    // the pair of root nodes corresponds to the new root node
+    map[pair<Node*,Node*>(r1, r2)] = new_root;
+
+    // build the remaining node pairs recursively
+    compose_nodes( r1, r2, new_root, result, map );
+
+    return *result;
+  }
+
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  Transducer::operator/                                          */
+  /*                                                                 */
+  /*******************************************************************/
+
+  Transducer &Transducer::operator/( Transducer &a )
+
+  {
+    // Computes "*this & !a". Note that both operands are modified:
+    // complete_alphabet() is called on this transducer and its
+    // alphabet is copied into the alphabet of "a".
+    complete_alphabet();
+    a.alphabet.copy(alphabet);
+
+    // the negation is only an intermediate result
+    Transducer &negated = !a;
+    Transducer &difference = *this & negated;
+    delete &negated;
+
+    return difference;
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  Transducer::compare_nodes
+   *
+   *  Recursively compares the node "node" of this transducer with
+   *  the node "node2" of the transducer "a2" and returns true if
+   *  the two match:
+   *   - a node which was visited before must be paired (via the
+   *     forward pointers) with exactly the node it is compared to;
+   *   - both nodes must agree on being final;
+   *   - for every arc of "node", "node2" must have a target node for
+   *     the same label and the two targets must match recursively;
+   *   - "node" must have a target node for every arc label of
+   *     "node2".
+   *  The visit marks of both transducers are used; operator== calls
+   *  incr_vmark() on both before the first call.                     */
+  /*                                                                 */
+  /*******************************************************************/
+
+  bool Transducer::compare_nodes( Node *node, Node *node2, Transducer &a2 )
+
+  {
+    if (node->was_visited( vmark )) {
+      if (node2->was_visited( a2.vmark ))
+	return (node->forward() == node2 && node2->forward() == node); // both seen: they must be paired with each other
+      else
+	return false; // only the first node was seen before
+    }
+    else if (node2->was_visited( a2.vmark ))
+      return false; // only the second node was seen before
+
+    node->set_forward( node2 ); // pair the two nodes
+    node2->set_forward( node );
+    
+    if (node->is_final() != node2->is_final())
+      return false;
+
+    // iterate over all outgoing arcs
+    for( ArcsIter p(node->arcs()); p; p++ ) {
+      Arc *arc=p;
+      Node *t2=node2->target_node(arc->label());
+
+      if (t2 == NULL)
+	return false; // node2 lacks this label
+      else if (!compare_nodes(arc->target_node(), t2, a2))
+	return false;
+    }
+    for( ArcsIter p(node2->arcs()); p; p++ ) { // reverse check, labels only
+      Arc *arc=p;
+      if (node->target_node(arc->label()) == NULL)
+	return false;
+    }
+
+    return true;
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  Transducer::operator ==                                        */
+  /*                                                                 */
+  /*******************************************************************/
+
+  bool Transducer::operator==( Transducer &a )
+
+  {
+    // Operands which are not flagged as minimised are compared
+    // via temporary minimised versions.
+    Transducer *lhs = this;
+    if (!minimised)
+      lhs = &minimise();
+
+    Transducer *rhs = &a;
+    if (!a.minimised)
+      rhs = &a.minimise();
+
+    // fresh visit marks for the traversal of both transducers
+    lhs->incr_vmark();
+    rhs->incr_vmark();
+    const bool same =
+      lhs->compare_nodes(lhs->root_node(), rhs->root_node(), *rhs );
+
+    // free the temporary transducers
+    if (lhs != this)
+      delete lhs;
+    if (rhs != &a)
+      delete rhs;
+
+    return same;
+  }
+
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  Transducer::map_nodes
+   *
+   *  Copies the nodes reachable from "node" into the transducer "a",
+   *  where "node2" is the (already existing) copy of "node". Every
+   *  arc label is replaced by Label(c) where c is the character
+   *  returned by get_char(level) for the original label.
+   *  The forward pointer of each processed node points to its copy. */
+  /*                                                                 */
+  /*******************************************************************/
+
+  void Transducer::map_nodes( Node *node, Node *node2, Transducer *a, Level level)
+
+  {
+    if (!node->was_visited(vmark)) {
+
+      node->set_forward(node2);
+
+      // define final nodes
+      if (node->is_final())
+	node2->set_final(1);
+
+      // iterate over all outgoing arcs of node
+      for( ArcsIter p(node->arcs()); p; p++ ) {
+	Arc *arc=p;
+	Label l(arc->label().get_char(level));
+	Node *t2=NULL, *t=arc->target_node();
+
+	if (t->check_visited(vmark)) // presumably tests the mark without setting it - TODO confirm
+	  t2 = t->forward(); // reuse the copy made earlier
+	else
+	  t2 = a->new_node(); // create a new node
+      
+	node2->add_arc(l, t2, a); // add a link to the node
+
+	map_nodes( t, t2, a, level ); // returns at once if t was visited before
+      }
+    }
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  Transducer::level                                              */
+  /*                                                                 */
+  /*******************************************************************/
+
+  Transducer &Transducer::level( Level level )
+
+  {
+    // Returns a transducer (allocated with "new") in which every
+    // label is reduced to the character selected by "level".
+    Transducer *result = new Transducer();
+
+    // build the alphabet of the result
+    Alphabet::iterator lab = alphabet.begin();
+    while (lab != alphabet.end()) {
+      Character c = lab->get_char(level);
+      // take over the symbol of the character if it has one
+      if (alphabet.code2symbol(c) != NULL)
+        result->alphabet.add_symbol( alphabet.code2symbol(c), c );
+      result->alphabet.insert(Label(c));
+      lab++;
+    }
+
+    // copy the nodes with the reduced labels
+    incr_vmark();
+    map_nodes(root_node(), result->root_node(), result, level );
+
+    return *result;
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  Transducer::freely_insert_at_node                              */
+  /*                                                                 */
+  /*******************************************************************/
+
+  void Transducer::freely_insert_at_node( Node *node, Label l )
+
+  {
+    // Adds an arc labelled "l" from each node reachable from "node"
+    // to itself.
+    if (node->was_visited(vmark))
+      return;
+
+    // the self-loop of this node
+    node->add_arc(l, node, this);
+
+    // continue with the targets of all outgoing arcs
+    for( ArcsIter arc_it(node->arcs()); arc_it; arc_it++ ) {
+      Arc *out_arc = arc_it;
+      freely_insert_at_node(out_arc->target_node(), l );
+    }
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  Transducer::freely_insert                                      */
+  /*                                                                 */
+  /*******************************************************************/
+
+  Transducer &Transducer::freely_insert( Label l )
+
+  {
+    // Works on a copy of this transducer: every reachable node of
+    // the copy gets a self-loop labelled "l".
+    Transducer &result = copy();
+
+    result.incr_vmark();
+    result.freely_insert_at_node(result.root_node(), l );
+
+    return result;
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  Transducer::splice_arc                                         */
+  /*                                                                 */
+  /*******************************************************************/
+
+  void Transducer::splice_arc( Node *node, Node *node2, Node *next_node,
+                               Transducer *a )
+  {
+    // Copies the paths starting at "node" into transducer "a",
+    // starting at "node2". A path ends at the first final node,
+    // whose copy is linked to "next_node". No visit marks are used.
+    if (node->is_final()) {
+      // end of the path: connect with a default-constructed label
+      node2->add_arc( Label(), next_node, a );
+    }
+    else {
+      // copy each outgoing arc to a fresh node and continue there
+      for( ArcsIter arc_it(node->arcs()); arc_it; arc_it++ ) {
+        Arc *out_arc = arc_it;
+        Node *fresh = a->new_node();
+
+        node2->add_arc( out_arc->label(), fresh, a );
+        splice_arc( out_arc->target_node(), fresh, next_node, a );
+      }
+    }
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  Transducer::splice_nodes
+   *
+   *  Copies the nodes reachable from "node" into the transducer "a",
+   *  where "node2" is the (already existing) copy of "node". Arcs
+   *  whose label equals "sl" are not copied; instead the transducer
+   *  "sa" is inserted between the copies of the source and target
+   *  node by means of splice_arc(). All other arcs are copied with
+   *  their label.                                                    */
+  /*                                                                 */
+  /*******************************************************************/
+
+  void Transducer::splice_nodes(Node *node, Node *node2, Label sl, 
+				Transducer *sa, Transducer *a)
+  {
+    if (!node->was_visited(vmark)) {
+
+      node->set_forward(node2);
+
+      // define final nodes
+      if (node->is_final())
+	node2->set_final(1);
+
+      // iterate over all outgoing arcs of node
+      for( ArcsIter p(node->arcs()); p; p++ ) {
+	Arc *arc=p;
+	Node *t2=NULL, *t=arc->target_node();
+
+	if (t->check_visited(vmark)) // presumably tests the mark without setting it - TODO confirm
+	  t2 = t->forward(); // reuse the copy made earlier
+	else
+	  t2 = a->new_node(); // create a new node
+
+	if (arc->label() == sl)
+	  // insert the transducer
+	  splice_arc(sa->root_node(), node2, t2, a);
+	else
+	  // add a link to the node
+	  node2->add_arc(arc->label(), t2, a);
+
+	splice_nodes( t, t2, sl, sa, a ); // returns at once if t was visited before
+      }
+    }
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  Transducer::splice                                             */
+  /*                                                                 */
+  /*******************************************************************/
+
+  Transducer &Transducer::splice( Label sl, Transducer *sa )
+
+  {
+    // Returns a transducer (allocated with "new") in which the arcs
+    // labelled "sl" are replaced by the transducer "sa".
+    Transducer *result = new Transducer();
+
+    // alphabet of this transducer without the spliced label ...
+    for( Alphabet::iterator own=alphabet.begin(); own!=alphabet.end(); own++ ) {
+      Label l = *own;
+      if (l != sl)
+        result->alphabet.insert(l);
+    }
+
+    // ... plus the complete alphabet of the inserted transducer
+    for( Alphabet::iterator ins=sa->alphabet.begin();
+         ins!=sa->alphabet.end(); ins++ )
+      result->alphabet.insert(*ins);
+
+    incr_vmark();
+    splice_nodes(root_node(), result->root_node(), sl, sa, result );
+
+    return *result;
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  Transducer::replace_char                                       */
+  /*                                                                 */
+  /*******************************************************************/
+
+  Transducer &Transducer::replace_char( Character c, Character nc )
+
+  {
+    // Returns a transducer (allocated with "new") whose labels are
+    // the results of Label::replace_char(c,nc) on the original ones.
+    Transducer *result = new Transducer();
+
+    // alphabet with the replaced labels
+    for( Alphabet::iterator lab=alphabet.begin(); lab!=alphabet.end(); lab++ ) {
+      Label l = *lab;
+      result->alphabet.insert(l.replace_char(c,nc));
+    }
+
+    // nodes and arcs with the replaced labels
+    incr_vmark();
+    replace_char2(root_node(), result->root_node(), c, nc, result );
+
+    return *result;
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  Transducer::replace_char2
+   *
+   *  Recursive helper of replace_char(). Copies the nodes reachable
+   *  from "node" into the transducer "a", where "node2" is the
+   *  (already existing) copy of "node". The label of every copied
+   *  arc is the result of replace_char(c, nc) on the original label. */
+  /*                                                                 */
+  /*******************************************************************/
+
+  void Transducer::replace_char2(Node *node, Node *node2, Character c, 
+				 Character nc, Transducer *a)
+  {
+    if (!node->was_visited(vmark)) {
+
+      node->set_forward(node2);
+
+      // define final nodes
+      if (node->is_final())
+	node2->set_final(1);
+
+      // iterate over all outgoing arcs of node
+      for( ArcsIter p(node->arcs()); p; p++ ) {
+	Arc *arc=p;
+	Node *t2=NULL, *t=arc->target_node();
+
+	if (t->check_visited(vmark)) // presumably tests the mark without setting it - TODO confirm
+	  t2 = t->forward(); // reuse the copy made earlier
+	else
+	  t2 = a->new_node(); // create a new node
+
+	node2->add_arc(arc->label().replace_char(c, nc), t2, a);
+	replace_char2( t, t2, c, nc, a ); // returns at once if t was visited before
+      }
+    }
+  }
+}
--- a/src/main/resources/SMOR/src/operators.o
+++ b/src/main/resources/SMOR/src/operators.o
--- a/src/main/resources/SMOR/src/robust.C
+++ b/src/main/resources/SMOR/src/robust.C
+
+/*******************************************************************/
+/*                                                                 */
+/*     File: robust.C                                              */
+/*   Author: Helmut Schmid                                         */
+/*  Purpose:                                                       */
+/*  Created: Wed Aug  3 08:49:16 2005                              */
+/* Modified: Thu Nov 27 17:15:25 2008 (schmid)                     */
+/*                                                                 */
+/*******************************************************************/
+
+#include "compact.h"
+
+using std::vector;
+
+namespace SFST {
+
+  // data structure for a search path
+
+  class Path {
+  public:
+    unsigned int arc_number; // number of the transducer arc of this path
+    unsigned int position;   // how many input symbols were processed
+    float errors;            // sum of the errors so far
+    int previous;            // back-pointer (-1 for the initial path)
+
+    // constructor for a path with a predecessor
+    Path( unsigned int n, unsigned int p, float e, unsigned int pp )
+      : arc_number(n), position(p), errors(e), previous((int)pp) {}
+
+    // constructor for the initial path
+    Path() : arc_number(0), position(0), errors(0), previous(-1) {}
+
+    // the initial path is recognised by its missing back-pointer
+    bool is_start() { return previous == -1; }
+  };
+
+
+  // search data structure containing all the search paths
+
+  class Agenda {
+  private:
+    float errors_allowed;          // paths with more errors are dropped
+    vector<Path> path;             // all paths created so far
+    vector<size_t> active_path;    // indices (into "path") of the active paths
+    vector<size_t> complete_path;  // indices (into "path") of the complete paths
+
+  public:
+
+    // The agenda starts with the initial path as its only active path.
+    Agenda( float e ) : errors_allowed(e) {
+      path.push_back(Path());
+      active_path.push_back(0);
+    }
+
+    // the active path which is expanded next (last array element)
+    Path &best_active_path() { return path[active_path.back()]; }
+
+    // the i-th active path
+    Path &get_active_path( int i ) { return path[active_path[i]]; }
+
+    // the first of the complete paths
+    Path &first_complete_path() { return path[complete_path[0]]; }
+
+    // The search ends when no active path is left, or when the best
+    // active path has more errors than the complete path(s).
+    bool finished() {
+      if (active_path.empty())
+        return true;
+      if (complete_path.empty())
+        return false;
+      return best_active_path().errors > first_complete_path().errors;
+    }
+
+    // add a new search path
+    void add_path( int s, unsigned int pos, float e, int pp, bool final );
+
+    void add_analysis( int sn, CAnalysis &ana );
+    void extract_analyses( vector<CAnalysis> &analyses );
+
+    friend class CompactTransducer;
+  };
+
+
+  // trivial error functions for the beginning
+
+  // Each edit operation currently has the constant weight 1;
+  // the arguments are not used.
+  float mismatch_error( Character c, Character c2) { return 1.0f; }
+
+  float deletion_error( Character c) { return 1.0f; }
+
+  float insertion_error( Character c) { return 1.0f; }
+
+  float transpose_error( Character c, Character c2) { return 1.0f; }
+
+
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  Agenda::add_path
+   *
+   *  Creates the path (arc, pos, e, pp) and adds it to the agenda.
+   *    arc   - number of the transducer arc
+   *    pos   - number of processed input symbols
+   *    e     - accumulated errors
+   *    pp    - index of the previous path in "path"
+   *    final - true if the path is a complete analysis
+   *  The path is silently dropped if it has more than
+   *  "errors_allowed" errors or more errors than the first complete
+   *  path. Otherwise its index is inserted into "active_path", which
+   *  is kept sorted: fewer errors are closer to the end of the
+   *  array and, for equal errors, the new path is placed behind
+   *  paths whose position is not larger than its own.               */
+  /*                                                                 */
+  /*******************************************************************/
+
+  void Agenda::add_path(int arc, unsigned int pos, float e, int pp, bool final)
+
+  {
+    // check whether the number of allowed errors is exceeded
+    if (e > errors_allowed)
+      return;
+
+    // check whether a complete analysis with fewer errors exists
+    if (complete_path.size() > 0 && first_complete_path().errors < e)
+      return;
+  
+    // store the new search path
+    size_t sn=path.size();              // index of the new search path
+    path.push_back(Path(arc, pos, e, pp)); // add the new path
+
+    // sorted insertion of the new active path (reversed order)
+    active_path.push_back(sn);    // increase the array size by 1
+
+    // shift the paths which rank above the new path up by one slot
+    int i=(int)active_path.size()-1;
+    while (i > 0) {
+      Path &s=get_active_path(i-1);
+      if (e < s.errors || (e == s.errors && pos >= s.position))
+	break; // insertion position found
+      active_path[i] = active_path[i-1];
+      i--;
+    }
+    active_path[i] = sn;
+
+    if (final) {
+      // Is the new analysis better than the previous ones?
+      if (complete_path.size() > 0 && first_complete_path().errors > e)
+	complete_path.clear(); // delete all the previous analyses
+      complete_path.push_back(sn);
+    }
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  Agenda::add_analysis                                           */
+  /*                                                                 */
+  /*******************************************************************/
+  
+  void Agenda::add_analysis( int sn, CAnalysis &ana )
+
+  {
+    // Appends the arc numbers on the way from the initial path to
+    // the path with index "sn" to "ana" (the initial path itself
+    // contributes nothing).
+
+    // follow the back-pointers and collect the arcs (last arc first)
+    vector<unsigned int> arcs;
+    for( int idx=sn; !path[idx].is_start(); idx=path[idx].previous )
+      arcs.push_back(path[idx].arc_number);
+
+    // append them in forward order
+    for( size_t k=arcs.size(); k>0; k-- )
+      ana.push_back(arcs[k-1]);
+  }
+  
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  Agenda::extract_analyses                                       */
+  /*                                                                 */
+  /*******************************************************************/
+  
+  void Agenda::extract_analyses( vector<CAnalysis> &analyses )
+
+  {
+    // One analysis per complete path, in the order of "complete_path".
+    const size_t count = complete_path.size();
+    analyses.resize(count);
+
+    size_t k = 0;
+    while (k < count) {
+      add_analysis((int)complete_path[k], analyses[k]);
+      k++;
+    }
+  }
+
+
+  /*******************************************************************/
+  /*                                                                 */
+  /*  CompactTransducer::robust_analyze_string                       */
+  /*                                                                 */
+  /*******************************************************************/
+  
+  float CompactTransducer::robust_analyze_string( char *string,
+                                                  vector<CAnalysis> &analyses,
+                                                  float ErrorsAllowed )
+  {
+    // Error-tolerant analysis of "string".
+    //   string        - the input; converted to symbols with
+    //                   alphabet.string2symseq()
+    //   analyses      - cleared first; receives one analysis (a
+    //                   sequence of arc numbers) per best complete path
+    //   ErrorsAllowed - paths with a larger error sum are discarded
+    // Returns the error sum of the analyses found, or 0.0 if none
+    // was found (in which case "analyses" is empty).
+
+    analyses.clear();
+
+    // convert the input string to a sequence of symbols
+    vector<Character> input;
+    alphabet.string2symseq( string, input );
+
+    // initialize the agenda
+    Agenda agenda( ErrorsAllowed );
+
+    // start the analysis
+    while (!agenda.finished()) {
+
+      // get the highest ranked search path; it is copied because
+      // add_path() appends to agenda.path
+      unsigned int sn=(unsigned)agenda.active_path.back();
+      Path cs=agenda.path[sn];
+      agenda.active_path.pop_back();
+
+      // state reached by the path (the initial path is in state 0)
+      unsigned int state=cs.is_start()? 0: target_node[cs.arc_number];
+
+      // for all transitions from the current state
+      for( unsigned int i=first_arc[state]; i<first_arc[state+1]; i++ ) {
+        Label l = label[i];               // label of the transition
+        Character tc = l.upper_char();    // surface symbol
+
+        if (cs.position == input.size()) {
+          // the whole input was processed
+          if (tc == Label::epsilon)  // epsilon transition
+            agenda.add_path(i, cs.position, cs.errors, sn,
+                            finalp[target_node[i]]);
+
+          else  // insertion of symbol
+            agenda.add_path(i, cs.position, cs.errors + insertion_error(tc),
+                            sn, finalp[target_node[i]]);
+        }
+
+        else {
+          Character ic = input[cs.position];
+
+          if (tc == Label::epsilon) // epsilon transition
+            agenda.add_path(i, cs.position, cs.errors, sn, false);
+          else if (tc == ic) { // matching symbols
+            bool f=(cs.position+1==input.size() && finalp[target_node[i]]);
+            agenda.add_path(i, cs.position+1, cs.errors, sn, f);
+          }
+
+          else {
+            // symbol mismatch
+            bool f=(cs.position+1==input.size() && finalp[target_node[i]]);
+            agenda.add_path(i, cs.position+1, cs.errors+mismatch_error(tc, ic),
+                            sn, f);
+
+            // deletion of symbol: the transducer stays in "state".
+            // finalp[state] is used rather than
+            // finalp[target_node[cs.arc_number]] because arc_number
+            // of the initial path is only a placeholder, so the
+            // latter would test the wrong state there.
+            f = (cs.position+1==input.size() && finalp[state]);
+            agenda.add_path(cs.arc_number, cs.position+1,
+                            cs.errors+deletion_error(ic), cs.previous, f);
+
+            // insertion of symbol; the input is not exhausted in
+            // this branch, so the new path is never complete
+            agenda.add_path(i, cs.position, cs.errors + insertion_error(tc),
+                            sn, false);
+          }
+        }
+      }
+    }
+
+    if (agenda.complete_path.size() == 0)
+      return 0.0;
+    agenda.extract_analyses( analyses );
+    return agenda.first_complete_path().errors;
+  }
+}
--- a/src/main/resources/SMOR/src/robust.o
+++ b/src/main/resources/SMOR/src/robust.o
--- a/src/main/resources/SMOR/src/scanner.h
+++ b/src/main/resources/SMOR/src/scanner.h
+/*******************************************************************/
+/*                                                                 */
+/*  FILE     scanner.h                                             */
+/*  MODULE   scanner                                               */
+/*  PROGRAM  SFST                                                  */
+/*  AUTHOR   Helmut Schmid, IMS, University of Stuttgart           */
+/*                                                                 */
+/*******************************************************************/
+
+extern char *FileName;  /* name of the file being scanned; print_lineno() in scanner.ll prints it */
+extern bool UTF8;       /* defined in scanner.ll: false in the default> variant, true in the utf8> variant */
+extern bool Verbose;    /* defined in scanner.ll (initially true); gates the progress output on stderr */
--- a/src/main/resources/SMOR/src/scanner.ll
+++ b/src/main/resources/SMOR/src/scanner.ll
+%option 8Bit batch yylineno noyywrap
+
+/* the "incl" state is used to pick up the name of an include file */
+%x incl
+
+%{
+/*******************************************************************/
+/*                                                                 */
+/*  FILE     scanner.ll                                            */
+/*  MODULE   scanner                                               */
+/*  PROGRAM  SFST                                                  */
+/*  AUTHOR   Helmut Schmid, IMS, University of Stuttgart           */
+/*                                                                 */
+/*******************************************************************/
+
+#include <string.h>
+
+#include "interface.h"
+#include "fst-compiler.h"
+#include "scanner.h"
+
+using namespace SFST;
+
+#define MAX_INCLUDE_DEPTH 10  /* capacity of the three include stacks below */
+  
+int Include_Stack_Ptr = 0;  /* number of entries currently saved on the stacks */
+YY_BUFFER_STATE Include_Stack[MAX_INCLUDE_DEPTH];  /* flex buffers of the files suspended by #include */
+char *Name_Stack[MAX_INCLUDE_DEPTH];  /* FileName values of the suspended files */
+int  Lineno_Stack[MAX_INCLUDE_DEPTH];  /* yylineno values of the suspended files */
+
+bool Verbose=true;  /* gates the progress output written to stderr */
+char *FileName=NULL;  /* name of the file being scanned; replaced while an include file is read */
+
+default> bool UTF8=false;  /* NOTE(review): the default> and utf8> line prefixes are not flex syntax; presumably a build step keeps one variant and strips the prefix -- confirm in the Makefile */
+utf8> bool UTF8=true;
+
+/* Strips backslash escapes from string IN PLACE and returns a copy of
+   the result made with fst_strdup().  Each backslash is dropped and the
+   character after it is kept literally.  If del_quote is true, the first
+   and the last character of the input (the enclosing quotes) are removed
+   as well.
+   Two inputs are handled defensively: a backslash that is the very last
+   character is kept as it is instead of stepping over the terminating
+   NUL, and with del_quote the pointers never leave the string when it is
+   shorter than two characters. */
+static char *unquote(char *string, bool del_quote=true) {
+  char *const result = string;  // start of the buffer that is rewritten
+  char *out = string;           // next write position, never ahead of string
+
+  if (del_quote && *string)
+    string++;                   // skip the opening quote
+
+  while (*string) {
+    if (*string == '\\' && string[1] != '\0')
+      string++;                 // drop the backslash, keep the escaped character
+    *(out++) = *(string++);
+  }
+
+  if (del_quote && out > result)
+    out--;                      // cut off the closing quote
+  *out = '\0';
+
+  return fst_strdup(result);
+}
+
+/* Progress display: returns the cursor to the start of the current
+   stderr line and prints the name of the file being scanned and the
+   current line number, preceded by two blanks per active include
+   level.  Prints nothing unless Verbose is set. */
+static void print_lineno() {
+  if (Verbose) {
+    fputc('\r',stderr);
+    int level = Include_Stack_Ptr;
+    while (level-- > 0)
+      fputs("  ", stderr);
+    fprintf(stderr,"%s: %d", FileName, yylineno);
+  }
+}
+
+extern void yyerror( const char *text );
+
+%}
+/* Character classes used by the rules below.
+   CC      bytes 0x80-0xbf; the trailing bytes in the utf8> multi-byte rules
+   C3, C4  characters allowed in $...$ and #...# variable names; each
+           class leaves out the delimiter of its own variable kind
+   C5      characters allowed without a backslash inside <...> symbols
+   FN      characters allowed in quoted file names and strings
+   C1 and C2 are not referenced by any rule in the rules section. */
+CC	[\x80-\xbf]
+C1	[A-Za-z0-9._/\-]
+C2	[A-Za-z0-9._/\-&()+,=?\^|~]
+C3	[A-Za-z0-9._/\-&()+,=?\^|~#<>]
+C4	[A-Za-z0-9._/\-&()+,=?\^|~$<>]
+C5	[\!-;\?-\[\]-\~=]
+FN	[A-Za-z0-9._/\-*+]
+
+%%
+
+^[ \t]*\#use[ \t]*revdet2[ \t]*\n { Transducer::hopcroft_minimisation = false; /* a #use revdet2 line switches the Hopcroft minimisation flag off */};
+^[ \t]*\#use[ \t]*default[ \t]*\n { Transducer::hopcroft_minimisation = true; /* a #use default line switches the flag back on */};
+
+#include           BEGIN(incl);
+<incl>[ \t]*       /* eat the whitespace */
+<incl>{FN}+        { error2("Missing quotes",yytext); }
+<incl>\"{FN}+\"    { /* Got the quoted include file name: suspend the
+                        current file and continue scanning in the
+                        included one until its end of file is reached. */
+                     if ( Include_Stack_Ptr >= MAX_INCLUDE_DEPTH ) {
+                       /* checked first so that nothing is allocated
+                          when the nesting limit is exceeded */
+                       fprintf( stderr, "Includes nested too deeply\n" );
+                       exit( 1 );
+                     }
+                     /* copy of yytext without the enclosing quotes */
+                     char *name=fst_strdup(yytext+1);
+                     name[strlen(name)-1] = 0;
+                     if (Verbose) fputc('\n', stderr);
+                     FILE *file = fopen( name, "rt" );
+                     if (!file)
+                       error2("Can't open include file", name);
+                     else {
+                       /* remember where to resume in the including file */
+                       Name_Stack[Include_Stack_Ptr] = FileName;
+                       FileName = name;
+                       Lineno_Stack[Include_Stack_Ptr] = yylineno;
+                       yylineno = 1;
+                       Include_Stack[Include_Stack_Ptr++]=YY_CURRENT_BUFFER;
+                       /* yyin is set before the buffer is created so
+                          that the new buffer is bound to the included
+                          file right away, as in the flex manual */
+                       yyin = file;
+                       yy_switch_to_buffer(yy_create_buffer(yyin, YY_BUF_SIZE));
+                       print_lineno();
+                       BEGIN(INITIAL);
+                     }
+                  }
+<<EOF>>           { /* End of the current input file. */
+                     if (Verbose)
+                       fputc('\n', stderr);
+                     if ( --Include_Stack_Ptr < 0 )
+                       yyterminate();  /* the top-level file is finished */
+                     else {
+                       /* An include file is finished: release what the
+                          include rule acquired for it and resume the
+                          including file where it was suspended. */
+                       fclose(yyin);   /* opened by the include rule, not closed anywhere else */
+                       free(FileName);
+                       FileName = Name_Stack[Include_Stack_Ptr];
+                       yylineno = Lineno_Stack[Include_Stack_Ptr];
+                       yy_delete_buffer( YY_CURRENT_BUFFER );
+                       yy_switch_to_buffer(Include_Stack[Include_Stack_Ptr]);
+                     }
+                  }
+
+
+^[ \t]*\%.*\r?\n  { print_lineno();  /* comment filling a whole line: dropped together with its newline, no NEWLINE token */ }
+
+\%.*\\[ \t]*\r?\n { print_lineno();  /* comment ending in a backslash: dropped together with the following newline */ }
+
+\%.*              { /* any other comment: dropped up to, but not including, the end of the line */ }
+
+
+^[ \t]*ALPHABET[ \t]*= { return ALPHA; /* ALPHABET = at the start of a line */ }
+
+\|\|              { return COMPOSE; }
+"<=>"             { yylval.type = twol_both; return ARROW; /* the three arrows share one token; yylval.type tells them apart */ }
+"=>"              { yylval.type = twol_right;return ARROW; }
+"<="              { yylval.type = twol_left; return ARROW; }
+"^->"             { yylval.rtype = repl_up;   return REPLACE; /* the four replace operators share one token; yylval.rtype tells them apart */ }
+"_->"             { yylval.rtype = repl_down; return REPLACE; }
+"/->"             { yylval.rtype = repl_right;return REPLACE; }
+"\\->"            { yylval.rtype = repl_left; return REPLACE; }
+">>"              { return PRINT; }
+"<<"              { return INSERT; }
+"__"              { return POS; }
+"^_"              { return SWITCH; }
+
+[.,{}\[\]()&!?|*+:=_\^\-] { return yytext[0]; /* the character code itself is the token */ }
+
+\$=({C3}|(\\.))+\$ { yylval.name = fst_strdup(yytext); return RVAR; /* the name is copied including its delimiters */ }
+
+\$({C3}|(\\.))+\$ { yylval.name = fst_strdup(yytext); return VAR; }
+
+#=({C4}|(\\.))+# { yylval.name = fst_strdup(yytext); return RSVAR; }
+
+#({C4}|(\\.))+# { yylval.name = fst_strdup(yytext); return SVAR; }
+
+\<({C5}|\\.)*\>   { yylval.name = unquote(yytext,false); return SYMBOL; /* backslashes are removed, the angle brackets are kept */ }
+
+\"<{FN}+>\" { 
+                    yylval.value = fst_strdup(yytext+2);  /* skip the leading quote and angle bracket */
+		    yylval.value[strlen(yylval.value)-2] = 0;  /* cut the trailing angle bracket and quote */
+                    return STRING2;
+                  }
+
+\"{FN}+\" { 
+                    yylval.value = fst_strdup(yytext+1);  /* skip the leading quote */
+		    yylval.value[strlen(yylval.value)-1] = 0;  /* cut the trailing quote */
+                    return STRING;
+                  }
+
+[ \t]             { /* ignored */ }
+\\[ \t]*([ \t]\%.*)?\r?\n { print_lineno(); /* backslash at the end of a line, optionally followed by a comment: the newline is swallowed, no NEWLINE token */ }
+\r?\n             { print_lineno(); return NEWLINE; }
+
+\\[0-9]+          { /* Backslash followed by a decimal character code.
+                       The largest valid Unicode code point is
+                       0x10FFFF = 1114111.  strtol saturates on
+                       overflow, so overlong digit strings are
+                       rejected by the range checks as well. */
+                    long l=strtol(yytext+1, NULL, 10);
+default>	    if (l <= 255) { yylval.uchar=(unsigned char)l; return CHARACTER; }
+utf8>		    if (l <= 1114111) { yylval.value=fst_strdup(int2utf8((unsigned)l)); return UTF8CHAR; }
+		    yyerror("invalid expression");
+                  }
+
+default> \\.	  { yylval.uchar = yytext[1]; return CHARACTER; /* escaped character: the byte after the backslash */ }
+default> .	  { yylval.uchar = yytext[0]; return CHARACTER; /* any other single byte */ }
+
+utf8> \\.                { yylval.value=fst_strdup(yytext+1); return UTF8CHAR; /* escaped character, copied without the backslash */ }
+utf8> [\x00-\x7f]        { yylval.value=fst_strdup(yytext); return UTF8CHAR; /* single byte 0x00-0x7f */ }
+utf8> [\xc0-\xdf]{CC}    { yylval.value=fst_strdup(yytext); return UTF8CHAR; /* lead byte plus one CC byte */ }
+utf8> [\xe0-\xef]{CC}{2} { yylval.value=fst_strdup(yytext); return UTF8CHAR; /* lead byte plus two CC bytes */ }
+utf8> [\xf0-\xff]{CC}{3} { yylval.value=fst_strdup(yytext); return UTF8CHAR; /* lead byte plus three CC bytes; NOTE(review): 0xf8-0xff are accepted as lead bytes here although UTF-8 does not use them -- confirm this is intended */ }
+
+%%
--- a/src/main/resources/SMOR/src/sgi.h
+++ b/src/main/resources/SMOR/src/sgi.h
+/*******************************************************************/
+/*                                                                 */
+/*     File: sgi.h                                                 */
+/*   Author: Helmut Schmid                                         */
+/*  Purpose: Portable access to hash_map, hash_set and hash        */
+/*  Created: Thu Sep 11 15:58:25 2008                              */
+/* Modified: Fri Sep 12 08:17:03 2008 (schmid)                     */
+/* Modified: Wed May 26 12:54:00 2010 (hfst)                       */
+/*******************************************************************/
+
+#ifndef _SGI_INCLUDED
+#define _SGI_INCLUDED
+
+#if HAVE_CONFIG_H
+#  include <config.h>
+#endif
+/* Header selection for hash_map and hash_set.  For each of the two
+   containers the first matching case wins: the HAVE_* macros (these
+   look like configure results from config.h -- TODO confirm), then the
+   SGIext and SGI__gnu_cxx macros, and as a last resort the plain
+   <hash_map> / <hash_set> header together with a compiler warning. */
+#if HAVE_BACKWARD_HASH_MAP
+#  include <backward/hash_map>
+#elif HAVE_EXT_HASH_MAP
+#  include <ext/hash_map>
+#elif HAVE_HASH_MAP
+#  include <hash_map>
+#elif SGIext
+#  include <ext/hash_map>
+#elif SGI__gnu_cxx
+#  include <ext/hash_map>
+#else
+#  warning "unknown hash_map"
+#  include <hash_map>
+#endif
+#if HAVE_BACKWARD_HASH_SET
+#  include <backward/hash_set>
+#elif HAVE_EXT_HASH_SET
+#  include <ext/hash_set>
+#elif HAVE_HASH_SET
+#  include <hash_set>
+#elif SGIext
+#  include <ext/hash_set>
+#elif SGI__gnu_cxx
+#  include <ext/hash_set>
+#else
+#  warning "missing hash_set"
+#  include <hash_set>
+#endif
+
+/* Hfst addition: makes hash_map, hash_set and hash available as
+   SFST::hash_map etc., independent of the namespace the compiler
+   declares them in: the global namespace for GCC before 3, std for
+   GCC 3.0 and for non-GNU compilers, __gnu_cxx for all later GCC
+   versions. */
+namespace SFST 
+{
+// from <http://gcc.gnu.org/onlinedocs/libstdc++/manual/backwards.html>
+#ifdef __GNUC__
+#  if __GNUC__ < 3
+  using ::hash_map;
+  using ::hash_set;
+  using ::hash;
+#  elif __GNUC__ == 3 && __GNUC_MINOR__ == 0
+  using std::hash_map;
+  using std::hash_set;
+  using std::hash;
+#  else 
+  using __gnu_cxx::hash_map;
+  using __gnu_cxx::hash_set;
+  using __gnu_cxx::hash;
+#  endif
+#else
+  using std::hash_map;
+  using std::hash_set;
+  using std::hash;
+#endif
+}
+
+#endif
--- a/src/main/resources/SMOR/src/test.C
+++ b/src/main/resources/SMOR/src/test.C
+// This is just a simple program which shows
+// how the transducer library is used.
+
+#include "compact.h"
+
+// Demo driver: loads the compact transducer named by the first command
+// line argument, then analyses every line read from stdin and prints
+// the analyses (or a "no result" message) to stdout.
+// Returns 0 on success and 1 if the argument is missing, the file
+// cannot be opened, or the library throws a const char* message.
+// Input lines longer than the buffer are processed in pieces.
+int main( int argc, char **argv )
+
+{
+  if (argc < 2) {  // argv[1] must not be read when it is absent
+    fputs("Usage: test transducer-file\n", stderr);
+    return 1;
+  }
+
+  FILE *file = fopen(argv[1],"rb");  // open the input file
+  if (file == NULL) {
+    fprintf(stderr, "Error: cannot open file \"%s\"\n", argv[1]);
+    return 1;
+  }
+
+  int status = 0;
+  try {
+    CompactTransducer ca(file); // read the transducer
+
+    char buffer[1000];
+    std::vector<CAnalysis> analyses;
+    while (fgets(buffer, sizeof(buffer), stdin)) {  // next input line
+      size_t l = strlen(buffer);
+      if (l > 0 && buffer[l-1] == '\n')  // delete the newline character
+        buffer[l-1] = '\0';
+      printf("> %s\n", buffer);  // print the input line
+
+      ca.analyze_string(buffer, analyses);  // analyse the input
+
+      if (analyses.size() == 0)
+        printf( "no result for %s\n", buffer);  // analysis has failed
+      else  // print all analyses
+        for( size_t i=0; i<analyses.size(); i++ ) {
+          fputs(ca.print_analysis(analyses[i]), stdout);
+          fputc('\n', stdout);
+        }
+    }
+  }
+  catch (const char *p) {   // deal with exceptions
+    std::cerr << p << "\n";
+    status = 1;
+  }
+
+  fclose(file);  // closed on every path once the transducer is gone
+  return status;
+}