Commit 0b611765 authored by Ben Campbell's avatar Ben Campbell
Browse files

Initial commit

parents
/*******************************************************************/
/* */
/* FILE fst-mor.C */
/* MODULE fst-mor */
/* PROGRAM SFST */
/* AUTHOR Helmut Schmid, IMS, University of Stuttgart */
/* */
/*******************************************************************/
#include "fst.h"
using std::cerr;
using std::cout;
#ifdef READLINE
#include <readline/readline.h>
#include <readline/history.h>
#else
// Fallback used when GNU readline is not compiled in.
// Prints the prompt, then reads one line from stdin into a static buffer.
// Returns a pointer to that buffer with the trailing newline removed, or
// NULL at end of input. The result must not be freed and is overwritten by
// the next call.
char *readline( const char *prompt ) {
  static char buffer[10000];
  printf("%s", prompt);
  // make sure the prompt is visible before blocking on input
  fflush(stdout);
  if (fgets(buffer, 9999, stdin) == NULL)
    return NULL;
  size_t l = strlen(buffer);
  // l is 0 when the line starts with a NUL byte; never index buffer[-1]
  if (l > 0 && buffer[l-1] == '\n')
    buffer[l-1] = 0;
  return buffer;
}
#endif
using namespace SFST;
// Passed to analyze_string/generate_string as the with_brackets argument;
// cleared by the -n option (print multi-character symbols without enclosing
// angle brackets).
bool WithBrackets=true;
/*******************************************************************/
/* */
/* usage */
/* */
/*******************************************************************/
// Writes the fst-mor command-line synopsis to stderr and terminates the
// program with exit status 1.
void usage()
{
  cerr << "\nUsage: fst-mor [options] file [file [file]]\n\n"
       << "Options:\n"
       << "-n: print multi-character symbols without enclosing angle brackets\n"
       << "-h: print this message\n";
  exit(1);
}
/*******************************************************************/
/* */
/* get_flags */
/* */
/*******************************************************************/
// Processes the option flags in argv and strips them from the argument list.
//   argc  in/out: number of entries in argv; on return, the number of
//         entries left after the recognised flags have been removed
//   argv  in/out: argument vector; remaining arguments are moved to the
//         front, keeping their relative order (argv[0] is untouched)
// "-h" calls usage(), which exits; "-n" clears WithBrackets.
void get_flags( int *argc, char **argv )
{
  for( int pos=1; pos<*argc; pos++ ) {
    const char *arg = argv[pos];
    if (strcmp(arg,"-h") == 0) {
      usage();
      argv[pos] = NULL;
    }
    else if (strcmp(arg,"-n") == 0) {
      WithBrackets = false;
      argv[pos] = NULL;
    }
  }

  // close the gaps left by the consumed flags
  int kept = 1;
  for( int pos=1; pos<*argc; pos++ ) {
    if (argv[pos] == NULL)
      continue;
    argv[kept++] = argv[pos];
  }
  *argc = kept;
}
/*******************************************************************/
/* */
/* main */
/* */
/*******************************************************************/
// Interactive analyser/generator.
// Loads the transducer stored in argv[1], then repeatedly reads a line:
//   - "q" or end of input terminates the loop,
//   - an empty line toggles between analysis and generation mode,
//   - any other line is passed to analyze_string or generate_string and the
//     results are printed to stdout.
// Returns 0 on normal termination and 1 if a const char* exception is caught.
int main( int argc, char **argv )
{
  FILE *file;

  get_flags(&argc, argv);
  if (argc < 2)
    usage();

  if ((file = fopen(argv[1],"rb")) == NULL) {
    fprintf(stderr,"\nError: Cannot open fst file %s\n\n", argv[1]);
    exit(1);
  }

  cout << "reading transducer...\n";
  try {
    Transducer a(file);
    fclose(file);
    cout << "finished.\n";

    int analyze=1;
    for(;;) {
      const char *prompt=(analyze)? "analyze> ": "generate> ";
      char *input_string=readline(prompt);
      if (input_string == NULL)
        break;
      if (strcmp(input_string,"q") == 0) {
#ifdef READLINE
        // the library version of readline returns malloc'ed memory
        free(input_string);
#endif
        break;
      }

#ifdef READLINE
      add_history(input_string);
#endif

      if (strcmp(input_string,"") == 0)
        analyze = !analyze;  // empty input switches the mode
      else if (analyze) {
        if (!a.analyze_string(input_string, stdout, WithBrackets))
          printf( "no result for %s\n", input_string);
      }
      else {
        if (!a.generate_string(input_string, stdout, WithBrackets))
          printf( "no result for %s\n", input_string);
      }

#ifdef READLINE
      free(input_string);
#endif
    }
  }
  catch(const char* p) {
    cerr << p << "\n";
    return 1;
  }
  return 0;
}
/*******************************************************************/
/* */
/* FILE fst-parse.C */
/* MODULE fst-parse */
/* PROGRAM SFST */
/* AUTHOR Helmut Schmid, IMS, University of Stuttgart */
/* */
/*******************************************************************/
#include "fst.h"
using std::cerr;
using std::vector;
using namespace SFST;
// Size of the line buffer used by main when reading the input file.
#define BUFFER_SIZE 10000
// Set by -d: print intermediate and final transducers to stderr.
bool Debug=false;
// Print status messages while loading transducers; cleared by -q.
// Defaults to true because -q ("suppress status messages") is the only
// option that touches it; with a false default the messages could never
// be printed.
bool Verbose=true;
// Names of the transducer files: -t arguments in command-line order; main
// appends the positional transducer argument at the end.
std::vector<char*> TFileNames;
/*******************************************************************/
/* */
/* usage */
/* */
/*******************************************************************/
// Writes the fst-parse command-line synopsis to stderr and terminates the
// program with exit status 1.
void usage()
{
  cerr << "\nUsage: fst-parse [options] transducer [infile [outfile]]\n\n"
       << "Options:\n"
       << "-t t: compose transducer t\n"
       << "-h: print this message\n"
       << "-q: suppress status messages\n"
       << "-d: print debugging output\n";
  exit(1);
}
/*******************************************************************/
/* */
/* get_flags */
/* */
/*******************************************************************/
// Processes the option flags in argv and strips them from the argument list.
//   argc  in/out: number of entries in argv; on return, the number of
//         entries left after the recognised flags have been removed
//   argv  in/out: argument vector; remaining arguments are moved to the
//         front, keeping their relative order (argv[0] is untouched)
// "-q" clears Verbose, "-d" sets Debug, "-h" calls usage() (which exits) and
// "-t name" appends name to TFileNames. A "-t" in last position has no value
// and is therefore left in place as an ordinary argument.
void get_flags( int *argc, char **argv )
{
  for( int pos=1; pos<*argc; pos++ ) {
    const char *arg = argv[pos];
    if (strcmp(arg,"-q") == 0) {
      Verbose = false;
      argv[pos] = NULL;
    }
    else if (strcmp(arg,"-h") == 0) {
      usage();
      argv[pos] = NULL;
    }
    else if (strcmp(arg,"-d") == 0) {
      Debug = true;
      argv[pos] = NULL;
    }
    else if (pos+1 < *argc && strcmp(arg,"-t") == 0) {
      TFileNames.push_back(argv[pos+1]);
      argv[pos] = NULL;    // the flag itself ...
      argv[++pos] = NULL;  // ... and its value
    }
  }

  // close the gaps left by the consumed flags
  int kept = 1;
  for( int pos=1; pos<*argc; pos++ ) {
    if (argv[pos] == NULL)
      continue;
    argv[kept++] = argv[pos];
  }
  *argc = kept;
}
/*******************************************************************/
/* */
/* main */
/* */
/*******************************************************************/
int main( int argc, char **argv )
{
FILE *file, *outfile;
get_flags(&argc, argv);
if (argc < 2)
usage();
TFileNames.push_back(argv[1]);
vector<Transducer*> a;
try {
for( size_t i=0; i<TFileNames.size(); i++ ) {
if ((file = fopen(TFileNames[i],"rb")) == NULL) {
fprintf(stderr,"\nError: Cannot open transducer file \"%s\"\n\n",
TFileNames[i]);
exit(1);
}
if (Verbose)
fprintf(stderr,"reading transducer %s ...", TFileNames[i]);
a.push_back(new Transducer(file));
fclose(file);
if (Verbose)
fputs("finished.\n",stderr);
}
if (argc <= 2)
file = stdin;
else {
if ((file = fopen(argv[2],"rt")) == NULL) {
fprintf(stderr,"Error: Cannot open input file %s\n\n", argv[2]);
exit(1);
}
}
if (argc <= 3)
outfile = stdout;
else {
if ((outfile = fopen(argv[3],"wt")) == NULL) {
fprintf(stderr,"Error: Cannot open output file %s\n\n", argv[3]);
exit(1);
}
}
char buffer[BUFFER_SIZE];
while (fgets(buffer, BUFFER_SIZE, file)) {
int l=(int)strlen(buffer)-1;
if (buffer[l] == '\n')
buffer[l] = '\0';
Transducer *t = new Transducer(buffer, &a.back()->alphabet, false);
for( int i=(int)a.size()-1; i>=0; i-- ) {
if (Debug) {
cerr << "\n";
cerr << *t;
}
Transducer *t2 = &(*a[i] || *t);
delete t;
t = t2;
}
Transducer *t2 = &t->lower_level();
delete t;
t = &t2->minimise();
delete t2;
if (Debug) {
cerr << "result:\n";
cerr << *t;
}
t->alphabet.copy(a[0]->alphabet);
if (!t->print_strings( outfile ))
fprintf(outfile, "no analysis for \"%s\"\n", buffer);
delete t;
}
}
catch (const char *p) {
cerr << p << "\n";
return 1;
}
return 0;
}
/*******************************************************************/
/* */
/* FILE fst-parse.C */
/* MODULE fst-parse */
/* PROGRAM SFST */
/* AUTHOR Helmut Schmid, IMS, University of Stuttgart */
/* */
/*******************************************************************/
#include "fst.h"
using std::cerr;
using std::cout;
using std::vector;
using namespace SFST;
// Size of the line buffer used by main when reading the input file.
#define BUFFER_SIZE 10000
// Set by -d: print intermediate and final transducers to stderr.
bool Debug=false;
// Print status messages while loading transducers; cleared by -q.
// Defaults to true because -q ("suppress status messages") is the only
// option that touches it; with a false default the messages could never
// be printed.
bool Verbose=true;
// Names of the transducer files: -t arguments in command-line order; main
// appends the positional transducer argument at the end.
std::vector<char*> TFileNames;
/*******************************************************************/
/* */
/* usage */
/* */
/*******************************************************************/
// Writes the command-line synopsis to stderr and terminates the program
// with exit status 1.
void usage()
{
  cerr << "\nUsage: fst-parse [options] transducer [infile]\n\n"
       << "Options:\n"
       << "-t t: compose transducer t\n"
       << "-h: print this message\n"
       << "-q: suppress status messages\n"
       << "-d: print debugging output\n";
  exit(1);
}
/*******************************************************************/
/* */
/* get_flags */
/* */
/*******************************************************************/
// Processes the option flags in argv and strips them from the argument list.
//   argc  in/out: number of entries in argv; on return, the number of
//         entries left after the recognised flags have been removed
//   argv  in/out: argument vector; remaining arguments are moved to the
//         front, keeping their relative order (argv[0] is untouched)
// "-q" clears Verbose, "-d" sets Debug, "-h" calls usage() (which exits) and
// "-t name" appends name to TFileNames. A "-t" in last position has no value
// and is therefore left in place as an ordinary argument.
void get_flags( int *argc, char **argv )
{
  for( int pos=1; pos<*argc; pos++ ) {
    const char *arg = argv[pos];
    if (strcmp(arg,"-q") == 0) {
      Verbose = false;
      argv[pos] = NULL;
    }
    else if (strcmp(arg,"-h") == 0) {
      usage();
      argv[pos] = NULL;
    }
    else if (strcmp(arg,"-d") == 0) {
      Debug = true;
      argv[pos] = NULL;
    }
    else if (pos+1 < *argc && strcmp(arg,"-t") == 0) {
      TFileNames.push_back(argv[pos+1]);
      argv[pos] = NULL;    // the flag itself ...
      argv[++pos] = NULL;  // ... and its value
    }
  }

  // close the gaps left by the consumed flags
  int kept = 1;
  for( int pos=1; pos<*argc; pos++ ) {
    if (argv[pos] == NULL)
      continue;
    argv[kept++] = argv[pos];
  }
  *argc = kept;
}
/*******************************************************************/
/* */
/* main */
/* */
/*******************************************************************/
int main( int argc, char **argv )
{
FILE *file;
get_flags(&argc, argv);
if (argc < 2 || argc > 3)
usage();
TFileNames.push_back(argv[1]);
vector<Transducer*> a;
try {
for( size_t i=0; i<TFileNames.size(); i++ ) {
if ((file = fopen(TFileNames[i],"rb")) == NULL) {
fprintf(stderr,"\nError: Cannot open transducer file \"%s\"\n\n",
TFileNames[i]);
exit(1);
}
if (Verbose)
fprintf(stderr,"reading transducer %s ...", TFileNames[i]);
a.push_back(new Transducer(file));
fclose(file);
if (Verbose)
fputs("finished.\n",stderr);
}
if (argc <= 2)
file = stdin;
else {
if ((file = fopen(argv[2],"rt")) == NULL) {
fprintf(stderr,"Error: Cannot open input file %s\n\n", argv[2]);
exit(1);
}
}
char buffer[BUFFER_SIZE];
while (fgets(buffer, BUFFER_SIZE, file)) {
int l=(int)strlen(buffer)-1;
if (buffer[l] == '\n')
buffer[l] = '\0';
Transducer *t = new Transducer(buffer, &a.back()->alphabet, false);
for( int i=(int)a.size()-1; i>=0; i-- ) {
if (Debug) {
cerr << "\n";
cerr << *t;
}
Transducer *t2 = &(*a[i] || *t);
delete t;
t = t2;
}
Transducer *t2 = &t->lower_level();
delete t;
t = &t2->minimise();
delete t2;
if (Debug) {
cerr << "result:\n";
cerr << *t;
}
t->alphabet.copy(a[0]->alphabet);
cout << *t;
delete t;
}
}
catch (const char *p) {
cerr << p << "\n";
return 1;
}
return 0;
}
/*******************************************************************/
/* */
/* FILE fst-print.C */
/* MODULE fst-print */
/* PROGRAM SFST */
/* AUTHOR Helmut Schmid, IMS, University of Stuttgart */
/* */
/*******************************************************************/
#include "fst.h"
using std::cout;
using std::cerr;
using namespace SFST;
/*******************************************************************/
/* */
/* main */
/* */
/*******************************************************************/
// Reads a transducer from argv[1] (or stdin when no argument is given),
// reports the value of Transducer::size() on stderr and writes the
// transducer to stdout via operator<<.
// "-h", "-help" and "-?" print a usage line and exit with status 1.
// Returns 0 on success and 1 if a const char* exception is caught.
int main( int argc, char **argv )
{
  FILE *file;

  if (argc > 1 && (!strcmp(argv[1],"-h") ||
                   !strcmp(argv[1],"-help") ||
                   !strcmp(argv[1],"-?")))
    {
      fprintf(stderr,"\nUsage: %s [file]\n\n", argv[0]);
      exit(1);
    }

  if (argc == 1)
    file = stdin;
  else if ((file = fopen(argv[1],"rb")) == NULL) {
    fprintf(stderr,"\nError: Cannot open transducer file %s\n\n", argv[1]);
    exit(1);
  }

  try {
    Transducer a(file);
    fclose(file);
    // size() yields a size_t, which need not have the width %lu expects;
    // convert explicitly so the format and the argument always agree
    fprintf(stderr,"size=%lu\n", (unsigned long)a.size());
    cout << a;
  }
  catch (const char *p) {
    cerr << p << "\n";
    return 1;
  }
  return 0;
}
/*******************************************************************/
/* */
/* FILE fst-text2bin.C */
/* MODULE fst-text2bin */
/* PROGRAM SFST */
/* AUTHOR Helmut Schmid, IMS, University of Stuttgart */
/* */
/*******************************************************************/
#include "fst.h"
using std::cerr;
using namespace SFST;
/*******************************************************************/
/* */
/* main */
/* */
/*******************************************************************/
// Converts a transducer from text format (argv[1]) to the binary format
// written by Transducer::store (argv[2]).
// Prints a usage line and exits with status 1 if fewer than two file
// arguments are given or the first argument is "-h", "-help" or "-?".
// Returns 0 on success and 1 if a const char* exception is caught.
int main( int argc, char **argv )
{
  FILE *file;

  const bool help_requested =
    argc > 1 && (strcmp(argv[1],"-h") == 0 ||
                 strcmp(argv[1],"-help") == 0 ||
                 strcmp(argv[1],"-?") == 0);
  if (argc < 3 || help_requested)
    {
      fprintf(stderr,"\nUsage: %s file file\n\n", argv[0]);
      exit(1);
    }

  if ((file = fopen(argv[1],"rt")) == NULL) {
    fprintf(stderr,"\nError: Cannot open input file %s\n\n", argv[1]);
    exit(1);
  }

  try {
    Transducer a(file, false); // read a transducer in text format
    fclose(file);

    if ((file = fopen(argv[2],"wb")) == NULL) {
      fprintf(stderr,"\nError: Cannot open output file %s\n\n", argv[2]);
      exit(1);
    }
    a.store(file);

    // a failed write (e.g. full disk) would otherwise leave a truncated
    // file behind while the program reports success
    const bool write_failed = (ferror(file) != 0);
    if (fclose(file) != 0 || write_failed) {
      fprintf(stderr,"\nError: Cannot write output file %s\n\n", argv[2]);
      exit(1);
    }
  }
  catch (const char *p) {
    cerr << p << "\n";
    return 1;
  }
  return 0;
}
/*******************************************************************/
/* */
/* File: fst-train.C */
/* Author: Helmut Schmid */
/* Purpose: EM training of a transducer */
/* Created: Mon Aug 8 15:11:36 2005 */
/* Modified: Thu Nov 27 17:20:49 2008 (schmid) */
/* */
/*******************************************************************/
#include <math.h>
#include "compact.h"
using std::cerr;
using std::vector;
using namespace SFST;
// Size of the line buffer used by main when reading the training input.
const int BUFFER_SIZE=1000;
// Print status and progress messages to stderr; cleared by -q.
bool Verbose=true;
// Set by -b: main calls train2 instead of train for each input line.
bool BothLayers=false;
// Set by -d: copied into the simplest_only field of every loaded transducer.
bool Disambiguate=false;
// Transducer file names: the -t arguments in command-line order; main
// appends the positional transducer argument at the end.
vector<char*> Filenames;
/*******************************************************************/
/* */
/* print_parameters */
/* */
/*******************************************************************/
// Writes the estimated parameters to file in binary form:
//   1. finalfreq.size() as size_t
//   2. arcfreq.size() as size_t
//   3. log(finalfreq[k]) for every k, each stored as float
//   4. log(arcfreq[k]) for every k, each stored as float
// The return values of fwrite are not checked.
void print_parameters( std::vector<double> &arcfreq, std::vector<double> &finalfreq,
                       FILE *file )
{
  const size_t final_count = finalfreq.size();
  fwrite(&final_count, sizeof(final_count), 1, file);
  const size_t arc_count = arcfreq.size();
  fwrite(&arc_count, sizeof(arc_count), 1, file);

  for( std::vector<double>::const_iterator it=finalfreq.begin();
       it!=finalfreq.end(); ++it )
    {
      const float logval = (float)log(*it);
      fwrite(&logval, sizeof(logval), 1, file);
    }

  for( std::vector<double>::const_iterator it=arcfreq.begin();
       it!=arcfreq.end(); ++it )
    {
      const float logval = (float)log(*it);
      fwrite(&logval, sizeof(logval), 1, file);
    }
}
/*******************************************************************/
/* */
/* usage */
/* */
/*******************************************************************/
// Writes the fst-train command-line synopsis to stderr and terminates the
// program with exit status 1.
void usage()
{
  cerr << "\nUsage: fst-train [options] file [file]\n\n"
       << "Options:\n"
       << "-t tfile: alternative transducer\n"
       << "-b: input with surface and analysis characters\n"
       << "-d: disambiguate symbolically (use only the simplest analyses)\n"
       << "-q: suppress status messages\n"
       << "-h: print this message\n";
  exit(1);
}
/*******************************************************************/
/* */
/* get_flags */
/* */
/*******************************************************************/
// Processes the option flags in argv and strips them from the argument list.
//   argc  in/out: number of entries in argv; on return, the number of
//         entries left after the recognised flags have been removed
//   argv  in/out: argument vector; remaining arguments are moved to the
//         front, keeping their relative order (argv[0] is untouched)
// "-q" clears Verbose, "-d" sets Disambiguate, "-b" sets BothLayers, "-h"
// calls usage() (which exits) and "-t name" appends name to Filenames.
// A "-t" in last position has no value and is left in place as an ordinary
// argument.
void get_flags( int *argc, char **argv )
{
  for( int pos=1; pos<*argc; pos++ ) {
    const char *arg = argv[pos];
    if (strcmp(arg,"-q") == 0) {
      Verbose = false;
      argv[pos] = NULL;
    }
    else if (strcmp(arg,"-d") == 0) {
      Disambiguate = true;
      argv[pos] = NULL;
    }
    else if (strcmp(arg,"-b") == 0) {
      BothLayers = true;
      argv[pos] = NULL;
    }
    else if (strcmp(arg,"-h") == 0) {
      usage();
      argv[pos] = NULL;
    }
    else if (pos < *argc-1 && strcmp(arg,"-t") == 0) {
      Filenames.push_back(argv[pos+1]);
      argv[pos] = NULL;    // the flag itself ...
      argv[++pos] = NULL;  // ... and its value
    }
  }

  // close the gaps left by the consumed flags
  int kept = 1;
  for( int pos=1; pos<*argc; pos++ ) {
    if (argv[pos] == NULL)
      continue;
    argv[kept++] = argv[pos];
  }
  *argc = kept;
}
/*******************************************************************/
/* */
/* main */
/* */
/*******************************************************************/
// Training driver.
// Loads one CompactTransducer per entry of Filenames (the -t arguments
// followed by argv[1]), reads the training data (argv[2] or stdin) line by
// line and passes each line to train() - or train2() when -b was given - of
// the transducers in order, stopping at the first one that returns true.
// Afterwards estimate_probs() is called for each transducer and the result
// is written to "<transducer file>.prob" by print_parameters().
// Returns 0 on success and 1 if a const char* exception is caught.
int main( int argc, char **argv )
{
  FILE *file;
  vector<CompactTransducer*> transducer;

  get_flags(&argc, argv);
  if (argc < 2)
    usage();
  Filenames.push_back(argv[1]);

  try {
    for( size_t i=0; i<Filenames.size(); i++ ) {
      if ((file = fopen(Filenames[i],"rb")) == NULL) {
        fprintf(stderr, "\nError: Cannot open transducer file %s\n\n",
                Filenames[i]);
        exit(1);
      }
      if (Verbose)
        cerr << "reading transducer from file \"" << Filenames[i] <<"\"...\n";
      transducer.push_back(new CompactTransducer(file));
      fclose(file);
      transducer[i]->simplest_only = Disambiguate;
      if (Verbose)
        cerr << "finished.\n";
    }

    if (argc <= 2)
      file = stdin;
    else {
      if ((file = fopen(argv[2],"rt")) == NULL) {
        fprintf(stderr,"\nError: Cannot open input file %s\n\n",argv[2]);
        exit(1);
      }
    }

    // one frequency table per transducer, sized by its node and arc counts
    vector<vector<double> > arcfreq, finalfreq;
    finalfreq.resize(transducer.size());
    arcfreq.resize(transducer.size());
    for( size_t i=0; i<transducer.size(); i++ ) {
      finalfreq[i].resize(transducer[i]->node_count(), 0.0);
      arcfreq[i].resize(transducer[i]->arc_count(), 0.0);
    }

    char buffer[BUFFER_SIZE];
    int N=0;
    while (fgets(buffer, BUFFER_SIZE, file)) {
      if (Verbose && ++N % 100 == 0)
        fprintf(stderr,"\r%d", N);

      // strip the trailing newline; the length can be 0 if the line starts
      // with a NUL byte, so guard the index
      size_t len = strlen(buffer);
      if (len > 0 && buffer[len-1] == '\n')
        buffer[len-1] = '\0';

      // stop at the first transducer whose training call returns true
      for( size_t i=0; i<transducer.size(); i++ ) {
        if (BothLayers) {
          if (transducer[i]->train2(buffer, arcfreq[i], finalfreq[i] ))
            break;
        }
        else {
          if (transducer[i]->train(buffer, arcfreq[i], finalfreq[i] ))
            break;
        }
      }
    }
    if (file != stdin)
      fclose(file);
    if (Verbose)
      fputc('\n', stderr);

    for( size_t i=0; i<transducer.size(); i++ ) {
      char probname[1000];
      FILE *outfile;

      // bounded formatting: a very long transducer file name must not
      // overflow the name buffer
      int needed = snprintf(probname, sizeof(probname), "%s.prob",
                            Filenames[i]);
      if (needed < 0 || (size_t)needed >= sizeof(probname)) {
        fprintf(stderr, "\nError: File name %s.prob is too long\n\n",
                Filenames[i]);
        exit(1);
      }
      if ((outfile = fopen(probname,"wb")) == NULL) {
        fprintf(stderr, "\nError: Cannot open probability file %s.prob\n\n",
                Filenames[i]);
        exit(1);
      }
      transducer[i]->estimate_probs( arcfreq[i], finalfreq[i] );
      print_parameters( arcfreq[i], finalfreq[i], outfile );
      fclose(outfile);
    }
  }
  catch (const char *p) {
    cerr << p << "\n";
    return 1;
  }
  return 0;
}
/*******************************************************************/
/* */
/* FILE fst.C */
/* MODULE fst */
/* PROGRAM SFST */
/* AUTHOR Helmut Schmid, IMS, University of Stuttgart */
/* */
/* PURPOSE basic FST functions */
/* */
/*******************************************************************/
#include "fst.h"
namespace SFST {
using std::vector;
using std::istream;
using std::ostream;
using std::cerr;
const int BUFFER_SIZE=100000;
bool Transducer::hopcroft_minimisation=true;
/*******************************************************************/
/* */
/* Arcs::size */
/* */
/*******************************************************************/
int Arcs::size() const
{
int n=0;
for( Arc *p=first_arcp; p; p=p->next ) n++;
for( Arc *p=first_epsilon_arcp; p; p=p->next ) n++;
return n;
}
/*******************************************************************/
/* */
/* Arcs::target_node */
/* */
/*******************************************************************/
// Returns the target node of the first arc in the first_arcp list whose
// label equals l, or NULL if there is none. The list starting at
// first_epsilon_arcp is not searched.
Node *Arcs::target_node( Label l )
{
  Arc *cur=first_arcp;
  while (cur != NULL) {
    if (cur->label() == l)
      return cur->target_node();
    cur = cur->next;
  }
  return NULL;
}
// Const variant: returns the target node of the first arc in the first_arcp
// list whose label equals l, or NULL if there is none. The list starting at
// first_epsilon_arcp is not searched.
const Node *Arcs::target_node( Label l ) const
{
  const Arc *cur=first_arcp;
  while (cur != NULL) {
    if (cur->label() == l)
      return cur->target_node();
    cur = cur->next;
  }
  return NULL;
}
/*******************************************************************/
/* */
/* Transducer::new_node */
/* */
/*******************************************************************/
// Obtains storage for one Node from the transducer's allocator (mem),
// initialises it with Node::init() and returns it.
Node *Transducer::new_node()
{
  Node *fresh=(Node*)mem.alloc( sizeof(Node) );

  fresh->init();
  return fresh;
}
/*******************************************************************/
/* */
/* Transducer::new_arc */
/* */
/*******************************************************************/
// Obtains storage for one Arc from the transducer's allocator (mem),
// initialises it with label l and target node target, and returns it.
Arc *Transducer::new_arc( Label l, Node *target )
{
  Arc *fresh=(Arc*)mem.alloc( sizeof(Arc) );

  fresh->init( l, target);
  return fresh;
}
/*******************************************************************/
/* */
/* Arcs::add_arc */
/* */
/*******************************************************************/
// Creates a new arc (allocated by transducer a) with label l leading to
// node, and pushes it onto the front of the epsilon list if l is an epsilon
// label, otherwise onto the front of the regular arc list.
void Arcs::add_arc( Label l, Node *node, Transducer *a )
{
  Arc *fresh=a->new_arc( l, node );

  // pick the list head that matches the kind of label
  Arc **head = l.is_epsilon() ? &first_epsilon_arcp : &first_arcp;
  fresh->next = *head;
  *head = fresh;
}
/*******************************************************************/
/* */
/* Arcs::remove_arc */
/* */
/*******************************************************************/
// Unlinks arc from the list it belongs to (the epsilon list if its label is
// an epsilon label, the regular list otherwise).
// Returns 1 if the arc was found and unlinked, 0 otherwise. The arc's
// storage is not released here.
int Arcs::remove_arc( Arc *arc )
{
  Arc **link;
  if (arc->label().is_epsilon())
    link = &first_epsilon_arcp;
  else
    link = &first_arcp;

  // link always addresses the pointer that refers to the current element
  while (*link != NULL) {
    if (*link == arc) {
      *link = arc->next;
      return 1;
    }
    link = &(*link)->next;
  }
  return 0;
}
/*******************************************************************/
/* */
/* Node::init */
/* */
/*******************************************************************/
// Resets the node to its initial state: not final, visited mark 0, arc set
// initialised via arcsp.init() and no forward pointer.
void Node::init()
{
  arcsp.init();
  forwardp = NULL;
  visited = 0;
  final = false;
}
/*******************************************************************/
/* */
/* Node::clear_visited */
/* */
/*******************************************************************/
// Resets the visited mark of this node and, recursively, of every node
// reachable from it. nodeset records the nodes already handled so that each
// node is processed once. The current size of nodeset is printed to stderr
// for every newly handled node.
void Node::clear_visited( NodeHashSet &nodeset )
{
  if (nodeset.find( this ) != nodeset.end())
    return;  // already handled

  visited = 0;
  nodeset.insert( this );
  fprintf(stderr," %lu", (unsigned long)nodeset.size());

  for( ArcsIter iter(arcs()); iter; iter++ ) {
    Arc *out=iter;
    out->target_node()->clear_visited( nodeset );
  }
}
/*******************************************************************/
/* */
/* NodeNumbering::number_node */
/* */
/*******************************************************************/
// Assigns the next free number to node (unless it was already visited under
// the current mark of a), appends it to nodes, and then numbers all nodes
// reachable through its arcs in the same way.
void NodeNumbering::number_node( Node *node, Transducer &a )
{
  if (node->was_visited( a.vmark ))
    return;

  // the number of a node is its position in the nodes vector
  nummap[node] = (int)nodes.size();
  nodes.push_back(node);

  for( ArcsIter iter(node->arcs()); iter; iter++ ) {
    Arc *out=iter;
    number_node( out->target_node(), a );
  }
}
/*******************************************************************/
/* */
/* NodeNumbering::NodeNumbering */
/* */
/*******************************************************************/
// Numbers the nodes of transducer a, starting from its root node.
NodeNumbering::NodeNumbering( Transducer &a )
{
  // advance the mark so that was_visited() starts from a clean state
  a.incr_vmark();

  Node *start = a.root_node();
  number_node( start, a );
}
/*******************************************************************/
/* */
/* Transducer::add_string */
/* */
/*******************************************************************/
void Transducer::add_string( char *s, bool extended, Alphabet *a )
{
if (a == NULL)
a = &alphabet;
Node *node=root_node();
Label l;
while (!(l = a->next_label(s, extended)).is_epsilon()) {
a->insert(l);
Arcs *arcs=node->arcs();
node = arcs->target_node( l );
if (node == NULL) {
node = new_node();
arcs->add_arc( l, node, this );
}
}
node->set_final(1);
}
/*******************************************************************/
/* */
/* Transducer::Transducer */
/* */
/*******************************************************************/
// Builds a transducer consisting of a single chain of arcs, one per label of
// path in order, whose last node is final. For an empty path the root node
// itself becomes final.
Transducer::Transducer( vector<Label> &path )
  : root(), mem()
{
  vmark = 0;
  deterministic = minimised = true;

  Node *tail=root_node();
  for( vector<Label>::iterator it=path.begin(); it!=path.end(); ++it ) {
    Node *next = new_node();
    tail->arcs()->add_arc( *it, next, this );
    tail = next;
  }
  tail->set_final(1);
}
/*******************************************************************/
/* */
/* Transducer::Transducer */
/* */
/*******************************************************************/
// Builds a transducer from a stream containing one string per line.
//   is       input stream, read with getline (lines of up to 9999 chars)
//   a        optional alphabet; if given it is copied into this transducer
//            and the strings are added with extended=true
//   verbose  print a progress counter to stderr every 10000 lines
// Trailing blanks, tabs and carriage returns are removed from each line,
// except for a whitespace character that is directly preceded by a backslash.
Transducer::Transducer( istream &is, const Alphabet *a, bool verbose )
  : root(), mem()
{
  char buffer[10000];
  int n=0;

  vmark = 0;
  deterministic = true;
  minimised = false;

  bool extended=false;
  if (a) {
    alphabet.copy(*a);
    extended = true;
  }

  while (is.getline(buffer, 10000)) {
    if (verbose && ++n % 10000 == 0) {
      if (n == 10000)
        cerr << "\n";
      cerr << "\r" << n << " words";
    }

    // end is the length of the part of the line that is kept
    int end=(int)strlen(buffer);
    while (end > 0) {
      const char c = buffer[end-1];
      const bool blank = (c == ' ' || c == '\t' || c == '\r');
      const bool escaped = (end > 1 && buffer[end-2] == '\\');
      if (!blank || escaped)
        break;
      end--;
    }
    buffer[end] = 0;

    add_string(buffer, extended);
  }

  if (verbose && n >= 10000)
    cerr << "\n";
}
/*******************************************************************/
/* */
/* Transducer::Transducer */
/* */
/*******************************************************************/
// Builds a transducer that contains exactly the path for string s.
//   s         the string to add (see add_string)
//   a         optional alphabet; copied into this transducer when non-NULL
//   extended  forwarded unchanged to add_string
Transducer::Transducer( char *s, const Alphabet *a, bool extended )
  : root(), mem()
{
  vmark = 0;
  minimised = true;
  deterministic = true;

  if (a != NULL)
    alphabet.copy(*a);

  add_string(s, extended);
}
/*******************************************************************/
/* */
/* Transducer::clear */
/* */
/*******************************************************************/
// Returns the transducer to an empty state: mark counter and status flags
// are reset, the root node is re-initialised, and the allocator and the
// alphabet are cleared.
void Transducer::clear()
{
  vmark = 0;
  minimised = false;
  deterministic = false;

  root.init();
  mem.clear();
  alphabet.clear();
}
/*******************************************************************/
/* */
/* Transducer::store_symbols */
/* */
/*******************************************************************/
// Collects, for the part of the transducer reachable from node, every arc
// label in labels and, for every upper and lower character of those labels
// that has a symbol in the alphabet, a copy of that symbol in symbol.
// The copies are created with fst_strdup; releasing them is left to the
// caller. Nodes already visited under the current vmark are skipped.
void Transducer::store_symbols(Node *node, SymbolMap &symbol,
                               LabelSet &labels)
{
  if (node->was_visited( vmark ))
    return;

  for( ArcsIter iter(node->arcs()); iter; iter++ ) {
    Arc *arc=iter;
    Label l=arc->label();
    labels.insert(l);

    // handle the upper character first, then the lower one
    const Character sides[2] = { l.upper_char(), l.lower_char() };
    for( int k=0; k<2; k++ ) {
      const Character c = sides[k];
      if (symbol.find(c) != symbol.end())
        continue;  // already recorded
      const char *s = alphabet.code2symbol(c);
      if (s)
        symbol[c] = fst_strdup(s);
    }

    store_symbols( arc->target_node(), symbol, labels );
  }
}
/*******************************************************************/
/* */
/* Transducer::minimise_alphabet */
/* */
/*******************************************************************/
void Transducer::minimise_alphabet()
{
SymbolMap symbols;
LabelSet labels;
incr_vmark();
store_symbols(root_node(), symbols, labels);
alphabet.clear();
for( SymbolMap::iterator it=symbols.begin(); it!=symbols.end(); it++ ) {
alphabet.add_symbol( it->second, it->first );
free(it->second);
}
for( LabelSet::iterator it=labels.begin(); it!=labels.end(); it++ )
alphabet.insert(*it);
}
/*******************************************************************/
/* */
/* Transducer::rev_det_minimise */
/* */
/*******************************************************************/
// Minimises by applying reverse() and determinise() twice in sequence.
// Returns a reference to a newly created transducer (a copy() of this one if
// it is already flagged as minimised). The verbose argument is not used.
Transducer &Transducer::rev_det_minimise( bool verbose )
{
  if (minimised)
    return copy();

  // each intermediate result is released as soon as the next one exists
  Transducer *reversed = &reverse();
  Transducer *reversed_det = &reversed->determinise();
  delete reversed;

  Transducer *restored = &reversed_det->reverse();
  delete reversed_det;

  Transducer *result = &restored->determinise();
  delete restored;

  result->minimised = true;
  result->minimise_alphabet();
  return *result;
}
/*******************************************************************/
/* */
/* Transducer::size_node */
/* */
/*******************************************************************/
// Returns the number of nodes reachable from node (including node itself)
// that have not yet been visited under the current vmark.
size_t Transducer::size_node( Node *node )
{
  if (node->was_visited( vmark ))
    return 0;

  size_t count = 1;  // this node
  for( ArcsIter iter(node->arcs()); iter; iter++ ) {
    Arc *out=iter;
    count += size_node( out->target_node() );
  }
  return count;
}
/*******************************************************************/
/* */
/* Transducer::size */
/* */
/*******************************************************************/
// Returns the number of nodes reachable from the root node.
size_t Transducer::size()
{
  // advance the mark so that every node counts as unvisited
  incr_vmark();

  const size_t count = size_node(root_node());
  return count;
}
/*******************************************************************/
/* */
/* Transducer::enumerate_paths_node */
/* */
/*******************************************************************/
// Recursive helper of enumerate_paths.
//   node      node to continue from
//   path      labels on the way from the root to node; extended for each
//             outgoing arc during the recursion and restored afterwards
//   previous  set of nodes on the current path
//   result    receives one newly allocated single-path Transducer for every
//             final node reached
// NOTE(review): previous is maintained here but never consulted, so a cyclic
// transducer still recurses without bound; the caller is expected to rule
// that out - confirm that is_infinitely_ambiguous() covers all cycles.
void Transducer::enumerate_paths_node( Node *node, vector<Label> &path,
                                       NodeHashSet &previous,
                                       vector<Transducer*> &result )
{
  if (node->is_final())
    result.push_back(new Transducer(path));

  // Put node on the current path once for all of its arcs. Only remove it
  // again if it was added by this call, so that an entry owned by an outer
  // call is left alone.
  const bool added = previous.insert(node).second;

  for( ArcsIter it(node->arcs()); it; it++ ) {
    Arc *arc=it;

    path.push_back(arc->label());
    enumerate_paths_node( arc->target_node(), path, previous, result );
    path.pop_back();
  }

  // erase by key: an iterator saved before the recursion might no longer be
  // valid after the nested insertions
  if (added)
    previous.erase(node);
}
/*******************************************************************/
/* */
/* Transducer::enumerate_paths */
/* */
/*******************************************************************/
// Replaces the contents of result with one single-path transducer for every
// path from the root to a final node. Transducers previously stored in
// result are deleted first.
// Returns true (leaving result untouched) if is_infinitely_ambiguous()
// reports true, and false after a successful enumeration.
bool Transducer::enumerate_paths( vector<Transducer*> &result )
{
  if (is_infinitely_ambiguous())
    return true;

  // release whatever the caller still had in the vector
  for( vector<Transducer*>::iterator old=result.begin();
       old!=result.end(); ++old )
    delete *old;
  result.clear();

  NodeHashSet previous;
  vector<Label> path;
  enumerate_paths_node( root_node(), path, previous, result );
  return false;
}
/*******************************************************************/
/* */
/* Transducer::print_strings_node */
/* */
/*******************************************************************/
// Recursive helper of print_strings.
//   node           node to continue from
//   buffer         output buffer of BUFFER_SIZE chars holding the string for
//                  the path from the root to node in buffer[0..pos-1]
//   pos            number of chars currently used in buffer
//   file           stream the completed strings are written to
//   with_brackets  forwarded to Alphabet::write_label
// Returns 1 if at least one string was printed below node, 0 otherwise.
// Throws a const char* message if the output string does not fit the buffer.
int Transducer::print_strings_node(Node *node, char *buffer, int pos,
                                   FILE *file, bool with_brackets )
{
  int result = 0;

  if (node->was_visited( vmark )) {
    if (node->forward() != NULL) { // cycle detected
      cerr << "Warning: cyclic analyses (cycle aborted)\n";
      return 0;
    }
    node->set_forward(node); // used like a flag for loop detection
  }

  // write_label can advance the position by more than one char, so pos may
  // step past BUFFER_SIZE without ever being equal to it
  if (pos >= BUFFER_SIZE)
    throw "Output string in function print_strings_node is too long";

  if (node->is_final()) {
    buffer[pos] = '\0';
    fprintf(file,"%s\n", buffer);
    result = 1;
  }

  for( ArcsIter i(node->arcs()); i; i++ ) {
    int p=pos;  // every arc continues from the same prefix
    Arc *arc=i;
    Label l=arc->label();
    alphabet.write_label(l, buffer, &p, with_brackets);
    result |= print_strings_node(arc->target_node(), buffer, p,
                                 file, with_brackets );
  }

  node->set_forward(NULL);
  return result;
}
/*******************************************************************/
/* */
/* Transducer::print_strings */
/* */
/*******************************************************************/
int Transducer::print_strings( FILE *file, bool with_brackets )
{
char buffer[BUFFER_SIZE];
incr_vmark();
return print_strings_node( root_node(), buffer, 0, file, with_brackets );
}
/*******************************************************************/
/* */
/* Transducer::analyze_string */
/* */
/*******************************************************************/
bool Transducer::analyze_string( char *string, FILE *file, bool with_brackets )
{
vector<Character> input;
alphabet.string2symseq( string, input );
vector<Label> labels;
for( size_t i=0; i<input.size(); i++ )
labels.push_back(Label(input[i]));
Transducer a1(labels);
Transducer *a2=&(*this || a1);
Transducer *a3=&(a2->lower_level());
delete a2;
a2 = &a3->minimise();
delete a3;
a2->alphabet.copy(alphabet);
bool result = a2->print_strings( file, with_brackets );
delete a2;
return result;
}
/*******************************************************************/
/* */
/* Transducer::generate_string */
/* */
/*******************************************************************/
bool Transducer::generate_string( char *string, FILE *file, bool with_brackets)
{
Transducer a1(string, &alphabet, false);
Transducer *a2=&(a1 || *this);
Transducer *a3=&(a2->upper_level());
delete a2;
a2 = &a3->minimise();
delete a3;
a2->alphabet.copy(alphabet);
bool result = a2->print_strings( file, with_brackets );
delete a2;
return result;
}
/*******************************************************************/
/* */
/* complete */
/* */
/*******************************************************************/
// Inserts the label of every non-epsilon arc reachable from node into
// alphabet. Nodes already visited under vmark are skipped.
static void complete( Node *node, Alphabet &alphabet, VType vmark)
{
  if (!node->was_visited( vmark )) {
    for( ArcsIter iter(node->arcs()); iter; iter++ ) {
      Arc *out=iter;
      const bool is_eps = out->label().is_epsilon();
      if (!is_eps)
        alphabet.insert(out->label());
      complete(out->target_node(), alphabet, vmark);
    }
  }
}
/*******************************************************************/
/* */
/* Transducer::complete_alphabet */
/* */
/*******************************************************************/
void Transducer::complete_alphabet()
{
incr_vmark();
complete(root_node(), alphabet, vmark);
}
/*******************************************************************/
/* */
/* print_node */
/* */
/*******************************************************************/
// Writes the part of the transducer reachable from node to s in text form.
// For every arc one line "source<TAB>target<TAB>lower<TAB>upper" is written
// (node numbers taken from index, characters rendered by abc.write_char);
// a final node additionally gets a line containing only its number.
// Nodes already visited under vmark are skipped.
static void print_node( ostream &s, Node *node, NodeNumbering &index,
                        VType vmark, Alphabet &abc )
{
  if (node->was_visited( vmark ))
    return;

  Arcs *out=node->arcs();

  // first all lines for this node ...
  for( ArcsIter iter(out); iter; iter++ ) {
    Arc *arc=iter;
    s << index[node] << "\t" << index[arc->target_node()];
    // the two characters are written by separate statements so that the
    // first write_char result is consumed before the second call is made
    s << "\t" << abc.write_char(arc->label().lower_char());
    s << "\t" << abc.write_char(arc->label().upper_char());
    s << "\n";
  }
  if (node->is_final())
    s << index[node] << "\n";

  // ... then the successor nodes
  for( ArcsIter iter(out); iter; iter++ ) {
    Arc *arc=iter;
    print_node( s, arc->target_node(), index, vmark, abc );
  }
}
/*******************************************************************/
/* */
/* operator<< */
/* */
/*******************************************************************/
// Writes transducer a to stream s in the text format produced by print_node
// and returns s.
ostream &operator<<( ostream &s, Transducer &a )
{
  // number the nodes first; this uses up one visit mark of its own
  NodeNumbering index(a);

  a.incr_vmark();
  Node *start = a.root_node();
  print_node( s, start, index, a.vmark, a.alphabet );
  return s;
}
/*******************************************************************/
/* */
/* store_node_info */
/* */
/*******************************************************************/
// Writes the header of a node record to file: the final flag as one char
// followed by the number of arcs as unsigned short.
// Throws a const char* message if the node has more than 65535 arcs.
static void store_node_info( FILE *file, Node *node )
{
  // final flag
  char final_flag=node->is_final();
  fwrite(&final_flag,sizeof(final_flag),1,file);

  // arc count, which has to fit into 16 bits
  int arc_count = node->arcs()->size();
  if (arc_count > 65535)
    throw "Error: in function store_node\n";
  unsigned short stored_count=(unsigned short)arc_count;
  fwrite(&stored_count,sizeof(stored_count),1,file);
}
/*******************************************************************/
/* */
/* store_arc_label */
/* */
/*******************************************************************/
// Writes the label of arc to file: first the lower character, then the
// upper character, each as a raw Character value.
static void store_arc_label( FILE *file, Arc *arc )
{
  Label l=arc->label();

  Character lc=l.lower_char();
  fwrite(&lc,sizeof(lc),1,file);

  Character uc=l.upper_char();
  fwrite(&uc,sizeof(uc),1,file);
}
/*******************************************************************/
/* */
/* store_node */
/* */
/*******************************************************************/
static void store_node( FILE *file, Node *node, NodeNumbering &index,
VType vmark )
{
if (!node->was_visited( vmark )) {
store_node_info( file, node );
// write the arcs
for( ArcsIter p(node->arcs()); p; p++ ) {
Arc *arc=p;
store_arc_label( file, arc );
unsigned int t=index[arc->target_node()];
fwrite(&t,sizeof(t),1,file);
store_node(file, arc->target_node(), index, vmark );
}
}
}
/*******************************************************************/
/* */
/* store_lowmem_node */
/* */
/*******************************************************************/
static void store_lowmem_node( FILE *file, Node *node, NodeNumbering &index,
vector<unsigned int> &startpos)
{
store_node_info( file, node );
// write the arcs
for( ArcsIter p(node->arcs()); p; p++ ) {
Arc *arc=p;
store_arc_label( file, arc );
unsigned int t=startpos[index[arc->target_node()]];
fwrite(&t,sizeof(t),1,file);
}
}
/*******************************************************************/
/* */
/* Transducer::store_lowmem */
/* */
/*******************************************************************/
// Stores the transducer in the "lowmem" binary layout:
//   'l', the alphabet, then one record per node in node-number order.
// Each arc refers to its target by the position of the target's record
// as computed from ftell(), so `file` must support ftell().
// Throws a C string if the current file position cannot be determined.
void Transducer::store_lowmem( FILE *file )
{
  fputc('l',file);
  alphabet.store(file);

  NodeNumbering index(*this);

  // position at which the first node record will be written
  const long first_pos = ftell(file);
  if (first_pos < 0)  // ftell reports failure with -1; do not turn it into an offset
    throw "Error: unable to determine the file position in store_lowmem\n";
  unsigned int pos=(unsigned int)first_pos;

  // compute the start position of every node record
  const size_t node_count = index.number_of_nodes();
  vector<unsigned int> startpos;
  startpos.reserve(node_count);
  for( size_t i=0; i<node_count; i++ ) {
    startpos.push_back(pos);
    Node *node=index.get_node(i);
    Arcs *arcs=node->arcs();
    // record size; mirrors what store_lowmem_node writes
    pos += (unsigned)(sizeof(char) // size of final flag
                      + sizeof(unsigned short) // size of number of arcs
                      + arcs->size() * (sizeof(Character) * 2 + sizeof(unsigned int))); // size of n arcs
  }

  // storing nodes
  for( size_t i=0; i<node_count; i++ )
    store_lowmem_node( file, index.get_node(i), index, startpos );
}
/*******************************************************************/
/* */
/* Transducer::store */
/* */
/*******************************************************************/
void Transducer::store( FILE *file )
{
fputc('a',file);
NodeNumbering index(*this);
incr_vmark();
unsigned int n=(unsigned)index.number_of_nodes();
fwrite(&n,sizeof(n),1,file);
store_node( file, root_node(), index, vmark );
alphabet.store(file);
}
/*******************************************************************/
/* */
/* read_node */
/* */
/*******************************************************************/
// Recursively reads one node record (as written by store_node) from
// `file` into `node`.
//   p : table mapping node indices to already created nodes (NULL if the
//       node has not been created yet)
//   a : transducer that owns the newly created nodes and arcs
// Throws a C string if any item cannot be read completely. This covers
// both I/O errors and a premature end of file, which ferror() alone
// would not report.
// NOTE(review): the size of `p` is not known here, so a target index
// read from a corrupt file cannot be range-checked in this function.
static void read_node( FILE *file, Node *node, Node **p, Transducer *a )
{
  // node header: final flag and number of arcs
  char c;
  unsigned short n;
  if (fread(&c,sizeof(c),1,file) != 1 ||
      fread(&n,sizeof(n),1,file) != 1)
    throw "Error encountered while reading transducer from file";
  node->set_final(c);

  for( int i=0; i<n; i++ ) {
    // arc: lower char, upper char, index of the target node
    Character lc,uc;
    unsigned int t;
    if (fread(&lc,sizeof(lc),1,file) != 1 ||
        fread(&uc,sizeof(uc),1,file) != 1 ||
        fread(&t,sizeof(t),1,file) != 1)
      throw "Error encountered while reading transducer from file";

    const bool is_new_target = (p[t] == NULL);
    if (is_new_target)
      p[t] = a->new_node();
    node->add_arc( Label(lc,uc), p[t], a );

    // the record of a target node follows the first arc that points to it
    if (is_new_target)
      read_node(file, p[t], p, a );
  }
}
/*******************************************************************/
/* */
/* Transducer::read_transducer_binary */
/* */
/*******************************************************************/
void Transducer::read_transducer_binary( FILE *file )
{
if (fgetc(file) != 'a')
throw "Error: wrong file format (not a standard transducer)\n";
vmark = deterministic = 0;
unsigned int n;
fread(&n,sizeof(n),1,file); // number of nodes
if (ferror(file))
throw "Error encountered while reading transducer from file";
Node **p=new Node*[n]; // maps indices to nodes
p[0] = root_node();
for( unsigned int i=1; i<n; i++)
p[i] = NULL;
read_node( file, root_node(), p, this );
delete[] p;
alphabet.read(file);
vmark = 1;
deterministic = minimised = 1;
}
/*******************************************************************/
/* */
/* error_message */
/* */
/*******************************************************************/
// Formats "Error: in line <line> of text transducer file" into a static
// buffer and throws a pointer to that buffer. Never returns normally.
// The line number is narrowed to unsigned int before formatting.
static void error_message( size_t line )
{
  static char message[1000];
  const unsigned int line_number = (unsigned int)line;

  sprintf(message, "Error: in line %u of text transducer file", line_number);

  char *text = message;
  throw text;
}
/*******************************************************************/
/* */
/* Transducer::create_node */
/* */
/*******************************************************************/
// Returns the node whose decimal number is written at the start of `s`.
// The table `node` is grown as needed and the node is created on first
// use. Reports an error for `line` if `s` does not start with a number
// or the number is negative.
Node *Transducer::create_node( vector<Node*> &node, char *s, size_t line )
{
  char *end;
  const long id = strtol(s, &end, 10);
  if (end == s || id < 0)
    error_message( line );

  // make sure the table has a slot for this id
  if (id >= (long)node.size())
    node.resize(id+1, NULL);

  Node *&slot = node[id];
  if (slot == NULL)
    slot = new Node;
  return slot;
}
/*******************************************************************/
/* */
/* next_string */
/* */
/*******************************************************************/
// Extracts the next field from the line buffer `s`.
// The field ends at the next tab, newline, carriage return or at the end
// of the string. A backslash quotes the following character. The field
// is unquoted in place and NUL-terminated; a pointer to it is returned.
// On return, `s` points to the start of the following field, or is NULL
// if the end of the line was reached.
// Reports an error for `line` (via error_message, which throws) if
//   - `s` is NULL, i.e. the line ended before this field,
//   - the field is empty, or
//   - a backslash is the last character of the buffer.
static char *next_string( char* &s, size_t line )
{
  if (s == NULL)
    error_message(line); // no more fields on this line

  // scan the input up to the next tab or newline character
  // and unquote symbols preceded by a backslash
  char *p = s; // write position
  char *q = s; // read position
  while (*q!=0 && *q!='\t' && *q!='\n' && *q!='\r') {
    if (*q == '\\') {
      q++;
      if (*q == 0)
        error_message(line); // nothing to quote; do not step past the terminator
    }
    *(p++) = *(q++);
  }
  if (p == s)
    error_message(line); // no string found
  char *result=s;

  // skip over following whitespace
  while (*q == ' ' || *q == '\t' || *q == '\n' || *q == '\r')
    q++;
  if (*q == 0)
    s = NULL; // end of string was reached
  else
    s = q; // move the string pointer s
  *p = 0; // mark the end of the result string
  return result;
}
/*******************************************************************/
/* */
/* Transducer::read_transducer_text */
/* */
/*******************************************************************/
// Reads a transducer from a text file, one line per entry:
//   <node>                                   marks <node> as final
//   <node> <target> <lower sym> <upper sym>  adds an arc
// Node 0 is the root node. A line that has a target but lacks one of the
// symbols, or whose label is the epsilon label, is reported through
// error_message (which throws).
// Line numbers passed to error_message start at 0 for the first line.
void Transducer::read_transducer_text( FILE *file )
{
  vector<Node*> nodes;
  nodes.push_back(root_node());
  vmark = deterministic = 0;

  char buffer[10000];
  for( size_t line=0; fgets(buffer, 10000, file ); line++ ) {
    char *p = buffer;
    char *s = next_string(p, line);
    Node *node = create_node( nodes, s, line );

    // a line consisting of a node number only marks a final node
    if (p == NULL) {
      node->set_final(true);
      continue;
    }

    s = next_string(p, line);
    Node *target = create_node( nodes, s, line );

    // next_string sets p to NULL at the end of the line; an arc line
    // must still provide both symbols
    if (p == NULL)
      error_message( line );
    s = next_string(p, line);
    Character lc = alphabet.add_symbol(s);

    if (p == NULL)
      error_message( line );
    s = next_string(p, line);
    Character uc = alphabet.add_symbol(s);

    Label l(lc,uc);
    if (l == Label::epsilon)
      error_message( line );
    alphabet.insert(l);
    node->add_arc( l, target, this );
  }

  vmark = 1;
  deterministic = minimised = 1;
}
/*******************************************************************/
/* */
/* Transducer::Transducer */
/* */
/*******************************************************************/
// Constructs a transducer by reading it from `file`, either in the
// binary format (binary == true) or in the text format.
Transducer::Transducer( FILE *file, bool binary )
{
  if (!binary) {
    read_transducer_text( file );
    return;
  }
  read_transducer_binary( file );
}
/* EPSILON REMOVAL ALGORITHM written by Erik Axelson starts here */
/*******************************************************************/
/* */
/* node_in_copy_tr */
/* */
/*******************************************************************/
/* Returns the node of 'copy_tr' that corresponds to 'node'. The lookup
   key is the number of 'node' according to 'nn'. If 'mapper' has no entry
   for that number yet, a new node is created in 'copy_tr' (final if
   'node' is final) and recorded in 'mapper'. */
Node *node_in_copy_tr( Node *node, Transducer *copy_tr, NodeNumbering &nn, map<int, Node*> &mapper ) {
  const int node_number = nn[node]; // number of the node in the original transducer

  map<int,Node*>::iterator found = mapper.find(node_number);
  if (found != mapper.end())
    return found->second; // already associated

  Node *associated_node = copy_tr->new_node();
  if (node->is_final())
    associated_node->set_final(true);
  mapper[node_number] = associated_node;
  return associated_node;
}
/*******************************************************************/
/* */
/* Transducer::copy_nodes */
/* */
/*******************************************************************/
/* Recursive epsilon removal algorithm. Copies the arcs leaving
search_node, and their target nodes, to the node copy_tr_start_node
in transducer copy_tr. Epsilon arcs are not copied; instead the arcs
of their target nodes are attached to copy_tr_start_node as well.
nn and mapper are used to associate nodes of this transducer
with nodes in copy_tr (see node_in_copy_tr). */
void Transducer::copy_nodes( Node *search_node, Transducer *copy_tr,
Node *copy_tr_start_node,
NodeNumbering &nn, map<int, Node*> &mapper ) {
// go through all arcs leaving from search node
// (the iterator lists the epsilon arcs first)
for( ArcsIter it(search_node->arcs()); it; it++ ) {
Arc arc=*it;
if (arc.label().is_epsilon()) {
// 'forward', which is originally NULL, is used as a flag
// for detecting epsilon transition loops: while search_node is being
// expanded for copy_tr_start_node, its forward pointer is set to
// copy_tr_start_node, and an epsilon arc is only followed if that
// is not already the case
if (search_node->forward() != copy_tr_start_node) {
search_node->set_forward(copy_tr_start_node); // set epsilon flag
// a final state reachable via epsilon makes the start node final
if (arc.target_node()->is_final())
copy_tr_start_node->set_final(true);
// continue with the epsilon target, still copying to the same start node
copy_nodes(arc.target_node(), copy_tr, copy_tr_start_node, nn, mapper);
search_node->set_forward(NULL); // remove epsilon flag
}
}
else {
// target node in copy_tr
Node *copy_tr_end_node = node_in_copy_tr(arc.target_node(), copy_tr, nn, mapper);
// add arc to copy_tr
copy_tr_start_node->add_arc( Label(arc.label().lower_char(),
arc.label().upper_char()),
copy_tr_end_node,
copy_tr );
// if the target node is not visited, copy nodes recursively
// (was_visited also marks the target node with the current vmark)
if ( !(arc.target_node()->was_visited(vmark)) )
copy_nodes(arc.target_node(), copy_tr, copy_tr_end_node, nn, mapper);
}
}
}
/*******************************************************************/
/* */
/* Transducer::remove_epsilons */
/* */
/*******************************************************************/
// Returns a new transducer in which the epsilon arcs have been removed
// by copy_nodes. If this transducer is flagged as deterministic or
// minimised, a plain copy is returned instead.
Transducer &Transducer::remove_epsilons()
{
  if ( deterministic || minimised )
    return this->copy();

  NodeNumbering nn(*this);
  incr_vmark();

  Transducer *result = new Transducer();
  result->alphabet.copy(alphabet);
  Node *result_root = result->root_node();

  // mark the root node of this transducer as visited
  root_node()->was_visited(vmark);

  // the root of the result is final iff the root of this transducer is
  if (root_node()->is_final())
    result_root->set_final(true);

  // associate the two root nodes
  // (node numbering for root_node is zero)
  map<int, Node*> node_map;
  node_map[0] = result_root;

  copy_nodes(root_node(), result, result_root, nn, node_map);
  incr_vmark();
  return *result;
}
// EPSILON REMOVAL ALGORITHM ENDS
}
/*******************************************************************/
/* */
/* FILE fst.h */
/* MODULE fst */
/* PROGRAM SFST */
/* AUTHOR Helmut Schmid, IMS, University of Stuttgart */
/* */
/* PURPOSE finite state tools */
/* */
/*******************************************************************/
#ifndef _FST_H_
#define _FST_H_
#include "alphabet.h"
/*******************************************************************/
/* include commands */
/*******************************************************************/
#include <string>
#include <vector>
#include <map>
#include <set>
using std::map;
using std::set;
using std::vector;
#include "mem.h"
namespace SFST {
// Type of the visit marks stored in nodes (Node::was_visited) and of the
// current mark kept by a transducer.
typedef unsigned short VType;
// Defined elsewhere. NOTE(review): presumably suppresses progress
// output when non-zero - confirm against the definition.
extern int Quiet;
// forward declarations of the classes used below
class Node;
class Arc;
class Arcs;
class Transducer;
class Transition;
// Hash functor for node pointers: the pointer value itself is the hash.
struct hashf {
  size_t operator()(const Node *node) const {
    return (size_t)node;
  }
};
// Equality functor for node pointers: compares the addresses.
struct equalf {
  int operator()(const Node *lhs, const Node *rhs) const {
    return lhs == rhs;
  }
};
// Set of node pointers, hashed and compared by address (hashf / equalf).
typedef hash_set<Node*, hashf, equalf> NodeHashSet;
/***************** class Arc *************************************/
// A single transition: a label, the target node and a link to the next
// arc of the list it belongs to. The link is only accessible to Arcs and
// ArcsIter.
class Arc {
  friend class Arcs;
  friend class ArcsIter;

 private:
  Label l;
  Node *target;
  Arc *next;

 public:
  // Sets label and target; the 'next' link is not touched.
  void init( Label ll, Node *node ) {
    l = ll;
    target = node;
  }

  Label label() const { return l; }

  Node *target_node() { return target; }
  const Node *target_node() const { return target; }
};
/***************** class Arcs ************************************/
// The outgoing arcs of a node, kept in two lists: one headed by
// first_epsilon_arcp and one headed by first_arcp.
class Arcs {
  friend class ArcsIter;

 private:
  Arc *first_arcp;
  Arc *first_epsilon_arcp;

 public:
  // Resets both lists to empty.
  void init() {
    first_epsilon_arcp = NULL;
    first_arcp = NULL;
  }
  Arcs() { init(); }

  Node *target_node( Label l );
  const Node *target_node( Label l ) const;
  void add_arc( Label, Node*, Transducer* );
  int remove_arc( Arc* );

  bool is_empty() const {
    return first_arcp == NULL && first_epsilon_arcp == NULL;
  }
  bool epsilon_transition_exists() const {
    return first_epsilon_arcp != NULL;
  }
  bool non_epsilon_transition_exists() const {
    return first_arcp != NULL;
  }

  int size() const;
};
/***************** class ArcsIter ********************************/
// Iterator over the arcs of an Arcs object. With type 'all' the epsilon
// arcs are listed first, followed by the other arcs; 'eps' and 'non_eps'
// restrict the iteration to one of the two lists. The iterator converts
// to the current Arc* (NULL once the iteration is finished).
class ArcsIter {
 private:
  Arc *current_arcp;  // arc the iterator currently points to
  Arc *more_arcs;     // list to continue with once the current one ends

 public:
  typedef enum {all,non_eps,eps} IterType;

  ArcsIter( const Arcs *arcs, IterType type=all ) {
    more_arcs = NULL;
    switch (type) {
    case all:
      if (arcs->first_epsilon_arcp) {
        // epsilon arcs first, the remaining arcs afterwards
        current_arcp = arcs->first_epsilon_arcp;
        more_arcs = arcs->first_arcp;
      }
      else {
        current_arcp = arcs->first_arcp;
      }
      break;
    case non_eps:
      current_arcp = arcs->first_arcp;
      break;
    default:
      current_arcp = arcs->first_epsilon_arcp;
      break;
    }
  }

  // Advances to the next arc; does nothing once the end was reached.
  void operator++( int ) {
    if (current_arcp == NULL)
      return;
    current_arcp = current_arcp->next;
    if (current_arcp == NULL && more_arcs != NULL) {
      // first list exhausted: switch to the second one
      current_arcp = more_arcs;
      more_arcs = NULL;
    }
  }

  operator Arc*() const { return current_arcp; }
};
/***************** class Node ************************************/
// A state of a transducer: final flag, visit mark, outgoing arcs and a
// general purpose 'forward' pointer.
class Node {
 private:
  bool final;
  VType visited;
  Arcs arcsp;
  Node *forwardp;

 public:
  Node() { init(); }
  void init( void );

  // final flag
  bool is_final() const { return final; }
  void set_final( bool flag ) { final = flag; }

  // forward pointer
  void set_forward( Node *node ) { forwardp = node; }
  Node *forward() { return forwardp; }

  // arcs
  Arcs *arcs() { return &arcsp; }
  const Arcs *arcs() const { return &arcsp; }
  const Node *target_node( Label l ) const { return arcs()->target_node(l); }
  Node *target_node( Label l ) { return arcs()->target_node(l); }
  void add_arc( Label l, Node *n, Transducer *a ) { arcs()->add_arc(l, n, a); }

  void clear_visited( NodeHashSet &nodeset );

  // Reports whether the node already carries the mark 'vmark' and
  // marks it with 'vmark' in any case.
  bool was_visited( VType vmark ) {
    const bool seen = (visited == vmark);
    visited = vmark;
    return seen;
  }

  // Like was_visited, but leaves the visited flag unchanged.
  bool check_visited( VType vm ) {
    return visited == vm;
  }
};
/***************** class Node2Int *********************************/
// Maps node pointers to integers. Looking up a node that has no entry
// yet creates an entry with the value 0.
class Node2Int {
  // hash by address
  struct hashf {
    size_t operator()(const Node *node) const { return (size_t)node; }
  };
  // compare by address
  struct equalf {
    int operator()(const Node *n1, const Node *n2) const { return n1 == n2; }
  };
  typedef hash_map<Node*, int, hashf, equalf> NL;

 private:
  NL number;

 public:
  int &operator[]( Node *node ) {
    NL::iterator it = number.find(node);
    if (it != number.end())
      return it->second;
    // unknown node: insert it with the value 0
    return number.insert(NL::value_type(node, 0)).first->second;
  }
};
/***************** class NodeNumbering ****************************/
// Assigns numbers to the nodes of a transducer and provides the mapping
// in both directions: node -> number (operator[]) and number -> node
// (get_node).
class NodeNumbering {
 private:
  vector<Node*> nodes;
  Node2Int nummap;
  void number_node( Node*, Transducer& );

 public:
  NodeNumbering( Transducer& );

  int operator[]( Node *node ) {
    return nummap[node];
  }
  size_t number_of_nodes() {
    return nodes.size();
  }
  Node *get_node( size_t n ) {
    return nodes[n];
  }
};
/***************** class PairMapping ****************************/
// Maps a pair of nodes (taken from two transducers) to a single node of
// another transducer.
class PairMapping {
  typedef std::pair<Node*, Node*> NodePair;

 private:
  // hash: xor of the two addresses
  struct hashf {
    size_t operator()(const NodePair p) const {
      return (size_t)p.first ^ (size_t)p.second;
    }
  };
  // two pairs are equal if both components are the same nodes
  struct equalf {
    int operator()(const NodePair p1, const NodePair p2) const {
      return (p1.first==p2.first && p1.second == p2.second);
    }
  };
  typedef hash_map<NodePair, Node*, hashf, equalf> PairMap;
  PairMap pm;

 public:
  typedef PairMap::iterator iterator;

  iterator begin() { return pm.begin(); }
  iterator end() { return pm.end(); }

  iterator find( Node *n1, Node *n2 ) {
    return pm.find( NodePair(n1,n2) );
  }

  Node* &operator[]( NodePair p ) {
    return pm[p];
  }
};
/***************** class Transducer *******************************/
// A finite state transducer: a root node, the memory manager used for
// further nodes and arcs, the alphabet of labels, and flags recording
// whether the transducer is deterministic / minimised.
// The operations that return Transducer& (copy, determinise, the
// operators, ...) are declared here and defined elsewhere.
class Transducer {
private:
VType vmark; // current visit mark, compared with the marks stored in the nodes
Node root;
Mem mem;
typedef std::set<Label, Label::label_cmp> LabelSet;
typedef hash_map<Character, char*> SymbolMap;
// Advances the visit mark. When the counter wraps around to 0, the
// visited flags are cleared via root.clear_visited() and the mark
// restarts at 1.
void incr_vmark( void ) {
if (++vmark == 0) {
NodeHashSet nodes;
root.clear_visited( nodes );
fprintf(stderr,"clearing flags\n");
vmark = 1;
}
};
// private helper functions
void reverse_node( Node *old_node, Transducer *new_node );
Label recode_label( Label, bool lswitch, bool recode, Alphabet& );
Node *copy_nodes( Node *n, Transducer *a,
bool lswitch=false, bool recode=false );
void rec_cat_nodes( Node*, Node* );
void negate_nodes( Node*, Node* );
bool compare_nodes( Node *node, Node *node2, Transducer &a2 );
void map_nodes( Node *node, Node *node2, Transducer *a, Level level );
void freely_insert_at_node( Node *node, Label l );
int print_strings_node(Node *node, char *buffer, int pos, FILE *file, bool);
bool infinitely_ambiguous_node( Node* );
bool is_cyclic_node( Node*, NodeHashSet &visited );
bool is_automaton_node( Node* );
bool generate1( Node*, Node2Int&, char*, int, char*, int, FILE* );
void store_symbols( Node*, SymbolMap&, LabelSet& );
void splice_nodes(Node*, Node*, Label sl, Transducer*, Transducer*);
void splice_arc( Node*, Node*, Node*, Transducer* );
void enumerate_paths_node( Node*, vector<Label>&, NodeHashSet&,
vector<Transducer*>& );
void replace_char2( Node*, Node*, Character, Character, Transducer* );
Node *create_node( vector<Node*>&, char*, size_t line );
void read_transducer_binary( FILE* );
void read_transducer_text( FILE* );
void build_TT( Node *node, vector<Transition> &transtab, NodeNumbering &nn);
size_t size_node( Node *node );
public:
static bool hopcroft_minimisation; // selects the algorithm used by minimise()
bool deterministic;
bool minimised;
Alphabet alphabet; // The set of all labels, i.e. character pairs
// creates a transducer with vmark 0 and both flags cleared
Transducer( void ) : root(), mem()
{ vmark = 0; deterministic = minimised = false; };
Transducer( Transducer&, vector<size_t>&, NodeNumbering&, size_t );
// conversion of a string to a transducer
Transducer( char *s, const Alphabet *a=NULL, bool extended=false );
// reads a word list from a file and stores it in the transducer
Transducer( std::istream&, const Alphabet *a=NULL, bool verbose=false );
// reads a transducer from a binary or text file
Transducer( FILE*, bool binary=true );
// turns a sequence of labels into a transducer
Transducer( vector<Label>& );
// HFST additions...
Transducer &expand( set<char*> &s );
Node *expand_nodes( Node *node, Transducer *a, set<char*> &s );
void expand_node( Node *origin, Label &l, Node *target, Transducer *a, set<char*> &s );
void copy_nodes( Node *search_node, Transducer *copy_tr,
Node *start_node,
NodeNumbering &nn, map<int, Node*> &mapper );
Transducer &remove_epsilons();
// ...HFST additions end
Node *root_node( void ) { return &root; }; // returns the root node
const Node *root_node( void ) const { return &root; }; // returns the root node
Node *new_node( void ); // memory allocation for a new node
Arc *new_arc( Label l, Node *target ); // memory allocation for a new arc
void add_string( char *s, bool extended=false, Alphabet *a=NULL );
void complete_alphabet( void );
void minimise_alphabet( void );
int print_strings( FILE*, bool with_brackets=true ); // enumerate all strings
bool analyze_string( char *s, FILE *file, bool with_brackets=true );
bool generate_string( char *s, FILE *file, bool with_brackets=true );
bool generate( FILE *file, bool separate=false );
void clear( void ); // clears the transducer. The resulting transducer
// is like one created with Transducer()
// copy duplicates a transducer
// if called with a non-zero argument, upper and lower level are switched
Transducer &copy( bool lswitch=false, const Alphabet *al=NULL );
Transducer &switch_levels( void ) { return copy( true ); };
Transducer &splice( Label l, Transducer *a);
Transducer &freely_insert( Label l );
Transducer &replace_char( Character c, Character nc );
Transducer &level( Level );
Transducer &lower_level( void ) // creates a transducer for the "lower" language
{ return level(lower); };
Transducer &upper_level( void ) // creates a transducer for the "upper" language
{ return level(upper); };
Transducer &determinise( void ); // creates a deterministic transducer
Transducer &rev_det_minimise( bool verbose );
Transducer &hopcroft_minimise( bool verbose );
// minimises with the algorithm selected by hopcroft_minimisation
Transducer &minimise( bool verbose=true ) {
if (hopcroft_minimisation)
return hopcroft_minimise( verbose );
return rev_det_minimise( verbose );
}
void store( FILE* ); // stores the transducer in binary format
void store_lowmem( FILE* );
void read( FILE* ); // reads a transducer in binary format
bool enumerate_paths( vector<Transducer*>& );
size_t size();
void build_transtab( vector<Transition> &transtab, NodeNumbering &nn );
Transducer &reverse( void ); // reverse language
Transducer &operator|( Transducer& ); // union, disjunction
Transducer &operator+( Transducer& ); // concatenation
Transducer &operator/( Transducer& ); // subtraction
Transducer &operator&( Transducer& ); // intersection, conjunction
Transducer &operator||( Transducer& ); // composition
Transducer &operator!( void ); // complement, negation
Transducer &kleene_star( void );
bool operator==( Transducer& ); // minimises its arguments first
bool is_cyclic( void );
bool is_automaton( void );
bool is_infinitely_ambiguous( void );
bool is_empty( void ); // For efficiency reasons, these functions
bool generates_empty_string( void );// are better called after minimisation
friend class NodeNumbering;
friend class EdgeCount;
friend class MakeCompactTransducer;
friend std::ostream &operator<<(std::ostream&, Transducer&);
};
}
#endif
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment