Commit 0b611765 authored by Ben Campbell's avatar Ben Campbell
Browse files

Initial commit

parents
# Build configuration.
#
# TYPE selects the build flavour: debug, prof, static, lib, Sun64bit,
# or the empty string (optimised default build).
TYPE =

# Try a different definition of SGILIB if hash_map is not found.
#SGILIB=
SGILIB=-DSGI__gnu_cxx
#SGILIB=-DSGIext

# Readline support: uncomment the next two lines to enable it and leave
# them commented out if readline is not found.
#READLINE = -DREADLINE
#LREADLINE = -lreadline -lhistory -lncurses

# Data type used for the encoding of the symbols (char, short, or int);
# short is the default.
#CDT=-DCODE_DATA_TYPE=short

CXX = g++
WARNING = -Wall -Wcast-qual -Wconversion -std=c++98 -Wno-deprecated -ansi

# Flavour-specific flags.  Every flavour except debug defines NDEBUG.
ifeq ($(TYPE),debug)
  DEBUG = -ggdb
  OPTIMIZATION = -O0
else
  DEBUG = -DNDEBUG
  ifeq ($(TYPE),prof)
    OPTIMIZATION = -O2
    PROFILE = -pg
  else
    OPTIMIZATION = -O3
    ifeq ($(TYPE),static)
      STATIC = -static
    else
      ifeq ($(TYPE),Sun64bit)
        LOPT = -L/usr/lib/64 -L/usr/local/lib/sparcv9 -R/usr/lib/64 -R/usr/local/lib/sparcv9
        CXX = g++64
        BITS= -mcpu=v9
      else
        ifeq ($(TYPE),lib)
          LIB = -fPIC
        endif
      endif
    endif
  endif
endif

# Link with the same driver that compiles.  A hard-coded g++ here would
# ignore the Sun64bit setting (CXX = g++64) and any CXX given on the
# command line for everything that is linked through $(CL).
CC = $(CXX)
CL = $(CC)

CFLAGS = $(DEBUG) $(PROFILE) $(LIB) $(OPTIMIZATION) $(WARNING) $(SGILIB) $(CDT) $(BITS) $(READLINE)
LDFLAGS = $(DEBUG) $(PROFILE) $(STATIC) $(LOPT)
CXXFLAGS = $(CFLAGS)
LXXFLAGS = $(LDFLAGS)

# Installation commands and destination (PREFIX ends with a slash).
INSTALL = install
INSTALL_MANPAGES = install -m 644
INSTALL_DIR = install -d -m 755
DESTDIR =
PREFIX = /usr/local/
# Executables built by "all" and copied by "install".
PROGRAMS = \
  fst-compiler fst-infl fst-generate fst-print fst-compare \
  fst-compact fst-infl2 fst-lowmem fst-infl3 fst-lattice fst-match \
  fst-parse fst-parse2 fst-train fst-compiler-utf8 fst-text2bin fst-mor

ALLPROGRAMS = $(PROGRAMS)

# What "all" builds: stripped programs by default, unstripped programs
# for the debug flavour, the library archive for the lib flavour.
TARGET = strip
ifeq ($(TYPE),debug)
  TARGET = $(ALLPROGRAMS)
endif
ifeq ($(TYPE),lib)
  TARGET = libsfst.tar.gz
endif

# Objects linked into the programs by the generic pattern rule (OBJ) and
# the larger set used by the compilers (OBJ2).
OBJ = basic.o utf8.o alphabet.o fst.o operators.o determinise.o hopcroft.o
OBJ2 = $(OBJ) compact.o make-compact.o interface.o
# Targets that are commands rather than files.  maninstall belongs here
# as well: without it a stray file named "maninstall" would make the man
# page installation a silent no-op.
.PHONY: all clean realclean archive remake install maninstall strip

# Default goal: build whatever $(TARGET) names.
all: $(TARGET)

# Build all programs and remove the symbol tables from the binaries.
strip: $(ALLPROGRAMS)
	strip $(ALLPROGRAMS)
# Zip archive of the listed API sources.
fst-api.zip: test.C alphabet.C alphabet.h basic.C basic.h compact.C \
             compact.h lowmem.C lowmem.h robust.C
	zip $@ $^

# Additional prerequisites of individual programs.  These lines carry no
# recipe of their own; the link command is expected to come from the
# "%" pattern rule below.
fst-compact: compact.o make-compact.o
fst-generate: generate.o
fst-match: compact.o
fst-train: compact.o
fst-compiler: $(OBJ2) default-scanner.o

fst-compiler-utf8: $(OBJ2) utf8-scanner.o fst-compiler.o
	$(CXX) -o $@ $^ $(LDFLAGS)

# fst-mor is the only program that links the readline libraries.
fst-mor: $(OBJ) fst-mor.o
	$(CXX) -o $@ $^ $(LDFLAGS) $(LREADLINE)

fst-text2bin: $(OBJ) fst-text2bin.o
	$(CXX) -o $@ $^ $(LDFLAGS)

# Generic rule: link a program from the common objects and its own object.
%: $(OBJ) %.o
	$(CXX) -o $@ $^ $(LDFLAGS)

# Programs that use a reduced object set instead of $(OBJ).
fst-infl2: alphabet.o basic.o compact.o robust.o utf8.o fst-infl2.o
	$(CXX) -o $@ $^ $(LDFLAGS)

fst-infl2-daemon: alphabet.o basic.o compact.o robust.o utf8.o fst-infl2-daemon.o
	$(CXX) -o $@ $^ $(LDFLAGS)

fst-infl3: alphabet.o basic.o utf8.o lowmem.o fst-infl3.o
	$(CXX) -o $@ $^ $(LDFLAGS)

# Test program linked against libsfst in the current directory.
# The library is named after the objects: linkers resolve symbols from
# left to right, so a library listed before the objects that need it can
# be skipped (static linking, --as-needed), leaving undefined references.
test: test.o
	$(CL) -o $@ $^ -L. -lsfst $(LDFLAGS)
# Parser generated by bison.  Depending on the bison version the header
# is written as fst-compiler.C.h or fst-compiler.H; whichever exists is
# renamed to fst-compiler.h.
fst-compiler.C: fst.h compact.h make-compact.h interface.h fst-compiler.yy
	bison -d -o $@ $(filter %.yy,$^)
	[ ! -f $@.h ] || mv $@.h $(basename $@).h
	[ ! -f $(basename $@).H ] || mv $(basename $@).H $(basename $@).h

# Scanners generated by flex from the matching .ll file.
default-scanner.C: fst.h interface.h scanner.ll fst-compiler.C default-scanner.ll
	flex -o$@ $(@:.C=.ll)

utf8-scanner.C: fst.h interface.h scanner.ll fst-compiler.C utf8-scanner.ll
	flex -o$@ $(@:.C=.ll)

# Both scanner sources are derived from scanner.ll: lines tagged for the
# other variant are deleted and the own tag is removed.
default-scanner.ll: scanner.ll
	sed -e '/^utf8>/d' -e 's/^default> *//' $< > $@

utf8-scanner.ll: scanner.ll
	sed -e '/^default>/d' -e 's/^utf8> *//' $< > $@
# Contents of the shared library and the headers shipped with it.
libsfst_objects = alphabet.o basic.o utf8.o compact.o fst.o operators.o determinise.o generate.o hopcroft.o
libsfst_headers = compact.h alphabet.h basic.h fst.h

# Compressed archive containing the shared library and its headers.
libsfst.tar.gz: libsfst.so $(libsfst_headers)
	tar -zcf $@ $^

# Shared library linked from the library objects.
libsfst.so: $(libsfst_objects)
	$(CL) -shared -o $@ $^ $(LDFLAGS)
# Remove the object files and rebuild every program.
remake: clean $(ALLPROGRAMS)

# Remove object files, editor backups and makedepend leftovers.
# Output is discarded by redirection; closing stderr with "2>&-" is
# avoided because it leaves the command without a valid descriptor 2.
clean:
	-rm -f *.o *~ Makefile.bak y.tab.h man1/*~ >/dev/null 2>&1

# Like clean, but also remove the built programs.
realclean: clean
	-rm -f $(ALLPROGRAMS) test >/dev/null 2>&1

# Install the programs below $(DESTDIR)$(PREFIX)bin.
# The directory is created first so that installing into a fresh
# DESTDIR works, and the loop stops at the first failing install
# (a plain "for" loop would only report the status of the last one).
install: $(PROGRAMS)
	$(INSTALL_DIR) $(DESTDIR)$(PREFIX)bin
	for p in $(PROGRAMS); do $(INSTALL) $$p $(DESTDIR)$(PREFIX)bin/$$p || exit 1; done

# Install the manual pages below $(DESTDIR)$(PREFIX)man/man1.
maninstall:
	$(INSTALL_DIR) $(DESTDIR)$(PREFIX)man/man1
	for m in man1/*.1; do $(INSTALL_MANPAGES) $$m $(DESTDIR)$(PREFIX)man/$$m || exit 1; done

# Source archive named after the current date (symbolic links are followed).
archive:
	tar -zhcf VERSION-`date '+%y%m%d'`.tar.gz *.ll *.yy *.[Ch] Makefile man1/*

# Regenerate the header dependencies at the end of this file.
# Errors are ignored so that a missing makedepend does not stop the build.
Makefile: *.C *.ll *.yy *.h
	-makedepend -Y -- $(CFLAGS) -- *.C 2>/dev/null
# Header dependencies of the object files.  The marker line below is used
# by makedepend; keep it unchanged.
# DO NOT DELETE
TransducerTest.o: Transducer.h
alphabet.o: utf8.h alphabet.h basic.h sgi.h
basic.o: basic.h
compact.o: compact.h alphabet.h basic.h sgi.h
default-scanner.o: interface.h utf8.h fst.h alphabet.h basic.h sgi.h mem.h
default-scanner.o: fst-compiler.h
determinise.o: fst.h alphabet.h basic.h sgi.h mem.h
fst-compact.o: make-compact.h fst.h alphabet.h basic.h sgi.h mem.h compact.h
fst-compare.o: fst.h alphabet.h basic.h sgi.h mem.h
fst-compiler.o: make-compact.h fst.h alphabet.h basic.h sgi.h mem.h compact.h
fst-compiler.o: interface.h utf8.h
fst-generate.o: fst.h alphabet.h basic.h sgi.h mem.h
fst-infl.o: fst.h alphabet.h basic.h sgi.h mem.h
fst-infl2-daemon.o: Socket.h compact.h alphabet.h basic.h sgi.h
fst-infl2.o: compact.h alphabet.h basic.h sgi.h
fst-infl3.o: lowmem.h alphabet.h basic.h sgi.h
fst-lattice.o: fst.h alphabet.h basic.h sgi.h mem.h
fst-lowmem.o: fst.h alphabet.h basic.h sgi.h mem.h
fst-match.o: compact.h alphabet.h basic.h sgi.h
fst-mor.o: fst.h alphabet.h basic.h sgi.h mem.h
fst-parse.o: fst.h alphabet.h basic.h sgi.h mem.h
fst-parse2.o: fst.h alphabet.h basic.h sgi.h mem.h
fst-print.o: fst.h alphabet.h basic.h sgi.h mem.h
fst-text2bin.o: fst.h alphabet.h basic.h sgi.h mem.h
fst-train.o: compact.h alphabet.h basic.h sgi.h
fst.o: fst.h alphabet.h basic.h sgi.h mem.h
generate.o: fst.h alphabet.h basic.h sgi.h mem.h
interface.o: interface.h utf8.h fst.h alphabet.h basic.h sgi.h mem.h
lowmem.o: lowmem.h alphabet.h basic.h sgi.h
make-compact.o: make-compact.h fst.h alphabet.h basic.h sgi.h mem.h compact.h
operators.o: fst.h alphabet.h basic.h sgi.h mem.h
robust.o: compact.h alphabet.h basic.h sgi.h
test.o: compact.h alphabet.h basic.h sgi.h
utf8-scanner.o: interface.h utf8.h fst.h alphabet.h basic.h sgi.h mem.h
utf8-scanner.o: fst-compiler.h
utf8.o: utf8.h
hopcroft.o: fst.h alphabet.h basic.h sgi.h mem.h
/*******************************************************************/
/* */
/* File: Socket.h */
/* Author: Helmut Schmid */
/* Purpose: */
/* Created: Fri Aug 15 14:19:19 2008 */
/* Modified: Wed Sep 29 08:44:43 2010 (schmid) */
/* */
/*******************************************************************/
namespace SFST {

  /*****************  class Socket  **********************************/

  /* TCP server socket.  The constructor creates the socket, binds it to   */
  /* the given port on INADDR_ANY and starts listening; next_client()      */
  /* accepts one connection.  Any failure during the setup terminates the  */
  /* program with exit(1).                                                 */

  class Socket {

    int portno; /* port address */
    int sockfd; /* descriptor of the listening socket */
    struct sockaddr_in serv_addr; /* local address the socket is bound to */
    struct sockaddr cli_addr; /* address of the last accepted client */
    socklen_t clilen; /* size of cli_addr, passed to accept() */

  public:

    /* accept the next client connection and return the value of accept() */
    int next_client() {
      /* accept() updates clilen, so it has to be reset before each call. */
      /* The peer address goes into cli_addr; serv_addr must not be used  */
      /* here because accept() would overwrite the bound address.         */
      clilen = sizeof(cli_addr);
      return accept( sockfd, &cli_addr, &clilen);
    }

    /* create a listening socket on the given port (default 7070) */
    Socket( int port=7070 ): portno( port ) {

      /* create a socket */
      sockfd = socket(AF_INET, SOCK_STREAM, 0);
      if (sockfd < 0) {
        fprintf(stderr, "ERROR opening socket\n");
        exit(1);
      }

      /* initialise serv_addr with zeros */
      bzero((char *) &serv_addr, sizeof(serv_addr));
      serv_addr.sin_family = AF_INET;

      /* convert portno to network byte order and */
      /* store it in serv_addr.sin_port */
      serv_addr.sin_port = htons(portno);

      /* set the host IP address (available in INADDR_ANY) */
      serv_addr.sin_addr.s_addr = INADDR_ANY;

      /* bind the socket to a host and port */
      if (bind(sockfd, (struct sockaddr *) &serv_addr, sizeof(serv_addr)) < 0) {
        fprintf(stderr, "ERROR on binding\n");
        exit(1);
      }

      /* Listen to the socket; up to 5 connections at a time */
      if (listen(sockfd, 5) < 0) {
        fprintf(stderr, "ERROR on listening\n");
        exit(1);
      }
      clilen = sizeof(cli_addr);
    }
  };
}
/*******************************************************************/
/* */
/* File: Transducer.h */
/* Author: Helmut Schmid */
/* */
/*******************************************************************/
#include <stdio.h>
#include <vector>
namespace SFST {

  // A transition of the transducer: a symbol pair and a target state.
  // During analysis the upper symbol is matched against the input and the
  // lower symbol is appended to the analysis; an upper symbol of 0 consumes
  // no input.
  class Transition {
  public:
    char lower;
    char upper;
    int target; // number of the target state
    Transition( char l, char u, size_t t ) { lower = l; upper = u; target = (int)t; }
  };


  // A state of the transducer with its outgoing transitions.
  class State {
  public:
    bool final; // true if an analysis may end in this state
    std::vector<Transition> transition;
    State() { final = false; }
  };


  // Simple transducer which is read from a text file and maps an input
  // string to the set of its analyses. State 0 is the start state.
  class Transducer {

  private:
    std::vector<State> state;

    // report a malformed line of the transducer file and terminate
    static void syntax_error( unsigned int line, const char *text ) {
      fprintf(stderr,"Error: in line %u of transducer file at: %s\n",
              line, text);
      exit(1);
    }

    // make sure that state number sn exists
    // negative state numbers are rejected as syntax errors because they
    // would otherwise be used as vector indices
    void require_state( int sn, unsigned int line, const char *text ) {
      if (sn < 0)
        syntax_error( line, text );
      if ((size_t)sn >= state.size())
        state.resize( (size_t)sn + 1 );
    }

    // recursive traversal of the transducer starting in state sn with the
    // remaining input s; ana holds the analysis symbols collected so far
    // and each completed analysis is appended to analyses
    void analyze1( int sn, const char *s, std::vector<char> &ana,
                   std::vector<std::vector<char> > &analyses )
    {
      if (*s == 0 && state[sn].final)
        analyses.push_back( ana );

      std::vector<Transition> &t=state[sn].transition;
      for( size_t i=0; i<t.size(); i++ ) {
        if (t[i].upper == 0) {
          // transition which consumes no input
          ana.push_back(t[i].lower);
          analyze1( t[i].target, s, ana, analyses);
          ana.pop_back();
        }
        else if (t[i].upper == *s) {
          // transition which consumes the next input character
          ana.push_back(t[i].lower);
          analyze1( t[i].target, s+1, ana, analyses);
          ana.pop_back();
        }
      }
    }

  public:

    // Reads the transducer from a text file. Each line has one of the forms
    //   final: S        state S is final
    //   S l:<> T        transition from S to T with lower symbol l only
    //   S <>:u T        transition from S to T with upper symbol u only
    //   S l:u T         transition with lower symbol l and upper symbol u
    //   S c T           transition with identical lower and upper symbol
    // Any other line is reported (with its 1-based line number) and the
    // program is terminated.
    Transducer( FILE *file ) {
      char buffer[1000];
      for( unsigned int line=1; (fgets(buffer, 1000, file)); line++ ) {
        int s, t;
        char u, l;
        if (sscanf( buffer, "final: %d", &s) == 1) {
          require_state( s, line, buffer );
          state[s].final = true;
          continue;
        }

        char lo=0, up=0;
        if (sscanf( buffer, "%d %c:<> %d", &s, &l, &t) == 3)
          lo = l;
        else if (sscanf( buffer, "%d <>:%c %d", &s, &u, &t) == 3)
          up = u;
        else if (sscanf( buffer, "%d %c:%c %d", &s, &l, &u, &t) == 4) {
          lo = l;
          up = u;
        }
        else if (sscanf( buffer, "%d %c %d", &s, &l, &t) == 3)
          lo = up = l;
        else
          syntax_error( line, buffer );

        // The target state has to exist as well; otherwise the analysis
        // would index past the end of the state vector when it follows a
        // transition into a state which has no line of its own.
        require_state( s, line, buffer );
        require_state( t, line, buffer );
        state[s].transition.push_back(Transition(lo, up, t));
      }
    }

    // Appends all analyses of the string s to analyses.
    void analyze( const char *s, std::vector<std::vector<char> > &analyses ) {
      if (state.empty())
        return; // empty transducer file: there is no start state
      std::vector<char> ana;
      analyze1( 0, s, ana, analyses );
    }
  };

}
#include "Transducer.h"
#include <iostream>
using std::cerr;
int main( int argc, char **argv )
{
FILE *file;
file = fopen(argv[1],"rb"); // open the input file
if (file == NULL) exit(1);
try {
Transducer transducer(file); // (1) read the transducer
char buffer[1000];
while (fgets(buffer, 1000, stdin)) { // (2) next input line
// delete newline character
int l=strlen(buffer)-1;
if (buffer[l] == '\n')
buffer[l] = '\0';
printf("> %s\n", buffer); // print the input line
vector<vector<char> > analyses;
transducer.analyze(buffer, analyses); // (3) analyse the input
if (analyses.size() == 0)
printf( "no result for %s\n", buffer);
else
for( size_t i=0; i<analyses.size(); i++ ) {
for( size_t k=0; k<analyses[i].size(); k++ )
fputc(analyses[i][k], stdout);
fputc('\n', stdout);
}
}
}
catch (const char *p) {
cerr << p << "\n";
return 1;
}
return 0;
}
This diff is collapsed.
/*******************************************************************/
/* */
/* FILE alphabet.h */
/* MODULE alphabet */
/* PROGRAM SFST */
/* AUTHOR Helmut Schmid, IMS, University of Stuttgart */
/* */
/* PURPOSE finite state tools */
/* */
/*******************************************************************/
#ifndef _ALPHABET_H_
#define _ALPHABET_H_
#include <stdio.h>
#include "basic.h"
#include <set>
#include <vector>
#include <iostream>
#include <cstring>
#include "sgi.h"
namespace SFST {
#ifdef CODE_DATA_TYPE
  // symbol codes use the unsigned variant of the configured type
  typedef unsigned CODE_DATA_TYPE Character;
#else
  // symbol codes are unsigned shorts by default
  typedef unsigned short Character;
#endif

  // selects the level on which some action is to be performed:
  // the surface level (upper) or the analysis level (lower)
  enum Level {upper, lower};


  /*****************  class Label  ***********************************/

  // A pair of symbol codes: the lower (analysis) symbol and the upper
  // (surface) symbol of a transition.
  class Label {

  private:
    // storage of the two symbol codes
    struct {
      Character lower;
      Character upper;
    } label;

  public:
    static const Character epsilon=0; // code of the empty symbol

    // label whose two symbols are both c
    Label( Character c=epsilon ) { label.lower = c; label.upper = c; }

    // label with lower symbol c1 and upper symbol c2
    Label( Character c1, Character c2 ) { label.lower = c1; label.upper = c2; }

    // symbol on the requested level
    Character get_char( Level l ) const {
      if (l == upper)
        return label.upper;
      return label.lower;
    }

    // surface symbol of the label
    Character upper_char() const { return label.upper; }

    // analysis symbol of the label
    Character lower_char() const { return label.lower; }

    // copy of the label in which each occurrence of c is replaced by nc
    Label replace_char( Character c, Character nc ) const {
      Character lo = lower_char();
      Character up = upper_char();
      if (lo == c)
        lo = nc;
      if (up == c)
        up = nc;
      return Label( lo, up );
    }

    // two labels are equal if both symbols are equal
    int operator==( Label l ) const {
      return !(label.lower != l.label.lower || label.upper != l.label.upper);
    }
    int operator!=( Label l ) const {
      return !operator==( l );
    }

    // ordering used for sorting labels in compact.C:
    // the upper symbols are compared first, then the lower symbols
    int operator<( Label l ) const {
      if (upper_char() != l.upper_char())
        return upper_char() < l.upper_char();
      return lower_char() < l.lower_char();
    }
    int operator>( Label l ) const {
      if (upper_char() != l.upper_char())
        return upper_char() > l.upper_char();
      return lower_char() > l.lower_char();
    }

    // true if both symbols are epsilon;
    // transitions with such labels are epsilon transitions
    int is_epsilon() const {
      return upper_is_epsilon() && lower_is_epsilon();
    }

    // true if the upper symbol is epsilon
    int upper_is_epsilon() const { return label.upper == epsilon; }

    // true if the lower symbol is epsilon
    int lower_is_epsilon() const { return label.lower == epsilon; }

    // hash function for storing labels in a hash table
    struct label_hash {
      size_t operator() ( const Label l ) const {
        const size_t lo = (size_t)l.lower_char();
        const size_t up = (size_t)l.upper_char();
        return lo ^ (up << 16) ^ (up >> 16);
      }
    };

    // strict ordering for storing labels in an ordered set:
    // the lower symbols are compared first, then the upper symbols
    struct label_cmp {
      bool operator() ( const Label l1, const Label l2 ) const {
        if (l1.lower_char() != l2.lower_char())
          return l1.lower_char() < l2.lower_char();
        return l1.upper_char() < l2.upper_char();
      }
    };

    // equality predicate for storing labels in a hash table
    struct label_eq {
      bool operator() ( const Label l1, const Label l2 ) const {
        return !(l1.lower_char() != l2.lower_char() ||
                 l1.upper_char() != l2.upper_char());
      }
    };
  };
typedef std::vector<Label> Analysis;
/***************** class Alphabet *******************************/
class Alphabet {
// string comparison operators needed to stored strings in a hash table
struct eqstr {
bool operator()(const char* s1, const char* s2) const {
return strcmp(s1, s2) == 0;
}
};
// data structure storing labels without repetitions (i.e. as a set)
typedef std::set<Label, Label::label_cmp> LabelSet;
// hash table used to map the symbols to their codes
typedef hash_map<const char*, Character, hash<const char*>,eqstr> SymbolMap;
public: // HFST addition
// hash table used to map the codes back to the symbols
typedef hash_map<Character, char*> CharMap;
// HFST addition
bool operator==(const Alphabet &alpha) const;
private:
SymbolMap sm; // maps symbols to codes
CharMap cm; // maps codes to symbols
LabelSet ls; // set of labels known to the alphabet
// add a new symbol with symbol code c
void add( const char *symbol, Character c );
public:
bool utf8;
// iterators over the set of known labels
typedef LabelSet::iterator iterator;
typedef LabelSet::const_iterator const_iterator;
Alphabet();
~Alphabet() { clear(); };
const_iterator begin() const { return ls.begin(); };
const_iterator end() const { return ls.end(); };
size_t size() const { return ls.size(); };
// HFST additions
CharMap get_char_map(void) { return cm; };
void print(void);
void clear();
void clear_char_pairs() { ls.clear(); };
// lookup a label in the alphabet
iterator find( Label l ) { return ls.find(l); };
// insert a label in the alphabet
void insert( Label l ) { if (!l.is_epsilon()) ls.insert(l); };
// insert the known symbols from another alphabet
void insert_symbols( const Alphabet& );
// insert the labels and known symbols from another alphabet
void copy( const Alphabet& );
// create the alphabet of a transducer obtained by a composition operation
void compose( const Alphabet &la, const Alphabet &ua );
// add a symbol to the alphabet and return its code
Character add_symbol(const char *symbol);
// add a symbol to the alphabet with a given code
void add_symbol(const char *symbol, Character c );
// create a new marker symbol and return its code
Character new_marker( void );
void delete_markers();
// compute the complement of a symbol set
void complement( std::vector<Character> &sym );
// return the code of the argument symbol
int symbol2code( const char *s ) const {
SymbolMap::const_iterator p = sm.find(s);
if (p != sm.end()) return p->second;
return EOF;
};
// return the symbol for the given symbol code
const char *code2symbol( Character c ) const {
CharMap::const_iterator p=cm.find(c);
if (p == cm.end())
return NULL;
else
return p->second;
};
// write the symbol for the given symbol code into a string
void write_char( Character c, char *buffer, int *pos,
bool with_brackets=true ) const;
// write the symbol pair of a given label into a string
void write_label( Label l, char *buffer, int *pos,
bool with_brackets=true ) const;
// write the symbol for the given symbol code into a buffer and return
// a pointer to it
// the flag "with_brackets" indicates whether the angle brackets
// surrounding multi-character symbols are to be printed or not
const char *write_char( Character c, bool with_brackets=true ) const;
// write the symbol pair of a given label into a string
// and return a pointer to it
const char *write_label( Label l, bool with_brackets=true ) const;
// scan the next multi-character symbol in the argument string
int next_mcsym( char*&, bool insert=true );
// scan the next symbol in the argument string
int next_code( char*&, bool extended=true, bool insert=true );
// convert a character string into a symbol or label sequence
void string2symseq( char*, std::vector<Character>& );
void string2labelseq( char*, std::vector<Label>& );
// scan the next label in the argument string
Label next_label( char*&, bool extended=true );
// store the alphabet in the argument file (in binary form)
void store( FILE* ) const;
// read the alphabet from the argument file
void read( FILE* );
// disambiguation and printing of analyses
int compute_score( Analysis &ana );
void disambiguate( std::vector<Analysis> &analyses );
char *print_analysis( Analysis &ana, bool both_layers );
friend std::ostream &operator<<(std::ostream&, const Alphabet&);
};
// write the alphabet to the output stream (in readable form)
std::ostream &operator<<(std::ostream&, const Alphabet&);
}
#endif
/*******************************************************************/
/* */
/* FILE basic.C */
/* MODULE basic */
/* PROGRAM SFST */
/* AUTHOR Helmut Schmid, IMS, University of Stuttgart */
/* */
/* PURPOSE */
/* */
/*******************************************************************/
#include <stdlib.h>
#include <string.h>
#include "basic.h"
namespace SFST {
// when true, read_num reverses the order of the bytes it has read
bool Switch_Bytes=false;
/*******************************************************************/
/* */
/* fst_strdup */
/* */
/*******************************************************************/
// Returns a copy of pString in memory obtained with malloc.
// Prints an error message and terminates the program if malloc fails.
char* fst_strdup(const char* pString)
{
  const size_t nbytes = strlen(pString) + 1; // includes the terminating 0
  char* copy = (char*)malloc(nbytes);
  if (!copy) {
    fprintf(stderr, "\nError: out of memory (malloc failed)\naborted.\n");
    exit(1);
  }
  memcpy(copy, pString, nbytes);
  return copy;
}
/*******************************************************************/
/* */
/* read_string */
/* */
/*******************************************************************/
// Reads characters from file into buffer until a 0 byte or the end of the
// file is reached or the buffer is full. The buffer is always terminated
// with 0 (provided that size is positive).
// Returns 1 if the string was terminated by a 0 byte in the file and
// 0 otherwise (end of file, buffer full, or no room in the buffer).
int read_string( char *buffer, int size, FILE *file )
{
  // Without this check the final assignment below would write to
  // buffer[-1] for size == 0.
  if (size <= 0)
    return 0;

  for( int i=0; i<size; i++ ) {
    int c=fgetc(file);
    if (c == EOF || c == 0) {
      buffer[i] = 0;
      return (c==0);
    }
    buffer[i] = (char)c;
  }
  // buffer full: the last character read is overwritten by the terminator
  buffer[size-1] = 0;
  return 0;
}
/*******************************************************************/
/* */
/* read_num */
/* */
/*******************************************************************/
// Reads n bytes from file into the memory at p and returns the result of
// fread (the number of bytes read). If Switch_Bytes is set, the order of
// the n bytes at p is reversed afterwards.
size_t read_num( void *p, size_t n, FILE *file )
{
  char *bytes = (char*)p;
  const size_t count = fread( bytes, 1, n, file );

  if (Switch_Bytes) {
    // swap the outermost bytes and move inwards until the indices meet
    for( size_t lo=0, hi=n; hi - lo > 1; lo++ ) {
      --hi;
      const char saved = bytes[lo];
      bytes[lo] = bytes[hi];
      bytes[hi] = saved;
    }
  }
  return count;
}
}
/*******************************************************************/
/* */
/* FILE basic.h */
/* MODULE basic */
/* PROGRAM SFST */
/* AUTHOR Helmut Schmid, IMS, University of Stuttgart */
/* */
/* PURPOSE */
/* */
/*******************************************************************/
#ifndef _BASIC_H_
#define _BASIC_H_
#include <stdio.h>
namespace SFST {

// if true, read_num reverses the byte order of the data it reads
extern bool Switch_Bytes;

// returns a copy of pString allocated with malloc;
// terminates the program if no memory is available
char* fst_strdup(const char* pString);

// reads a string from file into buffer (at most size characters including
// the terminating 0); returns 1 if the string ended with a 0 byte in the
// file and 0 otherwise
int read_string( char *buffer, int size, FILE *file );

// reads size bytes from file into the memory at p (with reversed byte
// order if Switch_Bytes is set) and returns the number of bytes read
size_t read_num( void *p, size_t size, FILE *file );
}
#endif
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment