-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathfs.py
114 lines (95 loc) · 2.81 KB
/
fs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
"""fs.py: A wrapper module for basic finite-state operations
The HFST engine is used for accomplishing the operations, but all functions make copies of their arguments when necessary to avoid side effects.
© Kimmo Koskenniemi, 2018. This is free code under the GPL 3 license."""
import hfst
import grapheme
import cfg
def expr(e):
    """Compile the XFST regular expression 'e' into a minimized FST.

    The expression string is handed to hfst.regex() and the resulting
    transducer is minimized in place before it is returned.
    """
    compiled = hfst.regex(e)
    compiled.minimize()
    return compiled
def concat(f, g):
    """Return a minimized FST for 'f' followed by 'g'.

    The concatenation is carried out on a copy of 'f', so the
    in-place operation never touches 'f' itself.
    """
    joined = f.copy()
    joined.concatenate(g)
    joined.minimize()
    return joined
def star(f):
    """Return a minimized FST for the Kleene star (zero or more) of 'f'.

    The iteration is applied to a copy of 'f', not to 'f' itself.
    """
    starred = f.copy()
    starred.repeat_star()
    starred.minimize()
    return starred
def plus(f):
    """Return a minimized FST for the Kleene plus (one or more) of 'f'.

    The iteration is applied to a copy of 'f', not to 'f' itself.
    """
    repeated = f.copy()
    repeated.repeat_plus()
    repeated.minimize()
    return repeated
def crossprod(f, g):
    """Return the minimized cross-product of the FSAs 'f' and 'g'.

    The cross-product is computed on a copy of 'f', so the in-place
    operation never touches 'f' itself.
    """
    product = f.copy()
    product.cross_product(g)
    product.minimize()
    return product
def compose(f, g):
    """Return the minimized composition of the FSTs 'f' and 'g'.

    The composition is carried out on a copy of 'f', so the in-place
    operation never touches 'f' itself.
    """
    composed = f.copy()
    composed.compose(g)
    composed.minimize()
    return composed
def union(f, g):
    """Return the minimized union (disjunction) of the FSTs 'f' and 'g'.

    The disjunction is carried out on a copy of 'f', so the in-place
    operation never touches 'f' itself.
    """
    merged = f.copy()
    merged.disjunct(g)
    merged.minimize()
    return merged
def intersect(f, g):
    """Return the minimized intersection (conjunction) of 'f' and 'g'.

    Both arguments are assumed to be length preserving mappings.
    The conjunction is carried out on a copy of 'f', so the in-place
    operation never touches 'f' itself.
    """
    common = f.copy()
    common.conjunct(g)
    common.minimize()
    return common
def upper(f):
    """Return the minimized input-side projection of the FST 'f'.

    The projection is taken on a copy of 'f', not on 'f' itself.
    """
    input_side = f.copy()
    input_side.input_project()
    input_side.minimize()
    return input_side
def lower(f):
    """Return the minimized output-side projection of the FST 'f'.

    The projection is taken on a copy of 'f', not on 'f' itself.
    """
    output_side = f.copy()
    output_side.output_project()
    output_side.minimize()
    return output_side
def symbol_to_fsa(sym):
    """Return an FSA which accepts the one-symbol string 'sym'.

    The symbol 'sym' may be e.g. a composed Unicode grapheme, i.e. a
    string of two or more Unicode characters; it is always treated as
    one single symbol.

    sym -- the symbol (a string) that the automaton shall accept

    Returns a transducer built from a one-step path sym:sym.
    """
    bfsa = hfst.HfstBasicTransducer()
    # The path must be a tuple of (input, output) pairs.  The trailing
    # comma is essential: ((sym, sym)) without it is just the flat
    # 2-tuple (sym, sym), not a path containing one pair.
    string_pair_path = ((sym, sym),)
    bfsa.disjunct(string_pair_path, 0)
    # Convert the basic transducer with the HfstTransducer constructor,
    # the same way string_to_fsa does.
    fsa = hfst.HfstTransducer(bfsa)
    return fsa
def symbol_pair_to_fst(insym, outsym):
    """Return an FST which accepts the one-pair string 'insym:outsym'.

    insym -- the input side symbol (a string) of the pair
    outsym -- the output side symbol (a string) of the pair

    Returns a transducer built from a one-step path insym:outsym.
    """
    bfst = hfst.HfstBasicTransducer()
    # The path must be a tuple of (input, output) pairs.  The trailing
    # comma is essential: ((insym, outsym)) without it is just the flat
    # 2-tuple (insym, outsym), not a path containing one pair.
    string_pair_path = ((insym, outsym),)
    # Add the path to 'bfst', the transducer created above.
    bfst.disjunct(string_pair_path, 0)
    # Convert the basic transducer with the HfstTransducer constructor,
    # the same way string_to_fsa does.
    fst = hfst.HfstTransducer(bfst)
    return fst
def string_to_fsa(grapheme_string):
    """Return an FSA accepting the grapheme sequence of 'grapheme_string'.

    The string is split into graphemes with grapheme.graphemes(), and
    each grapheme becomes one identity pair (g, g) on a single path.
    When cfg.verbosity is at least 10, the grapheme list and the pair
    path are printed for debugging.
    """
    basic = hfst.HfstBasicTransducer()
    symbols = list(grapheme.graphemes(grapheme_string))
    # One (input, output) pair per grapheme, input and output identical.
    pair_path = tuple((sym, sym) for sym in symbols)
    if cfg.verbosity >= 10:
        print(symbols)
        print(pair_path)
    basic.disjunct(pair_path, 0)
    return hfst.HfstTransducer(basic)