forked from koskenni/pytwolc
-
Notifications
You must be signed in to change notification settings - Fork 0
/
guessbyasking.py
94 lines (83 loc) · 2.94 KB
/
guessbyasking.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# guessbyasking.py
copyright = """Copyright © 2017, Kimmo Koskenniemi
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or (at
your option) any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
import hfst, sys, argparse
def collect_entries(res, weight):
    """Return the set of entries found in *res* and record their weights.

    *res* is an iterable of ``(entry, weight)`` pairs, as produced by the
    lookup call in ``main``.  *weight* is a dict that is updated in place
    so that it always holds the smallest weight seen so far for each entry.
    """
    entries = set()
    for entry, w in res:
        entries.add(entry)
        weight[entry] = min(w, weight.get(entry, w))
    return entries


def narrow_candidates(remaining, entries, first, negative):
    """Combine the candidates so far with the entries of the newest form.

    The first accepted form defines the initial candidate set.  After that
    a positive form keeps only candidates that also generate it, while a
    negative form (entered with a leading ``-``) removes the candidates
    that generate it.
    """
    if first:
        # NOTE(review): a leading '-' on the very first accepted form has
        # nothing to subtract from, so its entries simply become the initial
        # candidates -- confirm that this is the intended behaviour.
        return entries
    if negative:
        return remaining - entries
    return remaining & entries


def prune_by_weight(candidates, weight, reject):
    """Drop candidates whose weight exceeds the best one by more than *reject*.

    Lower weights are better.  An empty *candidates* set yields an empty
    set; taking ``min()`` of an empty sequence would raise ``ValueError``,
    so that case is handled explicitly.
    """
    if not candidates:
        return set()
    best_weight = min(weight[e] for e in candidates)
    return {e for e in candidates if weight[e] <= best_weight + reject}


def main():
    """Interactively guess lexicon entries from word forms read on stdin.

    Loads the guesser FST, then reads one form per line.  Each form narrows
    the set of candidate entries until exactly one remains, which is then
    printed.  An empty line gives up the current word and starts a new one;
    end of input terminates the program.
    """
    argparser = argparse.ArgumentParser(
        "python3 guessbyasking.py",
        description="Guess lexicon entries by asking forms from the user")
    # nargs="?" is required for the default of a positional to take effect.
    argparser.add_argument(
        "guesser", nargs="?", default="finv-guess.fst",
        help="Guesser file FST")
    argparser.add_argument(
        "-r", "--reject", default=1000000, type=int, metavar="REJECTION",
        help="reject candidates which are worse than the best"
             " by more than REJECTION")
    argparser.add_argument(
        "-v", "--verbosity", default=0, type=int,
        help="level of diagnostic output")
    args = argparser.parse_args()

    guesser_fil = hfst.HfstInputStream(args.guesser)
    try:
        guesser_fst = guesser_fil.read()
    finally:
        # Close the stream even if reading the transducer fails.
        guesser_fil.close()
    guesser_fst.invert()
    guesser_fst.minimize()
    guesser_fst.lookup_optimize()

    print("\nENTER FORMS OF A WORD:\n")
    while True:                 # one iteration per word
        remaining = set()       # candidate entries still compatible
        weight = {}             # best (smallest) weight seen per entry
        first = True            # no form has been accepted yet
        while True:             # one iteration per entered form
            linenl = sys.stdin.readline()
            if not linenl:      # end of input
                return
            line = linenl.strip()
            if not line:
                print("GIVING UP THIS WORD\n\n")
                break
            negative = line.startswith("-")
            form = line[1:] if negative else line
            res = guesser_fst.lookup(form, output="tuple")
            if args.verbosity >= 10:
                print("lookup result =", res)
            if not res:
                print("FITS NO PATTERN! IGNORED.")
                continue
            entries = collect_entries(res, weight)
            remain = narrow_candidates(remaining, entries, first, negative)
            first = False
            rema = prune_by_weight(remain, weight, args.reject)
            if len(rema) == 1:
                print("\n" + "="*18)
                print(list(rema)[0], ";")
                print("="*18 + "\n")
                break
            elif not rema:
                # Keep the previous candidates: this form is disregarded.
                print("DOES NOT FIT! IGNORED.")
            else:
                rml = [(entry, weight[entry]) for entry in rema]
                print(" ", rml)
                remaining = rema


if __name__ == "__main__":
    main()