-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathparsing.py
97 lines (84 loc) · 2.82 KB
/
parsing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
def normalize_ans(a):
    """Normalize an answer string so equal answers compare equal.

    Steps, in order:
      1. Lower-case and trim surrounding spaces and the punctuation
         characters < > * ( ) . ? , and both quote characters.
      2. Collapse every run of spaces down to a single space.
      3. Drop a leading "a ", "an " or "the ". The three checks run one
         after another, so "a the cat" ends up as "cat".

    Args:
        a: Raw answer string.

    Returns:
        The normalized string (may be empty).
    """
    a = a.lower().strip(" <>*().?,'\"")
    # Split on single spaces and discard the empty pieces: this collapses
    # space runs of any length, not just pairs.
    a = " ".join(piece for piece in a.split(" ") if piece)
    for article in ("a ", "an ", "the "):
        if a.startswith(article):
            a = a[len(article):]
    return a
def normalize_sample(a):
    """Aggressively normalize a sampled answer for comparison.

    Steps, in order:
      1. Lower-case and trim surrounding spaces and the punctuation
         characters < > * ( ) . ! ? , and both quote characters.
      2. Drop a leading "a ", "an " or "the " (checked one after another).
      3. Drop a trailing " and".
      4. Reduce text starting with "yes," / "yes!" / "yes " to "yes", and
         text starting with "no," / "no!" / "no " to "no". Note this also
         turns e.g. "no one" into "no".
      5. Replace commas with spaces, then remove interior " the ", " a "
         and " an ".
      6. Delete hyphens and quote characters; replace ".", "?", "!" and
         newlines with spaces.
      7. Collapse every run of spaces to one space and trim whitespace.

    Args:
        a: Raw sampled answer string.

    Returns:
        The normalized string (may be empty).
    """
    a = a.lower().strip(" <>*().!?,'\"")
    for article in ("a ", "an ", "the "):
        if a.startswith(article):
            a = a[len(article):]
    if a.endswith(" and"):
        a = a[:-4]
    # Keep only the verdict word for "yes, ..." / "no, ..." style replies.
    if a.startswith(("yes,", "yes!", "yes ")):
        a = a[:3]
    if a.startswith(("no,", "no!", "no ")):
        a = a[:2]
    # Commas become spaces first so that "x,the y" exposes " the " below.
    a = a.replace(",", " ")
    for article in (" the ", " a ", " an "):
        a = a.replace(article, " ")
    # One pass for the single-character edits: delete - ' " and turn
    # sentence punctuation / newlines into spaces.
    a = a.translate(str.maketrans({
        "-": None,
        "'": None,
        '"': None,
        ".": " ",
        "?": " ",
        "!": " ",
        "\n": " ",
    }))
    # Collapse space runs of any length (the substitutions above can leave
    # several in a row), then trim any remaining outer whitespace.
    a = " ".join(piece for piece in a.split(" ") if piece)
    return a.strip()
def group_ans_simple(guesses):
    """Group answers by exact string equality after normalization.

    Each guess is passed through ``normalize_ans``; identical results are
    merged and given a probability equal to their share of all guesses.

    Args:
        guesses: Flat list of raw answer strings.

    Returns:
        A pair ``(answers, probs)`` of equal-length lists, ordered from most
        to least frequent. Answers with the same frequency keep the order in
        which they first appeared. Both lists are empty when ``guesses`` is
        empty.
    """
    # Function-scope import: no top-of-file import block is visible here.
    from collections import Counter

    normalized = [normalize_ans(g) for g in guesses]
    if not normalized:
        # Nothing to group; unpacking zip(*[]) would raise ValueError.
        return [], []

    total = len(normalized)
    # most_common() sorts by count (descending) in a single counting pass.
    ranked = Counter(normalized).most_common()
    answers = [answer for answer, _ in ranked]
    probs = [count / total for _, count in ranked]
    return answers, probs
def answers_are_equivalent_heuristic(a, b):
    """Heuristically decide whether two normalized answers match.

    Both arguments are assumed to be normalized already. The answers are
    treated as equivalent when any of the following holds:
      * they are identical;
      * they contain the same set of space-separated words (order and
        repetition ignored);
      * one is a suffix of the other and the character just before that
        suffix is not a comma (a character-level test, so "unhappy" matches
        "happy");
      * one equals the other plus a trailing "s".

    An empty string is only equivalent to another empty string.

    Args:
        a: First normalized answer.
        b: Second normalized answer.

    Returns:
        True if the answers are considered equivalent, otherwise False.
    """
    if a == b:
        return True
    # "" is a suffix of every string, so without this guard an empty answer
    # would be reported as equivalent to anything.
    if not a or not b:
        return False
    # Same words in both directions is exactly set equality.
    if set(a.split(" ")) == set(b.split(" ")):
        return True
    for longer, shorter in ((a, b), (b, a)):
        # a != b and both are non-empty, so a suffix match implies `longer`
        # has at least one character in front of the suffix.
        if longer.endswith(shorter) and longer[-len(shorter) - 1] != ",":
            return True
        if longer == shorter + "s":
            return True
    return False
def add_to_equiv_class_heuristic(equiv_classes, ans, conf):
    """Add one answer's confidence to the matching equivalence class.

    Existing class keys are scanned in dict order. The first key that
    ``answers_are_equivalent_heuristic`` accepts for ``ans`` has ``conf``
    added to its total. If no key matches, ``ans`` becomes a new key with
    value ``conf``.

    Args:
        equiv_classes: Dict mapping a representative answer to its
            accumulated confidence. Modified in place.
        ans: Answer to place; assumed to be normalized already.
        conf: Confidence (weight) contributed by this answer.

    Returns:
        None.
    """
    for representative in equiv_classes:
        if answers_are_equivalent_heuristic(ans, representative):
            equiv_classes[representative] += conf
            break
    else:
        # Loop finished without a match: start a new class for this answer.
        equiv_classes[ans] = conf
def build_equiv_class_heuristics(answers, confidences):
    """Build heuristic equivalence classes from paired answers and weights.

    Walks ``answers`` and ``confidences`` in lockstep and feeds each pair to
    ``add_to_equiv_class_heuristic``, starting from an empty dict. Pairing
    stops at the shorter of the two inputs.

    Args:
        answers: Iterable of answer strings.
        confidences: Iterable of weights, one per answer. Nothing here checks
            that the weights sum to 1.

    Returns:
        Dict mapping each class's representative answer to the summed weight
        of the answers assigned to it.
    """
    classes = {}
    for answer, weight in zip(answers, confidences):
        add_to_equiv_class_heuristic(classes, answer, weight)
    return classes