forked from zalio/word-problem-solver-thesis
-
Notifications
You must be signed in to change notification settings - Fork 0
/
test_all_questions.py
143 lines (127 loc) · 8.2 KB
/
test_all_questions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
from number_assigners.type0_num_assigner import read_type0_questions
from number_assigners.type1_num_assigner import read_type1_questions
from number_assigners.type2_num_assigner import read_type2_questions
from number_assigners.type3_num_assigner import read_type3_questions
from number_assigners.type4_num_assigner import read_type4_questions
from number_assigners.type5_num_assigner import read_type5_questions
from number_assigners.type6_num_assigner import read_type6_questions
from number_assigners.type7_num_assigner import read_type7_questions
from number_assigners.type8_num_assigner import read_type8_questions
from number_assigners.type9_num_assigner import read_type9_questions
from number_assigners.type10_num_assigner import read_type10_questions
from number_assigners.type11_num_assigner import read_type11_questions
from number_assigners.type12_num_assigner import read_type12_questions
from number_assigners.type13_num_assigner import read_type13_questions
from number_assigners.type14_num_assigner import read_type14_questions
from number_assigners.type15_num_assigner import read_type15_questions
from number_assigners.type16_num_assigner import read_type16_questions
from number_assigners.type17_num_assigner import read_type17_questions
from number_assigners.type18_num_assigner import read_type18_questions
from number_assigners.type19_num_assigner import read_type19_questions
from number_assigners.type20_num_assigner import read_type20_questions
from number_assigners.type21_num_assigner import read_type21_questions
from number_assigners.type22_num_assigner import read_type22_questions # NEW
from number_assigners.type23_num_assigner import read_type23_questions # NEW
from sympy import *
def _is_swapped_pair(predicted, actual):
    """Return True when two-unknown answers agree once x and y are exchanged.

    Any answer that cannot be interpreted that way (``None`` reference,
    reference without a length, length other than 2, prediction that is not
    indexable by the ``x``/``y`` symbols) simply does not match.
    """
    if actual is None:
        return False
    try:
        if len(actual) != 2:
            return False
    except TypeError:
        # The reference answer has no length (e.g. a bare number).
        return False
    x = Symbol("x")
    y = Symbol("y")
    try:
        return bool(predicted[x] == actual[y] and predicted[y] == actual[x])
    except (KeyError, IndexError, TypeError):
        # The prediction lacks one of the unknowns or is not subscriptable.
        return False


def calculate_detailed_accuracy(predicted_answers, actual_answers):
    """Score predicted answers against the reference answers.

    Every prediction lands in exactly one bucket:

    * unsolvable -- the prediction is ``None``;
    * correct -- the prediction equals the reference answer, or both are
      two-unknown answers that agree after swapping ``x`` and ``y``;
    * incorrect -- everything else, including predictions whose reference
      answer is ``None`` or cannot be compared.

    Args:
        predicted_answers: Sequence of predicted answers; ``None`` marks a
            question for which no prediction was produced.
        actual_answers: Reference answers, index-aligned with
            ``predicted_answers``.

    Returns:
        A tuple ``(accuracy, correct, incorrect, unsolvable)``. ``accuracy``
        is ``correct / len(predicted_answers)``, or ``0`` when there are no
        predictions at all.
    """
    correct = 0
    incorrect = 0
    unsolvable = 0
    for position, predicted in enumerate(predicted_answers):
        if predicted is None:
            unsolvable += 1
            continue
        # Looked up only for solved questions, as before.
        actual = actual_answers[position]
        if predicted == actual or _is_swapped_pair(predicted, actual):
            correct += 1
        else:
            incorrect += 1
    total = len(predicted_answers)
    if total == 0:
        return 0, correct, incorrect, unsolvable
    return correct / total, correct, incorrect, unsolvable
def test_same_type_questions(same_type_questions, type_no, detailed_output=False):
    """Solve every question of one type and score the predictions.

    For each question the predicted equation (if any) and the reference
    ``solution_equation`` are both solved; the two answer lists are then
    handed to ``calculate_detailed_accuracy``.

    Args:
        same_type_questions: Iterable of question objects exposing
            ``original_question``, ``predict_equation_from_generated_q()``
            and ``solution_equation``.
        type_no: Label of the question type, used only in printed output.
        detailed_output: When true, print a per-question report and the
            number of questions processed.

    Returns:
        The ``(accuracy, correct, incorrect, unsolvable)`` tuple produced by
        ``calculate_detailed_accuracy``.
    """
    guesses = []
    references = []
    for position, question in enumerate(same_type_questions):
        if detailed_output:
            print("\n********** Results of Question", position, "from Type", type_no, "Questions **********\n")
            print(" ORIGINAL PROBLEM TEXT : ", question.original_question, "\n")

        guessed_eq = question.predict_equation_from_generated_q()
        if guessed_eq is not None:
            guess = guessed_eq.solve_equation()
            if detailed_output:
                print("Predicted number slot values: ", guessed_eq)
                print("Predicted answer --> ", guess, "\n")
        else:
            # No equation could be predicted: record the question as unsolved.
            guess = None
            if detailed_output:
                print("Question", position, "has been failed to be solved.\n")
        guesses.append(guess)

        reference_eq = question.solution_equation
        reference = reference_eq.solve_equation()
        if detailed_output:
            print("Actual number slot values: ", reference_eq)
            print("Actual answer --> ", reference)
        references.append(reference)

    if detailed_output:
        print("\n" + "-" * 80, "\n----- The total number of type", type_no, "questions:", str(len(references)),
              "-----\n")

    accuracy, n_correct, n_incorrect, n_unsolvable = calculate_detailed_accuracy(guesses, references)
    return accuracy, n_correct, n_incorrect, n_unsolvable
def test_all_questions():
    """Evaluate every question type and print per-type and overall results.

    All question sets are loaded first (one ``read_typeN_questions`` call per
    type), then each set is run through ``test_same_type_questions`` with
    detailed output enabled. Per-type accuracy is printed after each set and
    the aggregated counts and final accuracy are printed at the end.

    The final accuracy is reported as ``0.0`` when no questions were loaded,
    instead of failing on a division by zero.
    """
    # Reader position in this tuple is the question type number.
    # 22 and 23 are new.
    readers = (
        read_type0_questions, read_type1_questions, read_type2_questions,
        read_type3_questions, read_type4_questions, read_type5_questions,
        read_type6_questions, read_type7_questions, read_type8_questions,
        read_type9_questions, read_type10_questions, read_type11_questions,
        read_type12_questions, read_type13_questions, read_type14_questions,
        read_type15_questions, read_type16_questions, read_type17_questions,
        read_type18_questions, read_type19_questions, read_type20_questions,
        read_type21_questions, read_type22_questions, read_type23_questions,
    )
    # Load everything up front, in type order, before any question is tested.
    all_questions = {type_no: reader() for type_no, reader in enumerate(readers)}

    total_correct_predictions_count = 0
    total_incorrect_predictions_count = 0
    total_unsolvable_predictions_count = 0
    for type_no, questions in all_questions.items():
        accuracy_value, correct_predictions_count, incorrect_predictions_count, unsolvable_predictions_count = \
            test_same_type_questions(questions, str(type_no), detailed_output=True)
        print("----->Accuracy for the type", str(type_no), "questions =", str(accuracy_value * 100),
              "%\n\n" + "~" * 120, "\n")
        total_correct_predictions_count += correct_predictions_count
        total_incorrect_predictions_count += incorrect_predictions_count
        total_unsolvable_predictions_count += unsolvable_predictions_count

    print("*" * 120, "\n" + "*" * 120, "\n")
    total_num_of_questions = (total_correct_predictions_count + total_incorrect_predictions_count +
                              total_unsolvable_predictions_count)
    # Guard against an empty data set so the summary can still be printed.
    if total_num_of_questions:
        final_accuracy = total_correct_predictions_count / total_num_of_questions
    else:
        final_accuracy = 0.0

    print("The total number of all questions:", total_num_of_questions, "\n")
    print("The total number of correct predictions out of all questions:", str(total_correct_predictions_count), "\n")
    print("The total number of incorrect predictions out of all questions:", str(total_incorrect_predictions_count),
          "\n")
    print("The total number of unsolvable questions out of all questions:", str(total_unsolvable_predictions_count),
          "\n")
    print("=====> Final accuracy of all", total_num_of_questions, "questions =",
          str(final_accuracy * 100), "%\n")
# Run the full evaluation only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    test_all_questions()