-
Notifications
You must be signed in to change notification settings - Fork 0
/
predict_text.py
55 lines (44 loc) · 1.72 KB
/
predict_text.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import pickle
import tkinter as tk
# Load the suggester object used for prefix search.
# NOTE: pickle.load can execute arbitrary code while unpickling, so these
# data files must come from a trusted source.
with open('data/suggester.pickle', 'rb') as f:
    s = pickle.load(f)

# Main window and the canvas that hosts the text box.
root = tk.Tk()
canvas = tk.Canvas(root, width=800, height=300)
canvas.pack()

# Mapping of n-gram string -> score.
with open('data/suggestions.pickle', 'rb') as f:
    suggestions = pickle.load(f)

# Convert tfidf counts and PMI scores into a linear scale from 0-1 by
# dividing by the largest score of each kind. `default=1` keeps start-up
# from raising ValueError when one of the two kinds has no entries.
unigram_divisor = max(
    (score for ngram, score in suggestions.items() if ' ' not in ngram),
    default=1,
)  # ranked by tfidf count
other_ngram_divisor = max(
    (score for ngram, score in suggestions.items() if ' ' in ngram),
    default=1,
)  # ranked by PMI
def fill_text(event):
    """Complete the text being typed when the Tab key is pressed.

    Intended as a ``<Key>`` handler for the text widget ``t``. The last one,
    two and three space-separated words in the widget are each looked up
    with ``s.search``; the best-scoring suggestion is chosen and the part of
    it that has not been typed yet is appended to the widget.

    Args:
        event: The tkinter key event; only ``event.char`` is read.

    Returns:
        ``'break'`` when a completion was inserted, so that Tk does not also
        run its default Tab binding and insert a literal tab character after
        the completion. ``None`` otherwise (the key press is handled
        normally).
    """
    if event.char != '\t':  # Only Tab triggers a completion
        return None

    words = t.get('1.0', 'end-1c').split(' ')
    # Candidate prefixes: the last 1, 2 and 3 words typed (fewer if the
    # text is shorter than that).
    prefixes = [' '.join(words[-n:]) for n in range(1, min(3, len(words)) + 1)]

    # Map every suggestion to the typed prefix that produced it. Prefixes
    # are visited shortest first, so a longer prefix overwrites a shorter
    # one for the same suggestion.
    matched_prefix = {}
    for prefix in prefixes:
        for suggestion in s.search(prefix.lower()):
            matched_prefix[suggestion] = prefix

    highest_score = 0
    best_ngram = ''
    for ngram, prefix in matched_prefix.items():
        if ' ' not in ngram:
            scaled = suggestions[ngram] / unigram_divisor
        else:
            scaled = suggestions[ngram] / other_ngram_divisor
            # Make suggestions like "point of view" less important if
            # "many p" is typed, i.e. when only a single typed word matched.
            # NOTE(review): the original indentation was lost; this penalty
            # is assumed to apply to multi-word suggestions only - confirm.
            if ' ' not in prefix:
                scaled *= 0.3

        if scaled > highest_score:
            highest_score = scaled
            best_ngram = ngram

    if not best_ngram:
        return None

    # Append only the untyped remainder. This assumes every suggestion
    # returned by s.search starts with the prefix that was searched for.
    t.insert('end', best_ngram[len(matched_prefix[best_ngram]):])
    # Swallow the Tab key press: deleting at index 'end' removed nothing,
    # so the default binding used to leave a tab after the completion.
    return 'break'
# Text box the user types into; every key press is routed to fill_text.
t = tk.Text(master=root, wrap='word', width=60, height=10)
t.bind(sequence='<Key>', func=fill_text)
t.pack()
t.focus_set()

# Embed the text box in the canvas at canvas coordinates (400, 150).
canvas.create_window((400, 150), window=t)

# Hand control to the Tk event loop until the window is closed.
root.mainloop()