-
Notifications
You must be signed in to change notification settings - Fork 0
/
GetMIInter.py
112 lines (89 loc) · 3.3 KB
/
GetMIInter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
#coding=utf-8
import numpy as np
class GetMIInter(object):
    """Mutual-information (MI) scorer backed by two count tables on disk.

    Construction reads two files relative to the working directory:

    * ``data/miinter.txt`` -- one ``<pair> <count>`` entry per line.  A pair
      is looked up by the key built from two units with the smaller unit
      (by string comparison) placed first.
    * ``data/miinter_words.txt`` -- the first line is the integer total N,
      every following line is a ``<unit> <count>`` entry.

    NOTE(review): input lines are cut into slices of three elements; this
    presumably corresponds to one UTF-8 encoded CJK character per slice in
    a Python 2 byte string -- confirm against the data files.
    """

    def __init__(self):
        """Load the pair counts, the total N and the per-unit counts.

        Raises:
            IOError/OSError: if either data file cannot be opened.
            ValueError: if a count (or the total N) is not an integer.
        """
        # loading inter mi
        print("loading inter mi...")
        self.__interN = 0
        self.__interDicS = {}
        self.__interMIDic = {}

        # `with` guarantees the handle is closed even if parsing fails.
        with open("data/miinter.txt", 'r') as f:
            self._readCounts(f, self.__interMIDic)

        with open("data/miinter_words.txt", 'r') as f:
            # The first line carries the total N; the rest are unit counts.
            self.__interN = int(f.readline(), 10)
            print("interN: %d " % (self.__interN))
            self._readCounts(f, self.__interDicS)

    @staticmethod
    def _readCounts(lines, table):
        """Fill *table* from an iterable of ``<key> <count>`` lines."""
        for line in lines:
            # A blank (e.g. trailing) line has no count field; skip it
            # instead of failing with IndexError.
            if not line.strip():
                continue
            para = line.split(" ")
            table[para[0]] = int(para[1])

    def getMI(self, c1, c2):
        """Return the MI score of the unit pair (*c1*, *c2*).

        The score is ``log2((n_pair + 1) * N / ((n_c1 + 1) * (n_c2 + 1)))``.
        Every count is incremented by one, so pairs or units missing from
        the tables (count 0) never produce a zero numerator or denominator.
        The pair is order-independent: ``getMI(a, b) == getMI(b, a)``.
        """
        # Pair keys are stored with the smaller unit first.
        pa = c1 + c2 if c1 < c2 else c2 + c1
        nsent = self.__interMIDic.get(pa, 0)
        na = self.__interDicS.get(c1, 0)
        nb = self.__interDicS.get(c2, 0)
        return np.log2(float(nsent + 1) * float(self.__interN)
                       / (float(na + 1) * float(nb + 1)))

    def lineSplit(self, line):
        """Cut *line* into consecutive slices of length three.

        The last slice is shorter when ``len(line)`` is not a multiple of
        three; an empty *line* yields an empty list.
        """
        return [line[i:i + 3] for i in range(0, len(line), 3)]

    def getMIScore(self, sline, oline):
        """Return the mean of ``getMI`` over every unit pair of two lines.

        Both lines are split with ``lineSplit`` and each unit of *oline* is
        paired with each unit of *sline*.  Returns ``0.0`` when either line
        is empty, because there is no pair to average over.
        """
        swords = self.lineSplit(sline)
        owords = self.lineSplit(oline)
        if not swords or not owords:
            # Avoid dividing by zero pairs.
            return 0.0

        intmis = 0.0
        for oword in owords:
            for sword in swords:
                intmis += self.getMI(oword, sword)
        return intmis / (len(owords) * len(swords))
def main():
    """Interactive loop: read two words and print their combined MI score.

    Each word is cut into a head (first three elements) and a tail (the
    rest); the four head/tail combinations between the two words are scored
    with ``GetMIInter.getMI``, averaged, and passed through ``np.exp``.
    The loop ends when standard input reaches end-of-file.

    NOTE(review): ``getMI`` returns a base-2 logarithm while ``np.exp`` is
    base e, so the printed value is not the geometric mean of the four
    ratios.  Left unchanged -- confirm whether ``2 ** (mi / 4.0)`` was meant.
    """
    try:
        read_line = raw_input  # Python 2
    except NameError:
        read_line = input  # Python 3

    myMI = GetMIInter()
    while True:
        try:
            string1 = read_line("input word1> \n").strip()
            string2 = read_line("input word2> \n").strip()
        except EOFError:
            # Input exhausted: leave the loop instead of dying with a traceback.
            break

        head1, tail1 = string1[0:3], string1[3:]
        head2, tail2 = string2[0:3], string2[3:]
        mi = (myMI.getMI(head1, head2) + myMI.getMI(head1, tail2)
              + myMI.getMI(tail1, head2) + myMI.getMI(tail1, tail2))
        print(np.exp(mi / 4.0))


if __name__ == "__main__":
    main()