-
Notifications
You must be signed in to change notification settings - Fork 2
/
simhash.py
37 lines (32 loc) · 1.19 KB
/
simhash.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
def sim_hash(trace, limit=512):
    """Compute a simple XOR fingerprint of the tokens in a stack trace.

    The trace is split on whitespace, repeated tokens are dropped (the
    first occurrence is kept), and the built-in ``hash()`` values of the
    first ``limit`` distinct tokens are XOR-ed together. XOR is
    commutative, so token order only influences which tokens fall inside
    ``limit``.

    Args:
        trace: Text of the trace. Any falsy value (``None``, ``''``)
            yields ``None``.
        limit: Maximum number of distinct tokens folded into the result.
            Zero or a negative value folds in nothing and gives ``'0x0'``.

    Returns:
        The combined hash formatted with ``hex()`` as a text string, or
        ``None`` when ``trace`` is falsy.

    Note:
        The value is built from the built-in ``hash()``. Python 3 salts
        string hashes per process unless ``PYTHONHASHSEED`` is fixed, so
        results are only comparable within a single interpreter process.
    """
    if not trace:
        return None

    result = 0
    seen = set()
    # split() with no argument breaks on every run of whitespace, newlines
    # included, and never yields empty tokens, so no per-line pass or
    # empty-string filter is needed.
    for word in trace.split():
        if len(seen) >= limit:
            break
        if word in seen:
            # XOR is self-inverse: folding a repeated token in again would
            # cancel its earlier contribution, so each token counts once.
            continue
        seen.add(word)
        result ^= hash(word)

    # A u"" literal formats to ``unicode`` on Python 2 and ``str`` on
    # Python 3, avoiding the Python-2-only ``unicode`` builtin.
    return u"{0}".format(hex(result))
def main():
    """Print the sim_hash of two sample traces that differ in one token."""
    # Shared last frame of both sample traces.
    timer_frame = "at Timer.listOnTimeout [as ontimeout] (timers.js:110:15)\n"
    header = "\nError: Error message\n"

    samples = (
        header
        + "at null._onTimeout (/examples/error-module.js:7:29)\n"
        + timer_frame,
        header
        + "at console._onTimeout (/examples/error-module.js:7:29)\n"
        + timer_frame,
    )

    for sample in samples:
        print('sim_hash = %s' % (sim_hash(sample)))
# Run the demo only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()