-
Notifications
You must be signed in to change notification settings - Fork 95
/
top_ten.py
78 lines (48 loc) · 1.31 KB
/
top_ten.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import sys
import json
import operator
from pprint import pprint
def hw():
    """Print the greeting ``Hello, world!`` to standard output."""
    # Single-argument parenthesised print behaves the same on Python 2 and 3,
    # whereas the bare print statement is a SyntaxError on Python 3.
    print('Hello, world!')
def lines(fp):
    """Print the number of lines remaining in the open file object *fp*.

    The file is consumed from its current position to the end; the count is
    written to standard output as a decimal string.
    """
    # Count lazily instead of materialising every line with readlines().
    line_count = sum(1 for _ in fp)
    # Single-argument parenthesised print works on both Python 2 and 3.
    print(str(line_count))
def _count_hashtags(lines):
    """Tally hashtag occurrences over an iterable of JSON-encoded records.

    Each non-blank item of *lines* is parsed with ``json.loads``. For every
    record that is a JSON object with an ``"entities"`` object, the ``"text"``
    of each entry in its ``"hashtags"`` list is counted.

    Returns a dict mapping hashtag text (non-ASCII characters removed) to the
    number of times it was seen.
    """
    counts = {}
    for line in lines:
        # Blank lines carry no record; json.loads would raise on them.
        if not line.strip():
            continue
        record = json.loads(line)
        if not isinstance(record, dict):
            continue
        entities = record.get("entities") or {}
        for hashtag in entities.get("hashtags") or ():
            # Strip non-ASCII characters so the tag is always printable, then
            # decode back to text so it can be concatenated on Python 3.
            # NOTE(review): tags that differ only in non-ASCII characters are
            # merged by this step (a fully non-ASCII tag becomes "").
            tag = hashtag["text"].encode("ascii", "ignore").decode("ascii")
            counts[tag] = counts.get(tag, 0) + 1
    return counts


def main():
    """Print the ten most frequent hashtags found in a file of JSON records.

    The file path is taken from ``sys.argv[1]``; the file is expected to hold
    one JSON document per line. Output is one ``<hashtag>\\t<count>`` line per
    hashtag, most frequent first, at most ten lines.

    Raises:
        SystemExit: if no file path was given on the command line.
    """
    if len(sys.argv) < 2:
        sys.exit("usage: top_ten.py <tweet_file>")

    # Binary mode keeps decoding out of the locale's hands: json.loads accepts
    # byte strings on Python 2 and (since 3.6) bytes on Python 3.
    with open(sys.argv[1], "rb") as tweet_file:
        counts = _count_hashtags(tweet_file)

    # sorted() is stable, so equally frequent tags keep the dict's own order.
    ranked = sorted(counts.items(), key=operator.itemgetter(1), reverse=True)
    for tag, total in ranked[:10]:
        print(tag + "\t" + str(total))
# Run the hashtag report only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()