-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtop_ten.py
71 lines (53 loc) · 1.55 KB
/
top_ten.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import sys
import re
import json
def main():
    """Print the most frequent hashtags found in a tweet file.

    Reads the file named by ``sys.argv[1]`` (one JSON document per line),
    counts how often each hashtag text occurs and prints up to ten
    ``<tag> <count>`` lines, highest count first. The count is printed as a
    float, e.g. ``3.0``.

    Raises:
        IndexError: if no file path was passed on the command line.
    """
    # Close the input deterministically instead of leaking the file handle.
    with open(sys.argv[1]) as tweet_file:
        tweet_entities = loadTweets(tweet_file)

    hash_tags = {}
    for tweet in tweet_entities:
        addHashTags(hash_tags, tweet)

    hash_tag_objs = []
    buildHashTagObjs(hash_tag_objs, hash_tags)

    # A stable ascending sort followed by a reversal yields the same order
    # (including among equal counts) as popping from the end of the list.
    ranked = sorted(hash_tag_objs, key=HashTag.getCount)
    ranked.reverse()

    # Slicing copes with fewer than ten distinct hashtags; popping ten times
    # unconditionally raised IndexError on such inputs.
    for hash_tag in ranked[:10]:
        print("%s %s" % (hash_tag.getTag(), float(hash_tag.getCount())))
def buildHashTagObjs(hash_tag_objs, hash_tags):
    """Append one ``HashTag`` per entry of ``hash_tags`` to ``hash_tag_objs``.

    ``hash_tags`` maps a tag to its count. ``hash_tag_objs`` is extended in
    place, following the mapping's iteration order; nothing is returned.
    """
    hash_tag_objs.extend(
        HashTag(tag, hash_tags[tag]) for tag in hash_tags
    )
def loadTweets(fp):
    """Collect the ``entities`` dict of every tweet that carries hashtag data.

    Args:
        fp: iterable of text lines (for example an open file), each holding
            one JSON document.

    Returns:
        A list holding, in input order, the ``entities`` value of each parsed
        line that has an ``entities`` key which in turn has a ``hashtags`` key.

    Raises:
        ValueError: if a non-blank line is not valid JSON.
    """
    tweets = []
    for line in fp:
        # json.loads raises on empty/whitespace-only lines, so skip them
        # rather than aborting the whole run.
        if not line.strip():
            continue
        tweet = json.loads(line)
        # Documents without an 'entities' key are ignored silently.
        if u'entities' not in tweet:
            continue
        entities = tweet[u'entities']
        if u'hashtags' in entities:
            tweets.append(entities)
        else:
            # Reported only when 'entities' is present but lacks 'hashtags'.
            print('Tweet did not have any hash tags')
    return tweets
def addHashTags(hash_tags, tweet):
    """Increment the running count of every hashtag listed in ``tweet``.

    ``hash_tags`` is updated in place: its keys are the UTF-8 encoded hashtag
    texts and its values are occurrence counts. The hashtag entries are read
    from ``tweet``'s ``hashtags`` key; nothing changes when that key is
    missing.
    """
    entries = tweet[u'hashtags'] if u'hashtags' in tweet else []
    for entry in entries:
        key = entry[u'text'].encode('utf-8')
        if key in hash_tags:
            hash_tags[key] += 1
        else:
            hash_tags[key] = 1
def getHashTagsForTweet(tweet):
    """Return the value stored under ``hashtags`` in ``tweet``.

    Falls back to a new empty list when ``tweet`` has no ``hashtags`` key.
    """
    return tweet[u'hashtags'] if u'hashtags' in tweet else []
class HashTag(object):
    """A hashtag paired with the number of times it was counted.

    Derives from ``object`` so it is a new-style class under Python 2 as
    well. The ``tag`` and ``count`` attributes are public; the getter methods
    are kept because they are usable as ``key=`` functions, e.g.
    ``sorted(objs, key=HashTag.getCount)``.
    """

    def __init__(self, tag, count):
        """Store the hashtag ``tag`` and its occurrence ``count``."""
        self.tag = tag
        self.count = count

    def __repr__(self):
        """Return a debugging representation showing tag and count."""
        return "HashTag(%r, %r)" % (self.tag, self.count)

    def getTag(self):
        """Return the stored tag."""
        return self.tag

    def getCount(self):
        """Return the stored count."""
        return self.count
# Run the hashtag report only when executed as a script, not when imported.
if __name__ == '__main__':
    main()