Skip to content

Commit

Permalink
Merge branch 'master' of github.com:DocNow/twarc
Browse files Browse the repository at this point in the history
  • Loading branch information
edsu committed Apr 12, 2020
2 parents 0e6bdfd + c63368a commit c910212
Showing 1 changed file with 18 additions and 2 deletions.
20 changes: 18 additions & 2 deletions utils/emojis.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,30 @@
import json
import fileinput
import collections
import optparse

import emoji

# Command-line interface: an optional -n/--number limit plus one positional
# argument naming the file of line-oriented JSON tweets to read.
opt_parser = optparse.OptionParser(usage="usage: %prog [options] TWEETS_FILE")

opt_parser.add_option(
    "-n",
    "--number",
    dest="number",
    type="int",
    default=10,
    help="how many of the most common emojis to print (default: %default)",
)
options, args = opt_parser.parse_args()

# Without a positional argument the original crashed with a bare IndexError
# from list.pop(); report a proper usage error (exit status 2) instead.
if not args:
    opt_parser.error("missing path to a tweets file")

number_of_emojis = options.number
# As before, the last positional argument is the one that is read.
tweets = args.pop()

counts = collections.Counter()

EMOJI_RE = emoji.get_emoji_regexp()

for line in fileinput.input():
for line in open(tweets):
tweet = json.loads(line)
if 'full_text' in tweet:
text = tweet['full_text']
Expand All @@ -20,5 +36,5 @@
for char in EMOJI_RE.findall(text):
counts[char] += 1

for char, count in counts.most_common():
for char, count in counts.most_common(number_of_emojis):
print("%s %5i" % (char, count))

0 comments on commit c910212

Please sign in to comment.