This repository has been archived by the owner on Mar 15, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
collect_tweets.py
50 lines (40 loc) · 1.83 KB
/
collect_tweets.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import tweet_search
import pandas as pd
import pickle
from datetime import datetime
from tqdm import tqdm
# Query suffix appended to every search keyword by emoji_search
# (presumably Twitter search operators -- confirm against tweet_search).
filters = " lang:ja -filter:links -filter:replies -filter:images exclude:retweets"

# Exclusion terms (each emoji quoted and prefixed with "-").
# emoji_search appends this list when label == 0.
positive_emoji = (
    ' -"😀" -"😁" -"😂" -"🤣" -"😃" '
    '-"😄" -"😅" -"😆" -"😉" -"😊" '
    '-"😋" -"😎" -"😍" -"😘" -"🥰" '
    '-"😙" -"😚" -"🙂" -"🤩" -"🤗"'
)

# Exclusion terms appended by emoji_search when label == 1.
negative_emoji = (
    ' -"😫" -"😢" -"😰" -"😱" -"😩" '
    '-"😤" -"😞" -"😖" -"😟" -"😠" '
    '-"😡" -"🤬" -"👎"'
)

# Emoji to search for, mapped to the label stored with each tweet found.
# Insertion order is the order in which main() runs the searches.
key_emoji = {
    "😡": 0,
    "😩": 0,
    "😰": 0,
    "👍": 1,
    "😘": 1,
    "😆": 1,
}
#---------------------------------------------------------------
# emoji search
#---------------------------------------------------------------
def emoji_search(emoji, label, loop):
    """Search for tweets matching ``emoji`` and tag every hit with ``label``.

    The search keyword is ``emoji`` followed by the module-level ``filters``
    and one of the two exclusion lists: ``positive_emoji`` when ``label`` is
    0, ``negative_emoji`` when ``label`` is 1.

    Args:
        emoji: Emoji (string) placed at the start of the search keyword.
        label: 0 or 1; selects the exclusion list and is written to the
            ``"label"`` column of every returned row.
        loop: Forwarded unchanged to ``tweet_search.search`` (presumably the
            number of request iterations -- confirm in tweet_search).

    Returns:
        The frame returned by ``tweet_search.search`` with an additional
        ``"label"`` column holding ``label`` in every row.
    """
    # Index 0 -> positive_emoji exclusions, index 1 -> negative_emoji.
    excluded = [positive_emoji, negative_emoji][label]
    keyword = emoji + filters + excluded
    found = tweet_search.search(keyword=keyword, loop=loop)
    # Assign the scalar directly instead of concatenating a second frame on
    # axis=1: concat aligns on the index, so a result whose index is not
    # 0..n-1 would come back with misaligned, NaN-padded rows.
    return found.assign(label=label)
#---------------------------------------------------------------
# main function
#---------------------------------------------------------------
def main():
    """Run one search per entry of ``key_emoji`` and pickle the combined result.

    Progress ("i/total") is reported through ``tqdm.write``. The combined
    frame is written to ``tweets_data<YYYYmmdd_HHMMSS>.pkl`` in the current
    working directory.

    Returns:
        A DataFrame with all search results concatenated under a fresh
        0..n-1 index.
    """
    # Seed with an empty frame so the result always carries these columns,
    # even when no search returns anything.
    frames = [pd.DataFrame(columns=["time", "tweet", "label"])]
    total = len(key_emoji)
    for position, (emoji, label) in enumerate(key_emoji.items(), start=1):
        tqdm.write("{}/{}".format(position, total))
        frames.append(emoji_search(emoji, label, 180))
    # Concatenate once instead of re-copying the accumulated frame per search.
    df = pd.concat(frames, ignore_index=True)

    print("Saving tweets data...")
    filename = "tweets_data" + datetime.now().strftime('%Y%m%d_%H%M%S') + ".pkl"
    # The context manager closes the file even if pickling fails.
    with open(filename, "wb") as f:
        pickle.dump(df, f)
    print("Complete!")
    return df
# Run the collection only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()