# recent_tweets_predict.py
#
# NOTE: the upstream repository was archived by its owner on Mar 15, 2023
# and is now read-only.
import tweet_search
import pandas as pd
import predict
import time
import datetime
import sys
import os
import json
from collections import OrderedDict
from constant import *
from database import session
from timedata import *
from sqlalchemy.sql import func, text
#---------------------------------------------------------------
# return proportions of positive and negative tweets written in Japanese
#---------------------------------------------------------------
def predict_negaposi():
    """Score Japanese-language tweets and summarise them as percentages.

    Runs a ``lang:ja`` search (links, replies, images and retweets are
    filtered out by the query string), feeds the ``"tweet"`` column of the
    result to ``predict.predict_tweets`` and hands the predictions to
    ``calculate_negaposi``.

    Returns:
        The ``(positive, negative)`` pair produced by ``calculate_negaposi``.
    """
    query = "lang:ja -filter:links -filter:replies -filter:images exclude:retweets"
    # NOTE(review): ``loop`` presumably controls how many result pages are
    # fetched -- confirm against tweet_search.search.
    found = tweet_search.search(keyword=query, loop=50, log=False)
    predictions = predict.predict_tweets(found["tweet"].values)
    return calculate_negaposi(predictions)
#---------------------------------------------------------------
# return proportions of positive tweets and negative tweets of a user
#---------------------------------------------------------------
def predict_user_negaposi(screen_name=" "):
    """Estimate the share of positive and negative tweets of one user.

    Args:
        screen_name: Account name. The first character is skipped during
            validation (presumably a leading "@" -- confirm with callers);
            the remainder must be non-empty and must not contain "@".
            The default ``" "`` is a "nothing supplied" sentinel.

    Returns:
        ``None`` when ``screen_name`` is the ``" "`` sentinel;
        ``(-1, -1)`` when the name is malformed or the timeline lookup
        yields no rows; otherwise the ``(positive, negative)`` pair
        produced by ``calculate_negaposi``.
    """
    if screen_name == " ":
        return None

    # Validate the name *before* querying the API so that malformed input
    # never costs a network request.
    handle = screen_name[1:]
    if handle == "" or "@" in handle:
        return -1, -1

    df = tweet_search.get_user_timeline(screen_name)
    # An empty result is treated as an error (unknown user, no tweets, ...).
    if len(df) == 0:
        return -1, -1

    tweets = df["tweet"].values
    results = predict.predict_tweets(tweets)
    return calculate_negaposi(results)
#---------------------------------------------------------------
# return proportions of positive tweets and negative tweets including a word
#---------------------------------------------------------------
def predict_word_negaposi(word=" "):
    """Estimate the share of positive and negative tweets mentioning a word.

    Args:
        word: Search term. The default ``" "`` is a "nothing supplied"
            sentinel.

    Returns:
        ``None`` when ``word`` is the ``" "`` sentinel; otherwise the
        ``(positive, negative)`` pair produced by ``calculate_negaposi``
        for the tweets found.
    """
    if word != " ":
        # Same filters as the nationwide search, prefixed with the term.
        query = word + " lang:ja -filter:links -filter:replies -filter:images exclude:retweets"
        hits = tweet_search.search(query, loop=2, log=False)
        predictions = predict.predict_tweets(hits["tweet"].values)
        return calculate_negaposi(predictions)
    return None
#---------------------------------------------------------------
# calculate proportions of positive tweets and negative tweets
#---------------------------------------------------------------
def calculate_negaposi(results, pos_threshold=0.6, neg_threshold=0.3):
    """Turn per-tweet predictions into positive/negative percentages.

    Args:
        results: Sized collection of predictions. Only the first element of
            each prediction is read and used as its score.
        pos_threshold: A score greater than or equal to this counts as
            positive.
        neg_threshold: A score strictly below this counts as negative.

    Returns:
        ``(positive, negative)`` percentages in the range 0-100. Scores that
        fall between the two thresholds count toward neither. An empty
        ``results`` yields ``(0.0, 0.0)`` instead of dividing by zero.
    """
    total = len(results)
    # Use an explicit length check: ``results`` may be a NumPy array, whose
    # truth value is ambiguous.
    if total == 0:
        return 0.0, 0.0

    pos_count = 0
    neg_count = 0
    for res in results:
        score = res[0]
        if score >= pos_threshold:
            pos_count += 1
        if score < neg_threshold:
            neg_count += 1

    return (pos_count / total) * 100, (neg_count / total) * 100
#---------------------------------------------------------------
# update DB
#---------------------------------------------------------------
def update_db(now, pos_now, neg_now):
    """Store the positive/negative percentages for the minute of ``now``.

    Rows are keyed by hour: the id is ``now`` formatted as ``YYYYMMDDHH`` and
    converted to ``int``. Within a row the values go into the ``posMM`` /
    ``negMM`` columns, where ``MM`` is the zero-padded minute of ``now``.
    A missing row is inserted; an existing row is updated in place.

    Args:
        now: ``datetime`` identifying the hour row and the minute columns.
        pos_now: Positive percentage to store.
        neg_now: Negative percentage to store.
    """
    minute = "{0:02d}".format(now.minute)
    p_col = "pos" + minute
    n_col = "neg" + minute
    now_id = int(now.strftime("%Y%m%d%H"))

    timedata = session.query(TimeData).filter(TimeData.id == now_id).scalar()
    if timedata is None:
        new_timedata = TimeData()
        new_timedata.id = now_id
        # setattr goes through the ORM's attribute instrumentation; writing
        # to ``__dict__`` directly bypasses change tracking.
        setattr(new_timedata, p_col, pos_now)
        setattr(new_timedata, n_col, neg_now)
        session.add(new_timedata)
    else:
        # Column names cannot be bound parameters, but they are built only
        # from an integer minute, so interpolating them is safe. The values
        # and the id are bound, which also fixes the missing space before
        # WHERE in the previous hand-concatenated statements.
        # NOTE(review): assumes TimeData is mapped to the "negaposi" table.
        statement = text(
            "UPDATE negaposi SET {0} = :pos, {1} = :neg WHERE id = :row_id".format(
                p_col, n_col
            )
        )
        session.execute(
            statement, {"pos": pos_now, "neg": neg_now, "row_id": now_id}
        )
    session.commit()
#---------------------------------------------------------------
# return negaposi data used in index.html
#---------------------------------------------------------------
def get_latest_data():
    """Return one populated five-minute slot from the newest hour row.

    The row with the largest id is loaded and its ``pos00``/``neg00`` ...
    ``pos55``/``neg55`` columns are scanned in ascending minute order. The
    first slot whose positive and negative values are both different from
    ``0.0`` is returned.

    Returns:
        ``(data_time, positive, negative)`` where ``data_time`` is the row id
        followed by the two-digit minute, as a string; ``None`` when no slot
        in the row qualifies.
    """
    latest_id = session.query(func.max(TimeData.id).label("latest")).one().latest
    latest_row = session.query(TimeData).filter(TimeData.id == latest_id).one()

    # NOTE(review): scanning upward returns the *earliest* populated slot of
    # the hour, not the most recent one -- confirm this is what the page
    # expects before changing the order.
    for minute in range(0, 60, 5):
        str_minute = "{0:02d}".format(minute)
        # getattr uses the ORM's attribute access; indexing the instance
        # ``__dict__`` raises KeyError for expired or unloaded attributes.
        latest_pos = getattr(latest_row, "pos" + str_minute)
        latest_neg = getattr(latest_row, "neg" + str_minute)
        if latest_pos != 0.0 and latest_neg != 0.0:
            str_data_time = str(latest_id) + str_minute
            return str_data_time, latest_pos, latest_neg
    return None
def get_24hours_from(start_id):
    """Return up to 24 rows with ``id >= start_id`` as a JSON string.

    Args:
        start_id: First row id to include; must be convertible to ``int``.

    Returns:
        JSON array (string) of row objects from the ``negaposi`` table,
        ordered by ascending id and limited to 24 rows.

    Raises:
        ValueError: If ``start_id`` is not an integer value.
    """
    # Bind the id instead of concatenating it into the statement so that a
    # caller-supplied value can never alter the SQL.
    statement = text(
        "SELECT * FROM negaposi WHERE id >= :start_id"
        " ORDER BY id ASC FETCH FIRST 24 ROWS ONLY"
    )
    rows = session.execute(statement, {"start_id": int(start_id)})
    return json.dumps([dict(row.items()) for row in rows])
def get_7days_from(start_id):
    """Return up to 168 rows with ``id >= start_id`` as a JSON string.

    Args:
        start_id: First row id to include; must be convertible to ``int``.

    Returns:
        JSON array (string) of row objects from the ``negaposi`` table,
        ordered by ascending id and limited to 168 rows (7 x 24).

    Raises:
        ValueError: If ``start_id`` is not an integer value.
    """
    # Bind the id instead of concatenating it into the statement so that a
    # caller-supplied value can never alter the SQL.
    statement = text(
        "SELECT * FROM negaposi WHERE id >= :start_id"
        " ORDER BY id ASC FETCH FIRST 168 ROWS ONLY"
    )
    rows = session.execute(statement, {"start_id": int(start_id)})
    return json.dumps([dict(row.items()) for row in rows])
#---------------------------------------------------------------
# return user data used in tree.html
#---------------------------------------------------------------
def get_user_data(user_name):
    """Return the positive/negative percentages for ``user_name``.

    Thin wrapper around ``predict_user_negaposi`` that always returns a pair.

    Args:
        user_name: Account name, passed through unchanged.

    Returns:
        ``(positive, negative)``. ``(-1, -1)`` signals that no result could
        be produced.
    """
    result = predict_user_negaposi(user_name)
    if result is None:
        # predict_user_negaposi returns None for its blank sentinel; report
        # that with the usual (-1, -1) error pair rather than failing while
        # unpacking.
        return -1, -1
    pos, neg = result
    return pos, neg
#---------------------------------------------------------------
# main function
#---------------------------------------------------------------
def main():
    """Take one sentiment sample and store it in the database.

    The timestamp is taken before the prediction runs. If the prediction
    fails, the failure is logged and ``0``/``0`` is stored for this slot so
    that the database is still updated.
    """
    import logging

    now = datetime.datetime.now()
    try:
        pos, neg = predict_negaposi()
    except Exception:
        # Best effort: keep going with zeros, but leave a trace of the
        # failure. ``except Exception`` lets KeyboardInterrupt and
        # SystemExit propagate, unlike a bare ``except``.
        logging.getLogger(__name__).exception(
            "predict_negaposi failed; storing 0/0 for this slot"
        )
        pos = 0
        neg = 0
    update_db(now, pos, neg)


if __name__ == "__main__":
    main()