-
Notifications
You must be signed in to change notification settings - Fork 1
/
opinion.py
178 lines (132 loc) · 5.29 KB
/
opinion.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
import re
import nltk
import joblib
import tweepy
import datetime
import os
from dotenv import load_dotenv
load_dotenv()
def _compile_emoticon_patterns(table):
    """Turn (label, emoticons) pairs into (label, compiled regex) pairs.

    Each emoticon is used as regex text as-is, except that a closing
    parenthesis is widened to match ')', '}' or ']' and an opening
    parenthesis to match '(', '{' or '['.
    """
    compiled = []
    for label, faces in table:
        alternatives = [
            face.replace(')', r'[)}\]]').replace('(', r'[({\[]')
            for face in faces
        ]
        compiled.append((label, re.compile('(' + '|'.join(alternatives) + ')')))
    return compiled


class TwitterClient(object):
    """Fetch tweets through Tweepy and label each one with a sentiment."""

    # Patterns are compiled once at class creation instead of on every call.
    _URL_RE = re.compile(r'((www\.[^\s]+)|(https?://[^\s]+))')
    _HANDLE_RE = re.compile(r'@[^\s]+')
    _HASHTAG_RE = re.compile(r'#([^\s]+)')
    # Two or more repetitions of the same character (case-insensitive).
    _REPEAT_RE = re.compile(r"(.)\1{1,}", re.IGNORECASE)

    # Emoticon text is regex source: r':\*' is a literal ':*'.
    # The mirrored sad faces are '):' and ')-:'; '(:' and '(-:' are happy
    # faces and belong to the positive list only.
    _EMOTICONS = (
        ('__positive__', [':-)', ':)', '(:', '(-:', ':-D', ':D', 'X-D',
                          'XD', 'xD', '<3', r':\*', ';-)', ';)', ';-D',
                          ';D', '(;', '(-;']),
        ('__negative__', [':-(', ':(', '):', ')-:', ':,(', ':\'(', ':"(',
                          ':((']),
    )
    _EMOTICON_PATTERNS = _compile_emoticon_patterns(_EMOTICONS)

    def __init__(self):
        """Authenticate against Twitter with app-only credentials.

        The key and secret are read from the API_KEY and API_SECRET_KEY
        environment variables. If authentication fails, the error is
        printed and ``self.api`` is left as ``None``.
        """
        consumer_key = os.getenv('API_KEY')
        consumer_secret = os.getenv('API_SECRET_KEY')
        # Always define both attributes so later code can test for None
        # instead of failing with AttributeError.
        self.auth = None
        self.api = None
        try:
            self.auth = tweepy.AppAuthHandler(consumer_key, consumer_secret)
            self.api = tweepy.API(self.auth)
        except Exception as err:
            # Best-effort start-up: report the cause and keep the object
            # usable (get_tweets then returns no tweets).
            print("Error: Authentication Error: " + str(err))

    # Processing Tweets
    def preprocessTweets(self, tweet):
        """Normalise raw tweet text before stemming and classification.

        Steps, in order: URLs become ``URL``; ``@name`` becomes
        ``__HANDLE``; the ``#`` is dropped from hashtags; leading and
        trailing quote characters are stripped; runs of a repeated
        character are cut to two; emoticons become ``__positive__`` or
        ``__negative__`` padded with spaces; the result is lower-cased.

        Returns the normalised string.
        """
        # Convert www.* or https?://* to URL
        tweet = self._URL_RE.sub('URL', tweet)
        # Convert @username to __HANDLE
        tweet = self._HANDLE_RE.sub('__HANDLE', tweet)
        # Replace #word with word
        tweet = self._HASHTAG_RE.sub(r'\1', tweet)
        # Trim surrounding single/double quotes
        tweet = tweet.strip('\'"')
        # Repeating characters like happyyyyyyyy -> happyy
        tweet = self._REPEAT_RE.sub(r"\1\1", tweet)
        # Emoticons: positive patterns are applied before negative ones
        for label, pattern in self._EMOTICON_PATTERNS:
            tweet = pattern.sub(' ' + label + ' ', tweet)
        # Convert to lower case (this also lower-cases URL and __HANDLE)
        return tweet.lower()

    # Stemming of Tweets
    def stem(self, tweet):
        """Return ``tweet`` with every kept word Porter-stemmed.

        Words shorter than three characters are dropped. Words starting
        with ``__`` are passed to the stemmer unchanged; all others are
        lower-cased first. The stems are joined with single spaces.
        """
        stemmer = nltk.stem.PorterStemmer()
        words = [word if word.startswith('__') else word.lower()
                 for word in tweet.split()
                 if len(word) >= 3]
        return ' '.join(stemmer.stem(word) for word in words)

    # Predict the sentiment
    def predict(self, tweet, classifier):
        """Classify the sentiment of one tweet.

        An emoticon marker decides the result directly: 1 when the
        processed text contains ``__positive__``, otherwise 0 when it
        contains ``__negative__``. Without a marker the processed text is
        passed to ``classifier.predict`` as a one-element list and the
        first prediction is returned.
        """
        tweet_processed = self.stem(self.preprocessTweets(tweet))
        if '__positive__' in tweet_processed:
            return 1
        if '__negative__' in tweet_processed:
            return 0
        # presumably a scikit-learn style pipeline that accepts raw
        # strings - verify against how svmClassifier.pkl was trained
        return classifier.predict([tweet_processed])[0]

    def get_tweets(self, classifier, query, count=1000):
        """Fetch tweets matching ``query`` and attach a sentiment to each.

        Returns a list of ``{'text': ..., 'sentiment': ...}`` dicts. A
        tweet with retweets is added only if an identical entry is not
        already in the list. The list is empty when the client is not
        authenticated or the Twitter request fails; the failure is
        printed rather than raised.
        """
        tweets = []
        api = getattr(self, 'api', None)
        if api is None:
            print("Error : Twitter API client is not available")
            return tweets
        try:
            # NOTE(review): the search endpoint appears to cap a page at
            # 100 results, so count=1000 may be clamped - confirm; larger
            # result sets would need pagination.
            fetched_tweets = api.search_tweets(query, count=count)
            for tweet in fetched_tweets:
                parsed_tweet = {
                    'text': tweet.text,
                    'sentiment': self.predict(tweet.text, classifier),
                }
                # If the tweet has retweets, ensure it is appended only once.
                if tweet.retweet_count > 0 and parsed_tweet in tweets:
                    continue
                tweets.append(parsed_tweet)
        except tweepy.errors.TweepyException as e:
            print("Error : " + str(e))
        # Always hand back a list so callers can iterate or take len().
        return tweets
# Main function
def main():
    """Run the interactive opinion-mining loop.

    Loads the classifier from ``svmClassifier.pkl``, then repeatedly asks
    for a topic, fetches and classifies tweets for it, and prints the
    share of negative (sentiment 0) and positive (sentiment 1) tweets.
    The loop repeats while the user answers 0 to the exit prompt.
    """
    print('Loading the Classifier, please wait....')
    # NOTE: joblib.load unpickles the file, which can execute arbitrary
    # code - only load a classifier file from a trusted source.
    classifier = joblib.load('svmClassifier.pkl')
    # creating object of TwitterClient Class
    api = TwitterClient()
    while True:
        query = input("Enter the Topic for Opinion Mining: ")
        # `or []` tolerates a None result from a failed fetch.
        tweets = api.get_tweets(classifier, query, count=1000) or []
        # console output of sentiment
        print("Opinion Mining on ", query)
        if tweets:
            negative = sum(1 for tweet in tweets if tweet['sentiment'] == 0)
            positive = sum(1 for tweet in tweets if tweet['sentiment'] == 1)
            # percentage of negative tweets
            print("Negative tweets percentage: ", 100 * negative / len(tweets))
            # percentage of positive tweets
            print("Positive tweets percentage: ", 100 * positive / len(tweets))
        else:
            # No tweets means no denominator for the percentages.
            print("No tweets found for this topic.")
        now = datetime.datetime.now()
        print("Date and Time analysed: ", str(now))
        # Re-prompt until the answer is an integer.
        while True:
            try:
                q = int(input("Do you want to exit[Press 1 for Yes/ 0 for No]? "))
                break
            except ValueError:
                print("Please enter 1 or 0.")
        # 0 means "do not exit": go round again; anything else stops.
        if q != 0:
            break


if __name__ == "__main__":
    main()