-
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtwitter.py
81 lines (65 loc) · 2.56 KB
/
twitter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import tweepy
from textblob import TextBlob
import re
# Helper functions for cleaning tweet text
def clean_tweet(tweet):
    '''
    Use simple regex statements to clean tweet text by removing links and
    special characters.

    @mentions, URLs of the form ``scheme://...`` and every character that is
    not an ASCII letter, digit, space or tab are replaced by a space; runs of
    whitespace are then collapsed to single spaces and both ends are trimmed.

    Args:
        tweet: Raw tweet text.

    Returns:
        The cleaned, single-spaced text.
    '''
    # Keep the pattern in ONE raw string. Splitting it with a backslash
    # continuation inside the literal embeds the line's whitespace in the
    # regex, which makes the "special character" alternative match only when
    # the character is followed by a space.
    pattern = r"(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+://\S+)"
    return ' '.join(re.sub(pattern, " ", tweet).split())
def deEmojify(text):
    '''
    Drop every non-ASCII character (emoji included) from the given text.

    Returns None when the input is falsy (empty string or None); otherwise
    returns the text with all non-ASCII characters removed.
    '''
    # Guard clause: nothing to strip for empty / missing input.
    if not text:
        return None
    # Encoding with 'ignore' silently discards anything outside ASCII.
    ascii_only = text.encode('ascii', 'ignore')
    return ascii_only.decode('ascii')
def cleanTxt(text):
    """Strip Twitter-specific markup from *text*.

    Removes, in order: @mentions, '#' characters (the tag word itself is
    kept), the retweet marker ``RT`` together with the whitespace after it,
    and http/https links. Remaining whitespace is left untouched.

    Args:
        text: Tweet text to clean.

    Returns:
        The text with the markup listed above removed.
    """
    # The digit range must be written with an ASCII hyphen. With an en dash
    # (U+2013) the class matches only '0', the dash itself and '9', so
    # handles containing the digits 1-8 are removed only partially.
    text = re.sub(r'@[A-Za-z0-9]+', '', text)  # Removing @mentions
    text = re.sub(r'#', '', text)  # Removing '#' hash tag
    # \b restricts the match to a standalone "RT" so words that merely end
    # in those letters (e.g. "SMART phone") are not mangled.
    text = re.sub(r'\bRT\s+', '', text)  # Removing RT
    text = re.sub(r'https?://\S+', '', text)  # Removing hyperlink
    return text
def getSubjectivity(text):
    """Return the subjectivity component of TextBlob's sentiment for *text*."""
    sentiment = TextBlob(text).sentiment
    return sentiment.subjectivity
def getPolarity(text):
    """Return the polarity component of TextBlob's sentiment for *text*."""
    sentiment = TextBlob(text).sentiment
    return sentiment.polarity
def get_tweets(keyword,_size):
    """Search English tweets matching *keyword* and score their sentiment.

    Tweets that mention no user are skipped, as are tweets whose author
    location is exactly a single space. The full text of every kept tweet is
    passed through ``clean_tweet``, ``cleanTxt`` and ``clean_tweet`` again
    (the last pass re-collapses whitespace left behind by ``cleanTxt``)
    before it is scored with ``getSubjectivity`` and ``getPolarity``.

    Args:
        keyword: Query string passed to the search call as ``q``.
        _size: Requested number of tweets; sent as both ``rpp`` and
            ``count``. It is also printed to stdout.

    Returns:
        A list of six parallel lists (one entry per kept tweet), in this
        order: ``[time_stamp, location_list, twitter_user, subjectivity,
        polarity, tweet_list]``. ``twitter_user`` holds the ``name`` of the
        FIRST user mentioned in the tweet, not the tweet's author.

    Note:
        ``consumer_key``, ``consumer_secret``, ``access_key`` and
        ``access_secret`` are read as module-level names; they are not
        defined in the visible part of this file and must be supplied
        before calling this function.
    """
    print(_size)
    # Authorization to consumer key and consumer secret
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    # Access to user's access key and access secret
    auth.set_access_token(access_key, access_secret)
    # Calling api
    api = tweepy.API(auth)
    tweet_list = []
    location_list = []
    time_stamp = []
    twitter_user = []
    subjectivity = []
    polarity = []
    for tweet in api.search(q=keyword, lang="en", rpp=_size,tweet_mode='extended',count=_size):
        mentions = tweet.entities['user_mentions']
        # NOTE(review): the location test only matches a single-space string;
        # presumably an empty/blank location was meant to be skipped as
        # well - confirm with the consumers of location_list before changing.
        if not mentions or tweet.user.location == " ":
            continue
        # Compute every value first and append afterwards, so an exception
        # while cleaning or scoring cannot leave the lists with different
        # lengths.
        mentioned_name = mentions[0]['name']
        cleaned_text = clean_tweet(cleanTxt(clean_tweet(tweet.full_text)))
        subjectivity_score = getSubjectivity(cleaned_text)
        polarity_score = getPolarity(cleaned_text)
        time_stamp.append(tweet.created_at)
        twitter_user.append(mentioned_name)
        # Exactly one subjectivity entry per tweet keeps this list aligned
        # index-for-index with the other five.
        subjectivity.append(subjectivity_score)
        polarity.append(polarity_score)
        tweet_list.append(cleaned_text)
        location_list.append(tweet.user.location)
    return [time_stamp,location_list,twitter_user,subjectivity,polarity,tweet_list]
#get_tweets("sdv",12)
'''# Driver code
if __name__ == '__main__':
    # Pass the search keyword and the number of
    # tweets to request.
    get_tweets("samsung", 12) '''