-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathSubreddit Sentiment & Keyword Analysis Script.py
78 lines (54 loc) · 2.47 KB
/
Subreddit Sentiment & Keyword Analysis Script.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import pandas as pd
import praw
import nltk
import random
from pprint import pprint
# Replace each 'enter_here' placeholder (and USERNAME in the user agent) with
# your own Reddit API credentials. Quick start guide:
# https://github.com/reddit-archive/reddit/wiki/OAuth2-Quick-Start-Example#first-steps
reddit = praw.Reddit(
    user_agent='Comment Extraction (by /u/USERNAME)',
    client_id='enter_here',
    client_secret="enter_here",
    username='enter_here',
    password='enter_here',
)
from textblob import TextBlob
import matplotlib.pyplot as plt
from wordcloud import WordCloud
def subreddit_hot(subreddit):
    """Chart keywords and sentiment of a subreddit's hot posts, then list top posts.

    Steps performed:
      1. Fetch the titles of up to 50 hot posts.
      2. Save them, one per line, to ``<subreddit>_hot.txt`` (overwritten on
         every call).
      3. Show a word cloud of the titles.
      4. Show a pie chart of how many titles are negative, neutral or positive.
      5. Print the titles of this week's 10 top posts.

    Args:
        subreddit: Object providing ``hot(limit=...)`` and
            ``top(time_filter=..., limit=...)`` whose items have a ``title``
            attribute (the script passes ``reddit.subreddit(name)``). Its
            string form is used for the file name and the printed headings.
    """
    # Local import, as in the original script: STOPWORDS is only needed here.
    from wordcloud import WordCloud, STOPWORDS

    print('Commonly used words displayed as a Word Cloud for the 50 most recent hot posts in: ', subreddit)

    # Fetch once and keep the titles in memory; the file is written for the
    # user's benefit but no longer has to be re-read to drive the analysis.
    titles = [post.title for post in subreddit.hot(limit=50)]

    # Explicit UTF-8 so titles with emoji / non-ASCII text do not fail on
    # platforms whose default encoding is not UTF-8.
    with open('%s_hot.txt' % subreddit, 'w', encoding='utf-8') as file:
        file.writelines(title + '\n' for title in titles)

    if titles:
        # generate wordcloud
        stopwords = set(STOPWORDS)
        stopwords.update(['It', 'This', 'be'])
        wordcloud = WordCloud(
            stopwords=stopwords,
            height=1000,
            width=3000,
            max_words=40,
            background_color='white',
        ).generate('\n'.join(titles))
        plt.figure()
        plt.imshow(wordcloud, interpolation="bilinear")
        plt.axis("off")
        plt.margins(x=0, y=0)
        plt.show()

        # begin sentiment analysis: every title is classified, including the
        # first one.
        labels = ['Negative', 'Neutral', 'Positive']
        values = [0, 0, 0]
        for title in titles:
            values[_sentiment_bucket(TextBlob(title).polarity)] += 1

        print('\n\n', 'Sentiment breakdown for 50 most recent hot posts in: ', subreddit, '\n\n')
        colors = ['red', 'gray', 'green']
        plt.pie(values, labels=labels, colors=colors, autopct='%1.1f%%', shadow=True, startangle=140)
        plt.axis('equal')
        plt.show()
    else:
        # Nothing to plot: an empty text cannot produce a word cloud and an
        # all-zero pie chart is meaningless, so skip both charts.
        print('No hot posts found in: ', subreddit)

    # print top post titles
    print('\n\n', 'This week\'s top posts in ', subreddit)
    # time_filter is passed by keyword; the positional form is deprecated in
    # newer PRAW releases.
    for submission in subreddit.top(time_filter="week", limit=10):
        print(submission.title)


def _sentiment_bucket(polarity):
    """Return 0 (negative), 1 (neutral) or 2 (positive) for a polarity score.

    The sign of the score decides the bucket; a score of exactly 0 is neutral.
    """
    if polarity < 0:
        return 0
    if polarity > 0:
        return 2
    return 1
# Subreddits to analyze, processed in order; add more names to this tuple.
for subreddit_name in ('apple', 'microsoft'):
    subreddit_hot(reddit.subreddit(subreddit_name))