forked from ankmathur96/Stock-Sentiment
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsentiment.py
116 lines (103 loc) · 4.02 KB
/
sentiment.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
from data import word_sentiments
#########################################################################################################
# sentiment.py currently uses a pre-calculated sentiment index used in several open-source projects.    #
# Finance-specific sentiment analysis coming soon. #
#########################################################################################################
#####UTIL#####
def reduce(reduce_fn, s, initial):
    """Fold the elements of s into a single value, left to right.

    Starting from initial, each element x of s replaces the running result
    with reduce_fn(running_result, x).

    Arguments:
    reduce_fn -- a two-argument function (accumulated value, next element)
    s -- any iterable
    initial -- the starting value; returned unchanged when s is empty

    >>> reduce(lambda total, x: total + x, [1, 2, 3], 0)
    6
    """
    # Delegate to the standard-library fold rather than re-implementing it;
    # the import is local (and aliased) because this function shares its name.
    from functools import reduce as _fold
    return _fold(reduce_fn, s, initial)
def keep_if(filter_fn, s):
    """Return a list of the elements of s for which filter_fn is truthy.

    The original order of the elements is preserved.
    """
    kept = []
    for item in s:
        if filter_fn(item):
            kept.append(item)
    return kept
##############
def make_sentiment(value):
    """Build a sentiment: a wrapper around a value that may be absent.

    value is either None (no sentiment known) or a number between -1 and 1
    inclusive. The sentiment is returned as a zero-argument function that
    gives back value when called.

    >>> positive = make_sentiment(0.2)
    >>> neutral = make_sentiment(0)
    >>> unknown = make_sentiment(None)
    >>> has_sentiment(positive)
    True
    >>> has_sentiment(neutral)
    True
    >>> has_sentiment(unknown)
    False
    >>> sentiment_value(positive)
    0.2
    >>> sentiment_value(neutral)
    0
    """
    if value is not None:
        # Only an actual number needs to be range-checked.
        assert -1 <= value <= 1, 'Bad sentiment value'

    def stored_value():
        return value

    return stored_value
def has_sentiment(s):
    """Return whether sentiment s has a value.

    s is called with no arguments to obtain its stored value. Only None
    means "no value"; a stored 0 is a real sentiment and yields True.
    """
    # Identity test on purpose: `== None` would dispatch to the stored
    # value's own __eq__, which a custom type may override.
    return s() is not None
def sentiment_value(s):
    """Return the value stored in sentiment s.

    s must hold a value; otherwise the assertion fails with
    'No sentiment value'.
    """
    assert has_sentiment(s), 'No sentiment value'
    stored = s()  # calling the sentiment yields its stored value
    return stored
def get_word_sentiment(word):
    """Look up word in word_sentiments and wrap the result as a sentiment.

    The sentiment represents the degree of positive or negative feeling in
    the given word; a word without an entry produces a sentiment that has
    no value.

    >>> sentiment_value(get_word_sentiment('good'))
    0.875
    >>> sentiment_value(get_word_sentiment('bad'))
    -0.625
    >>> sentiment_value(get_word_sentiment('winning'))
    0.5
    >>> has_sentiment(get_word_sentiment('Berkeley'))
    False
    """
    # NOTE(review): presumably word_sentiments is dict-like, so .get yields
    # None for a missing word -- confirm against the data module.
    score = word_sentiments.get(word)
    return make_sentiment(score)
def _extract_words(text):
    """Return the runs of ASCII letters found in text, lower-cased, in order."""
    import re  # local import so this block does not depend on a file-level one
    # NOTE(review): lower-casing assumes the sentiment lexicon is keyed by
    # lower-case words -- confirm against data.word_sentiments.
    return [word.lower() for word in re.findall(r'[A-Za-z]+', text)]


def analyze_site_sentiment(tweet):
    """Return a sentiment for the average feeling of the words in a text.

    The text is split into words, each word is looked up with
    get_word_sentiment, and the values of the words that have a sentiment
    are averaged. If no word has a sentiment value, the result is
    make_sentiment(None).

    Arguments:
    tweet -- the text segment to analyze (assumed to be a str)

    >>> positive = 'i love my job. #winning'
    >>> round(sentiment_value(analyze_site_sentiment(positive)), 5)
    0.29167
    >>> negative = 'i hate my job'
    >>> sentiment_value(analyze_site_sentiment(negative))
    -0.25
    >>> no_sentiment = 'berkeley golden bears!'
    >>> has_sentiment(analyze_site_sentiment(no_sentiment))
    False
    """
    scores = []
    # The file never defined the word-splitting helper the original code
    # called, so the words are extracted here instead.
    for word in _extract_words(tweet):
        word_sentiment = get_word_sentiment(word)
        if has_sentiment(word_sentiment):
            scores.append(sentiment_value(word_sentiment))
    if not scores:
        # Unknown is distinct from neutral (0): report "no value".
        return make_sentiment(None)
    return make_sentiment(sum(scores) / len(scores))
def average_sentiments(articles_by_site):
    """Return a dictionary from site names to average article sentiment.

    Each article of a site is analyzed with analyze_site_sentiment, and the
    values of the articles that have a sentiment are averaged.

    A site with no articles, or whose articles all lack a sentiment, is left
    out of the result entirely rather than being reported as 0: 0 means
    neutral sentiment, not unknown sentiment.

    Arguments:
    articles_by_site -- A dictionary from site names to lists of articles
    """
    avg_site_sentiment = {}
    for site, site_articles in articles_by_site.items():
        scores = []
        for article in site_articles:
            # Analyze each article once and reuse the result for both the
            # has-a-value check and the value itself.
            article_sentiment = analyze_site_sentiment(article)
            if has_sentiment(article_sentiment):
                scores.append(sentiment_value(article_sentiment))
        if not scores:
            continue  # unknown sentiment: omit the site
        avg_site_sentiment[site] = sum(scores) / len(scores)
    return avg_site_sentiment