# FrequencyAndSentimentScoreGenerator.py
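# TODO: fix the Y-axis labels of the dispersion plot.
'''This module defines the nltkProcessing class, which processes the story content loaded from one of the story JSON files (cleaned including stopwords, cleaned excluding stopwords, or raw).
Methods in this class: fileOpenerRead, fileOpenerEdit, loadProcess, defineConcordance, defineCollocation, plotDispersion, countWords, collocationGram, SIA.'''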
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer
#nltk.sentiment.SentimentIntensityAnalyzer is nltk's pretrained sentiment analyzer.
import json
import matplotlib.pyplot as plt
class nltkProcessing:
    """Load one story from a JSON file and expose NLTK-based statistics on it.

    The JSON file is expected to hold an object whose values are story texts
    (strings); ``key`` selects which story to process. Loading and
    tokenising happen immediately on construction.

    Attributes:
        filePath: Path of the JSON file the story was read from.
        key: Key of the selected story inside that JSON object.
        tokenized: List of tokens produced by ``nltk.word_tokenize``.
        textObject: ``nltk.Text`` built from ``tokenized``.
        processedContent: The story text after quote-token removal, kept so
            callers can pass it to methods such as ``SIA``.
        senIntAna: The ``SentimentIntensityAnalyzer`` used by ``SIA``.
    """

    def __init__(self, filePath, key):
        """Store the source location, then load and tokenise the story.

        Raises:
            FileNotFoundError: If ``filePath`` does not exist.
            KeyError: If ``key`` is not present in the JSON object.
        """
        self.filePath = filePath
        self.key = key
        self.tokenized = []
        # Declared up front so every attribute is visible in one place;
        # loadProcess() fills them in.
        self.textObject = None
        self.processedContent = ""
        self.senIntAna = SentimentIntensityAnalyzer()
        self.loadProcess()

    def fileOpenerRead(self, filePath):
        """Return the parsed JSON content of ``filePath`` (opened read-only)."""
        with open(filePath, "r") as file:
            return json.load(file)

    def fileOpenerEdit(self, filePath):
        """Return the parsed JSON content of ``filePath``, opened read/write.

        The file is opened with ``"r+"`` rather than ``"w"``: ``"w"`` would
        truncate the file before anything could be read from it and
        ``json.load`` would then fail on the write-only handle.
        """
        with open(filePath, "r+") as file:
            return json.load(file)

    def loadProcess(self):
        """Read the selected story, drop quote tokens and tokenise it.

        Populates ``tokenized``, ``textObject`` and ``processedContent``.
        """
        storyContent = self.fileOpenerRead(self.filePath)
        words = storyContent[self.key].split(" ")
        # Stand-alone '' and `` tokens are removed before tokenising
        # (presumably quote markers left over from an earlier cleaning pass).
        cleanedStory = " ".join(
            word for word in words if word not in ("''", "``")
        )
        self.tokenized = nltk.word_tokenize(cleanedStory)
        self.textObject = nltk.Text(self.tokenized)
        # Kept so that functions needing the processed text can access it.
        self.processedContent = cleanedStory

    def defineConcordance(self, word, width, lines):
        """Show ``word`` in context via ``nltk.Text.concordance``.

        Args:
            word: The word to look up.
            width: Character width of each context line.
            lines: Maximum number of context lines.

        Returns:
            Whatever ``nltk.Text.concordance`` returns; per the NLTK
            documentation that method prints its matches and returns None.
        """
        return self.textObject.concordance(word, width=width, lines=lines)

    def defineCollocation(self, nums):
        """Return the first ``nums`` entries of ``nltk.Text.collocation_list()``."""
        return self.textObject.collocation_list()[:nums]

    def plotDispersion(self, targets, title="Dispersion Plot"):
        """Draw and show a lexical dispersion plot for the ``targets`` words."""
        nltk.draw.dispersion.dispersion_plot(self.textObject, targets, title=title)
        plt.show()

    def countWords(self, word):
        """Return how many tokens are exactly equal to ``word`` (case-sensitive)."""
        return self.tokenized.count(word)

    def collocationGram(self, n, nums):
        """Return the ``nums`` most frequent ``n``-grams as (ngram, count) pairs."""
        NGram = nltk.ngrams(self.tokenized, n)
        frequencyDistri = nltk.FreqDist(NGram)
        return frequencyDistri.most_common(nums)

    def SIA(self, story):
        """Return the sentiment analyzer's polarity scores for the text ``story``."""
        return self.senIntAna.polarity_scores(story)
def _loadScoresheet(path):
    """Return the saved score sheet at ``path``, or an empty dict if the file does not exist yet."""
    try:
        with open(path, "r") as file:
            return json.load(file)
    except FileNotFoundError:
        # First run: there is nothing to merge with, so start a new sheet.
        return {}


def _recordSentiment(path, storyTitle, versionLabel, scores):
    """Store ``scores`` for one story version in the JSON score sheet at ``path``.

    The sheet maps each story title to a list of single-entry dicts of the
    form ``{versionLabel: [score, ...]}``. An entry identical to an existing
    one is not added twice. The sheet is rewritten on every call.
    """
    scoresheet = _loadScoresheet(path)
    versionScoreDictionary = {versionLabel: list(scores.values())}
    storyEntries = scoresheet.setdefault(storyTitle, [])
    if versionScoreDictionary not in storyEntries:
        storyEntries.append(versionScoreDictionary)
    with open(path, "w") as file:
        json.dump(scoresheet, file, indent=4)


if __name__ == "__main__":
    # Change the following three file destinations according to the specific usage context
    StoryCleanedContentSaveStopwords = "StoryContentCleanedSaveStopwords.json"
    StoryCleanedContent = "StoryContentCleaned.json"
    OriginalStory = "RawStoryContent.json"
    # Using different cleaned texts yields different sentiment analysis scores.
    # Menu choice -> (JSON file to open, message printed once it is chosen).
    storyFiles = {
        "1": (StoryCleanedContentSaveStopwords,
              "Processing using cleaned story content including stopwords...."),
        "2": (StoryCleanedContent,
              "Processing using cleaned story content excluding stopwords...."),
        "3": (OriginalStory,
              "========\nProcessing using raw story content....\n========"),
    }
    continueOrExit = "Y"
    while continueOrExit == "Y":
        fileFinder = input("Please indicate which JSON file to open: \n1 = Cleaned story content including stopwords\n2 = Cleaned story content excluding stopwords\n3 = Raw story content\nType here: ").strip()
        if fileFinder not in storyFiles:
            # Ask again instead of trying to open an empty file name.
            print("Invalid selection.")
            continue
        storyHolder, processingMessage = storyFiles[fileFinder]
        print(processingMessage)

        with open(storyHolder, "r") as file:
            story = json.load(file)
        print("Please type the title of the story to analyze: \n")
        for title in story:
            print(title)
        storyFinder = input("\nType story title here: ")
        if storyFinder not in story:
            # An unknown title would otherwise raise KeyError while loading.
            print("No story with that title was found in " + storyHolder + ".")
            continue

        print("========\nProcessing selected story format....\n========")
        processor = nltkProcessing(storyHolder, storyFinder)  # Creating a new nltkProcessing object
        processor.defineConcordance("kreme", width=60, lines=3)  # TODO: update "kreme" to user input
        collocations = processor.defineCollocation(nums=4)
        print("Most common word combinations:")
        print(collocations)
        # TODO: re-enable the dispersion graph once its Y-axis labels are fixed:
        #   processor.plotDispersion(["said", "Kreme"], title="Word Usage Dispersion Graph")
        countWord = "kreme"  # TODO: update this to user input
        print("Number of appearances for the word " + countWord + ": ", processor.countWords(countWord))
        collocationGramCombo = processor.collocationGram(3, 3)
        print("\nMost common collections of phrases: ")
        print(collocationGramCombo)

        # Sentiment analysis section: score the processed text and save the
        # result under "<story title><menu choice>" so each text version of
        # a story keeps its own scores.
        sentimentAnalysis = processor.SIA(processor.processedContent)
        _recordSentiment("SentimentScore.json", storyFinder, storyFinder + fileFinder, sentimentAnalysis)
        print(sentimentAnalysis)
        # Accept "y" / " Y " as well as "Y".
        continueOrExit = input("Would you like to continue processing stories?\n(Y/N): ").strip().upper()
    print("========\nProcessing session has been ended\n========")