# main.py
# Text analysis script: keyword density, content statistics, readability and
# sentiment of spreadsheet text, exported to CSV files.
import csv
from collections import Counter
from textblob import TextBlob
import textstat
import openpyxl
# Read text from the first column of an xlsx file
def read_text_from_xlsx(file_path):
    """Return the first-column text of an xlsx workbook as one string.

    The workbook at ``file_path`` is loaded with openpyxl and the first
    column of its active sheet is walked top to bottom. Every cell whose
    value is not ``None`` is converted with ``str`` and the pieces are
    joined with single spaces; surrounding whitespace of the final string
    is stripped.
    """
    active_sheet = openpyxl.load_workbook(file_path).active
    first_column = active_sheet.iter_rows(min_col=1, max_col=1, values_only=True)
    pieces = [str(row[0]) for row in first_column if row[0] is not None]
    return " ".join(pieces).strip()
# Load the text to analyze from the first column of the spreadsheet.
# NOTE(review): 'your_file.xlsx' is presumably a placeholder path; the
# workbook is read as soon as this module-level statement executes.
text = read_text_from_xlsx('your_file.xlsx')
from itertools import chain
from collections import Counter
# Analyze keyword density for 1-word and 2-5-word phrases
def keyword_density(text, min_words=1, max_words=5, include_words=None, exclude_words=None):
    """Return the density (in percent) of every n-word phrase in ``text``.

    The text is lower-cased and split on whitespace. A word is kept when
    ``include_words`` is ``None`` or contains it, and ``exclude_words`` is
    ``None`` or does not contain it. For each phrase length from
    ``min_words`` to ``max_words`` (inclusive) all consecutive runs of kept
    words are counted.

    Returns a dict keyed by ``'<n>-word phrases, <phrase>'`` whose value is
    the phrase count divided by the number of kept words, times 100.
    """
    def _is_kept(token):
        if include_words is not None and token not in include_words:
            return False
        if exclude_words is not None and token in exclude_words:
            return False
        return True

    tokens = [token for token in text.lower().split() if _is_kept(token)]
    total = len(tokens)

    densities = {}
    for size in range(min_words, max_words + 1):
        # Zipping the token list against its own shifted copies yields every
        # window of `size` consecutive tokens.
        windows = zip(*(tokens[offset:] for offset in range(size)))
        for gram, hits in Counter(windows).items():
            phrase = ' '.join(gram)
            densities[f'{size}-word phrases, {phrase}'] = (hits / total) * 100
    return densities
# Content analysis
def content_analysis(text):
    """Compute basic size statistics for ``text``.

    Returns a dict with the whitespace-separated word count, the total
    character length, the number of alphabetic characters, the sentence and
    syllable counts reported by ``textstat``, and the two derived averages
    (words per sentence, syllables per word).

    The averages are reported as ``0.0`` when their denominator is zero
    (for example for empty or whitespace-only text) instead of raising
    ``ZeroDivisionError``.
    """
    word_count = len(text.split())
    sentences = textstat.sentence_count(text)
    syllables = textstat.syllable_count(text)

    # Guard both ratios: text without words (or sentences) has no meaningful
    # average, and an unguarded division would abort the whole script.
    avg_words_per_sentence = word_count / sentences if sentences else 0.0
    avg_syllables_per_word = syllables / word_count if word_count else 0.0

    return {
        'Word Count': word_count,
        'Character Length': len(text),
        'Letters': sum(c.isalpha() for c in text),
        'Sentences': sentences,
        'Syllables': syllables,
        'Average Words/Sentence': avg_words_per_sentence,
        'Average Syllables/Word': avg_syllables_per_word,
    }
# Readability analysis
def readability_analysis(text):
    """Score ``text`` with six ``textstat`` readability formulas.

    Returns a dict mapping each display label to the value returned by the
    corresponding ``textstat`` function when called on ``text``.
    """
    # (label, scoring function) pairs, evaluated in this order.
    scorers = (
        ('Reading Ease', textstat.flesch_reading_ease),
        ('Grade Level', textstat.flesch_kincaid_grade),
        ('Gunning Fog', textstat.gunning_fog),
        ('Coleman Liau Index', textstat.coleman_liau_index),
        ('Smog Index', textstat.smog_index),
        ('Automated Reading Index', textstat.automated_readability_index),
    )
    return {label: score(text) for label, score in scorers}
# Sentiment analysis
def sentiment_analysis(text):
    """Return one record per sentence that TextBlob finds in ``text``.

    Each record is a dict holding the sentence as a string, its TextBlob
    sentiment polarity, the number of words TextBlob reports for it, and
    the ``textstat`` Flesch reading-ease score of the sentence.
    """
    return [
        {
            'Sentence': str(piece),
            'Sentiment': piece.sentiment.polarity,
            'Words Quantity': len(piece.words),
            'Readability': textstat.flesch_reading_ease(str(piece)),
        }
        for piece in TextBlob(text).sentences
    ]
# Write results to a CSV file
def export_to_csv(data, filename):
    """Write a list of row dicts to ``filename`` as a UTF-8 CSV file.

    The header row is taken from the keys of the first dict in ``data``;
    every dict is then written as one row via ``csv.DictWriter``.

    When ``data`` is empty there are no columns to derive, so the file is
    created (or truncated) and left empty instead of raising ``IndexError``.
    """
    with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
        if not data:
            # No rows means no known header; leave the file empty.
            return
        writer = csv.DictWriter(csvfile, fieldnames=list(data[0].keys()))
        writer.writeheader()
        writer.writerows(data)
# NOTE: the analysis and the merge of the content/readability results run
# once, in the script section further down, right before the CSV exports.
# Export combined data (content analysis and readability analysis) to a CSV file
def export_combined_data_to_csv(data, filename):
    """Write a single dict to ``filename`` as a two-line UTF-8 CSV file.

    The keys of ``data`` become the header row and its values the one data
    row that follows.
    """
    with open(filename, 'w', newline='', encoding='utf-8') as out:
        sheet = csv.DictWriter(out, fieldnames=list(data.keys()))
        sheet.writeheader()
        sheet.writerows([data])
# Run every analysis once on the loaded text
keyword_density_data = keyword_density(text)
content_analysis_data = content_analysis(text)
readability_analysis_data = readability_analysis(text)
sentiment_analysis_data = sentiment_analysis(text)

# Content statistics and readability scores share a single one-row CSV
combined_data = dict(content_analysis_data, **readability_analysis_data)
export_combined_data_to_csv(combined_data, 'content_readability_analysis.csv')

# One row per phrase: its label and its density percentage
keyword_density_list = [
    {'Keyword': phrase, 'Density': density}
    for phrase, density in keyword_density_data.items()
]
export_to_csv(keyword_density_list, 'keyword_density.csv')

# One row per sentence found by the sentiment analysis
export_to_csv(sentiment_analysis_data, 'sentiment_analysis.csv')