-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdiscrete.py
155 lines (144 loc) · 3.9 KB
/
discrete.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
import json
import re
import numpy as np
from snownlp import SnowNLP
import pkuseg
import matplotlib.pyplot as plt
from jieba.analyse import *
# Split the Shijing (Classic of Poetry) into its three sections:
# feng (Airs, poems 0-159), ya (Odes, 160-264), song (Eulogies, 265-304).
# Bug fix: use `with` so both files are closed — the original left the
# stop-word file handle open forever.
with open('shijing.json', 'r', encoding='UTF-8', errors='ignore') as f:
    context = json.loads(f.read())
feng = []
ya = []
song = []
for i in range(305):
    if i < 160:
        feng.append(context[i]['content'])
    elif i < 265:
        ya.append(context[i]['content'])
    else:
        song.append(context[i]['content'])
# Load the Chinese stop-word list, one word per line.
path = 'cn_stopwords.txt'
with open(path, 'r', encoding='UTF-8', errors='ignore') as f:
    stopword = re.split(r'[\n]', f.read())
def poem_character_number(poemlist):
    """Return the total character count of *poemlist*, punctuation excluded.

    Takes a list of strings (the lines/stanzas of one poem), concatenates
    them, and counts every character that is not in the punctuation set.
    """
    punctuation = ",。!?、 ()【】<>《》=:+-*—“”…\n"
    text = ''.join(poemlist)
    return sum(1 for ch in text if ch not in punctuation)
def compare_character():
    """Print the average poem length, in characters, of each section.

    Reads the module-level ``feng``/``ya``/``song`` lists.
    Fix: derive each divisor from ``len()`` instead of the hard-coded
    160/105/40, so the function stays correct if the slicing changes.
    """
    print('风部分篇目平均长度')
    print(sum(poem_character_number(poem) for poem in feng) / len(feng))
    print('雅部分篇目平均长度')
    print(sum(poem_character_number(poem) for poem in ya) / len(ya))
    print('颂部分篇目平均长度')
    print(sum(poem_character_number(poem) for poem in song) / len(song))
def variance(poemlist):
    """Return the variance of sentence lengths across *poemlist*.

    Sentences are split on full-width punctuation. The empty fragment
    produced after the final punctuation mark is deliberately kept,
    matching the original behaviour.
    """
    sentences = re.split(r'[,。!?]', ''.join(poemlist))
    lengths = [len(s) for s in sentences]
    return np.var(lengths)
def compare_variance():
    """Print the mean sentence-length variance of each section.

    Reads the module-level ``feng``/``ya``/``song`` lists.
    Fix: derive each divisor from ``len()`` instead of the hard-coded
    160/105/40, keeping it consistent with the section slicing.
    """
    print('风部分篇目平均方差')
    print(sum(variance(poem) for poem in feng) / len(feng))
    print('雅部分篇目平均方差')
    print(sum(variance(poem) for poem in ya) / len(ya))
    print('颂部分篇目平均方差')
    print(sum(variance(poem) for poem in song) / len(song))
def remove_stop(poem_list):
    """Segment each sentence of *poem_list* with pkuseg and return the
    tokens with stop words removed.

    Bug fix: the original built the cleaned list and then fell off the
    end of the function, returning ``None`` — the computation was lost.
    Reads the module-level ``stopword`` list.
    """
    seg = pkuseg.pkuseg()
    word_list = []
    for sentence in poem_list:
        word_list += seg.cut(sentence)
    return [word for word in word_list if word not in stopword]
def cal_emo(test):
    """Return the mean SnowNLP sentiment score over the sentences of *test*.

    *test* is an iterable of strings; it is concatenated and split on
    full-width punctuation. The trailing empty fragment left after the
    final punctuation mark is dropped before averaging.
    """
    sentences = re.split(r'[,。!?]', ''.join(test))
    del sentences[-1]  # discard the empty piece after the final mark
    total = 0
    for sentence in sentences:
        total += SnowNLP(sentence).sentiments
    return total / len(sentences)
def mean_emo(poem):
    """Return the average per-poem sentiment score over all poems in *poem*."""
    scores = [cal_emo(single) for single in poem]
    return sum(scores) / len(poem)
def draw_emotion(poem):
    """Plot each poem's sentiment score against its index and show the figure."""
    x = range(len(poem))
    y = [cal_emo(single) for single in poem]
    # Bug fix: the axis labels were swapped — x is the poem index
    # ('number'), y is the sentiment score ('emotion value').
    plt.xlabel('number')
    plt.ylabel('emotion value')
    plt.plot(x, y)
    plt.show()
def topic(poem):
    """Return the top-64 TF-IDF keywords of *poem*.

    *poem* is an iterable of poems, each itself an iterable of strings.
    Every string is segmented with pkuseg, the tokens are joined with
    spaces, and jieba's ``extract_tags`` selects the keywords.
    """
    seg = pkuseg.pkuseg()
    tokens = []
    for single_poem in poem:
        for line in single_poem:
            tokens += seg.cut(line)
    corpus = ' '.join(tokens)
    return [kw for kw, _weight in extract_tags(corpus, topK=64, withWeight=True)]
if __name__ == '__main__':
    # Entry point: un-comment the analyses to run below as needed.
    #compare_character()
    #compare_variance()
    # The full anthology: Airs + Odes + Eulogies.
    sj = feng + ya + song
    print(mean_emo(sj))
    #draw_emotion(ya)
    #sj_key = topic(sj)
    #print(sj_key)