File tree Expand file tree Collapse file tree 1 file changed +4
-3
lines changed Expand file tree Collapse file tree 1 file changed +4
-3
lines changed Original file line number Diff line number Diff line change @@ -85,10 +85,11 @@ def to_ts_vector(text: str):
85
85
# 替换字符串
86
86
text = replace_word (word_dict , text )
87
87
# 分词
88
- result = jieba .posseg .lcut (text , HMM = True , use_paddle = True )
88
+ filter_word = jieba .analyse .extract_tags (text , topK = 100 )
89
+ result = jieba .lcut (text , HMM = True , use_paddle = True )
89
90
# 过滤标点符号
90
- result = [item for item in result if not jieba_remove_flag_list .__contains__ (item . flag ) ]
91
- result_ = [{'word' : get_key_by_word_dict (result [index ]. word , word_dict ), 'index' : index } for index in
91
+ result = [item for item in result if filter_word .__contains__ (item ) and len ( item ) < 10 ]
92
+ result_ = [{'word' : get_key_by_word_dict (result [index ], word_dict ), 'index' : index } for index in
92
93
range (len (result ))]
93
94
result_group = group_by (result_ , lambda r : r ['word' ])
94
95
return " " .join (
You can’t perform that action at this time.
0 commit comments