-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathsentence2logistcal.py
executable file
·156 lines (133 loc) · 5.43 KB
/
sentence2logistcal.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
# -*- coding:utf-8 -*-
import os
import jieba.posseg as pseg
import jieba
from readDict import readPropertyWord
from readDict import readQuestionWord
from pyltp import Postagger, Parser
from const.controller import LTP_DATA_DIR
# Load the project-specific user dictionary into jieba before any segmentation happens.
jieba.load_userdict('./qadata/userdict.txt')
propertylist, propertydict = readPropertyWord() # read the relation (property) words and build a dictionary from them
questionlist, questiondict = readQuestionWord() # read the question words and build a dictionary from them
# Tags that mark a token as a recognised entity; findobject and answersemantic
# test membership in this list. NOTE(review): 'VER' presumably stands for a
# verse (see the verseNextTo/verseBeforeTo relations below) - confirm 'POT'.
nertypelist = ['VER', 'POT']
def answerrecognition(sentence, entitylist, poslist, indexset):
    """Recognise entities, relation words and question words in *sentence*.

    The sentence is segmented with ``jieba.posseg`` and the tokens are walked
    from left to right:

    * if the current token is a substring of the first remaining entry of
      ``entitylist``, that entity is emitted together with the first entry of
      ``poslist``, every following token that is also a substring of the
      entity is skipped, and the entry is popped from both lists;
    * otherwise the token itself is emitted, tagged ``"property"`` when it is
      in ``propertylist``, ``"question"`` when it is in ``questionlist`` and
      ``"null"`` in every other case.

    Note that ``entitylist`` and ``poslist`` are consumed in place, so the
    caller's lists shrink as entities are matched.

    Args:
        sentence: text to segment.
        entitylist: already recognised entity strings, in sentence order.
        poslist: entity-type tag for each entry of ``entitylist``.
        indexset: iterable of position groups; only the smallest value of
            each group (capped at 1000) is computed and it is not used for
            the result.

    Returns:
        A ``(words, tags)`` pair of parallel lists.
    """
    # Smallest position of every group, starting from the sentinel 1000.
    # Nothing below reads it; it is evaluated only to keep the original
    # iteration over ``indexset`` intact.
    _first_positions = [min([1000] + list(group)) for group in indexset]

    tokens = [pair.word for pair in pseg.cut(sentence)]
    token_count = len(tokens)

    merged_words = []
    merged_tags = []
    cursor = 0
    while cursor != token_count:
        token = tokens[cursor]
        pending_entity = entitylist[0] if len(entitylist) != 0 else ""

        if token in pending_entity:
            # The token belongs to the next known entity: emit the entity once
            # and jump over all tokens it is made of.
            merged_words.append(pending_entity)
            merged_tags.append(poslist[0])
            while cursor < token_count and tokens[cursor] in pending_entity:
                cursor += 1
            entitylist.pop(0)
            poslist.pop(0)
            continue

        if token in propertylist:
            tag = "property"
        elif token in questionlist:
            tag = "question"
        else:
            tag = "null"
        merged_words.append(token)
        merged_tags.append(tag)
        cursor += 1

    return merged_words, merged_tags
def findproperty(i, arcshead, arcsrela, resultposlist):
    """Find the nearest "property" token at or above token *i*.

    Starting at index ``i`` the function follows the dependency heads until it
    reaches a token whose tag in ``resultposlist`` is ``"property"``.

    Args:
        i: index of the token to start from.
        arcshead: head value of every token; the code uses ``head - 1`` as the
            index of the parent, so a head of 0 ends the search.
        arcsrela: relation label of every token (only passed along).
        resultposlist: tag of every token.

    Returns:
        The index of the first "property" token found, or ``-1`` when the
        chain of heads ends without meeting one.
    """
    if resultposlist[i] == "property":
        return i

    parent = arcshead[i] - 1
    if parent < 0:
        # No parent left to inspect.
        return -1
    return findproperty(parent, arcshead, arcsrela, resultposlist)
def findobject(i, arcshead, arcsrela, resultposlist):
    """Find the nearest entity token at or above token *i*.

    Starting at index ``i`` the function follows the dependency heads until it
    reaches a token whose tag is listed in ``nertypelist``.

    The previous version recursed into ``findproperty`` after the first step,
    so from the parent onwards it searched for a "property" token instead of
    an entity token; the recursion now stays in ``findobject``.

    Args:
        i: index of the token to start from.
        arcshead: head value of every token; ``head - 1`` is used as the index
            of the parent, so a head of 0 ends the search.
        arcsrela: relation label of every token (only passed along).
        resultposlist: tag of every token.

    Returns:
        The index of the first token tagged with an entity type, or ``-1``
        when the chain of heads ends without meeting one.
    """
    if resultposlist[i] in nertypelist:
        return i

    parent = arcshead[i] - 1
    if parent < 0:
        # No parent left to inspect.
        return -1
    return findobject(parent, arcshead, arcsrela, resultposlist)
def answersemantic(resultwordlist, resultposlist):
    """Run LTP tagging and dependency parsing and build relation dictionaries.

    For every token whose tag is in ``nertypelist`` the dependency heads are
    followed (via ``findproperty``) to the nearest "property" token. When one
    is found, a dictionary describing the entity, the relation derived from
    the property word and the expected end-node type is appended to the
    result.

    Both LTP models are released in ``finally`` blocks, so they are freed even
    when tagging, parsing or one of the dictionary lookups raises.

    Args:
        resultwordlist: words of the sentence (entities already merged).
        resultposlist: tag of each word: an entity type, ``"property"``,
            ``"question"`` or ``"null"``.

    Returns:
        A list of dictionaries with the keys ``headnode``, ``headnodetype``,
        ``relation``, ``endnode``, ``endnodetype`` and ``quesion``.
    """
    # Part-of-speech tagging.
    postagger = Postagger()
    postagger.load(os.path.join(LTP_DATA_DIR, 'pos.model'))
    try:
        poslist = [str(tag) for tag in postagger.postag(resultwordlist)]
    finally:
        postagger.release()
    print(poslist)

    # Dependency parsing; heads and relations are copied out before release.
    parser = Parser()
    parser.load(os.path.join(LTP_DATA_DIR, 'parser.model'))
    try:
        arcs = parser.parse(resultwordlist, poslist)
        print("\t".join("%d:%s" % (arc.head, arc.relation) for arc in arcs))
        arcshead = [arc.head for arc in arcs]
        arcsrela = [arc.relation for arc in arcs]
    finally:
        parser.release()
    print(arcshead)
    print(arcsrela)

    # Index of the last question word in the sentence, -1 when there is none.
    quenum = -1
    for position, tag in enumerate(resultposlist):
        if tag == "question":
            quenum = position
    print("resultposlist,resultwordlist: ", resultwordlist, resultposlist)

    poedictlist = []
    for i, word in enumerate(resultwordlist):
        nodetype = resultposlist[i]
        if nodetype not in nertypelist:
            continue
        num = findproperty(i, arcshead, arcsrela, resultposlist)
        if num == -1:
            continue
        # Any "property" ancestor is accepted. An earlier, disabled variant
        # required the direct head to be the property and the arc to be
        # ATT or SBV.
        if quenum == -1:
            questr = ""
        else:
            questr = questiondict[resultwordlist[quenum]]
        propertyname = propertydict[resultwordlist[num]]
        poedictlist.append({
            "headnode": word,
            "headnodetype": nodetype,
            "relation": getrelation(propertyname, nodetype, questr),
            "endnode": "",
            "endnodetype": getnodetype(propertyname, nodetype, questr),
            "quesion": questr,  # key spelling kept: consumers may read it
        })
    print(poedictlist)
    return poedictlist
def getrelation(property, nodetype, questiontype):
    """Bridging step: map a property name and node type to a relation name.

    Args:
        property: relation name looked up for the property word.
        nodetype: entity type of the head node.
        questiontype: question type (not consulted here).

    Returns:
        ``property`` itself when it is ``"verseNextTo"`` or
        ``"verseBeforeTo"`` and ``nodetype`` is ``"VER"``; otherwise ``None``.
    """
    is_verse_link = property in ("verseNextTo", "verseBeforeTo")
    if is_verse_link and nodetype == "VER":
        return property
    return None
def getnodetype(property, nodetype, questiontype):
    """Return the type of the end node reached through *property*.

    Args:
        property: relation name looked up for the property word.
        nodetype: entity type of the head node.
        questiontype: question type (not consulted here).

    Returns:
        ``nodetype`` when ``property`` is ``"verseNextTo"`` or
        ``"verseBeforeTo"`` and ``nodetype`` is ``"VER"``; otherwise the empty
        string.
    """
    verse_links = ("verseNextTo", "verseBeforeTo")
    if property in verse_links and nodetype == "VER":
        return nodetype
    return ""