-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathquestion.py
100 lines (92 loc) · 3.09 KB
/
question.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
def isTag(tuple, tag_prefix):
    """Report whether a tagged token's tag begins with ``tag_prefix``.

    Args:
        tuple: A sequence whose second item is the tag string; only the
            first character of that tag is examined.
        tag_prefix: The single character to compare against.

    Returns:
        True when the tag's first character equals ``tag_prefix``,
        otherwise False.
    """
    # NOTE: the parameter name shadows the builtin ``tuple``; it is kept
    # because callers may pass it by keyword.
    leading_char = tuple[1][0]
    return leading_char == tag_prefix
def stripPunc(sent):
    """Return ``sent`` with every ``string.punctuation`` character removed.

    Args:
        sent: The text to clean.

    Returns:
        A copy of ``sent`` without ASCII punctuation; all other characters,
        including whitespace, are left untouched.
    """
    # A translation table with an empty mapping and a deletion set.
    deletion_table = str.maketrans('', '', string.punctuation)
    cleaned = sent.translate(deletion_table)
    return cleaned
def _lower_first(word):
    """Return ``word`` with only its first character lower-cased."""
    return word[:1].lower() + word[1:]


def _upper_first(word):
    """Return ``word`` with only its first character upper-cased."""
    return word[:1].upper() + word[1:]


def question(sent="", debug=False):
    """Turn a declarative sentence into a simple question.

    The sentence is stripped of punctuation, tokenized and POS-tagged, then
    scanned left to right. Verb tokens are collected into a verb phrase and
    the remaining kept tokens into a body. The two are then recombined
    behind a leading modal, a leading existential word, "Who" or "What".

    NOTE(review): ``pos_tag`` and ``word_tokenize`` are not defined in this
    block; they are presumably NLTK's, producing Penn Treebank style tags
    such as "MD", "EX", "IN" and "NNP" -- confirm against the file imports.

    Args:
        sent: The input sentence.
        debug: When true, print the input, the tagged tokens and the result.

    Returns:
        The generated question string, or ``sent`` unchanged when its first
        token has a tag starting with "W" (it is already a wh-question).
    """
    q_body = ""
    verb = ""
    MD_tag = ""
    Exist_tag = ""
    isProper = None  # None until the first noun decides "Who" vs "What"
    isPrepositional = False
    isWh = False
    isMD = False
    isExist = False
    nounFound = False

    tagged_sent = pos_tag(word_tokenize(stripPunc(sent)))
    if debug:
        print(sent, end="\n\n")
        print(tagged_sent, end="\n\n")

    for idx, tagged in enumerate(tagged_sent):
        token, tag = tagged[0], tagged[1]

        # Punctuation and cardinal numbers never reach the output.
        if tag in (".", ",", "CD"):
            continue
        if tag == "IN":
            isPrepositional = True
            continue

        is_verb = isTag(tagged, "V")

        # After a preposition, drop everything until the first verb shows up.
        if isPrepositional and not verb and not is_verb:
            continue
        # Once a noun and a verb phrase are in hand, stop at the first
        # non-verb token past index 1.
        if nounFound and idx > 1 and verb and not is_verb:
            break
        # Sentence already opens with a wh-word: hand it back untouched.
        # (The flag name must match the one tested after the loop.)
        if idx == 0 and isTag(tagged, "W"):
            isWh = True
            break
        if tag == "MD":
            isMD = True
            MD_tag = _upper_first(token)
            continue
        if tag == "EX":
            isExist = True
            Exist_tag = _upper_first(token)
            continue

        if is_verb:
            # Verbs always start with a lower-case letter in the output.
            token = _lower_first(token)
            verb = verb + " " + token if verb else token
            continue

        # Reached here only for non-verb tokens.
        if tag in ("NNP", "NNPS"):
            # Proper nouns keep their capitalisation.
            nounFound = True
            if isProper is None:
                isProper = True
        elif isTag(tagged, "N"):
            nounFound = True
            token = _lower_first(token)
            if isProper is None:
                isProper = False
        elif isTag(tagged, "P"):
            nounFound = True
            # Keep the pronoun "I" capitalised; lower-case any other.
            if not (tag == "PRP" and token == "I"):
                token = _lower_first(token)
        elif not token.isupper():
            # Neither verb nor noun: lower-case unless it is all capitals.
            token = _lower_first(token)
        q_body += " " + token

    if isWh:
        if debug:
            print(sent, end="\n\n")
        return sent

    if isMD:
        q_body = MD_tag + q_body + " " + verb + "?"
    elif isExist:
        q_body = Exist_tag + q_body + " " + verb + "?"
    elif isProper:
        q_body = "Who " + verb + q_body + "?"
    else:
        q_body = "What " + verb + q_body + "?"
    if debug:
        print(q_body, end="\n\n")
    return q_body