forked from UKPLab/props-de
-
Notifications
You must be signed in to change notification settings - Fork 0
/
parse_props.py
109 lines (84 loc) · 3.06 KB
/
parse_props.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
"""
Usage:
parse_props.py [INPUT] (-g|-t) [--original] [--props] [--oie] [--dep]
parse_props.py (-h|--help)
Parse sentences into the PropS representation scheme
Arguments:
INPUT input file composed of one sentence per line. if not specified, will use stdin instead
Options:
-h display this help
-t print textual PropS representation
-g print graphical representation (in svg format)
--original print original sentence
--props print the PropS representation of the input
--oie print open-ie like extractions
--dep print the intermediate dependency representation
"""
#!/usr/bin/env python
#coding:utf8
import os, sys, codecs, time, datetime
import fileinput
import os.path
from io import StringIO
from subprocess import call
from docopt import docopt
from propsde.applications.viz_tree import DepTreeVisualizer
import propsde.applications.run as run
import sys
# Python 3 has no separate ``unicode`` builtin; alias it to ``str`` so the
# rest of the module can call ``unicode(...)`` on either major version.
if sys.version_info >= (3,):
    unicode = str

# Encoding reported by stdout, falling back to the filesystem encoding when
# stdout does not report one.
stdout_encoding = sys.stdout.encoding
if not stdout_encoding:
    stdout_encoding = sys.getfilesystemencoding()
def _emit(text):
    """Print *text* to stdout, on Python 2 and Python 3 alike.

    On Python 2 a ``unicode`` value is encoded to UTF-8 before printing; on
    Python 3 the string is printed unchanged.
    """
    # The version test short-circuits, so ``unicode`` is only looked up on
    # Python 2 where it is a builtin.
    if sys.version_info[0] < 3 and isinstance(text, unicode):
        text = text.encode('utf-8')
    print(text)


def main(arguments):
    """Parse the sentences in ``arguments["file"]`` and emit the requested views.

    Iterates over the ``(graph, tree)`` pairs returned by
    ``run.parseSentences`` and, depending on the flags, prints the original
    sentence, the dependency tree, the PropS graph and the extracted
    propositions to stdout. Unless ``-t`` is set, ``--dep`` and ``--props``
    additionally write ``<base><i>_dep.svg`` / ``<base><i>_props.svg``, where
    ``<base>`` is the INPUT path without its extension (or ``output`` when no
    INPUT is given) and ``<i>`` is the zero-based sentence index.

    Args:
        arguments: mapping of command-line options. Keys read here:
            ``"file"`` (passed to ``run.parseSentences``), ``"INPUT"``,
            ``"-t"``, ``"--original"``, ``"--dep"``, ``"--props"`` and
            ``"--oie"``.
    """
    # Graphical (SVG-writing) mode is the default; ``-t`` switches it off and
    # changes the separator appended to the original sentence.
    graphical = not arguments['-t']
    sep = "<br>" if graphical else "\n"

    gs = run.parseSentences(arguments["file"])

    for i, (g, tree) in enumerate(gs):
        if arguments['INPUT']:
            file_name = os.path.splitext(arguments['INPUT'])[0] + unicode(i)
        else:
            file_name = 'output' + unicode(i)

        # Print the original sentence (in both graphical and textual mode).
        if arguments["--original"]:
            _emit(g.originalSentence + sep)

        # Print the dependency tree; in graphical mode also write it as SVG.
        if arguments['--dep']:
            if graphical:
                # The file is opened before rendering, so a failed render
                # still leaves an (empty) SVG file behind, as before.
                with codecs.open(file_name + '_dep.svg', 'w',
                                 encoding='utf-8') as f:
                    try:
                        d = DepTreeVisualizer.from_conll_unicode(tree)
                        f.write(d.as_svg(compact=True, flat=True))
                    except Exception:
                        # Best effort: report the failure and carry on.
                        print('error creating dep svg', file_name)
            _emit(tree)

        # Print the PropS output; in graphical mode also write it as SVG.
        if arguments['--props']:
            if graphical:
                try:
                    dot = g.drawToFile("", "svg")
                    with codecs.open(file_name + '_props.svg', 'w',
                                     encoding='utf-8') as f:
                        svg = dot.create(format='svg')
                        # NOTE(review): create() looks like a pydot-style call
                        # that may return bytes -- confirm. The utf-8 codecs
                        # writer expects text, so decode bytes first.
                        if isinstance(svg, bytes):
                            svg = svg.decode('utf-8')
                        f.write(svg)
                except Exception:
                    # Best effort: report the failure and carry on.
                    print('error creating props svg', file_name)
            print(str(g))

        # Print open-ie like extractions.
        if arguments["--oie"]:
            for prop in g.getPropositions('pdf'):
                print(str(prop))
if __name__ == "__main__":
    # Parse the command line against the module docstring (docopt usage text).
    arguments = docopt(__doc__)
    if arguments["INPUT"]:
        # Keep the input file open only while main() consumes it, and make
        # sure it is closed afterwards even if parsing raises.
        with codecs.open(arguments["INPUT"], encoding='utf8') as input_file:
            arguments["file"] = input_file
            main(arguments)
    else:
        # No INPUT given: read sentences from standard input.
        arguments["file"] = sys.stdin
        main(arguments)