-
Notifications
You must be signed in to change notification settings - Fork 19
/
resources.py
94 lines (83 loc) · 3.91 KB
/
resources.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
#!/usr/bin/env python
#coding=utf-8
'''
Definition of Resources class. Used inside collect.py, this stores
additional information of the training data such as the
token-to-subgraph phrasetable, the list of named entities
etc.
@author: Marco Damonte (m.damonte@sms.ed.ac.uk)
@since: 03-10-16
'''
from collections import defaultdict
import cPickle as pickle
import operator
from subgraph import Subgraph
from node import Node
from variables import Variables
class Resources:
    '''
    Holder for resources shared through class attributes.

    Nothing is stored on instances; init_table() assigns the following
    attributes directly on the class:

    phrasetable   -- in collection mode (empty is true) a nested defaultdict
                     mapping token -> {candidate: integer count}; in loading
                     mode whatever object was pickled in
                     <model_dir>/phrasetable.p (store_table() writes a plain
                     dict mapping token -> most frequent candidate).
    organizations -- None in collection mode; in loading mode a
                     defaultdict(list) filled from <model_dir>/organizations.txt.
    seen_org      -- empty list, set in collection mode only.
    forg          -- file handle opened for writing on
                     <model_dir>/organizations.txt, set in collection mode only.
    '''

    @staticmethod
    def store_table(model_dir):
        '''
        Collapse the phrasetable to one entry per token and pickle it.

        For every token in Resources.phrasetable the candidate with the
        highest count is kept (the first one encountered wins a tie) and the
        resulting plain dict is written to <model_dir>/phrasetable.p.
        Progress is printed every 100 tokens.

        model_dir -- directory (without trailing slash) receiving the file.
        '''
        table = {}
        # Single-argument print(...) with %-formatting emits the same text as
        # the former print statements and parses under both Python 2 and 3.
        print("Storing tables..")
        print("Number of tokens: %d" % len(Resources.phrasetable))
        for i, token in enumerate(Resources.phrasetable):
            if i % 100 == 0:
                print("Token: %d" % i)
            counts = Resources.phrasetable[token]
            if not counts:
                # A bare lookup on the nested defaultdict creates an entry
                # with no counts; max() would raise ValueError on it.
                continue
            table[token] = max(counts, key=counts.get)
        # Close the file deterministically so the pickle is fully flushed.
        with open(model_dir + "/phrasetable.p", "wb") as out:
            pickle.dump(table, out)

    @staticmethod
    def init_table(model_dir, empty = True):
        '''
        Initialise the class-level resources.

        model_dir -- directory (without trailing slash) holding
                     phrasetable.p and organizations.txt.
        empty     -- when true (default) start from an empty phrasetable and
                     open organizations.txt for writing; when false load both
                     files from model_dir.

        In loading mode seen_org and forg are not assigned. Missing files
        raise the usual IOError/OSError from open().
        '''
        Resources.phrasetable = defaultdict(lambda: defaultdict(int))
        if not empty:
            # NOTE(security): pickle.load executes arbitrary code embedded in
            # the file; only point model_dir at trusted model directories.
            with open(model_dir + "/phrasetable.p", "rb") as table_file:
                Resources.phrasetable = pickle.load(table_file)
            # Kept as defaultdict(list) for backward compatibility: unknown
            # keys yield [], while stored values are plain strings.
            Resources.organizations = defaultdict(list)
            with open(model_dir + "/organizations.txt") as org_file:
                for line in org_file:
                    fields = line.strip().split()
                    # Lines with fewer than two whitespace-separated fields
                    # are ignored; fields beyond the second are dropped.
                    if len(fields) > 1:
                        Resources.organizations[fields[0]] = fields[1]
        else:
            Resources.organizations = None
            Resources.seen_org = []
            # Deliberately left open: nothing in this class writes to or
            # closes the handle -- presumably the collecting code does;
            # verify against the caller.
            Resources.forg = open(model_dir + "/organizations.txt", "w")
# Resources.verbalization_list = {}
# for line in open("resources/verbalization-list-v1.06.txt"):
# line = line.strip().split()
# if line[0] == "VERBALIZE":
# var = Variables()
# nodes = []
# ntop = Node(None, var.nextVar(), line[3], False)
# nodes.append(ntop)
# relations = []
# fields = line[4:]
# for i in range(0,len(fields),2):
# if fields[i + 1] == "-":
# n = Node(None, '-', "", True)
# else:
# n = Node(None, var.nextVar(), fields[i + 1], False)
# nodes.append(n)
# relations.append((ntop,n,fields[i]))
# Resources.verbalization_list[line[1]] = Subgraph(nodes, relations)
# for line in open("resources/have-org-role-91-roles-v1.06.txt"):
# line = line.strip().split()
# if line[0] == "USE-HAVE-ORG-ROLE-91-ARG2":
# var = Variables()
# ntop = Node(None, var.nextVar(), "have-org-role-91", False)
# node = Node(None, var.nextVar(), line[1], False)
# Resources.verbalization_list[line[1]] = Subgraph([ntop, node], [(ntop, node, ":ARG2")])
# for line in open("resources/have-rel-role-91-roles-v1.06.txt"):
# if "#" in line:
# line = line.split("#")[0]
# line = line.strip().split()
# if len(line) > 0 and line[0] == "USE-HAVE-REL-ROLE-91-ARG2":
# var = Variables()
# if len(line) >= 3 and line[2] == ":standard":
# ntop = Node(None, var.nextVar(), "have-rel-role-91", False)
# node = Node(None, var.nextVar(), line[3], False)
# Resources.verbalization_list[line[1]] = Subgraph([ntop, node], [(ntop, node, ":ARG2")])
# Resources.verbalization_list[line[3]] = Subgraph([ntop, node], [(ntop, node, ":ARG2")])
# else:
# ntop = Node(None, var.nextVar(), "have-rel-role-91", False)
# node = Node(None, var.nextVar(), line[1], False)
# Resources.verbalization_list[line[1]] = Subgraph([ntop, node], [(ntop, node, ":ARG2")])