-
Notifications
You must be signed in to change notification settings - Fork 0
/
search_utils.py
189 lines (171 loc) · 7.74 KB
/
search_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
import torch
from torch.nn.utils.rnn import pad_sequence
def init_search(batchidxs, nelist, db, nolengths=True):
    """
    Build the initial tensors for a search over one batch.

    Args:
        batchidxs: indices into ``db.val_srcs`` of the sources in this batch.
        nelist: indices into ``db.train_tgts`` of the neighbor targets to use.
        db: data object; this function reads ``db.d.toks2idxs``, ``db.d.w2i``,
            ``db.val_srcs``, ``db.train_tgts``, ``db.pad_idx`` and
            ``db.sel_firstlast_idxing``.
        nolengths: when True, no ``lengths`` tensor is created (None is
            returned in its place).

    Returns:
        A tuple ``(srcs, ufeats, neighbs, canvases, relidxs, lengths, inslocs)``:
        srcs      -- max_srclen x bsz padded source token indices
        ufeats    -- max_srclen x bsz long tensor, 0 everywhere except 3 on
                     the rows treated as padding
        neighbs   -- max_nelen x len(nelist) padded neighbor token indices
        canvases  -- 2 x bsz, every column is ["<tgt>", "</tgt>"]
        relidxs   -- 2 x bsz zeros
        lengths   -- 2 x bsz zeros, or None when ``nolengths`` is True
        inslocs   -- bsz zeros
    """
    vocab = db.d
    bsz = len(batchidxs)

    def _pad(idx_seqs):
        # pad_sequence's default layout is time-major: max_len x num_seqs
        return pad_sequence([torch.LongTensor(seq) for seq in idx_seqs],
                            padding_value=db.pad_idx)

    if db.sel_firstlast_idxing:
        # sequences are used as stored, without wrapper tokens
        srcs = _pad(vocab.toks2idxs(db.val_srcs[idx]) for idx in batchidxs)
        # sanity check: the first source must not start with the "<src>" marker
        assert srcs[0][0].item() != vocab.w2i["<src>"]
        neighbs = _pad(vocab.toks2idxs(db.train_tgts[idx]) for idx in nelist)
    else:
        # wrap every source in <src> ... </src> and every neighbor in <tgt> ... </tgt>
        src_open, src_close = vocab.w2i["<src>"], vocab.w2i["</src>"]
        srcs = _pad([src_open] + vocab.toks2idxs(db.val_srcs[idx]) + [src_close]
                    for idx in batchidxs)
        tgt_open, tgt_close = vocab.w2i["<tgt>"], vocab.w2i["</tgt>"]
        neighbs = _pad([tgt_open] + vocab.toks2idxs(db.train_tgts[idx]) + [tgt_close]
                       for idx in nelist)

    # every hypothesis starts from the empty canvas "<tgt> </tgt>"
    canvases = torch.LongTensor(
        [vocab.w2i["<tgt>"], vocab.w2i["</tgt>"]]).view(-1, 1).repeat(1, bsz)
    relidxs = torch.zeros(2, bsz, dtype=torch.long)
    lengths = None if nolengths else torch.zeros(2, bsz, dtype=torch.long)

    max_srclen = srcs.size(0)
    ufeats = torch.zeros(max_srclen, bsz, dtype=torch.long)
    for col, idx in enumerate(batchidxs):
        # NOTE(review): the offset is the raw source length, even when the
        # source was wrapped in <src>/</src> above (two extra rows) -- confirm
        # that this is intended.
        ufeats[len(db.val_srcs[idx]):, col].fill_(3)  # padding

    inslocs = torch.zeros(bsz, dtype=torch.long)
    return srcs, ufeats, neighbs, canvases, relidxs, lengths, inslocs
def get_updated_canvs(hyps, db, device, nolengths=True):
    """
    Collect the per-hypothesis state of a nested list of hypotheses into
    padded tensors on ``device``.

    ``hyps`` is iterated as a list of lists; hypotheses are taken in
    row-major order (all of hyps[0], then all of hyps[1], ...), one column
    per hypothesis.

    Returns ``(canvases, relidxs, lengths, ufeats)``:
        canvases -- canvas token indices, padded with ``db.pad_idx``
        relidxs  -- ``hyp.rellist`` values, padded with -1
        lengths  -- ``hyp.lengths`` values, padded with ``db.pad_idx``;
                    None when ``nolengths`` is True
        ufeats   -- ``hyp.ufeats`` tensors, padded with 3
    """
    flat_hyps = [hyp for beam in hyps for hyp in beam]

    canvas_seqs = [torch.LongTensor(db.d.toks2idxs(hyp.canvas)) for hyp in flat_hyps]
    canvases = pad_sequence(canvas_seqs, padding_value=db.pad_idx).to(device)

    rel_seqs = [torch.LongTensor(hyp.rellist) for hyp in flat_hyps]
    relidxs = pad_sequence(rel_seqs, padding_value=-1).to(device)

    if nolengths:
        lengths = None
    else:
        len_seqs = [torch.LongTensor(hyp.lengths) for hyp in flat_hyps]
        lengths = pad_sequence(len_seqs, padding_value=db.pad_idx).to(device)

    # ufeats are already tensors; 3 is the padding feature value
    ufeats = pad_sequence([hyp.ufeats for hyp in flat_hyps], padding_value=3).to(device)
    return canvases, relidxs, lengths, ufeats
def make_nemask(neighbs, neoffs, pad_idx):
    """
    returns bsz x nelen*nne mask with 1s for pad tokens or a neighbor from a different batch idx

    neighbs is nelen x nne; batch item b owns the neighbor columns
    neoffs[b] up to (not including) neoffs[b+1], so bsz = len(neoffs) - 1.
    Flattened position t*nne + j of a row refers to token t of neighbor j.
    """
    nelen, nne = neighbs.size()
    n_items = len(neoffs) - 1

    # start every row from the pad mask, then add the per-item exclusions
    is_pad = neighbs.view(-1) == pad_idx
    nemask = is_pad[None, :].expand(n_items, -1).contiguous()

    # view sharing storage with nemask, so writes below land in the result
    by_neighbor = nemask.view(n_items, nelen, nne)
    for item in range(n_items):
        first, past_last = neoffs[item], neoffs[item + 1]
        by_neighbor[item, :, :first] = True
        by_neighbor[item, :, past_last:] = True
    return nemask
def get_trace(moves, val_src, nelist, db):
    """
    Replay a list of moves on an empty canvas and record every step.

    Each move is a 6-tuple (ktype, tneidx, tl, tr, tj, tk). The span it
    inserts replaces canvas[tj:tk] and comes from:
        ktype == 2 -- val_src[tl:tr]                       (label "s")
        ktype == 1 -- the single word db.d.i2w[tneidx]     (label "v")
        otherwise  -- db.train_tgts[nelist[tneidx]][tl:tr] (label "n<train idx>")

    Returns a list of (label, canvas copy) pairs, one per move, where the
    canvas starts as ["<tgt>", "</tgt>"].
    """
    canvas = ["<tgt>", "</tgt>"]
    history = []
    for kind, idx, left, right, ins_from, ins_to in moves:
        if kind == 2:
            label, piece = "s", val_src[left:right]
        elif kind == 1:
            label, piece = "v", [db.d.i2w[idx]]
        else:
            train_idx = nelist[idx]
            label, piece = "n" + str(train_idx), db.train_tgts[train_idx][left:right]
        canvas = canvas[:ins_from] + piece + canvas[ins_to:]
        # store a snapshot so later steps cannot alias earlier ones
        history.append((label, list(canvas)))
    return history
class Hyp(object):
    """
    hypotheses for beam search

    A hypothesis holds the current canvas (a token list starting with
    "<tgt>"), the per-token lists ``rellist`` and ``lengths``, a ``ufeats``
    tensor, and the move that produced it. Hypotheses are linked through
    ``parent``; a complete move takes two steps (start/end or ins/sel), so
    consecutive complete moves are two ``parent`` links apart.
    """

    def __init__(self, canvas, rellist, lengths, ufeats, move):
        assert canvas[0] == "<tgt>"
        self.canvas = canvas
        self.rellist = rellist
        self.lengths = lengths  # may be None when lengths are not tracked
        self.ufeats = ufeats
        self.curr_move = move  # format is (ktype, tneidx, tl, tr, tj, tk)
        self.mvidx = 0
        self.score = 0
        self.parent = None

    # ---- private helpers -------------------------------------------------

    def _pending_child(self, move):
        """Child that shares this hypothesis' canvas state and only records ``move``."""
        child = Hyp(self.canvas, self.rellist, self.lengths, self.ufeats, move)
        child.parent = self
        return child

    @staticmethod
    def _pick_span(db, ii, neidxs, ktype, idx, left, right):
        """Token list selected by a move: source slice, single word, or neighbor slice."""
        if ktype == 2:  # from src
            return db.val_srcs[ii][left:right]
        if ktype == 1:  # a word
            return [db.d.i2w[idx]]
        return db.train_tgts[neidxs[idx]][left:right]  # from a neighbor

    def _splice(self, span, start, stop, relidx):
        """Replace [start:stop] with ``span`` in canvas, rellist and lengths."""
        n = len(span)
        canvas = self.canvas[:start] + span + self.canvas[stop:]
        rels = self.rellist[:start] + [relidx] * n + self.rellist[stop:]
        if self.lengths is None:
            # lengths are optional; keep them absent instead of failing on slicing
            lens = None
        else:
            lens = self.lengths[:start] + [n] * n + self.lengths[stop:]
        return canvas, rels, lens

    # ---- start/end style moves -------------------------------------------

    def get_start_child(self, ktype, tneidx, tl, tj, score):
        """Child recording the first half of a move; tr and tk are still None."""
        # keep previous canvas for now
        nuhyp = self._pending_child((ktype, tneidx, tl, None, tj, None))
        nuhyp.score = score
        nuhyp.mvidx = self.mvidx + 1
        return nuhyp

    def get_end_child(self, db, ii, neidxs, predr, predk, score):
        """
        Complete the pending move in ``self.curr_move`` with right edge
        ``predr`` and canvas end ``predk``, returning the child with the
        span spliced into the canvas. For a source span (ktype 2) the
        copied positions tl:predr of the cloned ufeats are set to 1.
        """
        ktype, tneidx, tl, _, tj, _ = self.curr_move
        mvidx = self.mvidx
        nuufeats = self.ufeats.clone()  # never modify the parent's features
        span = self._pick_span(db, ii, neidxs, ktype, tneidx, tl, predr)
        if ktype == 2:
            nuufeats[tl:predr].fill_(1)
        nucanv, nurels, nulens = self._splice(span, tj, predk, mvidx)
        nuhyp = Hyp(nucanv, nurels, nulens, nuufeats, (ktype, tneidx, tl, predr, tj, predk))
        nuhyp.mvidx, nuhyp.score, nuhyp.parent = mvidx, score, self
        return nuhyp

    def get_start_final_child(self, score, len_avg=True):
        """
        Child marking the end of the hypothesis (its move is None). With
        ``len_avg`` the score is divided by 2*mvidx - 1, using the child's
        incremented mvidx.
        """
        nuhyp = self._pending_child(None)
        nuhyp.mvidx = self.mvidx + 1
        nmoves = (2 * nuhyp.mvidx - 1) if len_avg else 1
        nuhyp.score = score / nmoves
        return nuhyp

    # ---- ins/sel style moves ---------------------------------------------

    def get_sel_child(self, db, ii, neidxs, ktype, predc, predl, predr, score):
        """
        Fill the insertion slot (jj, kk) stored in ``self.curr_move`` with
        the selected span and return the resulting child. For a source span
        (ktype 2) the copied ufeats positions are set to 1, shifted by one
        when ``db.sel_firstlast_idxing`` is false.
        """
        nuufeats = self.ufeats.clone()  # never modify the parent's features
        inc = int(not db.sel_firstlast_idxing)
        span = self._pick_span(db, ii, neidxs, ktype, predc, predl, predr)
        if ktype == 2:
            nuufeats[predl + inc:predr + inc].fill_(1)
        _, _, _, _, jj, kk = self.curr_move
        mvidx = self.mvidx  # mvidx of insert...
        nucanv, nurels, nulens = self._splice(span, jj, kk, mvidx + 1)
        numove = (ktype, predc, predl, predr, jj, kk)
        nuhyp = Hyp(nucanv, nurels, nulens, nuufeats, numove)
        nuhyp.mvidx, nuhyp.score, nuhyp.parent = mvidx, score, self
        return nuhyp

    def get_ins_child(self, jj, kk, score):
        """Child recording only the insertion slot (jj, kk); the canvas is unchanged."""
        nuhyp = self._pending_child((None, None, None, None, jj, kk))
        nuhyp.score = score
        nuhyp.mvidx = self.mvidx + 1
        return nuhyp

    def get_catbl_ins_child(self):
        """Return this hypothesis unchanged; no insertion slot is recorded."""
        return self

    def get_ins_final_child(self, score, len_avg=True):
        """
        Child marking the end of the hypothesis (its move is None). With
        ``len_avg`` the score is divided by 2*(mvidx + 1). The child's own
        mvidx is left at its initial value of 0.
        """
        nuhyp = self._pending_child(None)
        nmoves = 2 * (self.mvidx + 1) if len_avg else 1  # include this ins but not first ins in count
        nuhyp.score = score / nmoves
        return nuhyp

    # ---- read-out ----------------------------------------------------------

    def get_moves(self, val_src, nelist, db):
        """
        does the full trace if val_src is not None

        Walks back through the ancestors, two ``parent`` links at a time,
        collecting ``curr_move`` until a hypothesis without a move (or the
        end of the chain) is reached. Returns the moves oldest first, or
        the result of ``get_trace`` on them when ``val_src`` is given.
        """
        moves = []
        if self.curr_move is None:  # a properly ended hypothesis
            curr = self.parent
        else:  # didn't predict an end
            curr = self
        while curr is not None and curr.curr_move is not None:
            moves.append(curr.curr_move)  # should be complete
            half_step = curr.parent
            # go to previous full move; stop cleanly if the chain ends early
            curr = half_step.parent if half_step is not None else None
        moves = moves[::-1]
        if val_src is not None:
            return get_trace(moves, val_src, nelist, db)
        return moves