# utils.py: helpers for loading word vectors, building a vocabulary file,
# and mapping scalar labels to target distributions.
from __future__ import print_function
import os
import math
import torch
from tree import Tree
from vocab import Vocab
# loading GLOVE word vectors
# if .pth file is found, will load that
# else will load from .txt file & save
def load_word_vectors(path):
    """Load word vectors and their vocabulary, caching a binary copy on disk.

    If both ``path + '.pth'`` and ``path + '.vocab'`` already exist, they are
    loaded directly. Otherwise ``path + '.txt'`` is parsed: each line holds a
    word followed by its space-separated vector components. The words are
    then written to ``path + '.vocab'`` and the vectors saved to
    ``path + '.pth'`` so that later calls can take the fast path.

    Args:
        path: File path prefix, without extension.

    Returns:
        A ``(vocab, vectors)`` tuple. ``vocab`` is the ``Vocab`` built from
        the ``.vocab`` file. ``vectors`` is the object loaded from the
        ``.pth`` file, or a freshly built tensor with one row per line of
        the ``.txt`` file.
    """
    pth_file = path + '.pth'
    vocab_file = path + '.vocab'
    txt_file = path + '.txt'

    if os.path.isfile(pth_file) and os.path.isfile(vocab_file):
        print('==> File found, loading to memory')
        vectors = torch.load(pth_file)
        vocab = Vocab(filename=vocab_file)
        return vocab, vectors

    # Saved files not found: read the text file and build the tensors.
    print('==> File not found, preparing, be patient')

    # First pass: take the vector dimension from the first line and count the
    # lines so the tensor can be preallocated. Everything happens inside a
    # `with` block so the file handle is always closed.
    with open(txt_file, 'r') as f:
        first_line = f.readline()
        dim = len(first_line.rstrip('\n').split(' ')[1:])
        # readline() returns '' only for an empty file.
        count = (1 if first_line else 0) + sum(1 for _ in f)

    words = [None] * count
    vectors = torch.zeros(count, dim)

    # Second pass: fill in the words and their vectors row by row.
    with open(txt_file, 'r') as f:
        for idx, line in enumerate(f):
            contents = line.rstrip('\n').split(' ')
            words[idx] = contents[0]
            vectors[idx] = torch.Tensor(list(map(float, contents[1:])))

    with open(vocab_file, 'w') as f:
        for word in words:
            f.write(word + '\n')

    vocab = Vocab(filename=vocab_file)
    torch.save(vectors, pth_file)
    return vocab, vectors
# write unique words from a set of files to a new file
def build_vocab(filenames, vocabfile):
    """Write the unique tokens found in a set of files to a new file.

    Each line of every input file is stripped of trailing newlines and split
    on single spaces. The distinct tokens are written to ``vocabfile`` in
    sorted order, one per line.

    Args:
        filenames: Iterable of paths to the input text files.
        vocabfile: Path of the vocabulary file to create or overwrite.
    """
    seen = set()
    for name in filenames:
        with open(name, 'r') as src:
            for row in src:
                seen.update(row.rstrip('\n').split(' '))
    with open(vocabfile, 'w') as out:
        out.writelines(tok + '\n' for tok in sorted(seen))
# mapping from scalar to vector
def map_label_to_target(label, num_classes):
    """Map a scalar label to a ``1 x num_classes`` target tensor.

    Class ``k`` is stored at column ``k - 1``. A whole-number label puts a
    weight of 1 on its own class. A fractional label splits the weight
    between the two neighbouring classes: the lower class gets
    ``ceil(label) - label`` and the upper class gets ``label - floor(label)``.

    Args:
        label: Numeric label.
        num_classes: Number of columns in the returned tensor.

    Returns:
        A tensor of shape ``(1, num_classes)``.
    """
    target = torch.zeros(1, num_classes)
    row = target[0]  # view into the single row; writes land in `target`
    lower = int(math.floor(label))
    upper = int(math.ceil(label))
    if lower == upper:
        row[lower - 1] = 1
        return target
    row[lower - 1] = upper - label
    row[upper - 1] = label - lower
    return target