-
Notifications
You must be signed in to change notification settings - Fork 308
/
Copy pathutils.py
95 lines (81 loc) · 2.65 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# Copyright (c) 2017-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
#
from __future__ import absolute_import, division, unicode_literals
import numpy as np
import re
import inspect
from torch import optim
def create_dictionary(sentences):
words = {}
for s in sentences:
for word in s:
if word in words:
words[word] += 1
else:
words[word] = 1
words['<s>'] = 1e9 + 4
words['</s>'] = 1e9 + 3
words['<p>'] = 1e9 + 2
# words['<UNK>'] = 1e9 + 1
sorted_words = sorted(words.items(), key=lambda x: -x[1]) # inverse sort
id2word = []
word2id = {}
for i, (w, _) in enumerate(sorted_words):
id2word.append(w)
word2id[w] = i
return id2word, word2id
def cosine(u, v):
return np.dot(u, v) / (np.linalg.norm(u) * np.linalg.norm(v))
class dotdict(dict):
""" dot.notation access to dictionary attributes """
__getattr__ = dict.get
__setattr__ = dict.__setitem__
__delattr__ = dict.__delitem__
def get_optimizer(s):
"""
Parse optimizer parameters.
Input should be of the form:
- "sgd,lr=0.01"
- "adagrad,lr=0.1,lr_decay=0.05"
"""
if "," in s:
method = s[:s.find(',')]
optim_params = {}
for x in s[s.find(',') + 1:].split(','):
split = x.split('=')
assert len(split) == 2
assert re.match("^[+-]?(\d+(\.\d*)?|\.\d+)$", split[1]) is not None
optim_params[split[0]] = float(split[1])
else:
method = s
optim_params = {}
if method == 'adadelta':
optim_fn = optim.Adadelta
elif method == 'adagrad':
optim_fn = optim.Adagrad
elif method == 'adam':
optim_fn = optim.Adam
elif method == 'adamax':
optim_fn = optim.Adamax
elif method == 'asgd':
optim_fn = optim.ASGD
elif method == 'rmsprop':
optim_fn = optim.RMSprop
elif method == 'rprop':
optim_fn = optim.Rprop
elif method == 'sgd':
optim_fn = optim.SGD
assert 'lr' in optim_params
else:
raise Exception('Unknown optimization method: "%s"' % method)
# check that we give good parameters to the optimizer
expected_args = inspect.getargspec(optim_fn.__init__)[0]
assert expected_args[:2] == ['self', 'params']
if not all(k in expected_args[2:] for k in optim_params.keys()):
raise Exception('Unexpected parameters: expected "%s", got "%s"' % (
str(expected_args[2:]), str(optim_params.keys())))
return optim_fn, optim_params