#!returnn/rnn.py
# kate: syntax python;
import os
from returnn.util.basic import get_login_username
demo_name, _ = os.path.splitext(__file__)
print("Hello, experiment: %s" % demo_name)
# task
use_tensorflow = True
task = "train"
# data
#train = {"class": "TaskEpisodicCopyDataset", "num_seqs": 1000}
#num_inputs = 10
#num_outputs = 10
train = {"class": "TaskXmlModelingDataset", "num_seqs": 1000}
num_inputs = 12
num_outputs = 12
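# TaskXmlModelingDataset is a synthetic toy task (XML-like tag-sequence
# modeling) generated on the fly, so this demo needs no external data;
# 12 input and 12 output symbols.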
dev = train.copy()
dev.update({"num_seqs": train["num_seqs"] // 10, "fixed_random_seed": 42})
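# dev is a 100-seq copy of the training task; the fixed random seed makes
# the generated dev sequences identical across runs.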
# network
# (also defined by num_inputs & num_outputs)
network = {
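    # Encoder: two stacked bidirectional LSTM layers; "encoder" merges the
    # top forward/backward states into one sequence, and "enc_ctx" projects
    # it to the attention keys (the "base_ctx" used by the decoder below).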
"lstm0_fw" : { "class": "rec", "unit": "lstmp", "n_out" : 20, "dropout": 0.0, "L2": 0.01, "direction": 1 },
"lstm0_bw" : { "class": "rec", "unit": "lstmp", "n_out" : 20, "dropout": 0.0, "L2": 0.01, "direction": -1 },
"lstm1_fw" : { "class": "rec", "unit": "lstmp", "n_out" : 20, "dropout": 0.0, "L2": 0.01, "direction": 1, "from" : ["lstm0_fw", "lstm0_bw"] },
"lstm1_bw" : { "class": "rec", "unit": "lstmp", "n_out" : 20, "dropout": 0.0, "L2": 0.01, "direction": -1, "from" : ["lstm0_fw", "lstm0_bw"] },
"encoder": {"class": "linear", "activation": "tanh", "from": ["lstm1_fw", "lstm1_bw"], "n_out": 20},
"enc_ctx": {"class": "linear", "activation": "tanh", "from": ["encoder"], "n_out": 20},
"output": {"class": "rec", "from": [], "unit": {
# Like Bahdanau et al / Montreal NMT:
'orth_embed': {'class': 'linear', 'activation': None, 'from': ['data:classes'], "n_out": 10},
"s": {"class": "rnn_cell", "unit": "LSTMBlock", "from": ["prev:c", "prev:orth_embed"], "n_out": 20},
"c_in": {"class": "linear", "activation": "tanh", "from": ["s", "prev:orth_embed"], "n_out": 20},
"c": {"class": "dot_attention", "from": ["c_in"], "base": "base:encoder", "base_ctx": "base:enc_ctx", "n_out": 20},
"output": {"class": "softmax", "from": ["prev:s", "c"], "target": "classes"}
}, "target": "classes", "loss": "ce"}
}
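# For illustration, what the "dot_attention" layer ("c") computes per decoder
# step, sketched in plain numpy. This is a rough sketch under the assumption
# that "c_in" acts as the query and "enc_ctx"/"encoder" as keys/values; it is
# not RETURNN's actual implementation:
#   import numpy
#   def dot_attention_sketch(c_in, enc_ctx, encoder):
#       # c_in: (n_out,); enc_ctx, encoder: (enc_time, n_out)
#       energies = enc_ctx.dot(c_in)                    # one energy per encoder frame
#       weights = numpy.exp(energies - energies.max())
#       weights /= weights.sum()                        # softmax over encoder time
#       return weights.dot(encoder)                     # context vector, shape (n_out,)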
# trainer
batching = "random"
batch_size = 5000
max_seqs = 40
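# batch_size counts time frames summed over all sequences in a batch;
# max_seqs additionally caps the number of sequences per batch.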
chunking = "0"
truncation = -1
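# chunking "0" disables chunking and truncation -1 disables truncated BPTT,
# i.e. training always sees full sequences.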
#gradient_clip = 10
gradient_nan_inf_filter = True
optimizer = {"class": "adam"}
gradient_noise = 0.3
learning_rate = 0.0005
learning_rate_control = "newbob"
learning_rate_control_relative_error_relative_lr = True
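# newbob-style control: the learning rate stays constant until the relative
# improvement on the dev set falls below a threshold, then it is reduced;
# the flag above roughly scales that threshold with the current learning rate.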
model = "/tmp/%s/returnn/%s/model" % (get_login_username(), demo_name) # https://github.com/tensorflow/tensorflow/issues/6537
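# e.g. /tmp/<user>/returnn/demo-tf-attention/model when started from the
# demos directory (the exact path depends on how the config file is invoked).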
num_epochs = 100
save_interval = 20
# log
log_verbosity = 5
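# 5 is the most verbose level (prints per-batch training progress).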