-
Notifications
You must be signed in to change notification settings - Fork 3
/
config.yml
65 lines (54 loc) · 1.38 KB
/
config.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
---
# model
initializer_gain: 1.0
shared_embedding: false
shallow_decoder: false
hidden_size: 512
filter_size: 4096
num_hidden_layers: 6
# 0 presumably means "fall back to num_hidden_layers" — TODO confirm against consumer
num_encoder_layers: 0
num_decoder_layers: 0
# 0 presumably means "derive from hidden_size" — TODO confirm against consumer
attention_key_channels: 0
attention_value_channels: 0
num_heads: 8
# Quoted: a plain `n` is a boolean under YAML 1.1 resolvers (the y/n truthy
# trap); quoting guarantees the string value on every parser.
# NOTE(review): letters presumably follow the tensor2tensor convention
# (n = layer norm, d = dropout, a = residual add) — confirm.
layer_preprocess_sequence: "n"
layer_postprocess_sequence: "da"
norm_epsilon: 0.000001
layer_prepostprocess_dropout: 0.1
attention_dropout: 0.1
relu_dropout: 0.1
# discrete-bottleneck type; known alternatives: vq, mog
bottleneck_kind: em
bottleneck_bits: 12
num_compress_steps: 3
beta: 0.25
gamma: 1.0
epsilon: 0.00001
decay: 0.999
num_samples: 10
mask_startup_steps: 50000
# training
# maximum source length; longer examples are presumably filtered — TODO confirm
max_input_length: 50
train_steps: 1000000
eval_steps: 100
save_summary_steps: 1000
save_checkpoints_steps: 2000
# number of recent checkpoints to keep
n_checkpoints: 20
# NOTE(review): presumably tokens per batch, not sentences (transformer
# convention for this family of trainers) — confirm against the data pipeline
batch_size: 2048
learning_rate: 0.2
# noam = inverse-sqrt decay with linear warmup ("Attention Is All You Need")
lr_decay: noam
lr_warmup_steps: 4000
# null presumably disables gradient clipping — TODO confirm against consumer
clip_gradients: null
optimizer: adam
adam_beta1: 0.9
adam_beta2: 0.997
adam_epsilon: 0.000000001
# placeholder paths — replace /path/to/... with the real distilled-WMT14 data
source_vocab_file: /path/to/wmt14_ende_distill/wmtende.vocab
target_vocab_file: /path/to/wmt14_ende_distill/wmtende.vocab
source_train_file: /path/to/wmt14_ende_distill/train.en
target_train_file: /path/to/wmt14_ende_distill/train.de
source_eval_file: /path/to/wmt14_ende_distill/valid.en
target_eval_file: /path/to/wmt14_ende_distill/valid.de
# pre-built TFRecord copies of the train/eval pairs above
record_train_file: /path/to/wmt14_ende_distill/train.tfrecords
record_eval_file: /path/to/wmt14_ende_distill/valid.tfrecords
# predict
predict_batch_size: 1
# upper bound on generated target-sequence length at inference time
max_decode_length: 100