rs1-rmsprop2.hpm
#####################################################################
# Sockeye-recipes Hyperparameter configuration file #
# #
# Overview: #
# - "workdir" corresponds a group of preprocessed bitext and models #
# for a given dataset. Each "workdir" can contain multiple #
# "datadir" and "modeldir" if desired #
# - "datadir" stores the BPE-preprocessed training and validation #
# bitext files #
# - "modeldir" is generated by Sockeye and stores all training info #
# - "rootdir" is path to your installation of sockeye-recipes, #
# e.g. ~/src/sockeye-recipes #
# #
# preprocess-bpe.sh: #
# - input: Tokenized bitext for training ("train_tok") and #
# validation ("valid_tok") #
# - output: BPE-preprocessed bitext ("train_bpe", "valid_bpe") #
# and vocabulary ("bpe_vocab_src", "bpe_vocab_trg") #
# - main hyperparameters: number of BPE symbols for source & target #
# #
# train.sh: #
# - input: BPE-preprocessed bitext ("train_bpe", "valid_bpe") #
# - output: "modeldir", which contains all training info and can #
# be used to translate #
# - main hyperparameters: many! see below #
# #
# translate.sh: #
# - input: this hyperparam file, which specifies modeldir #
# - output: translation of the source file into the target language #
#####################################################################
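# Typical usage with this file (a sketch only; the exact script paths and
# flags depend on your sockeye-recipes checkout -- see its README and the
# scripts' usage messages for the authoritative invocations):
#   bash $rootdir/scripts/preprocess-bpe.sh rs1-rmsprop2.hpm
#   bash $rootdir/scripts/train.sh -p rs1-rmsprop2.hpm -e sockeye_env
#   bash $rootdir/scripts/translate.sh -p rs1-rmsprop2.hpm \
#        -i test.tok.de -o test.out.en -e sockeye_env
# Here "sockeye_env", "test.tok.de", and "test.out.en" are placeholder
# names for your conda environment and test files.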
#####################################################################
# (0) General settings (to be modified for each project) #
#####################################################################
### User-specified directories ###
workdir=./
modeldir=$workdir/rs1-rmsprop2
rootdir=../../
# DESCRIPTION: rs1: RNN-based seq2seq model, Small
### Language pair (source and target) ###
# Note: We assume all bitext files use these as suffixes.
# e.g. $train_tok.$src and $train_tok.$trg refer to the source and target sides of the training bitext.
src=de
trg=en
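# Example: if train_tok=/data/wmt/train.tok (a hypothetical path), then
# preprocess-bpe.sh reads /data/wmt/train.tok.de as the source side and
# /data/wmt/train.tok.en as the target side.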
#####################################################################
# (1) preprocess-bpe.sh settings (modify if needed) #
#####################################################################
### Number of symbols to use for BPE ###
# Note: we perform source and target BPE separately.
# This corresponds to the initial source (src) and target (trg) vocabulary sizes.
bpe_symbols_src=30000
bpe_symbols_trg=30000
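# Illustration (assuming the subword-nmt style "@@ " separator): a rare
# word such as "Abwasserbehandlungsanlage" may be segmented into pieces
# like "Ab@@ wasser@@ behandlungs@@ anlage", while frequent words are
# usually kept whole. The actual splits depend on the learned BPE merges.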
### Filename for BPE-processed bitext file ###
# Note: the following default names should be fine for most use cases
datadir=$workdir/data-bpe/
train_bpe_src=$datadir/train.bpe-${bpe_symbols_src}.$src
valid_bpe_src=$datadir/valid.bpe-${bpe_symbols_src}.$src
train_bpe_trg=$datadir/train.bpe-${bpe_symbols_trg}.$trg
valid_bpe_trg=$datadir/valid.bpe-${bpe_symbols_trg}.$trg
### Filename for BPE vocabulary ###
# Note: the following default names should be fine for most use cases
# Note: bpe_vocab_src will be needed for applying BPE to test, in translate.sh
bpe_vocab_src=${train_bpe_src}.bpe_vocab
bpe_vocab_trg=${train_bpe_trg}.bpe_vocab
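# Worked example: with workdir=./, bpe_symbols_src=30000, and src=de as
# set above, these variables expand to
#   datadir       = .//data-bpe/
#   train_bpe_src = .//data-bpe//train.bpe-30000.de
#   bpe_vocab_src = .//data-bpe//train.bpe-30000.de.bpe_vocab
# (the doubled slashes are harmless to the shell and to Sockeye).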
#####################################################################
# (2) train.sh settings (modify if needed) #
#####################################################################
# Model architecture
num_embed="512:512"
rnn_num_hidden=512
rnn_attention_type="dot"
num_layers=1
rnn_cell_type="lstm"
# Regularization
embed_dropout=".0:.0"
rnn_dropout_inputs=".0:.0"
rnn_dropout_states=".0:.0"
label_smoothing=0.1
# Vocabulary
num_words="${bpe_symbols_src}:${bpe_symbols_trg}"
word_min_count="1:1"
max_seq_len="100:100"
# Training configuration
batch_size=4096
optimizer=rmsprop
initial_learning_rate=0.001
learning_rate_reduce_factor=0.5
loss="cross-entropy"
seed=13
# Logging and stopping condition
checkpoint_frequency=750
min_num_epochs=0
max_num_epochs=100
max_updates=500000
keep_last_params=1
decode_and_evaluate=0
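# For orientation, train.sh forwards the settings above to Sockeye's
# trainer. A rough sketch of the resulting call follows; the flag names
# assume the Sockeye 1.x CLI and may differ in your version, and the real
# command is assembled inside train.sh, so treat this as illustrative only:
#   python -m sockeye.train \
#     --source $train_bpe_src --target $train_bpe_trg \
#     --validation-source $valid_bpe_src --validation-target $valid_bpe_trg \
#     --output $modeldir \
#     --num-embed $num_embed --rnn-num-hidden $rnn_num_hidden \
#     --rnn-attention-type $rnn_attention_type --num-layers $num_layers \
#     --rnn-cell-type $rnn_cell_type --embed-dropout $embed_dropout \
#     --rnn-dropout-inputs $rnn_dropout_inputs \
#     --rnn-dropout-states $rnn_dropout_states \
#     --label-smoothing $label_smoothing \
#     --num-words $num_words --word-min-count $word_min_count \
#     --max-seq-len $max_seq_len --batch-size $batch_size \
#     --optimizer $optimizer --initial-learning-rate $initial_learning_rate \
#     --learning-rate-reduce-factor $learning_rate_reduce_factor \
#     --loss $loss --seed $seed \
#     --checkpoint-frequency $checkpoint_frequency \
#     --min-num-epochs $min_num_epochs --max-num-epochs $max_num_epochs \
#     --max-updates $max_updates --keep-last-params $keep_last_params \
#     --decode-and-evaluate $decode_and_evaluate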