Skip to content

Commit

Permalink
new file: conf/tuning/train_asr_transformer_cmvn.yaml
Browse files Browse the repository at this point in the history
  • Loading branch information
kamo-naoyuki committed Jan 4, 2021
1 parent 5f87187 commit 379b7e1
Show file tree
Hide file tree
Showing 2 changed files with 148 additions and 11 deletions.
74 changes: 74 additions & 0 deletions egs2/dirha_wsj/asr1/conf/tuning/train_asr_transformer_cmvn.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# Transformer encoder/decoder ASR training configuration. The "cmvn" variant
# selects `utterance_mvn` normalization with mean-only settings (see the
# `normalize` section near the end of this file).
# Trained using GTX-1080ti x4. It takes about 2 days.
batch_type: numel
batch_bins: 16000000
accum_grad: 1
max_epoch: 50
# Plain string `none` (not YAML null); presumably read by the trainer as
# "early stopping disabled" -- confirm against the consumer.
patience: none
# The initialization method for model parameters
init: xavier_uniform
# A list of triples; this file gives a single one: valid / acc / max.
best_model_criterion:
  - - valid
    - acc
    - max
keep_nbest_models: 10

encoder: transformer
encoder_conf:
  output_size: 256
  attention_heads: 4
  linear_units: 2048
  num_blocks: 12
  dropout_rate: 0.1
  positional_dropout_rate: 0.1
  attention_dropout_rate: 0.0
  input_layer: conv2d
  normalize_before: true

decoder: transformer
decoder_conf:
  attention_heads: 4
  # NOTE(review): the encoder uses 2048; 2028 looks like a typo. It is kept
  # as-is because changing it alters the decoder's shape and would not match
  # models already trained with this file -- confirm before changing.
  linear_units: 2028
  num_blocks: 6
  dropout_rate: 0.1
  positional_dropout_rate: 0.1
  self_attention_dropout_rate: 0.0
  src_attention_dropout_rate: 0.0

model_conf:
  ctc_weight: 0.3
  lsm_weight: 0.1
  length_normalized_loss: false

optim: adam
optim_conf:
  lr: 0.005
scheduler: warmuplr
scheduler_conf:
  warmup_steps: 30000

specaug: specaug
specaug_conf:
  apply_time_warp: true
  time_warp_window: 5
  time_warp_mode: bicubic
  apply_freq_mask: true
  freq_mask_width_range:
    - 0
    - 30
  num_freq_mask: 2
  apply_time_mask: true
  time_mask_width_range:
    - 0
    - 40
  num_time_mask: 2

normalize: utterance_mvn
normalize_conf:
  norm_means: true
  norm_vars: false
  eps: 1.0e-20

# Script files listing the impulse responses and noise recordings.
rir_scp: data/dirha_ir/wav.scp
noise_scp: data/dirha_noise/wav.scp
speech_volume_normalize: 1.0
# Quoted on purpose: YAML 1.1 loaders (e.g. PyYAML) treat `_` as a digit
# separator, so a plain 10_17 is resolved to the integer 1017 instead of the
# string "10_17".
noise_db_range: "10_17"
85 changes: 74 additions & 11 deletions egs2/dirha_wsj/asr1/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,82 @@ set -e
set -u
set -o pipefail

# NOTE(review): this span comes from a rendered diff without +/- markers, so
# superseded and replacement assignments of the same variable (mic, train_set,
# valid_set, test_sets, asr_config) appear one after another. Read top to
# bottom as shell, the later assignment of each variable is the one in effect.
mic=Beam_Circular_Array # Beam_Circular_Array Beam_Linear_Array KA6 L1C
mic=

# Extra options for the data preparation step, built from ${mic}.
local_data_opts="--mic ${mic}"


# Data set names derived from ${mic}.
train_set=train_si284_$mic
valid_set=dirha_sim_$mic
test_sets=dirha_real_$mic
# Data set names that do not depend on ${mic}: a single training set and a
# fixed validation set.
train_set=train_si284
valid_set=dirha_sim_Livingroom_Circular_Array_Beam_Circular_Array
# Build the evaluation list as one space-separated string: start empty, then
# append one data set name per line (each entry carries its own leading space).
test_sets=
# dirha_real_* sets: Kitchen circular array (KA1-KA6), Livingroom circular
# array (Beam_Circular_Array, LA1-LA6), Livingroom linear array
# (Beam_Linear_Array, LD02-LD12) and Livingroom wall (L1C, L1L-L4R).
test_sets+=" dirha_real_Kitchen_Circular_Array_KA1"
test_sets+=" dirha_real_Kitchen_Circular_Array_KA2"
test_sets+=" dirha_real_Kitchen_Circular_Array_KA3"
test_sets+=" dirha_real_Kitchen_Circular_Array_KA4"
test_sets+=" dirha_real_Kitchen_Circular_Array_KA5"
test_sets+=" dirha_real_Kitchen_Circular_Array_KA6"
test_sets+=" dirha_real_Livingroom_Circular_Array_Beam_Circular_Array"
test_sets+=" dirha_real_Livingroom_Circular_Array_LA1"
test_sets+=" dirha_real_Livingroom_Circular_Array_LA2"
test_sets+=" dirha_real_Livingroom_Circular_Array_LA3"
test_sets+=" dirha_real_Livingroom_Circular_Array_LA4"
test_sets+=" dirha_real_Livingroom_Circular_Array_LA5"
test_sets+=" dirha_real_Livingroom_Circular_Array_LA6"
test_sets+=" dirha_real_Livingroom_Linear_Array_Beam_Linear_Array"
test_sets+=" dirha_real_Livingroom_Linear_Array_LD02"
test_sets+=" dirha_real_Livingroom_Linear_Array_LD03"
test_sets+=" dirha_real_Livingroom_Linear_Array_LD04"
test_sets+=" dirha_real_Livingroom_Linear_Array_LD05"
test_sets+=" dirha_real_Livingroom_Linear_Array_LD06"
test_sets+=" dirha_real_Livingroom_Linear_Array_LD07"
test_sets+=" dirha_real_Livingroom_Linear_Array_LD08"
test_sets+=" dirha_real_Livingroom_Linear_Array_LD09"
test_sets+=" dirha_real_Livingroom_Linear_Array_LD10"
test_sets+=" dirha_real_Livingroom_Linear_Array_LD11"
test_sets+=" dirha_real_Livingroom_Linear_Array_LD12"
test_sets+=" dirha_real_Livingroom_Wall_L1C"
test_sets+=" dirha_real_Livingroom_Wall_L1L"
test_sets+=" dirha_real_Livingroom_Wall_L1R"
test_sets+=" dirha_real_Livingroom_Wall_L2L"
test_sets+=" dirha_real_Livingroom_Wall_L2R"
test_sets+=" dirha_real_Livingroom_Wall_L3L"
test_sets+=" dirha_real_Livingroom_Wall_L3R"
test_sets+=" dirha_real_Livingroom_Wall_L4L"
test_sets+=" dirha_real_Livingroom_Wall_L4R"
# dirha_sim_* sets: the same room/array/microphone suffixes as the
# dirha_real_* entries above.
test_sets+=" dirha_sim_Kitchen_Circular_Array_KA1"
test_sets+=" dirha_sim_Kitchen_Circular_Array_KA2"
test_sets+=" dirha_sim_Kitchen_Circular_Array_KA3"
test_sets+=" dirha_sim_Kitchen_Circular_Array_KA4"
test_sets+=" dirha_sim_Kitchen_Circular_Array_KA5"
test_sets+=" dirha_sim_Kitchen_Circular_Array_KA6"
test_sets+=" dirha_sim_Livingroom_Circular_Array_Beam_Circular_Array"
test_sets+=" dirha_sim_Livingroom_Circular_Array_LA1"
test_sets+=" dirha_sim_Livingroom_Circular_Array_LA2"
test_sets+=" dirha_sim_Livingroom_Circular_Array_LA3"
test_sets+=" dirha_sim_Livingroom_Circular_Array_LA4"
test_sets+=" dirha_sim_Livingroom_Circular_Array_LA5"
test_sets+=" dirha_sim_Livingroom_Circular_Array_LA6"
test_sets+=" dirha_sim_Livingroom_Linear_Array_Beam_Linear_Array"
test_sets+=" dirha_sim_Livingroom_Linear_Array_LD02"
test_sets+=" dirha_sim_Livingroom_Linear_Array_LD03"
test_sets+=" dirha_sim_Livingroom_Linear_Array_LD04"
test_sets+=" dirha_sim_Livingroom_Linear_Array_LD05"
test_sets+=" dirha_sim_Livingroom_Linear_Array_LD06"
test_sets+=" dirha_sim_Livingroom_Linear_Array_LD07"
test_sets+=" dirha_sim_Livingroom_Linear_Array_LD08"
test_sets+=" dirha_sim_Livingroom_Linear_Array_LD09"
test_sets+=" dirha_sim_Livingroom_Linear_Array_LD10"
test_sets+=" dirha_sim_Livingroom_Linear_Array_LD11"
test_sets+=" dirha_sim_Livingroom_Linear_Array_LD12"
test_sets+=" dirha_sim_Livingroom_Wall_L1C"
test_sets+=" dirha_sim_Livingroom_Wall_L1L"
test_sets+=" dirha_sim_Livingroom_Wall_L1R"
test_sets+=" dirha_sim_Livingroom_Wall_L2L"
test_sets+=" dirha_sim_Livingroom_Wall_L2R"
test_sets+=" dirha_sim_Livingroom_Wall_L3L"
test_sets+=" dirha_sim_Livingroom_Wall_L3R"
test_sets+=" dirha_sim_Livingroom_Wall_L4L"
test_sets+=" dirha_sim_Livingroom_Wall_L4R"

# config files
#preprocess_config=conf/no_preprocess.yaml # use conf/specaug.yaml for data augmentation
# asr_config is assigned twice in this view; the second value (the cmvn
# variant) is the one in effect.
asr_config=conf/tuning/train_asr_transformer.yaml
asr_config=conf/tuning/train_asr_transformer_cmvn.yaml
lm_config=conf/tuning/train_lm_transformer.yaml
inference_config=conf/decode.yaml

Expand All @@ -26,7 +90,7 @@ word_vocab_size=65000
./asr.sh \
--lang en \
--ngpu 4 \
--audio_format wav \
--audio_format flac \
--nlsyms_txt data/nlsyms.txt \
--token_type char \
--feats_type raw \
Expand All @@ -39,6 +103,5 @@ word_vocab_size=65000
--train_set "${train_set}" \
--valid_set "${valid_set}" \
--test_sets "${test_sets}" \
--local_data_opts "${local_data_opts}" \
--bpe_train_text "data/${train_set}/text" \
--lm_train_text "data/${train_set}/text data/local/other_text/text" "$@"

0 comments on commit 379b7e1

Please sign in to comment.