Merge pull request espnet#4089 from pyf98/ls100_transformer

Add transformer results for LibriSpeech 100h
chintu619 · Feb 21, 2022 · 650472b
2 parents ccc7c18 + da026d8
commit 650472b
Show file tree

Hide file tree

Showing 2 changed files with 121 additions and 0 deletions.
diff --git a/egs2/librispeech_100/asr1/README.md b/egs2/librispeech_100/asr1/README.md
@@ -56,3 +56,47 @@ Model: https://huggingface.co/pyf98/librispeech_100h_conformer
 |beam20_ctc0.3/test_clean|2620|66983|92.0|5.0|3.0|0.6|8.6|57.0|
 |beam20_ctc0.3/test_other|2939|66650|81.2|13.0|5.8|2.0|20.9|81.6|
 
+
+
+## Environments
+- date: `Fri Feb 18 16:00:45 EST 2022`
+- python version: `3.9.7 (default, Sep 16 2021, 13:09:58)  [GCC 7.5.0]`
+- espnet version: `espnet 0.10.7a1`
+- pytorch version: `pytorch 1.10.1`
+- Git hash: `f6779876103be2116de158a44757f8979eff0ab0`
+  - Commit date: `Fri Feb 18 15:57:13 2022 -0500`
+
+## asr_transformer_win400_hop160_ctc0.3_lr2e-3_warmup15k_timemask5_amp_no-deterministic
+
+GPU: a single V100-32GB
+
+Training Time: 42834 seconds
+
+Model: https://huggingface.co/pyf98/librispeech_100h_transformer
+
+### WER
+
+|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
+|---|---|---|---|---|---|---|---|---|
+|beam20_ctc0.3/dev_clean|2703|54402|93.0|6.4|0.5|1.1|8.1|63.1|
+|beam20_ctc0.3/dev_other|2864|50948|82.5|15.9|1.6|2.7|20.2|83.8|
+|beam20_ctc0.3/test_clean|2620|52576|92.8|6.5|0.7|1.2|8.4|63.3|
+|beam20_ctc0.3/test_other|2939|52343|82.1|16.0|1.9|2.6|20.5|84.8|
+
+### CER
+
+|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
+|---|---|---|---|---|---|---|---|---|
+|beam20_ctc0.3/dev_clean|2703|288456|97.5|1.4|1.1|0.9|3.4|63.1|
+|beam20_ctc0.3/dev_other|2864|265951|92.1|4.8|3.1|2.4|10.3|83.8|
+|beam20_ctc0.3/test_clean|2620|281530|97.4|1.4|1.2|0.9|3.5|63.3|
+|beam20_ctc0.3/test_other|2939|272758|92.0|4.7|3.2|2.3|10.2|84.8|
+
+### TER
+
+|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
+|---|---|---|---|---|---|---|---|---|
+|beam20_ctc0.3/dev_clean|2703|69558|89.9|6.1|4.0|0.8|10.9|63.1|
+|beam20_ctc0.3/dev_other|2864|64524|78.5|15.3|6.2|2.8|24.3|83.8|
+|beam20_ctc0.3/test_clean|2620|66983|90.0|6.2|3.9|0.8|10.9|63.3|
+|beam20_ctc0.3/test_other|2939|66650|77.9|15.2|6.9|2.5|24.6|84.8|
diff --git a/...asr_transformer_win400_hop160_ctc0.3_lr2e-3_warmup15k_timemask5_amp_no-deterministic.yaml b/...asr_transformer_win400_hop160_ctc0.3_lr2e-3_warmup15k_timemask5_amp_no-deterministic.yaml
@@ -0,0 +1,77 @@
+encoder: transformer  # encoder type; the config file name contains "asr_transformer"
+encoder_conf:  # options for the encoder selected above
+    output_size: 256
+    attention_heads: 4  # decoder_conf also uses 4 heads
+    linear_units: 1024  # decoder_conf uses 2048 here
+    num_blocks: 18  # decoder_conf uses 6 blocks
+    dropout_rate: 0.1  # all three encoder dropout rates are 0.1
+    positional_dropout_rate: 0.1
+    attention_dropout_rate: 0.1
+    input_layer: conv2d  # NOTE(review): presumably a convolutional input layer - confirm in ESPnet
+    normalize_before: true
+
+decoder: transformer  # decoder type, same family as the encoder
+decoder_conf:  # options for the decoder selected above
+    attention_heads: 4  # encoder_conf also uses 4 heads
+    linear_units: 2048  # encoder_conf uses 1024 here
+    num_blocks: 6  # encoder_conf uses 18 blocks
+    dropout_rate: 0.1  # all four decoder dropout rates are 0.1
+    positional_dropout_rate: 0.1
+    self_attention_dropout_rate: 0.1
+    src_attention_dropout_rate: 0.1
+
+model_conf:  # model-level loss settings
+    ctc_weight: 0.3  # the "ctc0.3" tag in the config file name
+    lsm_weight: 0.1  # NOTE(review): presumably the label-smoothing weight - confirm in ESPnet
+    length_normalized_loss: false
+
+frontend_conf:  # feature-extraction front-end settings
+    n_fft: 512  # larger than win_length (400)
+    win_length: 400  # the "win400" tag in the config file name
+    hop_length: 160  # the "hop160" tag in the config file name
+
+seed: 2022  # fixed random seed for this run
+log_interval: 400
+num_att_plot: 0
+num_workers: 4
+sort_in_batch: descending
+sort_batch: descending
+batch_type: numel  # NOTE(review): presumably batches are sized by element count - confirm
+batch_bins: 16000000
+accum_grad: 4
+max_epoch: 70
+patience: none  # plain string "none", not YAML null; NOTE(review): presumably no early stopping - confirm
+init: none  # plain string "none", not YAML null
+best_model_criterion:  # a list holding one [valid, acc, max] triple
+-   - valid
+    - acc
+    - max
+keep_nbest_models: 10
+
+use_amp: true  # the "amp" tag in the config file name
+cudnn_deterministic: false  # the "no-deterministic" tag in the config file name
+cudnn_benchmark: false
+
+optim: adam  # optimizer choice; its options follow in optim_conf
+optim_conf:
+    lr: 0.002  # the "lr2e-3" tag in the config file name
+    weight_decay: 0.000001
+scheduler: warmuplr  # scheduler choice; its options follow in scheduler_conf
+scheduler_conf:
+    warmup_steps: 15000  # the "warmup15k" tag in the config file name
+
+specaug: specaug  # augmentation choice; its options follow in specaug_conf
+specaug_conf:
+    apply_time_warp: true  # time warp, frequency mask and time mask are all enabled
+    time_warp_window: 5
+    time_warp_mode: bicubic
+    apply_freq_mask: true
+    freq_mask_width_range:  # two-element range: 0 to 27
+    - 0
+    - 27
+    num_freq_mask: 2
+    apply_time_mask: true
+    time_mask_width_ratio_range:  # two-element range: 0. to 0.05
+    - 0.
+    - 0.05
+    num_time_mask: 5  # the "timemask5" tag in the config file name