Commit 16368ea

shincling committed Jul 22, 2020
2 parents 0bc48ac + 5c86f39
Showing 150 changed files with 7,627 additions and 1,579 deletions.
7 changes: 5 additions & 2 deletions ci/test_integration.sh
@@ -31,6 +31,9 @@ echo "=== ASR (backend=pytorch, model=rnn-no-ctc) ==="
echo "=== ASR (backend=pytorch, model=transformer) ==="
./run.sh --stage 4 --train-config conf/train_transformer.yaml \
--decode-config conf/decode.yaml
echo "=== ASR (backend=pytorch, model=conformer) ==="
./run.sh --stage 4 --train-config conf/train_conformer.yaml \
--decode-config conf/decode.yaml
echo "=== ASR (backend=pytorch, model=transformer-pure-ctc) ==="
./run.sh --stage 4 --train-config conf/train_transformer_pure_ctc.yaml \
--decode-config conf/decode_pure_ctc.yaml
@@ -94,7 +97,7 @@ done
for t in ${feats_types}; do
for t2 in ${token_types}; do
echo "==== feats_type=${t}, token_types=${t2} ==="
-./run.sh --ngpu 0 --stage 6 --stop-stage 100 --feats-type "${t}" --token-type "${t2}" \
+./run.sh --ngpu 0 --stage 6 --stop-stage 13 --feats-type "${t}" --token-type "${t2}" \
--asr-args "--max_epoch=1" --lm-args "--max_epoch=1"
done
done
@@ -109,7 +112,7 @@ echo "==== [ESPnet2] TTS ==="
feats_types="raw fbank stft"
for t in ${feats_types}; do
echo "==== feats_type=${t} ==="
-./run.sh --ngpu 0 --stage 2 --stop-stage 100 --feats-type "${t}" --train-args "--max_epoch 1"
+./run.sh --ngpu 0 --stage 2 --stop-stage 8 --feats-type "${t}" --train-args "--max_epoch 1"
done
# Remove generated files in order to reduce the disk usage
rm -rf exp dump data
3 changes: 2 additions & 1 deletion egs/README.md
@@ -59,4 +59,5 @@ See: https://espnet.github.io/espnet/tutorial.html
| voxforge | VoxForge | ASR | 7 languages | http://www.voxforge.org/ | |
| wsj | CSR-I (WSJ0) Complete, CSR-II (WSJ1) Complete | ASR | EN | https://catalog.ldc.upenn.edu/LDC93S6A,https://catalog.ldc.upenn.edu/LDC94S13A | |
| wsj_mix | MERL WSJ0-mix multi-speaker dataset | Multispeaker ASR | EN | http://www.merl.com/demos/deep-clustering | |
-| yesno | The "yesno" corpus | ASR | HE | http://www.openslr.org/1 | |
+| yesno | The "yesno" corpus | ASR | HE | http://www.openslr.org/1 | |
+| Yoloxóchitl-Mixtec | The Yoloxóchitl-Mixtec corpus | ASR | Mixtec | http://www.openslr.org/89 | |
14 changes: 14 additions & 0 deletions egs/aishell/asr1/RESULTS.md
@@ -1,3 +1,16 @@
# Conformer result

- training config file: `conf/tuning/train_pytorch_conformer.yaml`
- decoding config file: `conf/decode.yaml`
```
exp/train_sp_pytorch_train_pytorch_conformer/decode_dev_decode_pytorch_conformer/result.txt
| SPKR | # Snt # Wrd | Corr Sub Del Ins Err S.Err |
| Sum/Avg | 14326 205341 | 94.7 5.1 0.1 0.1 5.4 39.0 |
exp/train_sp_pytorch_train_pytorch_conformer/decode_test_decode_pytorch_conformer/result.txt
| SPKR | # Snt # Wrd | Corr Sub Del Ins Err S.Err |
| Sum/Avg | 7176 104765 | 94.2 5.6 0.2 0.1 5.9 41.8 |
```
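
The Corr/Sub/Del/Ins columns in these tables come from a Levenshtein alignment between reference and hypothesis, with Err = Sub + Del + Ins (as a percentage of reference tokens) and S.Err the fraction of sentences containing at least one error. A minimal Python sketch of that error counting follows, for orientation only; ESPnet actually produces these tables with sclite.

```python
# Minimal sketch of sclite-style error counting: align reference and
# hypothesis tokens with Levenshtein DP, then Err = (S + D + I) / N_ref.
# Illustration only; not the sclite tool that produced the tables above.
def error_counts(ref, hyp):
    n, m = len(ref), len(hyp)
    # d[i][j] = (total_cost, subs, dels, ins) for ref[:i] vs hyp[:j]
    d = [[(j, 0, 0, j) for j in range(m + 1)] for _ in range(n + 1)]
    for i in range(1, n + 1):
        d[i][0] = (i, 0, i, 0)
    for i in range(1, n + 1):
        for j in range(1, m + 1):
            if ref[i - 1] == hyp[j - 1]:
                d[i][j] = d[i - 1][j - 1]
            else:
                cands = [
                    (d[i - 1][j - 1], (1, 0, 0)),  # substitution
                    (d[i - 1][j], (0, 1, 0)),      # deletion
                    (d[i][j - 1], (0, 0, 1)),      # insertion
                ]
                (c, s, dl, ins), (ds, dd, di) = min(cands, key=lambda t: t[0][0])
                d[i][j] = (c + 1, s + ds, dl + dd, ins + di)
    _, s, dl, ins = d[n][m]
    return s, dl, ins

ref = "ni hao shi jie".split()
hyp = "ni hao da shi jie".split()
s, dl, ins = error_counts(ref, hyp)
print(f"Err = {100 * (s + dl + ins) / len(ref):.1f}%")  # Err = 25.0% (1 insertion)
```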

# Transformer result (default transformer with initial learning rate = 1.0 and epochs = 50)

- Environments (obtained by `$ get_sys_info.sh`)
@@ -64,3 +77,4 @@ exp/train_sp_pytorch_train_pytorch_transformer_lr1.0/decode_test_decode_pytorch_
| SPKR | # Snt # Wrd | Corr Sub Del Ins Err S.Err |
| Sum/Avg | 7176 104765 | 92.7 7.1 0.2 0.1 7.4 49.8 |
```

47 changes: 47 additions & 0 deletions egs/aishell/asr1/conf/tuning/train_pytorch_conformer.yaml
@@ -0,0 +1,47 @@
# network architecture
# encoder related
elayers: 12
eunits: 2048
# decoder related
dlayers: 6
dunits: 2048
# attention related
adim: 256
aheads: 4

# hybrid CTC/attention
mtlalpha: 0.3

# label smoothing
lsm-weight: 0.1

# minibatch related
batch-size: 32
maxlen-in: 512 # if input length > maxlen-in, batchsize is automatically reduced
maxlen-out: 150 # if output length > maxlen-out, batchsize is automatically reduced

# optimization related
sortagrad: 0 # Feed samples from shortest to longest; -1: enabled for all epochs, 0: disabled, N: enabled for the first N epochs
opt: noam
accum-grad: 2
grad-clip: 5
patience: 0
epochs: 50
dropout-rate: 0.1

# transformer specific setting
backend: pytorch
model-module: "espnet.nets.pytorch_backend.e2e_asr_conformer:E2E"
transformer-input-layer: conv2d # encoder architecture type
transformer-lr: 1.0
transformer-warmup-steps: 25000
transformer-attn-dropout-rate: 0.0
transformer-length-normalized-loss: false
transformer-init: pytorch

# conformer specific setting
transformer-encoder-pos-enc-layer-type: rel_pos
transformer-encoder-selfattn-layer-type: rel_selfattn
macaron-style: true
use-cnn-module: true
cnn-module-kernel: 31
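
The last five keys are what distinguish this config from the transformer recipe: relative positional encoding, relative-position self-attention, macaron-style half-step feed-forward modules, and a depthwise convolution module with kernel size 31. As a rough sketch of what one such encoder block computes, here is a simplified PyTorch version matching the adim, aheads, eunits, and cnn-module-kernel values above; it substitutes vanilla absolute-position self-attention for the rel_pos/rel_selfattn layers ESPnet actually uses, so treat it as an outline, not ESPnet's implementation.

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

class ConformerBlock(nn.Module):
    """Simplified macaron-style conformer block (a sketch, not ESPnet's class)."""

    def __init__(self, adim=256, aheads=4, eunits=2048, kernel=31, p=0.1):
        super().__init__()
        def ffn():  # position-wise feed-forward module
            return nn.Sequential(
                nn.LayerNorm(adim), nn.Linear(adim, eunits), nn.SiLU(),
                nn.Dropout(p), nn.Linear(eunits, adim), nn.Dropout(p))
        self.ffn1, self.ffn2 = ffn(), ffn()
        self.attn_norm = nn.LayerNorm(adim)
        self.attn = nn.MultiheadAttention(adim, aheads, batch_first=True)
        self.conv_norm = nn.LayerNorm(adim)
        self.pw1 = nn.Conv1d(adim, 2 * adim, 1)  # pointwise; doubled for GLU
        self.dw = nn.Conv1d(adim, adim, kernel, padding=kernel // 2, groups=adim)
        self.bn = nn.BatchNorm1d(adim)
        self.pw2 = nn.Conv1d(adim, adim, 1)
        self.drop = nn.Dropout(p)
        self.out_norm = nn.LayerNorm(adim)

    def conv_module(self, x):                  # x: (batch, time, adim)
        h = self.conv_norm(x).transpose(1, 2)  # -> (batch, adim, time)
        h = F.glu(self.pw1(h), dim=1)          # gated pointwise convolution
        h = F.silu(self.bn(self.dw(h)))        # depthwise conv, kernel 31
        return self.drop(self.pw2(h).transpose(1, 2))

    def forward(self, x):
        x = x + 0.5 * self.ffn1(x)             # first macaron half-step FFN
        h = self.attn_norm(x)
        x = x + self.attn(h, h, h, need_weights=False)[0]
        x = x + self.conv_module(x)            # convolution module
        x = x + 0.5 * self.ffn2(x)             # second macaron half-step FFN
        return self.out_norm(x)

print(ConformerBlock()(torch.randn(2, 50, 256)).shape)  # torch.Size([2, 50, 256])
```

The mini_an4 config that follows uses the same structure with toy sizes (elayers: 2, adim: 16) so the conformer path can run inside the CI integration test above.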
47 changes: 47 additions & 0 deletions egs/mini_an4/asr1/conf/train_conformer.yaml
@@ -0,0 +1,47 @@
# network architecture
# encoder related
elayers: 2
eunits: 32
# decoder related
dlayers: 2
dunits: 32
# attention related
adim: 16
aheads: 4

# hybrid CTC/attention
mtlalpha: 0.3

# label smoothing
lsm-weight: 0.1

# minibatch related
batch-size: 2
maxlen-in: 512 # if input length > maxlen-in, batchsize is automatically reduced
maxlen-out: 150 # if output length > maxlen-out, batchsize is automatically reduced

# optimization related
sortagrad: 0 # Feed samples from shortest to longest; -1: enabled for all epochs, 0: disabled, N: enabled for the first N epochs
opt: noam
accum-grad: 2
grad-clip: 5
patience: 0
epochs: 3
dropout-rate: 0.1

# transformer specific setting
backend: pytorch
model-module: "espnet.nets.pytorch_backend.e2e_asr_conformer:E2E"
transformer-input-layer: conv2d # encoder architecture type
transformer-lr: 5.0
transformer-warmup-steps: 25000
transformer-attn-dropout-rate: 0.0
transformer-length-normalized-loss: false
transformer-init: pytorch

# conformer specific setting
transformer-encoder-pos-enc-layer-type: rel_pos
transformer-encoder-selfattn-layer-type: rel_selfattn
macaron-style: true
use-cnn-module: true
cnn-module-kernel: 31
2 changes: 1 addition & 1 deletion egs/wsj/asr1/local/filtering_samples.py
@@ -50,7 +50,7 @@
args = parser.parse_args(cmd_args)

# subsampling info
-if args.etype.startswith("vgg"):
+if hasattr(args, "etype") and args.etype.startswith("vgg"):
# Subsampling is not performed for vgg*;
# it is performed by the max-pooling layers in the CNN front-end.
min_io_ratio = 4
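
The guard added here matters because the argument namespace is rebuilt from a stored training config, and configs for some model types may not define etype at all, so probing with hasattr() avoids an AttributeError before the prefix check runs. A toy demonstration (the Namespace contents and the non-vgg fallback value are illustrative, not the script's actual logic):

```python
from argparse import Namespace

# Illustration only: some configs may lack "etype" entirely.
for args in (Namespace(etype="vggblstmp"), Namespace(etype="blstmp"), Namespace()):
    if hasattr(args, "etype") and args.etype.startswith("vgg"):
        min_io_ratio = 4  # vgg* subsamples by 4x in its max-pooling layers
    else:
        min_io_ratio = 1  # placeholder; the real script derives this elsewhere
    print(getattr(args, "etype", "<no etype>"), "->", min_io_ratio)
```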
36 changes: 36 additions & 0 deletions egs/yoloxochitl_mixtec/asr1/RESULTS.md
@@ -0,0 +1,36 @@
# RESULTS (100 epochs, single GPU)
## Environments
- date: `Thu Jun 25 23:13:00 EDT 2020`
- python version: `3.7.3 (default, Mar 27 2019, 22:11:17) [GCC 7.3.0]`
- espnet version: `espnet 0.5.2`
- chainer version: `chainer 6.0.0`
- pytorch version: `pytorch 1.1.0`

## Pre-trained Model
- Model files (archived to model.tar.gz by `$ pack_model.sh`)
- model link: https://drive.google.com/file/d/1daXJp3mpvOKYYuEcgNbIDRyp16Q0gjFg/view?usp=sharing
- training config file: `conf/train.yaml`
- decoding config file: `conf/decode.yaml`
- cmvn file: `data/train_mixtec_surface_reserve/cmvn.ark`
- e2e file: `exp/train_mixtec_surface_reserve_pytorch_mixtec_surface_reserve/results/model.last10.avg.best`
- e2e JSON file: `exp/train_mixtec_surface_reserve_pytorch_mixtec_surface_reserve/results/model.json`
- lm file: `exp/train_rnnlm_pytorch_mixtec_surface_reserve_unigram150/rnnlm.model.best`
- lm JSON file: `exp/train_rnnlm_pytorch_mixtec_surface_reserve_unigram150/model.json`
- dict file: `data/lang_char`


## train_mixtec_surface_reserve_pytorch_mixtec_surface_reserve
### CER

|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
|---|---|---|---|---|---|---|---|---|
|decode_dev_mixtec_surface_reserve_decode_mixtec_surface_reserve|10218|687420|89.6|6.0|4.5|2.7|13.2|87.8|
|decode_test_mixtec_surface_reserve_decode_mixtec_surface_reserve|10112|688918|89.7|5.9|4.4|2.7|13.0|87.9|

### WER

|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
|---|---|---|---|---|---|---|---|---|
|decode_dev_mixtec_surface_reserve_decode_mixtec_surface_reserve|10218|165748|80.3|15.6|4.1|3.2|22.9|87.8|
|decode_test_mixtec_surface_reserve_decode_mixtec_surface_reserve|10112|166168|80.5|15.5|4.1|3.2|22.7|87.9|

89 changes: 89 additions & 0 deletions egs/yoloxochitl_mixtec/asr1/cmd.sh
@@ -0,0 +1,89 @@
# ====== About run.pl, queue.pl, slurm.pl, and ssh.pl ======
# Usage: <cmd>.pl [options] JOB=1:<nj> <log> <command...>
# e.g.
# run.pl --mem 4G JOB=1:10 echo.JOB.log echo JOB
#
# Options:
# --time <time>: Limit the maximum time to execute.
# --mem <mem>: Limit the maximum memory usage.
# --max-jobs-run <njob>: Limit the number of parallel jobs. This is ignored for non-array jobs.
# --num-threads <nthread>: Specify the number of CPU cores.
# --gpu <ngpu>: Specify the number of GPU devices.
# --config: Change the configuration file from default.
#
# "JOB=1:10" is used for "array jobs" and it can control the number of parallel jobs.
# The string to the left of "=", i.e. "JOB", is replaced by <N> (the Nth job) in the command and in the log file name,
# e.g. "echo JOB" becomes "echo 3" for the 3rd job and "echo 8" for the 8th job.
# Note that the range must start from a positive number, so you can't use "JOB=0:10", for example.
#
# run.pl, queue.pl, slurm.pl, and ssh.pl have a unified interface that does not depend on the backend.
# These options are mapped to backend-specific options, as configured by
# "conf/queue.conf" and "conf/slurm.conf" by default.
# If jobs fail, your configuration might be wrong for your environment.
#
#
# The official documentation for run.pl, queue.pl, slurm.pl, and ssh.pl:
# "Parallelization in Kaldi": http://kaldi-asr.org/doc/queue.html
# =========================================================


# Select the backend used by run.sh from "local", "sge", "slurm", or "ssh"
cmd_backend='local'

# Local machine, without any Job scheduling system
if [ "${cmd_backend}" = local ]; then

# Used for all other jobs
export train_cmd="run.pl"
# Used for "*_train.py": "--gpu" is appended optionally by run.sh
export cuda_cmd="run.pl"
# Used for "*_recog.py"
export decode_cmd="run.pl"

# "qsub" (SGE, Torque, PBS, etc.)
elif [ "${cmd_backend}" = sge ]; then
# The default setting is written in conf/queue.conf.
# You must change "-q g.q" to a "queue" that exists in your environment.
# To list the "queue" names, type "qhost -q".
# Note that to use "--gpu *", you have to set up "complex_value" for the system scheduler.

export train_cmd="queue.pl"
export cuda_cmd="queue.pl"
export decode_cmd="queue.pl"

# "sbatch" (Slurm)
elif [ "${cmd_backend}" = slurm ]; then
# The default setting is written in conf/slurm.conf.
# You must change "-p cpu" and "-p gpu" to the "partition" names in your environment.
# To list the "partition" names, type "sinfo".
# You can use "--gpu *" by default for slurm, and it is interpreted as "--gres gpu:*".
# The devices are allocated exclusively using "${CUDA_VISIBLE_DEVICES}".

export train_cmd="slurm.pl"
export cuda_cmd="slurm.pl"
export decode_cmd="slurm.pl"

elif [ "${cmd_backend}" = ssh ]; then
# You have to create ".queue/machines" to specify the hosts on which to execute jobs.
# e.g. .queue/machines
#   host1
#   host2
#   host3
# It is assumed that you can log in to them without a password, i.e., you have set up ssh keys.

export train_cmd="ssh.pl"
export cuda_cmd="ssh.pl"
export decode_cmd="ssh.pl"

# This is an example of specifying several unique options in the JHU CLSP cluster setup.
# Users can modify/add their own command options according to their cluster environments.
elif [ "${cmd_backend}" = jhu ]; then

export train_cmd="queue.pl --mem 2G"
export cuda_cmd="queue-freegpu.pl --mem 2G --gpu 1 --config conf/gpu.conf"
export decode_cmd="queue.pl --mem 4G"

else
echo "$0: Error: Unknown cmd_backend=${cmd_backend}" 1>&2
return 1
fi
1 change: 1 addition & 0 deletions egs/yoloxochitl_mixtec/asr1/conf/decode.yaml
File renamed without changes.
10 changes: 10 additions & 0 deletions egs/yoloxochitl_mixtec/asr1/conf/gpu.conf
@@ -0,0 +1,10 @@
# Default configuration
command qsub -v PATH -cwd -S /bin/bash -j y -l arch=*64*
option mem=* -l mem_free=$0,ram_free=$0
option mem=0 # Do not add anything to qsub_opts
option num_threads=* -pe smp $0
option num_threads=1 # Do not add anything to qsub_opts
option max_jobs_run=* -tc $0
default gpu=0
option gpu=0
option gpu=* -l 'hostname=b1[12345678]*|c*,gpu=$0' -q g.q
8 changes: 8 additions & 0 deletions egs/yoloxochitl_mixtec/asr1/conf/lm.yaml
@@ -0,0 +1,8 @@
layer: 2
unit: 650
opt: sgd # or adam
sortagrad: 0 # Feed samples from shortest to longest; -1: enabled for all epochs, 0: disabled, N: enabled for the first N epochs
batchsize: 64 # batch size in LM training
epoch: 20 # if the data size is large, we can reduce this
patience: 3
maxlen: 100 # if sentence length > lm_maxlen, lm_batchsize is automatically reduced
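
For intuition, layer: 2 and unit: 650 describe a 2-layer recurrent LM with 650 hidden units per layer. A rough PyTorch shape-equivalent is sketched below; the LSTM cell type and the vocabulary size of 150 (borrowed from the unigram150 name in RESULTS.md) are assumptions, and this is not espnet's LM class.

```python
import torch
import torch.nn as nn

class RNNLM(nn.Module):
    """Shape-equivalent sketch of lm.yaml above (layer: 2, unit: 650)."""

    def __init__(self, vocab=150, unit=650, layers=2):
        super().__init__()
        self.embed = nn.Embedding(vocab, unit)
        self.rnn = nn.LSTM(unit, unit, num_layers=layers, batch_first=True)
        self.out = nn.Linear(unit, vocab)

    def forward(self, tokens, state=None):
        h, state = self.rnn(self.embed(tokens), state)
        return self.out(h), state  # next-token logits and recurrent state

# batchsize: 64 and maxlen: 100 from the config above
logits, _ = RNNLM()(torch.randint(0, 150, (64, 100)))
print(logits.shape)  # torch.Size([64, 100, 150])
```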
1 change: 1 addition & 0 deletions egs/yoloxochitl_mixtec/asr1/conf/pitch.conf
@@ -0,0 +1 @@
--sample-frequency=16000
10 changes: 10 additions & 0 deletions egs/yoloxochitl_mixtec/asr1/conf/queue.conf
@@ -0,0 +1,10 @@
# Default configuration
command qsub -v PATH -cwd -S /bin/bash -j y -l arch=*64*
option mem=* -l mem_free=$0,ram_free=$0
option mem=0 # Do not add anything to qsub_opts
option num_threads=* -pe smp $0
option num_threads=1 # Do not add anything to qsub_opts
option max_jobs_run=* -tc $0
default gpu=0
option gpu=0
option gpu=* -l gpu=$0 -q g.q
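
Each "option" line above maps a generic flag (e.g. --mem 4G) to scheduler arguments by substituting the flag's value for $0, with an exact-value rule (mem=0) taking precedence over the wildcard rule (mem=*). A rough Python illustration of that substitution idea follows; it mimics the semantics only and is not queue.pl's actual parser.

```python
# Rules transcribed from the queue.conf above; "" means add nothing.
rules = {
    ("mem", "*"): "-l mem_free=$0,ram_free=$0",
    ("mem", "0"): "",
    ("num_threads", "*"): "-pe smp $0",
    ("num_threads", "1"): "",
    ("max_jobs_run", "*"): "-tc $0",
    ("gpu", "*"): "-l gpu=$0 -q g.q",
    ("gpu", "0"): "",
}

def expand(name: str, value: str) -> str:
    # An exact-value rule wins over the wildcard rule (assumed semantics).
    template = rules.get((name, value), rules.get((name, "*"), ""))
    return template.replace("$0", value)

print(expand("mem", "4G"))         # -l mem_free=4G,ram_free=4G
print(expand("gpu", "2"))          # -l gpu=2 -q g.q
print(expand("num_threads", "1"))  # (empty: nothing added to qsub_opts)
```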
12 changes: 12 additions & 0 deletions egs/yoloxochitl_mixtec/asr1/conf/slurm.conf
@@ -0,0 +1,12 @@
# Default configuration
command sbatch --export=PATH --ntasks-per-node=1
option time=* --time $0
option mem=* --mem-per-cpu $0
option mem=0 # Do not add anything to qsub_opts
option num_threads=* --cpus-per-task $0 --ntasks-per-node=1
option num_threads=1 --cpus-per-task 1 --ntasks-per-node=1 # Do not add anything to qsub_opts
default gpu=0
option gpu=0 -p cpu
option gpu=* -p gpu --gres=gpu:$0
# note: the --max-jobs-run option is supported as a special case
# by slurm.pl and you don't have to handle it in the config file.
1 change: 1 addition & 0 deletions egs/yoloxochitl_mixtec/asr1/conf/train.yaml
@@ -0,0 +1,7 @@
batchsize: 0
beam-size: 10
penalty: 0.0
maxlenratio: 0.0
minlenratio: 0.0
ctc-weight: 0.5
lm-weight: 0.3
6 changes: 6 additions & 0 deletions egs/yoloxochitl_mixtec/asr1/conf/tuning/decode_rnn.yaml
@@ -0,0 +1,6 @@
lm-weight: 0.3
beam-size: 20
penalty: 0.0
maxlenratio: 0.0
minlenratio: 0.0
ctc-weight: 0.6
@@ -0,0 +1,40 @@
# network architecture
# encoder related
elayers: 12
eunits: 2048
# decoder related
dlayers: 6
dunits: 2048
# attention related
adim: 256
aheads: 4

# hybrid CTC/attention
mtlalpha: 0.3

# label smoothing
lsm-weight: 0.1

# minibatch related
batch-size: 32
maxlen-in: 512 # if input length > maxlen-in, batchsize is automatically reduced
maxlen-out: 150 # if output length > maxlen-out, batchsize is automatically reduced

# optimization related
sortagrad: 0 # Feed samples from shortest to longest; -1: enabled for all epochs, 0: disabled, N: enabled for the first N epochs
opt: noam
accum-grad: 2
grad-clip: 5
patience: 0
epochs: 100
dropout-rate: 0.1

# transformer specific setting
backend: pytorch
model-module: "espnet.nets.pytorch_backend.e2e_asr_transformer:E2E"
transformer-input-layer: conv2d # encoder architecture type
transformer-lr: 1.0
transformer-warmup-steps: 25000
transformer-attn-dropout-rate: 0.0
transformer-length-normalized-loss: false
transformer-init: pytorch
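
A closing note on the optimizer settings that recur in every training config above: opt: noam together with transformer-lr and transformer-warmup-steps describes the inverse-square-root warmup schedule from the Transformer paper. A sketch of the usual formula, assuming transformer-lr is the scale factor and adim the model dimension (a plausible but unverified mapping onto ESPnet's internals):

```python
def noam_lr(step: int, adim: int = 256, scale: float = 1.0, warmup: int = 25000) -> float:
    """lr = scale * adim**-0.5 * min(step**-0.5, step * warmup**-1.5)"""
    return scale * adim ** -0.5 * min(step ** -0.5, step * warmup ** -1.5)

# Rises linearly to a peak at step == warmup, then decays as 1/sqrt(step).
for step in (1000, 25000, 100000):
    print(step, f"{noam_lr(step):.2e}")
```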