Skip to content

Commit

Permalink
fix small bugs and add CHiME4 enh_asr1 recipe & results
Browse files Browse the repository at this point in the history
  • Loading branch information
simpleoier committed Apr 28, 2022
1 parent 44971ff commit 2b66331
Show file tree
Hide file tree
Showing 52 changed files with 761 additions and 3 deletions.
3 changes: 3 additions & 0 deletions ci/test_integration_espnet2.sh
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,9 @@ if python3 -c 'import torch as t; from distutils.version import LooseVersion as
for f in egs2/*/ssl1/conf/train*.yaml; do
${python} -m espnet2.bin.hubert_train --config "${f}" --iterator_type none --normalize none --dry_run true --output_dir out --token_list dummy_token_list
done
for f in egs2/*/enh_asr1/conf/train_enh_asr*.yaml; do
${python} -m espnet2.bin.enh_s2t_train --config "${f}" --iterator_type none --dry_run true --output_dir out --token_list dummy_token_list
done
fi

# These files must be same each other.
Expand Down
1 change: 0 additions & 1 deletion egs2/TEMPLATE/enh_asr1/scripts/utils/show_enh_score.sh

This file was deleted.

84 changes: 84 additions & 0 deletions egs2/TEMPLATE/enh_asr1/scripts/utils/show_enh_score.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
#!/usr/bin/env bash
mindepth=0
maxdepth=1

. utils/parse_options.sh

if [ $# -gt 1 ]; then
echo "Usage: $0 --mindepth 0 --maxdepth 1 [exp]" 1>&2
echo ""
echo "Show the system environments and the evaluation results in Markdown format."
echo 'The default of <exp> is "exp/".'
exit 1
fi

[ -f ./path.sh ] && . ./path.sh
set -euo pipefail
if [ $# -eq 1 ]; then
exp=$(realpath "$1")
else
exp=exp
fi


cat << EOF
<!-- Generated by $0 -->
# RESULTS
## Environments
- date: \`$(LC_ALL=C date)\`
EOF

python3 << EOF
import sys, espnet, torch
pyversion = sys.version.replace('\n', ' ')
print(f"""- python version: \`{pyversion}\`
- espnet version: \`espnet {espnet.__version__}\`
- pytorch version: \`pytorch {torch.__version__}\`""")
EOF

cat << EOF
- Git hash: \`$(git rev-parse HEAD)\`
- Commit date: \`$(git log -1 --format='%cd')\`
EOF


while IFS= read -r expdir; do
if ls "${expdir}"/*/scoring_enh/result_stoi.txt &> /dev/null; then
echo -e "\n## $(basename ${expdir})\n"
[ -e "${expdir}"/config.yaml ] && grep ^config "${expdir}"/config.yaml
metrics=()
heading="\n|dataset|"
sep="|---|"
for type in pesq estoi stoi sar sdr sir si_snr; do
if ls "${expdir}"/*/scoring_enh/result_${type}.txt &> /dev/null; then
metrics+=("$type")
heading+="${type^^}|"
sep+="---|"
fi
done
echo -e "${heading}\n${sep}"

setnames=()
for dirname in "${expdir}"/*/scoring_enh/result_stoi.txt; do
dset=$(echo $dirname | sed -e "s#${expdir}/\([^/]*\)/scoring_enh/result_stoi.txt#\1#g")
setnames+=("$dset")
done
for dset in "${setnames[@]}"; do
line="|${dset}|"
for ((i=0; i<${#metrics[@]}; i++)); do
type=${metrics[$i]}
if [ -f "${expdir}"/${dset}/scoring_enh/result_${type}.txt ]; then
score=$(head -n1 "${expdir}"/${dset}/scoring_enh/result_${type}.txt)
else
score=""
fi
line+="${score}|"
done
echo $line
done
echo ""
fi

done < <(find ${exp} -mindepth ${mindepth} -maxdepth ${maxdepth} -type d)
57 changes: 57 additions & 0 deletions egs2/chime4/enh_asr1/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
<!-- Generated by scripts/utils/show_asr_result.sh -->
# RESULTS
## Environments
- date: `Thu Apr 28 00:09:17 EDT 2022`
- python version: `3.7.11 (default, Jul 27 2021, 14:32:16) [GCC 7.5.0]`
- espnet version: `espnet 202204`
- pytorch version: `pytorch 1.8.1`
- Git hash: `44971ff962aae30c962226f1ba3d87de057ac00e`
- Commit date: `Wed Apr 27 10:13:03 2022 -0400`
- Pretrained model: https://huggingface.co/espnet/simpleoier_chime4_enh_asr_train_enh_asr_convtasnet_fbank_transformer_raw_en_char

## enh_asr_train_enh_asr_convtasnet_fbank_transformer_raw_en_char
### WER

|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
|---|---|---|---|---|---|---|---|---|
|decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/dt05_real_beamformit_2mics|1640|27119|93.0|5.2|1.8|0.6|7.7|53.3|
|decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/dt05_real_beamformit_5mics|1640|27119|93.9|4.5|1.6|0.5|6.7|49.9|
|decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/dt05_real_isolated_1ch_track|1640|27119|91.8|6.0|2.2|0.8|9.0|57.7|
|decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/dt05_simu_beamformit_2mics|1640|27120|92.2|6.0|1.9|0.7|8.6|55.5|
|decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/dt05_simu_beamformit_5mics|1640|27120|93.6|4.9|1.5|0.6|7.1|51.6|
|decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/dt05_simu_isolated_1ch_track|1640|27120|89.9|7.6|2.4|1.0|11.1|59.7|
|decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/et05_real_beamformit_2mics|1320|21409|86.7|9.7|3.5|1.3|14.5|64.7|
|decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/et05_real_beamformit_5mics|1320|21409|89.2|7.9|2.9|1.0|11.8|61.2|
|decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/et05_real_isolated_1ch_track|1320|21409|84.6|11.4|4.0|1.5|17.0|69.4|
|decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/et05_simu_beamformit_2mics|1320|21416|86.0|10.5|3.5|1.5|15.5|67.5|
|decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/et05_simu_beamformit_5mics|1320|21416|88.1|8.9|3.1|1.2|13.1|64.8|
|decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/et05_simu_isolated_1ch_track|1320|21416|82.8|13.1|4.1|1.9|19.1|69.4|

### CER

|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
|---|---|---|---|---|---|---|---|---|
|decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/dt05_real_beamformit_2mics|1640|160390|96.6|1.4|2.0|0.6|4.0|53.3|
|decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/dt05_real_beamformit_5mics|1640|160390|97.1|1.1|1.8|0.5|3.4|49.9|
|decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/dt05_real_isolated_1ch_track|1640|160390|95.9|1.7|2.3|0.8|4.8|57.7|
|decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/dt05_simu_beamformit_2mics|1640|160400|95.9|1.7|2.3|0.7|4.8|55.5|
|decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/dt05_simu_beamformit_5mics|1640|160400|96.8|1.4|1.9|0.6|3.8|51.6|
|decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/dt05_simu_isolated_1ch_track|1640|160400|94.7|2.5|2.9|1.0|6.3|59.7|
|decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/et05_real_beamformit_2mics|1320|126796|92.8|3.2|4.0|1.2|8.4|64.7|
|decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/et05_real_beamformit_5mics|1320|126796|94.3|2.4|3.3|1.0|6.6|61.2|
|decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/et05_real_isolated_1ch_track|1320|126796|91.5|3.8|4.6|1.6|10.0|69.4|
|decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/et05_simu_beamformit_2mics|1320|126812|92.2|3.5|4.2|1.7|9.5|67.5|
|decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/et05_simu_beamformit_5mics|1320|126812|93.7|2.7|3.5|1.4|7.7|64.8|
|decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/et05_simu_isolated_1ch_track|1320|126812|90.3|4.8|4.9|2.2|11.9|69.4|

### TER

|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
|---|---|---|---|---|---|---|---|---|

### Enhancement

|dataset|STOI|SAR|SDR|SI_SNR|
|---|---|---|---|---|
|dt05_simu_isolated_1ch_track|0.87|7.14|7.14|4.51|
|et05_simu_isolated_1ch_track|0.85|7.47|7.47|3.02|
110 changes: 110 additions & 0 deletions egs2/chime4/enh_asr1/cmd.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
# ====== About run.pl, queue.pl, slurm.pl, and ssh.pl ======
# Usage: <cmd>.pl [options] JOB=1:<nj> <log> <command...>
# e.g.
# run.pl --mem 4G JOB=1:10 echo.JOB.log echo JOB
#
# Options:
# --time <time>: Limit the maximum time to execute.
# --mem <mem>: Limit the maximum memory usage.
# -–max-jobs-run <njob>: Limit the number parallel jobs. This is ignored for non-array jobs.
# --num-threads <ngpu>: Specify the number of CPU core.
# --gpu <ngpu>: Specify the number of GPU devices.
# --config: Change the configuration file from default.
#
# "JOB=1:10" is used for "array jobs" and it can control the number of parallel jobs.
# The left string of "=", i.e. "JOB", is replaced by <N>(Nth job) in the command and the log file name,
# e.g. "echo JOB" is changed to "echo 3" for the 3rd job and "echo 8" for 8th job respectively.
# Note that the number must start with a positive number, so you can't use "JOB=0:10" for example.
#
# run.pl, queue.pl, slurm.pl, and ssh.pl have unified interface, not depending on its backend.
# These options are mapping to specific options for each backend and
# it is configured by "conf/queue.conf" and "conf/slurm.conf" by default.
# If jobs failed, your configuration might be wrong for your environment.
#
#
# The official documentation for run.pl, queue.pl, slurm.pl, and ssh.pl:
# "Parallelization in Kaldi": http://kaldi-asr.org/doc/queue.html
# =========================================================~


# Select the backend used by run.sh from "local", "stdout", "sge", "slurm", or "ssh"
cmd_backend='local'

# Local machine, without any Job scheduling system
if [ "${cmd_backend}" = local ]; then

# The other usage
export train_cmd="run.pl"
# Used for "*_train.py": "--gpu" is appended optionally by run.sh
export cuda_cmd="run.pl"
# Used for "*_recog.py"
export decode_cmd="run.pl"

# Local machine logging to stdout and log file, without any Job scheduling system
elif [ "${cmd_backend}" = stdout ]; then

# The other usage
export train_cmd="stdout.pl"
# Used for "*_train.py": "--gpu" is appended optionally by run.sh
export cuda_cmd="stdout.pl"
# Used for "*_recog.py"
export decode_cmd="stdout.pl"


# "qsub" (Sun Grid Engine, or derivation of it)
elif [ "${cmd_backend}" = sge ]; then
# The default setting is written in conf/queue.conf.
# You must change "-q g.q" for the "queue" for your environment.
# To know the "queue" names, type "qhost -q"
# Note that to use "--gpu *", you have to setup "complex_value" for the system scheduler.

export train_cmd="queue.pl"
export cuda_cmd="queue.pl"
export decode_cmd="queue.pl"


# "qsub" (Torque/PBS.)
elif [ "${cmd_backend}" = pbs ]; then
# The default setting is written in conf/pbs.conf.

export train_cmd="pbs.pl"
export cuda_cmd="pbs.pl"
export decode_cmd="pbs.pl"


# "sbatch" (Slurm)
elif [ "${cmd_backend}" = slurm ]; then
# The default setting is written in conf/slurm.conf.
# You must change "-p cpu" and "-p gpu" for the "partition" for your environment.
# To know the "partion" names, type "sinfo".
# You can use "--gpu * " by default for slurm and it is interpreted as "--gres gpu:*"
# The devices are allocated exclusively using "${CUDA_VISIBLE_DEVICES}".

export train_cmd="slurm.pl"
export cuda_cmd="slurm.pl"
export decode_cmd="slurm.pl"

elif [ "${cmd_backend}" = ssh ]; then
# You have to create ".queue/machines" to specify the host to execute jobs.
# e.g. .queue/machines
# host1
# host2
# host3
# Assuming you can login them without any password, i.e. You have to set ssh keys.

export train_cmd="ssh.pl"
export cuda_cmd="ssh.pl"
export decode_cmd="ssh.pl"

# This is an example of specifying several unique options in the JHU CLSP cluster setup.
# Users can modify/add their own command options according to their cluster environments.
elif [ "${cmd_backend}" = jhu ]; then

export train_cmd="queue.pl --mem 2G"
export cuda_cmd="queue-freegpu.pl --mem 2G --gpu 1 --config conf/queue.conf"
export decode_cmd="queue.pl --mem 4G"

else
echo "$0: Error: Unknown cmd_backend=${cmd_backend}" 1>&2
return 1
fi
1 change: 1 addition & 0 deletions egs2/chime4/enh_asr1/conf/chime4.cfg
7 changes: 7 additions & 0 deletions egs2/chime4/enh_asr1/conf/decode_asr_transformer.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
batch_size: 0
beam-size: 10
penalty: 0.0
maxlenratio: 0.0
minlenratio: 0.0
ctc-weight: 0.3
lm-weight: 1.0
2 changes: 2 additions & 0 deletions egs2/chime4/enh_asr1/conf/fbank.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
--sample-frequency=16000
--num-mel-bins=80
11 changes: 11 additions & 0 deletions egs2/chime4/enh_asr1/conf/pbs.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Default configuration
command qsub -V -v PATH -S /bin/bash
option name=* -N $0
option mem=* -l mem=$0
option mem=0 # Do not add anything to qsub_opts
option num_threads=* -l ncpus=$0
option num_threads=1 # Do not add anything to qsub_opts
option num_nodes=* -l nodes=$0:ppn=1
default gpu=0
option gpu=0
option gpu=* -l ngpus=$0
1 change: 1 addition & 0 deletions egs2/chime4/enh_asr1/conf/pitch.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
--sample-frequency=16000
12 changes: 12 additions & 0 deletions egs2/chime4/enh_asr1/conf/queue.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Default configuration
command qsub -v PATH -cwd -S /bin/bash -j y -l arch=*64*
option name=* -N $0
option mem=* -l mem_free=$0,ram_free=$0
option mem=0 # Do not add anything to qsub_opts
option num_threads=* -pe smp $0
option num_threads=1 # Do not add anything to qsub_opts
option max_jobs_run=* -tc $0
option num_nodes=* -pe mpi $0 # You must set this PE as allocation_rule=1
default gpu=0
option gpu=0
option gpu=* -l gpu=$0 -q g.q
14 changes: 14 additions & 0 deletions egs2/chime4/enh_asr1/conf/slurm.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Default configuration
command sbatch --export=PATH
option name=* --job-name $0
option time=* --time $0
option mem=* --mem-per-cpu $0
option mem=0
option num_threads=* --cpus-per-task $0
option num_threads=1 --cpus-per-task 1
option num_nodes=* --nodes $0
default gpu=0
option gpu=0 -p cpu
option gpu=* -p gpu --gres=gpu:$0 -c $0 # Recommend allocating more CPU than, or equal to the number of GPU
# note: the --max-jobs-run option is supported as a special case
# by slurm.pl and you don't have to handle it in the config file.
48 changes: 48 additions & 0 deletions egs2/chime4/enh_asr1/conf/train_lm_transformer.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# network architecture
# encoder related
encoder: transformer
encoder_conf:
input_layer: conv2d
num_blocks: 12
linear_units: 2048
dropout_rate: 0.1
output_size: 256
attention_heads: 4
attention_dropout_rate: 0.0

# decoder related
decoder: transformer
decoder_conf:
input_layer: embed
num_blocks: 6
linear_units: 2048
dropout_rate: 0.1

# hybrid CTC/attention
model_conf:
ctc_weight: 0.3
lsm_weight: 0.1
length_normalized_loss: false

# optimization related
optim: adam
accum_grad: 2
grad_clip: 5
patience: 10
max_epoch: 100
optim_conf:
lr: 0.005
scheduler: warmuplr
scheduler_conf:
warmup_steps: 20000

# minibatch related
batch_type: folded
batch_size: 32

# criterion
best_model_criterion:
- - valid
- acc
- max
keep_nbest_models: 10
Loading

0 comments on commit 2b66331

Please sign in to comment.