From 2b663318cd1773fb8685b1e03295b6bc6889c283 Mon Sep 17 00:00:00 2001 From: simpleoier Date: Thu, 28 Apr 2022 00:59:22 -0400 Subject: [PATCH 1/2] fix small bugs and add CHiME4 enh_asr1 recipe & results --- ci/test_integration_espnet2.sh | 3 + .../enh_asr1/scripts/utils/show_enh_score.sh | 85 +++++++++++- egs2/chime4/enh_asr1/README.md | 57 ++++++++ egs2/chime4/enh_asr1/cmd.sh | 110 ++++++++++++++++ egs2/chime4/enh_asr1/conf/chime4.cfg | 1 + .../enh_asr1/conf/decode_asr_transformer.yaml | 7 + egs2/chime4/enh_asr1/conf/fbank.conf | 2 + egs2/chime4/enh_asr1/conf/pbs.conf | 11 ++ egs2/chime4/enh_asr1/conf/pitch.conf | 1 + egs2/chime4/enh_asr1/conf/queue.conf | 12 ++ egs2/chime4/enh_asr1/conf/slurm.conf | 14 ++ ..._enh_asr_convtasnet_fbank_transformer.yaml | 1 + .../enh_asr1/conf/train_lm_transformer.yaml | 48 +++++++ ...it_lr1e-4_accum1_adam_specaug_bypass0.yaml | 124 ++++++++++++++++++ ...ormer_lr2e-3_accum2_warmup20k_specaug.yaml | 119 +++++++++++++++++ egs2/chime4/enh_asr1/db.sh | 1 + egs2/chime4/enh_asr1/enh_asr.sh | 1 + .../CHiME3_simulate_data_patched_parallel.m | 1 + .../enh_asr1/local/bth_chime4_data_prep.sh | 1 + egs2/chime4/enh_asr1/local/chime4_asr_data.sh | 1 + egs2/chime4/enh_asr1/local/chime4_enh_data.sh | 1 + .../local/clean_chime4_format_data.sh | 1 + .../enh_asr1/local/clean_wsj0_data_prep.sh | 1 + egs2/chime4/enh_asr1/local/cstr_ndx2flist.pl | 1 + egs2/chime4/enh_asr1/local/data.sh | 89 +++++++++++++ .../enh_asr1/local/find_noisy_transcripts.pl | 1 + .../chime4/enh_asr1/local/find_transcripts.pl | 1 + egs2/chime4/enh_asr1/local/flist2scp.pl | 1 + egs2/chime4/enh_asr1/local/localize.m | 1 + egs2/chime4/enh_asr1/local/make_stft.sh | 1 + egs2/chime4/enh_asr1/local/ndx2flist.pl | 1 + .../enh_asr1/local/normalize_transcript.pl | 1 + egs2/chime4/enh_asr1/local/path.sh | 0 .../local/real_enhan_chime4_data_prep.sh | 1 + .../local/real_ext_chime4_data_prep.sh | 1 + .../local/real_noisy_chime4_data_prep.sh | 1 + .../enh_asr1/local/run_beamform_2ch_track.sh | 1 
+ .../enh_asr1/local/run_beamform_6ch_track.sh | 1 + .../enh_asr1/local/show_enhance_results.sh | 1 + .../local/simu_enhan_chime4_data_prep.sh | 1 + .../local/simu_ext_chime4_data_prep.sh | 1 + .../local/simu_noisy_chime4_data_prep.sh | 1 + egs2/chime4/enh_asr1/local/sym_channel.py | 1 + egs2/chime4/enh_asr1/local/wsj_data_prep.sh | 1 + egs2/chime4/enh_asr1/local/wsj_format_data.sh | 1 + egs2/chime4/enh_asr1/path.sh | 1 + egs2/chime4/enh_asr1/pyscripts | 1 + egs2/chime4/enh_asr1/run.sh | 45 +++++++ egs2/chime4/enh_asr1/scripts | 1 + egs2/chime4/enh_asr1/steps | 1 + egs2/chime4/enh_asr1/utils | 1 + espnet2/enh/espnet_enh_s2t_model.py | 2 - 52 files changed, 761 insertions(+), 3 deletions(-) mode change 120000 => 100755 egs2/TEMPLATE/enh_asr1/scripts/utils/show_enh_score.sh create mode 100644 egs2/chime4/enh_asr1/README.md create mode 100644 egs2/chime4/enh_asr1/cmd.sh create mode 120000 egs2/chime4/enh_asr1/conf/chime4.cfg create mode 100644 egs2/chime4/enh_asr1/conf/decode_asr_transformer.yaml create mode 100644 egs2/chime4/enh_asr1/conf/fbank.conf create mode 100644 egs2/chime4/enh_asr1/conf/pbs.conf create mode 100644 egs2/chime4/enh_asr1/conf/pitch.conf create mode 100644 egs2/chime4/enh_asr1/conf/queue.conf create mode 100644 egs2/chime4/enh_asr1/conf/slurm.conf create mode 120000 egs2/chime4/enh_asr1/conf/train_enh_asr_convtasnet_fbank_transformer.yaml create mode 100644 egs2/chime4/enh_asr1/conf/train_lm_transformer.yaml create mode 100644 egs2/chime4/enh_asr1/conf/tuning/train_enh_asr_convtasnet_init_noenhloss_wavlm_transformer_init_lr1e-4_accum1_adam_specaug_bypass0.yaml create mode 100644 egs2/chime4/enh_asr1/conf/tuning/train_enh_asr_convtasnet_si_snr_fbank_transformer_lr2e-3_accum2_warmup20k_specaug.yaml create mode 120000 egs2/chime4/enh_asr1/db.sh create mode 120000 egs2/chime4/enh_asr1/enh_asr.sh create mode 120000 egs2/chime4/enh_asr1/local/CHiME3_simulate_data_patched_parallel.m create mode 120000 egs2/chime4/enh_asr1/local/bth_chime4_data_prep.sh 
create mode 120000 egs2/chime4/enh_asr1/local/chime4_asr_data.sh create mode 120000 egs2/chime4/enh_asr1/local/chime4_enh_data.sh create mode 120000 egs2/chime4/enh_asr1/local/clean_chime4_format_data.sh create mode 120000 egs2/chime4/enh_asr1/local/clean_wsj0_data_prep.sh create mode 120000 egs2/chime4/enh_asr1/local/cstr_ndx2flist.pl create mode 100755 egs2/chime4/enh_asr1/local/data.sh create mode 120000 egs2/chime4/enh_asr1/local/find_noisy_transcripts.pl create mode 120000 egs2/chime4/enh_asr1/local/find_transcripts.pl create mode 120000 egs2/chime4/enh_asr1/local/flist2scp.pl create mode 120000 egs2/chime4/enh_asr1/local/localize.m create mode 120000 egs2/chime4/enh_asr1/local/make_stft.sh create mode 120000 egs2/chime4/enh_asr1/local/ndx2flist.pl create mode 120000 egs2/chime4/enh_asr1/local/normalize_transcript.pl create mode 100644 egs2/chime4/enh_asr1/local/path.sh create mode 120000 egs2/chime4/enh_asr1/local/real_enhan_chime4_data_prep.sh create mode 120000 egs2/chime4/enh_asr1/local/real_ext_chime4_data_prep.sh create mode 120000 egs2/chime4/enh_asr1/local/real_noisy_chime4_data_prep.sh create mode 120000 egs2/chime4/enh_asr1/local/run_beamform_2ch_track.sh create mode 120000 egs2/chime4/enh_asr1/local/run_beamform_6ch_track.sh create mode 120000 egs2/chime4/enh_asr1/local/show_enhance_results.sh create mode 120000 egs2/chime4/enh_asr1/local/simu_enhan_chime4_data_prep.sh create mode 120000 egs2/chime4/enh_asr1/local/simu_ext_chime4_data_prep.sh create mode 120000 egs2/chime4/enh_asr1/local/simu_noisy_chime4_data_prep.sh create mode 120000 egs2/chime4/enh_asr1/local/sym_channel.py create mode 120000 egs2/chime4/enh_asr1/local/wsj_data_prep.sh create mode 120000 egs2/chime4/enh_asr1/local/wsj_format_data.sh create mode 120000 egs2/chime4/enh_asr1/path.sh create mode 120000 egs2/chime4/enh_asr1/pyscripts create mode 100755 egs2/chime4/enh_asr1/run.sh create mode 120000 egs2/chime4/enh_asr1/scripts create mode 120000 egs2/chime4/enh_asr1/steps create mode 
120000 egs2/chime4/enh_asr1/utils diff --git a/ci/test_integration_espnet2.sh b/ci/test_integration_espnet2.sh index 3cf663466e7..dfa6e61e959 100755 --- a/ci/test_integration_espnet2.sh +++ b/ci/test_integration_espnet2.sh @@ -134,6 +134,9 @@ if python3 -c 'import torch as t; from distutils.version import LooseVersion as for f in egs2/*/ssl1/conf/train*.yaml; do ${python} -m espnet2.bin.hubert_train --config "${f}" --iterator_type none --normalize none --dry_run true --output_dir out --token_list dummy_token_list done + for f in egs2/*/enh_asr1/conf/train_enh_asr*.yaml; do + ${python} -m espnet2.bin.enh_s2t_train --config "${f}" --iterator_type none --dry_run true --output_dir out --token_list dummy_token_list + done fi # These files must be same each other. diff --git a/egs2/TEMPLATE/enh_asr1/scripts/utils/show_enh_score.sh b/egs2/TEMPLATE/enh_asr1/scripts/utils/show_enh_score.sh deleted file mode 120000 index 6d6490d3760..00000000000 --- a/egs2/TEMPLATE/enh_asr1/scripts/utils/show_enh_score.sh +++ /dev/null @@ -1 +0,0 @@ -../../../enh1/scripts/utils/show_enh_score.sh \ No newline at end of file diff --git a/egs2/TEMPLATE/enh_asr1/scripts/utils/show_enh_score.sh b/egs2/TEMPLATE/enh_asr1/scripts/utils/show_enh_score.sh new file mode 100755 index 00000000000..e135d73f91f --- /dev/null +++ b/egs2/TEMPLATE/enh_asr1/scripts/utils/show_enh_score.sh @@ -0,0 +1,84 @@ +#!/usr/bin/env bash +mindepth=0 +maxdepth=1 + +. utils/parse_options.sh + +if [ $# -gt 1 ]; then + echo "Usage: $0 --mindepth 0 --maxdepth 1 [exp]" 1>&2 + echo "" + echo "Show the system environments and the evaluation results in Markdown format." + echo 'The default of is "exp/".' + exit 1 +fi + +[ -f ./path.sh ] && . 
./path.sh +set -euo pipefail +if [ $# -eq 1 ]; then + exp=$(realpath "$1") +else + exp=exp +fi + + +cat << EOF + +# RESULTS +## Environments +- date: \`$(LC_ALL=C date)\` +EOF + +python3 << EOF +import sys, espnet, torch +pyversion = sys.version.replace('\n', ' ') + +print(f"""- python version: \`{pyversion}\` +- espnet version: \`espnet {espnet.__version__}\` +- pytorch version: \`pytorch {torch.__version__}\`""") +EOF + +cat << EOF +- Git hash: \`$(git rev-parse HEAD)\` + - Commit date: \`$(git log -1 --format='%cd')\` + +EOF + + +while IFS= read -r expdir; do + if ls "${expdir}"/*/scoring_enh/result_stoi.txt &> /dev/null; then + echo -e "\n## $(basename ${expdir})\n" + [ -e "${expdir}"/config.yaml ] && grep ^config "${expdir}"/config.yaml + metrics=() + heading="\n|dataset|" + sep="|---|" + for type in pesq estoi stoi sar sdr sir si_snr; do + if ls "${expdir}"/*/scoring_enh/result_${type}.txt &> /dev/null; then + metrics+=("$type") + heading+="${type^^}|" + sep+="---|" + fi + done + echo -e "${heading}\n${sep}" + + setnames=() + for dirname in "${expdir}"/*/scoring_enh/result_stoi.txt; do + dset=$(echo $dirname | sed -e "s#${expdir}/\([^/]*\)/scoring_enh/result_stoi.txt#\1#g") + setnames+=("$dset") + done + for dset in "${setnames[@]}"; do + line="|${dset}|" + for ((i=0; i<${#metrics[@]}; i++)); do + type=${metrics[$i]} + if [ -f "${expdir}"/${dset}/scoring_enh/result_${type}.txt ]; then + score=$(head -n1 "${expdir}"/${dset}/scoring_enh/result_${type}.txt) + else + score="" + fi + line+="${score}|" + done + echo $line + done + echo "" + fi + +done < <(find ${exp} -mindepth ${mindepth} -maxdepth ${maxdepth} -type d) diff --git a/egs2/chime4/enh_asr1/README.md b/egs2/chime4/enh_asr1/README.md new file mode 100644 index 00000000000..ac5df236797 --- /dev/null +++ b/egs2/chime4/enh_asr1/README.md @@ -0,0 +1,57 @@ + +# RESULTS +## Environments +- date: `Thu Apr 28 00:09:17 EDT 2022` +- python version: `3.7.11 (default, Jul 27 2021, 14:32:16) [GCC 7.5.0]` +- espnet 
version: `espnet 202204` +- pytorch version: `pytorch 1.8.1` +- Git hash: `44971ff962aae30c962226f1ba3d87de057ac00e` + - Commit date: `Wed Apr 27 10:13:03 2022 -0400` +- Pretrained model: https://huggingface.co/espnet/simpleoier_chime4_enh_asr_train_enh_asr_convtasnet_fbank_transformer_raw_en_char + +## enh_asr_train_enh_asr_convtasnet_fbank_transformer_raw_en_char +### WER + +|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err| +|---|---|---|---|---|---|---|---|---| +|decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/dt05_real_beamformit_2mics|1640|27119|93.0|5.2|1.8|0.6|7.7|53.3| +|decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/dt05_real_beamformit_5mics|1640|27119|93.9|4.5|1.6|0.5|6.7|49.9| +|decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/dt05_real_isolated_1ch_track|1640|27119|91.8|6.0|2.2|0.8|9.0|57.7| +|decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/dt05_simu_beamformit_2mics|1640|27120|92.2|6.0|1.9|0.7|8.6|55.5| +|decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/dt05_simu_beamformit_5mics|1640|27120|93.6|4.9|1.5|0.6|7.1|51.6| +|decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/dt05_simu_isolated_1ch_track|1640|27120|89.9|7.6|2.4|1.0|11.1|59.7| +|decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/et05_real_beamformit_2mics|1320|21409|86.7|9.7|3.5|1.3|14.5|64.7| 
+|decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/et05_real_beamformit_5mics|1320|21409|89.2|7.9|2.9|1.0|11.8|61.2| +|decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/et05_real_isolated_1ch_track|1320|21409|84.6|11.4|4.0|1.5|17.0|69.4| +|decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/et05_simu_beamformit_2mics|1320|21416|86.0|10.5|3.5|1.5|15.5|67.5| +|decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/et05_simu_beamformit_5mics|1320|21416|88.1|8.9|3.1|1.2|13.1|64.8| +|decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/et05_simu_isolated_1ch_track|1320|21416|82.8|13.1|4.1|1.9|19.1|69.4| + +### CER + +|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err| +|---|---|---|---|---|---|---|---|---| +|decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/dt05_real_beamformit_2mics|1640|160390|96.6|1.4|2.0|0.6|4.0|53.3| +|decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/dt05_real_beamformit_5mics|1640|160390|97.1|1.1|1.8|0.5|3.4|49.9| +|decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/dt05_real_isolated_1ch_track|1640|160390|95.9|1.7|2.3|0.8|4.8|57.7| +|decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/dt05_simu_beamformit_2mics|1640|160400|95.9|1.7|2.3|0.7|4.8|55.5| 
+|decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/dt05_simu_beamformit_5mics|1640|160400|96.8|1.4|1.9|0.6|3.8|51.6| +|decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/dt05_simu_isolated_1ch_track|1640|160400|94.7|2.5|2.9|1.0|6.3|59.7| +|decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/et05_real_beamformit_2mics|1320|126796|92.8|3.2|4.0|1.2|8.4|64.7| +|decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/et05_real_beamformit_5mics|1320|126796|94.3|2.4|3.3|1.0|6.6|61.2| +|decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/et05_real_isolated_1ch_track|1320|126796|91.5|3.8|4.6|1.6|10.0|69.4| +|decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/et05_simu_beamformit_2mics|1320|126812|92.2|3.5|4.2|1.7|9.5|67.5| +|decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/et05_simu_beamformit_5mics|1320|126812|93.7|2.7|3.5|1.4|7.7|64.8| +|decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/et05_simu_isolated_1ch_track|1320|126812|90.3|4.8|4.9|2.2|11.9|69.4| + +### TER + +|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err| +|---|---|---|---|---|---|---|---|---| + +### Enhancement + +|dataset|STOI|SAR|SDR|SI_SNR| +|---|---|---|---|---| +|dt05_simu_isolated_1ch_track|0.87|7.14|7.14|4.51| +|et05_simu_isolated_1ch_track|0.85|7.47|7.47|3.02| diff --git a/egs2/chime4/enh_asr1/cmd.sh b/egs2/chime4/enh_asr1/cmd.sh new file mode 100644 index 00000000000..2aae6919fef --- /dev/null +++ 
b/egs2/chime4/enh_asr1/cmd.sh @@ -0,0 +1,110 @@ +# ====== About run.pl, queue.pl, slurm.pl, and ssh.pl ====== +# Usage: <cmd>.pl [options] JOB=1:<nj> <log> <command...> +# e.g. +# run.pl --mem 4G JOB=1:10 echo.JOB.log echo JOB +# +# Options: +# --time