From dd408fc59a634eb8dac4bac87ad7288ce7493be4 Mon Sep 17 00:00:00 2001 From: simpleoier Date: Wed, 13 Apr 2022 14:32:06 -0400 Subject: [PATCH 1/2] initial commit for enh_s2t joint model and template egs: egs2/TEMPLATE/{enh_asr1,enh_st1} --- egs2/TEMPLATE/enh1/README.md | 1 + egs2/TEMPLATE/enh1/enh.sh | 5 +- .../utils/perturb_enh_data_dir_speed.sh | 17 +- egs2/TEMPLATE/enh_asr1/cmd.sh | 110 + egs2/TEMPLATE/enh_asr1/conf/fbank.conf | 2 + egs2/TEMPLATE/enh_asr1/conf/pbs.conf | 11 + egs2/TEMPLATE/enh_asr1/conf/pitch.conf | 1 + egs2/TEMPLATE/enh_asr1/conf/queue.conf | 12 + egs2/TEMPLATE/enh_asr1/conf/slurm.conf | 14 + egs2/TEMPLATE/enh_asr1/db.sh | 1 + egs2/TEMPLATE/enh_asr1/enh_asr.sh | 1640 +++++++++++++++ egs2/TEMPLATE/enh_asr1/local/path.sh | 0 egs2/TEMPLATE/enh_asr1/path.sh | 22 + egs2/TEMPLATE/enh_asr1/pyscripts | 1 + egs2/TEMPLATE/enh_asr1/scripts/audio | 1 + egs2/TEMPLATE/enh_asr1/scripts/feats | 1 + .../scripts/utils/TEMPLATE_HF_Readme.md | 1 + .../enh_asr1/scripts/utils/TEMPLATE_Readme.md | 1 + .../scripts/utils/create_README_file.py | 1 + .../utils/download_from_google_drive.sh | 1 + .../enh_asr1/scripts/utils/evaluate_asr.sh | 1 + .../enh_asr1/scripts/utils/get_model_names.py | 1 + .../utils/perturb_enh_data_dir_speed.sh | 1 + .../enh_asr1/scripts/utils/show_asr_result.sh | 1 + .../enh_asr1/scripts/utils/show_enh_score.sh | 1 + .../scripts/utils/upload_models_to_hub.sh | 1 + egs2/TEMPLATE/enh_asr1/setup.sh | 58 + egs2/TEMPLATE/enh_asr1/steps | 1 + egs2/TEMPLATE/enh_asr1/utils | 1 + egs2/TEMPLATE/enh_st1/cmd.sh | 110 + egs2/TEMPLATE/enh_st1/conf/fbank.conf | 2 + egs2/TEMPLATE/enh_st1/conf/pbs.conf | 11 + egs2/TEMPLATE/enh_st1/conf/pitch.conf | 1 + egs2/TEMPLATE/enh_st1/conf/queue.conf | 12 + egs2/TEMPLATE/enh_st1/conf/slurm.conf | 14 + egs2/TEMPLATE/enh_st1/db.sh | 1 + egs2/TEMPLATE/enh_st1/enh_st.sh | 1806 +++++++++++++++++ egs2/TEMPLATE/enh_st1/local/path.sh | 0 egs2/TEMPLATE/enh_st1/path.sh | 22 + egs2/TEMPLATE/enh_st1/pyscripts | 1 + 
egs2/TEMPLATE/enh_st1/scripts | 1 + egs2/TEMPLATE/enh_st1/setup.sh | 58 + egs2/TEMPLATE/enh_st1/steps | 1 + egs2/TEMPLATE/enh_st1/utils | 1 + egs2/TEMPLATE/st1/st.sh | 2 +- espnet2/asr/espnet_model.py | 3 + espnet2/asr/frontend/s3prl.py | 9 +- espnet2/bin/asr_inference.py | 26 +- espnet2/bin/enh_inference.py | 17 +- espnet2/bin/enh_s2t_train.py | 23 + espnet2/bin/enh_train.py | 2 +- espnet2/bin/pack.py | 7 + espnet2/bin/st_inference.py | 20 +- espnet2/diar/espnet_model.py | 3 + espnet2/enh/espnet_enh_s2t_model.py | 273 +++ espnet2/enh/espnet_model.py | 37 + espnet2/gan_tts/espnet_model.py | 3 + espnet2/hubert/espnet_model.py | 3 + espnet2/lm/espnet_model.py | 10 +- .../main_funcs/calculate_all_attentions.py | 6 +- espnet2/mt/espnet_model.py | 3 + espnet2/st/espnet_model.py | 3 + espnet2/tasks/enh.py | 13 +- espnet2/tasks/enh_asr.py | 369 ---- espnet2/tasks/enh_s2t.py | 475 +++++ espnet2/train/trainer.py | 11 +- espnet2/tts/espnet_model.py | 3 + 67 files changed, 4872 insertions(+), 399 deletions(-) create mode 100644 egs2/TEMPLATE/enh_asr1/cmd.sh create mode 100644 egs2/TEMPLATE/enh_asr1/conf/fbank.conf create mode 100644 egs2/TEMPLATE/enh_asr1/conf/pbs.conf create mode 100644 egs2/TEMPLATE/enh_asr1/conf/pitch.conf create mode 100644 egs2/TEMPLATE/enh_asr1/conf/queue.conf create mode 100644 egs2/TEMPLATE/enh_asr1/conf/slurm.conf create mode 120000 egs2/TEMPLATE/enh_asr1/db.sh create mode 100755 egs2/TEMPLATE/enh_asr1/enh_asr.sh create mode 100644 egs2/TEMPLATE/enh_asr1/local/path.sh create mode 100755 egs2/TEMPLATE/enh_asr1/path.sh create mode 120000 egs2/TEMPLATE/enh_asr1/pyscripts create mode 120000 egs2/TEMPLATE/enh_asr1/scripts/audio create mode 120000 egs2/TEMPLATE/enh_asr1/scripts/feats create mode 120000 egs2/TEMPLATE/enh_asr1/scripts/utils/TEMPLATE_HF_Readme.md create mode 120000 egs2/TEMPLATE/enh_asr1/scripts/utils/TEMPLATE_Readme.md create mode 120000 egs2/TEMPLATE/enh_asr1/scripts/utils/create_README_file.py create mode 120000 
egs2/TEMPLATE/enh_asr1/scripts/utils/download_from_google_drive.sh create mode 120000 egs2/TEMPLATE/enh_asr1/scripts/utils/evaluate_asr.sh create mode 120000 egs2/TEMPLATE/enh_asr1/scripts/utils/get_model_names.py create mode 120000 egs2/TEMPLATE/enh_asr1/scripts/utils/perturb_enh_data_dir_speed.sh create mode 120000 egs2/TEMPLATE/enh_asr1/scripts/utils/show_asr_result.sh create mode 120000 egs2/TEMPLATE/enh_asr1/scripts/utils/show_enh_score.sh create mode 120000 egs2/TEMPLATE/enh_asr1/scripts/utils/upload_models_to_hub.sh create mode 100755 egs2/TEMPLATE/enh_asr1/setup.sh create mode 120000 egs2/TEMPLATE/enh_asr1/steps create mode 120000 egs2/TEMPLATE/enh_asr1/utils create mode 100644 egs2/TEMPLATE/enh_st1/cmd.sh create mode 100644 egs2/TEMPLATE/enh_st1/conf/fbank.conf create mode 100644 egs2/TEMPLATE/enh_st1/conf/pbs.conf create mode 100644 egs2/TEMPLATE/enh_st1/conf/pitch.conf create mode 100644 egs2/TEMPLATE/enh_st1/conf/queue.conf create mode 100644 egs2/TEMPLATE/enh_st1/conf/slurm.conf create mode 120000 egs2/TEMPLATE/enh_st1/db.sh create mode 100755 egs2/TEMPLATE/enh_st1/enh_st.sh create mode 100644 egs2/TEMPLATE/enh_st1/local/path.sh create mode 100755 egs2/TEMPLATE/enh_st1/path.sh create mode 120000 egs2/TEMPLATE/enh_st1/pyscripts create mode 120000 egs2/TEMPLATE/enh_st1/scripts create mode 100755 egs2/TEMPLATE/enh_st1/setup.sh create mode 120000 egs2/TEMPLATE/enh_st1/steps create mode 120000 egs2/TEMPLATE/enh_st1/utils create mode 100755 espnet2/bin/enh_s2t_train.py create mode 100644 espnet2/enh/espnet_enh_s2t_model.py delete mode 100644 espnet2/tasks/enh_asr.py create mode 100644 espnet2/tasks/enh_s2t.py diff --git a/egs2/TEMPLATE/enh1/README.md b/egs2/TEMPLATE/enh1/README.md index 2d7e7aa542b..1b9984979d4 100644 --- a/egs2/TEMPLATE/enh1/README.md +++ b/egs2/TEMPLATE/enh1/README.md @@ -40,6 +40,7 @@ Format scp files such as `wav.scp`. The scp files include: + `spk{}.scp`: wav file list of speech reference signals. 
{} can be 1, 2, ..., depending on the number of speakers in the input signal in `wav.scp`. + `noise{}.scp` (optional): wav file list of noise reference signals. {} can be 1, 2, ..., depending on the number of noise types in the input signal in `wav.scp`. The file(s) are required when `--use_noise_ref true` is specified. Also related to the variable `noise_type_num`. + `dereverb{}.scp` (optional): wav file list of dereverberation reference signals (for training a dereverberation model). This file is required when `--use_dereverb_ref true` is specified. Also related to the variable `dereverb_ref_num`. + + `utt2category` (optional): the category of each utterance. This file lets the batch sampler group utterances of the same category into each batch. One use case is loading simulated data and real data in separate batches. #### Stage 4: Remove short data This stage is same as that in ASR recipe. diff --git a/egs2/TEMPLATE/enh1/enh.sh b/egs2/TEMPLATE/enh1/enh.sh index cb6e9e8503b..b465028da8e 100755 --- a/egs2/TEMPLATE/enh1/enh.sh +++ b/egs2/TEMPLATE/enh1/enh.sh @@ -201,6 +201,9 @@ fi [ -z "${valid_set}" ] && { log "${help_message}"; log "Error: --valid_set is required" ; exit 2; }; [ -z "${test_sets}" ] && { log "${help_message}"; log "Error: --test_sets is required"; exit 2; }; +# Extra files for enhancement process +utt_extra_files="utt2category" + data_feats=${dumpdir}/raw @@ -267,7 +270,7 @@ if ! 
"${skip_data_prep}"; then for factor in ${speed_perturb_factors}; do if [[ $(bc <<<"${factor} != 1.0") == 1 ]]; then - scripts/utils/perturb_enh_data_dir_speed.sh "${factor}" "data/${train_set}" "data/${train_set}_sp${factor}" "${_scp_list}" + scripts/utils/perturb_enh_data_dir_speed.sh --utt_extra_files "${utt_extra_files}" "${factor}" "data/${train_set}" "data/${train_set}_sp${factor}" "${_scp_list}" _dirs+="data/${train_set}_sp${factor} " else # If speed factor is 1, same as the original diff --git a/egs2/TEMPLATE/enh1/scripts/utils/perturb_enh_data_dir_speed.sh b/egs2/TEMPLATE/enh1/scripts/utils/perturb_enh_data_dir_speed.sh index 1d0a0fc3c3b..04887e10f30 100755 --- a/egs2/TEMPLATE/enh1/scripts/utils/perturb_enh_data_dir_speed.sh +++ b/egs2/TEMPLATE/enh1/scripts/utils/perturb_enh_data_dir_speed.sh @@ -27,6 +27,9 @@ export LC_ALL=C set -euo pipefail +utt_extra_files= +. utils/parse_options.sh + if [[ $# != 4 ]]; then echo "Usage: perturb_data_dir_speed.sh " echo "e.g.:" @@ -108,17 +111,15 @@ for scp_file in ${scp_files};do fi done -if [[ -f ${srcdir}/text ]]; then - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/text >"${destdir}"/text -fi +for x in text utt2lang ${utt_extra_files}; do + if [[ -f ${srcdir}/${x} ]]; then + utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/${x} >"${destdir}"/${x} + fi +done if [[ -f ${srcdir}/spk2gender ]]; then utils/apply_map.pl -f 1 "${destdir}"/spk_map <"${srcdir}"/spk2gender >"${destdir}"/spk2gender fi -if [[ -f ${srcdir}/utt2lang ]]; then - utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/utt2lang >"${destdir}"/utt2lang -fi - rm "${destdir}"/spk_map "${destdir}"/utt_map "${destdir}"/reco_map 2>/dev/null echo "$0: generated speed-perturbed version of data in ${srcdir}, in ${destdir}" - +utils/fix_data_dir.sh "${destdir}" utils/validate_data_dir.sh --no-feats --no-text "${destdir}" diff --git a/egs2/TEMPLATE/enh_asr1/cmd.sh b/egs2/TEMPLATE/enh_asr1/cmd.sh new file mode 100644 index 
00000000000..2aae6919fef --- /dev/null +++ b/egs2/TEMPLATE/enh_asr1/cmd.sh @@ -0,0 +1,110 @@ +# ====== About run.pl, queue.pl, slurm.pl, and ssh.pl ====== +# Usage: .pl [options] JOB=1: +# e.g. +# run.pl --mem 4G JOB=1:10 echo.JOB.log echo JOB +# +# Options: +# --time