Commit

Merge pull request espnet#4226 from simpleoier/enh_s2t

enh_s2t joint model

sw005320 authored Apr 19, 2022
2 parents 4474c3c + e132867 commit 42eb310
Showing 73 changed files with 5,176 additions and 422 deletions.
1 change: 1 addition & 0 deletions egs2/TEMPLATE/enh1/README.md
@@ -40,6 +40,7 @@ Format scp files such as `wav.scp`. The scp files include:
+ `spk{}.scp`: wav file list of speech reference signals. {} can be 1, 2, ..., depending on the number of speakers in the input signal in `wav.scp`.
+ `noise{}.scp` (optional): wav file list of noise reference signals. {} can be 1, 2, ..., depending on the number of noise types in the input signal in `wav.scp`. The file(s) are required when `--use_noise_ref true` is specified. Also related to the variable `noise_type_num`.
+ `dereverb{}.scp` (optional): wav file list of dereverberation reference signals (for training a dereverberation model). This file is required when `--use_dereverb_ref true` is specified. Also related to the variable `dereverb_ref_num`.
+ `utt2category` (optional): the category information of each utterance. It helps the batch sampler load utterances of the same category in each batch; one use case is loading simulated data and real data in separate batches (see the sketch below).
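
A minimal sketch of a `utt2category` file, assuming hypothetical utterance IDs and the usual Kaldi-style `<utterance-id> <value>` layout:

```
utt_simu_0001 simu
utt_simu_0002 simu
utt_real_0001 real
```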

#### Stage 4: Remove short data
This stage is the same as that in the ASR recipe.
5 changes: 4 additions & 1 deletion egs2/TEMPLATE/enh1/enh.sh
@@ -201,6 +201,9 @@ fi
[ -z "${valid_set}" ] && { log "${help_message}"; log "Error: --valid_set is required" ; exit 2; };
[ -z "${test_sets}" ] && { log "${help_message}"; log "Error: --test_sets is required"; exit 2; };

# Extra files for enhancement process
utt_extra_files="utt2category"

data_feats=${dumpdir}/raw


@@ -267,7 +270,7 @@ if ! "${skip_data_prep}"; then

for factor in ${speed_perturb_factors}; do
if [[ $(bc <<<"${factor} != 1.0") == 1 ]]; then
-            scripts/utils/perturb_enh_data_dir_speed.sh "${factor}" "data/${train_set}" "data/${train_set}_sp${factor}" "${_scp_list}"
+            scripts/utils/perturb_enh_data_dir_speed.sh --utt_extra_files "${utt_extra_files}" "${factor}" "data/${train_set}" "data/${train_set}_sp${factor}" "${_scp_list}"
_dirs+="data/${train_set}_sp${factor} "
else
# If speed factor is 1, same as the original
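As shown above, the only change here is that `--utt_extra_files` is now forwarded to the perturbation script. A hedged standalone invocation (the paths and scp list are illustrative; in enh.sh the list comes from `${_scp_list}`):

```bash
# Speed-perturb a data dir by factor 0.9, carrying utt2category along.
scripts/utils/perturb_enh_data_dir_speed.sh --utt_extra_files "utt2category" \
    0.9 data/train data/train_sp0.9 "wav.scp spk1.scp"
```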
17 changes: 9 additions & 8 deletions egs2/TEMPLATE/enh1/scripts/utils/perturb_enh_data_dir_speed.sh
@@ -27,6 +27,9 @@
export LC_ALL=C
set -euo pipefail

utt_extra_files=
. utils/parse_options.sh

if [[ $# != 4 ]]; then
echo "Usage: perturb_data_dir_speed.sh <warping-factor> <srcdir> <destdir> <scp_files>"
echo "e.g.:"
@@ -108,17 +111,15 @@ for scp_file in ${scp_files};do
fi
done

-if [[ -f ${srcdir}/text ]]; then
-    utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/text >"${destdir}"/text
-fi
+for x in text utt2lang ${utt_extra_files}; do
+    if [[ -f ${srcdir}/${x} ]]; then
+        utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/${x} >"${destdir}"/${x}
+    fi
+done
 if [[ -f ${srcdir}/spk2gender ]]; then
     utils/apply_map.pl -f 1 "${destdir}"/spk_map <"${srcdir}"/spk2gender >"${destdir}"/spk2gender
 fi
-if [[ -f ${srcdir}/utt2lang ]]; then
-    utils/apply_map.pl -f 1 "${destdir}"/utt_map <"${srcdir}"/utt2lang >"${destdir}"/utt2lang
-fi

rm "${destdir}"/spk_map "${destdir}"/utt_map "${destdir}"/reco_map 2>/dev/null
echo "$0: generated speed-perturbed version of data in ${srcdir}, in ${destdir}"

utils/fix_data_dir.sh "${destdir}"
utils/validate_data_dir.sh --no-feats --no-text "${destdir}"
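
For context, `utils/apply_map.pl -f 1` rewrites the first field of each line through the given mapping file, so the new loop renames utterance IDs in `utt2category` (and `utt2lang`) exactly as it always did for `text`. A sketch with hypothetical IDs:

```
# utt_map entry:         utt0001 sp0.9-utt0001
# utt2category before:   utt0001 simu
# utt2category after:    sp0.9-utt0001 simu
```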
110 changes: 110 additions & 0 deletions egs2/TEMPLATE/enh_asr1/cmd.sh
@@ -0,0 +1,110 @@
# ====== About run.pl, queue.pl, slurm.pl, and ssh.pl ======
# Usage: <cmd>.pl [options] JOB=1:<nj> <log> <command...>
# e.g.
# run.pl --mem 4G JOB=1:10 echo.JOB.log echo JOB
#
# Options:
# --time <time>: Limit the maximum time to execute.
# --mem <mem>: Limit the maximum memory usage.
# --max-jobs-run <njob>: Limit the number of parallel jobs. This is ignored for non-array jobs.
# --num-threads <nthreads>: Specify the number of CPU cores.
# --gpu <ngpu>: Specify the number of GPU devices.
# --config: Change the configuration file from default.
#
# "JOB=1:10" is used for "array jobs" and it can control the number of parallel jobs.
# The left string of "=", i.e. "JOB", is replaced by <N>(Nth job) in the command and the log file name,
# e.g. "echo JOB" is changed to "echo 3" for the 3rd job and "echo 8" for 8th job respectively.
# Note that the number must start with a positive number, so you can't use "JOB=0:10" for example.
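# For instance (a sketch), "run.pl JOB=1:3 exp/echo.JOB.log echo JOB" launches
# three jobs in parallel and writes exp/echo.1.log, exp/echo.2.log, and
# exp/echo.3.log, whose commands print "1", "2", and "3" respectively.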
#
# run.pl, queue.pl, slurm.pl, and ssh.pl have a unified interface that does not depend on the backend.
# These options are mapped to backend-specific options,
# as configured by "conf/queue.conf" and "conf/slurm.conf" by default.
# If jobs fail, your configuration might be wrong for your environment.
#
#
# The official documentation for run.pl, queue.pl, slurm.pl, and ssh.pl:
# "Parallelization in Kaldi": http://kaldi-asr.org/doc/queue.html
# =========================================================


# Select the backend used by run.sh from "local", "stdout", "sge", "pbs", "slurm", "ssh", or "jhu"
cmd_backend='local'

# Local machine, without any Job scheduling system
if [ "${cmd_backend}" = local ]; then

# Used for jobs other than "*_train.py" and "*_recog.py"
export train_cmd="run.pl"
# Used for "*_train.py": "--gpu" is appended optionally by run.sh
export cuda_cmd="run.pl"
# Used for "*_recog.py"
export decode_cmd="run.pl"

# Local machine logging to stdout and log file, without any Job scheduling system
elif [ "${cmd_backend}" = stdout ]; then

# Used for jobs other than "*_train.py" and "*_recog.py"
export train_cmd="stdout.pl"
# Used for "*_train.py": "--gpu" is appended optionally by run.sh
export cuda_cmd="stdout.pl"
# Used for "*_recog.py"
export decode_cmd="stdout.pl"


# "qsub" (Sun Grid Engine, or derivation of it)
elif [ "${cmd_backend}" = sge ]; then
# The default setting is written in conf/queue.conf.
# You must change "-q g.q" for the "queue" for your environment.
# To know the "queue" names, type "qhost -q"
# Note that to use "--gpu *", you have to setup "complex_value" for the system scheduler.

export train_cmd="queue.pl"
export cuda_cmd="queue.pl"
export decode_cmd="queue.pl"


# "qsub" (Torque/PBS.)
elif [ "${cmd_backend}" = pbs ]; then
# The default setting is written in conf/pbs.conf.

export train_cmd="pbs.pl"
export cuda_cmd="pbs.pl"
export decode_cmd="pbs.pl"


# "sbatch" (Slurm)
elif [ "${cmd_backend}" = slurm ]; then
# The default setting is written in conf/slurm.conf.
# You must change "-p cpu" and "-p gpu" for the "partition" for your environment.
# To know the "partion" names, type "sinfo".
# You can use "--gpu * " by default for slurm and it is interpreted as "--gres gpu:*"
# The devices are allocated exclusively using "${CUDA_VISIBLE_DEVICES}".

export train_cmd="slurm.pl"
export cuda_cmd="slurm.pl"
export decode_cmd="slurm.pl"

elif [ "${cmd_backend}" = ssh ]; then
# You have to create ".queue/machines" to specify the hosts on which to execute jobs.
# e.g. .queue/machines
# host1
# host2
# host3
# It is assumed that you can log in to them without a password, i.e., you have to set up SSH keys.

export train_cmd="ssh.pl"
export cuda_cmd="ssh.pl"
export decode_cmd="ssh.pl"

# This is an example of specifying several options specific to the JHU CLSP cluster setup.
# Users can modify/add their own command options according to their cluster environments.
elif [ "${cmd_backend}" = jhu ]; then

export train_cmd="queue.pl --mem 2G"
export cuda_cmd="queue-freegpu.pl --mem 2G --gpu 1 --config conf/queue.conf"
export decode_cmd="queue.pl --mem 4G"

else
echo "$0: Error: Unknown cmd_backend=${cmd_backend}" 1>&2
return 1
fi
2 changes: 2 additions & 0 deletions egs2/TEMPLATE/enh_asr1/conf/fbank.conf
@@ -0,0 +1,2 @@
--sample-frequency=16000
--num-mel-bins=80
11 changes: 11 additions & 0 deletions egs2/TEMPLATE/enh_asr1/conf/pbs.conf
@@ -0,0 +1,11 @@
# Default configuration
command qsub -V -v PATH -S /bin/bash
option name=* -N $0
option mem=* -l mem=$0
option mem=0 # Do not add anything to qsub_opts
option num_threads=* -l ncpus=$0
option num_threads=1 # Do not add anything to qsub_opts
option num_nodes=* -l nodes=$0:ppn=1
default gpu=0
option gpu=0
option gpu=* -l ngpus=$0
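
Each `option` line maps a unified flag to a qsub flag, with `$0` standing for the supplied value. An illustrative sketch under this config:

```
# pbs.pl --mem 4G --num-threads 8 ...  adds:  -l mem=4G -l ncpus=8
# pbs.pl --gpu 1 ...                   adds:  -l ngpus=1
```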
1 change: 1 addition & 0 deletions egs2/TEMPLATE/enh_asr1/conf/pitch.conf
@@ -0,0 +1 @@
--sample-frequency=16000
12 changes: 12 additions & 0 deletions egs2/TEMPLATE/enh_asr1/conf/queue.conf
@@ -0,0 +1,12 @@
# Default configuration
command qsub -v PATH -cwd -S /bin/bash -j y -l arch=*64*
option name=* -N $0
option mem=* -l mem_free=$0,ram_free=$0
option mem=0 # Do not add anything to qsub_opts
option num_threads=* -pe smp $0
option num_threads=1 # Do not add anything to qsub_opts
option max_jobs_run=* -tc $0
option num_nodes=* -pe mpi $0 # You must set this PE as allocation_rule=1
default gpu=0
option gpu=0
option gpu=* -l gpu=$0 -q g.q
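
The SGE config follows the same pattern but targets different qsub flags; for example:

```
# queue.pl --max-jobs-run 20 ...  adds:  -tc 20
# queue.pl --gpu 1 ...            adds:  -l gpu=1 -q g.q
```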
14 changes: 14 additions & 0 deletions egs2/TEMPLATE/enh_asr1/conf/slurm.conf
@@ -0,0 +1,14 @@
# Default configuration
command sbatch --export=PATH
option name=* --job-name $0
option time=* --time $0
option mem=* --mem-per-cpu $0
option mem=0
option num_threads=* --cpus-per-task $0
option num_threads=1 --cpus-per-task 1
option num_nodes=* --nodes $0
default gpu=0
option gpu=0 -p cpu
option gpu=* -p gpu --gres=gpu:$0 -c $0  # It is recommended to allocate at least as many CPUs as GPUs
# note: the --max-jobs-run option is supported as a special case
# by slurm.pl and you don't have to handle it in the config file.
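
Likewise for Slurm, where a nonzero `--gpu` also selects the partition and pins CPUs; a sketch:

```
# slurm.pl --gpu 2 ...  adds:  -p gpu --gres=gpu:2 -c 2
# slurm.pl --gpu 0 ...  adds:  -p cpu   (also the default)
```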
1 change: 1 addition & 0 deletions egs2/TEMPLATE/enh_asr1/db.sh