From e01170091a81ec1528057df2a437ffac1a335dec Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 22 Mar 2022 03:18:20 +0000 Subject: [PATCH 01/56] Add MediaSpeech ASR recipe --- egs2/README.md | 1 + egs2/TEMPLATE/asr1/db.sh | 1 + egs2/mediaspeech/asr1/README.md | 0 egs2/mediaspeech/asr1/asr.sh | 1 + egs2/mediaspeech/asr1/cmd.sh | 110 ++++++++++++++++++ egs2/mediaspeech/asr1/conf/decode_asr.yaml | 1 + egs2/mediaspeech/asr1/conf/fbank.conf | 2 + egs2/mediaspeech/asr1/conf/pbs.conf | 11 ++ egs2/mediaspeech/asr1/conf/pitch.conf | 1 + egs2/mediaspeech/asr1/conf/queue.conf | 12 ++ egs2/mediaspeech/asr1/conf/slurm.conf | 14 +++ egs2/mediaspeech/asr1/conf/train_asr.yaml | 1 + .../asr1/conf/tuning/train_asr_conformer.yaml | 70 +++++++++++ .../asr1/conf/tuning/train_asr_fused.yaml | 96 +++++++++++++++ .../asr1/conf/tuning/train_asr_hubert.yaml | 86 ++++++++++++++ .../conf/tuning/train_asr_transformer.yaml | 48 ++++++++ egs2/mediaspeech/asr1/db.sh | 1 + egs2/mediaspeech/asr1/local/data.sh | 82 +++++++++++++ egs2/mediaspeech/asr1/local/data_prep.pl | 1 + egs2/mediaspeech/asr1/local/data_prep.py | 92 +++++++++++++++ .../asr1/local/download_and_untar.sh | 1 + egs2/mediaspeech/asr1/local/path.sh | 0 egs2/mediaspeech/asr1/path.sh | 1 + egs2/mediaspeech/asr1/pyscripts | 1 + egs2/mediaspeech/asr1/run.sh | 45 +++++++ egs2/mediaspeech/asr1/scripts | 1 + egs2/mediaspeech/asr1/steps | 1 + egs2/mediaspeech/asr1/utils | 1 + 28 files changed, 682 insertions(+) create mode 100644 egs2/mediaspeech/asr1/README.md create mode 120000 egs2/mediaspeech/asr1/asr.sh create mode 100644 egs2/mediaspeech/asr1/cmd.sh create mode 120000 egs2/mediaspeech/asr1/conf/decode_asr.yaml create mode 100644 egs2/mediaspeech/asr1/conf/fbank.conf create mode 100644 egs2/mediaspeech/asr1/conf/pbs.conf create mode 100644 egs2/mediaspeech/asr1/conf/pitch.conf create mode 100644 egs2/mediaspeech/asr1/conf/queue.conf create mode 100644 egs2/mediaspeech/asr1/conf/slurm.conf create mode 120000 
egs2/mediaspeech/asr1/conf/train_asr.yaml create mode 100644 egs2/mediaspeech/asr1/conf/tuning/train_asr_conformer.yaml create mode 100644 egs2/mediaspeech/asr1/conf/tuning/train_asr_fused.yaml create mode 100644 egs2/mediaspeech/asr1/conf/tuning/train_asr_hubert.yaml create mode 100644 egs2/mediaspeech/asr1/conf/tuning/train_asr_transformer.yaml create mode 120000 egs2/mediaspeech/asr1/db.sh create mode 100755 egs2/mediaspeech/asr1/local/data.sh create mode 120000 egs2/mediaspeech/asr1/local/data_prep.pl create mode 100755 egs2/mediaspeech/asr1/local/data_prep.py create mode 120000 egs2/mediaspeech/asr1/local/download_and_untar.sh create mode 100644 egs2/mediaspeech/asr1/local/path.sh create mode 120000 egs2/mediaspeech/asr1/path.sh create mode 120000 egs2/mediaspeech/asr1/pyscripts create mode 100755 egs2/mediaspeech/asr1/run.sh create mode 120000 egs2/mediaspeech/asr1/scripts create mode 120000 egs2/mediaspeech/asr1/steps create mode 120000 egs2/mediaspeech/asr1/utils diff --git a/egs2/README.md b/egs2/README.md index 133fc9192f6..5aae03496fb 100755 --- a/egs2/README.md +++ b/egs2/README.md @@ -54,6 +54,7 @@ See: https://espnet.github.io/espnet/espnet2_tutorial.html#recipes-using-espnet2 | ljspeech | The LJ Speech Dataset | TTS | ENG | https://keithito.com/LJ-Speech-Dataset/ | | | lrs3 | The Oxford-BBC Lip Reading Sentences 3 (LRS3) Dataset | ASR | ENG | https://www.robots.ox.ac.uk/~vgg/data/lip_reading/lrs3.html | | | lrs2 | The Oxford-BBC Lip Reading Sentences 2 (LRS2) Dataset | Lipreading/ASR | ENG | https://www.robots.ox.ac.uk/~vgg/data/lip_reading/lrs2.html | | +| mediaspeech | MediaSpeech: Multilanguage ASR Benchmark and Dataset | ASR | FR | https://www.openslr.org/108/ | | | mini_an4 | Mini version of CMU AN4 database for the integration test | ASR/TTS/SE | ENG | http://www.speech.cs.cmu.edu/databases/an4/ | | | mini_librispeech | Mini version of Librispeech corpus | DIAR | ENG | https://openslr.org/31/ | | | mls | MLS (A large multilingual corpus derived 
from LibriVox audiobooks) | ASR | 8 languages | http://www.openslr.org/94/ | | diff --git a/egs2/TEMPLATE/asr1/db.sh b/egs2/TEMPLATE/asr1/db.sh index f7d686fa164..48f2d29a29f 100755 --- a/egs2/TEMPLATE/asr1/db.sh +++ b/egs2/TEMPLATE/asr1/db.sh @@ -42,6 +42,7 @@ LIBRILIGHT_LIMITED= FSC= SLURP= VOXCELEB= +MEDIASPEECH=downloads MINI_LIBRISPEECH=downloads MISP2021= LIBRIMIX=downloads diff --git a/egs2/mediaspeech/asr1/README.md b/egs2/mediaspeech/asr1/README.md new file mode 100644 index 00000000000..e69de29bb2d diff --git a/egs2/mediaspeech/asr1/asr.sh b/egs2/mediaspeech/asr1/asr.sh new file mode 120000 index 00000000000..60b05122cfd --- /dev/null +++ b/egs2/mediaspeech/asr1/asr.sh @@ -0,0 +1 @@ +../../TEMPLATE/asr1/asr.sh \ No newline at end of file diff --git a/egs2/mediaspeech/asr1/cmd.sh b/egs2/mediaspeech/asr1/cmd.sh new file mode 100644 index 00000000000..2aae6919fef --- /dev/null +++ b/egs2/mediaspeech/asr1/cmd.sh @@ -0,0 +1,110 @@ +# ====== About run.pl, queue.pl, slurm.pl, and ssh.pl ====== +# Usage: <cmd>.pl [options] JOB=1:<nj> <log> <command...> +# e.g. +# run.pl --mem 4G JOB=1:10 echo.JOB.log echo JOB +# +# Options: +# --time