diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index ac69ca49b32..bbd021e7afd 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -16,7 +16,7 @@ jobs: matrix: os: [ubuntu-18.04] python-version: [3.7] - pytorch-version: [1.3.1, 1.4.0, 1.5.1, 1.6.0, 1.7.1, 1.8.1, 1.9.1, 1.10.2, 1.11.0] + pytorch-version: [1.4.0, 1.5.1, 1.6.0, 1.7.1, 1.8.1, 1.9.1, 1.10.2, 1.11.0] chainer-version: [6.0.0] # NOTE(kamo): Conda is tested by Circle-CI use-conda: [false] diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml new file mode 100644 index 00000000000..049bdabafa1 --- /dev/null +++ b/.github/workflows/docker.yml @@ -0,0 +1,48 @@ +name: docker-builder + +on: + pull_request: + types: [closed] + branches: + - master + paths: + - 'tools/**' + - setup.py + +jobs: + docker: + runs-on: ubuntu-latest + if: github.event.pull_request.merged == true + steps: + - uses: actions/checkout@v2 + + - name: Set up QEMU + uses: docker/setup-qemu-action@v1 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v1 + + - name: Login to DockerHub + uses: docker/login-action@v1 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + + - name: Build and push CPU container + run: | + cd docker + docker build --build-arg FROM_TAG=runtime-latest \ + -f prebuilt/devel.dockerfile \ + --target devel \ + -t espnet/espnet:cpu-latest . + docker push espnet/espnet:cpu-latest + + - name: Build and push GPU container + run: | + cd docker + docker build --build-arg FROM_TAG=cuda-latest \ + --build-arg CUDA_VER=11.1 \ + -f prebuilt/devel.dockerfile \ + --target devel \ + -t espnet/espnet:gpu-latest . 
+ docker push espnet/espnet:gpu-latest diff --git a/.gitignore b/.gitignore index 177ba14498d..7170a376705 100644 --- a/.gitignore +++ b/.gitignore @@ -42,6 +42,8 @@ egs*/*/*/tensorboard egs*/*/*/wav* egs*/*/*/nltk* +egs*/*/*/pretrained_models* +egs*/fisher_callhome_spanish/*/local/mapping* # tools related tools/chainer diff --git a/.gitmodules b/.gitmodules index bc771d8c6ee..e69de29bb2d 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +0,0 @@ -[submodule "doc/notebook"] - path = doc/notebook - url = https://github.com/espnet/notebook diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 3e62434c769..9036a09b66d 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -53,9 +53,9 @@ ESPnet2's recipes correspond to `egs2`. ESPnet2 applies a new paradigm without d For ESPnet2, we do not recommend preparing the recipe's stages for each corpus but using the common pipelines we provided in `asr.sh`, `tts.sh`, and `enh.sh`. For details of creating ESPnet2 recipes, please refer to [egs2-readme](https://github.com/espnet/espnet/blob/master/egs2/TEMPLATE/README.md). -The common pipeline of ESPnet2 recipes will take care of the `RESULTS.md` generation, model packing, and uploading. ESPnet2 models are maintained at Zenodo and Hugging Face. +The common pipeline of ESPnet2 recipes will take care of the `RESULTS.md` generation, model packing, and uploading. ESPnet2 models are maintained at Hugging Face and Zenodo (Deprecated). You can also refer to the document in https://github.com/espnet/espnet_model_zoo -To upload your model, you need first: +To upload your model, you need first (This is currently deprecated; uploading to the Hugging Face Hub is preferred): 1. Sign up to Zenodo: https://zenodo.org/ 2. Create access token: https://zenodo.org/account/settings/applications/tokens/new/ 3. Set your environment: % export ACCESS_TOKEN="" @@ -64,6 +64,21 @@ To port models from zenodo using Hugging Face hub, 1. Create a Hugging Face account - https://huggingface.co/ 2.
Request to be added to espnet organisation - https://huggingface.co/espnet 3. Go to `egs2/RECIPE/*/scripts/utils` and run `./upload_models_to_hub.sh "ZENODO_MODEL_NAME"` + +To upload models using huggingface-cli, follow these steps: +You can also refer to https://huggingface.co/docs/transformers/model_sharing +1. Create a Hugging Face account - https://huggingface.co/ +2. Request to be added to espnet organisation - https://huggingface.co/espnet +3. Run huggingface-cli login (You can get the token required at this step under Settings > Access Tokens > espnet token) +4. `huggingface-cli repo create your-model-name --organization espnet` +5. `git clone https://huggingface.co/username/your-model-name` (clone this outside ESPnet to avoid issues, as this is a git repo) +6. `cd your-model-name` +7. `git lfs install` +8. copy contents from the exp directory of your recipe into this directory (Check other models of similar task under ESPnet to confirm your directory structure) +9. `git add .` +10. `git commit -m "Add model files"` +11. `git push` +12. Check if the inference demo on HF is running successfully to verify the upload #### 1.3.3 Additional requirements for new recipe @@ -76,6 +91,18 @@ to its differences. - If a recipe for a new corpus is proposed, you should add its name and information to: https://github.com/espnet/espnet/blob/master/egs/README.md if it's a ESPnet1 recipe, or https://github.com/espnet/espnet/blob/master/egs2/README.md + `db.sh` if it's a ESPnet2 recipe. + +#### 1.3.4 Checklist before you submit the recipe-based PR + +- [ ] be careful about the name for the recipe. It is recommended to follow naming conventions of the other recipes +- [ ] common/shared files are linked with **soft link** (see Section 1.3.3) +- [ ] modified or new python scripts should be passed through **latest** black formatting (by using python package black).
The command to be executed could be `black espnet espnet2 test utils setup.py egs*/*/*/local egs2/TEMPLATE/asr1/pyscripts` +- [ ] cluster settings should be set as **default** (e.g., cmd.sh conf/slurm.conf conf/queue.conf conf/pbs.conf) +- [ ] update `egs/README.md` or `egs2/README.md` with corresponding recipes +- [ ] add corresponding entry in `egs2/TEMPLATE/db.sh` for a new corpus +- [ ] try to **simplify** the model configurations. We recommend to have only the best configuration for the start of a recipe. Please also follow the default rule defined in Section 1.3.3 +- [ ] large meta-information for a corpus should be maintained elsewhere other than in the recipe itself +- [ ] recommend to also include results and pre-trained model with the recipe ## 2 Pull Request If your proposed feature or bugfix is ready, please open a Pull Request (PR) at https://github.com/espnet/espnet @@ -124,6 +151,11 @@ we recommend using small model parameters and avoiding dynamic imports, file acc more running time, you can annotate your test with `@pytest.mark.execution_timeout(sec)`. - For test initialization (parameters, modules, etc), you can use pytest fixtures. Refer to [pytest fixtures](https://docs.pytest.org/en/latest/fixture.html#using-fixtures-from-classes-modules-or-projects) for more information. +In addition, please follow the [PEP 8 convention](https://peps.python.org/pep-0008/) for the coding style and [Google's convention for docstrings](https://google.github.io/styleguide/pyguide.html#383-functions-and-methods). +Below are some specific points that should be taken care of in particular: +- [import ordering](https://peps.python.org/pep-0008/#imports) +- Avoid writing python2-style code. For example, `super().__init__()` is preferred over `super(CLASS_NAME, self).__init__()`.
+ ### 4.2 Bash scripts diff --git a/README.md b/README.md index 0493ec5b56e..358da305ccc 100644 --- a/README.md +++ b/README.md @@ -2,15 +2,16 @@ # ESPnet: end-to-end speech processing toolkit -|system/pytorch ver.|1.3.1|1.4.0|1.5.1|1.6.0|1.7.1|1.8.1|1.9.1|1.10.2|1.11.0| -| :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | -|ubuntu20/python3.10/pip|||||||||[![Github Actions](https://github.com/espnet/espnet/workflows/CI/badge.svg)](https://github.com/espnet/espnet/actions)| -|ubuntu20/python3.9/pip|||||||||[![Github Actions](https://github.com/espnet/espnet/workflows/CI/badge.svg)](https://github.com/espnet/espnet/actions)| -|ubuntu20/python3.8/pip|||||||||[![Github Actions](https://github.com/espnet/espnet/workflows/CI/badge.svg)](https://github.com/espnet/espnet/actions)| +|system/pytorch ver.|1.4.0|1.5.1|1.6.0|1.7.1|1.8.1|1.9.1|1.10.2|1.11.0| +| :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | +|ubuntu20/python3.10/pip||||||||[![Github Actions](https://github.com/espnet/espnet/workflows/CI/badge.svg)](https://github.com/espnet/espnet/actions)| +|ubuntu20/python3.9/pip||||||||[![Github Actions](https://github.com/espnet/espnet/workflows/CI/badge.svg)](https://github.com/espnet/espnet/actions)| +|ubuntu20/python3.8/pip||||||||[![Github Actions](https://github.com/espnet/espnet/workflows/CI/badge.svg)](https://github.com/espnet/espnet/actions)| |ubuntu18/python3.7/pip|[![Github Actions](https://github.com/espnet/espnet/workflows/CI/badge.svg)](https://github.com/espnet/espnet/actions)|[![Github Actions](https://github.com/espnet/espnet/workflows/CI/badge.svg)](https://github.com/espnet/espnet/actions)|[![Github Actions](https://github.com/espnet/espnet/workflows/CI/badge.svg)](https://github.com/espnet/espnet/actions)|[![Github Actions](https://github.com/espnet/espnet/workflows/CI/badge.svg)](https://github.com/espnet/espnet/actions)|[![Github 
Actions](https://github.com/espnet/espnet/workflows/CI/badge.svg)](https://github.com/espnet/espnet/actions)|[![Github Actions](https://github.com/espnet/espnet/workflows/CI/badge.svg)](https://github.com/espnet/espnet/actions)|[![Github Actions](https://github.com/espnet/espnet/workflows/CI/badge.svg)](https://github.com/espnet/espnet/actions)|[![Github Actions](https://github.com/espnet/espnet/workflows/CI/badge.svg)](https://github.com/espnet/espnet/actions)| -|debian9/python3.7/conda|||||||||[![debian9](https://github.com/espnet/espnet/workflows/debian9/badge.svg)](https://github.com/espnet/espnet/actions?query=workflow%3Adebian9)| -|centos7/python3.7/conda|||||||||[![centos7](https://github.com/espnet/espnet/workflows/centos7/badge.svg)](https://github.com/espnet/espnet/actions?query=workflow%3Acentos7)| -|doc/python3.8|||||||||[![doc](https://github.com/espnet/espnet/workflows/doc/badge.svg)](https://github.com/espnet/espnet/actions?query=workflow%3Adoc)| +|debian9/python3.7/conda||||||||[![debian9](https://github.com/espnet/espnet/workflows/debian9/badge.svg)](https://github.com/espnet/espnet/actions?query=workflow%3Adebian9)| +|centos7/python3.7/conda||||||||[![centos7](https://github.com/espnet/espnet/workflows/centos7/badge.svg)](https://github.com/espnet/espnet/actions?query=workflow%3Acentos7)| +|doc/python3.8||||||||[![doc](https://github.com/espnet/espnet/workflows/doc/badge.svg)](https://github.com/espnet/espnet/actions?query=workflow%3Adoc)| + [![PyPI version](https://badge.fury.io/py/espnet.svg)](https://badge.fury.io/py/espnet) [![Python Versions](https://img.shields.io/pypi/pyversions/espnet.svg)](https://pypi.org/project/espnet/) @@ -36,7 +37,7 @@ ESPnet uses [pytorch](http://pytorch.org/) as a deep learning engine and also fo - Support numbers of `ASR` recipes (WSJ, Switchboard, CHiME-4/5, Librispeech, TED, CSJ, AMI, HKUST, Voxforge, REVERB, etc.) 
- Support numbers of `TTS` recipes with a similar manner to the ASR recipe (LJSpeech, LibriTTS, M-AILABS, etc.) - Support numbers of `ST` recipes (Fisher-CallHome Spanish, Libri-trans, IWSLT'18, How2, Must-C, Mboshi-French, etc.) -- Support numbers of `MT` recipes (IWSLT'16, the above ST recipes etc.) +- Support numbers of `MT` recipes (IWSLT'14, IWSLT'16, the above ST recipes etc.) - Support numbers of `SLU` recipes (CATSLU-MAPS, FSC, Grabo, IEMOCAP, JDCINAL, SNIPS, SLURP, SWBD-DA, etc.) - Support numbers of `SE/SS` recipes (DNS-IS2020, LibriMix, SMS-WSJ, VCTK-noisyreverb, WHAM!, WHAMR!, WSJ-2mix, etc.) - Support voice conversion recipe (VCC2020 baseline) @@ -78,7 +79,11 @@ ESPnet uses [pytorch](http://pytorch.org/) as a deep learning engine and also fo - Self-supervised learning representations as features, using upstream models in [S3PRL](https://github.com/s3prl/s3prl) in frontend. - Set `frontend` to be `s3prl` - Select any upstream model by setting the `frontend_conf` to the corresponding name. +- Transfer Learning : + - easy usage and transfers from models previously trained by your group, or models from [ESPnet huggingface repository](https://huggingface.co/espnet). + - [Documentation](https://github.com/espnet/espnet/tree/master/egs2/mini_an4/asr1/transfer_learning.md) and [toy example runnable on colab](https://github.com/espnet/notebook/blob/master/espnet2_asr_transfer_learning_demo.ipynb). - Streaming Transformer/Conformer ASR with blockwise synchronous beam search. 
+- Restricted Self-Attention based on [Longformer](https://arxiv.org/abs/2004.05150) as an encoder for long sequences Demonstration - Real-time ASR demo with ESPnet2 [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/espnet/notebook/blob/master/espnet2_asr_realtime_demo.ipynb) @@ -94,7 +99,7 @@ Demonstration - Conformer FastSpeech & FastSpeech2 - VITS - Multi-speaker & multi-language extention - - Pretrined speaker embedding (e.g., X-vector) + - Pretrained speaker embedding (e.g., X-vector) - Speaker ID embedding - Language ID embedding - Global style token (GST) embedding @@ -130,7 +135,7 @@ To train the neural vocoder, please check the following repositories: - Multi-speaker speech separation - Unified encoder-separator-decoder structure for time-domain and frequency-domain models - Encoder/Decoder: STFT/iSTFT, Convolution/Transposed-Convolution - - Separators: BLSTM, Transformer, Conformer, DPRNN, Neural Beamformers, etc. + - Separators: BLSTM, Transformer, Conformer, [TasNet](https://arxiv.org/abs/1809.07454), [DPRNN](https://arxiv.org/abs/1910.06379), [SkiM](https://arxiv.org/abs/2201.10800), [SVoice](https://arxiv.org/abs/2011.02329), [DC-CRN](https://web.cse.ohio-state.edu/~wang.77/papers/TZW.taslp21.pdf), [DCCRN](https://arxiv.org/abs/2008.00264), [Deep Clustering](https://ieeexplore.ieee.org/document/7471631), [Deep Attractor Network](https://pubmed.ncbi.nlm.nih.gov/29430212/), [FaSNet](https://arxiv.org/abs/1909.13387), [iFaSNet](https://arxiv.org/abs/1910.14104), Neural Beamformers, etc. - Flexible ASR integration: working as an individual task or as the ASR frontend - Easy to import pretrained models from [Asteroid](https://github.com/asteroid-team/asteroid) - Both the pre-trained models from Asteroid and the specific configuration are supported. 
@@ -138,7 +143,6 @@ To train the neural vocoder, please check the following repositories: Demonstration - Interactive SE demo with ESPnet2 [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1fjRJCh96SoYLZPRxsjF9VDv4Q2VoIckI?usp=sharing) - ### ST: Speech Translation & MT: Machine Translation - **State-of-the-art performance** in several ST benchmarks (comparable/superior to cascaded ASR and MT) - Transformer based end-to-end ST (new!) @@ -149,9 +153,34 @@ Demonstration - End-to-end VC based on cascaded ASR+TTS (Baseline system for Voice Conversion Challenge 2020!) ### SLU: Speech Language Understanding -- Predicting intent by directly classifying it as one of intent or decoding by character -- Transformer & RNN based encoder-decoder model -- Establish SOTA results with spectral augmentation (Performs better than reported results of pretrained model on Fluent Speech Command Dataset) +- Architecture + - Transformer based Encoder + - Conformer based Encoder + - RNN based Decoder + - Transformer based Decoder +- Support Multitasking with ASR + - Predict both intent and ASR transcript +- Support Multitasking with NLU + - Deliberation encoder based 2 pass model +- Support using pretrained ASR models + - Hubert + - Wav2vec2 + - VQ-APC + - TERA and more ... +- Support using pretrained NLP models + - BERT + - MPNet And more... +- Various language support + - En / Jp / Zn / Nl / And more... +- Supports using context from previous utterances +- Supports using other tasks like SE in pipeline manner +Demonstration +- Performing noisy spoken language understanding using speech enhancement model followed by spoken language understanding model. 
[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/14nCrJ05vJcQX0cJuXjbMVFWUHJ3Wfb6N?usp=sharing) +- Integrated to [Huggingface Spaces](https://huggingface.co/spaces) with [Gradio](https://github.com/gradio-app/gradio). See SLU demo on multiple languages: [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/Siddhant/ESPnet2-SLU) + + +### SUM: Speech Summarization +- End to End Speech Summarization Recipe for Instructional Videos using Restricted Self-Attention [[Sharma et al., 2022]](https://arxiv.org/abs/2110.06263) ### DNN Framework - Flexible network architecture thanks to chainer and pytorch @@ -215,22 +244,22 @@ You can find useful tutorials and demos in [Interspeech 2019 Tutorial](https://g We list the character error rate (CER) and word error rate (WER) of major ASR tasks. -| Task | CER (%) | WER (%) | Pretrained model| -| ----------- | :----: | :----: | :----: | -| Aishell dev/test | 4.6/5.1 | N/A | [link](https://github.com/espnet/espnet/blob/master/egs/aishell/asr1/RESULTS.md#conformer-kernel-size--15--specaugment--lm-weight--00-result) | -| **ESPnet2** Aishell dev/test | 4.4/4.7 | N/A | [link](https://github.com/espnet/espnet/tree/master/egs2/aishell/asr1#conformer--specaug--speed-perturbation-featsraw-n_fft512-hop_length128) | -| Common Voice dev/test | 1.7/1.8 | 2.2/2.3 | [link](https://github.com/espnet/espnet/blob/master/egs/commonvoice/asr1/RESULTS.md#first-results-default-pytorch-transformer-setting-with-bpe-100-epochs-single-gpu) | -| CSJ eval1/eval2/eval3 | 5.7/3.8/4.2 | N/A | [link](https://github.com/espnet/espnet/blob/master/egs/csj/asr1/RESULTS.md#pytorch-backend-transformer-without-any-hyperparameter-tuning) | -| **ESPnet2** CSJ eval1/eval2/eval3 | 4.5/3.3/3.6 | N/A | [link](https://github.com/espnet/espnet/tree/master/egs2/csj/asr1#initial-conformer-results) | -| HKUST dev | 23.5 | N/A | 
[link](https://github.com/espnet/espnet/blob/master/egs/hkust/asr1/RESULTS.md#transformer-only-20-epochs) | -| **ESPnet2** HKUST dev | 21.2 | N/A | [link](https://github.com/espnet/espnet/tree/master/egs2/hkust/asr1#transformer-asr--transformer-lm) | -| Librispeech dev_clean/dev_other/test_clean/test_other | N/A | 1.9/4.9/2.1/4.9 | [link](https://github.com/espnet/espnet/blob/master/egs/librispeech/asr1/RESULTS.md#pytorch-large-conformer-with-specaug--speed-perturbation-8-gpus--transformer-lm-4-gpus) | -| **ESPnet2** Librispeech dev_clean/dev_other/test_clean/test_other | 0.6/1.5/0.6/1.4 | 1.7/3.4/1.8/3.6 | [link](https://github.com/espnet/espnet/tree/master/egs2/librispeech/asr1#self-supervised-learning-features-hubert_large_ll60k-conformer-utt_mvn-with-transformer-lm) | -| Switchboard (eval2000) callhm/swbd | N/A | 14.0/6.8 | [link](https://github.com/espnet/espnet/blob/master/egs/swbd/asr1/RESULTS.md#conformer-with-bpe-2000-specaug-speed-perturbation-transformer-lm-decoding) | -| TEDLIUM2 dev/test | N/A | 8.6/7.2 | [link](https://github.com/espnet/espnet/blob/master/egs/tedlium2/asr1/RESULTS.md#conformer-large-model--specaug--speed-perturbation--rnnlm) | -| TEDLIUM3 dev/test | N/A | 9.6/7.6 | [link](https://github.com/espnet/espnet/blob/master/egs/tedlium3/asr1/RESULTS.md) | -| WSJ dev93/eval92 | 3.2/2.1 | 7.0/4.7 | N/A | -| **ESPnet2** WSJ dev93/eval92 | 1.1/0.8 | 2.8/1.8 | [link](https://github.com/espnet/espnet/tree/master/egs2/wsj/asr1#self-supervised-learning-features-wav2vec2_large_ll60k-conformer-utt_mvn-with-transformer-lm) | +| Task | CER (%) | WER (%) | Pretrained model | +| ----------------------------------------------------------------- | :-------------: | :-------------: | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| Aishell dev/test | 4.6/5.1 | N/A | 
[link](https://github.com/espnet/espnet/blob/master/egs/aishell/asr1/RESULTS.md#conformer-kernel-size--15--specaugment--lm-weight--00-result) | +| **ESPnet2** Aishell dev/test | 4.4/4.7 | N/A | [link](https://github.com/espnet/espnet/tree/master/egs2/aishell/asr1#conformer--specaug--speed-perturbation-featsraw-n_fft512-hop_length128) | +| Common Voice dev/test | 1.7/1.8 | 2.2/2.3 | [link](https://github.com/espnet/espnet/blob/master/egs/commonvoice/asr1/RESULTS.md#first-results-default-pytorch-transformer-setting-with-bpe-100-epochs-single-gpu) | +| CSJ eval1/eval2/eval3 | 5.7/3.8/4.2 | N/A | [link](https://github.com/espnet/espnet/blob/master/egs/csj/asr1/RESULTS.md#pytorch-backend-transformer-without-any-hyperparameter-tuning) | +| **ESPnet2** CSJ eval1/eval2/eval3 | 4.5/3.3/3.6 | N/A | [link](https://github.com/espnet/espnet/tree/master/egs2/csj/asr1#initial-conformer-results) | +| HKUST dev | 23.5 | N/A | [link](https://github.com/espnet/espnet/blob/master/egs/hkust/asr1/RESULTS.md#transformer-only-20-epochs) | +| **ESPnet2** HKUST dev | 21.2 | N/A | [link](https://github.com/espnet/espnet/tree/master/egs2/hkust/asr1#transformer-asr--transformer-lm) | +| Librispeech dev_clean/dev_other/test_clean/test_other | N/A | 1.9/4.9/2.1/4.9 | [link](https://github.com/espnet/espnet/blob/master/egs/librispeech/asr1/RESULTS.md#pytorch-large-conformer-with-specaug--speed-perturbation-8-gpus--transformer-lm-4-gpus) | +| **ESPnet2** Librispeech dev_clean/dev_other/test_clean/test_other | 0.6/1.5/0.6/1.4 | 1.7/3.4/1.8/3.6 | [link](https://github.com/espnet/espnet/tree/master/egs2/librispeech/asr1#self-supervised-learning-features-hubert_large_ll60k-conformer-utt_mvn-with-transformer-lm) | +| Switchboard (eval2000) callhm/swbd | N/A | 14.0/6.8 | [link](https://github.com/espnet/espnet/blob/master/egs/swbd/asr1/RESULTS.md#conformer-with-bpe-2000-specaug-speed-perturbation-transformer-lm-decoding) | +| TEDLIUM2 dev/test | N/A | 8.6/7.2 | 
[link](https://github.com/espnet/espnet/blob/master/egs/tedlium2/asr1/RESULTS.md#conformer-large-model--specaug--speed-perturbation--rnnlm) | +| TEDLIUM3 dev/test | N/A | 9.6/7.6 | [link](https://github.com/espnet/espnet/blob/master/egs/tedlium3/asr1/RESULTS.md) | +| WSJ dev93/eval92 | 3.2/2.1 | 7.0/4.7 | N/A | +| **ESPnet2** WSJ dev93/eval92 | 1.1/0.8 | 2.8/1.8 | [link](https://github.com/espnet/espnet/tree/master/egs2/wsj/asr1#self-supervised-learning-features-wav2vec2_large_ll60k-conformer-utt_mvn-with-transformer-lm) | Note that the performance of the CSJ, HKUST, and Librispeech tasks was significantly improved by using the wide network (#units = 1024) and large subword units if necessary reported by [RWTH](https://arxiv.org/pdf/1805.03294.pdf). @@ -257,7 +286,7 @@ The sampling rate must be consistent with that of data used in training. Available pretrained models in the demo script are listed as below. | Model | Notes | -| :------ | :------ | +| :----------------------------------------------------------------------------------------------- | :--------------------------------------------------------- | | [tedlium2.rnn.v1](https://drive.google.com/open?id=1UqIY6WJMZ4sxNxSugUqp3mrGb3j6h7xe) | Streaming decoding based on CTC-based VAD | | [tedlium2.rnn.v2](https://drive.google.com/open?id=1cac5Uc09lJrCYfWkLQsF8eapQcxZnYdf) | Streaming decoding based on CTC-based VAD (batch decoding) | | [tedlium2.transformer.v1](https://drive.google.com/open?id=1cVeSOYY1twOfL9Gns7Z3ZDnkrJqNwPow) | Joint-CTC attention Transformer trained on Tedlium 2 | @@ -274,11 +303,11 @@ Available pretrained models in the demo script are listed as below. We list results from three different models on WSJ0-2mix, which is one the most widely used benchmark dataset for speech separation. 
-|Model|STOI|SAR|SDR|SIR| -|---|---|---|---|---| -|[TF Masking](https://zenodo.org/record/4498554)|0.89|11.40|10.24|18.04| -|[Conv-Tasnet](https://zenodo.org/record/4498562)|0.95|16.62|15.94|25.90| -|[DPRNN-Tasnet](https://zenodo.org/record/4688000)|0.96|18.82|18.29|28.92| +| Model | STOI | SAR | SDR | SIR | +| ------------------------------------------------- | ---- | ----- | ----- | ----- | +| [TF Masking](https://zenodo.org/record/4498554) | 0.89 | 11.40 | 10.24 | 18.04 | +| [Conv-Tasnet](https://zenodo.org/record/4498562) | 0.95 | 16.62 | 15.94 | 25.90 | +| [DPRNN-Tasnet](https://zenodo.org/record/4688000) | 0.96 | 18.82 | 18.29 | 28.92 | @@ -300,23 +329,23 @@ It is based on ESPnet2. Pretrained models are available for both speech enhancem We list 4-gram BLEU of major ST tasks. #### end-to-end system -| Task | BLEU | Pretrained model | -| ---- | :----: | :----: | +| Task | BLEU | Pretrained model | +| ------------------------------------------------- | :---: | :-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | | Fisher-CallHome Spanish fisher_test (Es->En) | 51.03 | [link](https://github.com/espnet/espnet/blob/master/egs/fisher_callhome_spanish/st1/RESULTS.md#train_spen_lcrm_pytorch_train_pytorch_transformer_bpe_short_long_bpe1000_specaug_asrtrans_mttrans) | | Fisher-CallHome Spanish callhome_evltest (Es->En) | 20.44 | [link](https://github.com/espnet/espnet/blob/master/egs/fisher_callhome_spanish/st1/RESULTS.md#train_spen_lcrm_pytorch_train_pytorch_transformer_bpe_short_long_bpe1000_specaug_asrtrans_mttrans) | -| Libri-trans test (En->Fr) | 16.70 | [link](https://github.com/espnet/espnet/blob/master/egs/libri_trans/st1/RESULTS.md#train_spfr_lc_pytorch_train_pytorch_transformer_bpe_short_long_bpe1000_specaug_asrtrans_mttrans-1) | -| How2 dev5 (En->Pt) | 45.68 | 
[link](https://github.com/espnet/espnet/blob/master/egs/how2/st1/RESULTS.md#trainpt_tc_pytorch_train_pytorch_transformer_short_long_bpe8000_specaug_asrtrans_mttrans-1) | -| Must-C tst-COMMON (En->De) | 22.91 | [link](https://github.com/espnet/espnet/blob/master/egs/must_c/st1/RESULTS.md#train_spen-dede_tc_pytorch_train_pytorch_transformer_short_long_bpe8000_specaug_asrtrans_mttrans) | -| Mboshi-French dev (Fr->Mboshi) | 6.18 | N/A | +| Libri-trans test (En->Fr) | 16.70 | [link](https://github.com/espnet/espnet/blob/master/egs/libri_trans/st1/RESULTS.md#train_spfr_lc_pytorch_train_pytorch_transformer_bpe_short_long_bpe1000_specaug_asrtrans_mttrans-1) | +| How2 dev5 (En->Pt) | 45.68 | [link](https://github.com/espnet/espnet/blob/master/egs/how2/st1/RESULTS.md#trainpt_tc_pytorch_train_pytorch_transformer_short_long_bpe8000_specaug_asrtrans_mttrans-1) | +| Must-C tst-COMMON (En->De) | 22.91 | [link](https://github.com/espnet/espnet/blob/master/egs/must_c/st1/RESULTS.md#train_spen-dede_tc_pytorch_train_pytorch_transformer_short_long_bpe8000_specaug_asrtrans_mttrans) | +| Mboshi-French dev (Fr->Mboshi) | 6.18 | N/A | #### cascaded system -| Task | BLEU | Pretrained model | -| ---- | :----: | :----: | -| Fisher-CallHome Spanish fisher_test (Es->En) | 42.16 | N/A | -| Fisher-CallHome Spanish callhome_evltest (Es->En) | 19.82 | N/A | -| Libri-trans test (En->Fr) | 16.96 | N/A | -| How2 dev5 (En->Pt) | 44.90 | N/A | -| Must-C tst-COMMON (En->De) | 23.65 | N/A | +| Task | BLEU | Pretrained model | +| ------------------------------------------------- | :---: | :--------------: | +| Fisher-CallHome Spanish fisher_test (Es->En) | 42.16 | N/A | +| Fisher-CallHome Spanish callhome_evltest (Es->En) | 19.82 | N/A | +| Libri-trans test (En->Fr) | 16.96 | N/A | +| How2 dev5 (En->Pt) | 44.90 | N/A | +| Must-C tst-COMMON (En->De) | 23.65 | N/A | If you want to check the results of the other recipes, please check `egs//st1/RESULTS.md`. 
@@ -349,9 +378,9 @@ The sampling rate must be consistent with that of data used in training. Available pretrained models in the demo script are listed as below. -| Model | Notes | -| :------ | :------ | -| [fisher_callhome_spanish.transformer.v1](https://drive.google.com/open?id=1hawp5ZLw4_SIHIT3edglxbKIIkPVe8n3) | Transformer-ST trained on Fisher-CallHome Spanish Es->En | +| Model | Notes | +| :----------------------------------------------------------------------------------------------------------- | :------------------------------------------------------- | +| [fisher_callhome_spanish.transformer.v1](https://drive.google.com/open?id=1hawp5ZLw4_SIHIT3edglxbKIIkPVe8n3) | Transformer-ST trained on Fisher-CallHome Spanish Es->En | @@ -360,17 +389,18 @@ Available pretrained models in the demo script are listed as below.
expand
-| Task | BLEU | Pretrained model | -| ---- | :----: | :----: | +| Task | BLEU | Pretrained model | +| ------------------------------------------------- | :---: | :-------------------------------------------------------------------------------------------------------------------------------------------------------------: | | Fisher-CallHome Spanish fisher_test (Es->En) | 61.45 | [link](https://github.com/espnet/espnet/blob/master/egs/fisher_callhome_spanish/mt1/RESULTS.md#trainen_lcrm_lcrm_pytorch_train_pytorch_transformer_bpe_bpe1000) | | Fisher-CallHome Spanish callhome_evltest (Es->En) | 29.86 | [link](https://github.com/espnet/espnet/blob/master/egs/fisher_callhome_spanish/mt1/RESULTS.md#trainen_lcrm_lcrm_pytorch_train_pytorch_transformer_bpe_bpe1000) | -| Libri-trans test (En->Fr) | 18.09 | [link](https://github.com/espnet/espnet/blob/master/egs/libri_trans/mt1/RESULTS.md#trainfr_lcrm_tc_pytorch_train_pytorch_transformer_bpe1000) | -| How2 dev5 (En->Pt) | 58.61 | [link](https://github.com/espnet/espnet/blob/master/egs/how2/mt1/RESULTS.md#trainpt_tc_tc_pytorch_train_pytorch_transformer_bpe8000) | -| Must-C tst-COMMON (En->De) | 27.63 | [link](https://github.com/espnet/espnet/blob/master/egs/must_c/mt1/RESULTS.md#summary-4-gram-bleu) | -| IWSLT'14 test2014 (En->De) | 24.70 | [link](https://github.com/espnet/espnet/blob/master/egs/iwslt16/mt1/RESULTS.md#result) | -| IWSLT'14 test2014 (De->En) | 29.22 | [link](https://github.com/espnet/espnet/blob/master/egs/iwslt16/mt1/RESULTS.md#result) | -| IWSLT'16 test2014 (En->De) | 24.05 | [link](https://github.com/espnet/espnet/blob/master/egs/iwslt16/mt1/RESULTS.md#result) | -| IWSLT'16 test2014 (De->En) | 29.13 | [link](https://github.com/espnet/espnet/blob/master/egs/iwslt16/mt1/RESULTS.md#result) | +| Libri-trans test (En->Fr) | 18.09 | [link](https://github.com/espnet/espnet/blob/master/egs/libri_trans/mt1/RESULTS.md#trainfr_lcrm_tc_pytorch_train_pytorch_transformer_bpe1000) | +| How2 dev5 (En->Pt) | 58.61 | 
[link](https://github.com/espnet/espnet/blob/master/egs/how2/mt1/RESULTS.md#trainpt_tc_tc_pytorch_train_pytorch_transformer_bpe8000) | +| Must-C tst-COMMON (En->De) | 27.63 | [link](https://github.com/espnet/espnet/blob/master/egs/must_c/mt1/RESULTS.md#summary-4-gram-bleu) | +| IWSLT'14 test2014 (En->De) | 24.70 | [link](https://github.com/espnet/espnet/blob/master/egs/iwslt16/mt1/RESULTS.md#result) | +| IWSLT'14 test2014 (De->En) | 29.22 | [link](https://github.com/espnet/espnet/blob/master/egs/iwslt16/mt1/RESULTS.md#result) | +| IWSLT'14 test2014 (De->En) | 32.2 | [link](https://github.com/espnet/espnet/blob/master/egs2/iwslt14/mt1/README.md) | +| IWSLT'16 test2014 (En->De) | 24.05 | [link](https://github.com/espnet/espnet/blob/master/egs/iwslt16/mt1/RESULTS.md#result) | +| IWSLT'16 test2014 (De->En) | 29.13 | [link](https://github.com/espnet/espnet/blob/master/egs/iwslt16/mt1/RESULTS.md#result) |
@@ -430,19 +460,19 @@ If you want to build your own neural vocoder, please check the above repositorie Here we list all of the pretrained neural vocoders. Please download and enjoy the generation of high quality speech! | Model link | Lang | Fs [Hz] | Mel range [Hz] | FFT / Shift / Win [pt] | Model type | -| :------ | :---: | :----: | :--------: | :---------------: | :------ | -| [ljspeech.wavenet.softmax.ns.v1](https://drive.google.com/open?id=1eA1VcRS9jzFa-DovyTgJLQ_jmwOLIi8L) | EN | 22.05k | None | 1024 / 256 / None | [Softmax WaveNet](https://github.com/kan-bayashi/PytorchWaveNetVocoder) | -| [ljspeech.wavenet.mol.v1](https://drive.google.com/open?id=1sY7gEUg39QaO1szuN62-Llst9TrFno2t) | EN | 22.05k | None | 1024 / 256 / None | [MoL WaveNet](https://github.com/r9y9/wavenet_vocoder) | -| [ljspeech.parallel_wavegan.v1](https://drive.google.com/open?id=1tv9GKyRT4CDsvUWKwH3s_OfXkiTi0gw7) | EN | 22.05k | None | 1024 / 256 / None | [Parallel WaveGAN](https://github.com/kan-bayashi/ParallelWaveGAN) | -| [ljspeech.wavenet.mol.v2](https://drive.google.com/open?id=1es2HuKUeKVtEdq6YDtAsLNpqCy4fhIXr) | EN | 22.05k | 80-7600 | 1024 / 256 / None | [MoL WaveNet](https://github.com/r9y9/wavenet_vocoder) | -| [ljspeech.parallel_wavegan.v2](https://drive.google.com/open?id=1Grn7X9wD35UcDJ5F7chwdTqTa4U7DeVB) | EN | 22.05k | 80-7600 | 1024 / 256 / None | [Parallel WaveGAN](https://github.com/kan-bayashi/ParallelWaveGAN) | -| [ljspeech.melgan.v1](https://drive.google.com/open?id=1ipPWYl8FBNRlBFaKj1-i23eQpW_W_YcR) | EN | 22.05k | 80-7600 | 1024 / 256 / None | [MelGAN](https://github.com/kan-bayashi/ParallelWaveGAN) | -| [ljspeech.melgan.v3](https://drive.google.com/open?id=1_a8faVA5OGCzIcJNw4blQYjfG4oA9VEt) | EN | 22.05k | 80-7600 | 1024 / 256 / None | [MelGAN](https://github.com/kan-bayashi/ParallelWaveGAN) | -| [libritts.wavenet.mol.v1](https://drive.google.com/open?id=1jHUUmQFjWiQGyDd7ZeiCThSjjpbF_B4h) | EN | 24k | None | 1024 / 256 / None | [MoL 
WaveNet](https://github.com/r9y9/wavenet_vocoder) | -| [jsut.wavenet.mol.v1](https://drive.google.com/open?id=187xvyNbmJVZ0EZ1XHCdyjZHTXK9EcfkK) | JP | 24k | 80-7600 | 2048 / 300 / 1200 | [MoL WaveNet](https://github.com/r9y9/wavenet_vocoder) | -| [jsut.parallel_wavegan.v1](https://drive.google.com/open?id=1OwrUQzAmvjj1x9cDhnZPp6dqtsEqGEJM) | JP | 24k | 80-7600 | 2048 / 300 / 1200 | [Parallel WaveGAN](https://github.com/kan-bayashi/ParallelWaveGAN) | -| [csmsc.wavenet.mol.v1](https://drive.google.com/open?id=1PsjFRV5eUP0HHwBaRYya9smKy5ghXKzj) | ZH | 24k | 80-7600 | 2048 / 300 / 1200 | [MoL WaveNet](https://github.com/r9y9/wavenet_vocoder) | -| [csmsc.parallel_wavegan.v1](https://drive.google.com/open?id=10M6H88jEUGbRWBmU1Ff2VaTmOAeL8CEy) | ZH | 24k | 80-7600 | 2048 / 300 / 1200 | [Parallel WaveGAN](https://github.com/kan-bayashi/ParallelWaveGAN) | +| :--------------------------------------------------------------------------------------------------- | :---: | :-----: | :------------: | :--------------------: | :---------------------------------------------------------------------- | +| [ljspeech.wavenet.softmax.ns.v1](https://drive.google.com/open?id=1eA1VcRS9jzFa-DovyTgJLQ_jmwOLIi8L) | EN | 22.05k | None | 1024 / 256 / None | [Softmax WaveNet](https://github.com/kan-bayashi/PytorchWaveNetVocoder) | +| [ljspeech.wavenet.mol.v1](https://drive.google.com/open?id=1sY7gEUg39QaO1szuN62-Llst9TrFno2t) | EN | 22.05k | None | 1024 / 256 / None | [MoL WaveNet](https://github.com/r9y9/wavenet_vocoder) | +| [ljspeech.parallel_wavegan.v1](https://drive.google.com/open?id=1tv9GKyRT4CDsvUWKwH3s_OfXkiTi0gw7) | EN | 22.05k | None | 1024 / 256 / None | [Parallel WaveGAN](https://github.com/kan-bayashi/ParallelWaveGAN) | +| [ljspeech.wavenet.mol.v2](https://drive.google.com/open?id=1es2HuKUeKVtEdq6YDtAsLNpqCy4fhIXr) | EN | 22.05k | 80-7600 | 1024 / 256 / None | [MoL WaveNet](https://github.com/r9y9/wavenet_vocoder) | +| 
[ljspeech.parallel_wavegan.v2](https://drive.google.com/open?id=1Grn7X9wD35UcDJ5F7chwdTqTa4U7DeVB) | EN | 22.05k | 80-7600 | 1024 / 256 / None | [Parallel WaveGAN](https://github.com/kan-bayashi/ParallelWaveGAN) | +| [ljspeech.melgan.v1](https://drive.google.com/open?id=1ipPWYl8FBNRlBFaKj1-i23eQpW_W_YcR) | EN | 22.05k | 80-7600 | 1024 / 256 / None | [MelGAN](https://github.com/kan-bayashi/ParallelWaveGAN) | +| [ljspeech.melgan.v3](https://drive.google.com/open?id=1_a8faVA5OGCzIcJNw4blQYjfG4oA9VEt) | EN | 22.05k | 80-7600 | 1024 / 256 / None | [MelGAN](https://github.com/kan-bayashi/ParallelWaveGAN) | +| [libritts.wavenet.mol.v1](https://drive.google.com/open?id=1jHUUmQFjWiQGyDd7ZeiCThSjjpbF_B4h) | EN | 24k | None | 1024 / 256 / None | [MoL WaveNet](https://github.com/r9y9/wavenet_vocoder) | +| [jsut.wavenet.mol.v1](https://drive.google.com/open?id=187xvyNbmJVZ0EZ1XHCdyjZHTXK9EcfkK) | JP | 24k | 80-7600 | 2048 / 300 / 1200 | [MoL WaveNet](https://github.com/r9y9/wavenet_vocoder) | +| [jsut.parallel_wavegan.v1](https://drive.google.com/open?id=1OwrUQzAmvjj1x9cDhnZPp6dqtsEqGEJM) | JP | 24k | 80-7600 | 2048 / 300 / 1200 | [Parallel WaveGAN](https://github.com/kan-bayashi/ParallelWaveGAN) | +| [csmsc.wavenet.mol.v1](https://drive.google.com/open?id=1PsjFRV5eUP0HHwBaRYya9smKy5ghXKzj) | ZH | 24k | 80-7600 | 2048 / 300 / 1200 | [MoL WaveNet](https://github.com/r9y9/wavenet_vocoder) | +| [csmsc.parallel_wavegan.v1](https://drive.google.com/open?id=10M6H88jEUGbRWBmU1Ff2VaTmOAeL8CEy) | ZH | 24k | 80-7600 | 2048 / 300 / 1200 | [Parallel WaveGAN](https://github.com/kan-bayashi/ParallelWaveGAN) | If you want to use the above pretrained vocoders, please exactly match the feature setting with them. @@ -528,11 +558,33 @@ You can download converted samples of the cascade ASR+TTS baseline system [here] ### SLU results -
ESPnet2
+
expand
+ + +We list the performance on various SLU tasks and dataset using the metric reported in the original dataset paper + +| Task | Dataset | Metric | Result | Pretrained Model | +| ----------------------------------------------------------------- | :-------------: | :-------------: | :-------------: | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| Intent Classification | SLURP | Acc | 86.3 | [link](https://github.com/espnet/espnet/tree/master/egs2/slurp/asr1/README.md) | +| Intent Classification | FSC | Acc | 99.6 | [link](https://github.com/espnet/espnet/tree/master/egs2/fsc/asr1/README.md) | +| Intent Classification | FSC Unseen Speaker Set | Acc | 98.6 | [link](https://github.com/espnet/espnet/tree/master/egs2/fsc_unseen/asr1/README.md) | +| Intent Classification | FSC Unseen Utterance Set | Acc | 86.4 | [link](https://github.com/espnet/espnet/tree/master/egs2/fsc_unseen/asr1/README.md) | +| Intent Classification | FSC Challenge Speaker Set | Acc | 97.5 | [link](https://github.com/espnet/espnet/tree/master/egs2/fsc_challenge/asr1/README.md) | +| Intent Classification | FSC Challenge Utterance Set | Acc | 78.5 | [link](https://github.com/espnet/espnet/tree/master/egs2/fsc_challenge/asr1/README.md) | +| Intent Classification | SNIPS | F1 | 91.7 | [link](https://github.com/espnet/espnet/tree/master/egs2/snips/asr1/README.md) | +| Intent Classification | Grabo (Nl) | Acc | 97.2 | [link](https://github.com/espnet/espnet/tree/master/egs2/grabo/asr1/README.md) | +| Intent Classification | CAT SLU MAP (Zn) | Acc | 78.9 | [link](https://github.com/espnet/espnet/tree/master/egs2/catslu/asr1/README.md) | +| Intent Classification | Google Speech Commands | Acc | 98.4 | [link](https://github.com/espnet/espnet/tree/master/egs2/speechcommands/asr1/README.md) | +| Slot Filling | SLURP | SLU-F1 | 71.9 | 
[link](https://github.com/espnet/espnet/tree/master/egs2/slurp_entity/asr1/README.md) | +| Dialogue Act Classification | Switchboard | Acc | 67.5 | [link](https://github.com/espnet/espnet/tree/master/egs2/swbd_da/asr1/README.md) | +| Dialogue Act Classification | Jdcinal (Jp) | Acc | 67.4 | [link](https://github.com/espnet/espnet/tree/master/egs2/jdcinal/asr1/README.md) | +| Emotion Recognition | IEMOCAP | Acc | 69.4 | [link](https://github.com/espnet/espnet/tree/master/egs2/iemocap/asr1/README.md) | +| Emotion Recognition | swbd_sentiment | Macro F1 | 61.4 | [link](https://github.com/espnet/espnet/tree/master/egs2/swbd_sentiment/asr1/README.md) | +| Emotion Recognition | slue_voxceleb | Macro F1 | 44.0 | [link](https://github.com/espnet/espnet/tree/master/egs2/slue-voxceleb/asr1/README.md) | -- Transformer based SLU for Fluent Speech Command Dataset + +If you want to check the results of the other recipes, please check `egs2//asr1/RESULTS.md`. -In SLU, The objective is to infer the meaning or intent of spoken utterance. The [Fluent Speech Command Dataset](https://fluent.ai/fluent-speech-commands-a-dataset-for-spoken-language-understanding-research/) describes an intent as combination of 3 slot values: action, object and location. You can see baseline results on this dataset [here](https://github.com/espnet/espnet/blob/master/egs2/fsc/asr1/RESULTS.md)
@@ -685,6 +737,8 @@ See the module documentation for more information. It is recommended to use models with RNN-based encoders (such as BLSTMP) for aligning large audio files; rather than using Transformer models that have a high memory consumption on longer audio data. The sample rate of the audio must be consistent with that of the data used in training; adjust with `sox` if needed. + +Also, we can use this tool to provide token-level segmentation information if we prepare a list of tokens instead of a list of utterances in the `text` file. See the discussion in https://github.com/espnet/espnet/issues/4278#issuecomment-1100756463.
diff --git a/ci/doc.sh b/ci/doc.sh index cbcd78f4b21..114bc92b952 100755 --- a/ci/doc.sh +++ b/ci/doc.sh @@ -26,6 +26,8 @@ set -euo pipefail find ./utils/{*.sh,spm_*} -exec ./doc/usage2rst.sh {} \; | tee ./doc/_gen/utils_sh.rst find ./espnet2/bin/*.py -exec ./doc/usage2rst.sh {} \; | tee ./doc/_gen/espnet2_bin.rst +./doc/notebook2rst.sh > ./doc/_gen/notebooks.rst + # generate package doc ./doc/module2rst.py --root espnet espnet2 --dst ./doc --exclude espnet.bin diff --git a/ci/install.sh b/ci/install.sh index eeb531d7ddd..5bfed7584ad 100755 --- a/ci/install.sh +++ b/ci/install.sh @@ -21,7 +21,7 @@ ${CXX:-g++} -v . ./activate_python.sh make TH_VERSION="${TH_VERSION}" - make warp-ctc.done warp-transducer.done chainer_ctc.done nkf.done moses.done mwerSegmenter.done pesq pyopenjtalk.done py3mmseg.done s3prl.done transformers.done phonemizer.done fairseq.done k2.done gtn.done + make warp-ctc.done warp-transducer.done chainer_ctc.done nkf.done moses.done mwerSegmenter.done pesq pyopenjtalk.done py3mmseg.done s3prl.done transformers.done phonemizer.done fairseq.done k2.done gtn.done longformer.done rm -rf kaldi ) . 
tools/activate_python.sh diff --git a/ci/test_integration_espnet2.sh b/ci/test_integration_espnet2.sh index 78086272af7..58951c04011 100755 --- a/ci/test_integration_espnet2.sh +++ b/ci/test_integration_espnet2.sh @@ -100,6 +100,50 @@ if python3 -c "import fairseq" &> /dev/null; then cd "${cwd}" fi +# [ESPnet2] test enh_asr1 recipe +if python -c 'import torch as t; from distutils.version import LooseVersion as L; assert L(t.__version__) >= L("1.2.0")' &> /dev/null; then + cd ./egs2/mini_an4/enh_asr1 + echo "==== [ESPnet2] ENH_ASR ===" + ./run.sh --ngpu 0 --stage 0 --stop-stage 15 --skip-upload_hf false --feats-type "raw" --spk-num 1 --enh_asr_args "--max_epoch=1 --enh_separator_conf num_spk=1" --python "${python}" + # Remove generated files in order to reduce the disk usage + rm -rf exp dump data + cd "${cwd}" +fi + +# [ESPnet2] test st recipe +cd ./egs2/mini_an4/st1 +echo "==== [ESPnet2] ST ===" +./run.sh --stage 1 --stop-stage 1 +feats_types="raw fbank_pitch" +token_types="bpe char" +for t in ${feats_types}; do + ./run.sh --stage 2 --stop-stage 4 --feats-type "${t}" --python "${python}" +done +for t in ${token_types}; do + ./run.sh --stage 5 --stop-stage 5 --tgt_token_type "${t}" --src_token_type "${t}" --python "${python}" +done +for t in ${feats_types}; do + for t2 in ${token_types}; do + echo "==== feats_type=${t}, token_types=${t2} ===" + ./run.sh --ngpu 0 --stage 6 --stop-stage 13 --skip-upload false --feats-type "${t}" --tgt_token_type "${t2}" --src_token_type "${t2}" \ + --st-args "--max_epoch=1" --lm-args "--max_epoch=1" --inference_args "--beam_size 5" --python "${python}" + done +done +echo "==== feats_type=raw, token_types=bpe, model_conf.extract_feats_in_collect_stats=False, normalize=utt_mvn ===" +./run.sh --ngpu 0 --stage 10 --stop-stage 13 --skip-upload false --feats-type "raw" --tgt_token_type "bpe" --src_token_type "bpe" \ + --feats_normalize "utterance_mvn" --lm-args "--max_epoch=1" --inference_args "--beam_size 5" --python "${python}" \ + 
--st-args "--model_conf extract_feats_in_collect_stats=false --max_epoch=1" + +echo "==== use_streaming, feats_type=raw, token_types=bpe, model_conf.extract_feats_in_collect_stats=False, normalize=utt_mvn ===" +./run.sh --use_streaming true --ngpu 0 --stage 6 --stop-stage 13 --skip-upload false --feats-type "raw" --tgt_token_type "bpe" --src_token_type "bpe" \ + --feats_normalize "utterance_mvn" --lm-args "--max_epoch=1" --inference_args "--beam_size 5" --python "${python}" \ + --st-args "--model_conf extract_feats_in_collect_stats=false --max_epoch=1 --encoder=contextual_block_transformer --decoder=transformer + --encoder_conf block_size=40 --encoder_conf hop_size=16 --encoder_conf look_ahead=16" + +# Remove generated files in order to reduce the disk usage +rm -rf exp dump data +cd "${cwd}" + # [ESPnet2] Validate configuration files echo "" > dummy_token_list echo "==== [ESPnet2] Validation configuration files ===" @@ -124,6 +168,9 @@ if python3 -c 'import torch as t; from distutils.version import LooseVersion as for f in egs2/*/ssl1/conf/train*.yaml; do ${python} -m espnet2.bin.hubert_train --config "${f}" --iterator_type none --normalize none --dry_run true --output_dir out --token_list dummy_token_list done + for f in egs2/*/enh_asr1/conf/train_enh_asr*.yaml; do + ${python} -m espnet2.bin.enh_s2t_train --config "${f}" --iterator_type none --dry_run true --output_dir out --token_list dummy_token_list + done fi # These files must be same each other. 
diff --git a/doc/.gitignore b/doc/.gitignore index d4058a5aa91..79f7202744d 100644 --- a/doc/.gitignore +++ b/doc/.gitignore @@ -1,4 +1,4 @@ _gen/ _build/ build/ - +notebook/ \ No newline at end of file diff --git a/doc/argparse2rst.py b/doc/argparse2rst.py index 790049e0bc9..684673d90a3 100755 --- a/doc/argparse2rst.py +++ b/doc/argparse2rst.py @@ -20,11 +20,16 @@ def __init__(self, path): def get_parser(): parser = configargparse.ArgumentParser( - description='generate RST from argparse options', + description="generate RST from argparse options", config_file_parser_class=configargparse.YAMLConfigFileParser, - formatter_class=configargparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('src', type=str, nargs='+', - help='source python files that contain get_parser() func') + formatter_class=configargparse.ArgumentDefaultsHelpFormatter, + ) + parser.add_argument( + "src", + type=str, + nargs="+", + help="source python files that contain get_parser() func", + ) return parser @@ -53,7 +58,8 @@ def get_parser(): for m in modinfo: cmd = m.path.name sep = "~" * len(cmd) - print(f""" + print( + f""" .. _{cmd}: @@ -65,4 +71,5 @@ def get_parser(): :func: get_parser :prog: {cmd} -""") +""" + ) diff --git a/doc/conf.py b/doc/conf.py index 8aa97c1e42a..c2f5acd1881 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -20,8 +20,8 @@ import os import sys -sys.path.insert(0, os.path.abspath('../espnet/nets')) -sys.path.insert(0, os.path.abspath('../utils')) +sys.path.insert(0, os.path.abspath("../espnet/nets")) +sys.path.insert(0, os.path.abspath("../utils")) # -- General configuration ------------------------------------------------ @@ -35,8 +35,8 @@ extensions = [ "nbsphinx", "sphinx.ext.autodoc", - 'sphinx.ext.napoleon', - 'sphinx.ext.viewcode', + "sphinx.ext.napoleon", + "sphinx.ext.viewcode", "sphinx.ext.mathjax", "sphinx.ext.todo", "sphinxarg.ext", @@ -44,42 +44,46 @@ ] # Add any paths that contain templates here, relative to this directory. 
-templates_path = ['_templates'] +templates_path = ["_templates"] # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: # # source_suffix = '.rst' -source_suffix = ['.rst', '.md'] +source_suffix = [".rst", ".md"] # enable to markdown from recommonmark.parser import CommonMarkParser source_parsers = { - '.md': CommonMarkParser, + ".md": CommonMarkParser, } # AutoStructify setting ref: https://qiita.com/pashango2/items/d1b379b699af85b529ce from recommonmark.transform import AutoStructify -github_doc_root = 'https://github.com/rtfd/recommonmark/tree/master/doc/' +github_doc_root = "https://github.com/rtfd/recommonmark/tree/master/doc/" def setup(app): - app.add_config_value('recommonmark_config', { - 'url_resolver': lambda url: github_doc_root + url, - 'auto_toc_tree_section': 'Contents', - }, True) + app.add_config_value( + "recommonmark_config", + { + "url_resolver": lambda url: github_doc_root + url, + "auto_toc_tree_section": "Contents", + }, + True, + ) app.add_transform(AutoStructify) # The master toctree document. -master_doc = 'index' +master_doc = "index" # General information about the project. -project = u'ESPnet' -copyright = u'2017, Shinji Watanabe' -author = u'Shinji Watanabe' +project = u"ESPnet" +copyright = u"2017, Shinji Watanabe" +author = u"Shinji Watanabe" # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the @@ -87,6 +91,7 @@ def setup(app): # # The short X.Y version. import espnet + version = espnet.__version__ # The full version, including alpha/beta/rc tags. release = espnet.__version__ @@ -102,18 +107,21 @@ def setup(app): # directories to ignore when looking for source files. 
# This patterns also effect to html_static_path and html_extra_path exclude_patterns = [ - '_build', 'Thumbs.db', '.DS_Store', "README.md", + "_build", + "Thumbs.db", + ".DS_Store", + "README.md", # NOTE: because these genearate files are directly included # from the other files, we should exclude these files manually. "_gen/modules.rst", "_gen/utils_sh.rst", "_gen/utils_py.rst", "_gen/espnet_bin.rst", - "_gen/espnet-bin.rst" + "_gen/espnet-bin.rst", ] # The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' +pygments_style = "sphinx" # If true, `todo` and `todoList` produce output, else they produce nothing. todo_include_todos = False @@ -127,7 +135,7 @@ def setup(app): # html_theme = 'nature' import sphinx_rtd_theme -html_theme = 'sphinx_rtd_theme' +html_theme = "sphinx_rtd_theme" html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] # Theme options are theme-specific and customize the look and feel of a theme @@ -147,16 +155,16 @@ def setup(app): # This is required for the alabaster theme # refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars html_sidebars = { - '**': [ - 'relations.html', # needs 'show_related': True theme option to display - 'searchbox.html', + "**": [ + "relations.html", # needs 'show_related': True theme option to display + "searchbox.html", ] } # -- Options for HTMLHelp output ------------------------------------------ # Output file base name for HTML help builder. -htmlhelp_basename = 'ESPnetdoc' +htmlhelp_basename = "ESPnetdoc" # -- Options for LaTeX output --------------------------------------------- @@ -164,15 +172,12 @@ def setup(app): # The paper size ('letterpaper' or 'a4paper'). # # 'papersize': 'letterpaper', - # The font size ('10pt', '11pt' or '12pt'). # # 'pointsize': '10pt', - # Additional stuff for the LaTeX preamble. 
# # 'preamble': '', - # Latex figure (float) alignment # # 'figure_align': 'htbp', @@ -182,18 +187,14 @@ def setup(app): # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ - (master_doc, 'ESPnet.tex', u'ESPnet Documentation', - u'Shinji Watanabe', 'manual'), + (master_doc, "ESPnet.tex", u"ESPnet Documentation", u"Shinji Watanabe", "manual"), ] # -- Options for manual page output --------------------------------------- # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). -man_pages = [ - (master_doc, 'espnet', u'ESPnet Documentation', - [author], 1) -] +man_pages = [(master_doc, "espnet", u"ESPnet Documentation", [author], 1)] # -- Options for Texinfo output ------------------------------------------- @@ -201,12 +202,18 @@ def setup(app): # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - (master_doc, 'ESPnet', u'ESPnet Documentation', - author, 'ESPnet', 'One line description of project.', - 'Miscellaneous'), + ( + master_doc, + "ESPnet", + u"ESPnet Documentation", + author, + "ESPnet", + "One line description of project.", + "Miscellaneous", + ), ] -autoclass_content = 'both' +autoclass_content = "both" # NOTE(kan-bayashi): Do not update outputs in notebook automatically. 
-nbsphinx_execute = 'never' +nbsphinx_execute = "never" diff --git a/doc/espnet2_tutorial.md b/doc/espnet2_tutorial.md index 5bdec078cc5..0dd69624a4a 100644 --- a/doc/espnet2_tutorial.md +++ b/doc/espnet2_tutorial.md @@ -180,7 +180,7 @@ You need to do one of the following two ways to change the training configuratio ```sh # Give a configuration file -./run.sh --asr_train_config conf/train_asr.yaml +./run.sh --asr_config conf/train_asr.yaml # Give arguments to "espnet2/bin/asr_train.py" directly ./run.sh --asr_args "--foo arg --bar arg2" ``` @@ -291,7 +291,7 @@ To use SSLRs in your task, you need to make several modifications. ### Usage 1. To reduce the time used in `collect_stats` step, please specify `--feats_normalize uttmvn` in `run.sh` and pass it as arguments to `asr.sh` or other task-specific scripts. (Recommended) 2. In the configuration file, specify the `frontend` and `preencoder`. Taking `HuBERT` as an example: - The `upsteam` name can be whatever supported in S3PRL. `multilayer-feature=True` means the final representation is a weighted-sum of all layers' hidden states from SSLR model. + The `upstream` name can be whatever supported in S3PRL. `multilayer-feature=True` means the final representation is a weighted-sum of all layers' hidden states from SSLR model. ``` frontend: s3prl frontend_conf: diff --git a/doc/index.rst b/doc/index.rst index 13f20ab0a96..30cd3d35fd4 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -28,16 +28,7 @@ ESPnet is an end-to-end speech processing toolkit, mainly focuses on end-to-end ./espnet2_task.md ./espnet2_distributed.md -.. toctree:: - :maxdepth: 1 - :caption: Notebook: - - ./notebook/asr_cli.ipynb - ./notebook/asr_library.ipynb - ./notebook/tts_cli.ipynb - ./notebook/pretrained.ipynb - ./notebook/tts_realtime_demo.ipynb - ./notebook/st_demo.ipynb +.. include:: ./_gen/notebooks.rst .. 
include:: ./_gen/modules.rst diff --git a/doc/installation.md b/doc/installation.md index 999082c9043..db45a09135b 100644 --- a/doc/installation.md +++ b/doc/installation.md @@ -32,14 +32,14 @@ the following packages are installed using Anaconda, so you can skip them.) # For CentOS $ sudo yum install libsndfile ``` -- ffmpeg (This is not required when installataion, but used in some recipes) +- ffmpeg (This is not required when installing, but used in some recipes) ```sh # For Ubuntu $ sudo apt-get install ffmpeg # For CentOS $ sudo yum install ffmpeg ``` -- flac (This is not required when installataion, but used in some recipes) +- flac (This is not required when installing, but used in some recipes) ```sh # For Ubuntu $ sudo apt-get install flac @@ -202,14 +202,14 @@ We also have [prebuilt Kaldi binaries](https://github.com/espnet/espnet/blob/mas ```sh $ cd /tools - $ make TH_VERSION=1.3.1 + $ make TH_VERSION=1.10.1 ``` Note that the CUDA version is derived from `nvcc` command. If you'd like to specify the other CUDA version, you need to give `CUDA_VERSION`. ```sh $ cd /tools - $ make TH_VERSION=1.3.1 CUDA_VERSION=10.1 + $ make TH_VERSION=1.10.1 CUDA_VERSION=11.3 ``` If you don't have `nvcc` command, packages are installed for CPU mode by default. 
diff --git a/doc/module2rst.py b/doc/module2rst.py index a4cd4db3f6c..7cb83b9e7ad 100755 --- a/doc/module2rst.py +++ b/doc/module2rst.py @@ -8,15 +8,15 @@ # parser parser = configargparse.ArgumentParser( - description='generate RST files from module recursively into /_gen', + description="generate RST files from module recursively into /_gen", config_file_parser_class=configargparse.YAMLConfigFileParser, - formatter_class=configargparse.ArgumentDefaultsHelpFormatter) -parser.add_argument('--root', nargs='+', - help='root module to generate docs recursively') -parser.add_argument('--dst', type=str, - help='destination path to generate RSTs') -parser.add_argument('--exclude', nargs='*', default=[], - help='exclude module name') + formatter_class=configargparse.ArgumentDefaultsHelpFormatter, +) +parser.add_argument( + "--root", nargs="+", help="root module to generate docs recursively" +) +parser.add_argument("--dst", type=str, help="destination path to generate RSTs") +parser.add_argument("--exclude", nargs="*", default=[], help="exclude module name") args = parser.parse_args() print(args) @@ -36,12 +36,14 @@ def gen_rst(module_path, f): doc = module.__doc__ if doc is None: doc = "" - f.write(f""" + f.write( + f""" {title} {sep} {doc} -""") +""" + ) for cpath in glob(module_path + "/**/*.py", recursive=True): print(cpath) @@ -51,7 +53,8 @@ def gen_rst(module_path, f): continue cname = to_module(cpath) csep = "-" * len(cname) - f.write(f""" + f.write( + f""" .. 
_{cname}: {cname} @@ -62,7 +65,8 @@ def gen_rst(module_path, f): :undoc-members: :show-inheritance: -""") +""" + ) f.flush() diff --git a/doc/notebook b/doc/notebook deleted file mode 160000 index ef3cbf880fc..00000000000 --- a/doc/notebook +++ /dev/null @@ -1 +0,0 @@ -Subproject commit ef3cbf880fcd725d11021e541a0cdfae4080446d diff --git a/doc/notebook2rst.sh b/doc/notebook2rst.sh new file mode 100755 index 00000000000..83bf7d57794 --- /dev/null +++ b/doc/notebook2rst.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash + +set -euo pipefail + +cd "$(dirname "$0")" + +if [ ! -d notebook ]; then + git clone https://github.com/espnet/notebook --depth 1 +fi + +echo "\ +.. toctree:: + :maxdepth: 1 + :caption: Notebook: +" + +find ./notebook/*.ipynb -exec echo " {}" \; diff --git a/docker/build.sh b/docker/build.sh index 7bef1b94b73..987a0f54ac7 100755 --- a/docker/build.sh +++ b/docker/build.sh @@ -3,6 +3,10 @@ # 2019, Nelson Yalta # 2019, Ludwig Kürzinger, Technische Universität München +log() { + local fname=${BASH_SOURCE[1]##*/} + echo -e "$(date '+%Y-%m-%dT%H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*" +} SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )" @@ -11,10 +15,7 @@ ubuntu_ver=20.04 cuda_ver=11.1 build_ver=cpu build_cores=24 -th_ver=1.8.0 - -docker_ver=$(docker version -f '{{.Server.Version}}') -echo "Using Docker Ver.${docker_ver}" +th_ver=1.10.1 cmd_usage() { @@ -63,16 +64,16 @@ cmd_usage() { build(){ - echo "Build Latest docker containers" + log "Build Latest docker containers" # build runtime and gpu based containers this_tag=espnet/espnet:runtime-latest docker_image=$( docker images -q ${this_tag} ) if ! [[ -n ${docker_image} ]]; then - echo "Now building Runtime container" + log "Now building Runtime container" docker build --build-arg DOCKER_VER=${docker_ver} \ --build-arg FROM_TAG=${default_ubuntu_ver} \ --build-arg NUM_BUILD_CORES=${build_cores} \ - -f prebuilt/runtime/Dockerfile -t ${this_tag} . 
| tee -a build_runtime.log > /dev/null + -f prebuilt/runtime.dockerfile -t ${this_tag} . | tee -a build_runtime.log > /dev/null docker_image=$( docker images -q ${this_tag} ) [ -z "${docker_image}" ] && exit 1 @@ -81,9 +82,9 @@ build(){ this_tag=espnet/espnet:cuda-latest docker_image=$( docker images -q ${this_tag} ) if ! [[ -n ${docker_image} ]]; then - echo "Now building CUDA container" + log "Now building CUDA container" docker build --build-arg FROM_TAG=runtime-latest \ - -f prebuilt/devel/gpu/${default_cuda_ver}/Dockerfile -t ${this_tag} . | tee -a build_cuda.log > /dev/null + -f prebuilt/gpu.dockerfile -t ${this_tag} . | tee -a build_cuda.log > /dev/null docker_image=$( docker images -q ${this_tag} ) [ -z "${docker_image}" ] && exit 1 fi @@ -93,8 +94,11 @@ build(){ this_tag=espnet/espnet:cpu-latest docker_image=$( docker images -q ${this_tag} ) if ! [[ -n ${docker_image} ]]; then - echo "Now building cpu-latest with ubuntu:${default_ubuntu_ver}" - docker build --build-arg FROM_TAG=runtime-latest -f prebuilt/devel/Dockerfile -t ${this_tag} . | tee -a build_cpu.log > /dev/null + log "Now building cpu-latest with ubuntu:${default_ubuntu_ver}" + docker build --build-arg FROM_TAG=runtime-latest \ + -f prebuilt/devel.dockerfile \ + --target devel \ + -t ${this_tag} . | tee -a build_cpu.log > /dev/null docker_image=$( docker images -q ${this_tag} ) [ -z "${docker_image}" ] && exit 1 @@ -106,8 +110,10 @@ build(){ this_tag=espnet/espnet:gpu-latest docker_image=$( docker images -q ${this_tag} ) if ! [[ -n ${docker_image} ]]; then - echo "Now building gpu-latest with ubuntu:${default_ubuntu_ver} and cuda:${default_cuda_ver}" - docker build ${build_args} -f prebuilt/devel/Dockerfile -t ${this_tag} . | tee -a build_gpu.log > /dev/null + log "Now building gpu-latest with ubuntu:${default_ubuntu_ver} and cuda:${default_cuda_ver}" + docker build ${build_args} -f prebuilt/devel.dockerfile \ + --target devel \ + -t ${this_tag} . 
| tee -a build_gpu.log > /dev/null docker_image=$( docker images -q ${this_tag} ) [ -z "${docker_image}" ] && exit 1 fi @@ -115,20 +121,20 @@ build(){ build_local(){ - echo "Building docker container: base image, and image for ${build_ver}" + log "Building docker container: base image, and image for ${build_ver}" sleep 1 # prepare espnet-repo, assuming that this script is in folder espnet/docker cd ${SCRIPTPATH}/.. ESPNET_ARCHIVE="./espnet-local.tar" - echo "Reconstructing the local repository from the last commit" + log "Reconstructing the local repository from the last commit" git archive -o docker/${ESPNET_ARCHIVE} HEAD || exit 1 cd ${SCRIPTPATH} test -r ${ESPNET_ARCHIVE} || exit 1; sleep 1 if [ "${build_base_image}" = true ]; then - echo "building ESPnet base image with ubuntu:${ubuntu_ver}" + log "building ESPnet base image with ubuntu:${ubuntu_ver}" docker build --build-arg DOCKER_VER=${docker_ver} \ --build-arg FROM_TAG=${ubuntu_ver} \ --build-arg NUM_BUILD_CORES=${build_cores} \ @@ -137,11 +143,11 @@ build_local(){ fi if [[ ${build_ver} == "cpu" ]]; then - echo "building ESPnet CPU Image with ubuntu:${ubuntu_ver}" + log "building ESPnet CPU Image with ubuntu:${ubuntu_ver}" docker build --build-arg FROM_TAG=runtime-local --build-arg ESPNET_ARCHIVE=${ESPNET_ARCHIVE} \ -f prebuilt/local/Dockerfile -t espnet/espnet:cpu-local . || exit 1 elif [[ ${build_ver} == "gpu" ]]; then - echo "building ESPnet GPU Image with ubuntu:${ubuntu_ver} and cuda:${cuda_ver}" + log "building ESPnet GPU Image with ubuntu:${ubuntu_ver} and cuda:${cuda_ver}" if [ "${build_base_image}" = true ] ; then docker build -f prebuilt/devel/gpu/${ver}/Dockerfile -t espnet/espnet:cuda${ver}-cudnn7 . || exit 1 else @@ -154,15 +160,15 @@ build_local(){ build_args="${build_args} --build-arg ESPNET_ARCHIVE=${ESPNET_ARCHIVE}" docker build ${build_args} -f prebuilt/local/Dockerfile -t espnet/espnet:gpu-cuda${ver}-cudnn7-u18-local . 
|| exit 1 else - echo "Parameter invalid: " ${ver} + log "ERROR: Parameter invalid: " ${ver} fi - echo "cleanup." + log "cleanup." test -r ${ESPNET_ARCHIVE} && rm ${ESPNET_ARCHIVE} } run_recipe1(){ - ./run.sh --docker-egs an4/asr1 \ + ./run.sh --docker-egs mini_an4/asr1 \ --docker-cmd run.sh \ --docker-gpu ${1} \ --verbose 1 \ @@ -173,10 +179,10 @@ run_recipe1(){ } run_recipe2(){ - ./run.sh --docker-egs an4/asr1 \ + ./run.sh --docker-egs mini_an4/asr1 \ --docker-cmd run.sh \ --docker-gpu ${1} \ - --docker-env "NLTK_DATA=/espnet/egs2/an4/asr1/nltk_data,HOME=/espnet/egs2/an4/asr1" \ + --docker-env "NLTK_DATA=/espnet/egs2/mini_an4/asr1/nltk_data,HOME=/espnet/egs2/mini_an4/asr1" \ --is-egs2 \ --ngpu ${2} \ --stage ${3} \ @@ -185,11 +191,11 @@ run_recipe2(){ } testing(){ - echo "Testing docker containers" + log "Testing docker containers" # Test Docker Containers with cpu setup run_stage=-1 for backend in chainer pytorch; do - if [ -f ../egs/an4/asr1/dump/train_nodev/deltafalse/data.json ]; then + if [ -f ../egs/mini_an4/asr1/dump/train_nodev/deltafalse/data.json ]; then run_stage=3 fi if [ ! -f .test_cpu_${backend}.done ]; then @@ -199,7 +205,7 @@ testing(){ done for backend in chainer pytorch; do - if [ -f ../egs/an4/asr1/dump/train_nodev/deltafalse/data.json ]; then + if [ -f ../egs/mini_an4/asr1/dump/train_nodev/deltafalse/data.json ]; then run_stage=3 fi if [ ! -f .test_gpu_${backend}.done ]; then @@ -208,7 +214,7 @@ testing(){ fi done - echo "ESPnet egs Done. Press to continue with ESPnet2 egs" + log "ESPnet egs Done. 
Press to continue with ESPnet2 egs" read enter # Test for espnet2 run_stage=-1 @@ -227,7 +233,7 @@ testing(){ push(){ for tag in runtime-latest cuda-latest cpu-latest gpu-latest;do - echo "docker push espnet/espnet:${tag}" + log "docker push espnet/espnet:${tag}" ( docker push espnet/espnet:${tag} )|| exit 1 done } @@ -273,14 +279,15 @@ check=true [ "${default_ubuntu_ver}" != "${ubuntu_ver}" ] || [ "${default_cuda_ver}" != "${cuda_ver}" ] && check=false if [ ${check} = false ] && [ "${mode}" != "fully_local" ]; then - echo "Error: Use of custom versions of Ubuntu (!=${default_ubuntu_ver}) and CUDA (!=${default_cuda_ver}) + log "Error: Use of custom versions of Ubuntu (!=${default_ubuntu_ver}) and CUDA (!=${default_cuda_ver}) is only available for == fully_local. Exiting... " exit 0; fi +docker_ver=$(docker version -f '{{.Server.Version}}') +log "Using Docker Ver.${docker_ver}" -echo "Using Docker Ver.${docker_ver}" ## Application menu if [[ "${mode}" == "build" ]]; then build @@ -302,4 +309,4 @@ else cmd_usage fi -echo "$(basename "$0") done." +log "$(basename "$0") done." diff --git a/docker/prebuilt/Dockerfile b/docker/espnet.dockerfile similarity index 76% rename from docker/prebuilt/Dockerfile rename to docker/espnet.dockerfile index bd2458adf70..b6295ca2a92 100644 --- a/docker/prebuilt/Dockerfile +++ b/docker/espnet.dockerfile @@ -11,9 +11,7 @@ ARG EXTRA_LIBS RUN if [ ${EXTRA_LIBS} = true ]; then \ cd /espnet/tools; \ - . 
./activate_python.sh; \ - pip install parallel_wavegan; \ - pip install git+https://github.com/cybertronai/pytorch-lamb; \ + make extra; \ fi # Add user to container diff --git a/docker/prebuilt/devel/Dockerfile b/docker/prebuilt/devel.dockerfile similarity index 70% rename from docker/prebuilt/devel/Dockerfile rename to docker/prebuilt/devel.dockerfile index ad02a540674..95dc6a41059 100644 --- a/docker/prebuilt/devel/Dockerfile +++ b/docker/prebuilt/devel.dockerfile @@ -1,5 +1,5 @@ ARG FROM_TAG -FROM espnet/espnet:${FROM_TAG} +FROM espnet/espnet:${FROM_TAG} as devel LABEL maintainer "Nelson Yalta " ARG CUDA_VER @@ -48,7 +48,7 @@ RUN if [ -z "${CUDA_VER}" ]; then \ . ./activate_python.sh && \ ./installers/install_warp-ctc.sh && \ ./installers/install_kenlm.sh && \ - # ./installers/install_chainer_ctc.sh && \ + ./installers/install_chainer.sh cpu && \ conda clean --all && \ rm -f *.tar.* && \ pip cache purge @@ -56,3 +56,28 @@ RUN if [ -z "${CUDA_VER}" ]; then \ RUN rm -rf ../espnet WORKDIR / + + +#### For local docker +FROM devel as espnet_local +LABEL maintainer "Nelson Yalta " + +ARG CUDA_VER +WORKDIR / + +# IF using a local ESPNet repository, a temporary file containing the ESPnet git repo is copied over +ARG ESPNET_ARCHIVE=./espnet-local.tar +COPY ${ESPNET_ARCHIVE} /espnet-local.tar + + +# Download ESPnet +RUN echo "Getting ESPnet sources from local repository, in temporary file: " ${ESPNET_ARCHIVE} +RUN mkdir /espnet +RUN tar xf espnet-local.tar -C /espnet/ +RUN rm espnet-local.tar + +RUN cd espnet && \ + rm -rf docker egs test utils + +# Install espnet +WORKDIR /espnet/tools diff --git a/docker/prebuilt/devel/gpu/10.0/Dockerfile b/docker/prebuilt/devel/gpu/10.0/Dockerfile deleted file mode 100644 index f28793740cb..00000000000 --- a/docker/prebuilt/devel/gpu/10.0/Dockerfile +++ /dev/null @@ -1,74 +0,0 @@ -ARG FROM_TAG -FROM espnet/espnet:${FROM_TAG} -LABEL maintainer "Nelson Yalta " - -## FROM CUDA 10.0 base - -RUN apt-get update && apt-get install -y 
--no-install-recommends gnupg2 curl ca-certificates && \ - curl -fsSL https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/7fa2af80.pub | apt-key add - && \ - echo "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64 /" > /etc/apt/sources.list.d/cuda.list && \ - echo "deb https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64 /" > /etc/apt/sources.list.d/nvidia-ml.list && \ - apt-get purge --autoremove -y curl && \ - rm -rf /var/lib/apt/lists/* - -ENV CUDA_VERSION 10.0.130 - -ENV CUDA_PKG_VERSION 10-0=$CUDA_VERSION-1 - -# For libraries in the cuda-compat-* package: https://docs.nvidia.com/cuda/eula/index.html#attachment-a -RUN apt-get update && apt-get install -y --no-install-recommends \ - cuda-cudart-$CUDA_PKG_VERSION \ - cuda-compat-10-0 && \ - ln -s cuda-10.0 /usr/local/cuda && \ - rm -rf /var/lib/apt/lists/* - -# Required for nvidia-docker v1 -RUN echo "/usr/local/nvidia/lib" >> /etc/ld.so.conf.d/nvidia.conf && \ - echo "/usr/local/nvidia/lib64" >> /etc/ld.so.conf.d/nvidia.conf - -ENV PATH /usr/local/nvidia/bin:/usr/local/cuda/bin:${PATH} -ENV LD_LIBRARY_PATH /usr/local/nvidia/lib:/usr/local/nvidia/lib64 - -# nvidia-container-runtime -ENV NVIDIA_VISIBLE_DEVICES all -ENV NVIDIA_DRIVER_CAPABILITIES compute,utility -ENV NVIDIA_REQUIRE_CUDA "cuda>=10.0 brand=tesla,driver>=384,driver<385 brand=tesla,driver>=410,driver<411" - -ENV CUDA_HOME /usr/local/cuda - -## FROM CUDA 10.0 runtime - -ENV NCCL_VERSION 2.4.8 - -RUN apt-get update && apt-get install -y --no-install-recommends \ - cuda-libraries-$CUDA_PKG_VERSION \ - cuda-nvtx-$CUDA_PKG_VERSION \ - libnccl2=$NCCL_VERSION-1+cuda10.0 && \ - apt-mark hold libnccl2 && \ - rm -rf /var/lib/apt/lists/* - - -## FROM CUDA 10.0 devel - -RUN apt-get update && apt-get install -y --no-install-recommends \ - cuda-libraries-dev-$CUDA_PKG_VERSION \ - cuda-nvml-dev-$CUDA_PKG_VERSION \ - cuda-minimal-build-$CUDA_PKG_VERSION \ - 
cuda-command-line-tools-$CUDA_PKG_VERSION \ - libnccl-dev=$NCCL_VERSION-1+cuda10.0 && \ - rm -rf /var/lib/apt/lists/* - -ENV LIBRARY_PATH /usr/local/cuda/lib64/stubs - -## FROM CUDA 10.0-CUDNN 7 devel - -ENV CUDNN_VERSION 7.6.5.32 -LABEL com.nvidia.cudnn.version="${CUDNN_VERSION}" - -RUN apt-get update && apt-get install -y --no-install-recommends \ - libcudnn7=$CUDNN_VERSION-1+cuda10.0 \ - libcudnn7-dev=$CUDNN_VERSION-1+cuda10.0 && \ - apt-mark hold libcudnn7 && \ - rm -rf /var/lib/apt/lists/* - -WORKDIR / \ No newline at end of file diff --git a/docker/prebuilt/devel/gpu/10.1/Dockerfile b/docker/prebuilt/devel/gpu/10.1/Dockerfile deleted file mode 100644 index 044128f2910..00000000000 --- a/docker/prebuilt/devel/gpu/10.1/Dockerfile +++ /dev/null @@ -1,39 +0,0 @@ -ARG FROM_TAG -FROM espnet/espnet:${FROM_TAG} -LABEL maintainer "Nelson Yalta " - -## FROM CUDA 10.1 base [https://gitlab.com/nvidia/cuda/blob/ubuntu18.04/10.1/base/Dockerfile] - -RUN apt-get update && apt-get install -y --no-install-recommends gnupg2 curl ca-certificates && \ - curl -fsSL https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/7fa2af80.pub | apt-key add - && \ - echo "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64 /" > /etc/apt/sources.list.d/cuda.list && \ - echo "deb https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64 /" > /etc/apt/sources.list.d/nvidia-ml.list && \ - apt-get purge --autoremove -y curl && \ - rm -rf /var/lib/apt/lists/* - -ENV CUDA_VERSION 10.1.243 - -ENV CUDA_PKG_VERSION 10-1=$CUDA_VERSION-1 - -# For libraries in the cuda-compat-* package: https://docs.nvidia.com/cuda/eula/index.html#attachment-a -RUN apt-get update && apt-get install -y --no-install-recommends \ - cuda-cudart-$CUDA_PKG_VERSION \ - cuda-compat-10-1 && \ - ln -s cuda-10.1 /usr/local/cuda && \ - rm -rf /var/lib/apt/lists/* - -# Required for nvidia-docker v1 -RUN echo "/usr/local/nvidia/lib" >> 
/etc/ld.so.conf.d/nvidia.conf && \ - echo "/usr/local/nvidia/lib64" >> /etc/ld.so.conf.d/nvidia.conf - -ENV PATH /usr/local/nvidia/bin:/usr/local/cuda/bin:${PATH} -ENV LD_LIBRARY_PATH /usr/local/nvidia/lib:/usr/local/nvidia/lib64 - -# nvidia-container-runtime -ENV NVIDIA_VISIBLE_DEVICES all -ENV NVIDIA_DRIVER_CAPABILITIES compute,utility -ENV NVIDIA_REQUIRE_CUDA "cuda>=10.1 brand=tesla,driver>=396,driver<397 brand=tesla,driver>=410,driver<411 brand=tesla,driver>=418,driver<419" - -ENV CUDA_HOME /usr/local/cuda - -WORKDIR / \ No newline at end of file diff --git a/docker/prebuilt/devel/gpu/10.2/Dockerfile b/docker/prebuilt/devel/gpu/10.2/Dockerfile deleted file mode 100644 index aa3d2c2d9a6..00000000000 --- a/docker/prebuilt/devel/gpu/10.2/Dockerfile +++ /dev/null @@ -1,81 +0,0 @@ -ARG FROM_TAG -FROM espnet/espnet:${FROM_TAG} -LABEL maintainer "Nelson Yalta " - -## FROM CUDA 10.1 base [https://gitlab.com/nvidia/cuda/blob/ubuntu18.04/10.1/base/Dockerfile] - -RUN apt-get update && apt-get install -y --no-install-recommends gnupg2 curl ca-certificates && \ - curl -fsSL https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/7fa2af80.pub | apt-key add - && \ - echo "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64 /" > /etc/apt/sources.list.d/cuda.list && \ - echo "deb https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64 /" > /etc/apt/sources.list.d/nvidia-ml.list && \ - apt-get purge --autoremove -y curl && \ - rm -rf /var/lib/apt/lists/* - -ENV CUDA_VERSION 10.1.168 - -ENV CUDA_PKG_VERSION 10-1=$CUDA_VERSION-1 - -# For libraries in the cuda-compat-* package: https://docs.nvidia.com/cuda/eula/index.html#attachment-a -RUN apt-get update && apt-get install -y --no-install-recommends \ - cuda-cudart-$CUDA_PKG_VERSION \ - cuda-compat-10-1 && \ - ln -s cuda-10.1 /usr/local/cuda && \ - rm -rf /var/lib/apt/lists/* - -# Required for nvidia-docker v1 -RUN echo "/usr/local/nvidia/lib" >> 
/etc/ld.so.conf.d/nvidia.conf && \ - echo "/usr/local/nvidia/lib64" >> /etc/ld.so.conf.d/nvidia.conf - -ENV PATH /usr/local/nvidia/bin:/usr/local/cuda/bin:${PATH} -ENV LD_LIBRARY_PATH /usr/local/nvidia/lib:/usr/local/nvidia/lib64 - -# nvidia-container-runtime -ENV NVIDIA_VISIBLE_DEVICES all -ENV NVIDIA_DRIVER_CAPABILITIES compute,utility -ENV NVIDIA_REQUIRE_CUDA "cuda>=10.1 brand=tesla,driver>=384,driver<385 brand=tesla,driver>=396,driver<397 brand=tesla,driver>=410,driver<411" - -ENV CUDA_HOME /usr/local/cuda - -## FROM CUDA 10.1 runtime [https://gitlab.com/nvidia/cuda/blob/ubuntu18.04/10.1/runtime/Dockerfile] - -ENV NCCL_VERSION 2.7.8 - -RUN apt-get update && apt-get install -y --no-install-recommends \ - cuda-libraries-$CUDA_PKG_VERSION \ - cuda-nvtx-$CUDA_PKG_VERSION \ - libnccl2=$NCCL_VERSION-1+cuda10.1 && \ - apt-mark hold libnccl2 && \ - rm -rf /var/lib/apt/lists/* - -## FROM CUDA 10.1 devel [https://gitlab.com/nvidia/cuda/blob/ubuntu18.04/10.1/devel/Dockerfile] - -RUN apt-get update && apt-get install -y --no-install-recommends \ - cuda-nvml-dev-$CUDA_PKG_VERSION \ - cuda-command-line-tools-$CUDA_PKG_VERSION \ - cuda-nvprof-$CUDA_PKG_VERSION \ - cuda-npp-dev-$CUDA_PKG_VERSION \ - cuda-libraries-dev-$CUDA_PKG_VERSION \ - cuda-minimal-build-$CUDA_PKG_VERSION \ - libcublas-dev=10.2.1.243-1 \ - libnccl-dev=2.7.8-1+cuda10.1 && \ - apt-mark hold libnccl-dev && \ - rm -rf /var/lib/apt/lists/* - -# apt from auto upgrading the cublas package. 
See https://gitlab.com/nvidia/container-images/cuda/-/issues/88 -RUN apt-mark hold libcublas-dev - - -ENV LIBRARY_PATH /usr/local/cuda/lib64/stubs - -## FROM CUDA 10.1-CUDNN 7 devel - -ENV CUDNN_VERSION 7.6.0.64 -LABEL com.nvidia.cudnn.version="${CUDNN_VERSION}" - -RUN apt-get update && apt-get install -y --no-install-recommends \ - libcudnn7=$CUDNN_VERSION-1+cuda10.1 \ - libcudnn7-dev=$CUDNN_VERSION-1+cuda10.1 && \ - apt-mark hold libcudnn7 && \ - rm -rf /var/lib/apt/lists/* - -WORKDIR / \ No newline at end of file diff --git a/docker/prebuilt/devel/gpu/8.0/Dockerfile b/docker/prebuilt/devel/gpu/8.0/Dockerfile deleted file mode 100644 index 296b3286eea..00000000000 --- a/docker/prebuilt/devel/gpu/8.0/Dockerfile +++ /dev/null @@ -1,80 +0,0 @@ -ARG FROM_TAG -FROM espnet/espnet:${FROM_TAG} -LABEL maintainer "Nelson Yalta " - -## FROM CUDA 8.0 runtime - -RUN apt-get update && apt-get install -y --no-install-recommends ca-certificates apt-transport-https gnupg-curl && \ - rm -rf /var/lib/apt/lists/* && \ - NVIDIA_GPGKEY_SUM=d1be581509378368edeec8c1eb2958702feedf3bc3d17011adbf24efacce4ab5 && \ - NVIDIA_GPGKEY_FPR=ae09fe4bbd223a84b2ccfce3f60f4b3d7fa2af80 && \ - apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64/7fa2af80.pub && \ - apt-key adv --export --no-emit-version -a $NVIDIA_GPGKEY_FPR | tail -n +5 > cudasign.pub && \ - echo "$NVIDIA_GPGKEY_SUM cudasign.pub" | sha256sum -c --strict - && rm cudasign.pub && \ - echo "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64 /" > /etc/apt/sources.list.d/cuda.list - -ENV CUDA_VERSION 8.0.61 - -ENV CUDA_PKG_VERSION 8-0=$CUDA_VERSION-1 -RUN apt-get update && apt-get install -y --no-install-recommends \ - cuda-nvrtc-$CUDA_PKG_VERSION \ - cuda-nvgraph-$CUDA_PKG_VERSION \ - cuda-cusolver-$CUDA_PKG_VERSION \ - cuda-cublas-8-0=8.0.61.2-1 \ - cuda-cufft-$CUDA_PKG_VERSION \ - cuda-curand-$CUDA_PKG_VERSION \ - cuda-cusparse-$CUDA_PKG_VERSION \ - 
cuda-npp-$CUDA_PKG_VERSION \ - cuda-cudart-$CUDA_PKG_VERSION && \ - ln -s cuda-8.0 /usr/local/cuda && \ - rm -rf /var/lib/apt/lists/* - -# nvidia-docker 1.0 -LABEL com.nvidia.volumes.needed="nvidia_driver" -LABEL com.nvidia.cuda.version="${CUDA_VERSION}" - -RUN echo "/usr/local/nvidia/lib" >> /etc/ld.so.conf.d/nvidia.conf && \ - echo "/usr/local/nvidia/lib64" >> /etc/ld.so.conf.d/nvidia.conf - -ENV PATH /usr/local/nvidia/bin:/usr/local/cuda/bin:${PATH} -ENV LD_LIBRARY_PATH /usr/local/nvidia/lib:/usr/local/nvidia/lib64 -ENV CUDA_HOME /usr/local/cuda - -# nvidia-container-runtime -ENV NVIDIA_VISIBLE_DEVICES all -ENV NVIDIA_DRIVER_CAPABILITIES compute,utility -ENV NVIDIA_REQUIRE_CUDA "cuda>=8.0" - -## FROM CUDA 8.0 devel - -RUN apt-get update && apt-get install -y --no-install-recommends \ - cuda-core-$CUDA_PKG_VERSION \ - cuda-misc-headers-$CUDA_PKG_VERSION \ - cuda-command-line-tools-$CUDA_PKG_VERSION \ - cuda-nvrtc-dev-$CUDA_PKG_VERSION \ - cuda-nvml-dev-$CUDA_PKG_VERSION \ - cuda-nvgraph-dev-$CUDA_PKG_VERSION \ - cuda-cusolver-dev-$CUDA_PKG_VERSION \ - cuda-cublas-dev-8-0=8.0.61.2-1 \ - cuda-cufft-dev-$CUDA_PKG_VERSION \ - cuda-curand-dev-$CUDA_PKG_VERSION \ - cuda-cusparse-dev-$CUDA_PKG_VERSION \ - cuda-npp-dev-$CUDA_PKG_VERSION \ - cuda-cudart-dev-$CUDA_PKG_VERSION \ - cuda-driver-dev-$CUDA_PKG_VERSION && \ - rm -rf /var/lib/apt/lists/* - -ENV LIBRARY_PATH /usr/local/cuda/lib64/stubs - -## FROM CUDA 8.0 CUDNN 7 devel - -RUN echo "deb https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64 /" > /etc/apt/sources.list.d/nvidia-ml.list - -ENV CUDNN_VERSION 7.2.1.38 -LABEL com.nvidia.cudnn.version="${CUDNN_VERSION}" - -RUN apt-get update && apt-get install -y --no-install-recommends \ - libcudnn7=$CUDNN_VERSION-1+cuda8.0 \ - libcudnn7-dev=$CUDNN_VERSION-1+cuda8.0 && \ - apt-mark hold libcudnn7 && \ - rm -rf /var/lib/apt/lists/* diff --git a/docker/prebuilt/devel/gpu/9.0/Dockerfile b/docker/prebuilt/devel/gpu/9.0/Dockerfile deleted file 
mode 100644 index 7bd144354fd..00000000000 --- a/docker/prebuilt/devel/gpu/9.0/Dockerfile +++ /dev/null @@ -1,76 +0,0 @@ -ARG FROM_TAG -FROM espnet/espnet:${FROM_TAG} -LABEL maintainer "Nelson Yalta " - -## FROM CUDA 9.0 base - -RUN apt-get update && apt-get install -y --no-install-recommends ca-certificates apt-transport-https gnupg-curl && \ - rm -rf /var/lib/apt/lists/* && \ - NVIDIA_GPGKEY_SUM=d1be581509378368edeec8c1eb2958702feedf3bc3d17011adbf24efacce4ab5 && \ - NVIDIA_GPGKEY_FPR=ae09fe4bbd223a84b2ccfce3f60f4b3d7fa2af80 && \ - apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64/7fa2af80.pub && \ - apt-key adv --export --no-emit-version -a $NVIDIA_GPGKEY_FPR | tail -n +5 > cudasign.pub && \ - echo "$NVIDIA_GPGKEY_SUM cudasign.pub" | sha256sum -c --strict - && rm cudasign.pub && \ - echo "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64 /" > /etc/apt/sources.list.d/cuda.list && \ - echo "deb https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64 /" > /etc/apt/sources.list.d/nvidia-ml.list - -ENV CUDA_VERSION 9.0.176 - -ENV CUDA_PKG_VERSION 9-0=$CUDA_VERSION-1 -RUN apt-get update && apt-get install -y --no-install-recommends \ - cuda-cudart-$CUDA_PKG_VERSION && \ - ln -s cuda-9.0 /usr/local/cuda && \ - rm -rf /var/lib/apt/lists/* - -# nvidia-docker 1.0 -LABEL com.nvidia.volumes.needed="nvidia_driver" -LABEL com.nvidia.cuda.version="${CUDA_VERSION}" - -RUN echo "/usr/local/nvidia/lib" >> /etc/ld.so.conf.d/nvidia.conf && \ - echo "/usr/local/nvidia/lib64" >> /etc/ld.so.conf.d/nvidia.conf - -ENV PATH /usr/local/nvidia/bin:/usr/local/cuda/bin:${PATH} -ENV LD_LIBRARY_PATH /usr/local/nvidia/lib:/usr/local/nvidia/lib64 -ENV CUDA_HOME /usr/local/cuda - -# nvidia-container-runtime -ENV NVIDIA_VISIBLE_DEVICES all -ENV NVIDIA_DRIVER_CAPABILITIES compute,utility -ENV NVIDIA_REQUIRE_CUDA "cuda>=9.0" - -## FROM CUDA 9.0 runtime - -ENV NCCL_VERSION 2.4.2 - -RUN apt-get 
update && apt-get install -y --no-install-recommends \ - cuda-libraries-$CUDA_PKG_VERSION \ - cuda-cublas-9-0=9.0.176.4-1 \ - libnccl2=$NCCL_VERSION-1+cuda9.0 && \ - apt-mark hold libnccl2 && \ - rm -rf /var/lib/apt/lists/* - -## FROM CUDA 9.0 devel - -RUN apt-get update && apt-get install -y --no-install-recommends \ - cuda-libraries-dev-$CUDA_PKG_VERSION \ - cuda-nvml-dev-$CUDA_PKG_VERSION \ - cuda-minimal-build-$CUDA_PKG_VERSION \ - cuda-command-line-tools-$CUDA_PKG_VERSION \ - cuda-core-9-0=9.0.176.3-1 \ - cuda-cublas-dev-9-0=9.0.176.4-1 \ - libnccl-dev=$NCCL_VERSION-1+cuda9.0 && \ - rm -rf /var/lib/apt/lists/* - -ENV LIBRARY_PATH /usr/local/cuda/lib64/stubs - -## FROM CUDA 9.0 CUDNN 7 devel - -ENV CUDNN_VERSION 7.4.2.24 -LABEL com.nvidia.cudnn.version="${CUDNN_VERSION}" - -RUN apt-get update && apt-get install -y --no-install-recommends \ - libcudnn7=$CUDNN_VERSION-1+cuda9.0 \ - libcudnn7-dev=$CUDNN_VERSION-1+cuda9.0 && \ - apt-mark hold libcudnn7 && \ - rm -rf /var/lib/apt/lists/* - diff --git a/docker/prebuilt/devel/gpu/9.1/Dockerfile b/docker/prebuilt/devel/gpu/9.1/Dockerfile deleted file mode 100644 index 5f0b2c62e60..00000000000 --- a/docker/prebuilt/devel/gpu/9.1/Dockerfile +++ /dev/null @@ -1,72 +0,0 @@ -ARG FROM_TAG -FROM espnet/espnet:${FROM_TAG} -LABEL maintainer "Nelson Yalta " - -## FROM CUDA 9.1 base - -RUN apt-get update && apt-get install -y --no-install-recommends ca-certificates apt-transport-https gnupg-curl && \ - rm -rf /var/lib/apt/lists/* && \ - NVIDIA_GPGKEY_SUM=d1be581509378368edeec8c1eb2958702feedf3bc3d17011adbf24efacce4ab5 && \ - NVIDIA_GPGKEY_FPR=ae09fe4bbd223a84b2ccfce3f60f4b3d7fa2af80 && \ - apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64/7fa2af80.pub && \ - apt-key adv --export --no-emit-version -a $NVIDIA_GPGKEY_FPR | tail -n +5 > cudasign.pub && \ - echo "$NVIDIA_GPGKEY_SUM cudasign.pub" | sha256sum -c --strict - && rm cudasign.pub && \ - echo "deb 
https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64 /" > /etc/apt/sources.list.d/cuda.list && \ - echo "deb https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64 /" > /etc/apt/sources.list.d/nvidia-ml.list - -ENV CUDA_VERSION 9.1.85 - -ENV CUDA_PKG_VERSION 9-1=$CUDA_VERSION-1 -RUN apt-get update && apt-get install -y --no-install-recommends \ - cuda-cudart-$CUDA_PKG_VERSION && \ - ln -s cuda-9.1 /usr/local/cuda && \ - rm -rf /var/lib/apt/lists/* - -# nvidia-docker 1.0 -LABEL com.nvidia.volumes.needed="nvidia_driver" -LABEL com.nvidia.cuda.version="${CUDA_VERSION}" - -RUN echo "/usr/local/nvidia/lib" >> /etc/ld.so.conf.d/nvidia.conf && \ - echo "/usr/local/nvidia/lib64" >> /etc/ld.so.conf.d/nvidia.conf - -ENV PATH /usr/local/nvidia/bin:/usr/local/cuda/bin:${PATH} -ENV LD_LIBRARY_PATH /usr/local/nvidia/lib:/usr/local/nvidia/lib64 -ENV CUDA_HOME /usr/local/cuda - -# nvidia-container-runtime -ENV NVIDIA_VISIBLE_DEVICES all -ENV NVIDIA_DRIVER_CAPABILITIES compute,utility -ENV NVIDIA_REQUIRE_CUDA "cuda>=9.1" - -## FROM CUDA 9.1 runtime - -ENV NCCL_VERSION 2.2.12 - -RUN apt-get update && apt-get install -y --no-install-recommends \ - cuda-libraries-$CUDA_PKG_VERSION \ - libnccl2=$NCCL_VERSION-1+cuda9.1 && \ - apt-mark hold libnccl2 && \ - rm -rf /var/lib/apt/lists/* - -## FROM CUDA 9.1 devel - -RUN apt-get update && apt-get install -y --no-install-recommends \ - cuda-libraries-dev-$CUDA_PKG_VERSION \ - cuda-nvml-dev-$CUDA_PKG_VERSION \ - cuda-minimal-build-$CUDA_PKG_VERSION \ - cuda-command-line-tools-$CUDA_PKG_VERSION \ - libnccl-dev=$NCCL_VERSION-1+cuda9.1 && \ - rm -rf /var/lib/apt/lists/* - -ENV LIBRARY_PATH /usr/local/cuda/lib64/stubs - -## FROM CUDA 9.1 CUDNN 7 - -ENV CUDNN_VERSION 7.1.2.21 -LABEL com.nvidia.cudnn.version="${CUDNN_VERSION}" - -RUN apt-get update && apt-get install -y --no-install-recommends \ - libcudnn7=$CUDNN_VERSION-1+cuda9.1 \ - libcudnn7-dev=$CUDNN_VERSION-1+cuda9.1 && \ - apt-mark hold 
libcudnn7 && \ - rm -rf /var/lib/apt/lists/* diff --git a/docker/prebuilt/devel/gpu/9.2/Dockerfile b/docker/prebuilt/devel/gpu/9.2/Dockerfile deleted file mode 100644 index 14a089b6d34..00000000000 --- a/docker/prebuilt/devel/gpu/9.2/Dockerfile +++ /dev/null @@ -1,73 +0,0 @@ -ARG FROM_TAG -FROM espnet/espnet:${FROM_TAG} -LABEL maintainer "Nelson Yalta " - -## FROM CUDA 9.2 base [https://gitlab.com/nvidia/cuda/blob/ubuntu18.04/9.2/base/Dockerfile] -# CUDA 9.2 is not officially supported on ubuntu 18.04 yet, the ubuntu 17.10 repository for CUDA were used instead. -RUN apt-get update && apt-get install -y --no-install-recommends gnupg2 curl ca-certificates && \ - curl -fsSL https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1710/x86_64/7fa2af80.pub | apt-key add - && \ - echo "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1710/x86_64 /" > /etc/apt/sources.list.d/cuda.list && \ - echo "deb https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64 /" > /etc/apt/sources.list.d/nvidia-ml.list && \ - apt-get purge --autoremove -y curl && \ - rm -rf /var/lib/apt/lists/* - -ENV CUDA_VERSION 9.2.148 - -ENV CUDA_PKG_VERSION 9-2=$CUDA_VERSION-1 -RUN apt-get update && apt-get install -y --no-install-recommends \ - cuda-cudart-$CUDA_PKG_VERSION && \ - ln -s cuda-9.2 /usr/local/cuda && \ - rm -rf /var/lib/apt/lists/* - -# nvidia-docker 1.0 -LABEL com.nvidia.volumes.needed="nvidia_driver" -LABEL com.nvidia.cuda.version="${CUDA_VERSION}" - -RUN echo "/usr/local/nvidia/lib" >> /etc/ld.so.conf.d/nvidia.conf && \ - echo "/usr/local/nvidia/lib64" >> /etc/ld.so.conf.d/nvidia.conf - -ENV PATH /usr/local/nvidia/bin:/usr/local/cuda/bin:${PATH} -ENV LD_LIBRARY_PATH /usr/local/nvidia/lib:/usr/local/nvidia/lib64 - -# nvidia-container-runtime -ENV NVIDIA_VISIBLE_DEVICES all -ENV NVIDIA_DRIVER_CAPABILITIES compute,utility -ENV NVIDIA_REQUIRE_CUDA "cuda>=9.2" - -ENV CUDA_HOME /usr/local/cuda - -## FROM CUDA 9.2 runtime - -ENV 
NCCL_VERSION 2.3.7 - -RUN apt-get update && apt-get install -y --no-install-recommends \ - cuda-libraries-$CUDA_PKG_VERSION \ - cuda-nvtx-$CUDA_PKG_VERSION \ - libnccl2=$NCCL_VERSION-1+cuda9.2 && \ - apt-mark hold libnccl2 && \ - rm -rf /var/lib/apt/lists/* - -## FROM CUDA 9.2 devel [https://gitlab.com/nvidia/cuda/blob/ubuntu18.04/9.2/devel/Dockerfile] - -RUN apt-get update && apt-get install -y --no-install-recommends \ - cuda-libraries-dev-$CUDA_PKG_VERSION \ - cuda-nvml-dev-$CUDA_PKG_VERSION \ - cuda-minimal-build-$CUDA_PKG_VERSION \ - cuda-command-line-tools-$CUDA_PKG_VERSION \ - libnccl-dev=$NCCL_VERSION-1+cuda9.2 && \ - rm -rf /var/lib/apt/lists/* - -ENV LIBRARY_PATH /usr/local/cuda/lib64/stubs - -## FROM CUDA 9.2-CUDNN 7 devel [https://gitlab.com/nvidia/cuda/blob/ubuntu18.04/9.2/devel/cudnn7/Dockerfile] - -ENV CUDNN_VERSION 7.5.0.56 -LABEL com.nvidia.cudnn.version="${CUDNN_VERSION}" - -RUN apt-get update && apt-get install -y --no-install-recommends \ - libcudnn7=$CUDNN_VERSION-1+cuda9.2 \ - libcudnn7-dev=$CUDNN_VERSION-1+cuda9.2 && \ - apt-mark hold libcudnn7 && \ - rm -rf /var/lib/apt/lists/* - -WORKDIR / \ No newline at end of file diff --git a/docker/prebuilt/devel/gpu/11.1/Dockerfile b/docker/prebuilt/gpu.dockerfile similarity index 96% rename from docker/prebuilt/devel/gpu/11.1/Dockerfile rename to docker/prebuilt/gpu.dockerfile index d49660f50a6..a94504dc52c 100644 --- a/docker/prebuilt/devel/gpu/11.1/Dockerfile +++ b/docker/prebuilt/gpu.dockerfile @@ -1,5 +1,7 @@ ARG FROM_TAG -FROM espnet/espnet:${FROM_TAG} +ARG NUM_BUILD_CORES=8 +ARG DOCKER_VER +FROM espnet/espnet:${FROM_TAG} AS cuda_builder LABEL maintainer "Nelson Yalta " ## FROM CUDA 11.1 base @@ -55,5 +57,4 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ RUN apt-mark hold libcublas-dev-11-1 ENV LIBRARY_PATH /usr/local/cuda/lib64/stubs - WORKDIR / diff --git a/docker/prebuilt/local/Dockerfile b/docker/prebuilt/local/Dockerfile deleted file mode 100644 index 
15939185aff..00000000000 --- a/docker/prebuilt/local/Dockerfile +++ /dev/null @@ -1,50 +0,0 @@ -ARG FROM_TAG -FROM espnet/espnet:${FROM_TAG} -LABEL maintainer "Nelson Yalta " - -ARG CUDA_VER -WORKDIR / - -# IF using a local ESPNet repository, a temporary file containing the ESPnet git repo is copied over -ARG ESPNET_ARCHIVE=./espnet-local.tar -COPY ${ESPNET_ARCHIVE} /espnet-local.tar - - -# Download ESPnet -RUN echo "Getting ESPnet sources from local repository, in temporary file: " ${ESPNET_ARCHIVE} -RUN mkdir /espnet -RUN tar xf espnet-local.tar -C /espnet/ -RUN rm espnet-local.tar - -RUN cd espnet && \ - rm -rf docker egs test utils - -# Install espnet -WORKDIR /espnet/tools - -# Replace nvidia-smi for nvcc because docker does not load nvidia-smi -RUN if [ -z "$( which nvcc )" ]; then \ - echo "Build without CUDA" && \ - MY_OPTS='CUPY_VERSION="" TH_VERSION=1.6.0'; \ - else \ - echo "Build with CUDA" && \ - # Disable cupy test - # Docker build does not load libcuda.so.1 - # So, their checks on cuda packages are disabled. - sed -i '200s|install.py|install.py --no-cuda --no-cupy |' Makefile && \ - export CFLAGS="-I${CUDA_HOME}/include ${CFLAGS}" && \ - MY_OPTS="CUDA_VERSION=${CUDA_VER}" && \ - . 
./setup_cuda_env.sh /usr/local/cuda; \ - fi; \ - if [ "${CUDA_VER}" = "10.1" ]; then \ - # warpctc is not supported from Pytorch 1.3.1 - MY_OPTS="${MY_OPTS} TH_VERSION=1.6.0"; \ - fi; \ - echo "Make with options ${MY_OPTS}" && \ - ln -s /kaldi ./ && \ - ./setup_anaconda.sh /miniconda espnet 3.7.4 && \ - make KALDI=/kaldi ${MY_OPTS} - -RUN rm -rf ../espnet - -WORKDIR / diff --git a/docker/prebuilt/runtime/Dockerfile b/docker/prebuilt/runtime.dockerfile similarity index 82% rename from docker/prebuilt/runtime/Dockerfile rename to docker/prebuilt/runtime.dockerfile index 86ac859a67a..5f54ed90c90 100644 --- a/docker/prebuilt/runtime/Dockerfile +++ b/docker/prebuilt/runtime.dockerfile @@ -1,11 +1,11 @@ ARG FROM_TAG -FROM ubuntu:${FROM_TAG} +ARG NUM_BUILD_CORES=8 +ARG DOCKER_VER + +FROM ubuntu:${FROM_TAG} AS main_builder LABEL maintainer "Nelson Yalta " -ARG DOCKER_VER ENV DOCKER_BUILT_VER ${DOCKER_VER} - -ARG NUM_BUILD_CORES=8 ENV NUM_BUILD_CORES ${NUM_BUILD_CORES} RUN apt-get update && DEBIAN_FRONTEND=noninteractive \ @@ -49,19 +49,24 @@ RUN add-apt-repository ppa:git-core/ppa -y && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* -# # Using kaldi pre-built binaries RUN git clone --depth 1 https://github.com/kaldi-asr/kaldi /opt/kaldi +RUN wget --tries=3 -nv "https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh" -O miniconda.sh && \ + bash miniconda.sh -b -p /opt/miniconda && \ + rm miniconda.sh + +WORKDIR / + +FROM main_builder AS espnet1 +# # Using kaldi pre-built binaries RUN cd /opt/kaldi/tools && \ echo "" > extras/check_dependencies.sh && \ chmod +x extras/check_dependencies.sh && \ cd /opt/kaldi && \ - wget --tries=3 https://github.com/espnet/kaldi-bin/releases/download/v0.0.1/ubuntu16-featbin.tar.gz && \ + wget --tries=3 -nv https://github.com/espnet/kaldi-bin/releases/download/v0.0.1/ubuntu16-featbin.tar.gz && \ tar -xf ./ubuntu16-featbin.tar.gz && \ cp featbin/* src/featbin/ && \ rm -rf featbin && \ rm -f ubuntu16-featbin.tar.gz -RUN wget 
--tries=3 "https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh" -O miniconda.sh && \ - bash miniconda.sh -b -p /opt/miniconda && \ - rm miniconda.sh +WORKDIR / diff --git a/docker/run.sh b/docker/run.sh index b0fbcebd307..cff0d5604bc 100755 --- a/docker/run.sh +++ b/docker/run.sh @@ -116,8 +116,8 @@ if [ ${is_root} = false ]; then build_args="${build_args} --build-arg THIS_UID=${UID}" build_args="${build_args} --build-arg EXTRA_LIBS=${EXTRAS}" - echo "Now running docker build ${build_args} -f prebuilt/Dockerfile -t espnet/espnet:${container_tag} ." - (docker build ${build_args} -f prebuilt/Dockerfile -t espnet/espnet:${container_tag} .) || exit 1 + echo "Now running docker build ${build_args} -f espnet.dockerfile -t espnet/espnet:${container_tag} ." + (docker build ${build_args} -f espnet.dockerfile -t espnet/espnet:${container_tag} .) || exit 1 fi else container_tag=${from_tag} diff --git a/egs/README.md b/egs/README.md index 9cddf37df4e..78fa57049ae 100755 --- a/egs/README.md +++ b/egs/README.md @@ -8,6 +8,7 @@ See: https://espnet.github.io/espnet/tutorial.html | Directory name | Corpus name | Task | Language | URL | Note | | ----------------------- | ------------------------------------------------------------ | ------------------------------------------ | -------------- | ------------------------------------------------------------ | ----------------------------- | |||| +| aesrc2020 | Accented English Speech Recognition Challenge 2020 | ASR | EN | https://arxiv.org/abs/2102.10233 | | | aidatatang_200zh | Aidatatang_200zh A free Chinese Mandarin speech corpus | ASR | ZH | http://www.openslr.org/62/ | | | aishell | AISHELL-ASR0009-OS1 Open Source Mandarin Speech Corpus | ASR | ZH | http://www.aishelltech.com/kysjcp | | | aishell2 | AISHELL-2 Open Source Mandarin Speech Corpus | ASR | ZH | http://www.aishelltech.com/aishell_2 | @@ -49,6 +50,8 @@ See: https://espnet.github.io/espnet/tutorial.html | librispeech | LibriSpeech ASR corpus | ASR | EN | 
http://www.openslr.org/12 | | | libritts | LibriTTS: A Corpus Derived from LibriSpeech for Text-to-Speech | TTS | EN | http://www.openslr.org/60/ | | | ljspeech | The LJ Speech Dataset | TTS | EN | https://keithito.com/LJ-Speech-Dataset/ | | +| lrs2 | The Lip Reading Sentences 2 Dataset | ASR | ENG | https://www.robots.ox.ac.uk/~vgg/data/lip_reading/lrs2.html | | +| lrs | The Lip Reading Sentences 2 and 3 Dataset | AVSR | ENG | https://www.robots.ox.ac.uk/~vgg/data/lip_reading/lrs2.html https://www.robots.ox.ac.uk/~vgg/data/lip_reading/lrs3.html | | | m_ailabs | The M-AILABS Speech Dataset | TTS | ~5 languages | https://www.caito.de/2019/01/the-m-ailabs-speech-dataset/ | | mucs_2021 | MUCS 2021: MUltilingual and Code-Switching ASR Challenges for Low Resource Indian Languages | ASR/Code Switching | HI, MR, OR, TA, TE, GU, HI-EN, BN-EN | https://navana-tech.github.io/MUCS2021/data.html | | | mtedx | Multilingual TEDx | ASR/Machine Translation/Speech Translation | 13 Language pairs | http://www.openslr.org/100/ | diff --git a/egs/aesrc2020/asr1/RESULTS.md b/egs/aesrc2020/asr1/RESULTS.md new file mode 100644 index 00000000000..e69de29bb2d diff --git a/egs/aesrc2020/asr1/cmd.sh b/egs/aesrc2020/asr1/cmd.sh new file mode 100644 index 00000000000..7b70ef5e06e --- /dev/null +++ b/egs/aesrc2020/asr1/cmd.sh @@ -0,0 +1,89 @@ +# ====== About run.pl, queue.pl, slurm.pl, and ssh.pl ====== +# Usage: .pl [options] JOB=1: +# e.g. +# run.pl --mem 4G JOB=1:10 echo.JOB.log echo JOB +# +# Options: +# --time