From 4ff2ce1244e0af72439deaa59226eba434a70618 Mon Sep 17 00:00:00 2001 From: kamo-naoyuki Date: Tue, 10 May 2022 11:34:31 +0900 Subject: [PATCH 01/22] add pytorch=1.10.1, 1.11.0 to ci configurations --- .github/workflows/centos7.yml | 2 +- .github/workflows/ci.yaml | 11 ++++++++--- .github/workflows/debian9.yml | 2 +- .github/workflows/test_import.yaml | 2 +- README.md | 15 ++++++++------- setup.py | 1 + tools/Makefile | 2 +- tools/installers/install_torch.sh | 16 ++++++++++++++-- 8 files changed, 35 insertions(+), 16 deletions(-) diff --git a/.github/workflows/centos7.yml b/.github/workflows/centos7.yml index 94d5973e859..d365c2e4961 100644 --- a/.github/workflows/centos7.yml +++ b/.github/workflows/centos7.yml @@ -19,7 +19,7 @@ jobs: # ImportError: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found # (required by /__w/espnet/espnet/tools/venv/envs/espnet/lib/python3.6/site-packages/pyworld/pyworld.cpython-36m-x86_64-linux-gnu.so) # NOTE(kamo): The issue doens't exist for python3.7? - TH_VERSION: 1.10.1 + TH_VERSION: 1.11.0 CHAINER_VERSION: 6.0.0 USE_CONDA: true CC: /opt/rh/devtoolset-7/root/usr/bin/gcc diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index a01edd95bc7..ac69ca49b32 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -16,19 +16,24 @@ jobs: matrix: os: [ubuntu-18.04] python-version: [3.7] - pytorch-version: [1.3.1, 1.4.0, 1.5.1, 1.6.0, 1.7.1, 1.8.1, 1.9.1, 1.10.1] + pytorch-version: [1.3.1, 1.4.0, 1.5.1, 1.6.0, 1.7.1, 1.8.1, 1.9.1, 1.10.2, 1.11.0] chainer-version: [6.0.0] # NOTE(kamo): Conda is tested by Circle-CI use-conda: [false] include: - os: ubuntu-20.04 python-version: 3.8 - pytorch-version: 1.10.1 + pytorch-version: 1.11.0 chainer-verssion: 6.0.0 use-conda: false - os: ubuntu-20.04 python-version: 3.9 - pytorch-version: 1.10.1 + pytorch-version: 1.11.0 + chainer-verssion: 6.0.0 + use-conda: false + - os: ubuntu-20.04 + python-version: 3.10 + pytorch-version: 1.11.0 chainer-verssion: 6.0.0 use-conda: false steps: diff --git a/.github/workflows/debian9.yml b/.github/workflows/debian9.yml index a29e5474ad4..79a68e8383d 100644 --- a/.github/workflows/debian9.yml +++ b/.github/workflows/debian9.yml @@ -15,7 +15,7 @@ jobs: image: debian:9 env: ESPNET_PYTHON_VERSION: 3.7 - TH_VERSION: 1.10.1 + TH_VERSION: 1.11.0 CHAINER_VERSION: 6.0.0 USE_CONDA: true CC: gcc-6 diff --git a/.github/workflows/test_import.yaml b/.github/workflows/test_import.yaml index ead9f587c07..1031d3e5601 100644 --- a/.github/workflows/test_import.yaml +++ b/.github/workflows/test_import.yaml @@ -16,7 +16,7 @@ jobs: matrix: os: [ubuntu-latest] python-version: [3.9] - pytorch-version: [1.10.1] + pytorch-version: [1.11.0] steps: - uses: actions/checkout@v2 - uses: actions/cache@v1 diff --git a/README.md b/README.md index 67579053c77..0493ec5b56e 100644 --- a/README.md +++ b/README.md @@ -2,14 +2,15 @@ # ESPnet: end-to-end speech processing toolkit -|system/pytorch ver.|1.3.1|1.4.0|1.5.1|1.6.0|1.7.1|1.8.1|1.9.1|1.10.1| -| :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | -|ubuntu20/python3.9/pip||||||||[![Github Actions](https://github.com/espnet/espnet/workflows/CI/badge.svg)](https://github.com/espnet/espnet/actions)| -|ubuntu20/python3.8/pip||||||||[![Github Actions](https://github.com/espnet/espnet/workflows/CI/badge.svg)](https://github.com/espnet/espnet/actions)| +|system/pytorch ver.|1.3.1|1.4.0|1.5.1|1.6.0|1.7.1|1.8.1|1.9.1|1.10.2|1.11.0| +| :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | 
TH_VERSION := 1.10.1 # Use pip for pytorch installation even if you have anaconda diff --git a/tools/installers/install_torch.sh b/tools/installers/install_torch.sh index 285e37b6fd4..7faa4d535e7 100755 --- a/tools/installers/install_torch.sh +++ b/tools/installers/install_torch.sh @@ -121,18 +121,30 @@ log "[INFO] torch_version=${torch_version}" log "[INFO] cuda_version=${cuda_version}" -if $(pytorch_plus 1.10.2); then +if $(pytorch_plus 1.11.1); then log "[ERROR] This script doesn't support pytorch=${torch_version}" exit 1 +elif $(pytorch_plus 1.11.0); then + check_python_version 3.11 # Error if python>= + check_cuda_version 11.5 11.3 11.1 10.2 # Error if cuda_version doesn't match with any given numbers + install_torch 0.11.0 10.2 # install_torch + +elif $(pytorch_plus 1.10.2); then + check_python_version 3.10 # Error if python>= + check_cuda_version 11.3 11.1 10.2 # Error if cuda_version doesn't match with any given numbers + install_torch 0.10.2 10.2 # install_torch + elif $(pytorch_plus 1.10.1); then check_python_version 3.10 # Error if python>= check_cuda_version 11.3 11.1 10.2 # Error if cuda_version doesn't match with any given numbers install_torch 0.10.1 10.2 # install_torch + elif $(pytorch_plus 1.10.0); then - check_python_version 3.10 # Error if python>= + check_python_version 3.11 # Error if python>= check_cuda_version 11.3 11.1 10.2 # Error if cuda_version doesn't match with any given numbers install_torch 0.10.0 10.2 # install_torch + elif $(pytorch_plus 1.9.2); then log "[ERROR] pytorch=${torch_version} doesn't exist" exit 1 From b98fc861939310b73b50f959bc45176da10ef493 Mon Sep 17 00:00:00 2001 From: kamo-naoyuki Date: Tue, 10 May 2022 11:52:27 +0900 Subject: [PATCH 02/22] fix --- .github/workflows/ci.yaml | 2 +- .mergify.yml | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index bbd021e7afd..058dfea6288 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -32,7 +32,7 @@ jobs: chainer-verssion: 6.0.0 use-conda: false - os: ubuntu-20.04 - python-version: 3.10 + python-version: "3.10" pytorch-version: 1.11.0 chainer-verssion: 6.0.0 use-conda: false diff --git a/.mergify.yml b/.mergify.yml index 0304250182c..c247939e228 100644 --- a/.mergify.yml +++ b/.mergify.yml @@ -4,16 +4,16 @@ pull_request_rules: - "label=auto-merge" - "check-success=test_centos7" - "check-success=test_debian9" - - "check-success=linter_and_test (ubuntu-18.04, 3.7, 1.3.1, 6.0.0, false)" - "check-success=linter_and_test (ubuntu-18.04, 3.7, 1.4.0, 6.0.0, false)" - "check-success=linter_and_test (ubuntu-18.04, 3.7, 1.5.1, 6.0.0, false)" - "check-success=linter_and_test (ubuntu-18.04, 3.7, 1.6.0, 6.0.0, false)" - "check-success=linter_and_test (ubuntu-18.04, 3.7, 1.7.1, 6.0.0, false)" - "check-success=linter_and_test (ubuntu-18.04, 3.7, 1.8.1, 6.0.0, false)" - "check-success=linter_and_test (ubuntu-18.04, 3.7, 1.9.1, 6.0.0, false)" - - "check-success=linter_and_test (ubuntu-18.04, 3.7, 1.10.1, 6.0.0, false)" - - "check-success=linter_and_test (ubuntu-20.04, 3.8, 1.10.1, false, 6.0.0)" - - "check-success=linter_and_test (ubuntu-20.04, 3.9, 1.10.1, false, 6.0.0)" + - "check-success=linter_and_test (ubuntu-18.04, 3.7, 1.10.2, 6.0.0, false)" + - "check-success=linter_and_test (ubuntu-18.04, 3.7, 1.11.0, 6.0.0, false)" + - "check-success=linter_and_test (ubuntu-20.04, 3.8, 1.11.0, false, 6.0.0)" + - "check-success=linter_and_test (ubuntu-20.04, 3.9, 1.11.0, false, 6.0.0)" - "check-success=test_import 
(ubuntu-latest, 3.9, 1.10.1)" actions: merge: From d234b9ab30bbc2bb6fd42d6335421a6f8a9ed637 Mon Sep 17 00:00:00 2001 From: kamo-naoyuki Date: Thu, 12 May 2022 17:10:40 +0900 Subject: [PATCH 03/22] fix --- tools/Makefile | 8 +------- tools/installers/install_torch.sh | 6 ++++++ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tools/Makefile b/tools/Makefile index cad135421a2..338fd8d22fe 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -1,5 +1,5 @@ # PyTorch version: 1.3.1, 1.4.0, 1.5.1, 1.6.0, 1.7.1, 1.8.1, 1.9.1, 1.10.0, 1.10.1 and 1.11.0 are tested. -TH_VERSION := 1.10.1 +TH_VERSION := 1.11.0 # Use pip for pytorch installation even if you have anaconda ifneq ($(shell test -f ./activate_python.sh && grep 'conda activate' ./activate_python.sh),) @@ -28,14 +28,8 @@ endif all: kaldi showenv python conda_packages.done sctk.done sph2pipe.done check_install -ifneq ($(strip $(CHAINER_VERSION)),) python: activate_python.sh espnet.done pytorch.done chainer.done fairscale.done torch_optimizer.done extra: warp-ctc.done warp-transducer.done chainer_ctc.done nkf.done moses.done mwerSegmenter.done pesq kenlm.done pyopenjtalk.done py3mmseg.done beamformit.done fairseq.done s3prl.done k2.done transformers.done phonemizer.done longformer.done -else -python: activate_python.sh espnet.done pytorch.done fairscale.done torch_optimizer.done -extra: warp-ctc.done warp-transducer.done nkf.done moses.done mwerSegmenter.done pesq kenlm.done pyopenjtalk.done py3mmseg.done beamformit.done fairseq.done s3prl.done k2.done transformers.done phonemizer.done longformer.done -endif - kaldi: test -f kaldi/egs/wsj/s5/utils/parse_options.sh || git clone --depth 1 https://github.com/kaldi-asr/kaldi diff --git a/tools/installers/install_torch.sh b/tools/installers/install_torch.sh index 7faa4d535e7..6444f91562e 100755 --- a/tools/installers/install_torch.sh +++ b/tools/installers/install_torch.sh @@ -51,6 +51,12 @@ install_torch(){ if [ -z "${cuda_version}" ]; then log conda install -y "pytorch=${torch_version}" "torchaudio=$1" cpuonly -c pytorch conda install -y "pytorch=${torch_version}" "torchaudio=$1" cpuonly -c pytorch + elif [ "${cuda_version}" = "11.5" ]; then + # NOTE(kamo): In my environment, conda-forge only could installed, but I don't know why @ 12, May, 2022 + cudatoolkit_channel=conda-forge + log conda install -y "pytorch=${torch_version}" "torchaudio=$1" "cudatoolkit=${cuda_version}" -c pytorch -c "${cudatoolkit_channel}" + conda install -y "pytorch=${torch_version}" "torchaudio=$1" "cudatoolkit=${cuda_version}" -c pytorch -c "${cudatoolkit_channel}" + elif [ "${cuda_version}" = "11.1" ] || [ "${cuda_version}" = "11.2" ]; then # Anaconda channel, which is default main channel, doesn't provide cudatoolkit=11.1, 11.2 now (Any pytorch version doesn't provide cuda=11.2). 
# https://anaconda.org/anaconda/cudatoolkit/files From b7cfdd9a70559271e45de103e242228f94e837ff Mon Sep 17 00:00:00 2001 From: kamo-naoyuki Date: Thu, 12 May 2022 18:05:41 +0900 Subject: [PATCH 04/22] Change LooseVersion to parse --- tools/installers/install_chainer.sh | 4 ++-- tools/installers/install_fairscale.sh | 4 ++-- tools/installers/install_fairseq.sh | 4 ++-- tools/installers/install_k2.sh | 6 +++--- tools/installers/install_longformer.sh | 4 ++-- tools/installers/install_s3prl.sh | 4 ++-- tools/installers/install_speechbrain.sh | 2 +- tools/installers/install_torch.sh | 4 ++-- tools/installers/install_torch_optimizer.sh | 4 ++-- tools/installers/install_warp-ctc.sh | 6 +++--- tools/installers/install_warp-transducer.sh | 2 +- 11 files changed, 22 insertions(+), 22 deletions(-) diff --git a/tools/installers/install_chainer.sh b/tools/installers/install_chainer.sh index 9ce037f68f6..9000bfb0d5a 100755 --- a/tools/installers/install_chainer.sh +++ b/tools/installers/install_chainer.sh @@ -22,7 +22,7 @@ python_version=$(python3 -c "import sys; print(sys.version.split()[0])") cuda_version_without_dot="${cuda_version/\./}" python_plus(){ python3 <= L('$1'): print("true") else: @@ -31,7 +31,7 @@ EOF } cuda_plus(){ python3 <= L('$1'): print("true") else: diff --git a/tools/installers/install_fairscale.sh b/tools/installers/install_fairscale.sh index 876c0b31ead..4988a75736d 100755 --- a/tools/installers/install_fairscale.sh +++ b/tools/installers/install_fairscale.sh @@ -9,7 +9,7 @@ fi torch_version=$(python3 -c "import torch; print(torch.__version__)") python_36_plus=$(python3 <= V("3.6"): @@ -22,7 +22,7 @@ EOF pt_plus(){ python3 <= L('$1'): print("true") else: diff --git a/tools/installers/install_fairseq.sh b/tools/installers/install_fairseq.sh index 780d8ce81b0..e13970c036f 100755 --- a/tools/installers/install_fairseq.sh +++ b/tools/installers/install_fairseq.sh @@ -9,7 +9,7 @@ fi torch_version=$(python3 -c "import torch; print(torch.__version__)") python_36_plus=$(python3 <= V("3.6"): @@ -22,7 +22,7 @@ EOF pt_plus(){ python3 <= L('$1'): print("true") else: diff --git a/tools/installers/install_k2.sh b/tools/installers/install_k2.sh index 667edb86a03..d41c16d39e7 100755 --- a/tools/installers/install_k2.sh +++ b/tools/installers/install_k2.sh @@ -27,7 +27,7 @@ fi python_36_plus=$(python3 <= V("3.6"): @@ -64,7 +64,7 @@ libc_version="$(${libc_path} | grep "GNU C Library" | grep -oP "version [0-9]*.[ pytorch_plus(){ python3 <= L('$1'): print("true") else: @@ -74,7 +74,7 @@ EOF libc_plus(){ python3 <= L('$1'): print("true") else: diff --git a/tools/installers/install_longformer.sh b/tools/installers/install_longformer.sh index c942abb0dd9..d054fad50ea 100755 --- a/tools/installers/install_longformer.sh +++ b/tools/installers/install_longformer.sh @@ -9,7 +9,7 @@ fi torch_version=$(python3 -c "import torch; print(torch.__version__)") python_36_plus=$(python3 <= V("3.6"): @@ -21,7 +21,7 @@ EOF pt_plus(){ python3 <= L('$1'): print("true") else: diff --git a/tools/installers/install_s3prl.sh b/tools/installers/install_s3prl.sh index 66f38af0e36..eeea6946c9c 100755 --- a/tools/installers/install_s3prl.sh +++ b/tools/installers/install_s3prl.sh @@ -10,7 +10,7 @@ if [ $# != 0 ]; then fi torch_17_plus=$(python3 <= V("1.7"): @@ -21,7 +21,7 @@ EOF ) python_36_plus=$(python3 <= V("3.6"): diff --git a/tools/installers/install_speechbrain.sh b/tools/installers/install_speechbrain.sh index b3c2310206e..cb26a78f3ad 100755 --- a/tools/installers/install_speechbrain.sh +++ 
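Note on the hunk above: the CUDA 11.5 branch exists because the anaconda main channel does not host cudatoolkit=11.5, so the installer pulls it from conda-forge for that version. A minimal standalone sketch of the channel-selection pattern; the 11.5 mapping mirrors the hunk, while grouping 11.1/11.2 under conda-forge (per the comment above) and the "anaconda" default are assumptions for illustration:

    # Sketch: pick a conda channel that actually hosts the requested cudatoolkit.
    case "${cuda_version}" in
        11.5|11.1|11.2) cudatoolkit_channel=conda-forge ;;  # missing from the main channel
        *)              cudatoolkit_channel=anaconda ;;     # assumed default
    esac
    conda install -y "pytorch=${torch_version}" "torchaudio=${torchaudio_version}" \
        "cudatoolkit=${cuda_version}" -c pytorch -c "${cudatoolkit_channel}"
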
b/tools/installers/install_speechbrain.sh @@ -8,7 +8,7 @@ if [ $# != 0 ]; then fi torch_18_plus=$(python3 <= V("1.8"): diff --git a/tools/installers/install_torch.sh b/tools/installers/install_torch.sh index 6444f91562e..d542183db53 100755 --- a/tools/installers/install_torch.sh +++ b/tools/installers/install_torch.sh @@ -29,7 +29,7 @@ cuda_version_without_dot="${cuda_version/\./}" python_plus(){ python3 <= L('$1'): print("true") else: @@ -38,7 +38,7 @@ EOF } pytorch_plus(){ python3 <= L('$1'): print("true") else: diff --git a/tools/installers/install_torch_optimizer.sh b/tools/installers/install_torch_optimizer.sh index 5d8565deead..a4b42d4fade 100755 --- a/tools/installers/install_torch_optimizer.sh +++ b/tools/installers/install_torch_optimizer.sh @@ -9,7 +9,7 @@ fi torch_version=$(python3 -c "import torch; print(torch.__version__)") python_36_plus=$(python3 <= V("3.6"): @@ -22,7 +22,7 @@ EOF pt_plus(){ python3 <= L('$1'): print("true") else: diff --git a/tools/installers/install_warp-ctc.sh b/tools/installers/install_warp-ctc.sh index 38267d955d4..259146ea388 100755 --- a/tools/installers/install_warp-ctc.sh +++ b/tools/installers/install_warp-ctc.sh @@ -9,7 +9,7 @@ if [ $# != 0 ]; then fi torch_17_plus=$(python3 <= V("1.7"): @@ -20,7 +20,7 @@ EOF ) torch_11_plus=$(python3 <= V("1.1"): @@ -31,7 +31,7 @@ EOF ) torch_10_plus=$(python3 <= V("1.0"): diff --git a/tools/installers/install_warp-transducer.sh b/tools/installers/install_warp-transducer.sh index 9ed3ce18fc3..910083509d4 100755 --- a/tools/installers/install_warp-transducer.sh +++ b/tools/installers/install_warp-transducer.sh @@ -9,7 +9,7 @@ fi # TODO(kamo): Consider clang case # Note: Requires gcc>=4.9.2 to build extensions with pytorch>=1.0 if python3 -c 'import torch as t;assert t.__version__[0] == "1"' &> /dev/null; then \ - python3 -c "from distutils.version import LooseVersion as V;assert V('$(gcc -dumpversion)') >= V('4.9.2'), 'Requires gcc>=4.9.2'"; \ + python3 -c "from packaging.version import parse as V;assert V('$(gcc -dumpversion)') >= V('4.9.2'), 'Requires gcc>=4.9.2'"; \ fi rm -rf warp-transducer From 7d5242212403e740c4d5b8ebd9a346a991ea50a9 Mon Sep 17 00:00:00 2001 From: kamo-naoyuki Date: Thu, 12 May 2022 18:09:15 +0900 Subject: [PATCH 05/22] fix --- test/espnet2/train/test_reporter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/espnet2/train/test_reporter.py b/test/espnet2/train/test_reporter.py index c928c52523a..9cd796d665c 100644 --- a/test/espnet2/train/test_reporter.py +++ b/test/espnet2/train/test_reporter.py @@ -53,7 +53,7 @@ def test_register(weight1, weight2): desired[k] /= weight1 + weight2 for k1, k2 in reporter.get_all_keys(): - if k2 in ("time", "total_count"): + if k2 in ("time", "total_count", "gpu_max_cached_mem_GB", "gpu_cached_mem_GB"): continue np.testing.assert_allclose(reporter.get_value(k1, k2), desired[k2]) From f899a05768436cc38fb432d6f002ab667983abbd Mon Sep 17 00:00:00 2001 From: kamo-naoyuki Date: Thu, 12 May 2022 18:09:33 +0900 Subject: [PATCH 06/22] fix --- espnet/nets/pytorch_backend/nets_utils.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/espnet/nets/pytorch_backend/nets_utils.py b/espnet/nets/pytorch_backend/nets_utils.py index a21ff54a78e..4bfeeb96cb2 100644 --- a/espnet/nets/pytorch_backend/nets_utils.py +++ b/espnet/nets/pytorch_backend/nets_utils.py @@ -152,6 +152,10 @@ def make_pad_mask(lengths, xs=None, length_dim=-1, maxlen=None): if not isinstance(lengths, list): lengths = lengths.tolist() + else: + assert isinstance(lengths, 
torch.Tensor), type(lengths)
\ --asr-args "--model_conf extract_feats_in_collect_stats=false --max_epoch=1" - + echo "==== use_k2, num_paths == nll_batch_size, feats_type=raw, token_types=bpe, model_conf.extract_feats_in_collect_stats=False, normalize=utt_mvn ===" ./run.sh --num_paths 20 --nll_batch_size 20 --use_k2 true --ngpu 0 --stage 12 --stop-stage 13 --skip-upload false --feats-type "raw" --token-type "bpe" \ --feats_normalize "utterance_mvn" --lm-args "--max_epoch=1" --python "${python}" \ @@ -68,7 +68,7 @@ rm -rf exp dump data # NOTE(kan-bayashi): pytorch 1.4 - 1.6 works but 1.6 has a problem with CPU, # so we test this recipe using only pytorch > 1.6 here. # See also: https://github.com/pytorch/pytorch/issues/42446 -if python3 -c 'import torch as t; from distutils.version import LooseVersion as L; assert L(t.__version__) > L("1.6")' &> /dev/null; then +if python3 -c 'import torch as t; from packaging.version import parse as L; assert L(t.__version__) > L("1.6")' &> /dev/null; then ./run.sh --fs 22050 --tts_task gan_tts --feats_extract linear_spectrogram --feats_normalize none --inference_model latest.pth \ --ngpu 0 --stop-stage 8 --skip-upload false --train-args "--num_iters_per_epoch 1 --max_epoch 1" --python "${python}" rm -rf exp dump data @@ -76,7 +76,7 @@ fi cd "${cwd}" # [ESPnet2] test enh recipe -if python -c 'import torch as t; from distutils.version import LooseVersion as L; assert L(t.__version__) >= L("1.2.0")' &> /dev/null; then +if python -c 'import torch as t; from packaging.version import parse as L; assert L(t.__version__) >= L("1.2.0")' &> /dev/null; then cd ./egs2/mini_an4/enh1 echo "==== [ESPnet2] ENH ===" ./run.sh --stage 1 --stop-stage 1 --python "${python}" @@ -101,7 +101,7 @@ if python3 -c "import fairseq" &> /dev/null; then fi # [ESPnet2] test enh_asr1 recipe -if python -c 'import torch as t; from distutils.version import LooseVersion as L; assert L(t.__version__) >= L("1.2.0")' &> /dev/null; then +if python -c 'import torch as t; from packaging.version import parse as L; assert L(t.__version__) >= L("1.2.0")' &> /dev/null; then cd ./egs2/mini_an4/enh_asr1 echo "==== [ESPnet2] ENH_ASR ===" ./run.sh --ngpu 0 --stage 0 --stop-stage 15 --skip-upload_hf false --feats-type "raw" --spk-num 1 --enh_asr_args "--max_epoch=1 --enh_separator_conf num_spk=1" --python "${python}" @@ -122,7 +122,7 @@ done for t in ${token_types}; do ./run.sh --stage 5 --stop-stage 5 --tgt_token_type "${t}" --src_token_type "${t}" --python "${python}" done -for t in ${feats_types}; do +for t in ${feats_types}; do for t2 in ${token_types}; do echo "==== feats_type=${t}, token_types=${t2} ===" ./run.sh --ngpu 0 --stage 6 --stop-stage 13 --skip-upload false --feats-type "${t}" --tgt_token_type "${t2}" --src_token_type "${t2}" \ @@ -147,7 +147,7 @@ cd "${cwd}" # [ESPnet2] Validate configuration files echo "" > dummy_token_list echo "==== [ESPnet2] Validation configuration files ===" -if python3 -c 'import torch as t; from distutils.version import LooseVersion as L; assert L(t.__version__) >= L("1.8.0")' &> /dev/null; then +if python3 -c 'import torch as t; from packaging.version import parse as L; assert L(t.__version__) >= L("1.8.0")' &> /dev/null; then for f in egs2/*/asr1/conf/train_asr*.yaml; do if [ "$f" == "egs2/fsc/asr1/conf/train_asr.yaml" ]; then if ! 
python3 -c "import s3prl" > /dev/null; then diff --git a/egs2/aishell4/enh1/local/generate_fe_trainingdata.py.patch b/egs2/aishell4/enh1/local/generate_fe_trainingdata.py.patch index a7666a5a756..47c079997eb 100644 --- a/egs2/aishell4/enh1/local/generate_fe_trainingdata.py.patch +++ b/egs2/aishell4/enh1/local/generate_fe_trainingdata.py.patch @@ -2,9 +2,9 @@ +++ generate_fe_trainingdata.new.py @@ -1,8 +1,8 @@ #!/usr/bin/env python - + -import io -+from distutils.version import LooseVersion ++from packaging.version import parse as V import os -import subprocess +import sys @@ -14,17 +14,17 @@ @@ -12,6 +12,10 @@ import librosa import argparse - + + -+is_py_3_3_plus = LooseVersion(sys.version) > LooseVersion("3.3") ++is_py_3_3_plus = V(sys.version) > V("3.3") + + def get_line_context(file_path, line_number): return linecache.getline(file_path, line_number).strip() - + @@ -119,7 +123,7 @@ return data / max_val - + def add_noise(clean, noise, rir, snr): - random.seed(time.clock()) + random.seed(time.perf_counter() if is_py_3_3_plus else time.clock()) @@ -32,9 +32,9 @@ noise = add_reverb(noise, rir[:, 16:24]) noise = noise[:-7999] @@ -189,7 +193,7 @@ - + for i in range(args.wavnum): - + - random.seed(time.clock()) + random.seed(time.perf_counter() if is_py_3_3_plus else time.clock()) wav1idx = random.randint(0, len(open(wavlist1,'r').readlines())-1) diff --git a/egs2/fsc/asr1/run.sh b/egs2/fsc/asr1/run.sh index 70b42c7ac61..3cea8d9bbc7 100755 --- a/egs2/fsc/asr1/run.sh +++ b/egs2/fsc/asr1/run.sh @@ -9,7 +9,7 @@ train_set="train" valid_set="valid" test_sets="test valid" -if python3 -c 'import torch as t; from distutils.version import LooseVersion as L; assert L(t.__version__) >= L("1.7.0")' &> /dev/null; then +if python3 -c 'import torch as t; from packaging.version import parse as L; assert L(t.__version__) >= L("1.7.0")' &> /dev/null; then asr_config=conf/train_asr.yaml else asr_config=conf/tuning/train_asr_transformer_adam_specaug.yaml #s3prl is installed when pytorch > 1.7. Hence using default frontend diff --git a/egs2/fsc_challenge/asr1/run.sh b/egs2/fsc_challenge/asr1/run.sh index 70b42c7ac61..3cea8d9bbc7 100755 --- a/egs2/fsc_challenge/asr1/run.sh +++ b/egs2/fsc_challenge/asr1/run.sh @@ -9,7 +9,7 @@ train_set="train" valid_set="valid" test_sets="test valid" -if python3 -c 'import torch as t; from distutils.version import LooseVersion as L; assert L(t.__version__) >= L("1.7.0")' &> /dev/null; then +if python3 -c 'import torch as t; from packaging.version import parse as L; assert L(t.__version__) >= L("1.7.0")' &> /dev/null; then asr_config=conf/train_asr.yaml else asr_config=conf/tuning/train_asr_transformer_adam_specaug.yaml #s3prl is installed when pytorch > 1.7. Hence using default frontend diff --git a/egs2/fsc_unseen/asr1/run.sh b/egs2/fsc_unseen/asr1/run.sh index 70b42c7ac61..3cea8d9bbc7 100755 --- a/egs2/fsc_unseen/asr1/run.sh +++ b/egs2/fsc_unseen/asr1/run.sh @@ -9,7 +9,7 @@ train_set="train" valid_set="valid" test_sets="test valid" -if python3 -c 'import torch as t; from distutils.version import LooseVersion as L; assert L(t.__version__) >= L("1.7.0")' &> /dev/null; then +if python3 -c 'import torch as t; from packaging.version import parse as L; assert L(t.__version__) >= L("1.7.0")' &> /dev/null; then asr_config=conf/train_asr.yaml else asr_config=conf/tuning/train_asr_transformer_adam_specaug.yaml #s3prl is installed when pytorch > 1.7. 
Hence using default frontend diff --git a/espnet/asr/pytorch_backend/asr.py b/espnet/asr/pytorch_backend/asr.py index d487380bd3f..a83d9a27dc1 100644 --- a/espnet/asr/pytorch_backend/asr.py +++ b/espnet/asr/pytorch_backend/asr.py @@ -4,7 +4,7 @@ """Training/decoding definition for the speech recognition task.""" import copy -from distutils.version import LooseVersion +from packaging.version import parse as V import itertools import json import logging @@ -989,7 +989,7 @@ def recog(args): # It seems quantized LSTM only supports non-packed sequence before torch 1.4.0. # Reference issue: https://github.com/pytorch/pytorch/issues/27963 if ( - torch.__version__ < LooseVersion("1.4.0") + torch.__version__ < V("1.4.0") and "lstm" in train_args.etype and torch.nn.LSTM in q_config ): @@ -999,7 +999,7 @@ def recog(args): # Dunno why but weight_observer from dynamic quantized module must have # dtype=torch.qint8 with torch < 1.5 although dtype=torch.float16 is supported. - if args.quantize_dtype == "float16" and torch.__version__ < LooseVersion( + if args.quantize_dtype == "float16" and torch.__version__ < V( "1.5.0" ): raise ValueError( diff --git a/espnet/asr/pytorch_backend/recog.py b/espnet/asr/pytorch_backend/recog.py index 6c6d4ce1194..68fea23a144 100644 --- a/espnet/asr/pytorch_backend/recog.py +++ b/espnet/asr/pytorch_backend/recog.py @@ -1,6 +1,6 @@ """V2 backend for `asr_recog.py` using py:class:`espnet.nets.beam_search.BeamSearch`.""" -from distutils.version import LooseVersion +from packaging.version import parse as V import json import logging @@ -54,7 +54,7 @@ def recog_v2(args): # See https://github.com/espnet/espnet/pull/3616 for more information. if ( - torch.__version__ < LooseVersion("1.4.0") + torch.__version__ < V("1.4.0") and "lstm" in train_args.etype and torch.nn.LSTM in q_config ): @@ -62,7 +62,7 @@ def recog_v2(args): "Quantized LSTM in ESPnet is only supported with torch 1.4+." 
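
The three run.sh hunks above share one gating idiom: run a tiny check under python3 -c, let a failing assert produce a nonzero exit status, and branch in shell on that status (stdout/stderr are discarded with &> /dev/null, so only the exit code matters). A condensed sketch of the idiom, using the config paths from the hunks above:

    # Branch on the installed torch version; a failed assert -> nonzero exit -> else branch.
    if python3 -c 'import torch as t; from packaging.version import parse as L; assert L(t.__version__) >= L("1.7.0")' &> /dev/null; then
        asr_config=conf/train_asr.yaml   # torch>=1.7: s3prl frontend is available
    else
        asr_config=conf/tuning/train_asr_transformer_adam_specaug.yaml   # default frontend
    fi
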
) - if args.quantize_dtype == "float16" and torch.__version__ < LooseVersion( + if args.quantize_dtype == "float16" and torch.__version__ < V( "1.5.0" ): raise ValueError( diff --git a/espnet/nets/pytorch_backend/ctc.py b/espnet/nets/pytorch_backend/ctc.py index f834967f645..c974df09b7a 100644 --- a/espnet/nets/pytorch_backend/ctc.py +++ b/espnet/nets/pytorch_backend/ctc.py @@ -1,4 +1,4 @@ -from distutils.version import LooseVersion +from packaging.version import parse as V import logging import numpy as np @@ -30,7 +30,7 @@ def __init__(self, odim, eprojs, dropout_rate, ctc_type="warpctc", reduce=True): # In case of Pytorch >= 1.7.0, CTC will be always builtin self.ctc_type = ( ctc_type - if LooseVersion(torch.__version__) < LooseVersion("1.7.0") + if V(torch.__version__) < V("1.7.0") else "builtin" ) diff --git a/espnet/nets/pytorch_backend/e2e_tts_transformer.py b/espnet/nets/pytorch_backend/e2e_tts_transformer.py index 9f860285d55..e71f1973fda 100644 --- a/espnet/nets/pytorch_backend/e2e_tts_transformer.py +++ b/espnet/nets/pytorch_backend/e2e_tts_transformer.py @@ -714,7 +714,7 @@ def forward(self, xs, ilens, ys, labels, olens, spembs=None, *args, **kwargs): labels = labels[:, :max_olen] # forward encoder - x_masks = self._source_mask(ilens) + x_masks = self._source_mask(ilens).to(xs.device) hs, h_masks = self.encoder(xs, x_masks) # integrate speaker embedding @@ -732,7 +732,7 @@ def forward(self, xs, ilens, ys, labels, olens, spembs=None, *args, **kwargs): ys_in = self._add_first_frame_and_remove_last_frame(ys_in) # forward decoder - y_masks = self._target_mask(olens_in) + y_masks = self._target_mask(olens_in).to(xs.device) zs, _ = self.decoder(ys_in, y_masks, hs, h_masks) # (B, Lmax//r, odim * r) -> (B, Lmax//r * r, odim) before_outs = self.feat_out(zs).view(zs.size(0), -1, self.odim) @@ -975,7 +975,7 @@ def calculate_all_attentions( self.eval() with torch.no_grad(): # forward encoder - x_masks = self._source_mask(ilens) + x_masks = self._source_mask(ilens).to(xs.device) hs, h_masks = self.encoder(xs, x_masks) # integrate speaker embedding @@ -994,7 +994,7 @@ def calculate_all_attentions( ys_in = self._add_first_frame_and_remove_last_frame(ys_in) # forward decoder - y_masks = self._target_mask(olens_in) + y_masks = self._target_mask(olens_in).to(xs.device) zs, _ = self.decoder(ys_in, y_masks, hs, h_masks) # calculate final outputs @@ -1097,7 +1097,7 @@ def _source_mask(self, ilens): [[1, 1, 1, 0, 0]]], dtype=torch.uint8) """ - x_masks = make_non_pad_mask(ilens).to(next(self.parameters()).device) + x_masks = make_non_pad_mask(ilens) return x_masks.unsqueeze(-2) def _target_mask(self, olens): @@ -1126,7 +1126,7 @@ def _target_mask(self, olens): [1, 1, 1, 0, 0]]], dtype=torch.uint8) """ - y_masks = make_non_pad_mask(olens).to(next(self.parameters()).device) + y_masks = make_non_pad_mask(olens) s_masks = subsequent_mask(y_masks.size(-1), device=y_masks.device).unsqueeze(0) return y_masks.unsqueeze(-2) & s_masks diff --git a/espnet2/asr/espnet_model.py b/espnet2/asr/espnet_model.py index 08c10182a83..5756598d2ff 100644 --- a/espnet2/asr/espnet_model.py +++ b/espnet2/asr/espnet_model.py @@ -1,5 +1,5 @@ from contextlib import contextmanager -from distutils.version import LooseVersion +from packaging.version import parse as V import logging from typing import Dict from typing import List @@ -29,7 +29,7 @@ from espnet2.torch_utils.device_funcs import force_gatherable from espnet2.train.abs_espnet_model import AbsESPnetModel -if LooseVersion(torch.__version__) >= LooseVersion("1.6.0"): +if 
V(torch.__version__) >= V("1.6.0"): from torch.cuda.amp import autocast else: # Nothing to do if torch<1.6.0 diff --git a/espnet2/asr/maskctc_model.py b/espnet2/asr/maskctc_model.py index 26cf7a90956..10d91de94c5 100644 --- a/espnet2/asr/maskctc_model.py +++ b/espnet2/asr/maskctc_model.py @@ -1,5 +1,5 @@ from contextlib import contextmanager -from distutils.version import LooseVersion +from packaging.version import parse as V from itertools import groupby import logging from typing import Dict @@ -31,7 +31,7 @@ from espnet2.text.token_id_converter import TokenIDConverter from espnet2.torch_utils.device_funcs import force_gatherable -if LooseVersion(torch.__version__) >= LooseVersion("1.6.0"): +if V(torch.__version__) >= V("1.6.0"): from torch.cuda.amp import autocast else: # Nothing to do if torch<1.6.0 diff --git a/espnet2/bin/tts_inference.py b/espnet2/bin/tts_inference.py index 683074d2eb0..6e3da15f0de 100755 --- a/espnet2/bin/tts_inference.py +++ b/espnet2/bin/tts_inference.py @@ -8,7 +8,7 @@ import sys import time -from distutils.version import LooseVersion +from packaging.version import parse as V from pathlib import Path from typing import Any from typing import Dict @@ -300,7 +300,7 @@ def from_pretrained( from parallel_wavegan import __version__ # NOTE(kan-bayashi): Filelock download is supported from 0.5.2 - assert LooseVersion(__version__) > LooseVersion("0.5.1"), ( + assert V(__version__) > V("0.5.1"), ( "Please install the latest parallel_wavegan " "via `pip install -U parallel_wavegan`." ) diff --git a/espnet2/diar/espnet_model.py b/espnet2/diar/espnet_model.py index 1e1d10af15e..92b434e7642 100644 --- a/espnet2/diar/espnet_model.py +++ b/espnet2/diar/espnet_model.py @@ -2,7 +2,7 @@ # Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) from contextlib import contextmanager -from distutils.version import LooseVersion +from packaging.version import parse as V from itertools import permutations from typing import Dict from typing import Optional @@ -22,7 +22,7 @@ from espnet2.torch_utils.device_funcs import force_gatherable from espnet2.train.abs_espnet_model import AbsESPnetModel -if LooseVersion(torch.__version__) >= LooseVersion("1.6.0"): +if V(torch.__version__) >= V("1.6.0"): from torch.cuda.amp import autocast else: # Nothing to do if torch<1.6.0 diff --git a/espnet2/enh/decoder/stft_decoder.py b/espnet2/enh/decoder/stft_decoder.py index e9d3bae5c2d..93768dd2484 100644 --- a/espnet2/enh/decoder/stft_decoder.py +++ b/espnet2/enh/decoder/stft_decoder.py @@ -1,11 +1,11 @@ -from distutils.version import LooseVersion +from packaging.version import parse as V import torch from torch_complex.tensor import ComplexTensor from espnet2.enh.decoder.abs_decoder import AbsDecoder from espnet2.layers.stft import Stft -is_torch_1_9_plus = LooseVersion(torch.__version__) >= LooseVersion("1.9.0") +is_torch_1_9_plus = V(torch.__version__) >= V("1.9.0") class STFTDecoder(AbsDecoder): diff --git a/espnet2/enh/encoder/stft_encoder.py b/espnet2/enh/encoder/stft_encoder.py index b2ab65e5532..2c1f68934d5 100644 --- a/espnet2/enh/encoder/stft_encoder.py +++ b/espnet2/enh/encoder/stft_encoder.py @@ -1,11 +1,11 @@ -from distutils.version import LooseVersion +from packaging.version import parse as V import torch from torch_complex.tensor import ComplexTensor from espnet2.enh.encoder.abs_encoder import AbsEncoder from espnet2.layers.stft import Stft -is_torch_1_9_plus = LooseVersion(torch.__version__) >= LooseVersion("1.9.0") +is_torch_1_9_plus = V(torch.__version__) >= V("1.9.0") class 
STFTEncoder(AbsEncoder): diff --git a/espnet2/enh/espnet_enh_s2t_model.py b/espnet2/enh/espnet_enh_s2t_model.py index 51746f9fbc1..c2e05654fce 100644 --- a/espnet2/enh/espnet_enh_s2t_model.py +++ b/espnet2/enh/espnet_enh_s2t_model.py @@ -1,5 +1,5 @@ from contextlib import contextmanager -from distutils.version import LooseVersion +from packaging.version import parse as V import logging import random from typing import Dict @@ -16,7 +16,7 @@ from espnet2.torch_utils.device_funcs import force_gatherable from espnet2.train.abs_espnet_model import AbsESPnetModel -if LooseVersion(torch.__version__) >= LooseVersion("1.6.0"): +if V(torch.__version__) >= V("1.6.0"): from torch.cuda.amp import autocast else: # Nothing to do if torch<1.6.0 diff --git a/espnet2/enh/espnet_model.py b/espnet2/enh/espnet_model.py index 75bb57094f4..06d9f72902e 100644 --- a/espnet2/enh/espnet_model.py +++ b/espnet2/enh/espnet_model.py @@ -1,5 +1,5 @@ """Enhancement model module.""" -from distutils.version import LooseVersion +from packaging.version import parse as V from typing import Dict from typing import List from typing import Optional @@ -20,7 +20,7 @@ from espnet2.train.abs_espnet_model import AbsESPnetModel -is_torch_1_9_plus = LooseVersion(torch.__version__) >= LooseVersion("1.9.0") +is_torch_1_9_plus = V(torch.__version__) >= V("1.9.0") EPS = torch.finfo(torch.get_default_dtype()).eps diff --git a/espnet2/enh/layers/beamformer.py b/espnet2/enh/layers/beamformer.py index e3d61d2489f..2ceeee6c728 100644 --- a/espnet2/enh/layers/beamformer.py +++ b/espnet2/enh/layers/beamformer.py @@ -1,5 +1,5 @@ """Beamformer module.""" -from distutils.version import LooseVersion +from packaging.version import parse as V from typing import List from typing import Optional from typing import Union @@ -20,7 +20,7 @@ from espnet2.enh.layers.complex_utils import to_double -is_torch_1_9_plus = LooseVersion(torch.__version__) >= LooseVersion("1.9.0") +is_torch_1_9_plus = V(torch.__version__) >= V("1.9.0") EPS = torch.finfo(torch.double).eps diff --git a/espnet2/enh/layers/complex_utils.py b/espnet2/enh/layers/complex_utils.py index acfbe2f61a8..329eee35d7c 100644 --- a/espnet2/enh/layers/complex_utils.py +++ b/espnet2/enh/layers/complex_utils.py @@ -1,5 +1,5 @@ """Beamformer module.""" -from distutils.version import LooseVersion +from packaging.version import parse as V from typing import Sequence from typing import Tuple from typing import Union @@ -10,8 +10,8 @@ EPS = torch.finfo(torch.double).eps -is_torch_1_8_plus = LooseVersion(torch.__version__) >= LooseVersion("1.8.0") -is_torch_1_9_plus = LooseVersion(torch.__version__) >= LooseVersion("1.9.0") +is_torch_1_8_plus = V(torch.__version__) >= V("1.8.0") +is_torch_1_9_plus = V(torch.__version__) >= V("1.9.0") def new_complex_like( diff --git a/espnet2/enh/layers/dnn_beamformer.py b/espnet2/enh/layers/dnn_beamformer.py index 40b264dcea9..be4c3622e40 100644 --- a/espnet2/enh/layers/dnn_beamformer.py +++ b/espnet2/enh/layers/dnn_beamformer.py @@ -1,5 +1,5 @@ """DNN beamformer module.""" -from distutils.version import LooseVersion +from packaging.version import parse as V from typing import List from typing import Optional from typing import Tuple @@ -30,7 +30,7 @@ from espnet2.enh.layers.mask_estimator import MaskEstimator -is_torch_1_9_plus = LooseVersion(torch.__version__) >= LooseVersion("1.9.0") +is_torch_1_9_plus = V(torch.__version__) >= V("1.9.0") BEAMFORMER_TYPES = ( # Minimum Variance Distortionless Response beamformer diff --git a/espnet2/enh/layers/mask_estimator.py 
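
For context on why these is_torch_*_plus flags move from LooseVersion to packaging.version.parse: distutils.version is deprecated along with the rest of distutils (PEP 632), and LooseVersion does not implement PEP 440, so pre-releases can be ordered incorrectly and comparisons over mixed alphanumeric fields can raise TypeError on Python 3. parse() handles release, pre-release, and local-version segments as torch actually emits them. A minimal sketch, assuming only that the packaging package is installed:

    from packaging.version import parse as V

    # PEP 440 semantics: a pre-release sorts *below* its final release ...
    assert V("1.10.0a0") < V("1.10.0")
    # ... and local build labels (as in torch CUDA/nightly builds) compare cleanly.
    assert V("1.11.0+cu113") >= V("1.11.0")

    import torch
    is_torch_1_9_plus = V(torch.__version__) >= V("1.9.0")
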
b/espnet2/enh/layers/mask_estimator.py index daea80f79ec..6f40c66ddfe 100644 --- a/espnet2/enh/layers/mask_estimator.py +++ b/espnet2/enh/layers/mask_estimator.py @@ -1,4 +1,4 @@ -from distutils.version import LooseVersion +from packaging.version import parse as V from typing import Tuple from typing import Union @@ -13,7 +13,7 @@ from espnet2.enh.layers.complex_utils import is_complex -is_torch_1_9_plus = LooseVersion(torch.__version__) >= LooseVersion("1.9.0") +is_torch_1_9_plus = V(torch.__version__) >= V("1.9.0") class MaskEstimator(torch.nn.Module): diff --git a/espnet2/enh/layers/wpe.py b/espnet2/enh/layers/wpe.py index a9760325030..e6117b89786 100644 --- a/espnet2/enh/layers/wpe.py +++ b/espnet2/enh/layers/wpe.py @@ -1,4 +1,4 @@ -from distutils.version import LooseVersion +from packaging.version import parse as V from typing import Tuple from typing import Union @@ -12,7 +12,7 @@ from espnet2.enh.layers.complex_utils import reverse -is_torch_1_9_plus = LooseVersion(torch.__version__) >= LooseVersion("1.9.0") +is_torch_1_9_plus = V(torch.__version__) >= V("1.9.0") """ WPE pytorch version: Ported from https://github.com/fgnt/nara_wpe diff --git a/espnet2/enh/loss/criterions/tf_domain.py b/espnet2/enh/loss/criterions/tf_domain.py index c94678e4244..cb81d7cf25d 100644 --- a/espnet2/enh/loss/criterions/tf_domain.py +++ b/espnet2/enh/loss/criterions/tf_domain.py @@ -1,6 +1,6 @@ from abc import ABC from abc import abstractmethod -from distutils.version import LooseVersion +from packaging.version import parse as V from functools import reduce import math @@ -13,7 +13,7 @@ from espnet2.enh.loss.criterions.abs_loss import AbsEnhLoss -is_torch_1_9_plus = LooseVersion(torch.__version__) >= LooseVersion("1.9.0") +is_torch_1_9_plus = V(torch.__version__) >= V("1.9.0") EPS = torch.finfo(torch.get_default_dtype()).eps diff --git a/espnet2/enh/separator/conformer_separator.py b/espnet2/enh/separator/conformer_separator.py index 5a9031f441d..3e3574beade 100644 --- a/espnet2/enh/separator/conformer_separator.py +++ b/espnet2/enh/separator/conformer_separator.py @@ -1,5 +1,5 @@ from collections import OrderedDict -from distutils.version import LooseVersion +from packaging.version import parse as V from typing import Dict from typing import List from typing import Optional @@ -17,7 +17,7 @@ from espnet2.enh.separator.abs_separator import AbsSeparator -is_torch_1_9_plus = LooseVersion(torch.__version__) >= LooseVersion("1.9.0") +is_torch_1_9_plus = V(torch.__version__) >= V("1.9.0") class ConformerSeparator(AbsSeparator): diff --git a/espnet2/enh/separator/dc_crn_separator.py b/espnet2/enh/separator/dc_crn_separator.py index fa4ed14bc89..b3f9be4fddd 100644 --- a/espnet2/enh/separator/dc_crn_separator.py +++ b/espnet2/enh/separator/dc_crn_separator.py @@ -1,5 +1,5 @@ from collections import OrderedDict -from distutils.version import LooseVersion +from packaging.version import parse as V from typing import Dict from typing import List from typing import Optional @@ -16,7 +16,7 @@ EPS = torch.finfo(torch.get_default_dtype()).eps -is_torch_1_9_plus = LooseVersion(torch.__version__) >= LooseVersion("1.9.0") +is_torch_1_9_plus = V(torch.__version__) >= V("1.9.0") class DC_CRNSeparator(AbsSeparator): diff --git a/espnet2/enh/separator/dccrn_separator.py b/espnet2/enh/separator/dccrn_separator.py index a97def4e905..74f793d14bd 100644 --- a/espnet2/enh/separator/dccrn_separator.py +++ b/espnet2/enh/separator/dccrn_separator.py @@ -1,5 +1,5 @@ from collections import OrderedDict -from distutils.version import 
LooseVersion
+from packaging.version import parse as V
packaging.version import parse as V from typing import Dict from typing import List from typing import Optional @@ -22,7 +22,7 @@ from espnet2.enh.separator.abs_separator import AbsSeparator -is_torch_1_9_plus = LooseVersion(torch.__version__) >= LooseVersion("1.9.0") +is_torch_1_9_plus = V(torch.__version__) >= V("1.9.0") class TransformerSeparator(AbsSeparator): diff --git a/espnet2/gan_tts/espnet_model.py b/espnet2/gan_tts/espnet_model.py index 34ca845f0fd..5cc1785a4d5 100644 --- a/espnet2/gan_tts/espnet_model.py +++ b/espnet2/gan_tts/espnet_model.py @@ -4,7 +4,7 @@ """GAN-based text-to-speech ESPnet model.""" from contextlib import contextmanager -from distutils.version import LooseVersion +from packaging.version import parse as V from typing import Any from typing import Dict from typing import Optional @@ -19,7 +19,7 @@ from espnet2.train.abs_gan_espnet_model import AbsGANESPnetModel from espnet2.tts.feats_extract.abs_feats_extract import AbsFeatsExtract -if LooseVersion(torch.__version__) >= LooseVersion("1.6.0"): +if V(torch.__version__) >= V("1.6.0"): from torch.cuda.amp import autocast else: # Nothing to do if torch < 1.6.0 diff --git a/espnet2/hubert/espnet_model.py b/espnet2/hubert/espnet_model.py index 4fa775841bc..35468bde93e 100644 --- a/espnet2/hubert/espnet_model.py +++ b/espnet2/hubert/espnet_model.py @@ -7,7 +7,7 @@ # Code in Fairseq: https://github.com/pytorch/fairseq/tree/master/examples/hubert from contextlib import contextmanager -from distutils.version import LooseVersion +from packaging.version import parse as V from typing import Dict from typing import List from typing import Optional @@ -28,7 +28,7 @@ from espnet2.torch_utils.device_funcs import force_gatherable from espnet2.train.abs_espnet_model import AbsESPnetModel -if LooseVersion(torch.__version__) >= LooseVersion("1.6.0"): +if V(torch.__version__) >= V("1.6.0"): from torch.cuda.amp import autocast else: # Nothing to do if torch<1.6.0 diff --git a/espnet2/layers/stft.py b/espnet2/layers/stft.py index b888bfede82..847469bbd4a 100644 --- a/espnet2/layers/stft.py +++ b/espnet2/layers/stft.py @@ -1,4 +1,4 @@ -from distutils.version import LooseVersion +from packaging.version import parse as V from typing import Optional from typing import Tuple from typing import Union @@ -13,10 +13,10 @@ import librosa import numpy as np -is_torch_1_9_plus = LooseVersion(torch.__version__) >= LooseVersion("1.9.0") +is_torch_1_9_plus = V(torch.__version__) >= V("1.9.0") -is_torch_1_7_plus = LooseVersion(torch.__version__) >= LooseVersion("1.7") +is_torch_1_7_plus = V(torch.__version__) >= V("1.7") class Stft(torch.nn.Module, InversibleInterface): @@ -182,7 +182,7 @@ def inverse( wavs: (batch, samples) ilens: (batch,) """ - if LooseVersion(torch.__version__) >= LooseVersion("1.6.0"): + if V(torch.__version__) >= V("1.6.0"): istft = torch.functional.istft else: try: diff --git a/espnet2/mt/espnet_model.py b/espnet2/mt/espnet_model.py index 953d5bc02f8..b937cbe3dfd 100644 --- a/espnet2/mt/espnet_model.py +++ b/espnet2/mt/espnet_model.py @@ -1,5 +1,5 @@ from contextlib import contextmanager -from distutils.version import LooseVersion +from packaging.version import parse as V import logging from typing import Dict from typing import List @@ -24,7 +24,7 @@ from espnet2.torch_utils.device_funcs import force_gatherable from espnet2.train.abs_espnet_model import AbsESPnetModel -if LooseVersion(torch.__version__) >= LooseVersion("1.6.0"): +if V(torch.__version__) >= V("1.6.0"): from torch.cuda.amp import autocast else: # Nothing to do 
if torch<1.6.0 diff --git a/espnet2/st/espnet_model.py b/espnet2/st/espnet_model.py index ee744681bd7..fb8fcfdaee9 100644 --- a/espnet2/st/espnet_model.py +++ b/espnet2/st/espnet_model.py @@ -1,5 +1,5 @@ from contextlib import contextmanager -from distutils.version import LooseVersion +from packaging.version import parse as V import logging from typing import Dict from typing import List @@ -28,7 +28,7 @@ from espnet2.torch_utils.device_funcs import force_gatherable from espnet2.train.abs_espnet_model import AbsESPnetModel -if LooseVersion(torch.__version__) >= LooseVersion("1.6.0"): +if V(torch.__version__) >= V("1.6.0"): from torch.cuda.amp import autocast else: # Nothing to do if torch<1.6.0 diff --git a/espnet2/tasks/abs_task.py b/espnet2/tasks/abs_task.py index 37dda7259e9..54c4cd26a43 100644 --- a/espnet2/tasks/abs_task.py +++ b/espnet2/tasks/abs_task.py @@ -3,7 +3,7 @@ from abc import abstractmethod import argparse from dataclasses import dataclass -from distutils.version import LooseVersion +from packaging.version import parse as V import functools import logging import os @@ -76,7 +76,7 @@ except Exception: wandb = None -if LooseVersion(torch.__version__) >= LooseVersion("1.5.0"): +if V(torch.__version__) >= V("1.5.0"): from torch.multiprocessing.spawn import ProcessContext else: from torch.multiprocessing.spawn import SpawnContext as ProcessContext @@ -94,7 +94,7 @@ rmsprop=torch.optim.RMSprop, rprop=torch.optim.Rprop, ) -if LooseVersion(torch.__version__) >= LooseVersion("1.10.0"): +if V(torch.__version__) >= V("1.10.0"): # From 1.10.0, RAdam is officially supported optim_classes.update( radam=torch.optim.RAdam, @@ -116,7 +116,7 @@ sgdw=torch_optimizer.SGDW, yogi=torch_optimizer.Yogi, ) - if LooseVersion(torch_optimizer.__version__) < LooseVersion("0.2.0"): + if V(torch_optimizer.__version__) < V("0.2.0"): # From 0.2.0, RAdam is dropped optim_classes.update( radam=torch_optimizer.RAdam, diff --git a/espnet2/train/gan_trainer.py b/espnet2/train/gan_trainer.py index 0d3cc59bea0..cc0aa1ba95d 100644 --- a/espnet2/train/gan_trainer.py +++ b/espnet2/train/gan_trainer.py @@ -9,7 +9,7 @@ import time from contextlib import contextmanager -from distutils.version import LooseVersion +from packaging.version import parse as V from typing import Dict from typing import Iterable from typing import List @@ -35,7 +35,7 @@ if torch.distributed.is_available(): from torch.distributed import ReduceOp -if LooseVersion(torch.__version__) >= LooseVersion("1.6.0"): +if V(torch.__version__) >= V("1.6.0"): from torch.cuda.amp import autocast from torch.cuda.amp import GradScaler else: diff --git a/espnet2/train/reporter.py b/espnet2/train/reporter.py index a3c03995b54..65b4ac6a9d8 100644 --- a/espnet2/train/reporter.py +++ b/espnet2/train/reporter.py @@ -3,7 +3,7 @@ from contextlib import contextmanager import dataclasses import datetime -from distutils.version import LooseVersion +from packaging.version import parse as V import logging from pathlib import Path import time @@ -357,7 +357,7 @@ def finish_epoch(self, sub_reporter: SubReporter) -> None: seconds=time.perf_counter() - sub_reporter.start_time ) stats["total_count"] = sub_reporter.total_count - if LooseVersion(torch.__version__) >= LooseVersion("1.4.0"): + if V(torch.__version__) >= V("1.4.0"): if torch.cuda.is_initialized(): stats["gpu_max_cached_mem_GB"] = ( torch.cuda.max_memory_reserved() / 2**30 diff --git a/espnet2/train/trainer.py b/espnet2/train/trainer.py index 304d3329264..6fe2726880d 100644 --- a/espnet2/train/trainer.py +++ 
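
The autocast guard repeated across these model files keeps the modules importable on torch<1.6.0, where torch.cuda.amp does not exist: on older torch the import is replaced by a no-op context manager so call sites can write "with autocast(False):" unconditionally. A minimal sketch of the pattern shown in the hunks (the no-op body matches the repository's fallback):

    from contextlib import contextmanager
    from packaging.version import parse as V
    import torch

    if V(torch.__version__) >= V("1.6.0"):
        from torch.cuda.amp import autocast
    else:
        # torch<1.6.0 has no AMP; substitute a no-op context manager so
        # "with autocast(...):" works unchanged at every call site.
        @contextmanager
        def autocast(enabled=True):
            yield
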
b/espnet2/train/trainer.py @@ -3,7 +3,7 @@ from contextlib import contextmanager import dataclasses from dataclasses import is_dataclass -from distutils.version import LooseVersion +from packaging.version import parse as V import logging from pathlib import Path import time @@ -42,7 +42,7 @@ if torch.distributed.is_available(): from torch.distributed import ReduceOp -if LooseVersion(torch.__version__) >= LooseVersion("1.6.0"): +if V(torch.__version__) >= V("1.6.0"): from torch.cuda.amp import autocast from torch.cuda.amp import GradScaler else: @@ -183,7 +183,7 @@ def run( output_dir = Path(trainer_options.output_dir) reporter = Reporter() if trainer_options.use_amp: - if LooseVersion(torch.__version__) < LooseVersion("1.6.0"): + if V(torch.__version__) < V("1.6.0"): raise RuntimeError( "Require torch>=1.6.0 for Automatic Mixed Precision" ) diff --git a/espnet2/tts/espnet_model.py b/espnet2/tts/espnet_model.py index e09c4a35a55..6cb88fe4b5b 100644 --- a/espnet2/tts/espnet_model.py +++ b/espnet2/tts/espnet_model.py @@ -4,7 +4,7 @@ """Text-to-speech ESPnet model.""" from contextlib import contextmanager -from distutils.version import LooseVersion +from packaging.version import parse as V from typing import Dict from typing import Optional from typing import Tuple @@ -19,7 +19,7 @@ from espnet2.tts.abs_tts import AbsTTS from espnet2.tts.feats_extract.abs_feats_extract import AbsFeatsExtract -if LooseVersion(torch.__version__) >= LooseVersion("1.6.0"): +if V(torch.__version__) >= V("1.6.0"): from torch.cuda.amp import autocast else: # Nothing to do if torch<1.6.0 diff --git a/espnet2/utils/griffin_lim.py b/espnet2/utils/griffin_lim.py index c1536d51b2b..3d4a948b7aa 100644 --- a/espnet2/utils/griffin_lim.py +++ b/espnet2/utils/griffin_lim.py @@ -7,7 +7,7 @@ import logging -from distutils.version import LooseVersion +from packaging.version import parse as V from functools import partial from typeguard import check_argument_types from typing import Optional @@ -77,7 +77,7 @@ def griffin_lim( # assert the size of input linear spectrogram assert spc.shape[1] == n_fft // 2 + 1 - if LooseVersion(librosa.__version__) >= LooseVersion("0.7.0"): + if V(librosa.__version__) >= V("0.7.0"): # use librosa's fast Grriffin-Lim algorithm spc = np.abs(spc.T) y = librosa.griffinlim( diff --git a/setup.py b/setup.py index dba53a97c7f..58755a756ba 100644 --- a/setup.py +++ b/setup.py @@ -4,7 +4,6 @@ import os -from distutils.version import LooseVersion from setuptools import find_packages from setuptools import setup diff --git a/test/espnet2/asr/frontend/test_s3prl.py b/test/espnet2/asr/frontend/test_s3prl.py index 0bfebb823b3..2c0f66e1ee6 100644 --- a/test/espnet2/asr/frontend/test_s3prl.py +++ b/test/espnet2/asr/frontend/test_s3prl.py @@ -1,10 +1,10 @@ -from distutils.version import LooseVersion +from packaging.version import parse as V import torch from espnet2.asr.frontend.s3prl import S3prlFrontend -is_torch_1_7_plus = LooseVersion(torch.__version__) >= LooseVersion("1.7.0") +is_torch_1_7_plus = V(torch.__version__) >= V("1.7.0") def test_frontend_init(): diff --git a/test/espnet2/enh/layers/test_complex_utils.py b/test/espnet2/enh/layers/test_complex_utils.py index e566f3aea76..6404f33eaa3 100644 --- a/test/espnet2/enh/layers/test_complex_utils.py +++ b/test/espnet2/enh/layers/test_complex_utils.py @@ -1,4 +1,4 @@ -from distutils.version import LooseVersion +from packaging.version import parse as V import numpy as np import pytest @@ -16,7 +16,7 @@ from espnet2.enh.layers.complex_utils import trace 
-is_torch_1_9_plus = LooseVersion(torch.__version__) >= LooseVersion("1.9.0") +is_torch_1_9_plus = V(torch.__version__) >= V("1.9.0") # invertible matrix mat_np = np.array( [ diff --git a/test/espnet2/enh/layers/test_enh_layers.py b/test/espnet2/enh/layers/test_enh_layers.py index 62f4554b10b..3d4f0a84ead 100644 --- a/test/espnet2/enh/layers/test_enh_layers.py +++ b/test/espnet2/enh/layers/test_enh_layers.py @@ -1,4 +1,4 @@ -from distutils.version import LooseVersion +from packaging.version import parse as V import numpy as np import pytest @@ -13,8 +13,8 @@ from espnet2.enh.layers.complex_utils import solve from espnet2.layers.stft import Stft -is_torch_1_1_plus = LooseVersion(torch.__version__) >= LooseVersion("1.1.0") -is_torch_1_9_plus = LooseVersion(torch.__version__) >= LooseVersion("1.9.0") +is_torch_1_1_plus = V(torch.__version__) >= V("1.1.0") +is_torch_1_9_plus = V(torch.__version__) >= V("1.9.0") random_speech = torch.tensor( diff --git a/test/espnet2/enh/loss/criterions/test_tf_domain.py b/test/espnet2/enh/loss/criterions/test_tf_domain.py index 9d1cec94a1d..117a16545db 100644 --- a/test/espnet2/enh/loss/criterions/test_tf_domain.py +++ b/test/espnet2/enh/loss/criterions/test_tf_domain.py @@ -1,4 +1,4 @@ -from distutils.version import LooseVersion +from packaging.version import parse as V import pytest import torch @@ -11,7 +11,7 @@ from espnet2.enh.loss.criterions.tf_domain import FrequencyDomainMSE -is_torch_1_9_plus = LooseVersion(torch.__version__) >= LooseVersion("1.9.0") +is_torch_1_9_plus = V(torch.__version__) >= V("1.9.0") @pytest.mark.parametrize("criterion_class", [FrequencyDomainL1, FrequencyDomainMSE]) diff --git a/test/espnet2/enh/separator/test_beamformer.py b/test/espnet2/enh/separator/test_beamformer.py index 3a10c7a9643..eddf317ee86 100644 --- a/test/espnet2/enh/separator/test_beamformer.py +++ b/test/espnet2/enh/separator/test_beamformer.py @@ -1,4 +1,4 @@ -from distutils.version import LooseVersion +from packaging.version import parse as V import pytest import torch @@ -7,7 +7,7 @@ from espnet2.enh.separator.neural_beamformer import NeuralBeamformer -is_torch_1_9_plus = LooseVersion(torch.__version__) >= LooseVersion("1.9.0") +is_torch_1_9_plus = V(torch.__version__) >= V("1.9.0") random_speech = torch.tensor( [ [ diff --git a/test/espnet2/enh/separator/test_dc_crn_separator.py b/test/espnet2/enh/separator/test_dc_crn_separator.py index 712de05e063..8f60b62399a 100644 --- a/test/espnet2/enh/separator/test_dc_crn_separator.py +++ b/test/espnet2/enh/separator/test_dc_crn_separator.py @@ -1,4 +1,4 @@ -from distutils.version import LooseVersion +from packaging.version import parse as V import pytest import torch @@ -8,7 +8,7 @@ from espnet2.enh.separator.dc_crn_separator import DC_CRNSeparator -is_torch_1_9_plus = LooseVersion(torch.__version__) >= LooseVersion("1.9.0") +is_torch_1_9_plus = V(torch.__version__) >= V("1.9.0") @pytest.mark.parametrize("input_dim", [33, 65]) diff --git a/test/espnet2/enh/separator/test_dccrn_separator.py b/test/espnet2/enh/separator/test_dccrn_separator.py index acf30c1ed98..3a075ac42ba 100644 --- a/test/espnet2/enh/separator/test_dccrn_separator.py +++ b/test/espnet2/enh/separator/test_dccrn_separator.py @@ -1,4 +1,4 @@ -from distutils.version import LooseVersion +from packaging.version import parse as V import pytest import torch @@ -6,7 +6,7 @@ from espnet2.enh.separator.dccrn_separator import DCCRNSeparator -is_torch_1_9_plus = LooseVersion(torch.__version__) >= LooseVersion("1.9.0") +is_torch_1_9_plus = V(torch.__version__) >= 
V("1.9.0") @pytest.mark.parametrize("input_dim", [9]) diff --git a/test/espnet2/enh/test_espnet_model.py b/test/espnet2/enh/test_espnet_model.py index 6985ab63e36..906b42bbac3 100644 --- a/test/espnet2/enh/test_espnet_model.py +++ b/test/espnet2/enh/test_espnet_model.py @@ -1,4 +1,4 @@ -from distutils.version import LooseVersion +from packaging.version import parse as V import pytest import torch @@ -26,7 +26,7 @@ from espnet2.enh.separator.transformer_separator import TransformerSeparator -is_torch_1_9_plus = LooseVersion(torch.__version__) >= LooseVersion("1.9.0") +is_torch_1_9_plus = V(torch.__version__) >= V("1.9.0") stft_encoder = STFTEncoder( diff --git a/test/espnet2/gan_tts/joint/test_joint_text2wav.py b/test/espnet2/gan_tts/joint/test_joint_text2wav.py index 1badd3a892f..f0ed087da20 100644 --- a/test/espnet2/gan_tts/joint/test_joint_text2wav.py +++ b/test/espnet2/gan_tts/joint/test_joint_text2wav.py @@ -3,7 +3,7 @@ """Test VITS related modules.""" -from distutils.version import LooseVersion +from packaging.version import parse as V import pytest import torch @@ -190,7 +190,7 @@ def make_loss_args(**kwargs): @pytest.mark.skipif( - LooseVersion(torch.__version__) < LooseVersion("1.4"), + V(torch.__version__) < V("1.4"), reason="Pytorch >= 1.4 is required.", ) @pytest.mark.skipif( diff --git a/test/espnet2/gan_tts/vits/test_generator.py b/test/espnet2/gan_tts/vits/test_generator.py index 7ac9f3f879e..9c17ed897ab 100644 --- a/test/espnet2/gan_tts/vits/test_generator.py +++ b/test/espnet2/gan_tts/vits/test_generator.py @@ -3,8 +3,6 @@ """Test VITS generator modules.""" -from distutils.version import LooseVersion - import pytest import torch @@ -66,10 +64,6 @@ def make_generator_args(**kwargs): # so a little bit more time is needed to run. Therefore, # here we extend execution timeout from 2 sec to 5 sec. @pytest.mark.execution_timeout(5) -@pytest.mark.skipif( - LooseVersion(torch.__version__) < LooseVersion("1.4"), - reason="Pytorch >= 1.4 is required.", -) @pytest.mark.skipif( "1.6" in torch.__version__, reason="group conv in pytorch 1.6 has an issue. " @@ -198,10 +192,6 @@ def test_vits_generator_forward(model_dict): print(f"{i+j+1}: {output_.shape}") -@pytest.mark.skipif( - LooseVersion(torch.__version__) < LooseVersion("1.4"), - reason="Pytorch >= 1.4 is required.", -) @pytest.mark.skipif( "1.6" in torch.__version__, reason="group conv in pytorch 1.6 has an issue. " diff --git a/test/espnet2/gan_tts/vits/test_vits.py b/test/espnet2/gan_tts/vits/test_vits.py index a35d8c66bf1..e749345e346 100644 --- a/test/espnet2/gan_tts/vits/test_vits.py +++ b/test/espnet2/gan_tts/vits/test_vits.py @@ -3,8 +3,6 @@ """Test VITS related modules.""" -from distutils.version import LooseVersion - import pytest import torch @@ -148,10 +146,6 @@ def make_vits_loss_args(**kwargs): return defaults -@pytest.mark.skipif( - LooseVersion(torch.__version__) < LooseVersion("1.4"), - reason="Pytorch >= 1.4 is required.", -) @pytest.mark.skipif( "1.6" in torch.__version__, reason="group conv in pytorch 1.6 has an issue. " @@ -349,10 +343,6 @@ def test_vits_is_trainable_and_decodable(gen_dict, dis_dict, loss_dict): assert output_dict["wav"].size(0) == inputs["feats"].size(0) * upsample_factor -@pytest.mark.skipif( - LooseVersion(torch.__version__) < LooseVersion("1.4"), - reason="Pytorch >= 1.4 is required.", -) @pytest.mark.skipif( "1.6" in torch.__version__, reason="Group conv in pytorch 1.6 has an issue. 
" @@ -588,10 +578,6 @@ def test_multi_speaker_vits_is_trainable_and_decodable( not torch.cuda.is_available(), reason="GPU is needed.", ) -@pytest.mark.skipif( - LooseVersion(torch.__version__) < LooseVersion("1.4"), - reason="Pytorch >= 1.4 is required.", -) @pytest.mark.skipif( "1.6" in torch.__version__, reason="group conv in pytorch 1.6 has an issue. " @@ -799,10 +785,6 @@ def test_vits_is_trainable_and_decodable_on_gpu(gen_dict, dis_dict, loss_dict): not torch.cuda.is_available(), reason="GPU is needed.", ) -@pytest.mark.skipif( - LooseVersion(torch.__version__) < LooseVersion("1.4"), - reason="Pytorch >= 1.4 is required.", -) @pytest.mark.skipif( "1.6" in torch.__version__, reason="Group conv in pytorch 1.6 has an issue. " diff --git a/test/test_custom_transducer.py b/test/test_custom_transducer.py index 34447581e6f..bf6101365cd 100644 --- a/test/test_custom_transducer.py +++ b/test/test_custom_transducer.py @@ -1,7 +1,7 @@ # coding: utf-8 import argparse -from distutils.version import LooseVersion +from packaging.version import parse as V import tempfile import json @@ -15,8 +15,7 @@ import espnet.nets.pytorch_backend.lm.default as lm_pytorch from espnet.nets.pytorch_backend.transducer.blocks import build_blocks -is_torch_1_4_plus = LooseVersion(torch.__version__) >= LooseVersion("1.4.0") -is_torch_1_5_plus = LooseVersion(torch.__version__) >= LooseVersion("1.5.0") +is_torch_1_5_plus = V(torch.__version__) >= V("1.5.0") def make_train_args(**kwargs): diff --git a/test/test_e2e_asr_transducer.py b/test/test_e2e_asr_transducer.py index 835f9bfe8ab..4a115433cfd 100644 --- a/test/test_e2e_asr_transducer.py +++ b/test/test_e2e_asr_transducer.py @@ -1,7 +1,7 @@ # coding: utf-8 import argparse -from distutils.version import LooseVersion +from packaging.version import parse as V import tempfile import json @@ -16,8 +16,8 @@ import espnet.nets.pytorch_backend.lm.default as lm_pytorch from espnet.nets.pytorch_backend.nets_utils import pad_list -is_torch_1_4_plus = LooseVersion(torch.__version__) >= LooseVersion("1.4.0") -is_torch_1_5_plus = LooseVersion(torch.__version__) >= LooseVersion("1.5.0") +is_torch_1_4_plus = V(torch.__version__) >= V("1.4.0") +is_torch_1_5_plus = V(torch.__version__) >= V("1.5.0") def get_default_train_args(**kwargs): diff --git a/tools/check_install.py b/tools/check_install.py index 82081986123..b8e522758b7 100644 --- a/tools/check_install.py +++ b/tools/check_install.py @@ -9,7 +9,7 @@ import shutil import sys -from distutils.version import LooseVersion +from packaging.version import parse module_list = [ ("torchaudio", None, None), @@ -77,7 +77,7 @@ def main(): import chainer print(f"[x] chainer={chainer.__version__}") - if LooseVersion(chainer.__version__) != LooseVersion("6.0.0"): + if parse(chainer.__version__) != parse("6.0.0"): print( f"Warning! chainer={chainer.__version__} is not supported. 
" "Supported version is 6.0.0" diff --git a/utils/convert_fbank_to_wav.py b/utils/convert_fbank_to_wav.py index e38feb90593..ccb4a9c439b 100755 --- a/utils/convert_fbank_to_wav.py +++ b/utils/convert_fbank_to_wav.py @@ -7,7 +7,7 @@ import logging import os -from distutils.version import LooseVersion +from packaging.version import parse as V import librosa import numpy as np @@ -66,7 +66,7 @@ def griffin_lim(spc, n_fft, n_shift, win_length, window="hann", n_iters=100): # assert the size of input linear spectrogram assert spc.shape[1] == n_fft // 2 + 1 - if LooseVersion(librosa.__version__) >= LooseVersion("0.7.0"): + if V(librosa.__version__) >= V("0.7.0"): # use librosa's fast Grriffin-Lim algorithm spc = np.abs(spc.T) y = librosa.griffinlim( From 6e9035d42eea31cad87a7c8b87fc79635a6df7c2 Mon Sep 17 00:00:00 2001 From: kamo-naoyuki Date: Thu, 12 May 2022 18:32:33 +0900 Subject: [PATCH 08/22] fix --- espnet/nets/pytorch_backend/nets_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/espnet/nets/pytorch_backend/nets_utils.py b/espnet/nets/pytorch_backend/nets_utils.py index 4bfeeb96cb2..3a7b1e079bc 100644 --- a/espnet/nets/pytorch_backend/nets_utils.py +++ b/espnet/nets/pytorch_backend/nets_utils.py @@ -153,7 +153,7 @@ def make_pad_mask(lengths, xs=None, length_dim=-1, maxlen=None): if not isinstance(lengths, list): lengths = lengths.tolist() else: - assert isinstance(lengths, torch.tensor), type(lengths) + assert isinstance(lengths, torch.Tensor), type(lengths) lengths = lengths.long() bs = int(len(lengths)) From b0050d97da3d0545b62a5d21b029ddd016ce6ca1 Mon Sep 17 00:00:00 2001 From: kamo-naoyuki Date: Thu, 12 May 2022 18:56:52 +0900 Subject: [PATCH 09/22] fix --- setup.py | 1 + tools/Makefile | 6 ++++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 58755a756ba..9fb44f87b25 100644 --- a/setup.py +++ b/setup.py @@ -11,6 +11,7 @@ requirements = { "install": [ "setuptools>=38.5.1", + "packaging", "configargparse>=1.2.1", "typeguard>=2.7.0", "humanfriendly", diff --git a/tools/Makefile b/tools/Makefile index 338fd8d22fe..87ccbd6d21a 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -28,7 +28,7 @@ endif all: kaldi showenv python conda_packages.done sctk.done sph2pipe.done check_install -python: activate_python.sh espnet.done pytorch.done chainer.done fairscale.done torch_optimizer.done +python: activate_python.sh packaging.done espnet.done pytorch.done chainer.done fairscale.done torch_optimizer.done extra: warp-ctc.done warp-transducer.done chainer_ctc.done nkf.done moses.done mwerSegmenter.done pesq kenlm.done pyopenjtalk.done py3mmseg.done beamformit.done fairseq.done s3prl.done k2.done transformers.done phonemizer.done longformer.done kaldi: @@ -84,8 +84,10 @@ sph2pipe.done: ./installers/install_sph2pipe.sh touch sph2pipe.done +packaging.done: activate_python.sh + . ./activate_python.sh && python3 -m pip install packaging -pytorch.done: activate_python.sh +pytorch.done: activate_python.sh packaging.done ifeq ($(strip $(USE_CONDA)),) # NOTE(kan-bayashi): Temporary fixed numpy version . 
./activate_python.sh && pip install "numpy<=1.21.3" From 8fbac77268906075043cbecfb3e1c5625b145fce Mon Sep 17 00:00:00 2001 From: kamo-naoyuki Date: Thu, 12 May 2022 18:59:17 +0900 Subject: [PATCH 10/22] fix --- tools/installers/install_torch.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/installers/install_torch.sh b/tools/installers/install_torch.sh index d542183db53..5ea48c0de8a 100755 --- a/tools/installers/install_torch.sh +++ b/tools/installers/install_torch.sh @@ -52,7 +52,7 @@ install_torch(){ log conda install -y "pytorch=${torch_version}" "torchaudio=$1" cpuonly -c pytorch conda install -y "pytorch=${torch_version}" "torchaudio=$1" cpuonly -c pytorch elif [ "${cuda_version}" = "11.5" ]; then - # NOTE(kamo): In my environment, conda-forge only could installed, but I don't know why @ 12, May, 2022 + # NOTE(kamo): In my environment, only the cudatoolkit from conda-forge could be installed, but I don't know why @ 12, May, 2022 cudatoolkit_channel=conda-forge log conda install -y "pytorch=${torch_version}" "torchaudio=$1" "cudatoolkit=${cuda_version}" -c pytorch -c "${cudatoolkit_channel}" conda install -y "pytorch=${torch_version}" "torchaudio=$1" "cudatoolkit=${cuda_version}" -c pytorch -c "${cudatoolkit_channel}" From 86186b744fb2bfc259909c49cc906fb0856d15bf Mon Sep 17 00:00:00 2001 From: kamo-naoyuki Date: Thu, 12 May 2022 19:10:18 +0900 Subject: [PATCH 11/22] add installation for packaging --- tools/installers/install_chainer.sh | 3 +++ tools/installers/install_fairscale.sh | 3 +++ tools/installers/install_fairseq.sh | 3 +++ tools/installers/install_k2.sh | 3 +++ tools/installers/install_longformer.sh | 3 +++ tools/installers/install_s3prl.sh | 3 +++ tools/installers/install_speechbrain.sh | 3 +++ tools/installers/install_torch.sh | 3 +++ tools/installers/install_torch_optimizer.sh | 3 +++ tools/installers/install_warp-ctc.sh | 3 +++ tools/installers/install_warp-transducer.sh | 3 +++ 11 files changed, 33 insertions(+) diff --git a/tools/installers/install_chainer.sh b/tools/installers/install_chainer.sh index 9000bfb0d5a..4ef3e4cdc58 100755 --- a/tools/installers/install_chainer.sh +++ b/tools/installers/install_chainer.sh @@ -16,6 +16,9 @@ if [ "${cuda_version}" = cpu ] || [ "${cuda_version}" = CPU ]; then fi +if ! python -c "import packaging.version" &> /dev/null; then + python3 -m pip install packaging +fi # espnet requires chiner=6.0.0 chainer_version=6.0.0 python_version=$(python3 -c "import sys; print(sys.version.split()[0])") diff --git a/tools/installers/install_fairscale.sh b/tools/installers/install_fairscale.sh index 4988a75736d..620b906ffd3 100755 --- a/tools/installers/install_fairscale.sh +++ b/tools/installers/install_fairscale.sh @@ -7,6 +7,9 @@ if [ $# != 0 ]; then exit 1; fi +if ! python -c "import packaging.version" &> /dev/null; then + python3 -m pip install packaging +fi torch_version=$(python3 -c "import torch; print(torch.__version__)") python_36_plus=$(python3 < /dev/null; then + python3 -m pip install packaging +fi torch_version=$(python3 -c "import torch; print(torch.__version__)") python_36_plus=$(python3 </dev/null) =~ pytorch ]] && echo true || echo false) fi +if !
python -c "import packaging.version" &> /dev/null; then + python3 -m pip install packaging +fi python_36_plus=$(python3 < /dev/null; then + python3 -m pip install packaging +fi torch_version=$(python3 -c "import torch; print(torch.__version__)") python_36_plus=$(python3 < /dev/null; then + python3 -m pip install packaging +fi torch_17_plus=$(python3 < /dev/null; then + python3 -m pip install packaging +fi torch_18_plus=$(python3 < /dev/null; then + python3 -m pip install packaging +fi if $(pytorch_plus 1.11.1); then log "[ERROR] This script doesn't support pytorch=${torch_version}" diff --git a/tools/installers/install_torch_optimizer.sh b/tools/installers/install_torch_optimizer.sh index a4b42d4fade..49fad9d504c 100755 --- a/tools/installers/install_torch_optimizer.sh +++ b/tools/installers/install_torch_optimizer.sh @@ -7,6 +7,9 @@ if [ $# != 0 ]; then exit 1; fi +if ! python -c "import packaging.version" &> /dev/null; then + python3 -m pip install packaging +fi torch_version=$(python3 -c "import torch; print(torch.__version__)") python_36_plus=$(python3 < /dev/null; then + python3 -m pip install packaging +fi torch_17_plus=$(python3 < /dev/null; then + python3 -m pip install packaging +fi # TODO(kamo): Consider clang case # Note: Requires gcc>=4.9.2 to build extensions with pytorch>=1.0 if python3 -c 'import torch as t;assert t.__version__[0] == "1"' &> /dev/null; then \ From 809ac3741814b7d9ebdd351b9e0e9343e236977c Mon Sep 17 00:00:00 2001 From: kamo-naoyuki Date: Thu, 12 May 2022 19:27:20 +0900 Subject: [PATCH 12/22] fix --- egs2/aishell4/enh1/local/generate_fe_trainingdata.py.patch | 2 +- tools/installers/install_fairscale.sh | 2 +- tools/installers/install_fairseq.sh | 2 +- tools/installers/install_k2.sh | 2 +- tools/installers/install_longformer.sh | 2 +- tools/installers/install_s3prl.sh | 2 +- tools/installers/install_torch_optimizer.sh | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/egs2/aishell4/enh1/local/generate_fe_trainingdata.py.patch b/egs2/aishell4/enh1/local/generate_fe_trainingdata.py.patch index 47c079997eb..9a23ef72207 100644 --- a/egs2/aishell4/enh1/local/generate_fe_trainingdata.py.patch +++ b/egs2/aishell4/enh1/local/generate_fe_trainingdata.py.patch @@ -16,7 +16,7 @@ import argparse + -+is_py_3_3_plus = V(sys.version) > V("3.3") ++is_py_3_3_plus = V("{}.{}.{}".format(*sys.version_info[:3])) > V("3.3") + + def get_line_context(file_path, line_number): diff --git a/tools/installers/install_fairscale.sh b/tools/installers/install_fairscale.sh index 620b906ffd3..436d5ae7b54 100755 --- a/tools/installers/install_fairscale.sh +++ b/tools/installers/install_fairscale.sh @@ -15,7 +15,7 @@ python_36_plus=$(python3 <= V("3.6"): +if V("{}.{}.{}".format(*sys.version_info[:3])) >= V("3.6"): print("true") else: print("false") diff --git a/tools/installers/install_fairseq.sh b/tools/installers/install_fairseq.sh index f4f12007688..61824378f6d 100755 --- a/tools/installers/install_fairseq.sh +++ b/tools/installers/install_fairseq.sh @@ -15,7 +15,7 @@ python_36_plus=$(python3 <= V("3.6"): +if V("{}.{}.{}".format(*sys.version_info[:3])) >= V("3.6"): print("true") else: print("false") diff --git a/tools/installers/install_k2.sh b/tools/installers/install_k2.sh index 03d21d7b873..6066584fd0a 100755 --- a/tools/installers/install_k2.sh +++ b/tools/installers/install_k2.sh @@ -33,7 +33,7 @@ python_36_plus=$(python3 <= V("3.6"): +if V("{}.{}.{}".format(*sys.version_info[:3])) >= V("3.6"): print("true") else: print("false") diff --git 
a/tools/installers/install_longformer.sh b/tools/installers/install_longformer.sh index 891fad4e611..04e817ecc36 100755 --- a/tools/installers/install_longformer.sh +++ b/tools/installers/install_longformer.sh @@ -15,7 +15,7 @@ python_36_plus=$(python3 <= V("3.6"): +if V("{}.{}.{}".format(*sys.version_info[:3])) >= V("3.6"): print("true") else: print("false") diff --git a/tools/installers/install_s3prl.sh b/tools/installers/install_s3prl.sh index 68ee4ec9ad6..b55092e3e30 100755 --- a/tools/installers/install_s3prl.sh +++ b/tools/installers/install_s3prl.sh @@ -27,7 +27,7 @@ python_36_plus=$(python3 <= V("3.6"): +if V("{}.{}.{}".format(*sys.version_info[:3])) >= V("3.6"): print("true") else: print("false") diff --git a/tools/installers/install_torch_optimizer.sh b/tools/installers/install_torch_optimizer.sh index 49fad9d504c..014ca1d0830 100755 --- a/tools/installers/install_torch_optimizer.sh +++ b/tools/installers/install_torch_optimizer.sh @@ -15,7 +15,7 @@ python_36_plus=$(python3 <= V("3.6"): +if V("{}.{}.{}".format(*sys.version_info[:3])) >= V("3.6"): print("true") else: print("false") From 5c4b966a957062e4de298bcb69fe8cf6f1365fd1 Mon Sep 17 00:00:00 2001 From: kamo-naoyuki Date: Thu, 12 May 2022 19:36:11 +0900 Subject: [PATCH 13/22] temporarily remove tests for python=3.10.0 --- .github/workflows/ci.yaml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 058dfea6288..92e0b29f582 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -31,11 +31,6 @@ jobs: pytorch-version: 1.11.0 chainer-verssion: 6.0.0 use-conda: false - - os: ubuntu-20.04 - python-version: "3.10" - pytorch-version: 1.11.0 - chainer-verssion: 6.0.0 - use-conda: false steps: - uses: actions/checkout@master - uses: actions/cache@v1 From 005aad11b37acf388c6b70143ab40a5231bc7a39 Mon Sep 17 00:00:00 2001 From: kamo-naoyuki Date: Thu, 12 May 2022 20:04:57 +0900 Subject: [PATCH 14/22] fix --- tools/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/Makefile b/tools/Makefile index 87ccbd6d21a..8936b69d4f9 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -86,6 +86,7 @@ sph2pipe.done: packaging.done: activate_python.sh . ./activate_python.sh && python3 -m pip install packaging + touch packaging.done pytorch.done: activate_python.sh packaging.done ifeq ($(strip $(USE_CONDA)),) From 5c474b96c543c3d26e95b432355bcfd2bf8dc116 Mon Sep 17 00:00:00 2001 From: kamo-naoyuki Date: Thu, 12 May 2022 20:20:18 +0900 Subject: [PATCH 15/22] remove verbosity options --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index a032ac70480..d17c8920e9a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -2,7 +2,7 @@ test=pytest [tool:pytest] -addopts = --cov-config=.coveragerc --verbose --durations=0 --cov=espnet --cov=espnet2 +addopts = --cov-config=.coveragerc --cov=espnet --cov=espnet2 testpaths = test execution_timeout = 2.0 From 934b161f1f714637c3d7d47c14f8c810a9df6fe2 Mon Sep 17 00:00:00 2001 From: kamo-naoyuki Date: Thu, 12 May 2022 20:33:58 +0900 Subject: [PATCH 16/22] change to show the error logs when jobs fail --- egs2/TEMPLATE/asr1/asr.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/egs2/TEMPLATE/asr1/asr.sh b/egs2/TEMPLATE/asr1/asr.sh index f4d7a8ad24a..65a0048eed9 100755 --- a/egs2/TEMPLATE/asr1/asr.sh +++ b/egs2/TEMPLATE/asr1/asr.sh @@ -771,7 +771,7 @@ if !
"${skip_train}"; then --train_shape_file "${_logdir}/train.JOB.scp" \ --valid_shape_file "${_logdir}/dev.JOB.scp" \ --output_dir "${_logdir}/stats.JOB" \ - ${_opts} ${lm_args} || { cat "${_logdir}"/stats.1.log; exit 1; } + ${_opts} ${lm_args} || { cat $(grep -l -i error "${_logdir}"/stats.*.log) ; exit 1; } # 4. Aggregate shape files _opts= @@ -985,7 +985,7 @@ if ! "${skip_train}"; then --train_shape_file "${_logdir}/train.JOB.scp" \ --valid_shape_file "${_logdir}/valid.JOB.scp" \ --output_dir "${_logdir}/stats.JOB" \ - ${_opts} ${asr_args} || { cat "${_logdir}"/stats.1.log; exit 1; } + ${_opts} ${asr_args} || { cat $(grep -l -i error "${_logdir}"/stats.*.log) ; exit 1; } # 4. Aggregate shape files _opts= @@ -1252,7 +1252,7 @@ if ! "${skip_eval}"; then --asr_train_config "${asr_exp}"/config.yaml \ --asr_model_file "${asr_exp}"/"${inference_asr_model}" \ --output_dir "${_logdir}"/output.JOB \ - ${_opts} ${inference_args} + ${_opts} ${inference_args} || { cat $(grep -l -i error "${_logdir}"/asr_inference.*.log) ; exit 1; } # 3. Concatenates the output files from each jobs for f in token token_int score text; do From bb0d0aaa9e9f9076ac88aad425ad2f2caef369a7 Mon Sep 17 00:00:00 2001 From: kamo-naoyuki Date: Thu, 12 May 2022 20:40:39 +0900 Subject: [PATCH 17/22] fix code style --- egs2/TEMPLATE/asr1/asr.sh | 6 +++--- espnet/asr/pytorch_backend/asr.py | 6 ++---- espnet/asr/pytorch_backend/recog.py | 4 +--- espnet/nets/pytorch_backend/ctc.py | 8 ++------ espnet/nets/pytorch_backend/e2e_tts_fastspeech.py | 6 +++--- espnet/nets/pytorch_backend/e2e_vc_transformer.py | 12 ++++++------ espnet/nets/pytorch_backend/nets_utils.py | 5 +---- espnet2/asr/espnet_model.py | 2 +- espnet2/asr/maskctc_model.py | 2 +- espnet2/diar/espnet_model.py | 2 +- espnet2/enh/espnet_enh_s2t_model.py | 2 +- espnet2/enh/loss/criterions/tf_domain.py | 2 +- espnet2/mt/espnet_model.py | 2 +- espnet2/st/espnet_model.py | 2 +- espnet2/tasks/abs_task.py | 2 +- espnet2/train/reporter.py | 2 +- espnet2/train/trainer.py | 2 +- espnet2/utils/griffin_lim.py | 2 +- 18 files changed, 29 insertions(+), 40 deletions(-) diff --git a/egs2/TEMPLATE/asr1/asr.sh b/egs2/TEMPLATE/asr1/asr.sh index 65a0048eed9..763aceb7a34 100755 --- a/egs2/TEMPLATE/asr1/asr.sh +++ b/egs2/TEMPLATE/asr1/asr.sh @@ -755,7 +755,7 @@ if ! "${skip_train}"; then log "LM collect-stats started... log: '${_logdir}/stats.*.log'" # NOTE: --*_shape_file doesn't require length information if --batch_type=unsorted, # but it's used only for deciding the sample ids. - # shellcheck disable=SC2086 + # shellcheck disable=SC2046,SC2086 ${train_cmd} JOB=1:"${_nj}" "${_logdir}"/stats.JOB.log \ ${python} -m espnet2.bin.lm_train \ --collect_stats true \ @@ -967,7 +967,7 @@ if ! "${skip_train}"; then # NOTE: --*_shape_file doesn't require length information if --batch_type=unsorted, # but it's used only for deciding the sample ids. - # shellcheck disable=SC2086 + # shellcheck disable=SC2046,SC2086 ${train_cmd} JOB=1:"${_nj}" "${_logdir}"/stats.JOB.log \ ${python} -m espnet2.bin.asr_train \ --collect_stats true \ @@ -1242,7 +1242,7 @@ if ! "${skip_eval}"; then # 2. Submit decoding jobs log "Decoding started... 
log: '${_logdir}/asr_inference.*.log'" - # shellcheck disable=SC2086 + # shellcheck disable=SC2046,SC2086 ${_cmd} --gpu "${_ngpu}" JOB=1:"${_nj}" "${_logdir}"/asr_inference.JOB.log \ ${python} -m ${asr_inference_tool} \ --batch_size ${batch_size} \ diff --git a/espnet/asr/pytorch_backend/asr.py b/espnet/asr/pytorch_backend/asr.py index a83d9a27dc1..0effaaaa893 100644 --- a/espnet/asr/pytorch_backend/asr.py +++ b/espnet/asr/pytorch_backend/asr.py @@ -4,12 +4,12 @@ """Training/decoding definition for the speech recognition task.""" import copy -from packaging.version import parse as V import itertools import json import logging import math import os +from packaging.version import parse as V from chainer import reporter as reporter_module from chainer import training @@ -999,9 +999,7 @@ def recog(args): # Dunno why but weight_observer from dynamic quantized module must have # dtype=torch.qint8 with torch < 1.5 although dtype=torch.float16 is supported. - if args.quantize_dtype == "float16" and torch.__version__ < V( - "1.5.0" - ): + if args.quantize_dtype == "float16" and torch.__version__ < V("1.5.0"): raise ValueError( "float16 dtype for dynamic quantization is not supported with torch " "version < 1.5.0. Switching to qint8 dtype instead." diff --git a/espnet/asr/pytorch_backend/recog.py b/espnet/asr/pytorch_backend/recog.py index 68fea23a144..b64131d1ad2 100644 --- a/espnet/asr/pytorch_backend/recog.py +++ b/espnet/asr/pytorch_backend/recog.py @@ -62,9 +62,7 @@ def recog_v2(args): "Quantized LSTM in ESPnet is only supported with torch 1.4+." ) - if args.quantize_dtype == "float16" and torch.__version__ < V( - "1.5.0" - ): + if args.quantize_dtype == "float16" and torch.__version__ < V("1.5.0"): raise ValueError( "float16 dtype for dynamic quantization is not supported with torch " "version < 1.5.0. Switching to qint8 dtype instead." 
diff --git a/espnet/nets/pytorch_backend/ctc.py b/espnet/nets/pytorch_backend/ctc.py index c974df09b7a..96b2e4f52b9 100644 --- a/espnet/nets/pytorch_backend/ctc.py +++ b/espnet/nets/pytorch_backend/ctc.py @@ -1,5 +1,5 @@ -from packaging.version import parse as V import logging +from packaging.version import parse as V import numpy as np import six @@ -28,11 +28,7 @@ def __init__(self, odim, eprojs, dropout_rate, ctc_type="warpctc", reduce=True): self.probs = None # for visualization # In case of Pytorch >= 1.7.0, CTC will be always builtin - self.ctc_type = ( - ctc_type - if V(torch.__version__) < V("1.7.0") - else "builtin" - ) + self.ctc_type = ctc_type if V(torch.__version__) < V("1.7.0") else "builtin" if ctc_type != self.ctc_type: logging.warning(f"CTC was set to {self.ctc_type} due to PyTorch version.") diff --git a/espnet/nets/pytorch_backend/e2e_tts_fastspeech.py b/espnet/nets/pytorch_backend/e2e_tts_fastspeech.py index c5a3069e53c..8c9f2bcb232 100644 --- a/espnet/nets/pytorch_backend/e2e_tts_fastspeech.py +++ b/espnet/nets/pytorch_backend/e2e_tts_fastspeech.py @@ -576,7 +576,7 @@ def _forward( alpha=1.0, ): # forward encoder - x_masks = self._source_mask(ilens) + x_masks = self._source_mask(ilens).to(xs.device) hs, _ = self.encoder(xs, x_masks) # (B, Tmax, adim) # integrate speaker embedding @@ -603,7 +603,7 @@ def _forward( olens_in = olens.new([olen // self.reduction_factor for olen in olens]) else: olens_in = olens - h_masks = self._source_mask(olens_in) + h_masks = self._source_mask(olens_in).to(xs.device) else: h_masks = None zs, _ = self.decoder(hs, h_masks) # (B, Lmax, adim) @@ -816,7 +816,7 @@ def _source_mask(self, ilens): [1, 1, 1, 0, 0]]], dtype=torch.uint8) """ - x_masks = make_non_pad_mask(ilens).to(next(self.parameters()).device) + x_masks = make_non_pad_mask(ilens) return x_masks.unsqueeze(-2) def _load_teacher_model(self, model_path): diff --git a/espnet/nets/pytorch_backend/e2e_vc_transformer.py b/espnet/nets/pytorch_backend/e2e_vc_transformer.py index c4e0144d412..99fd3f3962b 100644 --- a/espnet/nets/pytorch_backend/e2e_vc_transformer.py +++ b/espnet/nets/pytorch_backend/e2e_vc_transformer.py @@ -673,7 +673,7 @@ def forward(self, xs, ilens, ys, labels, olens, spembs=None, *args, **kwargs): xs_ds, ilens_ds = xs, ilens # forward encoder - x_masks = self._source_mask(ilens_ds) + x_masks = self._source_mask(ilens_ds).to(xs.device) hs, hs_masks = self.encoder(xs_ds, x_masks) # integrate speaker embedding @@ -701,7 +701,7 @@ def forward(self, xs, ilens, ys, labels, olens, spembs=None, *args, **kwargs): ilens_ds_st = ilens_ds # forward decoder - y_masks = self._target_mask(olens_in) + y_masks = self._target_mask(olens_in).to(xs.device) zs, _ = self.decoder(ys_in, y_masks, hs_int, hs_masks) # (B, Lmax//r, odim * r) -> (B, Lmax//r * r, odim) before_outs = self.feat_out(zs).view(zs.size(0), -1, self.odim) @@ -977,7 +977,7 @@ def calculate_all_attentions( xs_ds, ilens_ds = xs, ilens # forward encoder - x_masks = self._source_mask(ilens_ds) + x_masks = self._source_mask(ilens_ds).to(xs.device) hs, hs_masks = self.encoder(xs_ds, x_masks) # integrate speaker embedding @@ -996,7 +996,7 @@ def calculate_all_attentions( ys_in = self._add_first_frame_and_remove_last_frame(ys_in) # forward decoder - y_masks = self._target_mask(olens_in) + y_masks = self._target_mask(olens_in).to(xs.device) zs, _ = self.decoder(ys_in, y_masks, hs, hs_masks) # calculate final outputs @@ -1099,7 +1099,7 @@ def _source_mask(self, ilens): [[1, 1, 1, 0, 0]]], dtype=torch.uint8) """ - x_masks = 
make_non_pad_mask(ilens).to(next(self.parameters()).device) + x_masks = make_non_pad_mask(ilens) return x_masks.unsqueeze(-2) def _target_mask(self, olens): @@ -1128,7 +1128,7 @@ def _target_mask(self, olens): [1, 1, 1, 0, 0]]], dtype=torch.uint8) """ - y_masks = make_non_pad_mask(olens).to(next(self.parameters()).device) + y_masks = make_non_pad_mask(olens) s_masks = subsequent_mask(y_masks.size(-1), device=y_masks.device).unsqueeze(0) return y_masks.unsqueeze(-2) & s_masks diff --git a/espnet/nets/pytorch_backend/nets_utils.py b/espnet/nets/pytorch_backend/nets_utils.py index 3a7b1e079bc..638b0b0bf23 100644 --- a/espnet/nets/pytorch_backend/nets_utils.py +++ b/espnet/nets/pytorch_backend/nets_utils.py @@ -151,10 +151,7 @@ def make_pad_mask(lengths, xs=None, length_dim=-1, maxlen=None): raise ValueError("length_dim cannot be 0: {}".format(length_dim)) if not isinstance(lengths, list): - lengths = lengths.tolist() - else: - assert isinstance(lengths, torch.Tensor), type(lengths) - lengths = lengths.long() + lengths = lengths.long().tolist() bs = int(len(lengths)) if maxlen is None: diff --git a/espnet2/asr/espnet_model.py b/espnet2/asr/espnet_model.py index 5756598d2ff..67698e95115 100644 --- a/espnet2/asr/espnet_model.py +++ b/espnet2/asr/espnet_model.py @@ -1,6 +1,6 @@ from contextlib import contextmanager -from packaging.version import parse as V import logging +from packaging.version import parse as V from typing import Dict from typing import List from typing import Optional diff --git a/espnet2/asr/maskctc_model.py b/espnet2/asr/maskctc_model.py index 10d91de94c5..2a95eec89ea 100644 --- a/espnet2/asr/maskctc_model.py +++ b/espnet2/asr/maskctc_model.py @@ -1,7 +1,7 @@ from contextlib import contextmanager -from packaging.version import parse as V from itertools import groupby import logging +from packaging.version import parse as V from typing import Dict from typing import List from typing import Optional diff --git a/espnet2/diar/espnet_model.py b/espnet2/diar/espnet_model.py index 92b434e7642..2017316f70f 100644 --- a/espnet2/diar/espnet_model.py +++ b/espnet2/diar/espnet_model.py @@ -2,8 +2,8 @@ # Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) from contextlib import contextmanager -from packaging.version import parse as V from itertools import permutations +from packaging.version import parse as V from typing import Dict from typing import Optional from typing import Tuple diff --git a/espnet2/enh/espnet_enh_s2t_model.py b/espnet2/enh/espnet_enh_s2t_model.py index c2e05654fce..4d37ce0b0c0 100644 --- a/espnet2/enh/espnet_enh_s2t_model.py +++ b/espnet2/enh/espnet_enh_s2t_model.py @@ -1,6 +1,6 @@ from contextlib import contextmanager -from packaging.version import parse as V import logging +from packaging.version import parse as V import random from typing import Dict from typing import List diff --git a/espnet2/enh/loss/criterions/tf_domain.py b/espnet2/enh/loss/criterions/tf_domain.py index cb81d7cf25d..4c4a91ef5d2 100644 --- a/espnet2/enh/loss/criterions/tf_domain.py +++ b/espnet2/enh/loss/criterions/tf_domain.py @@ -1,8 +1,8 @@ from abc import ABC from abc import abstractmethod -from packaging.version import parse as V from functools import reduce import math +from packaging.version import parse as V import torch import torch.nn.functional as F diff --git a/espnet2/mt/espnet_model.py b/espnet2/mt/espnet_model.py index b937cbe3dfd..8a493366046 100644 --- a/espnet2/mt/espnet_model.py +++ b/espnet2/mt/espnet_model.py @@ -1,6 +1,6 @@ from contextlib import contextmanager 
-from packaging.version import parse as V import logging +from packaging.version import parse as V from typing import Dict from typing import List from typing import Optional diff --git a/espnet2/st/espnet_model.py b/espnet2/st/espnet_model.py index fb8fcfdaee9..743b53d8288 100644 --- a/espnet2/st/espnet_model.py +++ b/espnet2/st/espnet_model.py @@ -1,6 +1,6 @@ from contextlib import contextmanager -from packaging.version import parse as V import logging +from packaging.version import parse as V from typing import Dict from typing import List from typing import Optional diff --git a/espnet2/tasks/abs_task.py b/espnet2/tasks/abs_task.py index 54c4cd26a43..0f23feaa93d 100644 --- a/espnet2/tasks/abs_task.py +++ b/espnet2/tasks/abs_task.py @@ -3,10 +3,10 @@ from abc import abstractmethod import argparse from dataclasses import dataclass -from packaging.version import parse as V import functools import logging import os +from packaging.version import parse as V from pathlib import Path import sys from typing import Any diff --git a/espnet2/train/reporter.py b/espnet2/train/reporter.py index 65b4ac6a9d8..be1d2a51fe5 100644 --- a/espnet2/train/reporter.py +++ b/espnet2/train/reporter.py @@ -3,8 +3,8 @@ from contextlib import contextmanager import dataclasses import datetime -from packaging.version import parse as V import logging +from packaging.version import parse as V from pathlib import Path import time from typing import ContextManager diff --git a/espnet2/train/trainer.py b/espnet2/train/trainer.py index 6fe2726880d..da8ea6144b4 100644 --- a/espnet2/train/trainer.py +++ b/espnet2/train/trainer.py @@ -3,8 +3,8 @@ from contextlib import contextmanager import dataclasses from dataclasses import is_dataclass -from packaging.version import parse as V import logging +from packaging.version import parse as V from pathlib import Path import time from typing import Dict diff --git a/espnet2/utils/griffin_lim.py b/espnet2/utils/griffin_lim.py index 3d4a948b7aa..c9b08cd1235 100644 --- a/espnet2/utils/griffin_lim.py +++ b/espnet2/utils/griffin_lim.py @@ -7,8 +7,8 @@ import logging -from packaging.version import parse as V from functools import partial +from packaging.version import parse as V from typeguard import check_argument_types from typing import Optional From 98689a5f0bfd88efffdbbcdd5d924e186d563a91 Mon Sep 17 00:00:00 2001 From: kamo-naoyuki Date: Thu, 12 May 2022 21:17:35 +0900 Subject: [PATCH 18/22] change to show the error logs when jobs fail --- .../asr1/scripts/utils/evaluate_asr.sh | 4 +- egs2/TEMPLATE/diar1/diar.sh | 8 +- egs2/TEMPLATE/enh1/enh.sh | 8 +- egs2/TEMPLATE/enh_asr1/enh_asr.sh | 12 +-- egs2/TEMPLATE/enh_st1/enh_st.sh | 32 +++--- egs2/TEMPLATE/mt1/mt.sh | 32 +++--- egs2/TEMPLATE/ssl1/hubert.sh | 100 +++++++++--------- egs2/TEMPLATE/st1/st.sh | 42 ++++---- egs2/TEMPLATE/tts1/tts.sh | 8 +- 9 files changed, 123 insertions(+), 123 deletions(-) diff --git a/egs2/TEMPLATE/asr1/scripts/utils/evaluate_asr.sh b/egs2/TEMPLATE/asr1/scripts/utils/evaluate_asr.sh index 7d3da2bfbea..0cc2c632591 100755 --- a/egs2/TEMPLATE/asr1/scripts/utils/evaluate_asr.sh +++ b/egs2/TEMPLATE/asr1/scripts/utils/evaluate_asr.sh @@ -173,14 +173,14 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then # 2. Submit decoding jobs log "Decoding started...
log: '${logdir}/asr_inference.*.log'" - # shellcheck disable=SC2086 + # shellcheck disable=SC2046,SC2086 ${_cmd} --gpu "${_ngpu}" JOB=1:"${_nj}" "${logdir}"/asr_inference.JOB.log \ python3 -m espnet2.bin.asr_inference \ --ngpu "${_ngpu}" \ --data_path_and_name_and_type "${wavscp},speech,sound" \ --key_file "${logdir}"/keys.JOB.scp \ --output_dir "${logdir}"/output.JOB \ - "${_opts[@]}" ${inference_args} + "${_opts[@]}" ${inference_args} || { cat $(grep -l -i error "${logdir}"/asr_inference.*.log) ; exit 1; } # 3. Concatenates the output files from each jobs for f in token token_int score text; do diff --git a/egs2/TEMPLATE/diar1/diar.sh b/egs2/TEMPLATE/diar1/diar.sh index 815c73537f4..b711d324eab 100755 --- a/egs2/TEMPLATE/diar1/diar.sh +++ b/egs2/TEMPLATE/diar1/diar.sh @@ -348,7 +348,7 @@ if ! "${skip_train}"; then # NOTE: --*_shape_file doesn't require length information if --batch_type=unsorted, # but it's used only for deciding the sample ids. - # shellcheck disable=SC2086 + # shellcheck disable=SC2046,SC2086 ${train_cmd} JOB=1:"${_nj}" "${_logdir}"/stats.JOB.log \ ${python} -m espnet2.bin.diar_train \ --collect_stats true \ @@ -360,7 +360,7 @@ if ! "${skip_train}"; then --train_shape_file "${_logdir}/train.JOB.scp" \ --valid_shape_file "${_logdir}/valid.JOB.scp" \ --output_dir "${_logdir}/stats.JOB" \ - ${_opts} ${diar_args} || { cat "${_logdir}"/stats.1.log; exit 1; } + ${_opts} ${diar_args} || { cat $(grep -l -i error "${_logdir}"/stats.*.log) ; exit 1; } # 4. Aggregate shape files _opts= @@ -510,7 +510,7 @@ if ! "${skip_eval}"; then # 2. Submit inference jobs log "Diarization started... log: '${_logdir}/diar_inference.*.log'" - # shellcheck disable=SC2086 + # shellcheck disable=SC2046,SC2086 ${_cmd} --gpu "${_ngpu}" JOB=1:"${_nj}" "${_logdir}"/diar_inference.JOB.log \ ${python} -m espnet2.bin.diar_inference \ --ngpu "${_ngpu}" \ @@ -520,7 +520,7 @@ if ! "${skip_eval}"; then --train_config "${diar_exp}"/config.yaml \ --model_file "${diar_exp}"/"${inference_model}" \ --output_dir "${_logdir}"/output.JOB \ - ${_opts} + ${_opts} || { cat $(grep -l -i error "${_logdir}"/diar_inference.*.log) ; exit 1; } # 3. Concatenates the output files from each jobs for i in $(seq "${_nj}"); do diff --git a/egs2/TEMPLATE/enh1/enh.sh b/egs2/TEMPLATE/enh1/enh.sh index db170043db6..864a0485df0 100755 --- a/egs2/TEMPLATE/enh1/enh.sh +++ b/egs2/TEMPLATE/enh1/enh.sh @@ -494,7 +494,7 @@ if ! "${skip_train}"; then # but it's used only for deciding the sample ids. - # shellcheck disable=SC2086 + # shellcheck disable=SC2046,SC2086 ${train_cmd} JOB=1:"${_nj}" "${_logdir}"/stats.JOB.log \ ${python} -m espnet2.bin.enh_train \ --collect_stats true \ @@ -504,7 +504,7 @@ if ! "${skip_train}"; then --train_shape_file "${_logdir}/train.JOB.scp" \ --valid_shape_file "${_logdir}/valid.JOB.scp" \ --output_dir "${_logdir}/stats.JOB" \ - ${_opts} ${enh_args} || { cat "${_logdir}"/stats.1.log; exit 1; } + ${_opts} ${enh_args} || { cat $(grep -l -i error "${_logdir}"/stats.*.log) ; exit 1; } # 4. Aggregate shape files _opts= @@ -652,7 +652,7 @@ if ! "${skip_eval}"; then # 2. Submit inference jobs log "Enhancement started... log: '${_logdir}/enh_inference.*.log'" - # shellcheck disable=SC2086 + # shellcheck disable=SC2046,SC2086 ${_cmd} --gpu "${_ngpu}" JOB=1:"${_nj}" "${_logdir}"/enh_inference.JOB.log \ ${python} -m espnet2.bin.enh_inference \ --ngpu "${_ngpu}" \ @@ -663,7 +663,7 @@ if ! 
"${skip_eval}"; then ${inference_enh_config:+--inference_config "$inference_enh_config"} \ --model_file "${enh_exp}"/"${inference_model}" \ --output_dir "${_logdir}"/output.JOB \ - ${_opts} ${inference_args} + ${_opts} ${inference_args} || { cat $(grep -l -i error "${_logdir}"/enh_inference.*.log) ; exit 1; } _spk_list=" " diff --git a/egs2/TEMPLATE/enh_asr1/enh_asr.sh b/egs2/TEMPLATE/enh_asr1/enh_asr.sh index fc720ddf94b..9ec09219613 100755 --- a/egs2/TEMPLATE/enh_asr1/enh_asr.sh +++ b/egs2/TEMPLATE/enh_asr1/enh_asr.sh @@ -794,7 +794,7 @@ if ! "${skip_train}"; then log "LM collect-stats started... log: '${_logdir}/stats.*.log'" # NOTE: --*_shape_file doesn't require length information if --batch_type=unsorted, # but it's used only for deciding the sample ids. - # shellcheck disable=SC2086 + # shellcheck disable=SC2046,SC2086 ${train_cmd} JOB=1:"${_nj}" "${_logdir}"/stats.JOB.log \ ${python} -m espnet2.bin.lm_train \ --collect_stats true \ @@ -810,7 +810,7 @@ if ! "${skip_train}"; then --train_shape_file "${_logdir}/train.JOB.scp" \ --valid_shape_file "${_logdir}/dev.JOB.scp" \ --output_dir "${_logdir}/stats.JOB" \ - ${_opts} ${lm_args} || { cat "${_logdir}"/stats.1.log; exit 1; } + ${_opts} ${lm_args} || { cat $(grep -l -i error "${_logdir}"/stats.*.log) ; exit 1; } # 4. Aggregate shape files _opts= @@ -937,7 +937,7 @@ if ! "${skip_train}"; then if "${use_ngram}"; then log "Stage 9: Ngram Training: train_set=${data_feats}/lm_train.txt" cut -f 2 -d " " ${data_feats}/lm_train.txt | lmplz -S "20%" --discount_fallback -o ${ngram_num} - >${ngram_exp}/${ngram_num}gram.arpa - build_binary -s ${ngram_exp}/${ngram_num}gram.arpa ${ngram_exp}/${ngram_num}gram.bin + build_binary -s ${ngram_exp}/${ngram_num}gram.arpa ${ngram_exp}/${ngram_num}gram.bin else log "Stage 9: Skip ngram stages: use_ngram=${use_ngram}" fi @@ -1335,7 +1335,7 @@ if ! "${skip_eval}"; then # 2. Submit inference jobs log "Enhancement started... log: '${_logdir}/enh_inference.*.log'" - # shellcheck disable=SC2086 + # shellcheck disable=SC2046,SC2086 ${_cmd} --gpu "${_ngpu}" JOB=1:"${_nj}" "${_logdir}"/enh_inference.JOB.log \ ${python} -m espnet2.bin.enh_inference \ --enh_s2t_task true \ @@ -1347,7 +1347,7 @@ if ! "${skip_eval}"; then ${inference_enh_config:+--inference_config "$inference_enh_config"} \ --model_file "${enh_asr_exp}"/"${inference_enh_asr_model}" \ --output_dir "${_logdir}"/output.JOB \ - ${_opts} ${enh_inference_args} + ${_opts} ${enh_inference_args} || { cat $(grep -l -i error "${_logdir}"/enh_inference.*.log) ; exit 1; } # 3. Concatenates the output files from each jobs _spk_list=" " @@ -1632,7 +1632,7 @@ if ! "${skip_upload_hf}"; then # Generate description file # shellcheck disable=SC2034 hf_task=speech-enhancement-recognition - # shellcheck disable=SC2034 + # shellcheck disable=SC2034 espnet_task=EnhS2T # shellcheck disable=SC2034 task_exp=${enh_asr_exp} diff --git a/egs2/TEMPLATE/enh_st1/enh_st.sh b/egs2/TEMPLATE/enh_st1/enh_st.sh index eabf49cc29d..b27f986e582 100755 --- a/egs2/TEMPLATE/enh_st1/enh_st.sh +++ b/egs2/TEMPLATE/enh_st1/enh_st.sh @@ -551,7 +551,7 @@ if ! 
"${skip_data_prep}"; then done utils/combine_data.sh --extra_files "${utt_extra_files} ${_scp_list}" "data/${train_set}_sp" ${_dirs} for extra_file in ${utt_extra_files}; do - python pyscripts/utils/remove_duplicate_keys.py data/"${train_set}_sp"/${extra_file} > data/"${train_set}_sp"/${extra_file}.tmp + python pyscripts/utils/remove_duplicate_keys.py data/"${train_set}_sp"/${extra_file} > data/"${train_set}_sp"/${extra_file}.tmp mv data/"${train_set}_sp"/${extra_file}.tmp data/"${train_set}_sp"/${extra_file} done else @@ -593,7 +593,7 @@ if ! "${skip_data_prep}"; then fi cp ${single_file} "${data_feats}${_suf}/${dset}" expand_utt_extra_files="${expand_utt_extra_files} $(basename ${single_file})" - done + done done echo "${expand_utt_extra_files}" utils/fix_data_dir.sh --utt_extra_files "${expand_utt_extra_files}" "${data_feats}${_suf}/${dset}" @@ -727,9 +727,9 @@ if ! "${skip_data_prep}"; then utils/fix_data_dir.sh --utt_extra_files "${utt_extra_files}" "${data_feats}/${dset}" for utt_extra_file in ${utt_extra_files}; do python pyscripts/utils/remove_duplicate_keys.py ${data_feats}/${dset}/${utt_extra_file} \ - > ${data_feats}/${dset}/${utt_extra_file}.tmp + > ${data_feats}/${dset}/${utt_extra_file}.tmp mv ${data_feats}/${dset}/${utt_extra_file}.tmp ${data_feats}/${dset}/${utt_extra_file} - done + done done # shellcheck disable=SC2002 @@ -934,7 +934,7 @@ if ! "${skip_train}"; then log "LM collect-stats started... log: '${_logdir}/stats.*.log'" # NOTE: --*_shape_file doesn't require length information if --batch_type=unsorted, # but it's used only for deciding the sample ids. - # shellcheck disable=SC2086 + # shellcheck disable=SC2046,SC2086 ${train_cmd} JOB=1:"${_nj}" "${_logdir}"/stats.JOB.log \ ${python} -m espnet2.bin.lm_train \ --collect_stats true \ @@ -950,7 +950,7 @@ if ! "${skip_train}"; then --train_shape_file "${_logdir}/train.JOB.scp" \ --valid_shape_file "${_logdir}/dev.JOB.scp" \ --output_dir "${_logdir}/stats.JOB" \ - ${_opts} ${lm_args} || { cat "${_logdir}"/stats.1.log; exit 1; } + ${_opts} ${lm_args} || { cat $(grep -l -i error "${_logdir}"/stats.*.log) ; exit 1; } # 4. Aggregate shape files _opts= @@ -1078,7 +1078,7 @@ if ! "${skip_train}"; then if "${use_ngram}"; then log "Stage 9: Ngram Training: train_set=${data_feats}/lm_train.txt" cut -f 2 -d " " ${data_feats}/lm_train.txt | lmplz -S "20%" --discount_fallback -o ${ngram_num} - >${ngram_exp}/${ngram_num}gram.arpa - build_binary -s ${ngram_exp}/${ngram_num}gram.arpa ${ngram_exp}/${ngram_num}gram.bin + build_binary -s ${ngram_exp}/${ngram_num}gram.arpa ${ngram_exp}/${ngram_num}gram.bin else log "Stage 9: Skip ngram stages: use_ngram=${use_ngram}" fi @@ -1148,7 +1148,7 @@ if ! "${skip_train}"; then # but it's used only for deciding the sample ids. # TODO(jiatong): fix different bpe model - # shellcheck disable=SC2086 + # shellcheck disable=SC2046,SC2086 ${train_cmd} JOB=1:"${_nj}" "${_logdir}"/stats.JOB.log \ ${python} -m espnet2.bin.enh_s2t_train \ --collect_stats true \ @@ -1173,7 +1173,7 @@ if ! "${skip_train}"; then --train_shape_file "${_logdir}/train.JOB.scp" \ --valid_shape_file "${_logdir}/valid.JOB.scp" \ --output_dir "${_logdir}/stats.JOB" \ - ${_opts} ${enh_st_args} || { cat "${_logdir}"/stats.1.log; exit 1; } + ${_opts} ${enh_st_args} || { cat $(grep -l -i error "${_logdir}"/stats.*.log) ; exit 1; } # 4. Aggregate shape files _opts= @@ -1436,7 +1436,7 @@ if ! "${skip_eval}"; then # 2. Submit decoding jobs log "Decoding started... 
log: '${_logdir}/st_inference.*.log'" - # shellcheck disable=SC2086 + # shellcheck disable=SC2046,SC2086 ${_cmd} --gpu "${_ngpu}" JOB=1:"${_nj}" "${_logdir}"/st_inference.JOB.log \ ${python} -m ${st_inference_tool} \ --enh_s2t_task true \ @@ -1447,7 +1447,7 @@ if ! "${skip_eval}"; then --st_train_config "${enh_st_exp}"/config.yaml \ --st_model_file "${enh_st_exp}"/"${inference_enh_st_model}" \ --output_dir "${_logdir}"/output.JOB \ - ${_opts} ${st_inference_args} + ${_opts} ${st_inference_args} || { cat $(grep -l -i error "${_logdir}"/st_inference.*.log) ; exit 1; } # 3. Concatenates the output files from each jobs for f in token token_int score text; do @@ -1773,11 +1773,11 @@ if ! "${skip_upload_hf}"; then gitlfs=$(git lfs --version 2> /dev/null || true) [ -z "${gitlfs}" ] && \ log "ERROR: You need to install git-lfs first" && \ - exit 1 - + exit 1 + dir_repo=${expdir}/hf_${hf_repo//"/"/"_"} [ ! -d "${dir_repo}" ] && git clone https://huggingface.co/${hf_repo} ${dir_repo} - + if command -v git &> /dev/null; then _creator_name="$(git config user.name)" _checkout="git checkout $(git show -s --format=%H)" @@ -1790,13 +1790,13 @@ if ! "${skip_upload_hf}"; then # foo/asr1 -> foo _corpus="${_task%/*}" _model_name="${_creator_name}/${_corpus}_$(basename ${packed_model} .zip)" - + # copy files in ${dir_repo} unzip -o ${packed_model} -d ${dir_repo} # Generate description file # shellcheck disable=SC2034 hf_task=speech-enhancement-translation - # shellcheck disable=SC2034 + # shellcheck disable=SC2034 espnet_task=EnhS2T # shellcheck disable=SC2034 task_exp=${enh_st_exp} diff --git a/egs2/TEMPLATE/mt1/mt.sh b/egs2/TEMPLATE/mt1/mt.sh index bf6996c13c8..02260cb3a4d 100755 --- a/egs2/TEMPLATE/mt1/mt.sh +++ b/egs2/TEMPLATE/mt1/mt.sh @@ -455,7 +455,7 @@ if ! "${skip_data_prep}"; then log "Stage 1: Data preparation for data/${train_set}, data/${valid_set}, etc." # [Task dependent] Need to create data.sh for new corpus local/data.sh ${local_data_opts} - + fi if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then @@ -474,7 +474,7 @@ if ! "${skip_data_prep}"; then # with regex to suuport multi-references for single_file in $(ls data/"${dset}"/${extra_file}*); do cp ${single_file} "${data_feats}${_suf}/${dset}" - done + done done echo "${feats_type}" > "${data_feats}${_suf}/${dset}/feats_type" done @@ -702,7 +702,7 @@ if ! "${skip_train}"; then log "LM collect-stats started... log: '${_logdir}/stats.*.log'" # NOTE: --*_shape_file doesn't require length information if --batch_type=unsorted, # but it's used only for deciding the sample ids. - # shellcheck disable=SC2086 + # shellcheck disable=SC2046,SC2086 ${train_cmd} JOB=1:"${_nj}" "${_logdir}"/stats.JOB.log \ ${python} -m espnet2.bin.lm_train \ --collect_stats true \ @@ -718,7 +718,7 @@ if ! "${skip_train}"; then --train_shape_file "${_logdir}/train.JOB.scp" \ --valid_shape_file "${_logdir}/dev.JOB.scp" \ --output_dir "${_logdir}/stats.JOB" \ - ${_opts} ${lm_args} || { cat "${_logdir}"/stats.1.log; exit 1; } + ${_opts} ${lm_args} || { cat $(grep -l -i error "${_logdir}"/stats.*.log) ; exit 1; } # 4. Aggregate shape files _opts= @@ -845,7 +845,7 @@ if ! 
"${skip_train}"; then if "${use_ngram}"; then log "Stage 8: Ngram Training: train_set=${data_feats}/lm_train.txt" cut -f 2 -d " " ${data_feats}/lm_train.txt | lmplz -S "20%" --discount_fallback -o ${ngram_num} - >${ngram_exp}/${ngram_num}gram.arpa - build_binary -s ${ngram_exp}/${ngram_num}gram.arpa ${ngram_exp}/${ngram_num}gram.bin + build_binary -s ${ngram_exp}/${ngram_num}gram.arpa ${ngram_exp}/${ngram_num}gram.bin else log "Stage 8: Skip ngram stages: use_ngram=${use_ngram}" fi @@ -1132,7 +1132,7 @@ if ! "${skip_eval}"; then # 2. Submit decoding jobs log "Decoding started... log: '${_logdir}/mt_inference.*.log'" - # shellcheck disable=SC2086 + # shellcheck disable=SC2046,SC2086 ${_cmd} --gpu "${_ngpu}" JOB=1:"${_nj}" "${_logdir}"/mt_inference.JOB.log \ ${python} -m ${mt_inference_tool} \ --batch_size ${batch_size} \ @@ -1142,7 +1142,7 @@ if ! "${skip_eval}"; then --mt_train_config "${mt_exp}"/config.yaml \ --mt_model_file "${mt_exp}"/"${inference_mt_model}" \ --output_dir "${_logdir}"/output.JOB \ - ${_opts} ${inference_args} + ${_opts} ${inference_args} || { cat $(grep -l -i error "${_logdir}"/mt_inference.*.log) ; exit 1; } # 3. Concatenates the output files from each jobs for f in token token_int score text; do @@ -1205,7 +1205,7 @@ if ! "${skip_eval}"; then # ) \ # <(<"${_data}/text.${tgt_case}.${tgt_lang}" awk '{ print "(" $2 "-" $1 ")" }') \ # >"${_scoredir}/hyp.trn.org" - + # remove utterance id #perl -pe 's/\([^\)]+\)//g;' "${_scoredir}/ref.trn.org" > "${_scoredir}/ref.trn" #perl -pe 's/\([^\)]+\)//g;' "${_scoredir}/hyp.trn.org" > "${_scoredir}/hyp.trn" @@ -1220,7 +1220,7 @@ if ! "${skip_eval}"; then -i "${_scoredir}/hyp.trn.detok" \ -m bleu chrf ter \ >> ${_scoredir}/result.tc.txt - + log "Write a case-sensitive BLEU (single-reference) result in ${_scoredir}/result.tc.txt" fi @@ -1252,8 +1252,8 @@ if ! "${skip_eval}"; then ) \ <(<"${_data}/text.${tgt_case}.${tgt_lang}" awk '{ print "(" $2 "-" $1 ")" }') \ >"${_scoredir}/ref.trn.org.${ref_idx}" - - # + + # perl -pe 's/\([^\)]+\)//g;' "${_scoredir}/ref.trn.org.${ref_idx}" > "${_scoredir}/ref.trn.${ref_idx}" detokenizer.perl -l ${tgt_lang} -q < "${_scoredir}/ref.trn.${ref_idx}" > "${_scoredir}/ref.trn.detok.${ref_idx}" remove_punctuation.pl < "${_scoredir}/ref.trn.detok.${ref_idx}" > "${_scoredir}/ref.trn.detok.lc.rm.${ref_idx}" @@ -1386,11 +1386,11 @@ if ! "${skip_upload_hf}"; then gitlfs=$(git lfs --version 2> /dev/null || true) [ -z "${gitlfs}" ] && \ log "ERROR: You need to install git-lfs first" && \ - exit 1 - + exit 1 + dir_repo=${expdir}/hf_${hf_repo//"/"/"_"} [ ! -d "${dir_repo}" ] && git clone https://huggingface.co/${hf_repo} ${dir_repo} - + if command -v git &> /dev/null; then _creator_name="$(git config user.name)" _checkout="git checkout $(git show -s --format=%H)" @@ -1403,13 +1403,13 @@ if ! 
"${skip_upload_hf}"; then # foo/asr1 -> foo _corpus="${_task%/*}" _model_name="${_creator_name}/${_corpus}_$(basename ${packed_model} .zip)" - + # copy files in ${dir_repo} unzip -o ${packed_model} -d ${dir_repo} # Generate description file # shellcheck disable=SC2034 hf_task=machine-translation - # shellcheck disable=SC2034 + # shellcheck disable=SC2034 espnet_task=MT # shellcheck disable=SC2034 task_exp=${mt_exp} diff --git a/egs2/TEMPLATE/ssl1/hubert.sh b/egs2/TEMPLATE/ssl1/hubert.sh index 8a6f7590cb8..027b6636782 100755 --- a/egs2/TEMPLATE/ssl1/hubert.sh +++ b/egs2/TEMPLATE/ssl1/hubert.sh @@ -143,7 +143,7 @@ Options: # Pretrain related --pretrain_configs # configration files of pretraining stage --n_clusters # number of k-means clusters of pretraining stage - --features_km # feature for k-means clustering of pretraining stage + --features_km # feature for k-means clustering of pretraining stage --pt_args # Arguments for hubert model pretraining (default="${pt_args}"). # e.g., --pt_args "--max_epoch 10" # Note that it will overwrite args in pt config. @@ -180,7 +180,7 @@ fi [ -z "${valid_set}" ] && { log "${help_message}"; log "Error: --valid_set is required"; exit 2; }; # Check pretrain_config, n_clusters and feature list -pretrain_config_list=(${pretrain_configs// / }) +pretrain_config_list=(${pretrain_configs// / }) n_clusters_list=(${n_clusters// / }) feature_list=(${features_km// / }) if ! [ ${pretrain_start_iter} -le ${pretrain_stop_iter} ]; then @@ -227,7 +227,7 @@ fi if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then if [ "${feats_type}" = raw ]; then log "Stage 3: Format wav.scp: data/ -> ${data_feats}" - + # ====== Recreating "wav.scp" ====== # Kaldi-wav.scp, which can describe the file path with unix-pipe, like "cat /some/path |", # shouldn't be used in training process. @@ -235,7 +235,7 @@ if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then # and it can also change the audio-format and sampling rate. # If nothing is need, then format_wav_scp.sh does nothing: # i.e. the input file format and rate is same as the output. 
- + for dset in "${train_set}" "${valid_set}"; do _suf="/org" utils/copy_data_dir.sh --validate_opts --non-print data/"${dset}" "${data_feats}${_suf}/${dset}" @@ -253,7 +253,7 @@ if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then scripts/audio/format_wav_scp.sh --nj "${nj}" --cmd "${train_cmd}" \ --audio-format "${audio_format}" --fs "${fs}" ${_opts} \ "data/${dset}/wav.scp" "${data_feats}${_suf}/${dset}" - + echo "${feats_type}" > "${data_feats}${_suf}/${dset}/feats_type" done else @@ -265,21 +265,21 @@ fi if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then log "Stage 4: Remove long/short data: ${data_feats}/org -> ${data_feats}" - + # NOTE(kamo): Not applying to test_sets to keep original data for dset in "${train_set}" "${valid_set}"; do - + # Copy data dir utils/copy_data_dir.sh --validate_opts --non-print "${data_feats}/org/${dset}" "${data_feats}/${dset}" cp "${data_feats}/org/${dset}/feats_type" "${data_feats}/${dset}/feats_type" - + # Remove short utterances _feats_type="$(<${data_feats}/${dset}/feats_type)" if [ "${_feats_type}" = raw ]; then _fs=$(python3 -c "import humanfriendly as h;print(h.parse_size('${fs}'))") _min_length=$(python3 -c "print(int(${min_wav_duration} * ${_fs}))") _max_length=$(python3 -c "print(int(${max_wav_duration} * ${_fs}))") - + # utt2num_samples is created by format_wav_scp.sh <"${data_feats}/org/${dset}/utt2num_samples" \ awk -v min_length="${_min_length}" -v max_length="${_max_length}" \ @@ -291,11 +291,11 @@ if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then else log "Error: not supported: --feats_type ${feats_type}" fi - + # Remove empty text <"${data_feats}/org/${dset}/text" \ awk ' { if( NF != 1 ) print $0; } ' >"${data_feats}/${dset}/text" - + # fix_data_dir.sh leaves only utts which exist in all files utils/fix_data_dir.sh "${data_feats}/${dset}" done @@ -303,7 +303,7 @@ fi if [ ${stage} -le 7 ] && [ ${stop_stage} -ge 5 ]; then - + for ((iter=${pretrain_start_iter}; iter<=${pretrain_stop_iter};iter++)); do asr_config="${pretrain_config_list[${iter}]}" if [ "${lang}" != noinfo ]; then @@ -311,25 +311,25 @@ if [ ${stage} -le 7 ] && [ ${stop_stage} -ge 5 ]; then else asr_stats_dir="${expdir}/pretrain_iter${iter}_stats_${feats_type}" fi - + if [ -n "${asr_config}" ]; then asr_tag="$(basename "${asr_config}" .yaml)_${feats_type}" else asr_tag="train_${feats_type}" fi - + asr_exp="${expdir}/pretrain_${asr_tag}_iter${iter}" - + train_set_plabel=$(eval "echo ${train_set}_\${feature_list[${iter}]}_km\${n_clusters_list[${iter}]}") valid_set_plabel=$(eval "echo ${valid_set}_\${feature_list[${iter}]}_km\${n_clusters_list[${iter}]}") - + feats_km="${feature_list[${iter}]}" n_clusters="${n_clusters_list[${iter}]}" dictdir="./data/${feats_km}_km${n_clusters}_token_list_iter${iter}/${token_type}" - + if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then log "Stage 5.iter${iter}: Running ${n_clusters} cluster K-means on ${feats_km} feature." 
- + if [ ${iter} -eq 0 ] || [ ${feats_km} == "mfcc" ]; then ./scripts/km.sh \ --train_set "${train_set}" \ @@ -354,21 +354,21 @@ if [ ${stage} -le 7 ] && [ ${stop_stage} -ge 5 ]; then --hubert_dir_path "${expdir}/pretrained_model_iter$((iter-1))"/valid.acc.best.pth fi fi - + if [ ${stage} -le 6 ] && [ ${stop_stage} -ge 6 ]; then _asr_train_dir="${data_feats}/${train_set_plabel}" _asr_valid_dir="${data_feats}/${valid_set_plabel}" - + log "Stage 6.iter${iter}: ${feats_km} pretrain model collect stats: \ train_set=${_asr_train_dir}, valid_set=${_asr_valid_dir}" - + _opts= if [ -n "${asr_config}" ]; then # To generate the config file: e.g. # % python3 -m espnet2.bin.asr_train --print_config --optim adam _opts+="--config ${asr_config} " fi - + _feats_type="$(<${_asr_train_dir}/feats_type)" if [ "${_feats_type}" = raw ]; then _scp=wav.scp @@ -385,14 +385,14 @@ if [ ${stage} -le 7 ] && [ ${stop_stage} -ge 5 ]; then _input_size="$(<${_asr_train_dir}/feats_dim)" _opts+="--input_size=${_input_size} " fi - + # 1. Split the key file _logdir="${asr_stats_dir}/logdir" mkdir -p "${_logdir}" - + # Get the minimum number among ${nj} and the number lines of input files _nj=$(min "${nj}" "$(<${_asr_train_dir}/${_scp} wc -l)" "$(<${_asr_valid_dir}/${_scp} wc -l)") - + key_file="${_asr_train_dir}/${_scp}" split_scps="" for n in $(seq "${_nj}"); do @@ -400,7 +400,7 @@ if [ ${stage} -le 7 ] && [ ${stop_stage} -ge 5 ]; then done # shellcheck disable=SC2086 utils/split_scp.pl "${key_file}" ${split_scps} - + key_file="${_asr_valid_dir}/${_scp}" split_scps="" for n in $(seq "${_nj}"); do @@ -408,18 +408,18 @@ if [ ${stage} -le 7 ] && [ ${stop_stage} -ge 5 ]; then done # shellcheck disable=SC2086 utils/split_scp.pl "${key_file}" ${split_scps} - + # 2. Generate run.sh log "Generate '${asr_stats_dir}/run.sh'. You can resume the process from stage 5.iter${iter} using this script" mkdir -p "${asr_stats_dir}"; echo "${run_args} --stage 6 \"\$@\"; exit \$?" > "${asr_stats_dir}/run.sh"; chmod +x "${asr_stats_dir}/run.sh" - + # 3. Submit jobs log "Hubert pretraining collect-stats started... log: '${_logdir}/stats.*.log'" - + # NOTE: --*_shape_file doesn't require length information if --batch_type=unsorted, # but it's used only for deciding the sample ids. - - # shellcheck disable=SC2086 + + # shellcheck disable=SC2046,SC2086 ${train_cmd} JOB=1:"${_nj}" "${_logdir}"/stats.JOB.log \ ${python} -m espnet2.bin.hubert_train \ --collect_stats true \ @@ -439,8 +439,8 @@ if [ ${stage} -le 7 ] && [ ${stop_stage} -ge 5 ]; then --valid_shape_file "${_logdir}/valid.JOB.scp" \ --output_dir "${_logdir}/stats.JOB" \ --hubert_dict "${dictdir}/dict.txt" \ - ${_opts} ${pt_args} || { cat "${_logdir}"/stats.1.log; exit 1; } - + ${_opts} ${pt_args} || { cat $(grep -l -i error "${_logdir}"/stats.*.log) ; exit 1; } + + # 4. Aggregate shape files _opts= for i in $(seq "${_nj}"); do @@ -448,30 +448,30 @@ if [ ${stage} -le 7 ] && [ ${stop_stage} -ge 5 ]; then done # shellcheck disable=SC2086 ${python} -m espnet2.bin.aggregate_stats_dirs ${_opts} --output_dir "${asr_stats_dir}" - + # Append the num-tokens at the last dimensions. 
This is used for batch-bins count <"${asr_stats_dir}/train/text_shape" \ awk -v N="$(<${dictdir}/tokens.txt wc -l)" '{ print $0 "," N }' \ >"${asr_stats_dir}/train/text_shape.${token_type}" - + <"${asr_stats_dir}/valid/text_shape" \ awk -v N="$(<${dictdir}/tokens.txt wc -l)" '{ print $0 "," N }' \ >"${asr_stats_dir}/valid/text_shape.${token_type}" fi - + if [ ${stage} -le 7 ] && [ ${stop_stage} -ge 7 ]; then _asr_train_dir="${data_feats}/${train_set_plabel}" _asr_valid_dir="${data_feats}/${valid_set_plabel}" - + log "Stage 7.iter${iter}: Hubert Pretraining: train_set=${_asr_train_dir}, valid_set=${_asr_valid_dir}" - + _opts= if [ -n "${asr_config}" ]; then # To generate the config file: e.g. # % python3 -m espnet2.bin.hubert_train --print_config --optim adam _opts+="--config ${asr_config} " fi - + _feats_type="$(<${_asr_train_dir}/feats_type)" if [ "${_feats_type}" = raw ]; then _scp=wav.scp @@ -488,14 +488,14 @@ if [ ${stage} -le 7 ] && [ ${stop_stage} -ge 5 ]; then _type=kaldi_ark _fold_length="${asr_speech_fold_length}" _input_size="$(<${_asr_train_dir}/feats_dim)" - _opts+="--input_size=${_input_size} " + _opts+="--input_size=${_input_size} " fi - + if [ "${num_splits_asr}" -gt 1 ]; then # If you met a memory error when parsing text files, this option may help you. # The corpus is split into subsets and each subset is used for training one by one in order, # so the memory footprint can be limited to the memory required for each dataset. - + _split_dir="${asr_stats_dir}/splits${num_splits_asr}" if [ ! -f "${_split_dir}/.done" ]; then rm -f "${_split_dir}/.done" @@ -511,23 +511,23 @@ if [ ${stage} -le 7 ] && [ ${stop_stage} -ge 5 ]; then else log "${_split_dir}/.done exists. Spliting is skipped" fi - + _opts+="--train_data_path_and_name_and_type ${_split_dir}/${_scp},speech,${_type} " _opts+="--train_data_path_and_name_and_type ${_split_dir}/text,text,text " _opts+="--train_shape_file ${_split_dir}/speech_shape " _opts+="--train_shape_file ${_split_dir}/text_shape.${token_type} " _opts+="--multiple_iterator true " - + else _opts+="--train_data_path_and_name_and_type ${_asr_train_dir}/${_scp},speech,${_type} " _opts+="--train_data_path_and_name_and_type ${_asr_train_dir}/text,text,text " _opts+="--train_shape_file ${asr_stats_dir}/train/speech_shape " _opts+="--train_shape_file ${asr_stats_dir}/train/text_shape.${token_type} " fi - + log "Generate '${asr_exp}/run.sh'. You can resume the process from stage 6 using this script" mkdir -p "${asr_exp}"; echo "${run_args} --stage 7 \"\$@\"; exit \$?" > "${asr_exp}/run.sh"; chmod +x "${asr_exp}/run.sh" - + # NOTE(kamo): --fold_length is used only if --batch_type=folded and it's ignored in the other case log "Hubert pretraining started... log: '${asr_exp}/train.log'" if echo "${cuda_cmd}" | grep -e queue.pl -e queue-freegpu.pl &> /dev/null; then @@ -536,7 +536,7 @@ if [ ${stage} -le 7 ] && [ ${stop_stage} -ge 5 ]; then else jobname="${asr_exp}/train.log" fi - + # shellcheck disable=SC2086 ${python} -m espnet2.bin.launch \ --cmd "${cuda_cmd} --name ${jobname}" \ @@ -564,19 +564,19 @@ if [ ${stage} -le 7 ] && [ ${stop_stage} -ge 5 ]; then --output_dir "${asr_exp}" \ --hubert_dict "${dictdir}/dict.txt" \ ${_opts} ${pt_args} - + if [ "${iter}" -ge 0 ]; then log "Create a symbolic link of the pretrained model" if [ -L "${expdir}/pretrained_model_iter${iter}" ]; then log "Symbolic link ${expdir}/pretrained_model_iter${iter} already exists, remove it." rm "${expdir}/pretrained_model_iter${iter}" fi - + if ! 
[ -z "${asr_exp}" ]; then ln -s "../${asr_exp}" "${expdir}/pretrained_model_iter${iter}" fi fi - + log "Model saved in: ${asr_exp}" else log "Skip the pretraining stages" diff --git a/egs2/TEMPLATE/st1/st.sh b/egs2/TEMPLATE/st1/st.sh index 18303210f87..ebd2903d7a7 100755 --- a/egs2/TEMPLATE/st1/st.sh +++ b/egs2/TEMPLATE/st1/st.sh @@ -505,9 +505,9 @@ if ! "${skip_data_prep}"; then done utils/combine_data.sh --extra_files "${utt_extra_files}" "data/${train_set}_sp" ${_dirs} for extra_file in ${utt_extra_files}; do - python pyscripts/utils/remove_duplicate_keys.py data/"${train_set}_sp"/${extra_file} > data/"${train_set}_sp"/${extra_file}.tmp + python pyscripts/utils/remove_duplicate_keys.py data/"${train_set}_sp"/${extra_file} > data/"${train_set}_sp"/${extra_file}.tmp mv data/"${train_set}_sp"/${extra_file}.tmp data/"${train_set}_sp"/${extra_file} - done + done else log "Skip stage 2: Speed perturbation" fi @@ -544,7 +544,7 @@ if ! "${skip_data_prep}"; then for single_file in $(ls data/"${dset}"/${extra_file}*); do cp ${single_file} "${data_feats}${_suf}/${dset}" expand_utt_extra_files="${expand_utt_extra_files} $(basename ${single_file})" - done + done done echo "${expand_utt_extra_files}" utils/fix_data_dir.sh --utt_extra_files "${expand_utt_extra_files}" "${data_feats}${_suf}/${dset}" @@ -589,7 +589,7 @@ if ! "${skip_data_prep}"; then for single_file in $(ls data/"${dset}"/${extra_file}*); do cp ${single_file} "${data_feats}${_suf}/${dset}" expand_utt_extra_files="${expand_utt_extra_files} $(basename ${single_file})" - done + done done for extra_file in ${expand_utt_extra_files}; do LC_ALL=C sort -u -k1,1 "${data_feats}${_suf}/${dset}/${extra_file}" -o "${data_feats}${_suf}/${dset}/${extra_file}" @@ -638,7 +638,7 @@ if ! "${skip_data_prep}"; then for single_file in $(ls data/"${dset}"/${extra_file}*); do cp ${single_file} "${data_feats}${_suf}/${dset}" expand_utt_extra_files="${expand_utt_extra_files} $(basename ${single_file})" - done + done done utils/fix_data_dir.sh --utt_extra_files "${expand_utt_extra_files}*" "${data_feats}${_suf}/${dset}" for extra_file in ${expand_utt_extra_files}; do @@ -724,9 +724,9 @@ if ! "${skip_data_prep}"; then utils/fix_data_dir.sh --utt_extra_files "${utt_extra_files}" "${data_feats}/${dset}" for utt_extra_file in ${utt_extra_files}; do python pyscripts/utils/remove_duplicate_keys.py ${data_feats}/${dset}/${utt_extra_file} \ - > ${data_feats}/${dset}/${utt_extra_file}.tmp + > ${data_feats}/${dset}/${utt_extra_file}.tmp mv ${data_feats}/${dset}/${utt_extra_file}.tmp ${data_feats}/${dset}/${utt_extra_file} - done + done done # shellcheck disable=SC2002 @@ -931,7 +931,7 @@ if ! "${skip_train}"; then log "LM collect-stats started... log: '${_logdir}/stats.*.log'" # NOTE: --*_shape_file doesn't require length information if --batch_type=unsorted, # but it's used only for deciding the sample ids. - # shellcheck disable=SC2086 + # shellcheck disable=SC2046,SC2086 ${train_cmd} JOB=1:"${_nj}" "${_logdir}"/stats.JOB.log \ ${python} -m espnet2.bin.lm_train \ --collect_stats true \ @@ -947,7 +947,7 @@ if ! "${skip_train}"; then --train_shape_file "${_logdir}/train.JOB.scp" \ --valid_shape_file "${_logdir}/dev.JOB.scp" \ --output_dir "${_logdir}/stats.JOB" \ - ${_opts} ${lm_args} || { cat "${_logdir}"/stats.1.log; exit 1; } + ${_opts} ${lm_args} || { cat $(grep -l -i error "${_logdir}"/stats.*.log) ; exit 1; } # 4. Aggregate shape files _opts= @@ -1075,7 +1075,7 @@ if ! 
"${skip_train}"; then if "${use_ngram}"; then log "Stage 9: Ngram Training: train_set=${data_feats}/lm_train.txt" cut -f 2 -d " " ${data_feats}/lm_train.txt | lmplz -S "20%" --discount_fallback -o ${ngram_num} - >${ngram_exp}/${ngram_num}gram.arpa - build_binary -s ${ngram_exp}/${ngram_num}gram.arpa ${ngram_exp}/${ngram_num}gram.bin + build_binary -s ${ngram_exp}/${ngram_num}gram.arpa ${ngram_exp}/${ngram_num}gram.bin else log "Stage 9: Skip ngram stages: use_ngram=${use_ngram}" fi @@ -1427,7 +1427,7 @@ if ! "${skip_eval}"; then # 2. Submit decoding jobs log "Decoding started... log: '${_logdir}/st_inference.*.log'" - # shellcheck disable=SC2086 + # shellcheck disable=SC2046,SC2086 ${_cmd} --gpu "${_ngpu}" JOB=1:"${_nj}" "${_logdir}"/st_inference.JOB.log \ ${python} -m ${st_inference_tool} \ --batch_size ${batch_size} \ @@ -1437,7 +1437,7 @@ if ! "${skip_eval}"; then --st_train_config "${st_exp}"/config.yaml \ --st_model_file "${st_exp}"/"${inference_st_model}" \ --output_dir "${_logdir}"/output.JOB \ - ${_opts} ${inference_args} + ${_opts} ${inference_args} || { cat $(grep -l -i error "${_logdir}"/st_inference.*.log) ; exit 1; } # 3. Concatenates the output files from each jobs for f in token token_int score text; do @@ -1483,7 +1483,7 @@ if ! "${skip_eval}"; then ) \ <(<"${_data}/utt2spk" awk '{ print "(" $2 "-" $1 ")" }') \ >"${_scoredir}/hyp.trn.org" - + # remove utterance id perl -pe 's/\([^\)]+\)//g;' "${_scoredir}/ref.trn.org" > "${_scoredir}/ref.trn" perl -pe 's/\([^\)]+\)//g;' "${_scoredir}/hyp.trn.org" > "${_scoredir}/hyp.trn" @@ -1498,7 +1498,7 @@ if ! "${skip_eval}"; then -i "${_scoredir}/hyp.trn.detok" \ -m bleu chrf ter \ >> ${_scoredir}/result.tc.txt - + log "Write a case-sensitive BLEU (single-reference) result in ${_scoredir}/result.tc.txt" fi @@ -1530,8 +1530,8 @@ if ! "${skip_eval}"; then ) \ <(<"${_data}/utt2spk" awk '{ print "(" $2 "-" $1 ")" }') \ >"${_scoredir}/ref.trn.org.${ref_idx}" - - # + + # perl -pe 's/\([^\)]+\)//g;' "${_scoredir}/ref.trn.org.${ref_idx}" > "${_scoredir}/ref.trn.${ref_idx}" detokenizer.perl -l ${tgt_lang} -q < "${_scoredir}/ref.trn.${ref_idx}" > "${_scoredir}/ref.trn.detok.${ref_idx}" remove_punctuation.pl < "${_scoredir}/ref.trn.detok.${ref_idx}" > "${_scoredir}/ref.trn.detok.lc.rm.${ref_idx}" @@ -1667,11 +1667,11 @@ if ! "${skip_upload_hf}"; then gitlfs=$(git lfs --version 2> /dev/null || true) [ -z "${gitlfs}" ] && \ log "ERROR: You need to install git-lfs first" && \ - exit 1 - + exit 1 + dir_repo=${expdir}/hf_${hf_repo//"/"/"_"} [ ! -d "${dir_repo}" ] && git clone https://huggingface.co/${hf_repo} ${dir_repo} - + if command -v git &> /dev/null; then _creator_name="$(git config user.name)" _checkout="git checkout $(git show -s --format=%H)" @@ -1684,13 +1684,13 @@ if ! "${skip_upload_hf}"; then # foo/asr1 -> foo _corpus="${_task%/*}" _model_name="${_creator_name}/${_corpus}_$(basename ${packed_model} .zip)" - + # copy files in ${dir_repo} unzip -o ${packed_model} -d ${dir_repo} # Generate description file # shellcheck disable=SC2034 hf_task=speech-translation - # shellcheck disable=SC2034 + # shellcheck disable=SC2034 espnet_task=ST # shellcheck disable=SC2034 task_exp=${st_exp} diff --git a/egs2/TEMPLATE/tts1/tts.sh b/egs2/TEMPLATE/tts1/tts.sh index 0bd2e0debb8..13a3aaf2d5d 100755 --- a/egs2/TEMPLATE/tts1/tts.sh +++ b/egs2/TEMPLATE/tts1/tts.sh @@ -644,7 +644,7 @@ if ! "${skip_train}"; then # 3. Submit jobs log "TTS collect_stats started... 
log: '${_logdir}/stats.*.log'" - # shellcheck disable=SC2086 + # shellcheck disable=SC2046,SC2086 ${train_cmd} JOB=1:"${_nj}" "${_logdir}"/stats.JOB.log \ ${python} -m "espnet2.bin.${tts_task}_train" \ --collect_stats true \ @@ -665,7 +665,7 @@ if ! "${skip_train}"; then --train_shape_file "${_logdir}/train.JOB.scp" \ --valid_shape_file "${_logdir}/valid.JOB.scp" \ --output_dir "${_logdir}/stats.JOB" \ - ${_opts} ${train_args} || { cat "${_logdir}"/stats.1.log; exit 1; } + ${_opts} ${train_args} || { cat $(grep -l -i error "${_logdir}"/stats.*.log) ; exit 1; } # 4. Aggregate shape files _opts= @@ -1008,7 +1008,7 @@ if ! "${skip_eval}"; then # 3. Submit decoding jobs log "Decoding started... log: '${_logdir}/tts_inference.*.log'" - # shellcheck disable=SC2086 + # shellcheck disable=SC2046,SC2086 ${_cmd} --gpu "${_ngpu}" JOB=1:"${_nj}" "${_logdir}"/tts_inference.JOB.log \ ${python} -m espnet2.bin.tts_inference \ --ngpu "${_ngpu}" \ @@ -1019,7 +1019,7 @@ if ! "${skip_eval}"; then --train_config "${tts_exp}"/config.yaml \ --output_dir "${_logdir}"/output.JOB \ --vocoder_file "${vocoder_file}" \ - ${_opts} ${_ex_opts} ${inference_args} + ${_opts} ${_ex_opts} ${inference_args} || { cat $(grep -l -i error "${_logdir}"/tts_inference.*.log) ; exit 1; } # 4. Concatenates the output files from each jobs if [ -e "${_logdir}/output.${_nj}/norm" ]; then From 5518b6ba0af0bba9e9d59d6c47607656f49c9988 Mon Sep 17 00:00:00 2001 From: kamo-naoyuki Date: Thu, 12 May 2022 22:04:42 +0900 Subject: [PATCH 19/22] fix import order --- espnet/asr/pytorch_backend/recog.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/espnet/asr/pytorch_backend/recog.py b/espnet/asr/pytorch_backend/recog.py index b64131d1ad2..c6818e41ee7 100644 --- a/espnet/asr/pytorch_backend/recog.py +++ b/espnet/asr/pytorch_backend/recog.py @@ -1,8 +1,8 @@ """V2 backend for `asr_recog.py` using py:class:`espnet.nets.beam_search.BeamSearch`.""" -from packaging.version import parse as V import json import logging +from packaging.version import parse as V import torch From 9a2001fac56dddf5ba1c2eaec092cb420f83f7c9 Mon Sep 17 00:00:00 2001 From: kamo-naoyuki Date: Fri, 13 May 2022 03:44:11 +0900 Subject: [PATCH 20/22] fix for pytorch1.11 (+= became inplace op) --- espnet/nets/pytorch_backend/tacotron2/encoder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/espnet/nets/pytorch_backend/tacotron2/encoder.py b/espnet/nets/pytorch_backend/tacotron2/encoder.py index fee4b1c5552..148db765cc7 100644 --- a/espnet/nets/pytorch_backend/tacotron2/encoder.py +++ b/espnet/nets/pytorch_backend/tacotron2/encoder.py @@ -145,7 +145,7 @@ def forward(self, xs, ilens=None): if self.convs is not None: for i in six.moves.range(len(self.convs)): if self.use_residual: - xs += self.convs[i](xs) + xs = xs + self.convs[i](xs) else: xs = self.convs[i](xs) if self.blstm is None: From 2625be71a722e7eb030dff4f71d8dc9599a33844 Mon Sep 17 00:00:00 2001 From: kamo-naoyuki Date: Fri, 13 May 2022 03:46:24 +0900 Subject: [PATCH 21/22] remove warning --- test/espnet2/tasks/test_abs_task.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/espnet2/tasks/test_abs_task.py b/test/espnet2/tasks/test_abs_task.py index 7a9297f78e2..6b36d3b51d1 100644 --- a/test/espnet2/tasks/test_abs_task.py +++ b/test/espnet2/tasks/test_abs_task.py @@ -8,7 +8,7 @@ from espnet2.train.collate_fn import CommonCollateFn -class TestModel(AbsESPnetModel): +class DummyModel(AbsESPnetModel): def __init__(self): super().__init__() self.layer1 = 
torch.nn.Linear(1, 1) @@ -60,7 +60,7 @@ def optional_data_names(cls, train=True, inference=False): @classmethod def build_model(cls, args): - model = TestModel() + model = DummyModel() return model @classmethod From 9cfd6af64a28237019196cd495fbd2943790ce21 Mon Sep 17 00:00:00 2001 From: kamo-naoyuki Date: Fri, 13 May 2022 09:58:04 +0900 Subject: [PATCH 22/22] fix torch version checks: parse torch.__version__ before comparing --- espnet/asr/pytorch_backend/asr.py | 4 ++-- espnet/asr/pytorch_backend/recog.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/espnet/asr/pytorch_backend/asr.py b/espnet/asr/pytorch_backend/asr.py index 0effaaaa893..7a265f2badf 100644 --- a/espnet/asr/pytorch_backend/asr.py +++ b/espnet/asr/pytorch_backend/asr.py @@ -989,7 +989,7 @@ def recog(args): # It seems quantized LSTM only supports non-packed sequence before torch 1.4.0. # Reference issue: https://github.com/pytorch/pytorch/issues/27963 if ( - torch.__version__ < V("1.4.0") + V(torch.__version__) < V("1.4.0") and "lstm" in train_args.etype and torch.nn.LSTM in q_config ): @@ -999,7 +999,7 @@ def recog(args): # Dunno why but weight_observer from dynamic quantized module must have # dtype=torch.qint8 with torch < 1.5 although dtype=torch.float16 is supported. - if args.quantize_dtype == "float16" and torch.__version__ < V("1.5.0"): + if args.quantize_dtype == "float16" and V(torch.__version__) < V("1.5.0"): raise ValueError( "float16 dtype for dynamic quantization is not supported with torch " "version < 1.5.0. Switching to qint8 dtype instead." ) diff --git a/espnet/asr/pytorch_backend/recog.py b/espnet/asr/pytorch_backend/recog.py index c6818e41ee7..0824f6e7b26 100644 --- a/espnet/asr/pytorch_backend/recog.py +++ b/espnet/asr/pytorch_backend/recog.py @@ -54,7 +54,7 @@ def recog_v2(args): # See https://github.com/espnet/espnet/pull/3616 for more information. if ( - torch.__version__ < V("1.4.0") + V(torch.__version__) < V("1.4.0") and "lstm" in train_args.etype and torch.nn.LSTM in q_config ): raise ValueError( "Quantized LSTM in ESPnet is only supported with torch 1.4+." ) - if args.quantize_dtype == "float16" and torch.__version__ < V("1.5.0"): + if args.quantize_dtype == "float16" and V(torch.__version__) < V("1.5.0"): raise ValueError( "float16 dtype for dynamic quantization is not supported with torch " "version < 1.5.0. Switching to qint8 dtype instead."
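
Note on PATCH 20/22: for tensors, `xs += y` dispatches to the in-place `add_()`, so it mutates a tensor that the convolution module already saved for its backward pass, and autograd then refuses to backpropagate through the stale value. The snippet below is a minimal hypothetical repro, not part of the patch: `torch.nn.Linear` merely stands in for the encoder's conv stack, and the exact torch version at which this path starts erroring in ESPnet may differ from this toy case.

    import torch

    conv = torch.nn.Linear(4, 4)  # hypothetical stand-in for self.convs[i]
    x0 = torch.randn(2, 4, requires_grad=True)
    xs = x0 * 1.0                 # non-leaf intermediate, like the encoder's xs

    try:
        xs += conv(xs)            # in-place: mutates the input that conv saved for backward
        xs.sum().backward()       # raises RuntimeError about an in-place modification
    except RuntimeError as err:
        print("in-place += breaks autograd:", err)

    xs = x0 * 1.0
    xs = xs + conv(xs)            # out-of-place, as in the patch: saved input stays intact
    xs.sum().backward()           # succeeds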
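
Note on PATCH 22/22: `torch.__version__` is a plain string, so comparing it directly is lexicographic and two-digit minors sort wrong, while comparing a raw string against a parsed `Version` object is not reliably defined across `packaging` releases. Parsing both sides with `packaging.version.parse` (aliased to `V`), as the patch does, makes the guard a genuine version comparison. A small illustration:

    from packaging.version import parse as V

    print("1.10.1" < "1.4.0")          # True  -- lexicographic string order, wrong for versions
    print(V("1.10.1") < V("1.4.0"))    # False -- semantic version order, as intended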