From 4ff2ce1244e0af72439deaa59226eba434a70618 Mon Sep 17 00:00:00 2001 From: kamo-naoyuki Date: Tue, 10 May 2022 11:34:31 +0900 Subject: [PATCH 01/22] add pytorch=1.10.1, 1.11.0 to ci configurations --- .github/workflows/centos7.yml | 2 +- .github/workflows/ci.yaml | 11 ++++++++--- .github/workflows/debian9.yml | 2 +- .github/workflows/test_import.yaml | 2 +- README.md | 15 ++++++++------- setup.py | 1 + tools/Makefile | 2 +- tools/installers/install_torch.sh | 16 ++++++++++++++-- 8 files changed, 35 insertions(+), 16 deletions(-) diff --git a/.github/workflows/centos7.yml b/.github/workflows/centos7.yml index 94d5973e859..d365c2e4961 100644 --- a/.github/workflows/centos7.yml +++ b/.github/workflows/centos7.yml @@ -19,7 +19,7 @@ jobs: # ImportError: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found # (required by /__w/espnet/espnet/tools/venv/envs/espnet/lib/python3.6/site-packages/pyworld/pyworld.cpython-36m-x86_64-linux-gnu.so) # NOTE(kamo): The issue doens't exist for python3.7? - TH_VERSION: 1.10.1 + TH_VERSION: 1.11.0 CHAINER_VERSION: 6.0.0 USE_CONDA: true CC: /opt/rh/devtoolset-7/root/usr/bin/gcc diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index a01edd95bc7..ac69ca49b32 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -16,19 +16,24 @@ jobs: matrix: os: [ubuntu-18.04] python-version: [3.7] - pytorch-version: [1.3.1, 1.4.0, 1.5.1, 1.6.0, 1.7.1, 1.8.1, 1.9.1, 1.10.1] + pytorch-version: [1.3.1, 1.4.0, 1.5.1, 1.6.0, 1.7.1, 1.8.1, 1.9.1, 1.10.2, 1.11.0] chainer-version: [6.0.0] # NOTE(kamo): Conda is tested by Circle-CI use-conda: [false] include: - os: ubuntu-20.04 python-version: 3.8 - pytorch-version: 1.10.1 + pytorch-version: 1.11.0 chainer-verssion: 6.0.0 use-conda: false - os: ubuntu-20.04 python-version: 3.9 - pytorch-version: 1.10.1 + pytorch-version: 1.11.0 + chainer-verssion: 6.0.0 + use-conda: false + - os: ubuntu-20.04 + python-version: 3.10 + pytorch-version: 1.11.0 chainer-verssion: 6.0.0 use-conda: false steps: diff --git a/.github/workflows/debian9.yml b/.github/workflows/debian9.yml index a29e5474ad4..79a68e8383d 100644 --- a/.github/workflows/debian9.yml +++ b/.github/workflows/debian9.yml @@ -15,7 +15,7 @@ jobs: image: debian:9 env: ESPNET_PYTHON_VERSION: 3.7 - TH_VERSION: 1.10.1 + TH_VERSION: 1.11.0 CHAINER_VERSION: 6.0.0 USE_CONDA: true CC: gcc-6 diff --git a/.github/workflows/test_import.yaml b/.github/workflows/test_import.yaml index ead9f587c07..1031d3e5601 100644 --- a/.github/workflows/test_import.yaml +++ b/.github/workflows/test_import.yaml @@ -16,7 +16,7 @@ jobs: matrix: os: [ubuntu-latest] python-version: [3.9] - pytorch-version: [1.10.1] + pytorch-version: [1.11.0] steps: - uses: actions/checkout@v2 - uses: actions/cache@v1 diff --git a/README.md b/README.md index 67579053c77..0493ec5b56e 100644 --- a/README.md +++ b/README.md @@ -2,14 +2,15 @@ # ESPnet: end-to-end speech processing toolkit -|system/pytorch ver.|1.3.1|1.4.0|1.5.1|1.6.0|1.7.1|1.8.1|1.9.1|1.10.1| -| :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | -|ubuntu20/python3.9/pip||||||||[![Github Actions](https://github.com/espnet/espnet/workflows/CI/badge.svg)](https://github.com/espnet/espnet/actions)| -|ubuntu20/python3.8/pip||||||||[![Github Actions](https://github.com/espnet/espnet/workflows/CI/badge.svg)](https://github.com/espnet/espnet/actions)| +|system/pytorch ver.|1.3.1|1.4.0|1.5.1|1.6.0|1.7.1|1.8.1|1.9.1|1.10.2|1.11.0| +| :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | 
TH_VERSION := 1.10.1 # Use pip for pytorch installation even if you have anaconda diff --git a/tools/installers/install_torch.sh b/tools/installers/install_torch.sh index 285e37b6fd4..7faa4d535e7 100755 --- a/tools/installers/install_torch.sh +++ b/tools/installers/install_torch.sh @@ -121,18 +121,30 @@ log "[INFO] torch_version=${torch_version}" log "[INFO] cuda_version=${cuda_version}" -if $(pytorch_plus 1.10.2); then +if $(pytorch_plus 1.11.1); then log "[ERROR] This script doesn't support pytorch=${torch_version}" exit 1 +elif $(pytorch_plus 1.11.0); then + check_python_version 3.11 # Error if python>= + check_cuda_version 11.5 11.3 11.1 10.2 # Error if cuda_version doesn't match with any given numbers + install_torch 0.11.0 10.2 # install_torch + +elif $(pytorch_plus 1.10.2); then + check_python_version 3.10 # Error if python>= + check_cuda_version 11.3 11.1 10.2 # Error if cuda_version doesn't match with any given numbers + install_torch 0.10.2 10.2 # install_torch + elif $(pytorch_plus 1.10.1); then check_python_version 3.10 # Error if python>= check_cuda_version 11.3 11.1 10.2 # Error if cuda_version doesn't match with any given numbers install_torch 0.10.1 10.2 # install_torch + elif $(pytorch_plus 1.10.0); then - check_python_version 3.10 # Error if python>= + check_python_version 3.11 # Error if python>= check_cuda_version 11.3 11.1 10.2 # Error if cuda_version doesn't match with any given numbers install_torch 0.10.0 10.2 # install_torch + elif $(pytorch_plus 1.9.2); then log "[ERROR] pytorch=${torch_version} doesn't exist" exit 1 From b98fc861939310b73b50f959bc45176da10ef493 Mon Sep 17 00:00:00 2001 From: kamo-naoyuki Date: Tue, 10 May 2022 11:52:27 +0900 Subject: [PATCH 02/22] fix --- .github/workflows/ci.yaml | 2 +- .mergify.yml | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index bbd021e7afd..058dfea6288 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -32,7 +32,7 @@ jobs: chainer-verssion: 6.0.0 use-conda: false - os: ubuntu-20.04 - python-version: 3.10 + python-version: "3.10" pytorch-version: 1.11.0 chainer-verssion: 6.0.0 use-conda: false diff --git a/.mergify.yml b/.mergify.yml index 0304250182c..c247939e228 100644 --- a/.mergify.yml +++ b/.mergify.yml @@ -4,16 +4,16 @@ pull_request_rules: - "label=auto-merge" - "check-success=test_centos7" - "check-success=test_debian9" - - "check-success=linter_and_test (ubuntu-18.04, 3.7, 1.3.1, 6.0.0, false)" - "check-success=linter_and_test (ubuntu-18.04, 3.7, 1.4.0, 6.0.0, false)" - "check-success=linter_and_test (ubuntu-18.04, 3.7, 1.5.1, 6.0.0, false)" - "check-success=linter_and_test (ubuntu-18.04, 3.7, 1.6.0, 6.0.0, false)" - "check-success=linter_and_test (ubuntu-18.04, 3.7, 1.7.1, 6.0.0, false)" - "check-success=linter_and_test (ubuntu-18.04, 3.7, 1.8.1, 6.0.0, false)" - "check-success=linter_and_test (ubuntu-18.04, 3.7, 1.9.1, 6.0.0, false)" - - "check-success=linter_and_test (ubuntu-18.04, 3.7, 1.10.1, 6.0.0, false)" - - "check-success=linter_and_test (ubuntu-20.04, 3.8, 1.10.1, false, 6.0.0)" - - "check-success=linter_and_test (ubuntu-20.04, 3.9, 1.10.1, false, 6.0.0)" + - "check-success=linter_and_test (ubuntu-18.04, 3.7, 1.10.2, 6.0.0, false)" + - "check-success=linter_and_test (ubuntu-18.04, 3.7, 1.11.0, 6.0.0, false)" + - "check-success=linter_and_test (ubuntu-20.04, 3.8, 1.11.0, false, 6.0.0)" + - "check-success=linter_and_test (ubuntu-20.04, 3.9, 1.11.0, false, 6.0.0)" - "check-success=test_import 
(ubuntu-latest, 3.9, 1.10.1)" actions: merge: From d234b9ab30bbc2bb6fd42d6335421a6f8a9ed637 Mon Sep 17 00:00:00 2001 From: kamo-naoyuki Date: Thu, 12 May 2022 17:10:40 +0900 Subject: [PATCH 03/22] fix --- tools/Makefile | 8 +------- tools/installers/install_torch.sh | 6 ++++++ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tools/Makefile b/tools/Makefile index cad135421a2..338fd8d22fe 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -1,5 +1,5 @@ # PyTorch version: 1.3.1, 1.4.0, 1.5.1, 1.6.0, 1.7.1, 1.8.1, 1.9.1, 1.10.0, 1.10.1 and 1.11.0 are tested. -TH_VERSION := 1.10.1 +TH_VERSION := 1.11.0 # Use pip for pytorch installation even if you have anaconda ifneq ($(shell test -f ./activate_python.sh && grep 'conda activate' ./activate_python.sh),) @@ -28,14 +28,8 @@ endif all: kaldi showenv python conda_packages.done sctk.done sph2pipe.done check_install -ifneq ($(strip $(CHAINER_VERSION)),) python: activate_python.sh espnet.done pytorch.done chainer.done fairscale.done torch_optimizer.done extra: warp-ctc.done warp-transducer.done chainer_ctc.done nkf.done moses.done mwerSegmenter.done pesq kenlm.done pyopenjtalk.done py3mmseg.done beamformit.done fairseq.done s3prl.done k2.done transformers.done phonemizer.done longformer.done -else -python: activate_python.sh espnet.done pytorch.done fairscale.done torch_optimizer.done -extra: warp-ctc.done warp-transducer.done nkf.done moses.done mwerSegmenter.done pesq kenlm.done pyopenjtalk.done py3mmseg.done beamformit.done fairseq.done s3prl.done k2.done transformers.done phonemizer.done longformer.done -endif - kaldi: test -f kaldi/egs/wsj/s5/utils/parse_options.sh || git clone --depth 1 https://github.com/kaldi-asr/kaldi diff --git a/tools/installers/install_torch.sh b/tools/installers/install_torch.sh index 7faa4d535e7..6444f91562e 100755 --- a/tools/installers/install_torch.sh +++ b/tools/installers/install_torch.sh @@ -51,6 +51,12 @@ install_torch(){ if [ -z "${cuda_version}" ]; then log conda install -y "pytorch=${torch_version}" "torchaudio=$1" cpuonly -c pytorch conda install -y "pytorch=${torch_version}" "torchaudio=$1" cpuonly -c pytorch + elif [ "${cuda_version}" = "11.5" ]; then + # NOTE(kamo): In my environment, conda-forge only could installed, but I don't know why @ 12, May, 2022 + cudatoolkit_channel=conda-forge + log conda install -y "pytorch=${torch_version}" "torchaudio=$1" "cudatoolkit=${cuda_version}" -c pytorch -c "${cudatoolkit_channel}" + conda install -y "pytorch=${torch_version}" "torchaudio=$1" "cudatoolkit=${cuda_version}" -c pytorch -c "${cudatoolkit_channel}" + elif [ "${cuda_version}" = "11.1" ] || [ "${cuda_version}" = "11.2" ]; then # Anaconda channel, which is default main channel, doesn't provide cudatoolkit=11.1, 11.2 now (Any pytorch version doesn't provide cuda=11.2). 
# https://anaconda.org/anaconda/cudatoolkit/files From b7cfdd9a70559271e45de103e242228f94e837ff Mon Sep 17 00:00:00 2001 From: kamo-naoyuki Date: Thu, 12 May 2022 18:05:41 +0900 Subject: [PATCH 04/22] Change LooseVersion to parse --- tools/installers/install_chainer.sh | 4 ++-- tools/installers/install_fairscale.sh | 4 ++-- tools/installers/install_fairseq.sh | 4 ++-- tools/installers/install_k2.sh | 6 +++--- tools/installers/install_longformer.sh | 4 ++-- tools/installers/install_s3prl.sh | 4 ++-- tools/installers/install_speechbrain.sh | 2 +- tools/installers/install_torch.sh | 4 ++-- tools/installers/install_torch_optimizer.sh | 4 ++-- tools/installers/install_warp-ctc.sh | 6 +++--- tools/installers/install_warp-transducer.sh | 2 +- 11 files changed, 22 insertions(+), 22 deletions(-) diff --git a/tools/installers/install_chainer.sh b/tools/installers/install_chainer.sh index 9ce037f68f6..9000bfb0d5a 100755 --- a/tools/installers/install_chainer.sh +++ b/tools/installers/install_chainer.sh @@ -22,7 +22,7 @@ python_version=$(python3 -c "import sys; print(sys.version.split()[0])") cuda_version_without_dot="${cuda_version/\./}" python_plus(){ python3 <= L('$1'): print("true") else: @@ -31,7 +31,7 @@ EOF } cuda_plus(){ python3 <= L('$1'): print("true") else: diff --git a/tools/installers/install_fairscale.sh b/tools/installers/install_fairscale.sh index 876c0b31ead..4988a75736d 100755 --- a/tools/installers/install_fairscale.sh +++ b/tools/installers/install_fairscale.sh @@ -9,7 +9,7 @@ fi torch_version=$(python3 -c "import torch; print(torch.__version__)") python_36_plus=$(python3 <= V("3.6"): @@ -22,7 +22,7 @@ EOF pt_plus(){ python3 <= L('$1'): print("true") else: diff --git a/tools/installers/install_fairseq.sh b/tools/installers/install_fairseq.sh index 780d8ce81b0..e13970c036f 100755 --- a/tools/installers/install_fairseq.sh +++ b/tools/installers/install_fairseq.sh @@ -9,7 +9,7 @@ fi torch_version=$(python3 -c "import torch; print(torch.__version__)") python_36_plus=$(python3 <= V("3.6"): @@ -22,7 +22,7 @@ EOF pt_plus(){ python3 <= L('$1'): print("true") else: diff --git a/tools/installers/install_k2.sh b/tools/installers/install_k2.sh index 667edb86a03..d41c16d39e7 100755 --- a/tools/installers/install_k2.sh +++ b/tools/installers/install_k2.sh @@ -27,7 +27,7 @@ fi python_36_plus=$(python3 <= V("3.6"): @@ -64,7 +64,7 @@ libc_version="$(${libc_path} | grep "GNU C Library" | grep -oP "version [0-9]*.[ pytorch_plus(){ python3 <= L('$1'): print("true") else: @@ -74,7 +74,7 @@ EOF libc_plus(){ python3 <= L('$1'): print("true") else: diff --git a/tools/installers/install_longformer.sh b/tools/installers/install_longformer.sh index c942abb0dd9..d054fad50ea 100755 --- a/tools/installers/install_longformer.sh +++ b/tools/installers/install_longformer.sh @@ -9,7 +9,7 @@ fi torch_version=$(python3 -c "import torch; print(torch.__version__)") python_36_plus=$(python3 <= V("3.6"): @@ -21,7 +21,7 @@ EOF pt_plus(){ python3 <= L('$1'): print("true") else: diff --git a/tools/installers/install_s3prl.sh b/tools/installers/install_s3prl.sh index 66f38af0e36..eeea6946c9c 100755 --- a/tools/installers/install_s3prl.sh +++ b/tools/installers/install_s3prl.sh @@ -10,7 +10,7 @@ if [ $# != 0 ]; then fi torch_17_plus=$(python3 <= V("1.7"): @@ -21,7 +21,7 @@ EOF ) python_36_plus=$(python3 <= V("3.6"): diff --git a/tools/installers/install_speechbrain.sh b/tools/installers/install_speechbrain.sh index b3c2310206e..cb26a78f3ad 100755 --- a/tools/installers/install_speechbrain.sh +++ 
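Note on the hunk above: the CUDA 11.5 branch exists because the anaconda main channel does not host cudatoolkit=11.5, so the installer pulls it from conda-forge for that version. A minimal standalone sketch of the channel-selection pattern; the 11.5 mapping mirrors the hunk, while grouping 11.1/11.2 under conda-forge (per the comment above) and the "anaconda" default are assumptions for illustration:

    # Sketch: pick a conda channel that actually hosts the requested cudatoolkit.
    case "${cuda_version}" in
        11.5|11.1|11.2) cudatoolkit_channel=conda-forge ;;  # missing from the main channel
        *)              cudatoolkit_channel=anaconda ;;     # assumed default
    esac
    conda install -y "pytorch=${torch_version}" "torchaudio=${torchaudio_version}" \
        "cudatoolkit=${cuda_version}" -c pytorch -c "${cudatoolkit_channel}"
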
b/tools/installers/install_speechbrain.sh @@ -8,7 +8,7 @@ if [ $# != 0 ]; then fi torch_18_plus=$(python3 <= V("1.8"): diff --git a/tools/installers/install_torch.sh b/tools/installers/install_torch.sh index 6444f91562e..d542183db53 100755 --- a/tools/installers/install_torch.sh +++ b/tools/installers/install_torch.sh @@ -29,7 +29,7 @@ cuda_version_without_dot="${cuda_version/\./}" python_plus(){ python3 <= L('$1'): print("true") else: @@ -38,7 +38,7 @@ EOF } pytorch_plus(){ python3 <= L('$1'): print("true") else: diff --git a/tools/installers/install_torch_optimizer.sh b/tools/installers/install_torch_optimizer.sh index 5d8565deead..a4b42d4fade 100755 --- a/tools/installers/install_torch_optimizer.sh +++ b/tools/installers/install_torch_optimizer.sh @@ -9,7 +9,7 @@ fi torch_version=$(python3 -c "import torch; print(torch.__version__)") python_36_plus=$(python3 <= V("3.6"): @@ -22,7 +22,7 @@ EOF pt_plus(){ python3 <= L('$1'): print("true") else: diff --git a/tools/installers/install_warp-ctc.sh b/tools/installers/install_warp-ctc.sh index 38267d955d4..259146ea388 100755 --- a/tools/installers/install_warp-ctc.sh +++ b/tools/installers/install_warp-ctc.sh @@ -9,7 +9,7 @@ if [ $# != 0 ]; then fi torch_17_plus=$(python3 <= V("1.7"): @@ -20,7 +20,7 @@ EOF ) torch_11_plus=$(python3 <= V("1.1"): @@ -31,7 +31,7 @@ EOF ) torch_10_plus=$(python3 <= V("1.0"): diff --git a/tools/installers/install_warp-transducer.sh b/tools/installers/install_warp-transducer.sh index 9ed3ce18fc3..910083509d4 100755 --- a/tools/installers/install_warp-transducer.sh +++ b/tools/installers/install_warp-transducer.sh @@ -9,7 +9,7 @@ fi # TODO(kamo): Consider clang case # Note: Requires gcc>=4.9.2 to build extensions with pytorch>=1.0 if python3 -c 'import torch as t;assert t.__version__[0] == "1"' &> /dev/null; then \ - python3 -c "from distutils.version import LooseVersion as V;assert V('$(gcc -dumpversion)') >= V('4.9.2'), 'Requires gcc>=4.9.2'"; \ + python3 -c "from packaging.version import parse as V;assert V('$(gcc -dumpversion)') >= V('4.9.2'), 'Requires gcc>=4.9.2'"; \ fi rm -rf warp-transducer From 7d5242212403e740c4d5b8ebd9a346a991ea50a9 Mon Sep 17 00:00:00 2001 From: kamo-naoyuki Date: Thu, 12 May 2022 18:09:15 +0900 Subject: [PATCH 05/22] fix --- test/espnet2/train/test_reporter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/espnet2/train/test_reporter.py b/test/espnet2/train/test_reporter.py index c928c52523a..9cd796d665c 100644 --- a/test/espnet2/train/test_reporter.py +++ b/test/espnet2/train/test_reporter.py @@ -53,7 +53,7 @@ def test_register(weight1, weight2): desired[k] /= weight1 + weight2 for k1, k2 in reporter.get_all_keys(): - if k2 in ("time", "total_count"): + if k2 in ("time", "total_count", "gpu_max_cached_mem_GB", "gpu_cached_mem_GB"): continue np.testing.assert_allclose(reporter.get_value(k1, k2), desired[k2]) From f899a05768436cc38fb432d6f002ab667983abbd Mon Sep 17 00:00:00 2001 From: kamo-naoyuki Date: Thu, 12 May 2022 18:09:33 +0900 Subject: [PATCH 06/22] fix --- espnet/nets/pytorch_backend/nets_utils.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/espnet/nets/pytorch_backend/nets_utils.py b/espnet/nets/pytorch_backend/nets_utils.py index a21ff54a78e..4bfeeb96cb2 100644 --- a/espnet/nets/pytorch_backend/nets_utils.py +++ b/espnet/nets/pytorch_backend/nets_utils.py @@ -152,6 +152,10 @@ def make_pad_mask(lengths, xs=None, length_dim=-1, maxlen=None): if not isinstance(lengths, list): lengths = lengths.tolist() + else: + assert isinstance(lengths, 
torch.Tensor), type(lengths)
\ --asr-args "--model_conf extract_feats_in_collect_stats=false --max_epoch=1" - + echo "==== use_k2, num_paths == nll_batch_size, feats_type=raw, token_types=bpe, model_conf.extract_feats_in_collect_stats=False, normalize=utt_mvn ===" ./run.sh --num_paths 20 --nll_batch_size 20 --use_k2 true --ngpu 0 --stage 12 --stop-stage 13 --skip-upload false --feats-type "raw" --token-type "bpe" \ --feats_normalize "utterance_mvn" --lm-args "--max_epoch=1" --python "${python}" \ @@ -68,7 +68,7 @@ rm -rf exp dump data # NOTE(kan-bayashi): pytorch 1.4 - 1.6 works but 1.6 has a problem with CPU, # so we test this recipe using only pytorch > 1.6 here. # See also: https://github.com/pytorch/pytorch/issues/42446 -if python3 -c 'import torch as t; from distutils.version import LooseVersion as L; assert L(t.__version__) > L("1.6")' &> /dev/null; then +if python3 -c 'import torch as t; from packaging.version import parse as L; assert L(t.__version__) > L("1.6")' &> /dev/null; then ./run.sh --fs 22050 --tts_task gan_tts --feats_extract linear_spectrogram --feats_normalize none --inference_model latest.pth \ --ngpu 0 --stop-stage 8 --skip-upload false --train-args "--num_iters_per_epoch 1 --max_epoch 1" --python "${python}" rm -rf exp dump data @@ -76,7 +76,7 @@ fi cd "${cwd}" # [ESPnet2] test enh recipe -if python -c 'import torch as t; from distutils.version import LooseVersion as L; assert L(t.__version__) >= L("1.2.0")' &> /dev/null; then +if python -c 'import torch as t; from packaging.version import parse as L; assert L(t.__version__) >= L("1.2.0")' &> /dev/null; then cd ./egs2/mini_an4/enh1 echo "==== [ESPnet2] ENH ===" ./run.sh --stage 1 --stop-stage 1 --python "${python}" @@ -101,7 +101,7 @@ if python3 -c "import fairseq" &> /dev/null; then fi # [ESPnet2] test enh_asr1 recipe -if python -c 'import torch as t; from distutils.version import LooseVersion as L; assert L(t.__version__) >= L("1.2.0")' &> /dev/null; then +if python -c 'import torch as t; from packaging.version import parse as L; assert L(t.__version__) >= L("1.2.0")' &> /dev/null; then cd ./egs2/mini_an4/enh_asr1 echo "==== [ESPnet2] ENH_ASR ===" ./run.sh --ngpu 0 --stage 0 --stop-stage 15 --skip-upload_hf false --feats-type "raw" --spk-num 1 --enh_asr_args "--max_epoch=1 --enh_separator_conf num_spk=1" --python "${python}" @@ -122,7 +122,7 @@ done for t in ${token_types}; do ./run.sh --stage 5 --stop-stage 5 --tgt_token_type "${t}" --src_token_type "${t}" --python "${python}" done -for t in ${feats_types}; do +for t in ${feats_types}; do for t2 in ${token_types}; do echo "==== feats_type=${t}, token_types=${t2} ===" ./run.sh --ngpu 0 --stage 6 --stop-stage 13 --skip-upload false --feats-type "${t}" --tgt_token_type "${t2}" --src_token_type "${t2}" \ @@ -147,7 +147,7 @@ cd "${cwd}" # [ESPnet2] Validate configuration files echo "" > dummy_token_list echo "==== [ESPnet2] Validation configuration files ===" -if python3 -c 'import torch as t; from distutils.version import LooseVersion as L; assert L(t.__version__) >= L("1.8.0")' &> /dev/null; then +if python3 -c 'import torch as t; from packaging.version import parse as L; assert L(t.__version__) >= L("1.8.0")' &> /dev/null; then for f in egs2/*/asr1/conf/train_asr*.yaml; do if [ "$f" == "egs2/fsc/asr1/conf/train_asr.yaml" ]; then if ! 
python3 -c "import s3prl" > /dev/null; then diff --git a/egs2/aishell4/enh1/local/generate_fe_trainingdata.py.patch b/egs2/aishell4/enh1/local/generate_fe_trainingdata.py.patch index a7666a5a756..47c079997eb 100644 --- a/egs2/aishell4/enh1/local/generate_fe_trainingdata.py.patch +++ b/egs2/aishell4/enh1/local/generate_fe_trainingdata.py.patch @@ -2,9 +2,9 @@ +++ generate_fe_trainingdata.new.py @@ -1,8 +1,8 @@ #!/usr/bin/env python - + -import io -+from distutils.version import LooseVersion ++from packaging.version import parse as V import os -import subprocess +import sys @@ -14,17 +14,17 @@ @@ -12,6 +12,10 @@ import librosa import argparse - + + -+is_py_3_3_plus = LooseVersion(sys.version) > LooseVersion("3.3") ++is_py_3_3_plus = V(sys.version) > V("3.3") + + def get_line_context(file_path, line_number): return linecache.getline(file_path, line_number).strip() - + @@ -119,7 +123,7 @@ return data / max_val - + def add_noise(clean, noise, rir, snr): - random.seed(time.clock()) + random.seed(time.perf_counter() if is_py_3_3_plus else time.clock()) @@ -32,9 +32,9 @@ noise = add_reverb(noise, rir[:, 16:24]) noise = noise[:-7999] @@ -189,7 +193,7 @@ - + for i in range(args.wavnum): - + - random.seed(time.clock()) + random.seed(time.perf_counter() if is_py_3_3_plus else time.clock()) wav1idx = random.randint(0, len(open(wavlist1,'r').readlines())-1) diff --git a/egs2/fsc/asr1/run.sh b/egs2/fsc/asr1/run.sh index 70b42c7ac61..3cea8d9bbc7 100755 --- a/egs2/fsc/asr1/run.sh +++ b/egs2/fsc/asr1/run.sh @@ -9,7 +9,7 @@ train_set="train" valid_set="valid" test_sets="test valid" -if python3 -c 'import torch as t; from distutils.version import LooseVersion as L; assert L(t.__version__) >= L("1.7.0")' &> /dev/null; then +if python3 -c 'import torch as t; from packaging.version import parse as L; assert L(t.__version__) >= L("1.7.0")' &> /dev/null; then asr_config=conf/train_asr.yaml else asr_config=conf/tuning/train_asr_transformer_adam_specaug.yaml #s3prl is installed when pytorch > 1.7. Hence using default frontend diff --git a/egs2/fsc_challenge/asr1/run.sh b/egs2/fsc_challenge/asr1/run.sh index 70b42c7ac61..3cea8d9bbc7 100755 --- a/egs2/fsc_challenge/asr1/run.sh +++ b/egs2/fsc_challenge/asr1/run.sh @@ -9,7 +9,7 @@ train_set="train" valid_set="valid" test_sets="test valid" -if python3 -c 'import torch as t; from distutils.version import LooseVersion as L; assert L(t.__version__) >= L("1.7.0")' &> /dev/null; then +if python3 -c 'import torch as t; from packaging.version import parse as L; assert L(t.__version__) >= L("1.7.0")' &> /dev/null; then asr_config=conf/train_asr.yaml else asr_config=conf/tuning/train_asr_transformer_adam_specaug.yaml #s3prl is installed when pytorch > 1.7. Hence using default frontend diff --git a/egs2/fsc_unseen/asr1/run.sh b/egs2/fsc_unseen/asr1/run.sh index 70b42c7ac61..3cea8d9bbc7 100755 --- a/egs2/fsc_unseen/asr1/run.sh +++ b/egs2/fsc_unseen/asr1/run.sh @@ -9,7 +9,7 @@ train_set="train" valid_set="valid" test_sets="test valid" -if python3 -c 'import torch as t; from distutils.version import LooseVersion as L; assert L(t.__version__) >= L("1.7.0")' &> /dev/null; then +if python3 -c 'import torch as t; from packaging.version import parse as L; assert L(t.__version__) >= L("1.7.0")' &> /dev/null; then asr_config=conf/train_asr.yaml else asr_config=conf/tuning/train_asr_transformer_adam_specaug.yaml #s3prl is installed when pytorch > 1.7. 
Hence using default frontend diff --git a/espnet/asr/pytorch_backend/asr.py b/espnet/asr/pytorch_backend/asr.py index d487380bd3f..a83d9a27dc1 100644 --- a/espnet/asr/pytorch_backend/asr.py +++ b/espnet/asr/pytorch_backend/asr.py @@ -4,7 +4,7 @@ """Training/decoding definition for the speech recognition task.""" import copy -from distutils.version import LooseVersion +from packaging.version import parse as V import itertools import json import logging @@ -989,7 +989,7 @@ def recog(args): # It seems quantized LSTM only supports non-packed sequence before torch 1.4.0. # Reference issue: https://github.com/pytorch/pytorch/issues/27963 if ( - torch.__version__ < LooseVersion("1.4.0") + torch.__version__ < V("1.4.0") and "lstm" in train_args.etype and torch.nn.LSTM in q_config ): @@ -999,7 +999,7 @@ def recog(args): # Dunno why but weight_observer from dynamic quantized module must have # dtype=torch.qint8 with torch < 1.5 although dtype=torch.float16 is supported. - if args.quantize_dtype == "float16" and torch.__version__ < LooseVersion( + if args.quantize_dtype == "float16" and torch.__version__ < V( "1.5.0" ): raise ValueError( diff --git a/espnet/asr/pytorch_backend/recog.py b/espnet/asr/pytorch_backend/recog.py index 6c6d4ce1194..68fea23a144 100644 --- a/espnet/asr/pytorch_backend/recog.py +++ b/espnet/asr/pytorch_backend/recog.py @@ -1,6 +1,6 @@ """V2 backend for `asr_recog.py` using py:class:`espnet.nets.beam_search.BeamSearch`.""" -from distutils.version import LooseVersion +from packaging.version import parse as V import json import logging @@ -54,7 +54,7 @@ def recog_v2(args): # See https://github.com/espnet/espnet/pull/3616 for more information. if ( - torch.__version__ < LooseVersion("1.4.0") + torch.__version__ < V("1.4.0") and "lstm" in train_args.etype and torch.nn.LSTM in q_config ): @@ -62,7 +62,7 @@ def recog_v2(args): "Quantized LSTM in ESPnet is only supported with torch 1.4+." 
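
The three run.sh hunks above share one gating idiom: run a tiny check under python3 -c, let a failing assert produce a nonzero exit status, and branch in shell on that status (stdout/stderr are discarded with &> /dev/null, so only the exit code matters). A condensed sketch of the idiom, using the config paths from the hunks above:

    # Branch on the installed torch version; a failed assert -> nonzero exit -> else branch.
    if python3 -c 'import torch as t; from packaging.version import parse as L; assert L(t.__version__) >= L("1.7.0")' &> /dev/null; then
        asr_config=conf/train_asr.yaml   # torch>=1.7: s3prl frontend is available
    else
        asr_config=conf/tuning/train_asr_transformer_adam_specaug.yaml   # default frontend
    fi
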
) - if args.quantize_dtype == "float16" and torch.__version__ < LooseVersion( + if args.quantize_dtype == "float16" and torch.__version__ < V( "1.5.0" ): raise ValueError( diff --git a/espnet/nets/pytorch_backend/ctc.py b/espnet/nets/pytorch_backend/ctc.py index f834967f645..c974df09b7a 100644 --- a/espnet/nets/pytorch_backend/ctc.py +++ b/espnet/nets/pytorch_backend/ctc.py @@ -1,4 +1,4 @@ -from distutils.version import LooseVersion +from packaging.version import parse as V import logging import numpy as np @@ -30,7 +30,7 @@ def __init__(self, odim, eprojs, dropout_rate, ctc_type="warpctc", reduce=True): # In case of Pytorch >= 1.7.0, CTC will be always builtin self.ctc_type = ( ctc_type - if LooseVersion(torch.__version__) < LooseVersion("1.7.0") + if V(torch.__version__) < V("1.7.0") else "builtin" ) diff --git a/espnet/nets/pytorch_backend/e2e_tts_transformer.py b/espnet/nets/pytorch_backend/e2e_tts_transformer.py index 9f860285d55..e71f1973fda 100644 --- a/espnet/nets/pytorch_backend/e2e_tts_transformer.py +++ b/espnet/nets/pytorch_backend/e2e_tts_transformer.py @@ -714,7 +714,7 @@ def forward(self, xs, ilens, ys, labels, olens, spembs=None, *args, **kwargs): labels = labels[:, :max_olen] # forward encoder - x_masks = self._source_mask(ilens) + x_masks = self._source_mask(ilens).to(xs.device) hs, h_masks = self.encoder(xs, x_masks) # integrate speaker embedding @@ -732,7 +732,7 @@ def forward(self, xs, ilens, ys, labels, olens, spembs=None, *args, **kwargs): ys_in = self._add_first_frame_and_remove_last_frame(ys_in) # forward decoder - y_masks = self._target_mask(olens_in) + y_masks = self._target_mask(olens_in).to(xs.device) zs, _ = self.decoder(ys_in, y_masks, hs, h_masks) # (B, Lmax//r, odim * r) -> (B, Lmax//r * r, odim) before_outs = self.feat_out(zs).view(zs.size(0), -1, self.odim) @@ -975,7 +975,7 @@ def calculate_all_attentions( self.eval() with torch.no_grad(): # forward encoder - x_masks = self._source_mask(ilens) + x_masks = self._source_mask(ilens).to(xs.device) hs, h_masks = self.encoder(xs, x_masks) # integrate speaker embedding @@ -994,7 +994,7 @@ def calculate_all_attentions( ys_in = self._add_first_frame_and_remove_last_frame(ys_in) # forward decoder - y_masks = self._target_mask(olens_in) + y_masks = self._target_mask(olens_in).to(xs.device) zs, _ = self.decoder(ys_in, y_masks, hs, h_masks) # calculate final outputs @@ -1097,7 +1097,7 @@ def _source_mask(self, ilens): [[1, 1, 1, 0, 0]]], dtype=torch.uint8) """ - x_masks = make_non_pad_mask(ilens).to(next(self.parameters()).device) + x_masks = make_non_pad_mask(ilens) return x_masks.unsqueeze(-2) def _target_mask(self, olens): @@ -1126,7 +1126,7 @@ def _target_mask(self, olens): [1, 1, 1, 0, 0]]], dtype=torch.uint8) """ - y_masks = make_non_pad_mask(olens).to(next(self.parameters()).device) + y_masks = make_non_pad_mask(olens) s_masks = subsequent_mask(y_masks.size(-1), device=y_masks.device).unsqueeze(0) return y_masks.unsqueeze(-2) & s_masks diff --git a/espnet2/asr/espnet_model.py b/espnet2/asr/espnet_model.py index 08c10182a83..5756598d2ff 100644 --- a/espnet2/asr/espnet_model.py +++ b/espnet2/asr/espnet_model.py @@ -1,5 +1,5 @@ from contextlib import contextmanager -from distutils.version import LooseVersion +from packaging.version import parse as V import logging from typing import Dict from typing import List @@ -29,7 +29,7 @@ from espnet2.torch_utils.device_funcs import force_gatherable from espnet2.train.abs_espnet_model import AbsESPnetModel -if LooseVersion(torch.__version__) >= LooseVersion("1.6.0"): +if 
V(torch.__version__) >= V("1.6.0"): from torch.cuda.amp import autocast else: # Nothing to do if torch<1.6.0 diff --git a/espnet2/asr/maskctc_model.py b/espnet2/asr/maskctc_model.py index 26cf7a90956..10d91de94c5 100644 --- a/espnet2/asr/maskctc_model.py +++ b/espnet2/asr/maskctc_model.py @@ -1,5 +1,5 @@ from contextlib import contextmanager -from distutils.version import LooseVersion +from packaging.version import parse as V from itertools import groupby import logging from typing import Dict @@ -31,7 +31,7 @@ from espnet2.text.token_id_converter import TokenIDConverter from espnet2.torch_utils.device_funcs import force_gatherable -if LooseVersion(torch.__version__) >= LooseVersion("1.6.0"): +if V(torch.__version__) >= V("1.6.0"): from torch.cuda.amp import autocast else: # Nothing to do if torch<1.6.0 diff --git a/espnet2/bin/tts_inference.py b/espnet2/bin/tts_inference.py index 683074d2eb0..6e3da15f0de 100755 --- a/espnet2/bin/tts_inference.py +++ b/espnet2/bin/tts_inference.py @@ -8,7 +8,7 @@ import sys import time -from distutils.version import LooseVersion +from packaging.version import parse as V from pathlib import Path from typing import Any from typing import Dict @@ -300,7 +300,7 @@ def from_pretrained( from parallel_wavegan import __version__ # NOTE(kan-bayashi): Filelock download is supported from 0.5.2 - assert LooseVersion(__version__) > LooseVersion("0.5.1"), ( + assert V(__version__) > V("0.5.1"), ( "Please install the latest parallel_wavegan " "via `pip install -U parallel_wavegan`." ) diff --git a/espnet2/diar/espnet_model.py b/espnet2/diar/espnet_model.py index 1e1d10af15e..92b434e7642 100644 --- a/espnet2/diar/espnet_model.py +++ b/espnet2/diar/espnet_model.py @@ -2,7 +2,7 @@ # Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) from contextlib import contextmanager -from distutils.version import LooseVersion +from packaging.version import parse as V from itertools import permutations from typing import Dict from typing import Optional @@ -22,7 +22,7 @@ from espnet2.torch_utils.device_funcs import force_gatherable from espnet2.train.abs_espnet_model import AbsESPnetModel -if LooseVersion(torch.__version__) >= LooseVersion("1.6.0"): +if V(torch.__version__) >= V("1.6.0"): from torch.cuda.amp import autocast else: # Nothing to do if torch<1.6.0 diff --git a/espnet2/enh/decoder/stft_decoder.py b/espnet2/enh/decoder/stft_decoder.py index e9d3bae5c2d..93768dd2484 100644 --- a/espnet2/enh/decoder/stft_decoder.py +++ b/espnet2/enh/decoder/stft_decoder.py @@ -1,11 +1,11 @@ -from distutils.version import LooseVersion +from packaging.version import parse as V import torch from torch_complex.tensor import ComplexTensor from espnet2.enh.decoder.abs_decoder import AbsDecoder from espnet2.layers.stft import Stft -is_torch_1_9_plus = LooseVersion(torch.__version__) >= LooseVersion("1.9.0") +is_torch_1_9_plus = V(torch.__version__) >= V("1.9.0") class STFTDecoder(AbsDecoder): diff --git a/espnet2/enh/encoder/stft_encoder.py b/espnet2/enh/encoder/stft_encoder.py index b2ab65e5532..2c1f68934d5 100644 --- a/espnet2/enh/encoder/stft_encoder.py +++ b/espnet2/enh/encoder/stft_encoder.py @@ -1,11 +1,11 @@ -from distutils.version import LooseVersion +from packaging.version import parse as V import torch from torch_complex.tensor import ComplexTensor from espnet2.enh.encoder.abs_encoder import AbsEncoder from espnet2.layers.stft import Stft -is_torch_1_9_plus = LooseVersion(torch.__version__) >= LooseVersion("1.9.0") +is_torch_1_9_plus = V(torch.__version__) >= V("1.9.0") class 
STFTEncoder(AbsEncoder): diff --git a/espnet2/enh/espnet_enh_s2t_model.py b/espnet2/enh/espnet_enh_s2t_model.py index 51746f9fbc1..c2e05654fce 100644 --- a/espnet2/enh/espnet_enh_s2t_model.py +++ b/espnet2/enh/espnet_enh_s2t_model.py @@ -1,5 +1,5 @@ from contextlib import contextmanager -from distutils.version import LooseVersion +from packaging.version import parse as V import logging import random from typing import Dict @@ -16,7 +16,7 @@ from espnet2.torch_utils.device_funcs import force_gatherable from espnet2.train.abs_espnet_model import AbsESPnetModel -if LooseVersion(torch.__version__) >= LooseVersion("1.6.0"): +if V(torch.__version__) >= V("1.6.0"): from torch.cuda.amp import autocast else: # Nothing to do if torch<1.6.0 diff --git a/espnet2/enh/espnet_model.py b/espnet2/enh/espnet_model.py index 75bb57094f4..06d9f72902e 100644 --- a/espnet2/enh/espnet_model.py +++ b/espnet2/enh/espnet_model.py @@ -1,5 +1,5 @@ """Enhancement model module.""" -from distutils.version import LooseVersion +from packaging.version import parse as V from typing import Dict from typing import List from typing import Optional @@ -20,7 +20,7 @@ from espnet2.train.abs_espnet_model import AbsESPnetModel -is_torch_1_9_plus = LooseVersion(torch.__version__) >= LooseVersion("1.9.0") +is_torch_1_9_plus = V(torch.__version__) >= V("1.9.0") EPS = torch.finfo(torch.get_default_dtype()).eps diff --git a/espnet2/enh/layers/beamformer.py b/espnet2/enh/layers/beamformer.py index e3d61d2489f..2ceeee6c728 100644 --- a/espnet2/enh/layers/beamformer.py +++ b/espnet2/enh/layers/beamformer.py @@ -1,5 +1,5 @@ """Beamformer module.""" -from distutils.version import LooseVersion +from packaging.version import parse as V from typing import List from typing import Optional from typing import Union @@ -20,7 +20,7 @@ from espnet2.enh.layers.complex_utils import to_double -is_torch_1_9_plus = LooseVersion(torch.__version__) >= LooseVersion("1.9.0") +is_torch_1_9_plus = V(torch.__version__) >= V("1.9.0") EPS = torch.finfo(torch.double).eps diff --git a/espnet2/enh/layers/complex_utils.py b/espnet2/enh/layers/complex_utils.py index acfbe2f61a8..329eee35d7c 100644 --- a/espnet2/enh/layers/complex_utils.py +++ b/espnet2/enh/layers/complex_utils.py @@ -1,5 +1,5 @@ """Beamformer module.""" -from distutils.version import LooseVersion +from packaging.version import parse as V from typing import Sequence from typing import Tuple from typing import Union @@ -10,8 +10,8 @@ EPS = torch.finfo(torch.double).eps -is_torch_1_8_plus = LooseVersion(torch.__version__) >= LooseVersion("1.8.0") -is_torch_1_9_plus = LooseVersion(torch.__version__) >= LooseVersion("1.9.0") +is_torch_1_8_plus = V(torch.__version__) >= V("1.8.0") +is_torch_1_9_plus = V(torch.__version__) >= V("1.9.0") def new_complex_like( diff --git a/espnet2/enh/layers/dnn_beamformer.py b/espnet2/enh/layers/dnn_beamformer.py index 40b264dcea9..be4c3622e40 100644 --- a/espnet2/enh/layers/dnn_beamformer.py +++ b/espnet2/enh/layers/dnn_beamformer.py @@ -1,5 +1,5 @@ """DNN beamformer module.""" -from distutils.version import LooseVersion +from packaging.version import parse as V from typing import List from typing import Optional from typing import Tuple @@ -30,7 +30,7 @@ from espnet2.enh.layers.mask_estimator import MaskEstimator -is_torch_1_9_plus = LooseVersion(torch.__version__) >= LooseVersion("1.9.0") +is_torch_1_9_plus = V(torch.__version__) >= V("1.9.0") BEAMFORMER_TYPES = ( # Minimum Variance Distortionless Response beamformer diff --git a/espnet2/enh/layers/mask_estimator.py 
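
For context on why these is_torch_*_plus flags move from LooseVersion to packaging.version.parse: distutils.version is deprecated along with the rest of distutils (PEP 632), and LooseVersion does not implement PEP 440, so pre-releases can be ordered incorrectly and comparisons over mixed alphanumeric fields can raise TypeError on Python 3. parse() handles release, pre-release, and local-version segments as torch actually emits them. A minimal sketch, assuming only that the packaging package is installed:

    from packaging.version import parse as V

    # PEP 440 semantics: a pre-release sorts *below* its final release ...
    assert V("1.10.0a0") < V("1.10.0")
    # ... and local build labels (as in torch CUDA/nightly builds) compare cleanly.
    assert V("1.11.0+cu113") >= V("1.11.0")

    import torch
    is_torch_1_9_plus = V(torch.__version__) >= V("1.9.0")
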
b/espnet2/enh/layers/mask_estimator.py index daea80f79ec..6f40c66ddfe 100644 --- a/espnet2/enh/layers/mask_estimator.py +++ b/espnet2/enh/layers/mask_estimator.py @@ -1,4 +1,4 @@ -from distutils.version import LooseVersion +from packaging.version import parse as V from typing import Tuple from typing import Union @@ -13,7 +13,7 @@ from espnet2.enh.layers.complex_utils import is_complex -is_torch_1_9_plus = LooseVersion(torch.__version__) >= LooseVersion("1.9.0") +is_torch_1_9_plus = V(torch.__version__) >= V("1.9.0") class MaskEstimator(torch.nn.Module): diff --git a/espnet2/enh/layers/wpe.py b/espnet2/enh/layers/wpe.py index a9760325030..e6117b89786 100644 --- a/espnet2/enh/layers/wpe.py +++ b/espnet2/enh/layers/wpe.py @@ -1,4 +1,4 @@ -from distutils.version import LooseVersion +from packaging.version import parse as V from typing import Tuple from typing import Union @@ -12,7 +12,7 @@ from espnet2.enh.layers.complex_utils import reverse -is_torch_1_9_plus = LooseVersion(torch.__version__) >= LooseVersion("1.9.0") +is_torch_1_9_plus = V(torch.__version__) >= V("1.9.0") """ WPE pytorch version: Ported from https://github.com/fgnt/nara_wpe diff --git a/espnet2/enh/loss/criterions/tf_domain.py b/espnet2/enh/loss/criterions/tf_domain.py index c94678e4244..cb81d7cf25d 100644 --- a/espnet2/enh/loss/criterions/tf_domain.py +++ b/espnet2/enh/loss/criterions/tf_domain.py @@ -1,6 +1,6 @@ from abc import ABC from abc import abstractmethod -from distutils.version import LooseVersion +from packaging.version import parse as V from functools import reduce import math @@ -13,7 +13,7 @@ from espnet2.enh.loss.criterions.abs_loss import AbsEnhLoss -is_torch_1_9_plus = LooseVersion(torch.__version__) >= LooseVersion("1.9.0") +is_torch_1_9_plus = V(torch.__version__) >= V("1.9.0") EPS = torch.finfo(torch.get_default_dtype()).eps diff --git a/espnet2/enh/separator/conformer_separator.py b/espnet2/enh/separator/conformer_separator.py index 5a9031f441d..3e3574beade 100644 --- a/espnet2/enh/separator/conformer_separator.py +++ b/espnet2/enh/separator/conformer_separator.py @@ -1,5 +1,5 @@ from collections import OrderedDict -from distutils.version import LooseVersion +from packaging.version import parse as V from typing import Dict from typing import List from typing import Optional @@ -17,7 +17,7 @@ from espnet2.enh.separator.abs_separator import AbsSeparator -is_torch_1_9_plus = LooseVersion(torch.__version__) >= LooseVersion("1.9.0") +is_torch_1_9_plus = V(torch.__version__) >= V("1.9.0") class ConformerSeparator(AbsSeparator): diff --git a/espnet2/enh/separator/dc_crn_separator.py b/espnet2/enh/separator/dc_crn_separator.py index fa4ed14bc89..b3f9be4fddd 100644 --- a/espnet2/enh/separator/dc_crn_separator.py +++ b/espnet2/enh/separator/dc_crn_separator.py @@ -1,5 +1,5 @@ from collections import OrderedDict -from distutils.version import LooseVersion +from packaging.version import parse as V from typing import Dict from typing import List from typing import Optional @@ -16,7 +16,7 @@ EPS = torch.finfo(torch.get_default_dtype()).eps -is_torch_1_9_plus = LooseVersion(torch.__version__) >= LooseVersion("1.9.0") +is_torch_1_9_plus = V(torch.__version__) >= V("1.9.0") class DC_CRNSeparator(AbsSeparator): diff --git a/espnet2/enh/separator/dccrn_separator.py b/espnet2/enh/separator/dccrn_separator.py index a97def4e905..74f793d14bd 100644 --- a/espnet2/enh/separator/dccrn_separator.py +++ b/espnet2/enh/separator/dccrn_separator.py @@ -1,5 +1,5 @@ from collections import OrderedDict -from distutils.version import 
LooseVersion
+from packaging.version import parse as V
packaging.version import parse as V from typing import Dict from typing import List from typing import Optional @@ -22,7 +22,7 @@ from espnet2.enh.separator.abs_separator import AbsSeparator -is_torch_1_9_plus = LooseVersion(torch.__version__) >= LooseVersion("1.9.0") +is_torch_1_9_plus = V(torch.__version__) >= V("1.9.0") class TransformerSeparator(AbsSeparator): diff --git a/espnet2/gan_tts/espnet_model.py b/espnet2/gan_tts/espnet_model.py index 34ca845f0fd..5cc1785a4d5 100644 --- a/espnet2/gan_tts/espnet_model.py +++ b/espnet2/gan_tts/espnet_model.py @@ -4,7 +4,7 @@ """GAN-based text-to-speech ESPnet model.""" from contextlib import contextmanager -from distutils.version import LooseVersion +from packaging.version import parse as V from typing import Any from typing import Dict from typing import Optional @@ -19,7 +19,7 @@ from espnet2.train.abs_gan_espnet_model import AbsGANESPnetModel from espnet2.tts.feats_extract.abs_feats_extract import AbsFeatsExtract -if LooseVersion(torch.__version__) >= LooseVersion("1.6.0"): +if V(torch.__version__) >= V("1.6.0"): from torch.cuda.amp import autocast else: # Nothing to do if torch < 1.6.0 diff --git a/espnet2/hubert/espnet_model.py b/espnet2/hubert/espnet_model.py index 4fa775841bc..35468bde93e 100644 --- a/espnet2/hubert/espnet_model.py +++ b/espnet2/hubert/espnet_model.py @@ -7,7 +7,7 @@ # Code in Fairseq: https://github.com/pytorch/fairseq/tree/master/examples/hubert from contextlib import contextmanager -from distutils.version import LooseVersion +from packaging.version import parse as V from typing import Dict from typing import List from typing import Optional @@ -28,7 +28,7 @@ from espnet2.torch_utils.device_funcs import force_gatherable from espnet2.train.abs_espnet_model import AbsESPnetModel -if LooseVersion(torch.__version__) >= LooseVersion("1.6.0"): +if V(torch.__version__) >= V("1.6.0"): from torch.cuda.amp import autocast else: # Nothing to do if torch<1.6.0 diff --git a/espnet2/layers/stft.py b/espnet2/layers/stft.py index b888bfede82..847469bbd4a 100644 --- a/espnet2/layers/stft.py +++ b/espnet2/layers/stft.py @@ -1,4 +1,4 @@ -from distutils.version import LooseVersion +from packaging.version import parse as V from typing import Optional from typing import Tuple from typing import Union @@ -13,10 +13,10 @@ import librosa import numpy as np -is_torch_1_9_plus = LooseVersion(torch.__version__) >= LooseVersion("1.9.0") +is_torch_1_9_plus = V(torch.__version__) >= V("1.9.0") -is_torch_1_7_plus = LooseVersion(torch.__version__) >= LooseVersion("1.7") +is_torch_1_7_plus = V(torch.__version__) >= V("1.7") class Stft(torch.nn.Module, InversibleInterface): @@ -182,7 +182,7 @@ def inverse( wavs: (batch, samples) ilens: (batch,) """ - if LooseVersion(torch.__version__) >= LooseVersion("1.6.0"): + if V(torch.__version__) >= V("1.6.0"): istft = torch.functional.istft else: try: diff --git a/espnet2/mt/espnet_model.py b/espnet2/mt/espnet_model.py index 953d5bc02f8..b937cbe3dfd 100644 --- a/espnet2/mt/espnet_model.py +++ b/espnet2/mt/espnet_model.py @@ -1,5 +1,5 @@ from contextlib import contextmanager -from distutils.version import LooseVersion +from packaging.version import parse as V import logging from typing import Dict from typing import List @@ -24,7 +24,7 @@ from espnet2.torch_utils.device_funcs import force_gatherable from espnet2.train.abs_espnet_model import AbsESPnetModel -if LooseVersion(torch.__version__) >= LooseVersion("1.6.0"): +if V(torch.__version__) >= V("1.6.0"): from torch.cuda.amp import autocast else: # Nothing to do 
if torch<1.6.0 diff --git a/espnet2/st/espnet_model.py b/espnet2/st/espnet_model.py index ee744681bd7..fb8fcfdaee9 100644 --- a/espnet2/st/espnet_model.py +++ b/espnet2/st/espnet_model.py @@ -1,5 +1,5 @@ from contextlib import contextmanager -from distutils.version import LooseVersion +from packaging.version import parse as V import logging from typing import Dict from typing import List @@ -28,7 +28,7 @@ from espnet2.torch_utils.device_funcs import force_gatherable from espnet2.train.abs_espnet_model import AbsESPnetModel -if LooseVersion(torch.__version__) >= LooseVersion("1.6.0"): +if V(torch.__version__) >= V("1.6.0"): from torch.cuda.amp import autocast else: # Nothing to do if torch<1.6.0 diff --git a/espnet2/tasks/abs_task.py b/espnet2/tasks/abs_task.py index 37dda7259e9..54c4cd26a43 100644 --- a/espnet2/tasks/abs_task.py +++ b/espnet2/tasks/abs_task.py @@ -3,7 +3,7 @@ from abc import abstractmethod import argparse from dataclasses import dataclass -from distutils.version import LooseVersion +from packaging.version import parse as V import functools import logging import os @@ -76,7 +76,7 @@ except Exception: wandb = None -if LooseVersion(torch.__version__) >= LooseVersion("1.5.0"): +if V(torch.__version__) >= V("1.5.0"): from torch.multiprocessing.spawn import ProcessContext else: from torch.multiprocessing.spawn import SpawnContext as ProcessContext @@ -94,7 +94,7 @@ rmsprop=torch.optim.RMSprop, rprop=torch.optim.Rprop, ) -if LooseVersion(torch.__version__) >= LooseVersion("1.10.0"): +if V(torch.__version__) >= V("1.10.0"): # From 1.10.0, RAdam is officially supported optim_classes.update( radam=torch.optim.RAdam, @@ -116,7 +116,7 @@ sgdw=torch_optimizer.SGDW, yogi=torch_optimizer.Yogi, ) - if LooseVersion(torch_optimizer.__version__) < LooseVersion("0.2.0"): + if V(torch_optimizer.__version__) < V("0.2.0"): # From 0.2.0, RAdam is dropped optim_classes.update( radam=torch_optimizer.RAdam, diff --git a/espnet2/train/gan_trainer.py b/espnet2/train/gan_trainer.py index 0d3cc59bea0..cc0aa1ba95d 100644 --- a/espnet2/train/gan_trainer.py +++ b/espnet2/train/gan_trainer.py @@ -9,7 +9,7 @@ import time from contextlib import contextmanager -from distutils.version import LooseVersion +from packaging.version import parse as V from typing import Dict from typing import Iterable from typing import List @@ -35,7 +35,7 @@ if torch.distributed.is_available(): from torch.distributed import ReduceOp -if LooseVersion(torch.__version__) >= LooseVersion("1.6.0"): +if V(torch.__version__) >= V("1.6.0"): from torch.cuda.amp import autocast from torch.cuda.amp import GradScaler else: diff --git a/espnet2/train/reporter.py b/espnet2/train/reporter.py index a3c03995b54..65b4ac6a9d8 100644 --- a/espnet2/train/reporter.py +++ b/espnet2/train/reporter.py @@ -3,7 +3,7 @@ from contextlib import contextmanager import dataclasses import datetime -from distutils.version import LooseVersion +from packaging.version import parse as V import logging from pathlib import Path import time @@ -357,7 +357,7 @@ def finish_epoch(self, sub_reporter: SubReporter) -> None: seconds=time.perf_counter() - sub_reporter.start_time ) stats["total_count"] = sub_reporter.total_count - if LooseVersion(torch.__version__) >= LooseVersion("1.4.0"): + if V(torch.__version__) >= V("1.4.0"): if torch.cuda.is_initialized(): stats["gpu_max_cached_mem_GB"] = ( torch.cuda.max_memory_reserved() / 2**30 diff --git a/espnet2/train/trainer.py b/espnet2/train/trainer.py index 304d3329264..6fe2726880d 100644 --- a/espnet2/train/trainer.py +++ 
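
The autocast guard repeated across these model files keeps the modules importable on torch<1.6.0, where torch.cuda.amp does not exist: on older torch the import is replaced by a no-op context manager so call sites can write "with autocast(False):" unconditionally. A minimal sketch of the pattern shown in the hunks (the no-op body matches the repository's fallback):

    from contextlib import contextmanager
    from packaging.version import parse as V
    import torch

    if V(torch.__version__) >= V("1.6.0"):
        from torch.cuda.amp import autocast
    else:
        # torch<1.6.0 has no AMP; substitute a no-op context manager so
        # "with autocast(...):" works unchanged at every call site.
        @contextmanager
        def autocast(enabled=True):
            yield
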
b/espnet2/train/trainer.py @@ -3,7 +3,7 @@ from contextlib import contextmanager import dataclasses from dataclasses import is_dataclass -from distutils.version import LooseVersion +from packaging.version import parse as V import logging from pathlib import Path import time @@ -42,7 +42,7 @@ if torch.distributed.is_available(): from torch.distributed import ReduceOp -if LooseVersion(torch.__version__) >= LooseVersion("1.6.0"): +if V(torch.__version__) >= V("1.6.0"): from torch.cuda.amp import autocast from torch.cuda.amp import GradScaler else: @@ -183,7 +183,7 @@ def run( output_dir = Path(trainer_options.output_dir) reporter = Reporter() if trainer_options.use_amp: - if LooseVersion(torch.__version__) < LooseVersion("1.6.0"): + if V(torch.__version__) < V("1.6.0"): raise RuntimeError( "Require torch>=1.6.0 for Automatic Mixed Precision" ) diff --git a/espnet2/tts/espnet_model.py b/espnet2/tts/espnet_model.py index e09c4a35a55..6cb88fe4b5b 100644 --- a/espnet2/tts/espnet_model.py +++ b/espnet2/tts/espnet_model.py @@ -4,7 +4,7 @@ """Text-to-speech ESPnet model.""" from contextlib import contextmanager -from distutils.version import LooseVersion +from packaging.version import parse as V from typing import Dict from typing import Optional from typing import Tuple @@ -19,7 +19,7 @@ from espnet2.tts.abs_tts import AbsTTS from espnet2.tts.feats_extract.abs_feats_extract import AbsFeatsExtract -if LooseVersion(torch.__version__) >= LooseVersion("1.6.0"): +if V(torch.__version__) >= V("1.6.0"): from torch.cuda.amp import autocast else: # Nothing to do if torch<1.6.0 diff --git a/espnet2/utils/griffin_lim.py b/espnet2/utils/griffin_lim.py index c1536d51b2b..3d4a948b7aa 100644 --- a/espnet2/utils/griffin_lim.py +++ b/espnet2/utils/griffin_lim.py @@ -7,7 +7,7 @@ import logging -from distutils.version import LooseVersion +from packaging.version import parse as V from functools import partial from typeguard import check_argument_types from typing import Optional @@ -77,7 +77,7 @@ def griffin_lim( # assert the size of input linear spectrogram assert spc.shape[1] == n_fft // 2 + 1 - if LooseVersion(librosa.__version__) >= LooseVersion("0.7.0"): + if V(librosa.__version__) >= V("0.7.0"): # use librosa's fast Grriffin-Lim algorithm spc = np.abs(spc.T) y = librosa.griffinlim( diff --git a/setup.py b/setup.py index dba53a97c7f..58755a756ba 100644 --- a/setup.py +++ b/setup.py @@ -4,7 +4,6 @@ import os -from distutils.version import LooseVersion from setuptools import find_packages from setuptools import setup diff --git a/test/espnet2/asr/frontend/test_s3prl.py b/test/espnet2/asr/frontend/test_s3prl.py index 0bfebb823b3..2c0f66e1ee6 100644 --- a/test/espnet2/asr/frontend/test_s3prl.py +++ b/test/espnet2/asr/frontend/test_s3prl.py @@ -1,10 +1,10 @@ -from distutils.version import LooseVersion +from packaging.version import parse as V import torch from espnet2.asr.frontend.s3prl import S3prlFrontend -is_torch_1_7_plus = LooseVersion(torch.__version__) >= LooseVersion("1.7.0") +is_torch_1_7_plus = V(torch.__version__) >= V("1.7.0") def test_frontend_init(): diff --git a/test/espnet2/enh/layers/test_complex_utils.py b/test/espnet2/enh/layers/test_complex_utils.py index e566f3aea76..6404f33eaa3 100644 --- a/test/espnet2/enh/layers/test_complex_utils.py +++ b/test/espnet2/enh/layers/test_complex_utils.py @@ -1,4 +1,4 @@ -from distutils.version import LooseVersion +from packaging.version import parse as V import numpy as np import pytest @@ -16,7 +16,7 @@ from espnet2.enh.layers.complex_utils import trace 
-is_torch_1_9_plus = LooseVersion(torch.__version__) >= LooseVersion("1.9.0") +is_torch_1_9_plus = V(torch.__version__) >= V("1.9.0") # invertible matrix mat_np = np.array( [ diff --git a/test/espnet2/enh/layers/test_enh_layers.py b/test/espnet2/enh/layers/test_enh_layers.py index 62f4554b10b..3d4f0a84ead 100644 --- a/test/espnet2/enh/layers/test_enh_layers.py +++ b/test/espnet2/enh/layers/test_enh_layers.py @@ -1,4 +1,4 @@ -from distutils.version import LooseVersion +from packaging.version import parse as V import numpy as np import pytest @@ -13,8 +13,8 @@ from espnet2.enh.layers.complex_utils import solve from espnet2.layers.stft import Stft -is_torch_1_1_plus = LooseVersion(torch.__version__) >= LooseVersion("1.1.0") -is_torch_1_9_plus = LooseVersion(torch.__version__) >= LooseVersion("1.9.0") +is_torch_1_1_plus = V(torch.__version__) >= V("1.1.0") +is_torch_1_9_plus = V(torch.__version__) >= V("1.9.0") random_speech = torch.tensor( diff --git a/test/espnet2/enh/loss/criterions/test_tf_domain.py b/test/espnet2/enh/loss/criterions/test_tf_domain.py index 9d1cec94a1d..117a16545db 100644 --- a/test/espnet2/enh/loss/criterions/test_tf_domain.py +++ b/test/espnet2/enh/loss/criterions/test_tf_domain.py @@ -1,4 +1,4 @@ -from distutils.version import LooseVersion +from packaging.version import parse as V import pytest import torch @@ -11,7 +11,7 @@ from espnet2.enh.loss.criterions.tf_domain import FrequencyDomainMSE -is_torch_1_9_plus = LooseVersion(torch.__version__) >= LooseVersion("1.9.0") +is_torch_1_9_plus = V(torch.__version__) >= V("1.9.0") @pytest.mark.parametrize("criterion_class", [FrequencyDomainL1, FrequencyDomainMSE]) diff --git a/test/espnet2/enh/separator/test_beamformer.py b/test/espnet2/enh/separator/test_beamformer.py index 3a10c7a9643..eddf317ee86 100644 --- a/test/espnet2/enh/separator/test_beamformer.py +++ b/test/espnet2/enh/separator/test_beamformer.py @@ -1,4 +1,4 @@ -from distutils.version import LooseVersion +from packaging.version import parse as V import pytest import torch @@ -7,7 +7,7 @@ from espnet2.enh.separator.neural_beamformer import NeuralBeamformer -is_torch_1_9_plus = LooseVersion(torch.__version__) >= LooseVersion("1.9.0") +is_torch_1_9_plus = V(torch.__version__) >= V("1.9.0") random_speech = torch.tensor( [ [ diff --git a/test/espnet2/enh/separator/test_dc_crn_separator.py b/test/espnet2/enh/separator/test_dc_crn_separator.py index 712de05e063..8f60b62399a 100644 --- a/test/espnet2/enh/separator/test_dc_crn_separator.py +++ b/test/espnet2/enh/separator/test_dc_crn_separator.py @@ -1,4 +1,4 @@ -from distutils.version import LooseVersion +from packaging.version import parse as V import pytest import torch @@ -8,7 +8,7 @@ from espnet2.enh.separator.dc_crn_separator import DC_CRNSeparator -is_torch_1_9_plus = LooseVersion(torch.__version__) >= LooseVersion("1.9.0") +is_torch_1_9_plus = V(torch.__version__) >= V("1.9.0") @pytest.mark.parametrize("input_dim", [33, 65]) diff --git a/test/espnet2/enh/separator/test_dccrn_separator.py b/test/espnet2/enh/separator/test_dccrn_separator.py index acf30c1ed98..3a075ac42ba 100644 --- a/test/espnet2/enh/separator/test_dccrn_separator.py +++ b/test/espnet2/enh/separator/test_dccrn_separator.py @@ -1,4 +1,4 @@ -from distutils.version import LooseVersion +from packaging.version import parse as V import pytest import torch @@ -6,7 +6,7 @@ from espnet2.enh.separator.dccrn_separator import DCCRNSeparator -is_torch_1_9_plus = LooseVersion(torch.__version__) >= LooseVersion("1.9.0") +is_torch_1_9_plus = V(torch.__version__) >= 
V("1.9.0") @pytest.mark.parametrize("input_dim", [9]) diff --git a/test/espnet2/enh/test_espnet_model.py b/test/espnet2/enh/test_espnet_model.py index 6985ab63e36..906b42bbac3 100644 --- a/test/espnet2/enh/test_espnet_model.py +++ b/test/espnet2/enh/test_espnet_model.py @@ -1,4 +1,4 @@ -from distutils.version import LooseVersion +from packaging.version import parse as V import pytest import torch @@ -26,7 +26,7 @@ from espnet2.enh.separator.transformer_separator import TransformerSeparator -is_torch_1_9_plus = LooseVersion(torch.__version__) >= LooseVersion("1.9.0") +is_torch_1_9_plus = V(torch.__version__) >= V("1.9.0") stft_encoder = STFTEncoder( diff --git a/test/espnet2/gan_tts/joint/test_joint_text2wav.py b/test/espnet2/gan_tts/joint/test_joint_text2wav.py index 1badd3a892f..f0ed087da20 100644 --- a/test/espnet2/gan_tts/joint/test_joint_text2wav.py +++ b/test/espnet2/gan_tts/joint/test_joint_text2wav.py @@ -3,7 +3,7 @@ """Test VITS related modules.""" -from distutils.version import LooseVersion +from packaging.version import parse as V import pytest import torch @@ -190,7 +190,7 @@ def make_loss_args(**kwargs): @pytest.mark.skipif( - LooseVersion(torch.__version__) < LooseVersion("1.4"), + V(torch.__version__) < V("1.4"), reason="Pytorch >= 1.4 is required.", ) @pytest.mark.skipif( diff --git a/test/espnet2/gan_tts/vits/test_generator.py b/test/espnet2/gan_tts/vits/test_generator.py index 7ac9f3f879e..9c17ed897ab 100644 --- a/test/espnet2/gan_tts/vits/test_generator.py +++ b/test/espnet2/gan_tts/vits/test_generator.py @@ -3,8 +3,6 @@ """Test VITS generator modules.""" -from distutils.version import LooseVersion - import pytest import torch @@ -66,10 +64,6 @@ def make_generator_args(**kwargs): # so a little bit more time is needed to run. Therefore, # here we extend execution timeout from 2 sec to 5 sec. @pytest.mark.execution_timeout(5) -@pytest.mark.skipif( - LooseVersion(torch.__version__) < LooseVersion("1.4"), - reason="Pytorch >= 1.4 is required.", -) @pytest.mark.skipif( "1.6" in torch.__version__, reason="group conv in pytorch 1.6 has an issue. " @@ -198,10 +192,6 @@ def test_vits_generator_forward(model_dict): print(f"{i+j+1}: {output_.shape}") -@pytest.mark.skipif( - LooseVersion(torch.__version__) < LooseVersion("1.4"), - reason="Pytorch >= 1.4 is required.", -) @pytest.mark.skipif( "1.6" in torch.__version__, reason="group conv in pytorch 1.6 has an issue. " diff --git a/test/espnet2/gan_tts/vits/test_vits.py b/test/espnet2/gan_tts/vits/test_vits.py index a35d8c66bf1..e749345e346 100644 --- a/test/espnet2/gan_tts/vits/test_vits.py +++ b/test/espnet2/gan_tts/vits/test_vits.py @@ -3,8 +3,6 @@ """Test VITS related modules.""" -from distutils.version import LooseVersion - import pytest import torch @@ -148,10 +146,6 @@ def make_vits_loss_args(**kwargs): return defaults -@pytest.mark.skipif( - LooseVersion(torch.__version__) < LooseVersion("1.4"), - reason="Pytorch >= 1.4 is required.", -) @pytest.mark.skipif( "1.6" in torch.__version__, reason="group conv in pytorch 1.6 has an issue. " @@ -349,10 +343,6 @@ def test_vits_is_trainable_and_decodable(gen_dict, dis_dict, loss_dict): assert output_dict["wav"].size(0) == inputs["feats"].size(0) * upsample_factor -@pytest.mark.skipif( - LooseVersion(torch.__version__) < LooseVersion("1.4"), - reason="Pytorch >= 1.4 is required.", -) @pytest.mark.skipif( "1.6" in torch.__version__, reason="Group conv in pytorch 1.6 has an issue. 
" @@ -588,10 +578,6 @@ def test_multi_speaker_vits_is_trainable_and_decodable( not torch.cuda.is_available(), reason="GPU is needed.", ) -@pytest.mark.skipif( - LooseVersion(torch.__version__) < LooseVersion("1.4"), - reason="Pytorch >= 1.4 is required.", -) @pytest.mark.skipif( "1.6" in torch.__version__, reason="group conv in pytorch 1.6 has an issue. " @@ -799,10 +785,6 @@ def test_vits_is_trainable_and_decodable_on_gpu(gen_dict, dis_dict, loss_dict): not torch.cuda.is_available(), reason="GPU is needed.", ) -@pytest.mark.skipif( - LooseVersion(torch.__version__) < LooseVersion("1.4"), - reason="Pytorch >= 1.4 is required.", -) @pytest.mark.skipif( "1.6" in torch.__version__, reason="Group conv in pytorch 1.6 has an issue. " diff --git a/test/test_custom_transducer.py b/test/test_custom_transducer.py index 34447581e6f..bf6101365cd 100644 --- a/test/test_custom_transducer.py +++ b/test/test_custom_transducer.py @@ -1,7 +1,7 @@ # coding: utf-8 import argparse -from distutils.version import LooseVersion +from packaging.version import parse as V import tempfile import json @@ -15,8 +15,7 @@ import espnet.nets.pytorch_backend.lm.default as lm_pytorch from espnet.nets.pytorch_backend.transducer.blocks import build_blocks -is_torch_1_4_plus = LooseVersion(torch.__version__) >= LooseVersion("1.4.0") -is_torch_1_5_plus = LooseVersion(torch.__version__) >= LooseVersion("1.5.0") +is_torch_1_5_plus = V(torch.__version__) >= V("1.5.0") def make_train_args(**kwargs): diff --git a/test/test_e2e_asr_transducer.py b/test/test_e2e_asr_transducer.py index 835f9bfe8ab..4a115433cfd 100644 --- a/test/test_e2e_asr_transducer.py +++ b/test/test_e2e_asr_transducer.py @@ -1,7 +1,7 @@ # coding: utf-8 import argparse -from distutils.version import LooseVersion +from packaging.version import parse as V import tempfile import json @@ -16,8 +16,8 @@ import espnet.nets.pytorch_backend.lm.default as lm_pytorch from espnet.nets.pytorch_backend.nets_utils import pad_list -is_torch_1_4_plus = LooseVersion(torch.__version__) >= LooseVersion("1.4.0") -is_torch_1_5_plus = LooseVersion(torch.__version__) >= LooseVersion("1.5.0") +is_torch_1_4_plus = V(torch.__version__) >= V("1.4.0") +is_torch_1_5_plus = V(torch.__version__) >= V("1.5.0") def get_default_train_args(**kwargs): diff --git a/tools/check_install.py b/tools/check_install.py index 82081986123..b8e522758b7 100644 --- a/tools/check_install.py +++ b/tools/check_install.py @@ -9,7 +9,7 @@ import shutil import sys -from distutils.version import LooseVersion +from packaging.version import parse module_list = [ ("torchaudio", None, None), @@ -77,7 +77,7 @@ def main(): import chainer print(f"[x] chainer={chainer.__version__}") - if LooseVersion(chainer.__version__) != LooseVersion("6.0.0"): + if parse(chainer.__version__) != parse("6.0.0"): print( f"Warning! chainer={chainer.__version__} is not supported. 
" "Supported version is 6.0.0" diff --git a/utils/convert_fbank_to_wav.py b/utils/convert_fbank_to_wav.py index e38feb90593..ccb4a9c439b 100755 --- a/utils/convert_fbank_to_wav.py +++ b/utils/convert_fbank_to_wav.py @@ -7,7 +7,7 @@ import logging import os -from distutils.version import LooseVersion +from packaging.version import parse as V import librosa import numpy as np @@ -66,7 +66,7 @@ def griffin_lim(spc, n_fft, n_shift, win_length, window="hann", n_iters=100): # assert the size of input linear spectrogram assert spc.shape[1] == n_fft // 2 + 1 - if LooseVersion(librosa.__version__) >= LooseVersion("0.7.0"): + if V(librosa.__version__) >= V("0.7.0"): # use librosa's fast Grriffin-Lim algorithm spc = np.abs(spc.T) y = librosa.griffinlim( From 6e9035d42eea31cad87a7c8b87fc79635a6df7c2 Mon Sep 17 00:00:00 2001 From: kamo-naoyuki Date: Thu, 12 May 2022 18:32:33 +0900 Subject: [PATCH 08/22] fix --- espnet/nets/pytorch_backend/nets_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/espnet/nets/pytorch_backend/nets_utils.py b/espnet/nets/pytorch_backend/nets_utils.py index 4bfeeb96cb2..3a7b1e079bc 100644 --- a/espnet/nets/pytorch_backend/nets_utils.py +++ b/espnet/nets/pytorch_backend/nets_utils.py @@ -153,7 +153,7 @@ def make_pad_mask(lengths, xs=None, length_dim=-1, maxlen=None): if not isinstance(lengths, list): lengths = lengths.tolist() else: - assert isinstance(lengths, torch.tensor), type(lengths) + assert isinstance(lengths, torch.Tensor), type(lengths) lengths = lengths.long() bs = int(len(lengths)) From b0050d97da3d0545b62a5d21b029ddd016ce6ca1 Mon Sep 17 00:00:00 2001 From: kamo-naoyuki Date: Thu, 12 May 2022 18:56:52 +0900 Subject: [PATCH 09/22] fix --- setup.py | 1 + tools/Makefile | 6 ++++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 58755a756ba..9fb44f87b25 100644 --- a/setup.py +++ b/setup.py @@ -11,6 +11,7 @@ requirements = { "install": [ "setuptools>=38.5.1", + "packaging", "configargparse>=1.2.1", "typeguard>=2.7.0", "humanfriendly", diff --git a/tools/Makefile b/tools/Makefile index 338fd8d22fe..87ccbd6d21a 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -28,7 +28,7 @@ endif all: kaldi showenv python conda_packages.done sctk.done sph2pipe.done check_install -python: activate_python.sh espnet.done pytorch.done chainer.done fairscale.done torch_optimizer.done +python: activate_python.sh packaging.done espnet.done pytorch.done chainer.done fairscale.done torch_optimizer.done extra: warp-ctc.done warp-transducer.done chainer_ctc.done nkf.done moses.done mwerSegmenter.done pesq kenlm.done pyopenjtalk.done py3mmseg.done beamformit.done fairseq.done s3prl.done k2.done transformers.done phonemizer.done longformer.done kaldi: @@ -84,8 +84,10 @@ sph2pipe.done: ./installers/install_sph2pipe.sh touch sph2pipe.done +packaging.done: activate_python.sh + . ./activate_python.sh && python3 -m pip install packaging -pytorch.done: activate_python.sh +pytorch.done: activate_python.sh packaging.done ifeq ($(strip $(USE_CONDA)),) # NOTE(kan-bayashi): Temporary fixed numpy version . 
./activate_python.sh && pip install "numpy<=1.21.3" From 8fbac77268906075043cbecfb3e1c5625b145fce Mon Sep 17 00:00:00 2001 From: kamo-naoyuki Date: Thu, 12 May 2022 18:59:17 +0900 Subject: [PATCH 10/22] fix --- tools/installers/install_torch.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/installers/install_torch.sh b/tools/installers/install_torch.sh index d542183db53..5ea48c0de8a 100755 --- a/tools/installers/install_torch.sh +++ b/tools/installers/install_torch.sh @@ -52,7 +52,7 @@ install_torch(){ log conda install -y "pytorch=${torch_version}" "torchaudio=$1" cpuonly -c pytorch conda install -y "pytorch=${torch_version}" "torchaudio=$1" cpuonly -c pytorch elif [ "${cuda_version}" = "11.5" ]; then - # NOTE(kamo): In my environment, conda-forge only could installed, but I don't know why @ 12, May, 2022 + # NOTE(kamo): In my environment, only the cudatoolkit from conda-forge could be installed, but I don't know why @ 12, May, 2022 cudatoolkit_channel=conda-forge log conda install -y "pytorch=${torch_version}" "torchaudio=$1" "cudatoolkit=${cuda_version}" -c pytorch -c "${cudatoolkit_channel}" conda install -y "pytorch=${torch_version}" "torchaudio=$1" "cudatoolkit=${cuda_version}" -c pytorch -c "${cudatoolkit_channel}" From 86186b744fb2bfc259909c49cc906fb0856d15bf Mon Sep 17 00:00:00 2001 From: kamo-naoyuki Date: Thu, 12 May 2022 19:10:18 +0900 Subject: [PATCH 11/22] add installation for packaging --- tools/installers/install_chainer.sh | 3 +++ tools/installers/install_fairscale.sh | 3 +++ tools/installers/install_fairseq.sh | 3 +++ tools/installers/install_k2.sh | 3 +++ tools/installers/install_longformer.sh | 3 +++ tools/installers/install_s3prl.sh | 3 +++ tools/installers/install_speechbrain.sh | 3 +++ tools/installers/install_torch.sh | 3 +++ tools/installers/install_torch_optimizer.sh | 3 +++ tools/installers/install_warp-ctc.sh | 3 +++ tools/installers/install_warp-transducer.sh | 3 +++ 11 files changed, 33 insertions(+) diff --git a/tools/installers/install_chainer.sh b/tools/installers/install_chainer.sh index 9000bfb0d5a..4ef3e4cdc58 100755 --- a/tools/installers/install_chainer.sh +++ b/tools/installers/install_chainer.sh @@ -16,6 +16,9 @@ if [ "${cuda_version}" = cpu ] || [ "${cuda_version}" = CPU ]; then fi +if ! python -c "import packaging.version" &> /dev/null; then + python3 -m pip install packaging +fi # espnet requires chiner=6.0.0 chainer_version=6.0.0 python_version=$(python3 -c "import sys; print(sys.version.split()[0])") diff --git a/tools/installers/install_fairscale.sh b/tools/installers/install_fairscale.sh index 4988a75736d..620b906ffd3 100755 --- a/tools/installers/install_fairscale.sh +++ b/tools/installers/install_fairscale.sh @@ -7,6 +7,9 @@ if [ $# != 0 ]; then exit 1; fi +if ! python -c "import packaging.version" &> /dev/null; then + python3 -m pip install packaging +fi torch_version=$(python3 -c "import torch; print(torch.__version__)") python_36_plus=$(python3 < /dev/null; then + python3 -m pip install packaging +fi torch_version=$(python3 -c "import torch; print(torch.__version__)") python_36_plus=$(python3 </dev/null) =~ pytorch ]] && echo true || echo false) fi +if !
python -c "import packaging.version" &> /dev/null; then + python3 -m pip install packaging +fi python_36_plus=$(python3 < /dev/null; then + python3 -m pip install packaging +fi torch_version=$(python3 -c "import torch; print(torch.__version__)") python_36_plus=$(python3 < /dev/null; then + python3 -m pip install packaging +fi torch_17_plus=$(python3 < /dev/null; then + python3 -m pip install packaging +fi torch_18_plus=$(python3 < /dev/null; then + python3 -m pip install packaging +fi if $(pytorch_plus 1.11.1); then log "[ERROR] This script doesn't support pytorch=${torch_version}" diff --git a/tools/installers/install_torch_optimizer.sh b/tools/installers/install_torch_optimizer.sh index a4b42d4fade..49fad9d504c 100755 --- a/tools/installers/install_torch_optimizer.sh +++ b/tools/installers/install_torch_optimizer.sh @@ -7,6 +7,9 @@ if [ $# != 0 ]; then exit 1; fi +if ! python -c "import packaging.version" &> /dev/null; then + python3 -m pip install packaging +fi torch_version=$(python3 -c "import torch; print(torch.__version__)") python_36_plus=$(python3 < /dev/null; then + python3 -m pip install packaging +fi torch_17_plus=$(python3 < /dev/null; then + python3 -m pip install packaging +fi # TODO(kamo): Consider clang case # Note: Requires gcc>=4.9.2 to build extensions with pytorch>=1.0 if python3 -c 'import torch as t;assert t.__version__[0] == "1"' &> /dev/null; then \ From 809ac3741814b7d9ebdd351b9e0e9343e236977c Mon Sep 17 00:00:00 2001 From: kamo-naoyuki Date: Thu, 12 May 2022 19:27:20 +0900 Subject: [PATCH 12/22] fix --- egs2/aishell4/enh1/local/generate_fe_trainingdata.py.patch | 2 +- tools/installers/install_fairscale.sh | 2 +- tools/installers/install_fairseq.sh | 2 +- tools/installers/install_k2.sh | 2 +- tools/installers/install_longformer.sh | 2 +- tools/installers/install_s3prl.sh | 2 +- tools/installers/install_torch_optimizer.sh | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/egs2/aishell4/enh1/local/generate_fe_trainingdata.py.patch b/egs2/aishell4/enh1/local/generate_fe_trainingdata.py.patch index 47c079997eb..9a23ef72207 100644 --- a/egs2/aishell4/enh1/local/generate_fe_trainingdata.py.patch +++ b/egs2/aishell4/enh1/local/generate_fe_trainingdata.py.patch @@ -16,7 +16,7 @@ import argparse + -+is_py_3_3_plus = V(sys.version) > V("3.3") ++is_py_3_3_plus = V("{}.{}.{}".format(*sys.version_info[:3])) > V("3.3") + + def get_line_context(file_path, line_number): diff --git a/tools/installers/install_fairscale.sh b/tools/installers/install_fairscale.sh index 620b906ffd3..436d5ae7b54 100755 --- a/tools/installers/install_fairscale.sh +++ b/tools/installers/install_fairscale.sh @@ -15,7 +15,7 @@ python_36_plus=$(python3 <= V("3.6"): +if V("{}.{}.{}".format(*sys.version_info[:3])) >= V("3.6"): print("true") else: print("false") diff --git a/tools/installers/install_fairseq.sh b/tools/installers/install_fairseq.sh index f4f12007688..61824378f6d 100755 --- a/tools/installers/install_fairseq.sh +++ b/tools/installers/install_fairseq.sh @@ -15,7 +15,7 @@ python_36_plus=$(python3 <= V("3.6"): +if V("{}.{}.{}".format(*sys.version_info[:3])) >= V("3.6"): print("true") else: print("false") diff --git a/tools/installers/install_k2.sh b/tools/installers/install_k2.sh index 03d21d7b873..6066584fd0a 100755 --- a/tools/installers/install_k2.sh +++ b/tools/installers/install_k2.sh @@ -33,7 +33,7 @@ python_36_plus=$(python3 <= V("3.6"): +if V("{}.{}.{}".format(*sys.version_info[:3])) >= V("3.6"): print("true") else: print("false") diff --git 
a/tools/installers/install_longformer.sh b/tools/installers/install_longformer.sh index 891fad4e611..04e817ecc36 100755 --- a/tools/installers/install_longformer.sh +++ b/tools/installers/install_longformer.sh @@ -15,7 +15,7 @@ python_36_plus=$(python3 <= V("3.6"): +if V("{}.{}.{}".format(*sys.version_info[:3])) >= V("3.6"): print("true") else: print("false") diff --git a/tools/installers/install_s3prl.sh b/tools/installers/install_s3prl.sh index 68ee4ec9ad6..b55092e3e30 100755 --- a/tools/installers/install_s3prl.sh +++ b/tools/installers/install_s3prl.sh @@ -27,7 +27,7 @@ python_36_plus=$(python3 <= V("3.6"): +if V("{}.{}.{}".format(*sys.version_info[:3])) >= V("3.6"): print("true") else: print("false") diff --git a/tools/installers/install_torch_optimizer.sh b/tools/installers/install_torch_optimizer.sh index 49fad9d504c..014ca1d0830 100755 --- a/tools/installers/install_torch_optimizer.sh +++ b/tools/installers/install_torch_optimizer.sh @@ -15,7 +15,7 @@ python_36_plus=$(python3 <= V("3.6"): +if V("{}.{}.{}".format(*sys.version_info[:3])) >= V("3.6"): print("true") else: print("false") From 5c4b966a957062e4de298bcb69fe8cf6f1365fd1 Mon Sep 17 00:00:00 2001 From: kamo-naoyuki Date: Thu, 12 May 2022 19:36:11 +0900 Subject: [PATCH 13/22] temporarily remove tests for python=3.10.0 --- .github/workflows/ci.yaml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 058dfea6288..92e0b29f582 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -31,11 +31,6 @@ jobs: pytorch-version: 1.11.0 chainer-verssion: 6.0.0 use-conda: false - - os: ubuntu-20.04 - python-version: "3.10" - pytorch-version: 1.11.0 - chainer-verssion: 6.0.0 - use-conda: false steps: - uses: actions/checkout@master - uses: actions/cache@v1 From 005aad11b37acf388c6b70143ab40a5231bc7a39 Mon Sep 17 00:00:00 2001 From: kamo-naoyuki Date: Thu, 12 May 2022 20:04:57 +0900 Subject: [PATCH 14/22] fix --- tools/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/Makefile b/tools/Makefile index 87ccbd6d21a..8936b69d4f9 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -86,6 +86,7 @@ sph2pipe.done: packaging.done: activate_python.sh . ./activate_python.sh && python3 -m pip install packaging + touch packaging.done pytorch.done: activate_python.sh packaging.done ifeq ($(strip $(USE_CONDA)),) From 5c474b96c543c3d26e95b432355bcfd2bf8dc116 Mon Sep 17 00:00:00 2001 From: kamo-naoyuki Date: Thu, 12 May 2022 20:20:18 +0900 Subject: [PATCH 15/22] remove verbosity options --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index a032ac70480..d17c8920e9a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -2,7 +2,7 @@ test=pytest [tool:pytest] -addopts = --cov-config=.coveragerc --verbose --durations=0 --cov=espnet --cov=espnet2 +addopts = --cov-config=.coveragerc --cov=espnet --cov=espnet2 testpaths = test execution_timeout = 2.0 From 934b161f1f714637c3d7d47c14f8c810a9df6fe2 Mon Sep 17 00:00:00 2001 From: kamo-naoyuki Date: Thu, 12 May 2022 20:33:58 +0900 Subject: [PATCH 16/22] change to show the error logs when jobs fail --- egs2/TEMPLATE/asr1/asr.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/egs2/TEMPLATE/asr1/asr.sh b/egs2/TEMPLATE/asr1/asr.sh index f4d7a8ad24a..65a0048eed9 100755 --- a/egs2/TEMPLATE/asr1/asr.sh +++ b/egs2/TEMPLATE/asr1/asr.sh @@ -771,7 +771,7 @@ if !
"${skip_train}"; then --train_shape_file "${_logdir}/train.JOB.scp" \ --valid_shape_file "${_logdir}/dev.JOB.scp" \ --output_dir "${_logdir}/stats.JOB" \ - ${_opts} ${lm_args} || { cat "${_logdir}"/stats.1.log; exit 1; } + ${_opts} ${lm_args} || { cat $(grep -l -i error "${_logdir}"/stats.*.log) ; exit 1; } # 4. Aggregate shape files _opts= @@ -985,7 +985,7 @@ if ! "${skip_train}"; then --train_shape_file "${_logdir}/train.JOB.scp" \ --valid_shape_file "${_logdir}/valid.JOB.scp" \ --output_dir "${_logdir}/stats.JOB" \ - ${_opts} ${asr_args} || { cat "${_logdir}"/stats.1.log; exit 1; } + ${_opts} ${asr_args} || { cat $(grep -l -i error "${_logdir}"/stats.*.log) ; exit 1; } # 4. Aggregate shape files _opts= @@ -1252,7 +1252,7 @@ if ! "${skip_eval}"; then --asr_train_config "${asr_exp}"/config.yaml \ --asr_model_file "${asr_exp}"/"${inference_asr_model}" \ --output_dir "${_logdir}"/output.JOB \ - ${_opts} ${inference_args} + ${_opts} ${inference_args} || { cat $(grep -l -i error "${_logdir}"/asr_inference.*.log) ; exit 1; } # 3. Concatenates the output files from each jobs for f in token token_int score text; do From bb0d0aaa9e9f9076ac88aad425ad2f2caef369a7 Mon Sep 17 00:00:00 2001 From: kamo-naoyuki Date: Thu, 12 May 2022 20:40:39 +0900 Subject: [PATCH 17/22] fix code style --- egs2/TEMPLATE/asr1/asr.sh | 6 +++--- espnet/asr/pytorch_backend/asr.py | 6 ++---- espnet/asr/pytorch_backend/recog.py | 4 +--- espnet/nets/pytorch_backend/ctc.py | 8 ++------ espnet/nets/pytorch_backend/e2e_tts_fastspeech.py | 6 +++--- espnet/nets/pytorch_backend/e2e_vc_transformer.py | 12 ++++++------ espnet/nets/pytorch_backend/nets_utils.py | 5 +---- espnet2/asr/espnet_model.py | 2 +- espnet2/asr/maskctc_model.py | 2 +- espnet2/diar/espnet_model.py | 2 +- espnet2/enh/espnet_enh_s2t_model.py | 2 +- espnet2/enh/loss/criterions/tf_domain.py | 2 +- espnet2/mt/espnet_model.py | 2 +- espnet2/st/espnet_model.py | 2 +- espnet2/tasks/abs_task.py | 2 +- espnet2/train/reporter.py | 2 +- espnet2/train/trainer.py | 2 +- espnet2/utils/griffin_lim.py | 2 +- 18 files changed, 29 insertions(+), 40 deletions(-) diff --git a/egs2/TEMPLATE/asr1/asr.sh b/egs2/TEMPLATE/asr1/asr.sh index 65a0048eed9..763aceb7a34 100755 --- a/egs2/TEMPLATE/asr1/asr.sh +++ b/egs2/TEMPLATE/asr1/asr.sh @@ -755,7 +755,7 @@ if ! "${skip_train}"; then log "LM collect-stats started... log: '${_logdir}/stats.*.log'" # NOTE: --*_shape_file doesn't require length information if --batch_type=unsorted, # but it's used only for deciding the sample ids. - # shellcheck disable=SC2086 + # shellcheck disable=SC2046,SC2086 ${train_cmd} JOB=1:"${_nj}" "${_logdir}"/stats.JOB.log \ ${python} -m espnet2.bin.lm_train \ --collect_stats true \ @@ -967,7 +967,7 @@ if ! "${skip_train}"; then # NOTE: --*_shape_file doesn't require length information if --batch_type=unsorted, # but it's used only for deciding the sample ids. - # shellcheck disable=SC2086 + # shellcheck disable=SC2046,SC2086 ${train_cmd} JOB=1:"${_nj}" "${_logdir}"/stats.JOB.log \ ${python} -m espnet2.bin.asr_train \ --collect_stats true \ @@ -1242,7 +1242,7 @@ if ! "${skip_eval}"; then # 2. Submit decoding jobs log "Decoding started... 
log: '${_logdir}/asr_inference.*.log'" - # shellcheck disable=SC2086 + # shellcheck disable=SC2046,SC2086 ${_cmd} --gpu "${_ngpu}" JOB=1:"${_nj}" "${_logdir}"/asr_inference.JOB.log \ ${python} -m ${asr_inference_tool} \ --batch_size ${batch_size} \ diff --git a/espnet/asr/pytorch_backend/asr.py b/espnet/asr/pytorch_backend/asr.py index a83d9a27dc1..0effaaaa893 100644 --- a/espnet/asr/pytorch_backend/asr.py +++ b/espnet/asr/pytorch_backend/asr.py @@ -4,12 +4,12 @@ """Training/decoding definition for the speech recognition task.""" import copy -from packaging.version import parse as V import itertools import json import logging import math import os +from packaging.version import parse as V from chainer import reporter as reporter_module from chainer import training @@ -999,9 +999,7 @@ def recog(args): # Dunno why but weight_observer from dynamic quantized module must have # dtype=torch.qint8 with torch < 1.5 although dtype=torch.float16 is supported. - if args.quantize_dtype == "float16" and torch.__version__ < V( - "1.5.0" - ): + if args.quantize_dtype == "float16" and torch.__version__ < V("1.5.0"): raise ValueError( "float16 dtype for dynamic quantization is not supported with torch " "version < 1.5.0. Switching to qint8 dtype instead." diff --git a/espnet/asr/pytorch_backend/recog.py b/espnet/asr/pytorch_backend/recog.py index 68fea23a144..b64131d1ad2 100644 --- a/espnet/asr/pytorch_backend/recog.py +++ b/espnet/asr/pytorch_backend/recog.py @@ -62,9 +62,7 @@ def recog_v2(args): "Quantized LSTM in ESPnet is only supported with torch 1.4+." ) - if args.quantize_dtype == "float16" and torch.__version__ < V( - "1.5.0" - ): + if args.quantize_dtype == "float16" and torch.__version__ < V("1.5.0"): raise ValueError( "float16 dtype for dynamic quantization is not supported with torch " "version < 1.5.0. Switching to qint8 dtype instead." 
diff --git a/espnet/nets/pytorch_backend/ctc.py b/espnet/nets/pytorch_backend/ctc.py index c974df09b7a..96b2e4f52b9 100644 --- a/espnet/nets/pytorch_backend/ctc.py +++ b/espnet/nets/pytorch_backend/ctc.py @@ -1,5 +1,5 @@ -from packaging.version import parse as V import logging +from packaging.version import parse as V import numpy as np import six @@ -28,11 +28,7 @@ def __init__(self, odim, eprojs, dropout_rate, ctc_type="warpctc", reduce=True): self.probs = None # for visualization # In case of Pytorch >= 1.7.0, CTC will be always builtin - self.ctc_type = ( - ctc_type - if V(torch.__version__) < V("1.7.0") - else "builtin" - ) + self.ctc_type = ctc_type if V(torch.__version__) < V("1.7.0") else "builtin" if ctc_type != self.ctc_type: logging.warning(f"CTC was set to {self.ctc_type} due to PyTorch version.") diff --git a/espnet/nets/pytorch_backend/e2e_tts_fastspeech.py b/espnet/nets/pytorch_backend/e2e_tts_fastspeech.py index c5a3069e53c..8c9f2bcb232 100644 --- a/espnet/nets/pytorch_backend/e2e_tts_fastspeech.py +++ b/espnet/nets/pytorch_backend/e2e_tts_fastspeech.py @@ -576,7 +576,7 @@ def _forward( alpha=1.0, ): # forward encoder - x_masks = self._source_mask(ilens) + x_masks = self._source_mask(ilens).to(xs.device) hs, _ = self.encoder(xs, x_masks) # (B, Tmax, adim) # integrate speaker embedding @@ -603,7 +603,7 @@ def _forward( olens_in = olens.new([olen // self.reduction_factor for olen in olens]) else: olens_in = olens - h_masks = self._source_mask(olens_in) + h_masks = self._source_mask(olens_in).to(xs.device) else: h_masks = None zs, _ = self.decoder(hs, h_masks) # (B, Lmax, adim) @@ -816,7 +816,7 @@ def _source_mask(self, ilens): [1, 1, 1, 0, 0]]], dtype=torch.uint8) """ - x_masks = make_non_pad_mask(ilens).to(next(self.parameters()).device) + x_masks = make_non_pad_mask(ilens) return x_masks.unsqueeze(-2) def _load_teacher_model(self, model_path): diff --git a/espnet/nets/pytorch_backend/e2e_vc_transformer.py b/espnet/nets/pytorch_backend/e2e_vc_transformer.py index c4e0144d412..99fd3f3962b 100644 --- a/espnet/nets/pytorch_backend/e2e_vc_transformer.py +++ b/espnet/nets/pytorch_backend/e2e_vc_transformer.py @@ -673,7 +673,7 @@ def forward(self, xs, ilens, ys, labels, olens, spembs=None, *args, **kwargs): xs_ds, ilens_ds = xs, ilens # forward encoder - x_masks = self._source_mask(ilens_ds) + x_masks = self._source_mask(ilens_ds).to(xs.device) hs, hs_masks = self.encoder(xs_ds, x_masks) # integrate speaker embedding @@ -701,7 +701,7 @@ def forward(self, xs, ilens, ys, labels, olens, spembs=None, *args, **kwargs): ilens_ds_st = ilens_ds # forward decoder - y_masks = self._target_mask(olens_in) + y_masks = self._target_mask(olens_in).to(xs.device) zs, _ = self.decoder(ys_in, y_masks, hs_int, hs_masks) # (B, Lmax//r, odim * r) -> (B, Lmax//r * r, odim) before_outs = self.feat_out(zs).view(zs.size(0), -1, self.odim) @@ -977,7 +977,7 @@ def calculate_all_attentions( xs_ds, ilens_ds = xs, ilens # forward encoder - x_masks = self._source_mask(ilens_ds) + x_masks = self._source_mask(ilens_ds).to(xs.device) hs, hs_masks = self.encoder(xs_ds, x_masks) # integrate speaker embedding @@ -996,7 +996,7 @@ def calculate_all_attentions( ys_in = self._add_first_frame_and_remove_last_frame(ys_in) # forward decoder - y_masks = self._target_mask(olens_in) + y_masks = self._target_mask(olens_in).to(xs.device) zs, _ = self.decoder(ys_in, y_masks, hs, hs_masks) # calculate final outputs @@ -1099,7 +1099,7 @@ def _source_mask(self, ilens): [[1, 1, 1, 0, 0]]], dtype=torch.uint8) """ - x_masks = 
make_non_pad_mask(ilens).to(next(self.parameters()).device) + x_masks = make_non_pad_mask(ilens) return x_masks.unsqueeze(-2) def _target_mask(self, olens): @@ -1128,7 +1128,7 @@ def _target_mask(self, olens): [1, 1, 1, 0, 0]]], dtype=torch.uint8) """ - y_masks = make_non_pad_mask(olens).to(next(self.parameters()).device) + y_masks = make_non_pad_mask(olens) s_masks = subsequent_mask(y_masks.size(-1), device=y_masks.device).unsqueeze(0) return y_masks.unsqueeze(-2) & s_masks diff --git a/espnet/nets/pytorch_backend/nets_utils.py b/espnet/nets/pytorch_backend/nets_utils.py index 3a7b1e079bc..638b0b0bf23 100644 --- a/espnet/nets/pytorch_backend/nets_utils.py +++ b/espnet/nets/pytorch_backend/nets_utils.py @@ -151,10 +151,7 @@ def make_pad_mask(lengths, xs=None, length_dim=-1, maxlen=None): raise ValueError("length_dim cannot be 0: {}".format(length_dim)) if not isinstance(lengths, list): - lengths = lengths.tolist() - else: - assert isinstance(lengths, torch.Tensor), type(lengths) - lengths = lengths.long() + lengths = lengths.long().tolist() bs = int(len(lengths)) if maxlen is None: diff --git a/espnet2/asr/espnet_model.py b/espnet2/asr/espnet_model.py index 5756598d2ff..67698e95115 100644 --- a/espnet2/asr/espnet_model.py +++ b/espnet2/asr/espnet_model.py @@ -1,6 +1,6 @@ from contextlib import contextmanager -from packaging.version import parse as V import logging +from packaging.version import parse as V from typing import Dict from typing import List from typing import Optional diff --git a/espnet2/asr/maskctc_model.py b/espnet2/asr/maskctc_model.py index 10d91de94c5..2a95eec89ea 100644 --- a/espnet2/asr/maskctc_model.py +++ b/espnet2/asr/maskctc_model.py @@ -1,7 +1,7 @@ from contextlib import contextmanager -from packaging.version import parse as V from itertools import groupby import logging +from packaging.version import parse as V from typing import Dict from typing import List from typing import Optional diff --git a/espnet2/diar/espnet_model.py b/espnet2/diar/espnet_model.py index 92b434e7642..2017316f70f 100644 --- a/espnet2/diar/espnet_model.py +++ b/espnet2/diar/espnet_model.py @@ -2,8 +2,8 @@ # Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) from contextlib import contextmanager -from packaging.version import parse as V from itertools import permutations +from packaging.version import parse as V from typing import Dict from typing import Optional from typing import Tuple diff --git a/espnet2/enh/espnet_enh_s2t_model.py b/espnet2/enh/espnet_enh_s2t_model.py index c2e05654fce..4d37ce0b0c0 100644 --- a/espnet2/enh/espnet_enh_s2t_model.py +++ b/espnet2/enh/espnet_enh_s2t_model.py @@ -1,6 +1,6 @@ from contextlib import contextmanager -from packaging.version import parse as V import logging +from packaging.version import parse as V import random from typing import Dict from typing import List diff --git a/espnet2/enh/loss/criterions/tf_domain.py b/espnet2/enh/loss/criterions/tf_domain.py index cb81d7cf25d..4c4a91ef5d2 100644 --- a/espnet2/enh/loss/criterions/tf_domain.py +++ b/espnet2/enh/loss/criterions/tf_domain.py @@ -1,8 +1,8 @@ from abc import ABC from abc import abstractmethod -from packaging.version import parse as V from functools import reduce import math +from packaging.version import parse as V import torch import torch.nn.functional as F diff --git a/espnet2/mt/espnet_model.py b/espnet2/mt/espnet_model.py index b937cbe3dfd..8a493366046 100644 --- a/espnet2/mt/espnet_model.py +++ b/espnet2/mt/espnet_model.py @@ -1,6 +1,6 @@ from contextlib import contextmanager 
-from packaging.version import parse as V import logging +from packaging.version import parse as V from typing import Dict from typing import List from typing import Optional diff --git a/espnet2/st/espnet_model.py b/espnet2/st/espnet_model.py index fb8fcfdaee9..743b53d8288 100644 --- a/espnet2/st/espnet_model.py +++ b/espnet2/st/espnet_model.py @@ -1,6 +1,6 @@ from contextlib import contextmanager -from packaging.version import parse as V import logging +from packaging.version import parse as V from typing import Dict from typing import List from typing import Optional diff --git a/espnet2/tasks/abs_task.py b/espnet2/tasks/abs_task.py index 54c4cd26a43..0f23feaa93d 100644 --- a/espnet2/tasks/abs_task.py +++ b/espnet2/tasks/abs_task.py @@ -3,10 +3,10 @@ from abc import abstractmethod import argparse from dataclasses import dataclass -from packaging.version import parse as V import functools import logging import os +from packaging.version import parse as V from pathlib import Path import sys from typing import Any diff --git a/espnet2/train/reporter.py b/espnet2/train/reporter.py index 65b4ac6a9d8..be1d2a51fe5 100644 --- a/espnet2/train/reporter.py +++ b/espnet2/train/reporter.py @@ -3,8 +3,8 @@ from contextlib import contextmanager import dataclasses import datetime -from packaging.version import parse as V import logging +from packaging.version import parse as V from pathlib import Path import time from typing import ContextManager diff --git a/espnet2/train/trainer.py b/espnet2/train/trainer.py index 6fe2726880d..da8ea6144b4 100644 --- a/espnet2/train/trainer.py +++ b/espnet2/train/trainer.py @@ -3,8 +3,8 @@ from contextlib import contextmanager import dataclasses from dataclasses import is_dataclass -from packaging.version import parse as V import logging +from packaging.version import parse as V from pathlib import Path import time from typing import Dict diff --git a/espnet2/utils/griffin_lim.py b/espnet2/utils/griffin_lim.py index 3d4a948b7aa..c9b08cd1235 100644 --- a/espnet2/utils/griffin_lim.py +++ b/espnet2/utils/griffin_lim.py @@ -7,8 +7,8 @@ import logging -from packaging.version import parse as V from functools import partial +from packaging.version import parse as V from typeguard import check_argument_types from typing import Optional From 98689a5f0bfd88efffdbbcdd5d924e186d563a91 Mon Sep 17 00:00:00 2001 From: kamo-naoyuki Date: Thu, 12 May 2022 21:17:35 +0900 Subject: [PATCH 18/22] change to show the error logs when jobs fail --- .../asr1/scripts/utils/evaluate_asr.sh | 4 +- egs2/TEMPLATE/diar1/diar.sh | 8 +- egs2/TEMPLATE/enh1/enh.sh | 8 +- egs2/TEMPLATE/enh_asr1/enh_asr.sh | 12 +-- egs2/TEMPLATE/enh_st1/enh_st.sh | 32 +++--- egs2/TEMPLATE/mt1/mt.sh | 32 +++--- egs2/TEMPLATE/ssl1/hubert.sh | 100 +++++++++--------- egs2/TEMPLATE/st1/st.sh | 42 ++++---- egs2/TEMPLATE/tts1/tts.sh | 8 +- 9 files changed, 123 insertions(+), 123 deletions(-) diff --git a/egs2/TEMPLATE/asr1/scripts/utils/evaluate_asr.sh b/egs2/TEMPLATE/asr1/scripts/utils/evaluate_asr.sh index 7d3da2bfbea..0cc2c632591 100755 --- a/egs2/TEMPLATE/asr1/scripts/utils/evaluate_asr.sh +++ b/egs2/TEMPLATE/asr1/scripts/utils/evaluate_asr.sh @@ -173,14 +173,14 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then # 2. Submit decoding jobs log "Decoding started...
log: '${logdir}/asr_inference.*.log'" - # shellcheck disable=SC2086 + # shellcheck disable=SC2046,SC2086 ${_cmd} --gpu "${_ngpu}" JOB=1:"${_nj}" "${logdir}"/asr_inference.JOB.log \ python3 -m espnet2.bin.asr_inference \ --ngpu "${_ngpu}" \ --data_path_and_name_and_type "${wavscp},speech,sound" \ --key_file "${logdir}"/keys.JOB.scp \ --output_dir "${logdir}"/output.JOB \ - "${_opts[@]}" ${inference_args} + "${_opts[@]}" ${inference_args} || { cat $(grep -l -i error "${logdir}"/asr_inference.*.log) ; exit 1; } # 3. Concatenates the output files from each jobs for f in token token_int score text; do diff --git a/egs2/TEMPLATE/diar1/diar.sh b/egs2/TEMPLATE/diar1/diar.sh index 815c73537f4..b711d324eab 100755 --- a/egs2/TEMPLATE/diar1/diar.sh +++ b/egs2/TEMPLATE/diar1/diar.sh @@ -348,7 +348,7 @@ if ! "${skip_train}"; then # NOTE: --*_shape_file doesn't require length information if --batch_type=unsorted, # but it's used only for deciding the sample ids. - # shellcheck disable=SC2086 + # shellcheck disable=SC2046,SC2086 ${train_cmd} JOB=1:"${_nj}" "${_logdir}"/stats.JOB.log \ ${python} -m espnet2.bin.diar_train \ --collect_stats true \ @@ -360,7 +360,7 @@ if ! "${skip_train}"; then --train_shape_file "${_logdir}/train.JOB.scp" \ --valid_shape_file "${_logdir}/valid.JOB.scp" \ --output_dir "${_logdir}/stats.JOB" \ - ${_opts} ${diar_args} || { cat "${_logdir}"/stats.1.log; exit 1; } + ${_opts} ${diar_args} || { cat $(grep -l -i error "${_logdir}"/stats.*.log) ; exit 1; } # 4. Aggregate shape files _opts= @@ -510,7 +510,7 @@ if ! "${skip_eval}"; then # 2. Submit inference jobs log "Diarization started... log: '${_logdir}/diar_inference.*.log'" - # shellcheck disable=SC2086 + # shellcheck disable=SC2046,SC2086 ${_cmd} --gpu "${_ngpu}" JOB=1:"${_nj}" "${_logdir}"/diar_inference.JOB.log \ ${python} -m espnet2.bin.diar_inference \ --ngpu "${_ngpu}" \ @@ -520,7 +520,7 @@ if ! "${skip_eval}"; then --train_config "${diar_exp}"/config.yaml \ --model_file "${diar_exp}"/"${inference_model}" \ --output_dir "${_logdir}"/output.JOB \ - ${_opts} + ${_opts} || { cat $(grep -l -i error "${_logdir}"/diar_inference.*.log) ; exit 1; } # 3. Concatenates the output files from each jobs for i in $(seq "${_nj}"); do diff --git a/egs2/TEMPLATE/enh1/enh.sh b/egs2/TEMPLATE/enh1/enh.sh index db170043db6..864a0485df0 100755 --- a/egs2/TEMPLATE/enh1/enh.sh +++ b/egs2/TEMPLATE/enh1/enh.sh @@ -494,7 +494,7 @@ if ! "${skip_train}"; then # but it's used only for deciding the sample ids. - # shellcheck disable=SC2086 + # shellcheck disable=SC2046,SC2086 ${train_cmd} JOB=1:"${_nj}" "${_logdir}"/stats.JOB.log \ ${python} -m espnet2.bin.enh_train \ --collect_stats true \ @@ -504,7 +504,7 @@ if ! "${skip_train}"; then --train_shape_file "${_logdir}/train.JOB.scp" \ --valid_shape_file "${_logdir}/valid.JOB.scp" \ --output_dir "${_logdir}/stats.JOB" \ - ${_opts} ${enh_args} || { cat "${_logdir}"/stats.1.log; exit 1; } + ${_opts} ${enh_args} || { cat $(grep -l -i error "${_logdir}"/stats.*.log) ; exit 1; } # 4. Aggregate shape files _opts= @@ -652,7 +652,7 @@ if ! "${skip_eval}"; then # 2. Submit inference jobs log "Enhancement started... log: '${_logdir}/enh_inference.*.log'" - # shellcheck disable=SC2086 + # shellcheck disable=SC2046,SC2086 ${_cmd} --gpu "${_ngpu}" JOB=1:"${_nj}" "${_logdir}"/enh_inference.JOB.log \ ${python} -m espnet2.bin.enh_inference \ --ngpu "${_ngpu}" \ @@ -663,7 +663,7 @@ if ! 
"${skip_eval}"; then ${inference_enh_config:+--inference_config "$inference_enh_config"} \ --model_file "${enh_exp}"/"${inference_model}" \ --output_dir "${_logdir}"/output.JOB \ - ${_opts} ${inference_args} + ${_opts} ${inference_args} || { cat $(grep -l -i error "${_logdir}"/enh_inference.*.log) ; exit 1; } _spk_list=" " diff --git a/egs2/TEMPLATE/enh_asr1/enh_asr.sh b/egs2/TEMPLATE/enh_asr1/enh_asr.sh index fc720ddf94b..9ec09219613 100755 --- a/egs2/TEMPLATE/enh_asr1/enh_asr.sh +++ b/egs2/TEMPLATE/enh_asr1/enh_asr.sh @@ -794,7 +794,7 @@ if ! "${skip_train}"; then log "LM collect-stats started... log: '${_logdir}/stats.*.log'" # NOTE: --*_shape_file doesn't require length information if --batch_type=unsorted, # but it's used only for deciding the sample ids. - # shellcheck disable=SC2086 + # shellcheck disable=SC2046,SC2086 ${train_cmd} JOB=1:"${_nj}" "${_logdir}"/stats.JOB.log \ ${python} -m espnet2.bin.lm_train \ --collect_stats true \ @@ -810,7 +810,7 @@ if ! "${skip_train}"; then --train_shape_file "${_logdir}/train.JOB.scp" \ --valid_shape_file "${_logdir}/dev.JOB.scp" \ --output_dir "${_logdir}/stats.JOB" \ - ${_opts} ${lm_args} || { cat "${_logdir}"/stats.1.log; exit 1; } + ${_opts} ${lm_args} || { cat $(grep -l -i error "${_logdir}"/stats.*.log) ; exit 1; } # 4. Aggregate shape files _opts= @@ -937,7 +937,7 @@ if ! "${skip_train}"; then if "${use_ngram}"; then log "Stage 9: Ngram Training: train_set=${data_feats}/lm_train.txt" cut -f 2 -d " " ${data_feats}/lm_train.txt | lmplz -S "20%" --discount_fallback -o ${ngram_num} - >${ngram_exp}/${ngram_num}gram.arpa - build_binary -s ${ngram_exp}/${ngram_num}gram.arpa ${ngram_exp}/${ngram_num}gram.bin + build_binary -s ${ngram_exp}/${ngram_num}gram.arpa ${ngram_exp}/${ngram_num}gram.bin else log "Stage 9: Skip ngram stages: use_ngram=${use_ngram}" fi @@ -1335,7 +1335,7 @@ if ! "${skip_eval}"; then # 2. Submit inference jobs log "Enhancement started... log: '${_logdir}/enh_inference.*.log'" - # shellcheck disable=SC2086 + # shellcheck disable=SC2046,SC2086 ${_cmd} --gpu "${_ngpu}" JOB=1:"${_nj}" "${_logdir}"/enh_inference.JOB.log \ ${python} -m espnet2.bin.enh_inference \ --enh_s2t_task true \ @@ -1347,7 +1347,7 @@ if ! "${skip_eval}"; then ${inference_enh_config:+--inference_config "$inference_enh_config"} \ --model_file "${enh_asr_exp}"/"${inference_enh_asr_model}" \ --output_dir "${_logdir}"/output.JOB \ - ${_opts} ${enh_inference_args} + ${_opts} ${enh_inference_args} || { cat $(grep -l -i error "${_logdir}"/enh_inference.*.log) ; exit 1; } # 3. Concatenates the output files from each jobs _spk_list=" " @@ -1632,7 +1632,7 @@ if ! "${skip_upload_hf}"; then # Generate description file # shellcheck disable=SC2034 hf_task=speech-enhancement-recognition - # shellcheck disable=SC2034 + # shellcheck disable=SC2034 espnet_task=EnhS2T # shellcheck disable=SC2034 task_exp=${enh_asr_exp} diff --git a/egs2/TEMPLATE/enh_st1/enh_st.sh b/egs2/TEMPLATE/enh_st1/enh_st.sh index eabf49cc29d..b27f986e582 100755 --- a/egs2/TEMPLATE/enh_st1/enh_st.sh +++ b/egs2/TEMPLATE/enh_st1/enh_st.sh @@ -551,7 +551,7 @@ if ! 
"${skip_data_prep}"; then done utils/combine_data.sh --extra_files "${utt_extra_files} ${_scp_list}" "data/${train_set}_sp" ${_dirs} for extra_file in ${utt_extra_files}; do - python pyscripts/utils/remove_duplicate_keys.py data/"${train_set}_sp"/${extra_file} > data/"${train_set}_sp"/${extra_file}.tmp + python pyscripts/utils/remove_duplicate_keys.py data/"${train_set}_sp"/${extra_file} > data/"${train_set}_sp"/${extra_file}.tmp mv data/"${train_set}_sp"/${extra_file}.tmp data/"${train_set}_sp"/${extra_file} done else @@ -593,7 +593,7 @@ if ! "${skip_data_prep}"; then fi cp ${single_file} "${data_feats}${_suf}/${dset}" expand_utt_extra_files="${expand_utt_extra_files} $(basename ${single_file})" - done + done done echo "${expand_utt_extra_files}" utils/fix_data_dir.sh --utt_extra_files "${expand_utt_extra_files}" "${data_feats}${_suf}/${dset}" @@ -727,9 +727,9 @@ if ! "${skip_data_prep}"; then utils/fix_data_dir.sh --utt_extra_files "${utt_extra_files}" "${data_feats}/${dset}" for utt_extra_file in ${utt_extra_files}; do python pyscripts/utils/remove_duplicate_keys.py ${data_feats}/${dset}/${utt_extra_file} \ - > ${data_feats}/${dset}/${utt_extra_file}.tmp + > ${data_feats}/${dset}/${utt_extra_file}.tmp mv ${data_feats}/${dset}/${utt_extra_file}.tmp ${data_feats}/${dset}/${utt_extra_file} - done + done done # shellcheck disable=SC2002 @@ -934,7 +934,7 @@ if ! "${skip_train}"; then log "LM collect-stats started... log: '${_logdir}/stats.*.log'" # NOTE: --*_shape_file doesn't require length information if --batch_type=unsorted, # but it's used only for deciding the sample ids. - # shellcheck disable=SC2086 + # shellcheck disable=SC2046,SC2086 ${train_cmd} JOB=1:"${_nj}" "${_logdir}"/stats.JOB.log \ ${python} -m espnet2.bin.lm_train \ --collect_stats true \ @@ -950,7 +950,7 @@ if ! "${skip_train}"; then --train_shape_file "${_logdir}/train.JOB.scp" \ --valid_shape_file "${_logdir}/dev.JOB.scp" \ --output_dir "${_logdir}/stats.JOB" \ - ${_opts} ${lm_args} || { cat "${_logdir}"/stats.1.log; exit 1; } + ${_opts} ${lm_args} || { cat $(grep -l -i error "${_logdir}"/stats.*.log) ; exit 1; } # 4. Aggregate shape files _opts= @@ -1078,7 +1078,7 @@ if ! "${skip_train}"; then if "${use_ngram}"; then log "Stage 9: Ngram Training: train_set=${data_feats}/lm_train.txt" cut -f 2 -d " " ${data_feats}/lm_train.txt | lmplz -S "20%" --discount_fallback -o ${ngram_num} - >${ngram_exp}/${ngram_num}gram.arpa - build_binary -s ${ngram_exp}/${ngram_num}gram.arpa ${ngram_exp}/${ngram_num}gram.bin + build_binary -s ${ngram_exp}/${ngram_num}gram.arpa ${ngram_exp}/${ngram_num}gram.bin else log "Stage 9: Skip ngram stages: use_ngram=${use_ngram}" fi @@ -1148,7 +1148,7 @@ if ! "${skip_train}"; then # but it's used only for deciding the sample ids. # TODO(jiatong): fix different bpe model - # shellcheck disable=SC2086 + # shellcheck disable=SC2046,SC2086 ${train_cmd} JOB=1:"${_nj}" "${_logdir}"/stats.JOB.log \ ${python} -m espnet2.bin.enh_s2t_train \ --collect_stats true \ @@ -1173,7 +1173,7 @@ if ! "${skip_train}"; then --train_shape_file "${_logdir}/train.JOB.scp" \ --valid_shape_file "${_logdir}/valid.JOB.scp" \ --output_dir "${_logdir}/stats.JOB" \ - ${_opts} ${enh_st_args} || { cat "${_logdir}"/stats.1.log; exit 1; } + ${_opts} ${enh_st_args} || { cat $(grep -l -i error "${_logdir}"/stats.*.log) ; exit 1; } # 4. Aggregate shape files _opts= @@ -1436,7 +1436,7 @@ if ! "${skip_eval}"; then # 2. Submit decoding jobs log "Decoding started... 
log: '${_logdir}/st_inference.*.log'" - # shellcheck disable=SC2086 + # shellcheck disable=SC2046,SC2086 ${_cmd} --gpu "${_ngpu}" JOB=1:"${_nj}" "${_logdir}"/st_inference.JOB.log \ ${python} -m ${st_inference_tool} \ --enh_s2t_task true \ @@ -1447,7 +1447,7 @@ if ! "${skip_eval}"; then --st_train_config "${enh_st_exp}"/config.yaml \ --st_model_file "${enh_st_exp}"/"${inference_enh_st_model}" \ --output_dir "${_logdir}"/output.JOB \ - ${_opts} ${st_inference_args} + ${_opts} ${st_inference_args} || { cat $(grep -l -i error "${_logdir}"/st_inference.*.log) ; exit 1; } # 3. Concatenates the output files from each jobs for f in token token_int score text; do @@ -1773,11 +1773,11 @@ if ! "${skip_upload_hf}"; then gitlfs=$(git lfs --version 2> /dev/null || true) [ -z "${gitlfs}" ] && \ log "ERROR: You need to install git-lfs first" && \ - exit 1 - + exit 1 + dir_repo=${expdir}/hf_${hf_repo//"/"/"_"} [ ! -d "${dir_repo}" ] && git clone https://huggingface.co/${hf_repo} ${dir_repo} - + if command -v git &> /dev/null; then _creator_name="$(git config user.name)" _checkout="git checkout $(git show -s --format=%H)" @@ -1790,13 +1790,13 @@ if ! "${skip_upload_hf}"; then # foo/asr1 -> foo _corpus="${_task%/*}" _model_name="${_creator_name}/${_corpus}_$(basename ${packed_model} .zip)" - + # copy files in ${dir_repo} unzip -o ${packed_model} -d ${dir_repo} # Generate description file # shellcheck disable=SC2034 hf_task=speech-enhancement-translation - # shellcheck disable=SC2034 + # shellcheck disable=SC2034 espnet_task=EnhS2T # shellcheck disable=SC2034 task_exp=${enh_st_exp} diff --git a/egs2/TEMPLATE/mt1/mt.sh b/egs2/TEMPLATE/mt1/mt.sh index bf6996c13c8..02260cb3a4d 100755 --- a/egs2/TEMPLATE/mt1/mt.sh +++ b/egs2/TEMPLATE/mt1/mt.sh @@ -455,7 +455,7 @@ if ! "${skip_data_prep}"; then log "Stage 1: Data preparation for data/${train_set}, data/${valid_set}, etc." # [Task dependent] Need to create data.sh for new corpus local/data.sh ${local_data_opts} - + fi if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then @@ -474,7 +474,7 @@ if ! "${skip_data_prep}"; then # with regex to suuport multi-references for single_file in $(ls data/"${dset}"/${extra_file}*); do cp ${single_file} "${data_feats}${_suf}/${dset}" - done + done done echo "${feats_type}" > "${data_feats}${_suf}/${dset}/feats_type" done @@ -702,7 +702,7 @@ if ! "${skip_train}"; then log "LM collect-stats started... log: '${_logdir}/stats.*.log'" # NOTE: --*_shape_file doesn't require length information if --batch_type=unsorted, # but it's used only for deciding the sample ids. - # shellcheck disable=SC2086 + # shellcheck disable=SC2046,SC2086 ${train_cmd} JOB=1:"${_nj}" "${_logdir}"/stats.JOB.log \ ${python} -m espnet2.bin.lm_train \ --collect_stats true \ @@ -718,7 +718,7 @@ if ! "${skip_train}"; then --train_shape_file "${_logdir}/train.JOB.scp" \ --valid_shape_file "${_logdir}/dev.JOB.scp" \ --output_dir "${_logdir}/stats.JOB" \ - ${_opts} ${lm_args} || { cat "${_logdir}"/stats.1.log; exit 1; } + ${_opts} ${lm_args} || { cat $(grep -l -i error "${_logdir}"/stats.*.log) ; exit 1; } # 4. Aggregate shape files _opts= @@ -845,7 +845,7 @@ if ! 
"${skip_train}"; then if "${use_ngram}"; then log "Stage 8: Ngram Training: train_set=${data_feats}/lm_train.txt" cut -f 2 -d " " ${data_feats}/lm_train.txt | lmplz -S "20%" --discount_fallback -o ${ngram_num} - >${ngram_exp}/${ngram_num}gram.arpa - build_binary -s ${ngram_exp}/${ngram_num}gram.arpa ${ngram_exp}/${ngram_num}gram.bin + build_binary -s ${ngram_exp}/${ngram_num}gram.arpa ${ngram_exp}/${ngram_num}gram.bin else log "Stage 8: Skip ngram stages: use_ngram=${use_ngram}" fi @@ -1132,7 +1132,7 @@ if ! "${skip_eval}"; then # 2. Submit decoding jobs log "Decoding started... log: '${_logdir}/mt_inference.*.log'" - # shellcheck disable=SC2086 + # shellcheck disable=SC2046,SC2086 ${_cmd} --gpu "${_ngpu}" JOB=1:"${_nj}" "${_logdir}"/mt_inference.JOB.log \ ${python} -m ${mt_inference_tool} \ --batch_size ${batch_size} \ @@ -1142,7 +1142,7 @@ if ! "${skip_eval}"; then --mt_train_config "${mt_exp}"/config.yaml \ --mt_model_file "${mt_exp}"/"${inference_mt_model}" \ --output_dir "${_logdir}"/output.JOB \ - ${_opts} ${inference_args} + ${_opts} ${inference_args} || { cat $(grep -l -i error "${_logdir}"/mt_inference.*.log) ; exit 1; } # 3. Concatenates the output files from each jobs for f in token token_int score text; do @@ -1205,7 +1205,7 @@ if ! "${skip_eval}"; then # ) \ # <(<"${_data}/text.${tgt_case}.${tgt_lang}" awk '{ print "(" $2 "-" $1 ")" }') \ # >"${_scoredir}/hyp.trn.org" - + # remove utterance id #perl -pe 's/\([^\)]+\)//g;' "${_scoredir}/ref.trn.org" > "${_scoredir}/ref.trn" #perl -pe 's/\([^\)]+\)//g;' "${_scoredir}/hyp.trn.org" > "${_scoredir}/hyp.trn" @@ -1220,7 +1220,7 @@ if ! "${skip_eval}"; then -i "${_scoredir}/hyp.trn.detok" \ -m bleu chrf ter \ >> ${_scoredir}/result.tc.txt - + log "Write a case-sensitive BLEU (single-reference) result in ${_scoredir}/result.tc.txt" fi @@ -1252,8 +1252,8 @@ if ! "${skip_eval}"; then ) \ <(<"${_data}/text.${tgt_case}.${tgt_lang}" awk '{ print "(" $2 "-" $1 ")" }') \ >"${_scoredir}/ref.trn.org.${ref_idx}" - - # + + # perl -pe 's/\([^\)]+\)//g;' "${_scoredir}/ref.trn.org.${ref_idx}" > "${_scoredir}/ref.trn.${ref_idx}" detokenizer.perl -l ${tgt_lang} -q < "${_scoredir}/ref.trn.${ref_idx}" > "${_scoredir}/ref.trn.detok.${ref_idx}" remove_punctuation.pl < "${_scoredir}/ref.trn.detok.${ref_idx}" > "${_scoredir}/ref.trn.detok.lc.rm.${ref_idx}" @@ -1386,11 +1386,11 @@ if ! "${skip_upload_hf}"; then gitlfs=$(git lfs --version 2> /dev/null || true) [ -z "${gitlfs}" ] && \ log "ERROR: You need to install git-lfs first" && \ - exit 1 - + exit 1 + dir_repo=${expdir}/hf_${hf_repo//"/"/"_"} [ ! -d "${dir_repo}" ] && git clone https://huggingface.co/${hf_repo} ${dir_repo} - + if command -v git &> /dev/null; then _creator_name="$(git config user.name)" _checkout="git checkout $(git show -s --format=%H)" @@ -1403,13 +1403,13 @@ if ! 
"${skip_upload_hf}"; then # foo/asr1 -> foo _corpus="${_task%/*}" _model_name="${_creator_name}/${_corpus}_$(basename ${packed_model} .zip)" - + # copy files in ${dir_repo} unzip -o ${packed_model} -d ${dir_repo} # Generate description file # shellcheck disable=SC2034 hf_task=machine-translation - # shellcheck disable=SC2034 + # shellcheck disable=SC2034 espnet_task=MT # shellcheck disable=SC2034 task_exp=${mt_exp} diff --git a/egs2/TEMPLATE/ssl1/hubert.sh b/egs2/TEMPLATE/ssl1/hubert.sh index 8a6f7590cb8..027b6636782 100755 --- a/egs2/TEMPLATE/ssl1/hubert.sh +++ b/egs2/TEMPLATE/ssl1/hubert.sh @@ -143,7 +143,7 @@ Options: # Pretrain related --pretrain_configs # configration files of pretraining stage --n_clusters # number of k-means clusters of pretraining stage - --features_km # feature for k-means clustering of pretraining stage + --features_km # feature for k-means clustering of pretraining stage --pt_args # Arguments for hubert model pretraining (default="${pt_args}"). # e.g., --pt_args "--max_epoch 10" # Note that it will overwrite args in pt config. @@ -180,7 +180,7 @@ fi [ -z "${valid_set}" ] && { log "${help_message}"; log "Error: --valid_set is required"; exit 2; }; # Check pretrain_config, n_clusters and feature list -pretrain_config_list=(${pretrain_configs// / }) +pretrain_config_list=(${pretrain_configs// / }) n_clusters_list=(${n_clusters// / }) feature_list=(${features_km// / }) if ! [ ${pretrain_start_iter} -le ${pretrain_stop_iter} ]; then @@ -227,7 +227,7 @@ fi if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then if [ "${feats_type}" = raw ]; then log "Stage 3: Format wav.scp: data/ -> ${data_feats}" - + # ====== Recreating "wav.scp" ====== # Kaldi-wav.scp, which can describe the file path with unix-pipe, like "cat /some/path |", # shouldn't be used in training process. @@ -235,7 +235,7 @@ if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then # and it can also change the audio-format and sampling rate. # If nothing is need, then format_wav_scp.sh does nothing: # i.e. the input file format and rate is same as the output. 
- + for dset in "${train_set}" "${valid_set}"; do _suf="/org" utils/copy_data_dir.sh --validate_opts --non-print data/"${dset}" "${data_feats}${_suf}/${dset}" @@ -253,7 +253,7 @@ if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then scripts/audio/format_wav_scp.sh --nj "${nj}" --cmd "${train_cmd}" \ --audio-format "${audio_format}" --fs "${fs}" ${_opts} \ "data/${dset}/wav.scp" "${data_feats}${_suf}/${dset}" - + echo "${feats_type}" > "${data_feats}${_suf}/${dset}/feats_type" done else @@ -265,21 +265,21 @@ fi if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then log "Stage 4: Remove long/short data: ${data_feats}/org -> ${data_feats}" - + # NOTE(kamo): Not applying to test_sets to keep original data for dset in "${train_set}" "${valid_set}"; do - + # Copy data dir utils/copy_data_dir.sh --validate_opts --non-print "${data_feats}/org/${dset}" "${data_feats}/${dset}" cp "${data_feats}/org/${dset}/feats_type" "${data_feats}/${dset}/feats_type" - + # Remove short utterances _feats_type="$(<${data_feats}/${dset}/feats_type)" if [ "${_feats_type}" = raw ]; then _fs=$(python3 -c "import humanfriendly as h;print(h.parse_size('${fs}'))") _min_length=$(python3 -c "print(int(${min_wav_duration} * ${_fs}))") _max_length=$(python3 -c "print(int(${max_wav_duration} * ${_fs}))") - + # utt2num_samples is created by format_wav_scp.sh <"${data_feats}/org/${dset}/utt2num_samples" \ awk -v min_length="${_min_length}" -v max_length="${_max_length}" \ @@ -291,11 +291,11 @@ if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then else log "Error: not supported: --feats_type ${feats_type}" fi - + # Remove empty text <"${data_feats}/org/${dset}/text" \ awk ' { if( NF != 1 ) print $0; } ' >"${data_feats}/${dset}/text" - + # fix_data_dir.sh leaves only utts which exist in all files utils/fix_data_dir.sh "${data_feats}/${dset}" done @@ -303,7 +303,7 @@ fi if [ ${stage} -le 7 ] && [ ${stop_stage} -ge 5 ]; then - + for ((iter=${pretrain_start_iter}; iter<=${pretrain_stop_iter};iter++)); do asr_config="${pretrain_config_list[${iter}]}" if [ "${lang}" != noinfo ]; then @@ -311,25 +311,25 @@ if [ ${stage} -le 7 ] && [ ${stop_stage} -ge 5 ]; then else asr_stats_dir="${expdir}/pretrain_iter${iter}_stats_${feats_type}" fi - + if [ -n "${asr_config}" ]; then asr_tag="$(basename "${asr_config}" .yaml)_${feats_type}" else asr_tag="train_${feats_type}" fi - + asr_exp="${expdir}/pretrain_${asr_tag}_iter${iter}" - + train_set_plabel=$(eval "echo ${train_set}_\${feature_list[${iter}]}_km\${n_clusters_list[${iter}]}") valid_set_plabel=$(eval "echo ${valid_set}_\${feature_list[${iter}]}_km\${n_clusters_list[${iter}]}") - + feats_km="${feature_list[${iter}]}" n_clusters="${n_clusters_list[${iter}]}" dictdir="./data/${feats_km}_km${n_clusters}_token_list_iter${iter}/${token_type}" - + if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then log "Stage 5.iter${iter}: Running ${n_clusters} cluster K-means on ${feats_km} feature." 
- + if [ ${iter} -eq 0 ] || [ ${feats_km} == "mfcc" ]; then ./scripts/km.sh \ --train_set "${train_set}" \ @@ -354,21 +354,21 @@ if [ ${stage} -le 7 ] && [ ${stop_stage} -ge 5 ]; then --hubert_dir_path "${expdir}/pretrained_model_iter$((iter-1))"/valid.acc.best.pth fi fi - + if [ ${stage} -le 6 ] && [ ${stop_stage} -ge 6 ]; then _asr_train_dir="${data_feats}/${train_set_plabel}" _asr_valid_dir="${data_feats}/${valid_set_plabel}" - + log "Stage 6.iter${iter}: ${feats_km} pretrain model collect stats: \ train_set=${_asr_train_dir}, valid_set=${_asr_valid_dir}" - + _opts= if [ -n "${asr_config}" ]; then # To generate the config file: e.g. # % python3 -m espnet2.bin.asr_train --print_config --optim adam _opts+="--config ${asr_config} " fi - + _feats_type="$(<${_asr_train_dir}/feats_type)" if [ "${_feats_type}" = raw ]; then _scp=wav.scp @@ -385,14 +385,14 @@ if [ ${stage} -le 7 ] && [ ${stop_stage} -ge 5 ]; then _input_size="$(<${_asr_train_dir}/feats_dim)" _opts+="--input_size=${_input_size} " fi - + # 1. Split the key file _logdir="${asr_stats_dir}/logdir" mkdir -p "${_logdir}" - + # Get the minimum number among ${nj} and the number lines of input files _nj=$(min "${nj}" "$(<${_asr_train_dir}/${_scp} wc -l)" "$(<${_asr_valid_dir}/${_scp} wc -l)") - + key_file="${_asr_train_dir}/${_scp}" split_scps="" for n in $(seq "${_nj}"); do @@ -400,7 +400,7 @@ if [ ${stage} -le 7 ] && [ ${stop_stage} -ge 5 ]; then done # shellcheck disable=SC2086 utils/split_scp.pl "${key_file}" ${split_scps} - + key_file="${_asr_valid_dir}/${_scp}" split_scps="" for n in $(seq "${_nj}"); do @@ -408,18 +408,18 @@ if [ ${stage} -le 7 ] && [ ${stop_stage} -ge 5 ]; then done # shellcheck disable=SC2086 utils/split_scp.pl "${key_file}" ${split_scps} - + # 2. Generate run.sh log "Generate '${asr_stats_dir}/run.sh'. You can resume the process from stage 5.iter${iter} using this script" mkdir -p "${asr_stats_dir}"; echo "${run_args} --stage 6 \"\$@\"; exit \$?" > "${asr_stats_dir}/run.sh"; chmod +x "${asr_stats_dir}/run.sh" - + # 3. Submit jobs log "Hubert pretraining collect-stats started... log: '${_logdir}/stats.*.log'" - + # NOTE: --*_shape_file doesn't require length information if --batch_type=unsorted, # but it's used only for deciding the sample ids. - - # shellcheck disable=SC2086 + + # shellcheck disable=SC2046,SC2086 ${train_cmd} JOB=1:"${_nj}" "${_logdir}"/stats.JOB.log \ ${python} -m espnet2.bin.hubert_train \ --collect_stats true \ @@ -439,8 +439,8 @@ if [ ${stage} -le 7 ] && [ ${stop_stage} -ge 5 ]; then --valid_shape_file "${_logdir}/valid.JOB.scp" \ --output_dir "${_logdir}/stats.JOB" \ --hubert_dict "${dictdir}/dict.txt" \ - ${_opts} ${pt_args} || { cat "${_logdir}"/stats.1.log; exit 1; } - + ${_opts} ${pt_args} || { cat $(grep -l -i error "${_logdir}"/stats.*.log) ; exit 1; } + + # 4. Aggregate shape files _opts= for i in $(seq "${_nj}"); do @@ -448,30 +448,30 @@ if [ ${stage} -le 7 ] && [ ${stop_stage} -ge 5 ]; then done # shellcheck disable=SC2086 ${python} -m espnet2.bin.aggregate_stats_dirs ${_opts} --output_dir "${asr_stats_dir}" - + # Append the num-tokens at the last dimensions. 
This is used for batch-bins count <"${asr_stats_dir}/train/text_shape" \ awk -v N="$(<${dictdir}/tokens.txt wc -l)" '{ print $0 "," N }' \ >"${asr_stats_dir}/train/text_shape.${token_type}" - + <"${asr_stats_dir}/valid/text_shape" \ awk -v N="$(<${dictdir}/tokens.txt wc -l)" '{ print $0 "," N }' \ >"${asr_stats_dir}/valid/text_shape.${token_type}" fi - + if [ ${stage} -le 7 ] && [ ${stop_stage} -ge 7 ]; then _asr_train_dir="${data_feats}/${train_set_plabel}" _asr_valid_dir="${data_feats}/${valid_set_plabel}" - + log "Stage 7.iter${iter}: Hubert Pretraining: train_set=${_asr_train_dir}, valid_set=${_asr_valid_dir}" - + _opts= if [ -n "${asr_config}" ]; then # To generate the config file: e.g. # % python3 -m espnet2.bin.hubert_train --print_config --optim adam _opts+="--config ${asr_config} " fi - + _feats_type="$(<${_asr_train_dir}/feats_type)" if [ "${_feats_type}" = raw ]; then _scp=wav.scp @@ -488,14 +488,14 @@ if [ ${stage} -le 7 ] && [ ${stop_stage} -ge 5 ]; then _type=kaldi_ark _fold_length="${asr_speech_fold_length}" _input_size="$(<${_asr_train_dir}/feats_dim)" - _opts+="--input_size=${_input_size} " + _opts+="--input_size=${_input_size} " fi - + if [ "${num_splits_asr}" -gt 1 ]; then # If you met a memory error when parsing text files, this option may help you. # The corpus is split into subsets and each subset is used for training one by one in order, # so the memory footprint can be limited to the memory required for each dataset. - + _split_dir="${asr_stats_dir}/splits${num_splits_asr}" if [ ! -f "${_split_dir}/.done" ]; then rm -f "${_split_dir}/.done" @@ -511,23 +511,23 @@ if [ ${stage} -le 7 ] && [ ${stop_stage} -ge 5 ]; then else log "${_split_dir}/.done exists. Spliting is skipped" fi - + _opts+="--train_data_path_and_name_and_type ${_split_dir}/${_scp},speech,${_type} " _opts+="--train_data_path_and_name_and_type ${_split_dir}/text,text,text " _opts+="--train_shape_file ${_split_dir}/speech_shape " _opts+="--train_shape_file ${_split_dir}/text_shape.${token_type} " _opts+="--multiple_iterator true " - + else _opts+="--train_data_path_and_name_and_type ${_asr_train_dir}/${_scp},speech,${_type} " _opts+="--train_data_path_and_name_and_type ${_asr_train_dir}/text,text,text " _opts+="--train_shape_file ${asr_stats_dir}/train/speech_shape " _opts+="--train_shape_file ${asr_stats_dir}/train/text_shape.${token_type} " fi - + log "Generate '${asr_exp}/run.sh'. You can resume the process from stage 6 using this script" mkdir -p "${asr_exp}"; echo "${run_args} --stage 7 \"\$@\"; exit \$?" > "${asr_exp}/run.sh"; chmod +x "${asr_exp}/run.sh" - + # NOTE(kamo): --fold_length is used only if --batch_type=folded and it's ignored in the other case log "Hubert pretraining started... log: '${asr_exp}/train.log'" if echo "${cuda_cmd}" | grep -e queue.pl -e queue-freegpu.pl &> /dev/null; then @@ -536,7 +536,7 @@ if [ ${stage} -le 7 ] && [ ${stop_stage} -ge 5 ]; then else jobname="${asr_exp}/train.log" fi - + # shellcheck disable=SC2086 ${python} -m espnet2.bin.launch \ --cmd "${cuda_cmd} --name ${jobname}" \ @@ -564,19 +564,19 @@ if [ ${stage} -le 7 ] && [ ${stop_stage} -ge 5 ]; then --output_dir "${asr_exp}" \ --hubert_dict "${dictdir}/dict.txt" \ ${_opts} ${pt_args} - + if [ "${iter}" -ge 0 ]; then log "Create a symbolic link of the pretrained model" if [ -L "${expdir}/pretrained_model_iter${iter}" ]; then log "Symbolic link ${expdir}/pretrained_model_iter${iter} already exists, remove it." rm "${expdir}/pretrained_model_iter${iter}" fi - + if ! 
[ -z "${asr_exp}" ]; then ln -s "../${asr_exp}" "${expdir}/pretrained_model_iter${iter}" fi fi - + log "Model saved in: ${asr_exp}" else log "Skip the pretraining stages" diff --git a/egs2/TEMPLATE/st1/st.sh b/egs2/TEMPLATE/st1/st.sh index 18303210f87..ebd2903d7a7 100755 --- a/egs2/TEMPLATE/st1/st.sh +++ b/egs2/TEMPLATE/st1/st.sh @@ -505,9 +505,9 @@ if ! "${skip_data_prep}"; then done utils/combine_data.sh --extra_files "${utt_extra_files}" "data/${train_set}_sp" ${_dirs} for extra_file in ${utt_extra_files}; do - python pyscripts/utils/remove_duplicate_keys.py data/"${train_set}_sp"/${extra_file} > data/"${train_set}_sp"/${extra_file}.tmp + python pyscripts/utils/remove_duplicate_keys.py data/"${train_set}_sp"/${extra_file} > data/"${train_set}_sp"/${extra_file}.tmp mv data/"${train_set}_sp"/${extra_file}.tmp data/"${train_set}_sp"/${extra_file} - done + done else log "Skip stage 2: Speed perturbation" fi @@ -544,7 +544,7 @@ if ! "${skip_data_prep}"; then for single_file in $(ls data/"${dset}"/${extra_file}*); do cp ${single_file} "${data_feats}${_suf}/${dset}" expand_utt_extra_files="${expand_utt_extra_files} $(basename ${single_file})" - done + done done echo "${expand_utt_extra_files}" utils/fix_data_dir.sh --utt_extra_files "${expand_utt_extra_files}" "${data_feats}${_suf}/${dset}" @@ -589,7 +589,7 @@ if ! "${skip_data_prep}"; then for single_file in $(ls data/"${dset}"/${extra_file}*); do cp ${single_file} "${data_feats}${_suf}/${dset}" expand_utt_extra_files="${expand_utt_extra_files} $(basename ${single_file})" - done + done done for extra_file in ${expand_utt_extra_files}; do LC_ALL=C sort -u -k1,1 "${data_feats}${_suf}/${dset}/${extra_file}" -o "${data_feats}${_suf}/${dset}/${extra_file}" @@ -638,7 +638,7 @@ if ! "${skip_data_prep}"; then for single_file in $(ls data/"${dset}"/${extra_file}*); do cp ${single_file} "${data_feats}${_suf}/${dset}" expand_utt_extra_files="${expand_utt_extra_files} $(basename ${single_file})" - done + done done utils/fix_data_dir.sh --utt_extra_files "${expand_utt_extra_files}*" "${data_feats}${_suf}/${dset}" for extra_file in ${expand_utt_extra_files}; do @@ -724,9 +724,9 @@ if ! "${skip_data_prep}"; then utils/fix_data_dir.sh --utt_extra_files "${utt_extra_files}" "${data_feats}/${dset}" for utt_extra_file in ${utt_extra_files}; do python pyscripts/utils/remove_duplicate_keys.py ${data_feats}/${dset}/${utt_extra_file} \ - > ${data_feats}/${dset}/${utt_extra_file}.tmp + > ${data_feats}/${dset}/${utt_extra_file}.tmp mv ${data_feats}/${dset}/${utt_extra_file}.tmp ${data_feats}/${dset}/${utt_extra_file} - done + done done # shellcheck disable=SC2002 @@ -931,7 +931,7 @@ if ! "${skip_train}"; then log "LM collect-stats started... log: '${_logdir}/stats.*.log'" # NOTE: --*_shape_file doesn't require length information if --batch_type=unsorted, # but it's used only for deciding the sample ids. - # shellcheck disable=SC2086 + # shellcheck disable=SC2046,SC2086 ${train_cmd} JOB=1:"${_nj}" "${_logdir}"/stats.JOB.log \ ${python} -m espnet2.bin.lm_train \ --collect_stats true \ @@ -947,7 +947,7 @@ if ! "${skip_train}"; then --train_shape_file "${_logdir}/train.JOB.scp" \ --valid_shape_file "${_logdir}/dev.JOB.scp" \ --output_dir "${_logdir}/stats.JOB" \ - ${_opts} ${lm_args} || { cat "${_logdir}"/stats.1.log; exit 1; } + ${_opts} ${lm_args} || { cat $(grep -l -i error "${_logdir}"/stats.*.log) ; exit 1; } # 4. Aggregate shape files _opts= @@ -1075,7 +1075,7 @@ if ! 
"${skip_train}"; then if "${use_ngram}"; then log "Stage 9: Ngram Training: train_set=${data_feats}/lm_train.txt" cut -f 2 -d " " ${data_feats}/lm_train.txt | lmplz -S "20%" --discount_fallback -o ${ngram_num} - >${ngram_exp}/${ngram_num}gram.arpa - build_binary -s ${ngram_exp}/${ngram_num}gram.arpa ${ngram_exp}/${ngram_num}gram.bin + build_binary -s ${ngram_exp}/${ngram_num}gram.arpa ${ngram_exp}/${ngram_num}gram.bin else log "Stage 9: Skip ngram stages: use_ngram=${use_ngram}" fi @@ -1427,7 +1427,7 @@ if ! "${skip_eval}"; then # 2. Submit decoding jobs log "Decoding started... log: '${_logdir}/st_inference.*.log'" - # shellcheck disable=SC2086 + # shellcheck disable=SC2046,SC2086 ${_cmd} --gpu "${_ngpu}" JOB=1:"${_nj}" "${_logdir}"/st_inference.JOB.log \ ${python} -m ${st_inference_tool} \ --batch_size ${batch_size} \ @@ -1437,7 +1437,7 @@ if ! "${skip_eval}"; then --st_train_config "${st_exp}"/config.yaml \ --st_model_file "${st_exp}"/"${inference_st_model}" \ --output_dir "${_logdir}"/output.JOB \ - ${_opts} ${inference_args} + ${_opts} ${inference_args} || { cat $(grep -l -i error "${_logdir}"/st_inference.*.log) ; exit 1; } # 3. Concatenates the output files from each jobs for f in token token_int score text; do @@ -1483,7 +1483,7 @@ if ! "${skip_eval}"; then ) \ <(<"${_data}/utt2spk" awk '{ print "(" $2 "-" $1 ")" }') \ >"${_scoredir}/hyp.trn.org" - + # remove utterance id perl -pe 's/\([^\)]+\)//g;' "${_scoredir}/ref.trn.org" > "${_scoredir}/ref.trn" perl -pe 's/\([^\)]+\)//g;' "${_scoredir}/hyp.trn.org" > "${_scoredir}/hyp.trn" @@ -1498,7 +1498,7 @@ if ! "${skip_eval}"; then -i "${_scoredir}/hyp.trn.detok" \ -m bleu chrf ter \ >> ${_scoredir}/result.tc.txt - + log "Write a case-sensitive BLEU (single-reference) result in ${_scoredir}/result.tc.txt" fi @@ -1530,8 +1530,8 @@ if ! "${skip_eval}"; then ) \ <(<"${_data}/utt2spk" awk '{ print "(" $2 "-" $1 ")" }') \ >"${_scoredir}/ref.trn.org.${ref_idx}" - - # + + # perl -pe 's/\([^\)]+\)//g;' "${_scoredir}/ref.trn.org.${ref_idx}" > "${_scoredir}/ref.trn.${ref_idx}" detokenizer.perl -l ${tgt_lang} -q < "${_scoredir}/ref.trn.${ref_idx}" > "${_scoredir}/ref.trn.detok.${ref_idx}" remove_punctuation.pl < "${_scoredir}/ref.trn.detok.${ref_idx}" > "${_scoredir}/ref.trn.detok.lc.rm.${ref_idx}" @@ -1667,11 +1667,11 @@ if ! "${skip_upload_hf}"; then gitlfs=$(git lfs --version 2> /dev/null || true) [ -z "${gitlfs}" ] && \ log "ERROR: You need to install git-lfs first" && \ - exit 1 - + exit 1 + dir_repo=${expdir}/hf_${hf_repo//"/"/"_"} [ ! -d "${dir_repo}" ] && git clone https://huggingface.co/${hf_repo} ${dir_repo} - + if command -v git &> /dev/null; then _creator_name="$(git config user.name)" _checkout="git checkout $(git show -s --format=%H)" @@ -1684,13 +1684,13 @@ if ! "${skip_upload_hf}"; then # foo/asr1 -> foo _corpus="${_task%/*}" _model_name="${_creator_name}/${_corpus}_$(basename ${packed_model} .zip)" - + # copy files in ${dir_repo} unzip -o ${packed_model} -d ${dir_repo} # Generate description file # shellcheck disable=SC2034 hf_task=speech-translation - # shellcheck disable=SC2034 + # shellcheck disable=SC2034 espnet_task=ST # shellcheck disable=SC2034 task_exp=${st_exp} diff --git a/egs2/TEMPLATE/tts1/tts.sh b/egs2/TEMPLATE/tts1/tts.sh index 0bd2e0debb8..13a3aaf2d5d 100755 --- a/egs2/TEMPLATE/tts1/tts.sh +++ b/egs2/TEMPLATE/tts1/tts.sh @@ -644,7 +644,7 @@ if ! "${skip_train}"; then # 3. Submit jobs log "TTS collect_stats started... 
log: '${_logdir}/stats.*.log'" - # shellcheck disable=SC2086 + # shellcheck disable=SC2046,SC2086 ${train_cmd} JOB=1:"${_nj}" "${_logdir}"/stats.JOB.log \ ${python} -m "espnet2.bin.${tts_task}_train" \ --collect_stats true \ @@ -665,7 +665,7 @@ if ! "${skip_train}"; then --train_shape_file "${_logdir}/train.JOB.scp" \ --valid_shape_file "${_logdir}/valid.JOB.scp" \ --output_dir "${_logdir}/stats.JOB" \ - ${_opts} ${train_args} || { cat "${_logdir}"/stats.1.log; exit 1; } + ${_opts} ${train_args} || { cat $(grep -l -i error "${_logdir}"/stats.*.log) ; exit 1; } # 4. Aggregate shape files _opts= @@ -1008,7 +1008,7 @@ if ! "${skip_eval}"; then # 3. Submit decoding jobs log "Decoding started... log: '${_logdir}/tts_inference.*.log'" - # shellcheck disable=SC2086 + # shellcheck disable=SC2046,SC2086 ${_cmd} --gpu "${_ngpu}" JOB=1:"${_nj}" "${_logdir}"/tts_inference.JOB.log \ ${python} -m espnet2.bin.tts_inference \ --ngpu "${_ngpu}" \ @@ -1019,7 +1019,7 @@ if ! "${skip_eval}"; then --train_config "${tts_exp}"/config.yaml \ --output_dir "${_logdir}"/output.JOB \ --vocoder_file "${vocoder_file}" \ - ${_opts} ${_ex_opts} ${inference_args} + ${_opts} ${_ex_opts} ${inference_args} || { cat $(grep -l -i error "${_logdir}"/tts_inference.*.log) ; exit 1; } # 4. Concatenates the output files from each jobs if [ -e "${_logdir}/output.${_nj}/norm" ]; then From 5518b6ba0af0bba9e9d59d6c47607656f49c9988 Mon Sep 17 00:00:00 2001 From: kamo-naoyuki Date: Thu, 12 May 2022 22:04:42 +0900 Subject: [PATCH 19/22] fix import order --- espnet/asr/pytorch_backend/recog.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/espnet/asr/pytorch_backend/recog.py b/espnet/asr/pytorch_backend/recog.py index b64131d1ad2..c6818e41ee7 100644 --- a/espnet/asr/pytorch_backend/recog.py +++ b/espnet/asr/pytorch_backend/recog.py @@ -1,8 +1,8 @@ """V2 backend for `asr_recog.py` using py:class:`espnet.nets.beam_search.BeamSearch`.""" -from packaging.version import parse as V import json import logging +from packaging.version import parse as V import torch From 9a2001fac56dddf5ba1c2eaec092cb420f83f7c9 Mon Sep 17 00:00:00 2001 From: kamo-naoyuki Date: Fri, 13 May 2022 03:44:11 +0900 Subject: [PATCH 20/22] fix for pytorch1.11 (+= became inplace op) --- espnet/nets/pytorch_backend/tacotron2/encoder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/espnet/nets/pytorch_backend/tacotron2/encoder.py b/espnet/nets/pytorch_backend/tacotron2/encoder.py index fee4b1c5552..148db765cc7 100644 --- a/espnet/nets/pytorch_backend/tacotron2/encoder.py +++ b/espnet/nets/pytorch_backend/tacotron2/encoder.py @@ -145,7 +145,7 @@ def forward(self, xs, ilens=None): if self.convs is not None: for i in six.moves.range(len(self.convs)): if self.use_residual: - xs += self.convs[i](xs) + xs = xs + self.convs[i](xs) else: xs = self.convs[i](xs) if self.blstm is None: From 2625be71a722e7eb030dff4f71d8dc9599a33844 Mon Sep 17 00:00:00 2001 From: kamo-naoyuki Date: Fri, 13 May 2022 03:46:24 +0900 Subject: [PATCH 21/22] remove warning --- test/espnet2/tasks/test_abs_task.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/espnet2/tasks/test_abs_task.py b/test/espnet2/tasks/test_abs_task.py index 7a9297f78e2..6b36d3b51d1 100644 --- a/test/espnet2/tasks/test_abs_task.py +++ b/test/espnet2/tasks/test_abs_task.py @@ -8,7 +8,7 @@ from espnet2.train.collate_fn import CommonCollateFn -class TestModel(AbsESPnetModel): +class DummyModel(AbsESPnetModel): def __init__(self): super().__init__() self.layer1 = 
torch.nn.Linear(1, 1) @@ -60,7 +60,7 @@ def optional_data_names(cls, train=True, inference=False): @classmethod def build_model(cls, args): - model = TestModel() + model = DummyModel() return model @classmethod From 9cfd6af64a28237019196cd495fbd2943790ce21 Mon Sep 17 00:00:00 2001 From: kamo-naoyuki Date: Fri, 13 May 2022 09:58:04 +0900 Subject: [PATCH 22/22] fix torch version checks: parse torch.__version__ before comparing --- espnet/asr/pytorch_backend/asr.py | 4 ++-- espnet/asr/pytorch_backend/recog.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/espnet/asr/pytorch_backend/asr.py b/espnet/asr/pytorch_backend/asr.py index 0effaaaa893..7a265f2badf 100644 --- a/espnet/asr/pytorch_backend/asr.py +++ b/espnet/asr/pytorch_backend/asr.py @@ -989,7 +989,7 @@ def recog(args): # It seems quantized LSTM only supports non-packed sequence before torch 1.4.0. # Reference issue: https://github.com/pytorch/pytorch/issues/27963 if ( - torch.__version__ < V("1.4.0") + V(torch.__version__) < V("1.4.0") and "lstm" in train_args.etype and torch.nn.LSTM in q_config ): @@ -999,7 +999,7 @@ def recog(args): # Dunno why but weight_observer from dynamic quantized module must have # dtype=torch.qint8 with torch < 1.5 although dtype=torch.float16 is supported. - if args.quantize_dtype == "float16" and torch.__version__ < V("1.5.0"): + if args.quantize_dtype == "float16" and V(torch.__version__) < V("1.5.0"): raise ValueError( "float16 dtype for dynamic quantization is not supported with torch " "version < 1.5.0. Switching to qint8 dtype instead." ) diff --git a/espnet/asr/pytorch_backend/recog.py b/espnet/asr/pytorch_backend/recog.py index c6818e41ee7..0824f6e7b26 100644 --- a/espnet/asr/pytorch_backend/recog.py +++ b/espnet/asr/pytorch_backend/recog.py @@ -54,7 +54,7 @@ def recog_v2(args): # See https://github.com/espnet/espnet/pull/3616 for more information. if ( - torch.__version__ < V("1.4.0") + V(torch.__version__) < V("1.4.0") and "lstm" in train_args.etype and torch.nn.LSTM in q_config ): raise ValueError( "Quantized LSTM in ESPnet is only supported with torch 1.4+." ) - if args.quantize_dtype == "float16" and torch.__version__ < V("1.5.0"): + if args.quantize_dtype == "float16" and V(torch.__version__) < V("1.5.0"): raise ValueError( "float16 dtype for dynamic quantization is not supported with torch " "version < 1.5.0. Switching to qint8 dtype instead."
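
Note on PATCH 20/22: for tensors, `xs += y` dispatches to the in-place `add_()`, so it mutates a tensor that the convolution module already saved for its backward pass, and autograd then refuses to backpropagate through the stale value. The snippet below is a minimal hypothetical repro, not part of the patch: `torch.nn.Linear` merely stands in for the encoder's conv stack, and the exact torch version at which this path starts erroring in ESPnet may differ from this toy case.

    import torch

    conv = torch.nn.Linear(4, 4)  # hypothetical stand-in for self.convs[i]
    x0 = torch.randn(2, 4, requires_grad=True)
    xs = x0 * 1.0                 # non-leaf intermediate, like the encoder's xs

    try:
        xs += conv(xs)            # in-place: mutates the input that conv saved for backward
        xs.sum().backward()       # raises RuntimeError about an in-place modification
    except RuntimeError as err:
        print("in-place += breaks autograd:", err)

    xs = x0 * 1.0
    xs = xs + conv(xs)            # out-of-place, as in the patch: saved input stays intact
    xs.sum().backward()           # succeeds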
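
Note on PATCH 22/22: `torch.__version__` is a plain string, so comparing it directly is lexicographic and two-digit minors sort wrong, while comparing a raw string against a parsed `Version` object is not reliably defined across `packaging` releases. Parsing both sides with `packaging.version.parse` (aliased to `V`), as the patch does, makes the guard a genuine version comparison. A small illustration:

    from packaging.version import parse as V

    print("1.10.1" < "1.4.0")          # True  -- lexicographic string order, wrong for versions
    print(V("1.10.1") < V("1.4.0"))    # False -- semantic version order, as intended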