Import torchaudio #1466 9d50acf

Reviewed By: vincentqb, mthrok
Differential Revision: D27922742
fbshipit-source-id: 6fa96728171687089abe6d734c23fc98bd29430b
pytorch · Apr 22, 2021 · dfb4943 · dfb4943
1 parent 93fb018
commit dfb4943
Show file tree

Hide file tree

Showing 18 changed files with 227 additions and 313 deletions.
diff --git a/.circleci/config.yml b/.circleci/config.yml
@@ -424,12 +424,10 @@ jobs:
       - run:
           name: Run tests
           command: .circleci/unittest/linux/scripts/run_test.sh
-      - run:
-          name: Post process
-          command: .circleci/unittest/linux/scripts/post_process.sh
       - store_test_results:
           path: test-results
-
+      - store_artifacts:
+          path: test/htmlcov
   unittest_linux_gpu:
     <<: *binary_common
     machine:
@@ -456,11 +454,10 @@ jobs:
       - run:
           name: Run tests
           command: docker run -t --gpus all -v $PWD:$PWD -w $PWD "${image_name}" .circleci/unittest/linux/scripts/run_test.sh
-      - run:
-          name: Post Process
-          command: docker run -t --gpus all -v $PWD:$PWD -w $PWD "${image_name}" .circleci/unittest/linux/scripts/post_process.sh
       - store_test_results:
           path: test-results
+      - store_artifacts:
+          path: test/htmlcov
 
   unittest_windows_cpu:
     <<: *binary_common
@@ -479,11 +476,10 @@ jobs:
       - run:
           name: Run tests
           command: .circleci/unittest/windows/scripts/run_test.sh
-      - run:
-          name: Post process
-          command: .circleci/unittest/windows/scripts/post_process.sh
       - store_test_results:
           path: test-results
+      - store_artifacts:
+          path: test/htmlcov
 
   unittest_windows_gpu:
     <<: *binary_common
@@ -505,11 +501,10 @@ jobs:
       - run:
           name: Run tests
           command: .circleci/unittest/windows/scripts/run_test.sh
-      - run:
-          name: Post process
-          command: .circleci/unittest/windows/scripts/post_process.sh
       - store_test_results:
           path: test-results
+      - store_artifacts:
+          path: test/htmlcov
 
   unittest_macos_cpu:
     <<: *binary_common
@@ -532,11 +527,10 @@ jobs:
       - run:
           name: Run tests
           command: .circleci/unittest/linux/scripts/run_test.sh
-      - run:
-          name: Post process
-          command: .circleci/unittest/linux/scripts/post_process.sh
       - store_test_results:
           path: test-results
+      - store_artifacts:
+          path: test/htmlcov
 
   stylecheck:
     <<: *binary_common

diff --git a/.circleci/config.yml.in b/.circleci/config.yml.in
@@ -424,12 +424,10 @@ jobs:
       - run:
           name: Run tests
           command: .circleci/unittest/linux/scripts/run_test.sh
-      - run:
-          name: Post process
-          command: .circleci/unittest/linux/scripts/post_process.sh
       - store_test_results:
           path: test-results
-
+      - store_artifacts:
+          path: test/htmlcov
   unittest_linux_gpu:
     <<: *binary_common
     machine:
@@ -456,11 +454,10 @@ jobs:
       - run:
           name: Run tests
           command: docker run -t --gpus all -v $PWD:$PWD -w $PWD "${image_name}" .circleci/unittest/linux/scripts/run_test.sh
-      - run:
-          name: Post Process
-          command: docker run -t --gpus all -v $PWD:$PWD -w $PWD "${image_name}" .circleci/unittest/linux/scripts/post_process.sh
       - store_test_results:
           path: test-results
+      - store_artifacts:
+          path: test/htmlcov
 
   unittest_windows_cpu:
     <<: *binary_common
@@ -479,11 +476,10 @@ jobs:
       - run:
           name: Run tests
           command: .circleci/unittest/windows/scripts/run_test.sh
-      - run:
-          name: Post process
-          command: .circleci/unittest/windows/scripts/post_process.sh
       - store_test_results:
           path: test-results
+      - store_artifacts:
+          path: test/htmlcov
 
   unittest_windows_gpu:
     <<: *binary_common
@@ -505,11 +501,10 @@ jobs:
       - run:
           name: Run tests
           command: .circleci/unittest/windows/scripts/run_test.sh
-      - run:
-          name: Post process
-          command: .circleci/unittest/windows/scripts/post_process.sh
       - store_test_results:
           path: test-results
+      - store_artifacts:
+          path: test/htmlcov
 
   unittest_macos_cpu:
     <<: *binary_common
@@ -532,11 +527,10 @@ jobs:
       - run:
           name: Run tests
           command: .circleci/unittest/linux/scripts/run_test.sh
-      - run:
-          name: Post process
-          command: .circleci/unittest/linux/scripts/post_process.sh
       - store_test_results:
           path: test-results
+      - store_artifacts:
+          path: test/htmlcov
 
   stylecheck:
     <<: *binary_common

diff --git a/.circleci/unittest/linux/scripts/install.sh b/.circleci/unittest/linux/scripts/install.sh
@@ -56,5 +56,5 @@ fi
 (
     set -x
     conda install -y -c conda-forge ${NUMBA_DEV_CHANNEL} 'librosa>=0.8.0' parameterized 'requests>=2.20'
-    pip install kaldi-io SoundFile codecov pytest pytest-cov scipy
+    pip install kaldi-io SoundFile coverage pytest pytest-cov scipy
 )
diff --git a/.circleci/unittest/linux/scripts/run_test.sh b/.circleci/unittest/linux/scripts/run_test.sh
@@ -24,3 +24,4 @@ declare -a args=(
 
 cd test
 pytest "${args[@]}" torchaudio_unittest
+coverage html
diff --git a/.circleci/unittest/windows/scripts/install.sh b/.circleci/unittest/windows/scripts/install.sh
@@ -44,5 +44,5 @@ fi
 (
     set -x
     conda install -y -c conda-forge ${NUMBA_DEV_CHANNEL} 'librosa>=0.8.0' parameterized 'requests>=2.20'
-    pip install kaldi-io SoundFile codecov pytest pytest-cov scipy
+    pip install kaldi-io SoundFile coverage pytest pytest-cov scipy
 )
diff --git a/.circleci/unittest/windows/scripts/run_test.sh b/.circleci/unittest/windows/scripts/run_test.sh
@@ -8,3 +8,4 @@ conda activate ./env
 python -m torch.utils.collect_env
 cd test
 pytest --cov=torchaudio --junitxml=../test-results/junit.xml -v --durations 20 torchaudio_unittest
+coverage html
diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -123,7 +123,8 @@
     'collapse_navigation': False,
     'display_version': True,
     'logo_only': True,
-    'navigation_with_keys': True
+    'navigation_with_keys': True,
+    'analytics_id': 'UA-117752657-2',
 }
 
 html_logo = '_static/img/pytorch-logo-dark.svg'

diff --git a/examples/pipeline_wav2letter/README.md b/examples/pipeline_wav2letter/README.md
@@ -4,8 +4,13 @@ This is an example pipeline for speech recognition using a greedy or Viterbi CTC
 
 More information about each command-line parameter is available with the `--help` option. An example can be invoked as follows.
 ```
+DATASET_ROOT=<Top>/<level>/<folder>
+DATASET_FOLDER_IN_ARCHIVE='LibriSpeech'
+
 python main.py \
     --reduce-lr-valid \
+    --dataset-root "${DATASET_ROOT}" \
+    --dataset-folder-in-archive "${DATASET_FOLDER_IN_ARCHIVE}" \
     --dataset-train train-clean-100 train-clean-360 train-other-500 \
     --dataset-valid dev-clean \
     --batch-size 128 \
@@ -15,7 +20,6 @@ python main.py \
     --clip-grad 0. \
     --gamma .99 \
     --hop-length 160 \
-    --n-hidden-channels 2000 \
     --win-length 400 \
     --n-bins 13 \
     --normalize \

diff --git a/examples/pipeline_wavernn/main.py b/examples/pipeline_wavernn/main.py
@@ -17,7 +17,7 @@
 
 from datasets import collate_factory, split_process_dataset
 from losses import LongCrossEntropyLoss, MoLLoss
-from processing import LinearToMel, NormalizeDB
+from processing import NormalizeDB
 from utils import MetricLogger, count_parameters, save_checkpoint
 
 
@@ -269,12 +269,12 @@ def main(args):
     }
 
     transforms = torch.nn.Sequential(
-        torchaudio.transforms.Spectrogram(**melkwargs),
-        LinearToMel(
+        torchaudio.transforms.MelSpectrogram(
             sample_rate=args.sample_rate,
-            n_fft=args.n_fft,
             n_mels=args.n_freq,
-            fmin=args.f_min,
+            f_min=args.f_min,
+            mel_scale='slaney',
+            **melkwargs,
         ),
         NormalizeDB(min_level_db=args.min_level_db, normalization=args.normalization),
     )

diff --git a/examples/pipeline_wavernn/processing.py b/examples/pipeline_wavernn/processing.py
@@ -1,32 +1,7 @@
-import librosa
 import torch
 import torch.nn as nn
 
 
-# TODO Replace by torchaudio, once https://github.com/pytorch/audio/pull/593 is resolved
-class LinearToMel(nn.Module):
-    def __init__(self, sample_rate, n_fft, n_mels, fmin, htk=False, norm="slaney"):
-        super().__init__()
-        self.sample_rate = sample_rate
-        self.n_fft = n_fft
-        self.n_mels = n_mels
-        self.fmin = fmin
-        self.htk = htk
-        self.norm = norm
-
-    def forward(self, specgram):
-        specgram = librosa.feature.melspectrogram(
-            S=specgram.squeeze(0).numpy(),
-            sr=self.sample_rate,
-            n_fft=self.n_fft,
-            n_mels=self.n_mels,
-            fmin=self.fmin,
-            htk=self.htk,
-            norm=self.norm,
-        )
-        return torch.from_numpy(specgram)
-
-
 class NormalizeDB(nn.Module):
     r"""Normalize the spectrogram with a minimum db value
     """
@@ -37,7 +12,7 @@ def __init__(self, min_level_db, normalization):
         self.normalization = normalization
 
     def forward(self, specgram):
-        specgram = torch.log10(torch.clamp(specgram, min=1e-5))
+        specgram = torch.log10(torch.clamp(specgram.squeeze(0), min=1e-5))
         if self.normalization:
             return torch.clamp(
                 (self.min_level_db - 20 * specgram) / self.min_level_db, min=0, max=1