[BC-Breaking] Remove compute_kaldi_pitch

This commit removes the compute_kaldi_pitch function and the underlying Kaldi integration from torchaudio. The Kaldi pitch function was added in a short period of time by integrating the original Kaldi implementation instead of reimplementing it in PyTorch. The Kaldi integration employed a hack that replaces the base vector/matrix implementation of Kaldi with PyTorch Tensor so that there is only one BLAS library within torchaudio. Recently, we have been making torchaudio leaner, and we have not seen wide adoption of the kaldi_pitch feature, so we decided to remove both the function and the integration. See some of the discussion in pytorch#1269.
mthrok · May 24, 2023 · eb4dcea · eb4dcea
1 parent 8690e6e
commit eb4dcea
Show file tree

Hide file tree

Showing 22 changed files with 6 additions and 904 deletions.
diff --git a/.gitmodules b/.gitmodules
@@ -1,4 +0,0 @@
-[submodule "kaldi"]
-	path = third_party/kaldi/submodule
-	url = https://github.com/kaldi-asr/kaldi
-	ignore = dirty

diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -53,7 +53,6 @@ endif()
 
 # Options
 option(BUILD_SOX "Build libsox statically" ON)
-option(BUILD_KALDI "Build kaldi statically" ON)
 option(BUILD_RIR "Enable RIR simulation" ON)
 option(BUILD_RNNT "Enable RNN transducer" ON)
 option(BUILD_ALIGN "Enable forced alignment" ON)

diff --git a/setup.py b/setup.py
@@ -124,7 +124,8 @@ def _fetch_archives(src):
 
 
 def _fetch_third_party_libraries():
-    _init_submodule()
+    # Revert this when a submodule is added again
+    # _init_submodule()
     if os.name != "nt":
         _fetch_archives(_parse_sources())
 

diff --git a/test/torchaudio_unittest/common_utils/__init__.py b/test/torchaudio_unittest/common_utils/__init__.py
@@ -12,7 +12,6 @@
     skipIfNoExec,
     skipIfNoFFmpeg,
     skipIfNoHWAccel,
-    skipIfNoKaldi,
     skipIfNoMacOS,
     skipIfNoModule,
     skipIfNoQengine,
@@ -51,7 +50,6 @@
     "skipIfNoExec",
     "skipIfNoMacOS",
     "skipIfNoModule",
-    "skipIfNoKaldi",
     "skipIfNoRIR",
     "skipIfNoSox",
     "skipIfNoSoxBackend",

diff --git a/test/torchaudio_unittest/common_utils/case_utils.py b/test/torchaudio_unittest/common_utils/case_utils.py
@@ -234,11 +234,6 @@ def skipIfNoModule(module, display_name=None):
     reason="Sox features are not available.",
     key="NO_SOX",
 )
-skipIfNoKaldi = _skipIf(
-    not torchaudio._extension._IS_KALDI_AVAILABLE,
-    reason="Kaldi features are not available.",
-    key="NO_KALDI",
-)
 skipIfNoRIR = _skipIf(
     not torchaudio._extension._IS_RIR_AVAILABLE,
     reason="RIR features are not available.",

diff --git a/test/torchaudio_unittest/functional/batch_consistency_test.py b/test/torchaudio_unittest/functional/batch_consistency_test.py
@@ -257,18 +257,6 @@ def test_resample_waveform(self, resampling_method):
             atol=1e-7,
         )
 
-    @common_utils.skipIfNoKaldi
-    def test_compute_kaldi_pitch(self):
-        sample_rate = 44100
-        n_channels = 2
-        waveform = common_utils.get_whitenoise(sample_rate=sample_rate, n_channels=self.batch_size * n_channels)
-        batch = waveform.view(self.batch_size, n_channels, waveform.size(-1))
-        kwargs = {
-            "sample_rate": sample_rate,
-        }
-        func = partial(F.compute_kaldi_pitch, **kwargs)
-        self.assert_batch_consistency(func, inputs=(batch,))
-
     def test_lfilter(self):
         signal_length = 2048
         x = torch.randn(self.batch_size, signal_length)

diff --git a/test/torchaudio_unittest/functional/kaldi_compatibility_cpu_test.py b/test/torchaudio_unittest/functional/kaldi_compatibility_cpu_test.py
@@ -1,12 +1,7 @@
 import torch
 from torchaudio_unittest.common_utils import PytorchTestCase
 
-from .kaldi_compatibility_test_impl import Kaldi, KaldiCPUOnly
-
-
-class TestKaldiCPUOnly(KaldiCPUOnly, PytorchTestCase):
-    dtype = torch.float32
-    device = torch.device("cpu")
+from .kaldi_compatibility_test_impl import Kaldi
 
 
 class TestKaldiFloat32(Kaldi, PytorchTestCase):

diff --git a/test/torchaudio_unittest/functional/kaldi_compatibility_test_impl.py b/test/torchaudio_unittest/functional/kaldi_compatibility_test_impl.py
@@ -1,14 +1,6 @@
 import torch
 import torchaudio.functional as F
-from parameterized import parameterized
-from torchaudio_unittest.common_utils import (
-    get_sinusoid,
-    load_params,
-    save_wav,
-    skipIfNoExec,
-    TempDirMixin,
-    TestBaseMixin,
-)
+from torchaudio_unittest.common_utils import skipIfNoExec, TempDirMixin, TestBaseMixin
 from torchaudio_unittest.common_utils.kaldi_utils import convert_args, run_kaldi
 
 
@@ -32,25 +24,3 @@ def test_sliding_window_cmn(self):
         command = ["apply-cmvn-sliding"] + convert_args(**kwargs) + ["ark:-", "ark:-"]
         kaldi_result = run_kaldi(command, "ark", tensor)
         self.assert_equal(result, expected=kaldi_result)
-
-
-class KaldiCPUOnly(TempDirMixin, TestBaseMixin):
-    def assert_equal(self, output, *, expected, rtol=None, atol=None):
-        expected = expected.to(dtype=self.dtype, device=self.device)
-        self.assertEqual(output, expected, rtol=rtol, atol=atol)
-
-    @parameterized.expand(load_params("kaldi_test_pitch_args.jsonl"))
-    @skipIfNoExec("compute-kaldi-pitch-feats")
-    def test_pitch_feats(self, kwargs):
-        """compute_kaldi_pitch produces numerically compatible result with compute-kaldi-pitch-feats"""
-        sample_rate = kwargs["sample_rate"]
-        waveform = get_sinusoid(dtype="float32", sample_rate=sample_rate)
-        result = F.compute_kaldi_pitch(waveform[0], **kwargs)
-
-        waveform = get_sinusoid(dtype="int16", sample_rate=sample_rate)
-        wave_file = self.get_temp_path("test.wav")
-        save_wav(wave_file, waveform, sample_rate)
-
-        command = ["compute-kaldi-pitch-feats"] + convert_args(**kwargs) + ["scp:-", "ark:-"]
-        kaldi_result = run_kaldi(command, "scp", wave_file)
-        self.assert_equal(result, expected=kaldi_result)
diff --git a/test/torchaudio_unittest/functional/torchscript_consistency_impl.py b/test/torchaudio_unittest/functional/torchscript_consistency_impl.py
@@ -585,18 +585,6 @@ def func(tensor):
         tensor = common_utils.get_whitenoise(sample_rate=44100)
         self._assert_consistency(func, (tensor,))
 
-    @common_utils.skipIfNoKaldi
-    def test_compute_kaldi_pitch(self):
-        if self.dtype != torch.float32 or self.device != torch.device("cpu"):
-            raise unittest.SkipTest("Only float32, cpu is supported.")
-
-        def func(tensor):
-            sample_rate: float = 44100.0
-            return F.compute_kaldi_pitch(tensor, sample_rate)
-
-        tensor = common_utils.get_whitenoise(sample_rate=44100)
-        self._assert_consistency(func, (tensor,))
-
     def test_resample_sinc(self):
         def func(tensor):
             sr1, sr2 = 16000, 8000

diff --git a/third_party/CMakeLists.txt b/third_party/CMakeLists.txt
@@ -9,10 +9,3 @@ file(MAKE_DIRECTORY install/lib)
 if (BUILD_SOX)
   add_subdirectory(sox)
 endif()
-
-################################################################################
-# kaldi
-################################################################################
-if (BUILD_KALDI)
-  add_subdirectory(kaldi)
-endif()
diff --git a/third_party/kaldi/CMakeLists.txt b/third_party/kaldi/CMakeLists.txt
diff --git a/third_party/kaldi/README.md b/third_party/kaldi/README.md
diff --git a/third_party/kaldi/kaldi.patch b/third_party/kaldi/kaldi.patch
diff --git a/third_party/kaldi/src/matrix/kaldi-matrix.cc b/third_party/kaldi/src/matrix/kaldi-matrix.cc