diff --git a/.circleci/unittest/linux/scripts/run_style_checks.sh b/.circleci/unittest/linux/scripts/run_style_checks.sh index 0620f4867e..d6e8ef87fa 100755 --- a/.circleci/unittest/linux/scripts/run_style_checks.sh +++ b/.circleci/unittest/linux/scripts/run_style_checks.sh @@ -38,7 +38,7 @@ fi printf "\x1b[34mRunning clang-format:\x1b[0m\n" "${this_dir}"/run_clang_format.py \ - -r torchaudio/csrc third_party/kaldi/src \ + -r torchaudio/csrc \ --clang-format-executable "${clangformat_path}" \ && git diff --exit-code status=$? diff --git a/.gitmodules b/.gitmodules index 724846120c..e69de29bb2 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,4 +0,0 @@ -[submodule "kaldi"] - path = third_party/kaldi/submodule - url = https://github.com/kaldi-asr/kaldi - ignore = dirty diff --git a/CMakeLists.txt b/CMakeLists.txt index ab70cff60c..3e45a134ed 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -53,7 +53,6 @@ endif() # Options option(BUILD_SOX "Build libsox statically" ON) -option(BUILD_KALDI "Build kaldi statically" ON) option(BUILD_RIR "Enable RIR simulation" ON) option(BUILD_RNNT "Enable RNN transducer" ON) option(BUILD_ALIGN "Enable forced alignment" ON) diff --git a/examples/tutorials/audio_feature_extractions_tutorial.py b/examples/tutorials/audio_feature_extractions_tutorial.py index f71d424ade..8a085ab5a3 100644 --- a/examples/tutorials/audio_feature_extractions_tutorial.py +++ b/examples/tutorials/audio_feature_extractions_tutorial.py @@ -406,54 +406,3 @@ def plot_pitch(waveform, sr, pitch): plot_pitch(SPEECH_WAVEFORM, SAMPLE_RATE, pitch) - -###################################################################### -# Kaldi Pitch (beta) -# ------------------ -# -# Kaldi Pitch feature [1] is a pitch detection mechanism tuned for automatic -# speech recognition (ASR) applications. This is a beta feature in ``torchaudio``, -# and it is available as :py:func:`torchaudio.functional.compute_kaldi_pitch`. -# -# 1. A pitch extraction algorithm tuned for automatic speech recognition -# -# Ghahremani, B. BabaAli, D. Povey, K. Riedhammer, J. Trmal and S. -# Khudanpur -# -# 2014 IEEE International Conference on Acoustics, Speech and Signal -# Processing (ICASSP), Florence, 2014, pp. 2494-2498, doi: -# 10.1109/ICASSP.2014.6854049. -# [`abstract `__], -# [`paper `__] -# - -pitch_feature = F.compute_kaldi_pitch(SPEECH_WAVEFORM, SAMPLE_RATE) -pitch, nfcc = pitch_feature[..., 0], pitch_feature[..., 1] - -###################################################################### -# - -def plot_kaldi_pitch(waveform, sr, pitch, nfcc): - _, axis = plt.subplots(1, 1) - axis.set_title("Kaldi Pitch Feature") - axis.grid(True) - - end_time = waveform.shape[1] / sr - time_axis = torch.linspace(0, end_time, waveform.shape[1]) - axis.plot(time_axis, waveform[0], linewidth=1, color="gray", alpha=0.3) - - time_axis = torch.linspace(0, end_time, pitch.shape[1]) - ln1 = axis.plot(time_axis, pitch[0], linewidth=2, label="Pitch", color="green") - axis.set_ylim((-1.3, 1.3)) - - axis2 = axis.twinx() - time_axis = torch.linspace(0, end_time, nfcc.shape[1]) - ln2 = axis2.plot(time_axis, nfcc[0], linewidth=2, label="NFCC", color="blue", linestyle="--") - - lns = ln1 + ln2 - labels = [l.get_label() for l in lns] - axis.legend(lns, labels, loc=0) - plt.show(block=False) - - -plot_kaldi_pitch(SPEECH_WAVEFORM, SAMPLE_RATE, pitch, nfcc) diff --git a/setup.py b/setup.py index f55a2c3f01..917e03b0cc 100644 --- a/setup.py +++ b/setup.py @@ -124,7 +124,8 @@ def _fetch_archives(src): def _fetch_third_party_libraries(): - _init_submodule() + # Revert this when a submodule is added again + # _init_submodule() if os.name != "nt": _fetch_archives(_parse_sources()) diff --git a/test/torchaudio_unittest/common_utils/__init__.py b/test/torchaudio_unittest/common_utils/__init__.py index 1bff73a907..8c66f2149a 100644 --- a/test/torchaudio_unittest/common_utils/__init__.py +++ b/test/torchaudio_unittest/common_utils/__init__.py @@ -13,7 +13,6 @@ skipIfNoExec, skipIfNoFFmpeg, skipIfNoHWAccel, - skipIfNoKaldi, skipIfNoMacOS, skipIfNoModule, skipIfNoQengine, @@ -52,7 +51,6 @@ "skipIfNoExec", "skipIfNoMacOS", "skipIfNoModule", - "skipIfNoKaldi", "skipIfNoRIR", "skipIfNoSox", "skipIfNoSoxBackend", diff --git a/test/torchaudio_unittest/common_utils/case_utils.py b/test/torchaudio_unittest/common_utils/case_utils.py index b5adfcb3b1..249ca6d98f 100644 --- a/test/torchaudio_unittest/common_utils/case_utils.py +++ b/test/torchaudio_unittest/common_utils/case_utils.py @@ -234,11 +234,6 @@ def skipIfNoModule(module, display_name=None): reason="Sox features are not available.", key="NO_SOX", ) -skipIfNoKaldi = _skipIf( - not torchaudio._extension._IS_KALDI_AVAILABLE, - reason="Kaldi features are not available.", - key="NO_KALDI", -) skipIfNoRIR = _skipIf( not torchaudio._extension._IS_RIR_AVAILABLE, reason="RIR features are not available.", diff --git a/test/torchaudio_unittest/functional/batch_consistency_test.py b/test/torchaudio_unittest/functional/batch_consistency_test.py index eee49e62cb..24553a833c 100644 --- a/test/torchaudio_unittest/functional/batch_consistency_test.py +++ b/test/torchaudio_unittest/functional/batch_consistency_test.py @@ -257,18 +257,6 @@ def test_resample_waveform(self, resampling_method): atol=1e-7, ) - @common_utils.skipIfNoKaldi - def test_compute_kaldi_pitch(self): - sample_rate = 44100 - n_channels = 2 - waveform = common_utils.get_whitenoise(sample_rate=sample_rate, n_channels=self.batch_size * n_channels) - batch = waveform.view(self.batch_size, n_channels, waveform.size(-1)) - kwargs = { - "sample_rate": sample_rate, - } - func = partial(F.compute_kaldi_pitch, **kwargs) - self.assert_batch_consistency(func, inputs=(batch,)) - def test_lfilter(self): signal_length = 2048 x = torch.randn(self.batch_size, signal_length) diff --git a/test/torchaudio_unittest/functional/kaldi_compatibility_cpu_test.py b/test/torchaudio_unittest/functional/kaldi_compatibility_cpu_test.py index 2c2a0de8e0..ef7b52037c 100644 --- a/test/torchaudio_unittest/functional/kaldi_compatibility_cpu_test.py +++ b/test/torchaudio_unittest/functional/kaldi_compatibility_cpu_test.py @@ -1,12 +1,7 @@ import torch from torchaudio_unittest.common_utils import PytorchTestCase -from .kaldi_compatibility_test_impl import Kaldi, KaldiCPUOnly - - -class TestKaldiCPUOnly(KaldiCPUOnly, PytorchTestCase): - dtype = torch.float32 - device = torch.device("cpu") +from .kaldi_compatibility_test_impl import Kaldi class TestKaldiFloat32(Kaldi, PytorchTestCase): diff --git a/test/torchaudio_unittest/functional/kaldi_compatibility_test_impl.py b/test/torchaudio_unittest/functional/kaldi_compatibility_test_impl.py index d6b8b86180..d87b463b9a 100644 --- a/test/torchaudio_unittest/functional/kaldi_compatibility_test_impl.py +++ b/test/torchaudio_unittest/functional/kaldi_compatibility_test_impl.py @@ -1,14 +1,6 @@ import torch import torchaudio.functional as F -from parameterized import parameterized -from torchaudio_unittest.common_utils import ( - get_sinusoid, - load_params, - save_wav, - skipIfNoExec, - TempDirMixin, - TestBaseMixin, -) +from torchaudio_unittest.common_utils import skipIfNoExec, TempDirMixin, TestBaseMixin from torchaudio_unittest.common_utils.kaldi_utils import convert_args, run_kaldi @@ -32,25 +24,3 @@ def test_sliding_window_cmn(self): command = ["apply-cmvn-sliding"] + convert_args(**kwargs) + ["ark:-", "ark:-"] kaldi_result = run_kaldi(command, "ark", tensor) self.assert_equal(result, expected=kaldi_result) - - -class KaldiCPUOnly(TempDirMixin, TestBaseMixin): - def assert_equal(self, output, *, expected, rtol=None, atol=None): - expected = expected.to(dtype=self.dtype, device=self.device) - self.assertEqual(output, expected, rtol=rtol, atol=atol) - - @parameterized.expand(load_params("kaldi_test_pitch_args.jsonl")) - @skipIfNoExec("compute-kaldi-pitch-feats") - def test_pitch_feats(self, kwargs): - """compute_kaldi_pitch produces numerically compatible result with compute-kaldi-pitch-feats""" - sample_rate = kwargs["sample_rate"] - waveform = get_sinusoid(dtype="float32", sample_rate=sample_rate) - result = F.compute_kaldi_pitch(waveform[0], **kwargs) - - waveform = get_sinusoid(dtype="int16", sample_rate=sample_rate) - wave_file = self.get_temp_path("test.wav") - save_wav(wave_file, waveform, sample_rate) - - command = ["compute-kaldi-pitch-feats"] + convert_args(**kwargs) + ["scp:-", "ark:-"] - kaldi_result = run_kaldi(command, "scp", wave_file) - self.assert_equal(result, expected=kaldi_result) diff --git a/test/torchaudio_unittest/functional/torchscript_consistency_impl.py b/test/torchaudio_unittest/functional/torchscript_consistency_impl.py index 9d36b52f93..c8afc3f8d1 100644 --- a/test/torchaudio_unittest/functional/torchscript_consistency_impl.py +++ b/test/torchaudio_unittest/functional/torchscript_consistency_impl.py @@ -585,18 +585,6 @@ def func(tensor): tensor = common_utils.get_whitenoise(sample_rate=44100) self._assert_consistency(func, (tensor,)) - @common_utils.skipIfNoKaldi - def test_compute_kaldi_pitch(self): - if self.dtype != torch.float32 or self.device != torch.device("cpu"): - raise unittest.SkipTest("Only float32, cpu is supported.") - - def func(tensor): - sample_rate: float = 44100.0 - return F.compute_kaldi_pitch(tensor, sample_rate) - - tensor = common_utils.get_whitenoise(sample_rate=44100) - self._assert_consistency(func, (tensor,)) - def test_resample_sinc(self): def func(tensor): sr1, sr2 = 16000, 8000 diff --git a/third_party/CMakeLists.txt b/third_party/CMakeLists.txt index f2dfa0a786..07cd3c9d4b 100644 --- a/third_party/CMakeLists.txt +++ b/third_party/CMakeLists.txt @@ -9,10 +9,3 @@ file(MAKE_DIRECTORY install/lib) if (BUILD_SOX) add_subdirectory(sox) endif() - -################################################################################ -# kaldi -################################################################################ -if (BUILD_KALDI) - add_subdirectory(kaldi) -endif() diff --git a/third_party/kaldi/CMakeLists.txt b/third_party/kaldi/CMakeLists.txt deleted file mode 100644 index 75fca8ed99..0000000000 --- a/third_party/kaldi/CMakeLists.txt +++ /dev/null @@ -1,32 +0,0 @@ -set(KALDI_REPO ${CMAKE_CURRENT_SOURCE_DIR}/submodule) - -if (NOT EXISTS ${KALDI_REPO}/src/base/version.h) -# Apply custom patch -execute_process( - WORKING_DIRECTORY ${KALDI_REPO} - COMMAND "git" "checkout" "." - ) -execute_process( - WORKING_DIRECTORY ${KALDI_REPO} - COMMAND git apply ../kaldi.patch - ) -# Update the version string -execute_process( - WORKING_DIRECTORY ${KALDI_REPO}/src/base - COMMAND bash get_version.sh - ) -endif() - -set(KALDI_SOURCES - src/matrix/kaldi-vector.cc - src/matrix/kaldi-matrix.cc - submodule/src/base/kaldi-error.cc - submodule/src/base/kaldi-math.cc - submodule/src/feat/feature-functions.cc - submodule/src/feat/pitch-functions.cc - submodule/src/feat/resample.cc - ) - -add_library(kaldi STATIC ${KALDI_SOURCES}) -target_include_directories(kaldi PUBLIC src submodule/src) -target_include_directories(kaldi PRIVATE ${TORCH_INCLUDE_DIRS}) diff --git a/third_party/kaldi/README.md b/third_party/kaldi/README.md deleted file mode 100644 index f981674601..0000000000 --- a/third_party/kaldi/README.md +++ /dev/null @@ -1,6 +0,0 @@ -# Custom Kaldi build - -This directory contains original Kaldi repository (as submodule), [the custom implementation of Kaldi's vector/matrix](./src) and the build script. - -We use the custom build process so that the resulting library only contains what torchaudio needs. -We use the custom vector/matrix implementation so that we can use the same BLAS library that PyTorch is compiled with, and so that we can (hopefully, in future) take advantage of other PyTorch features (such as differentiability and GPU support). The down side of this approach is that it adds a lot of overhead compared to the original Kaldi (operator dispatch and element-wise processing, which PyTorch is not efficient at). We can improve this gradually, and if you are interested in helping, please let us know by opening an issue. diff --git a/third_party/kaldi/kaldi.patch b/third_party/kaldi/kaldi.patch deleted file mode 100644 index 40667bced8..0000000000 --- a/third_party/kaldi/kaldi.patch +++ /dev/null @@ -1,76 +0,0 @@ -diff --git a/src/base/kaldi-types.h b/src/base/kaldi-types.h -index 7ebf4f853..c15b288b2 100644 ---- a/src/base/kaldi-types.h -+++ b/src/base/kaldi-types.h -@@ -41,6 +41,7 @@ typedef float BaseFloat; - - // for discussion on what to do if you need compile kaldi - // without OpenFST, see the bottom of this this file -+/* - #include - - namespace kaldi { -@@ -53,10 +54,10 @@ namespace kaldi { - typedef float float32; - typedef double double64; - } // end namespace kaldi -+*/ - - // In a theoretical case you decide compile Kaldi without the OpenFST - // comment the previous namespace statement and uncomment the following --/* - namespace kaldi { - typedef int8_t int8; - typedef int16_t int16; -@@ -70,6 +71,5 @@ namespace kaldi { - typedef float float32; - typedef double double64; - } // end namespace kaldi --*/ - - #endif // KALDI_BASE_KALDI_TYPES_H_ -diff --git a/src/matrix/matrix-lib.h b/src/matrix/matrix-lib.h -index b6059b06c..4fb9e1b16 100644 ---- a/src/matrix/matrix-lib.h -+++ b/src/matrix/matrix-lib.h -@@ -25,14 +25,14 @@ - #include "base/kaldi-common.h" - #include "matrix/kaldi-vector.h" - #include "matrix/kaldi-matrix.h" --#include "matrix/sp-matrix.h" --#include "matrix/tp-matrix.h" -+// #include "matrix/sp-matrix.h" -+// #include "matrix/tp-matrix.h" - #include "matrix/matrix-functions.h" - #include "matrix/srfft.h" - #include "matrix/compressed-matrix.h" --#include "matrix/sparse-matrix.h" -+// #include "matrix/sparse-matrix.h" - #include "matrix/optimization.h" --#include "matrix/numpy-array.h" -+// #include "matrix/numpy-array.h" - - #endif - -diff --git a/src/util/common-utils.h b/src/util/common-utils.h -index cfb0c255c..48d199e97 100644 ---- a/src/util/common-utils.h -+++ b/src/util/common-utils.h -@@ -21,11 +21,11 @@ - - #include "base/kaldi-common.h" - #include "util/parse-options.h" --#include "util/kaldi-io.h" --#include "util/simple-io-funcs.h" --#include "util/kaldi-holder.h" --#include "util/kaldi-table.h" --#include "util/table-types.h" --#include "util/text-utils.h" -+// #include "util/kaldi-io.h" -+// #include "util/simple-io-funcs.h" -+// #include "util/kaldi-holder.h" -+// #include "util/kaldi-table.h" -+// #include "util/table-types.h" -+// #include "util/text-utils.h" - - #endif // KALDI_UTIL_COMMON_UTILS_H_ diff --git a/third_party/kaldi/src/matrix/kaldi-matrix.cc b/third_party/kaldi/src/matrix/kaldi-matrix.cc deleted file mode 100644 index a89c3809c9..0000000000 --- a/third_party/kaldi/src/matrix/kaldi-matrix.cc +++ /dev/null @@ -1,39 +0,0 @@ -#include "matrix/kaldi-matrix.h" -#include - -namespace { - -template -void assert_matrix_shape(const torch::Tensor& tensor_); - -template <> -void assert_matrix_shape(const torch::Tensor& tensor_) { - TORCH_INTERNAL_ASSERT(tensor_.ndimension() == 2); - TORCH_INTERNAL_ASSERT(tensor_.dtype() == torch::kFloat32); - TORCH_CHECK(tensor_.device().is_cpu(), "Input tensor has to be on CPU."); -} - -template <> -void assert_matrix_shape(const torch::Tensor& tensor_) { - TORCH_INTERNAL_ASSERT(tensor_.ndimension() == 2); - TORCH_INTERNAL_ASSERT(tensor_.dtype() == torch::kFloat64); - TORCH_CHECK(tensor_.device().is_cpu(), "Input tensor has to be on CPU."); -} - -} // namespace - -namespace kaldi { - -template -MatrixBase::MatrixBase(torch::Tensor tensor) : tensor_(tensor) { - assert_matrix_shape(tensor_); -}; - -template class Matrix; -template class Matrix; -template class MatrixBase; -template class MatrixBase; -template class SubMatrix; -template class SubMatrix; - -} // namespace kaldi diff --git a/third_party/kaldi/src/matrix/kaldi-matrix.h b/third_party/kaldi/src/matrix/kaldi-matrix.h deleted file mode 100644 index f64828b84f..0000000000 --- a/third_party/kaldi/src/matrix/kaldi-matrix.h +++ /dev/null @@ -1,178 +0,0 @@ -// https://github.com/kaldi-asr/kaldi/blob/7fb716aa0f56480af31514c7e362db5c9f787fd4/src/matrix/kaldi-matrix.h - -#ifndef KALDI_MATRIX_KALDI_MATRIX_H_ -#define KALDI_MATRIX_KALDI_MATRIX_H_ - -#include -#include "matrix/kaldi-vector.h" -#include "matrix/matrix-common.h" - -using namespace torch::indexing; - -namespace kaldi { - -// https://github.com/kaldi-asr/kaldi/blob/7fb716aa0f56480af31514c7e362db5c9f787fd4/src/matrix/kaldi-matrix.h#L44-L48 -template -class MatrixBase { - public: - //////////////////////////////////////////////////////////////////////////////// - // PyTorch-specific items - //////////////////////////////////////////////////////////////////////////////// - torch::Tensor tensor_; - /// Construct VectorBase which is an interface to an existing torch::Tensor - /// object. - MatrixBase(torch::Tensor tensor); - - //////////////////////////////////////////////////////////////////////////////// - // Kaldi-compatible items - //////////////////////////////////////////////////////////////////////////////// - // https://github.com/kaldi-asr/kaldi/blob/7fb716aa0f56480af31514c7e362db5c9f787fd4/src/matrix/kaldi-matrix.h#L62-L63 - inline MatrixIndexT NumRows() const { - return tensor_.size(0); - }; - - // https://github.com/kaldi-asr/kaldi/blob/7fb716aa0f56480af31514c7e362db5c9f787fd4/src/matrix/kaldi-matrix.h#L65-L66 - inline MatrixIndexT NumCols() const { - return tensor_.size(1); - }; - - // https://github.com/kaldi-asr/kaldi/blob/7fb716aa0f56480af31514c7e362db5c9f787fd4/src/matrix/kaldi-matrix.h#L177-L178 - void CopyColFromVec(const VectorBase& v, const MatrixIndexT col) { - tensor_.index_put_({Slice(), col}, v.tensor_); - } - - // https://github.com/kaldi-asr/kaldi/blob/7fb716aa0f56480af31514c7e362db5c9f787fd4/src/matrix/kaldi-matrix.h#L99-L107 - inline Real& operator()(MatrixIndexT r, MatrixIndexT c) { - // CPU only - return tensor_.accessor()[r][c]; - } - - // https://github.com/kaldi-asr/kaldi/blob/7fb716aa0f56480af31514c7e362db5c9f787fd4/src/matrix/kaldi-matrix.h#L112-L120 - inline const Real operator()(MatrixIndexT r, MatrixIndexT c) const { - return tensor_.index({Slice(r), Slice(c)}).item().template to(); - } - - // https://github.com/kaldi-asr/kaldi/blob/7fb716aa0f56480af31514c7e362db5c9f787fd4/src/matrix/kaldi-matrix.h#L138-L141 - // https://github.com/kaldi-asr/kaldi/blob/7fb716aa0f56480af31514c7e362db5c9f787fd4/src/matrix/kaldi-matrix.cc#L859-L898 - template - void CopyFromMat( - const MatrixBase& M, - MatrixTransposeType trans = kNoTrans) { - auto src = M.tensor_; - if (trans == kTrans) - src = src.transpose(1, 0); - tensor_.index_put_({Slice(), Slice()}, src); - } - - // https://github.com/kaldi-asr/kaldi/blob/7fb716aa0f56480af31514c7e362db5c9f787fd4/src/matrix/kaldi-matrix.h#L186-L191 - inline const SubVector Row(MatrixIndexT i) const { - return SubVector(*this, i); - } - - // https://github.com/kaldi-asr/kaldi/blob/7fb716aa0f56480af31514c7e362db5c9f787fd4/src/matrix/kaldi-matrix.h#L208-L211 - inline SubMatrix RowRange( - const MatrixIndexT row_offset, - const MatrixIndexT num_rows) const { - return SubMatrix(*this, row_offset, num_rows, 0, NumCols()); - } - - protected: - // https://github.com/kaldi-asr/kaldi/blob/7fb716aa0f56480af31514c7e362db5c9f787fd4/src/matrix/kaldi-matrix.h#L749-L753 - explicit MatrixBase() : tensor_(torch::empty({0, 0})) { - KALDI_ASSERT_IS_FLOATING_TYPE(Real); - } -}; - -// https://github.com/kaldi-asr/kaldi/blob/7fb716aa0f56480af31514c7e362db5c9f787fd4/src/matrix/kaldi-matrix.h#L781-L784 -template -class Matrix : public MatrixBase { - public: - // https://github.com/kaldi-asr/kaldi/blob/7fb716aa0f56480af31514c7e362db5c9f787fd4/src/matrix/kaldi-matrix.h#L786-L787 - Matrix() : MatrixBase() {} - - // https://github.com/kaldi-asr/kaldi/blob/7fb716aa0f56480af31514c7e362db5c9f787fd4/src/matrix/kaldi-matrix.h#L789-L793 - Matrix( - const MatrixIndexT r, - const MatrixIndexT c, - MatrixResizeType resize_type = kSetZero, - MatrixStrideType stride_type = kDefaultStride) - : MatrixBase() { - Resize(r, c, resize_type, stride_type); - } - - // https://github.com/kaldi-asr/kaldi/blob/7fb716aa0f56480af31514c7e362db5c9f787fd4/src/matrix/kaldi-matrix.h#L808-L811 - explicit Matrix( - const MatrixBase& M, - MatrixTransposeType trans = kNoTrans) - : MatrixBase( - trans == kNoTrans ? M.tensor_ : M.tensor_.transpose(1, 0)) {} - - // https://github.com/kaldi-asr/kaldi/blob/7fb716aa0f56480af31514c7e362db5c9f787fd4/src/matrix/kaldi-matrix.h#L816-L819 - template - explicit Matrix( - const MatrixBase& M, - MatrixTransposeType trans = kNoTrans) - : MatrixBase( - trans == kNoTrans ? M.tensor_ : M.tensor_.transpose(1, 0)) {} - - // https://github.com/kaldi-asr/kaldi/blob/7fb716aa0f56480af31514c7e362db5c9f787fd4/src/matrix/kaldi-matrix.h#L859-L874 - // https://github.com/kaldi-asr/kaldi/blob/7fb716aa0f56480af31514c7e362db5c9f787fd4/src/matrix/kaldi-matrix.cc#L817-L857 - void Resize( - const MatrixIndexT r, - const MatrixIndexT c, - MatrixResizeType resize_type = kSetZero, - MatrixStrideType stride_type = kDefaultStride) { - auto& tensor_ = MatrixBase::tensor_; - switch (resize_type) { - case kSetZero: - tensor_.resize_({r, c}).zero_(); - break; - case kUndefined: - tensor_.resize_({r, c}); - break; - case kCopyData: - auto tmp = tensor_; - auto tmp_rows = tmp.size(0); - auto tmp_cols = tmp.size(1); - tensor_.resize_({r, c}).zero_(); - auto rows = Slice(None, r < tmp_rows ? r : tmp_rows); - auto cols = Slice(None, c < tmp_cols ? c : tmp_cols); - tensor_.index_put_({rows, cols}, tmp.index({rows, cols})); - break; - } - } - - // https://github.com/kaldi-asr/kaldi/blob/7fb716aa0f56480af31514c7e362db5c9f787fd4/src/matrix/kaldi-matrix.h#L876-L883 - Matrix& operator=(const MatrixBase& other) { - if (MatrixBase::NumRows() != other.NumRows() || - MatrixBase::NumCols() != other.NumCols()) - Resize(other.NumRows(), other.NumCols(), kUndefined); - MatrixBase::CopyFromMat(other); - return *this; - } -}; - -// https://github.com/kaldi-asr/kaldi/blob/7fb716aa0f56480af31514c7e362db5c9f787fd4/src/matrix/kaldi-matrix.h#L940-L948 -template -class SubMatrix : public MatrixBase { - public: - SubMatrix( - const MatrixBase& T, - const MatrixIndexT ro, // row offset, 0 < ro < NumRows() - const MatrixIndexT r, // number of rows, r > 0 - const MatrixIndexT co, // column offset, 0 < co < NumCols() - const MatrixIndexT c) // number of columns, c > 0 - : MatrixBase( - T.tensor_.index({Slice(ro, ro + r), Slice(co, co + c)})) {} -}; - -// https://github.com/kaldi-asr/kaldi/blob/7fb716aa0f56480af31514c7e362db5c9f787fd4/src/matrix/kaldi-matrix.h#L1059-L1060 -template -std::ostream& operator<<(std::ostream& Out, const MatrixBase& M) { - Out << M.tensor_; - return Out; -} - -} // namespace kaldi - -#endif diff --git a/third_party/kaldi/src/matrix/kaldi-vector.cc b/third_party/kaldi/src/matrix/kaldi-vector.cc deleted file mode 100644 index df59f13a36..0000000000 --- a/third_party/kaldi/src/matrix/kaldi-vector.cc +++ /dev/null @@ -1,42 +0,0 @@ -#include "matrix/kaldi-vector.h" -#include -#include "matrix/kaldi-matrix.h" - -namespace { - -template -void assert_vector_shape(const torch::Tensor& tensor_); - -template <> -void assert_vector_shape(const torch::Tensor& tensor_) { - TORCH_INTERNAL_ASSERT(tensor_.ndimension() == 1); - TORCH_INTERNAL_ASSERT(tensor_.dtype() == torch::kFloat32); - TORCH_CHECK(tensor_.device().is_cpu(), "Input tensor has to be on CPU."); -} - -template <> -void assert_vector_shape(const torch::Tensor& tensor_) { - TORCH_INTERNAL_ASSERT(tensor_.ndimension() == 1); - TORCH_INTERNAL_ASSERT(tensor_.dtype() == torch::kFloat64); - TORCH_CHECK(tensor_.device().is_cpu(), "Input tensor has to be on CPU."); -} - -} // namespace - -namespace kaldi { - -template -VectorBase::VectorBase(torch::Tensor tensor) - : tensor_(tensor), data_(tensor.data_ptr()) { - assert_vector_shape(tensor_); -}; - -template -VectorBase::VectorBase() : VectorBase(torch::empty({0})) {} - -template class Vector; -template class Vector; -template class VectorBase; -template class VectorBase; - -} // namespace kaldi diff --git a/third_party/kaldi/src/matrix/kaldi-vector.h b/third_party/kaldi/src/matrix/kaldi-vector.h deleted file mode 100644 index 620f3676d3..0000000000 --- a/third_party/kaldi/src/matrix/kaldi-vector.h +++ /dev/null @@ -1,313 +0,0 @@ -// https://github.com/kaldi-asr/kaldi/blob/7fb716aa0f56480af31514c7e362db5c9f787fd4/src/matrix/kaldi-vector.h - -#ifndef KALDI_MATRIX_KALDI_VECTOR_H_ -#define KALDI_MATRIX_KALDI_VECTOR_H_ - -#include -#include "matrix/matrix-common.h" - -using namespace torch::indexing; - -namespace kaldi { - -// https://github.com/kaldi-asr/kaldi/blob/7fb716aa0f56480af31514c7e362db5c9f787fd4/src/matrix/kaldi-vector.h#L36-L40 -template -class VectorBase { - public: - //////////////////////////////////////////////////////////////////////////////// - // PyTorch-specific things - //////////////////////////////////////////////////////////////////////////////// - torch::Tensor tensor_; - - /// Construct VectorBase which is an interface to an existing torch::Tensor - /// object. - VectorBase(torch::Tensor tensor); - - //////////////////////////////////////////////////////////////////////////////// - // Kaldi-compatible methods - //////////////////////////////////////////////////////////////////////////////// - // https://github.com/kaldi-asr/kaldi/blob/7fb716aa0f56480af31514c7e362db5c9f787fd4/src/matrix/kaldi-vector.h#L42-L43 - void SetZero() { - Set(0); - } - - // https://github.com/kaldi-asr/kaldi/blob/7fb716aa0f56480af31514c7e362db5c9f787fd4/src/matrix/kaldi-vector.h#L48-L49 - void Set(Real f) { - tensor_.index_put_({"..."}, f); - } - - // https://github.com/kaldi-asr/kaldi/blob/7fb716aa0f56480af31514c7e362db5c9f787fd4/src/matrix/kaldi-vector.h#L62-L63 - inline MatrixIndexT Dim() const { - return tensor_.numel(); - }; - - // https://github.com/kaldi-asr/kaldi/blob/7fb716aa0f56480af31514c7e362db5c9f787fd4/src/matrix/kaldi-vector.h#L68-L69 - inline Real* Data() { - return data_; - } - - // https://github.com/kaldi-asr/kaldi/blob/7fb716aa0f56480af31514c7e362db5c9f787fd4/src/matrix/kaldi-vector.h#L71-L72 - inline const Real* Data() const { - return data_; - } - - // https://github.com/kaldi-asr/kaldi/blob/7fb716aa0f56480af31514c7e362db5c9f787fd4/src/matrix/kaldi-vector.h#L74-L79 - inline Real operator()(MatrixIndexT i) const { - return data_[i]; - }; - - // https://github.com/kaldi-asr/kaldi/blob/7fb716aa0f56480af31514c7e362db5c9f787fd4/src/matrix/kaldi-vector.h#L81-L86 - inline Real& operator()(MatrixIndexT i) { - return tensor_.accessor()[i]; - }; - - // https://github.com/kaldi-asr/kaldi/blob/7fb716aa0f56480af31514c7e362db5c9f787fd4/src/matrix/kaldi-vector.h#L88-L95 - SubVector Range(const MatrixIndexT o, const MatrixIndexT l) { - return SubVector(*this, o, l); - } - - // https://github.com/kaldi-asr/kaldi/blob/7fb716aa0f56480af31514c7e362db5c9f787fd4/src/matrix/kaldi-vector.h#L97-L105 - const SubVector Range(const MatrixIndexT o, const MatrixIndexT l) - const { - return SubVector(*this, o, l); - } - - // https://github.com/kaldi-asr/kaldi/blob/7fb716aa0f56480af31514c7e362db5c9f787fd4/src/matrix/kaldi-vector.h#L107-L108 - // https://github.com/kaldi-asr/kaldi/blob/7fb716aa0f56480af31514c7e362db5c9f787fd4/src/matrix/kaldi-vector.cc#L226-L233 - void CopyFromVec(const VectorBase& v) { - TORCH_INTERNAL_ASSERT(tensor_.sizes() == v.tensor_.sizes()); - tensor_.copy_(v.tensor_); - } - - // https://github.com/kaldi-asr/kaldi/blob/7fb716aa0f56480af31514c7e362db5c9f787fd4/src/matrix/kaldi-vector.h#L137-L139 - // https://github.com/kaldi-asr/kaldi/blob/7fb716aa0f56480af31514c7e362db5c9f787fd4/src/matrix/kaldi-vector.cc#L816-L832 - void ApplyFloor(Real floor_val, MatrixIndexT* floored_count = nullptr) { - auto index = tensor_ < floor_val; - auto tmp = tensor_.index_put_({index}, floor_val); - if (floored_count) { - *floored_count = index.sum().item().template to(); - } - } - - // https://github.com/kaldi-asr/kaldi/blob/7fb716aa0f56480af31514c7e362db5c9f787fd4/src/matrix/kaldi-vector.h#L164-L165 - // https://github.com/kaldi-asr/kaldi/blob/7fb716aa0f56480af31514c7e362db5c9f787fd4/src/matrix/kaldi-vector.cc#L449-L479 - void ApplyPow(Real power) { - tensor_.pow_(power); - TORCH_INTERNAL_ASSERT(!tensor_.isnan().sum().item().template to()); - } - - // https://github.com/kaldi-asr/kaldi/blob/7fb716aa0f56480af31514c7e362db5c9f787fd4/src/matrix/kaldi-vector.h#L181-L184 - template - void AddVec(const Real alpha, const VectorBase& v) { - tensor_ += alpha * v.tensor_; - } - - // https://github.com/kaldi-asr/kaldi/blob/7fb716aa0f56480af31514c7e362db5c9f787fd4/src/matrix/kaldi-vector.h#L186-L187 - void AddVec2(const Real alpha, const VectorBase& v) { - tensor_ += alpha * (v.tensor_.square()); - } - - // https://github.com/kaldi-asr/kaldi/blob/7fb716aa0f56480af31514c7e362db5c9f787fd4/src/matrix/kaldi-vector.h#L196-L198 - void AddMatVec( - const Real alpha, - const MatrixBase& M, - const MatrixTransposeType trans, - const VectorBase& v, - const Real beta) { // **beta previously defaulted to 0.0** - auto mat = M.tensor_; - if (trans == kTrans) { - mat = mat.transpose(1, 0); - } - tensor_.addmv_(mat, v.tensor_, beta, alpha); - } - - // https://github.com/kaldi-asr/kaldi/blob/7fb716aa0f56480af31514c7e362db5c9f787fd4/src/matrix/kaldi-vector.h#L221-L222 - void MulElements(const VectorBase& v) { - tensor_ *= v.tensor_; - } - - // https://github.com/kaldi-asr/kaldi/blob/7fb716aa0f56480af31514c7e362db5c9f787fd4/src/matrix/kaldi-vector.h#L233-L234 - void Add(Real c) { - tensor_ += c; - } - - // https://github.com/kaldi-asr/kaldi/blob/7fb716aa0f56480af31514c7e362db5c9f787fd4/src/matrix/kaldi-vector.h#L236-L239 - void AddVecVec( - Real alpha, - const VectorBase& v, - const VectorBase& r, - Real beta) { - tensor_ = beta * tensor_ + alpha * v.tensor_ * r.tensor_; - } - - // https://github.com/kaldi-asr/kaldi/blob/7fb716aa0f56480af31514c7e362db5c9f787fd4/src/matrix/kaldi-vector.h#L246-L247 - void Scale(Real alpha) { - tensor_ *= alpha; - } - - // https://github.com/kaldi-asr/kaldi/blob/7fb716aa0f56480af31514c7e362db5c9f787fd4/src/matrix/kaldi-vector.h#L305-L306 - Real Min() const { - if (tensor_.numel()) { - return tensor_.min().item().template to(); - } - return std::numeric_limits::infinity(); - } - - // https://github.com/kaldi-asr/kaldi/blob/7fb716aa0f56480af31514c7e362db5c9f787fd4/src/matrix/kaldi-vector.h#L308-L310 - Real Min(MatrixIndexT* index) const { - TORCH_INTERNAL_ASSERT(tensor_.numel()); - torch::Tensor value, ind; - std::tie(value, ind) = tensor_.min(0); - *index = ind.item().to(); - return value.item().to(); - } - - // https://github.com/kaldi-asr/kaldi/blob/7fb716aa0f56480af31514c7e362db5c9f787fd4/src/matrix/kaldi-vector.h#L312-L313 - Real Sum() const { - return tensor_.sum().item().template to(); - }; - - // https://github.com/kaldi-asr/kaldi/blob/7fb716aa0f56480af31514c7e362db5c9f787fd4/src/matrix/kaldi-vector.h#L320-L321 - // https://github.com/kaldi-asr/kaldi/blob/7fb716aa0f56480af31514c7e362db5c9f787fd4/src/matrix/kaldi-vector.cc#L718-L736 - void AddRowSumMat(Real alpha, const MatrixBase& M, Real beta = 1.0) { - Vector ones(M.NumRows()); - ones.Set(1.0); - this->AddMatVec(alpha, M, kTrans, ones, beta); - } - - // https://github.com/kaldi-asr/kaldi/blob/7fb716aa0f56480af31514c7e362db5c9f787fd4/src/matrix/kaldi-vector.h#L323-L324 - // https://github.com/kaldi-asr/kaldi/blob/7fb716aa0f56480af31514c7e362db5c9f787fd4/src/matrix/kaldi-vector.cc#L738-L757 - void AddColSumMat(Real alpha, const MatrixBase& M, Real beta = 1.0) { - Vector ones(M.NumCols()); - ones.Set(1.0); - this->AddMatVec(alpha, M, kNoTrans, ones, beta); - } - - // https://github.com/kaldi-asr/kaldi/blob/7fb716aa0f56480af31514c7e362db5c9f787fd4/src/matrix/kaldi-vector.h#L326-L330 - void AddDiagMat2( - Real alpha, - const MatrixBase& M, - MatrixTransposeType trans = kNoTrans, - Real beta = 1.0) { - auto mat = M.tensor_; - if (trans == kNoTrans) { - tensor_ = - beta * tensor_ + torch::diag(torch::mm(mat, mat.transpose(1, 0))); - } else { - tensor_ = - beta * tensor_ + torch::diag(torch::mm(mat.transpose(1, 0), mat)); - } - } - - protected: - // https://github.com/kaldi-asr/kaldi/blob/7fb716aa0f56480af31514c7e362db5c9f787fd4/src/matrix/kaldi-vector.h#L362-L365 - explicit VectorBase(); - - // https://github.com/kaldi-asr/kaldi/blob/7fb716aa0f56480af31514c7e362db5c9f787fd4/src/matrix/kaldi-vector.h#L378-L379 - Real* data_; - // https://github.com/kaldi-asr/kaldi/blob/7fb716aa0f56480af31514c7e362db5c9f787fd4/src/matrix/kaldi-vector.h#L382 - KALDI_DISALLOW_COPY_AND_ASSIGN(VectorBase); -}; - -// https://github.com/kaldi-asr/kaldi/blob/7fb716aa0f56480af31514c7e362db5c9f787fd4/src/matrix/kaldi-vector.h#L385-L390 -template -class Vector : public VectorBase { - public: - //////////////////////////////////////////////////////////////////////////////// - // PyTorch-compatibility things - //////////////////////////////////////////////////////////////////////////////// - /// Construct VectorBase which is an interface to an existing torch::Tensor - /// object. - Vector(torch::Tensor tensor) : VectorBase(tensor){}; - - //////////////////////////////////////////////////////////////////////////////// - // Kaldi-compatible methods - //////////////////////////////////////////////////////////////////////////////// - // https://github.com/kaldi-asr/kaldi/blob/7fb716aa0f56480af31514c7e362db5c9f787fd4/src/matrix/kaldi-vector.h#L392-L393 - Vector() : VectorBase(){}; - - // https://github.com/kaldi-asr/kaldi/blob/7fb716aa0f56480af31514c7e362db5c9f787fd4/src/matrix/kaldi-vector.h#L395-L399 - explicit Vector(const MatrixIndexT s, MatrixResizeType resize_type = kSetZero) - : VectorBase() { - Resize(s, resize_type); - } - - // https://github.com/kaldi-asr/kaldi/blob/7fb716aa0f56480af31514c7e362db5c9f787fd4/src/matrix/kaldi-vector.h#L406-L410 - // Note: unlike the original implementation, this is "explicit". - explicit Vector(const Vector& v) - : VectorBase(v.tensor_.clone()) {} - - // https://github.com/kaldi-asr/kaldi/blob/7fb716aa0f56480af31514c7e362db5c9f787fd4/src/matrix/kaldi-vector.h#L412-L416 - explicit Vector(const VectorBase& v) - : VectorBase(v.tensor_.clone()) {} - - // https://github.com/kaldi-asr/kaldi/blob/7fb716aa0f56480af31514c7e362db5c9f787fd4/src/matrix/kaldi-vector.h#L434-L435 - void Swap(Vector* other) { - auto tmp = VectorBase::tensor_; - this->tensor_ = other->tensor_; - other->tensor_ = tmp; - } - - // https://github.com/kaldi-asr/kaldi/blob/7fb716aa0f56480af31514c7e362db5c9f787fd4/src/matrix/kaldi-vector.h#L444-L451 - // https://github.com/kaldi-asr/kaldi/blob/7fb716aa0f56480af31514c7e362db5c9f787fd4/src/matrix/kaldi-vector.cc#L189-L223 - void Resize(MatrixIndexT length, MatrixResizeType resize_type = kSetZero) { - auto& tensor_ = this->tensor_; - switch (resize_type) { - case kSetZero: - tensor_.resize_({length}).zero_(); - break; - case kUndefined: - tensor_.resize_({length}); - break; - case kCopyData: - auto tmp = tensor_; - auto tmp_numel = tensor_.numel(); - tensor_.resize_({length}).zero_(); - auto numel = Slice(length < tmp_numel ? length : tmp_numel); - tensor_.index_put_({numel}, tmp.index({numel})); - break; - } - // data_ptr() causes compiler error - this->data_ = static_cast(tensor_.data_ptr()); - } - - // https://github.com/kaldi-asr/kaldi/blob/7fb716aa0f56480af31514c7e362db5c9f787fd4/src/matrix/kaldi-vector.h#L463-L468 - Vector& operator=(const VectorBase& other) { - Resize(other.Dim(), kUndefined); - this->CopyFromVec(other); - return *this; - } -}; - -// https://github.com/kaldi-asr/kaldi/blob/7fb716aa0f56480af31514c7e362db5c9f787fd4/src/matrix/kaldi-vector.h#L482-L485 -template -class SubVector : public VectorBase { - public: - // https://github.com/kaldi-asr/kaldi/blob/7fb716aa0f56480af31514c7e362db5c9f787fd4/src/matrix/kaldi-vector.h#L487-L499 - SubVector( - const VectorBase& t, - const MatrixIndexT origin, - const MatrixIndexT length) - : VectorBase(t.tensor_.index({Slice(origin, origin + length)})) {} - - // https://github.com/kaldi-asr/kaldi/blob/7fb716aa0f56480af31514c7e362db5c9f787fd4/src/matrix/kaldi-vector.h#L524-L528 - SubVector(const MatrixBase& matrix, MatrixIndexT row) - : VectorBase(matrix.tensor_.index({row})) {} -}; - -// https://github.com/kaldi-asr/kaldi/blob/7fb716aa0f56480af31514c7e362db5c9f787fd4/src/matrix/kaldi-vector.h#L540-L543 -template -std::ostream& operator<<(std::ostream& out, const VectorBase& v) { - out << v.tensor_; - return out; -} - -// https://github.com/kaldi-asr/kaldi/blob/7fb716aa0f56480af31514c7e362db5c9f787fd4/src/matrix/kaldi-vector.h#L573-L575 -template -Real VecVec(const VectorBase& v1, const VectorBase& v2) { - return torch::dot(v1.tensor_, v2.tensor_).item().template to(); -} - -} // namespace kaldi - -#endif diff --git a/third_party/kaldi/submodule b/third_party/kaldi/submodule deleted file mode 160000 index 3eea37dd09..0000000000 --- a/third_party/kaldi/submodule +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 3eea37dd09b55064e6362216f7e9a60641f29f09 diff --git a/torchaudio/_extension/__init__.py b/torchaudio/_extension/__init__.py index f65b982228..951e381ae2 100644 --- a/torchaudio/_extension/__init__.py +++ b/torchaudio/_extension/__init__.py @@ -14,12 +14,10 @@ # Builder uses it for debugging purpose, so we export it. # https://github.com/pytorch/builder/blob/e2e4542b8eb0bdf491214451a1a4128bd606cce2/test/smoke_test/smoke_test.py#L80 __all__ = [ - "fail_if_no_kaldi", "fail_if_no_sox", "fail_if_no_ffmpeg", "_check_cuda_version", "_IS_TORCHAUDIO_EXT_AVAILABLE", - "_IS_KALDI_AVAILABLE", "_IS_RIR_AVAILABLE", "_SOX_INITIALIZED", "_FFMPEG_INITIALIZED", @@ -34,11 +32,10 @@ # In case of an error, we do not catch the failure as it suggests there is something # wrong with the installation. _IS_TORCHAUDIO_EXT_AVAILABLE = is_module_available("torchaudio.lib._torchaudio") -# Kaldi and RIR features are implemented in _torchaudio extension, but they can be individually +# RIR features are implemented in _torchaudio extension, but they can be individually # turned on/off at build time. Available means that _torchaudio is loaded properly, and -# Kaldi or RIR features are found there. +# RIR features are found there. _IS_RIR_AVAILABLE = False -_IS_KALDI_AVAILABLE = False _IS_ALIGN_AVAILABLE = False if _IS_TORCHAUDIO_EXT_AVAILABLE: _load_lib("libtorchaudio") @@ -47,7 +44,6 @@ _check_cuda_version() _IS_RIR_AVAILABLE = torchaudio.lib._torchaudio.is_rir_available() - _IS_KALDI_AVAILABLE = torchaudio.lib._torchaudio.is_kaldi_available() _IS_ALIGN_AVAILABLE = torchaudio.lib._torchaudio.is_align_available() @@ -77,13 +73,6 @@ _LG.debug("Failed to initialize ffmpeg bindings", exc_info=True) -fail_if_no_kaldi = ( - no_op - if _IS_KALDI_AVAILABLE - else fail_with_message( - "requires kaldi extension, but TorchAudio is not compiled with it. Please build TorchAudio with kaldi support." - ) -) fail_if_no_sox = ( no_op if _SOX_INITIALIZED diff --git a/torchaudio/csrc/CMakeLists.txt b/torchaudio/csrc/CMakeLists.txt index 88c8736d01..fc0c549493 100644 --- a/torchaudio/csrc/CMakeLists.txt +++ b/torchaudio/csrc/CMakeLists.txt @@ -76,12 +76,6 @@ if(USE_CUDA) ) endif() -if(BUILD_KALDI) - list(APPEND additional_libs kaldi) - list(APPEND sources kaldi.cpp) - list(APPEND compile_definitions INCLUDE_KALDI) -endif() - if(OpenMP_CXX_FOUND) list( APPEND diff --git a/torchaudio/functional/__init__.py b/torchaudio/functional/__init__.py index 6c874d28a6..5f06a8a837 100644 --- a/torchaudio/functional/__init__.py +++ b/torchaudio/functional/__init__.py @@ -28,7 +28,6 @@ apply_beamforming, apply_codec, compute_deltas, - compute_kaldi_pitch, convolve, create_dct, DB_to_amplitude, @@ -65,7 +64,6 @@ __all__ = [ "amplitude_to_DB", "compute_deltas", - "compute_kaldi_pitch", "create_dct", "melscale_fbanks", "linear_fbanks", diff --git a/torchaudio/functional/functional.py b/torchaudio/functional/functional.py index d259a84527..6d83681be7 100644 --- a/torchaudio/functional/functional.py +++ b/torchaudio/functional/functional.py @@ -21,7 +21,6 @@ "amplitude_to_DB", "DB_to_amplitude", "compute_deltas", - "compute_kaldi_pitch", "melscale_fbanks", "linear_fbanks", "create_dct", @@ -1340,120 +1339,6 @@ def apply_codec( return augmented -@torchaudio._extension.fail_if_no_kaldi -def compute_kaldi_pitch( - waveform: torch.Tensor, - sample_rate: float, - frame_length: float = 25.0, - frame_shift: float = 10.0, - min_f0: float = 50, - max_f0: float = 400, - soft_min_f0: float = 10.0, - penalty_factor: float = 0.1, - lowpass_cutoff: float = 1000, - resample_frequency: float = 4000, - delta_pitch: float = 0.005, - nccf_ballast: float = 7000, - lowpass_filter_width: int = 1, - upsample_filter_width: int = 5, - max_frames_latency: int = 0, - frames_per_chunk: int = 0, - simulate_first_pass_online: bool = False, - recompute_frame: int = 500, - snip_edges: bool = True, -) -> torch.Tensor: - """Extract pitch based on method described in *A pitch extraction algorithm tuned - for automatic speech recognition* :cite:`6854049`. - - .. devices:: CPU - - .. properties:: TorchScript - - This function computes the equivalent of `compute-kaldi-pitch-feats` from Kaldi. - - Args: - waveform (Tensor): - The input waveform of shape `(..., time)`. - sample_rate (float): - Sample rate of `waveform`. - frame_length (float, optional): - Frame length in milliseconds. (default: 25.0) - frame_shift (float, optional): - Frame shift in milliseconds. (default: 10.0) - min_f0 (float, optional): - Minimum F0 to search for (Hz) (default: 50.0) - max_f0 (float, optional): - Maximum F0 to search for (Hz) (default: 400.0) - soft_min_f0 (float, optional): - Minimum f0, applied in soft way, must not exceed min-f0 (default: 10.0) - penalty_factor (float, optional): - Cost factor for FO change. (default: 0.1) - lowpass_cutoff (float, optional): - Cutoff frequency for LowPass filter (Hz) (default: 1000) - resample_frequency (float, optional): - Frequency that we down-sample the signal to. Must be more than twice lowpass-cutoff. - (default: 4000) - delta_pitch( float, optional): - Smallest relative change in pitch that our algorithm measures. (default: 0.005) - nccf_ballast (float, optional): - Increasing this factor reduces NCCF for quiet frames (default: 7000) - lowpass_filter_width (int, optional): - Integer that determines filter width of lowpass filter, more gives sharper filter. - (default: 1) - upsample_filter_width (int, optional): - Integer that determines filter width when upsampling NCCF. (default: 5) - max_frames_latency (int, optional): - Maximum number of frames of latency that we allow pitch tracking to introduce into - the feature processing (affects output only if ``frames_per_chunk > 0`` and - ``simulate_first_pass_online=True``) (default: 0) - frames_per_chunk (int, optional): - The number of frames used for energy normalization. (default: 0) - simulate_first_pass_online (bool, optional): - If true, the function will output features that correspond to what an online decoder - would see in the first pass of decoding -- not the final version of the features, - which is the default. (default: False) - Relevant if ``frames_per_chunk > 0``. - recompute_frame (int, optional): - Only relevant for compatibility with online pitch extraction. - A non-critical parameter; the frame at which we recompute some of the forward pointers, - after revising our estimate of the signal energy. - Relevant if ``frames_per_chunk > 0``. (default: 500) - snip_edges (bool, optional): - If this is set to false, the incomplete frames near the ending edge won't be snipped, - so that the number of frames is the file size divided by the frame-shift. - This makes different types of features give the same number of frames. (default: True) - - Returns: - Tensor: Pitch feature. Shape: `(batch, frames 2)` where the last dimension - corresponds to pitch and NCCF. - """ - shape = waveform.shape - waveform = waveform.reshape(-1, shape[-1]) - result = torch.ops.torchaudio.kaldi_ComputeKaldiPitch( - waveform, - sample_rate, - frame_length, - frame_shift, - min_f0, - max_f0, - soft_min_f0, - penalty_factor, - lowpass_cutoff, - resample_frequency, - delta_pitch, - nccf_ballast, - lowpass_filter_width, - upsample_filter_width, - max_frames_latency, - frames_per_chunk, - simulate_first_pass_online, - recompute_frame, - snip_edges, - ) - result = result.reshape(shape[:-1] + result.shape[-2:]) - return result - - def _get_sinc_resample_kernel( orig_freq: int, new_freq: int,