diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 125aa83847..3e615a799e 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -74,7 +74,6 @@ python setup.py develop Some environmnet variables that change the build behavior - `BUILD_SOX`: Deteremines whether build and bind libsox in non-Windows environments. (no effect in Windows as libsox integration is not available) Default value is 1 (build and bind). Use 0 for disabling it. - `USE_CUDA`: Determines whether build the custom CUDA kernel. Default to the availability of CUDA-compatible GPUs. -- `BUILD_KALDI`: Determines whether build Kaldi extension. This is required for `kaldi_pitch` function. Default value is 1 on Linux/macOS and 0 on Windows. - `BUILD_RNNT`: Determines whether build RNN-T loss function. Default value is 1. - `BUILD_CUDA_CTC_DECODER`: Determines whether build decoder features based on CUDA CTC decoder. Default value is 1. (`USE_CUDA` has to be 1.) diff --git a/docs/source/functional.rst b/docs/source/functional.rst index fb9bea9cd7..8be4492648 100644 --- a/docs/source/functional.rst +++ b/docs/source/functional.rst @@ -80,7 +80,6 @@ Feature Extractions compute_deltas detect_pitch_frequency sliding_window_cmn - compute_kaldi_pitch spectral_centroid Multi-channel diff --git a/tools/setup_helpers/extension.py b/tools/setup_helpers/extension.py index c72415f0d8..c68de9f4c3 100644 --- a/tools/setup_helpers/extension.py +++ b/tools/setup_helpers/extension.py @@ -34,7 +34,6 @@ def _get_build(var, default=False): _BUILD_SOX = False if platform.system() == "Windows" else _get_build("BUILD_SOX", True) -_BUILD_KALDI = False if platform.system() == "Windows" else _get_build("BUILD_KALDI", True) _BUILD_RIR = _get_build("BUILD_RIR", True) _BUILD_RNNT = _get_build("BUILD_RNNT", True) _USE_FFMPEG = _get_build("USE_FFMPEG", False) @@ -117,7 +116,6 @@ def build_extension(self, ext): "-DCMAKE_VERBOSE_MAKEFILE=ON", f"-DPython_INCLUDE_DIR={distutils.sysconfig.get_python_inc()}", f"-DBUILD_SOX:BOOL={'ON' if _BUILD_SOX else 'OFF'}", - f"-DBUILD_KALDI:BOOL={'ON' if _BUILD_KALDI else 'OFF'}", f"-DBUILD_RIR:BOOL={'ON' if _BUILD_RIR else 'OFF'}", f"-DBUILD_RNNT:BOOL={'ON' if _BUILD_RNNT else 'OFF'}", f"-DBUILD_ALIGN:BOOL={'ON' if _BUILD_ALIGN else 'OFF'}", diff --git a/torchaudio/csrc/kaldi.cpp b/torchaudio/csrc/kaldi.cpp deleted file mode 100644 index 6f2b36c28f..0000000000 --- a/torchaudio/csrc/kaldi.cpp +++ /dev/null @@ -1,93 +0,0 @@ -#include -#include "feat/pitch-functions.h" - -namespace torchaudio { -namespace kaldi { - -namespace { - -torch::Tensor denormalize(const torch::Tensor& t) { - auto ret = t; - auto pos = t > 0, neg = t < 0; - ret.index_put({pos}, t.index({pos}) * 32767); - ret.index_put({neg}, t.index({neg}) * 32768); - return ret; -} - -torch::Tensor compute_kaldi_pitch( - const torch::Tensor& wave, - const ::kaldi::PitchExtractionOptions& opts) { - ::kaldi::VectorBase<::kaldi::BaseFloat> input(wave); - ::kaldi::Matrix<::kaldi::BaseFloat> output; - ::kaldi::ComputeKaldiPitch(opts, input, &output); - return output.tensor_; -} - -} // namespace - -torch::Tensor ComputeKaldiPitch( - const torch::Tensor& wave, - double sample_frequency, - double frame_length, - double frame_shift, - double min_f0, - double max_f0, - double soft_min_f0, - double penalty_factor, - double lowpass_cutoff, - double resample_frequency, - double delta_pitch, - double nccf_ballast, - int64_t lowpass_filter_width, - int64_t upsample_filter_width, - int64_t max_frames_latency, - int64_t frames_per_chunk, - bool simulate_first_pass_online, - int64_t recompute_frame, - bool snip_edges) { - TORCH_CHECK(wave.ndimension() == 2, "Input tensor must be 2 dimentional."); - TORCH_CHECK(wave.device().is_cpu(), "Input tensor must be on CPU."); - TORCH_CHECK( - wave.dtype() == torch::kFloat32, "Input tensor must be float32 type."); - - ::kaldi::PitchExtractionOptions opts; - opts.samp_freq = static_cast<::kaldi::BaseFloat>(sample_frequency); - opts.frame_shift_ms = static_cast<::kaldi::BaseFloat>(frame_shift); - opts.frame_length_ms = static_cast<::kaldi::BaseFloat>(frame_length); - opts.min_f0 = static_cast<::kaldi::BaseFloat>(min_f0); - opts.max_f0 = static_cast<::kaldi::BaseFloat>(max_f0); - opts.soft_min_f0 = static_cast<::kaldi::BaseFloat>(soft_min_f0); - opts.penalty_factor = static_cast<::kaldi::BaseFloat>(penalty_factor); - opts.lowpass_cutoff = static_cast<::kaldi::BaseFloat>(lowpass_cutoff); - opts.resample_freq = static_cast<::kaldi::BaseFloat>(resample_frequency); - opts.delta_pitch = static_cast<::kaldi::BaseFloat>(delta_pitch); - opts.lowpass_filter_width = static_cast<::kaldi::int32>(lowpass_filter_width); - opts.upsample_filter_width = - static_cast<::kaldi::int32>(upsample_filter_width); - opts.max_frames_latency = static_cast<::kaldi::int32>(max_frames_latency); - opts.frames_per_chunk = static_cast<::kaldi::int32>(frames_per_chunk); - opts.simulate_first_pass_online = simulate_first_pass_online; - opts.recompute_frame = static_cast<::kaldi::int32>(recompute_frame); - opts.snip_edges = snip_edges; - - // Kaldi's float type expects value range of int16 expressed as float - torch::Tensor wave_ = denormalize(wave); - - auto batch_size = wave_.size(0); - std::vector results(batch_size); - at::parallel_for(0, batch_size, 1, [&](int64_t begin, int64_t end) { - for (auto i = begin; i < end; ++i) { - results[i] = compute_kaldi_pitch(wave_.index({i}), opts); - } - }); - return torch::stack(results, 0); -} - -TORCH_LIBRARY_FRAGMENT(torchaudio, m) { - m.def( - "torchaudio::kaldi_ComputeKaldiPitch", - &torchaudio::kaldi::ComputeKaldiPitch); -} - -} // namespace kaldi -} // namespace torchaudio diff --git a/torchaudio/csrc/pybind/pybind.cpp b/torchaudio/csrc/pybind/pybind.cpp index 9d2d0e35a2..b956deb0e4 100644 --- a/torchaudio/csrc/pybind/pybind.cpp +++ b/torchaudio/csrc/pybind/pybind.cpp @@ -5,7 +5,6 @@ namespace torchaudio { namespace { PYBIND11_MODULE(_torchaudio, m) { - m.def("is_kaldi_available", &is_kaldi_available, ""); m.def("is_rir_available", &is_rir_available, ""); m.def("is_align_available", &is_align_available, ""); m.def("cuda_version", &cuda_version, ""); diff --git a/torchaudio/csrc/utils.cpp b/torchaudio/csrc/utils.cpp index c76a4ffa7a..8c5898cb49 100644 --- a/torchaudio/csrc/utils.cpp +++ b/torchaudio/csrc/utils.cpp @@ -7,14 +7,6 @@ namespace torchaudio { -bool is_kaldi_available() { -#ifdef INCLUDE_KALDI - return true; -#else - return false; -#endif -} - bool is_rir_available() { #ifdef INCLUDE_RIR return true; diff --git a/torchaudio/csrc/utils.h b/torchaudio/csrc/utils.h index 751cfa1ad2..1d7060efa7 100644 --- a/torchaudio/csrc/utils.h +++ b/torchaudio/csrc/utils.h @@ -2,7 +2,6 @@ #include namespace torchaudio { -bool is_kaldi_available(); bool is_rir_available(); bool is_align_available(); c10::optional cuda_version();