Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

【Hackathon 7th No.22】NO.22 在 paddle.audio.functional.get_window 中支持 bartlett 、 kaiser 和 nuttall 窗函数 -part #68268

Merged
merged 5 commits into from
Oct 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions python/paddle/audio/features/layers.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@
'hamming',
'hann',
'kaiser',
'bartlett',
'nuttall',
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since this PR adds new window function types, we should also update the documentation of the `window` parameter in the Spectrogram, MelSpectrogram, LogMelSpectrogram, and MFCC classes below in this file.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

好的好的

'gaussian',
'exponential',
'triang',
Expand All @@ -50,7 +52,7 @@ class Spectrogram(nn.Layer):
n_fft (int, optional): The number of frequency components of the discrete Fourier transform. Defaults to 512.
hop_length (Optional[int], optional): The hop length of the short time FFT. If `None`, it is set to `win_length//4`. Defaults to None.
win_length (Optional[int], optional): The window length of the short time FFT. If `None`, it is set to same as `n_fft`. Defaults to None.
window (str, optional): The window function applied to the signal before the Fourier transform. Supported window functions: 'hamming', 'hann', 'kaiser', 'gaussian', 'exponential', 'triang', 'bohman', 'blackman', 'cosine', 'tukey', 'taylor'. Defaults to 'hann'.
window (str, optional): The window function applied to the signal before the Fourier transform. Supported window functions: 'hamming', 'hann', 'gaussian', 'exponential', 'triang', 'bohman', 'blackman', 'cosine', 'tukey', 'taylor', 'bartlett', 'kaiser', 'nuttall'. Defaults to 'hann'.
power (float, optional): Exponent for the magnitude spectrogram. Defaults to 2.0.
center (bool, optional): Whether to pad `x` to make that the :math:`t \times hop\\_length` at the center of `t`-th frame. Defaults to True.
pad_mode (str, optional): Choose padding pattern when `center` is `True`. Defaults to 'reflect'.
Expand Down Expand Up @@ -135,7 +137,7 @@ class MelSpectrogram(nn.Layer):
n_fft (int, optional): The number of frequency components of the discrete Fourier transform. Defaults to 512.
hop_length (Optional[int], optional): The hop length of the short time FFT. If `None`, it is set to `win_length//4`. Defaults to None.
win_length (Optional[int], optional): The window length of the short time FFT. If `None`, it is set to same as `n_fft`. Defaults to None.
window (str, optional): The window function applied to the signal before the Fourier transform. Supported window functions: 'hamming', 'hann', 'kaiser', 'gaussian', 'exponential', 'triang', 'bohman', 'blackman', 'cosine', 'tukey', 'taylor'. Defaults to 'hann'.
window (str, optional): The window function applied to the signal before the Fourier transform. Supported window functions: 'hamming', 'hann', 'gaussian', 'exponential', 'triang', 'bohman', 'blackman', 'cosine', 'tukey', 'taylor', 'bartlett', 'kaiser', 'nuttall'. Defaults to 'hann'.
power (float, optional): Exponent for the magnitude spectrogram. Defaults to 2.0.
center (bool, optional): Whether to pad `x` to make that the :math:`t \times hop\\_length` at the center of `t`-th frame. Defaults to True.
pad_mode (str, optional): Choose padding pattern when `center` is `True`. Defaults to 'reflect'.
Expand Down Expand Up @@ -242,7 +244,7 @@ class LogMelSpectrogram(nn.Layer):
n_fft (int, optional): The number of frequency components of the discrete Fourier transform. Defaults to 512.
hop_length (Optional[int], optional): The hop length of the short time FFT. If `None`, it is set to `win_length//4`. Defaults to None.
win_length (Optional[int], optional): The window length of the short time FFT. If `None`, it is set to same as `n_fft`. Defaults to None.
window (str, optional): The window function applied to the signal before the Fourier transform. Supported window functions: 'hamming', 'hann', 'kaiser', 'gaussian', 'exponential', 'triang', 'bohman', 'blackman', 'cosine', 'tukey', 'taylor'. Defaults to 'hann'.
window (str, optional): The window function applied to the signal before the Fourier transform. Supported window functions: 'hamming', 'hann', 'gaussian', 'exponential', 'triang', 'bohman', 'blackman', 'cosine', 'tukey', 'taylor', 'bartlett', 'kaiser', 'nuttall'. Defaults to 'hann'.
power (float, optional): Exponent for the magnitude spectrogram. Defaults to 2.0.
center (bool, optional): Whether to pad `x` to make that the :math:`t \times hop\\_length` at the center of `t`-th frame. Defaults to True.
pad_mode (str, optional): Choose padding pattern when `center` is `True`. Defaults to 'reflect'.
Expand Down Expand Up @@ -350,7 +352,7 @@ class MFCC(nn.Layer):
n_fft (int, optional): The number of frequency components of the discrete Fourier transform. Defaults to 512.
hop_length (Optional[int], optional): The hop length of the short time FFT. If `None`, it is set to `win_length//4`. Defaults to None.
win_length (Optional[int], optional): The window length of the short time FFT. If `None`, it is set to same as `n_fft`. Defaults to None.
window (str, optional): The window function applied to the signal before the Fourier transform. Supported window functions: 'hamming', 'hann', 'kaiser', 'gaussian', 'exponential', 'triang', 'bohman', 'blackman', 'cosine', 'tukey', 'taylor'. Defaults to 'hann'.
window (str, optional): The window function applied to the signal before the Fourier transform. Supported window functions: 'hamming', 'hann', 'gaussian', 'exponential', 'triang', 'bohman', 'blackman', 'cosine', 'tukey', 'taylor', 'bartlett', 'kaiser', 'nuttall'. Defaults to 'hann'.
power (float, optional): Exponent for the magnitude spectrogram. Defaults to 2.0.
center (bool, optional): Whether to pad `x` to make that the :math:`t \times hop\\_length` at the center of `t`-th frame. Defaults to True.
pad_mode (str, optional): Choose padding pattern when `center` is `True`. Defaults to 'reflect'.
Expand Down
63 changes: 58 additions & 5 deletions python/paddle/audio/functional/window.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,61 @@ def _cat(x: list[Tensor], data_type: str) -> Tensor:
return paddle.concat(l)


@window_function_register.register()
def _bartlett(M: int, sym: bool = True, dtype: str = 'float64') -> Tensor:
    """Compute the Bartlett (triangular, zero-ended) window.

    The window rises linearly as ``2 * n / (size - 1)`` up to the midpoint
    ``(size - 1) / 2`` and falls as ``2 - 2 * n / (size - 1)`` afterwards,
    where ``size`` is the length returned by ``_extend(M, sym)``.
    This function is consistent with scipy.signal.windows.bartlett().

    Args:
        M (int): Requested number of samples.
        sym (bool, optional): Forwarded to ``_extend``; presumably selects a
            symmetric versus periodic window, as in SciPy. Defaults to True.
        dtype (str, optional): Data type of the returned tensor.
            Defaults to 'float64'.

    Returns:
        Tensor: The window, after ``_truncate`` has been applied.
    """
    # Degenerate lengths flagged by `_len_guards` yield an all-ones window.
    if _len_guards(M):
        return paddle.ones((M,), dtype=dtype)

    size, needs_trunc = _extend(M, sym)
    positions = paddle.arange(0, size, dtype=dtype)
    size_t = paddle.to_tensor(size, dtype=dtype)

    rising = 2.0 * positions / (size_t - 1)
    falling = 2.0 - rising
    on_rising_edge = paddle.less_equal(positions, (size_t - 1) / 2.0)
    window = paddle.where(on_rising_edge, rising, falling)

    return _truncate(window, needs_trunc)


@window_function_register.register()
def _kaiser(
    M: int, beta: float, sym: bool = True, dtype: str = 'float64'
) -> Tensor:
    """Compute the Kaiser window.

    Evaluates ``i0(beta * sqrt(1 - ((n - alpha) / alpha) ** 2)) / i0(beta)``
    for ``n = 0 .. size - 1`` with ``alpha = (size - 1) / 2``, where ``i0`` is
    ``paddle.i0`` and ``size`` is the length returned by ``_extend(M, sym)``.
    This function is consistent with scipy.signal.windows.kaiser().

    Args:
        M (int): Requested number of samples.
        beta (float): Shape parameter of the window.
        sym (bool, optional): Forwarded to ``_extend``; presumably selects a
            symmetric versus periodic window, as in SciPy. Defaults to True.
        dtype (str, optional): Data type of the returned tensor.
            Defaults to 'float64'.

    Returns:
        Tensor: The window, after ``_truncate`` has been applied.
    """
    # Degenerate lengths flagged by `_len_guards` yield an all-ones window.
    if _len_guards(M):
        return paddle.ones((M,), dtype=dtype)

    size, needs_trunc = _extend(M, sym)
    beta_t = paddle.to_tensor(beta, dtype=dtype)
    positions = paddle.arange(0, size, dtype=dtype)
    size_t = paddle.to_tensor(size, dtype=dtype)

    alpha = (size_t - 1) / 2.0
    offset = (positions - alpha) / alpha
    numerator = paddle.i0(beta_t * paddle.sqrt(1 - offset**2.0))
    window = numerator / paddle.i0(beta_t)

    return _truncate(window, needs_trunc)


@window_function_register.register()
def _nuttall(M: int, sym: bool = True, dtype: str = 'float64') -> Tensor:
    """Compute the Nuttall window.

    Delegates to ``_general_cosine`` with the four fixed coefficients
    ``[0.3635819, 0.4891775, 0.1365995, 0.0106411]``.
    This function is consistent with scipy.signal.windows.nuttall().

    Args:
        M (int): Requested number of samples.
        sym (bool, optional): Forwarded unchanged to ``_general_cosine``.
            Defaults to True.
        dtype (str, optional): Data type of the coefficient tensor and of the
            window requested from ``_general_cosine``. Defaults to 'float64'.

    Returns:
        Tensor: Whatever ``_general_cosine`` returns for these coefficients.
    """
    coefficients = [0.3635819, 0.4891775, 0.1365995, 0.0106411]
    weights = paddle.to_tensor(coefficients, dtype=dtype)
    return _general_cosine(M, a=weights, sym=sym, dtype=dtype)


@window_function_register.register()
def _acosh(x: Tensor | float) -> Tensor:
if isinstance(x, float):
Expand Down Expand Up @@ -347,7 +402,7 @@ def get_window(
"""Return a window of a given length and type.

Args:
window (Union[str, Tuple[str, float]]): The window function applied to the signal before the Fourier transform. Supported window functions: 'hamming', 'hann', 'gaussian', 'general_gaussian', 'exponential', 'triang', 'bohman', 'blackman', 'cosine', 'tukey', 'taylor'.
window (Union[str, Tuple[str, float]]): The window function applied to the signal before the Fourier transform. Supported window functions: 'hamming', 'hann', 'gaussian', 'general_gaussian', 'exponential', 'triang', 'bohman', 'blackman', 'cosine', 'tukey', 'taylor', 'bartlett', 'kaiser', 'nuttall'.
win_length (int): Number of samples.
fftbins (bool, optional): If True, create a "periodic" window. Otherwise, create a "symmetric" window, for use in filter design. Defaults to True.
dtype (str, optional): The data type of the return window. Defaults to 'float64'.
Expand All @@ -364,17 +419,16 @@ def get_window(
>>> cosine_window = paddle.audio.functional.get_window('cosine', n_fft)

>>> std = 7
>>> gaussian_window = paddle.audio.functional.get_window(('gaussian',std), n_fft)
>>> gaussian_window = paddle.audio.functional.get_window(('gaussian', std), n_fft)
"""
sym = not fftbins

args = ()
if isinstance(window, tuple):
winstr = window[0]
if len(window) > 1:
args = window[1:]
elif isinstance(window, str):
if window in ['gaussian', 'exponential']:
if window in ['gaussian', 'exponential', 'kaiser']:
raise ValueError(
"The '" + window + "' window needs one or "
"more parameters -- pass a tuple."
Expand All @@ -388,7 +442,6 @@ def get_window(
winfunc = window_function_register.get('_' + winstr)
except KeyError as e:
raise ValueError("Unknown window type.") from e

params = (win_length, *args)
kwargs = {'sym': sym}
return winfunc(*params, dtype=dtype, **kwargs)
19 changes: 17 additions & 2 deletions test/legacy_test/test_audio_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,7 @@ def test_gaussian_window_and_exception(self, n_fft: int):
np.testing.assert_array_almost_equal(
window_scipy_exp, window_paddle_exp.numpy(), decimal=5
)

try:
window_paddle = paddle.audio.functional.get_window("hann", -1)
except ValueError:
Expand Down Expand Up @@ -290,7 +291,14 @@ def dct(n_filters, n_input):
np.testing.assert_array_almost_equal(librosa_dct, paddle_dct, decimal=5)

@parameterize(
[128, 256, 512], ["hamming", "hann", "triang", "bohman"], [True, False]
[128, 256, 512],
[
"hamming",
"hann",
"triang",
"bohman",
],
[True, False],
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

另外也请review一下这个#68268 我觉得ci似乎有问题,我在 test/legacy_test/test_audio_functions.py 中的修改并没有执行,导致我一开始的单侧覆盖率一直过不去,后来我新加一个文件后,他却提示数据对不上,但我在本身的window电脑、aistudio的linux环境以及我朋友的mac电脑里面都是可以的,另外您也是可以看到,代码使用的都是paddle已经实现的api

我看这里的修改和原来的一样呀。

image 目前单侧没有过。

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

另外也请review一下这个#68268 我觉得ci似乎有问题,我在 test/legacy_test/test_audio_functions.py 中的修改并没有执行,导致我一开始的单侧覆盖率一直过不去,后来我新加一个文件后,他却提示数据对不上,但我在本身的window电脑、aistudio的linux环境以及我朋友的mac电脑里面都是可以的,另外您也是可以看到,代码使用的都是paddle已经实现的api

我看这里的修改和原来的一样呀。

image 目前单侧没有过。

image
你看,这个是 #68430 的覆盖率,他显示代码并没有被执行,然后单侧我是在我朋友还有我自己电脑、aistudio那里试过了的

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

已经可以了,请review @jeff41404

)
def test_stft_and_spect(
self, n_fft: int, window_str: str, center_flag: bool
Expand Down Expand Up @@ -345,7 +353,14 @@ def test_stft_and_spect(
)

@parameterize(
[128, 256, 512], [64, 82], ["hamming", "hann", "triang", "bohman"]
[128, 256, 512],
[64, 82],
[
"hamming",
"hann",
"triang",
"bohman",
],
)
def test_istft(self, n_fft: int, hop_length: int, window_str: str):
if len(self.waveform.shape) == 2: # (C, T)
Expand Down
120 changes: 120 additions & 0 deletions test/legacy_test/test_get_window.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import itertools
import unittest

from parameterized import parameterized
from scipy import signal

import paddle
import paddle.audio
from paddle.base import core


def parameterize(*params):
    """Build a ``parameterized.expand`` decorator over a Cartesian product.

    Each positional argument is an iterable of candidate values; the decorated
    test is expanded once per combination produced by ``itertools.product``.
    """
    combinations = list(itertools.product(*params))
    return parameterized.expand(combinations)


class TestAudioFuncitons(unittest.TestCase):
    """Compare ``paddle.audio.functional.get_window`` with SciPy's windows."""

    def setUp(self):
        # Run in dynamic-graph mode, on GPU 0 when Paddle was built with CUDA.
        paddle.disable_static(
            paddle.CUDAPlace(0)
            if core.is_compiled_with_cuda()
            else paddle.CPUPlace()
        )

    def _assert_window_close(self, expected, actual):
        """Fail unless the Paddle window matches the SciPy reference.

        Args:
            expected: Reference window as returned by SciPy.
            actual: Tensor returned by ``paddle.audio.functional.get_window``.
        """
        expected = paddle.to_tensor(expected, dtype=actual.dtype)
        # Compare shapes explicitly so a length mismatch cannot be hidden by
        # broadcasting inside ``paddle.allclose``.
        self.assertEqual(list(expected.shape), list(actual.shape))
        # ``paddle.allclose`` only returns a boolean tensor; it has to be
        # asserted, otherwise the test can never fail.
        self.assertTrue(
            bool(paddle.allclose(expected, actual, atol=0.0001, rtol=0.0001))
        )

    @parameterize(
        [
            "hamming",
            "hann",
            "triang",
            "bohman",
            "blackman",
            "cosine",
            "tukey",
            "taylor",
            "bartlett",
            "nuttall",
        ],
        [1, 512],
    )
    def test_window(self, window_type: str, n_fft: int):
        """Parameter-free windows match ``scipy.signal.get_window``."""
        window_scipy = signal.get_window(window_type, n_fft)
        window_paddle = paddle.audio.functional.get_window(window_type, n_fft)
        self._assert_window_close(window_scipy, window_paddle)

    @parameterize([1, 512])
    def test_window_and_exception(self, n_fft: int):
        """Parameterised windows match their ``scipy.signal.windows`` twins.

        The SciPy window functions default to ``sym=True``, so every Paddle
        call passes ``fftbins=False`` to request the symmetric variant too.
        """
        window_scipy_gaussian = signal.windows.gaussian(n_fft, std=7)
        window_paddle_gaussian = paddle.audio.functional.get_window(
            ('gaussian', 7), n_fft, False
        )
        self._assert_window_close(
            window_scipy_gaussian, window_paddle_gaussian
        )

        window_scipy_general_gaussian = signal.windows.general_gaussian(
            n_fft, 1, 7
        )
        window_paddle_general_gaussian = paddle.audio.functional.get_window(
            ('general_gaussian', 1, 7), n_fft, False
        )
        # Compare the general_gaussian pair (the gaussian pair was previously
        # re-checked here by mistake).
        self._assert_window_close(
            window_scipy_general_gaussian, window_paddle_general_gaussian
        )

        window_scipy_exp = signal.windows.exponential(n_fft)
        window_paddle_exp = paddle.audio.functional.get_window(
            ('exponential', None, 1), n_fft, False
        )
        self._assert_window_close(window_scipy_exp, window_paddle_exp)

        window_scipy_kaiser = signal.windows.kaiser(n_fft, beta=14.0)
        # fftbins=False: ``signal.windows.kaiser`` is symmetric by default,
        # whereas ``get_window`` defaults to a periodic window.
        window_paddle_kaiser = paddle.audio.functional.get_window(
            ('kaiser', 14.0), n_fft, False
        )
        self._assert_window_close(window_scipy_kaiser, window_paddle_kaiser)


# Allow running this test module directly as a script.
if __name__ == '__main__':
    unittest.main()