Skip to content

Commit

Permalink
Merge pull request #273 from MinaKh/add_audio_transforms
Browse files Browse the repository at this point in the history
Adding some audio transforms and augmentations to tonic
  • Loading branch information
biphasic committed May 15, 2024
2 parents a4e8a45 + 2fb1664 commit 5a20a54
Show file tree
Hide file tree
Showing 9 changed files with 930 additions and 8 deletions.
8 changes: 4 additions & 4 deletions .github/workflows/ci-pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ jobs:
fail-fast: false
matrix:
os: [ubuntu-latest, windows-2022]
python-version: ["3.7", "3.9", "3.11"]
python-version: ["3.8", "3.10", "3.11"]
steps:
- uses: actions/checkout@v3
- if: matrix.os == 'ubuntu-latest'
Expand All @@ -21,8 +21,8 @@ jobs:
python-version: ${{ matrix.python-version }}
- name: Install requirements
run: |
pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu
pip install -r test/requirements.txt
pip install -r test/torch_requirements.txt
pip install .
- name: Test with pytest
run: pytest test
Expand All @@ -42,8 +42,8 @@ jobs:
python-version: 3.9
- name: Generate coverage report
run: |
pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu
pip install -r test/requirements.txt
pip install -r test/torch_requirements.txt
pip install .
coverage run -m pytest test
coverage xml
Expand All @@ -63,8 +63,8 @@ jobs:
python-version: 3.9
- name: Install dependencies
run: |
pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu
pip install -r docs/requirements.txt
pip install -r test/torch_requirements.txt
pip install .
- name: Build documentation
run: cd docs && make clean && make html # Use SPHINXOPTS="-W" to fail on warning.
Expand Down
2 changes: 0 additions & 2 deletions docs/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@ sphinx-book-theme
sphinx-gallery
myst_nb
pbr
torchvision
ipywidgets
matplotlib
torchdata
sphinx-autoapi
446 changes: 446 additions & 0 deletions docs/tutorials/audio_transforms_tutorial.ipynb

Large diffs are not rendered by default.

2 changes: 0 additions & 2 deletions test/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
pytest
coverage
torch
matplotlib
hdf5plugin
imageio
torchdata
aedat
142 changes: 142 additions & 0 deletions test/test_audio_augmentations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
import numpy as np
import pytest


def test_random_time_stretch():
    """Check RandomTimeStretch on synthetic noise for slowing down and speeding up.

    For each ``fix_length`` setting and each stretch factor (0.5, then 1.5):
    - the object returned by the transform is not the input array itself;
    - with ``fix_length=True`` the number of samples is unchanged;
    - otherwise the number of samples is approximately the input length
      divided by the stretch factor (1% relative tolerance).
    """
    from tonic.audio_augmentations import RandomTimeStretch

    np.random.seed(123)

    sample_rate = 16_000
    duration = 1  # value handed to the transform as ``sample_length``
    data = np.random.rand(1, sample_rate * duration)
    n_samples = data.shape[1]

    for fix_length in (False, True):
        # 0.5 slows the signal down, 1.5 speeds it up.
        for factor in (0.5, 1.5):
            stretch = RandomTimeStretch(
                samplerate=sample_rate,
                sample_length=duration,
                factors=[factor],
                fix_length=fix_length,
            )
            stretched = stretch(data)

            assert stretched is not data

            if fix_length:
                assert stretched.shape[1] == n_samples
                continue

            # Read the factor back from the transform, as the reference length
            # must follow whatever the transform actually stored.
            expected_length = n_samples / (stretch.factors[0])
            assert np.allclose(
                stretched.shape[1], expected_length, rtol=1e-2, atol=1e-3
            )


def test_random_pitch_shift():
    """Check RandomPitchShift on synthetic noise.

    - the object returned by the transform is not the input array itself;
    - the number of samples (second axis) is unchanged.
    """
    from tonic.audio_augmentations import RandomPitchShift

    np.random.seed(123)

    sample_rate = 16_000
    duration = 1  # length multiplier for the synthetic signal
    data = np.random.rand(1, sample_rate * duration)

    pitch_shift = RandomPitchShift(samplerate=sample_rate)
    shifted = pitch_shift(data)

    assert shifted is not data
    assert shifted.shape[1] == data.shape[1]


def test_random_amplitude_scale():
    """Check RandomAmplitudeScale on a constant (all-ones) signal.

    - the object returned by the transform is not the input array itself;
    - the number of samples (second axis) is unchanged;
    - the maximum of the output does not exceed ``max_amp``
      (only the upper bound is asserted here).
    """
    from tonic.audio_augmentations import RandomAmplitudeScale

    np.random.seed(123)

    sample_rate = 16_000
    duration = 1  # length multiplier for the synthetic signal
    data = np.ones((1, sample_rate * duration))
    lower, upper = 0.05, 0.15

    scaler = RandomAmplitudeScale(
        samplerate=sample_rate, min_amp=lower, max_amp=upper
    )
    scaled = scaler(data)

    assert scaled is not data
    assert scaled.shape[1] == data.shape[1]
    assert scaled.max() <= upper


def test_add_white_noise():
    """Check AddWhiteNoise on synthetic noise.

    - the object returned by the transform is not the input array itself;
    - the number of samples (second axis) is unchanged.
    """
    from tonic.audio_augmentations import AddWhiteNoise

    np.random.seed(123)

    sample_rate = 16_000
    duration = 1  # length multiplier for the synthetic signal
    data = np.random.rand(1, sample_rate * duration)

    add_noise = AddWhiteNoise(samplerate=sample_rate)
    noisy = add_noise(data)

    assert noisy is not data
    assert noisy.shape[1] == data.shape[1]


def test_RIR():
    """Check the RIR transform on synthetic float32 noise.

    - the object returned by the transform is not the input array itself;
    - the number of samples (second axis) is unchanged.

    NOTE(review): the impulse-response path is relative, so this presumably
    relies on the directory pytest is launched from -- confirm.
    """
    from tonic.audio_augmentations import RIR

    np.random.seed(123)

    sample_rate = 16_000
    duration = 1  # length multiplier for the synthetic signal
    data = np.random.rand(1, sample_rate * duration).astype("float32")

    impulse_response = (
        "tutorial-assets/Lab41-SRI-VOiCES-rm1-impulse-mc01-stu-clo-8000hz.wav"
    )
    reverb = RIR(samplerate=sample_rate, rir_audio=impulse_response)
    augmented = reverb(data)

    assert augmented is not data
    assert augmented.shape[1] == data.shape[1]
57 changes: 57 additions & 0 deletions test/test_audio_transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,3 +80,60 @@ def __getitem__(self, item):

signal = add_noise(data)
assert signal.shape == (1, 16_000)


def test_swap_axes():
    """Check that SwapAxes exchanges the two axes of a 2-D synthetic signal."""
    from tonic.audio_transforms import SwapAxes

    np.random.seed(123)
    sample_rate = 16_000
    duration = 1  # length multiplier for the synthetic signal
    data = np.random.rand(1, sample_rate * duration)

    first_axis, second_axis = 0, 1
    transform = SwapAxes(ax1=first_axis, ax2=second_axis)
    swapped = transform(data)

    # The leading two output dimensions must be the input's, reversed.
    rows, cols = data.shape[0], data.shape[1]
    assert swapped.shape[0] == cols
    assert swapped.shape[1] == rows


def test_amplitude_scale():
    """Check AmplitudeScale on synthetic data for ten random target amplitudes.

    For every target amplitude:
    - the number of samples (second axis) is unchanged;
    - the maximum of the output matches the requested ``max_amplitude``.

    The input is drawn with ``np.random.rand`` and is therefore non-negative,
    so its plain maximum is also its peak absolute value.
    """
    from tonic.audio_transforms import AmplitudeScale

    np.random.seed(123)
    sr = 16_000  # sample rate
    sl = 1  # sample length
    data = np.random.rand(1, sr * sl)
    max_amps = np.random.rand(10)

    for amp in max_amps:
        amp_scale = AmplitudeScale(max_amplitude=amp)
        transformed = amp_scale(data)
        assert data.shape[1] == transformed.shape[1]
        # The scaled peak comes out of floating-point arithmetic, so compare
        # with a tolerance rather than exact equality to avoid spurious
        # failures from rounding.
        assert np.isclose(transformed.max(), amp)


def test_robust_amplitude_scale():
    """Check RobustAmplitudeScale on synthetic data for ten random targets.

    For every target amplitude:
    - the number of samples (second axis) is unchanged;
    - after discarding the largest ``percent`` fraction of absolute values
      (the outliers), every remaining value is at most ``max_robust_amplitude``.
    """
    from tonic.audio_transforms import RobustAmplitudeScale

    np.random.seed(123)
    sr = 16_000  # sample rate
    sl = 1  # sample length
    data = np.random.rand(1, sr * sl)
    max_amps = np.random.rand(10)
    percent = 0.01

    for amp in max_amps:
        robust_amp_scale = RobustAmplitudeScale(
            max_robust_amplitude=amp, outlier_percent=percent
        )
        transformed = robust_amp_scale(data)

        sorted_transformed = np.sort(np.abs(transformed.ravel()))
        # Number of values kept once the top ``percent`` are treated as
        # outliers. The floor must wrap the whole product; applied to
        # ``len(...)`` alone it would be a no-op.
        n_non_outlier = int(np.floor(len(sorted_transformed) * (1 - percent)))
        non_outlier = sorted_transformed[:n_non_outlier]

        assert data.shape[1] == transformed.shape[1]
        assert np.all(non_outlier <= amp)
5 changes: 5 additions & 0 deletions test/torch_requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
--index-url https://download.pytorch.org/whl/cpu
torch==2.1.0
torchaudio==2.1.0
torchvision==0.16.0
torchdata
Loading

0 comments on commit 5a20a54

Please sign in to comment.