Merge pull request #273 from MinaKh/add_audio_transforms

Adding some audio transforms and augmentations to tonic
neuromorphs · May 15, 2024 · 5a20a54 · 5a20a54
2 parents a4e8a45 + 2fb1664
commit 5a20a54
Show file tree

Hide file tree

Showing 9 changed files with 930 additions and 8 deletions.
diff --git a/.github/workflows/ci-pipeline.yml b/.github/workflows/ci-pipeline.yml
@@ -9,7 +9,7 @@ jobs:
  fail-fast: false
  matrix:
  os: [ubuntu-latest, windows-2022]
- python-version: ["3.7", "3.9", "3.11"]
+ python-version: ["3.8", "3.10", "3.11"]
  steps:
  - uses: actions/checkout@v3
  - if: matrix.os == 'ubuntu-latest'
@@ -21,8 +21,8 @@ jobs:
  python-version: ${{ matrix.python-version }}
  - name: Install requirements
  run: |
- pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu
  pip install -r test/requirements.txt
+ pip install -r test/torch_requirements.txt
  pip install .
  - name: Test with pytest
  run: pytest test
@@ -42,8 +42,8 @@ jobs:
  python-version: 3.9
  - name: Generate coverage report
  run: |
- pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu
  pip install -r test/requirements.txt
+ pip install -r test/torch_requirements.txt
  pip install .
  coverage run -m pytest test
  coverage xml
@@ -63,8 +63,8 @@ jobs:
  python-version: 3.9
  - name: Install dependencies
  run: |
- pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu
  pip install -r docs/requirements.txt
+ pip install -r test/torch_requirements.txt
  pip install .
  - name: Build documentation
  run: cd docs && make clean && make html # Use SPHINXOPTS="-W" to fail on warning.

diff --git a/docs/requirements.txt b/docs/requirements.txt
@@ -4,8 +4,6 @@ sphinx-book-theme
 sphinx-gallery
 myst_nb
 pbr
-torchvision
 ipywidgets
 matplotlib
-torchdata
 sphinx-autoapi
diff --git a/docs/tutorials/audio_transforms_tutorial.ipynb b/docs/tutorials/audio_transforms_tutorial.ipynb
diff --git a/test/requirements.txt b/test/requirements.txt
@@ -1,8 +1,6 @@
 pytest
 coverage
-torch
 matplotlib
 hdf5plugin
 imageio
-torchdata
 aedat
diff --git a/test/test_audio_augmentations.py b/test/test_audio_augmentations.py
@@ -0,0 +1,142 @@
+import numpy as np
+import pytest
+
+
+ def test_random_time_stretch():
+ """Tests the RandomTimeStretch augmentation with synthetic data for two scenarios:
+ slowing down (factor 0.5) and speeding up (factor 1.5).
+
+ - verifies that the returned object is not the input array itself (identity check only)
+ - verifies that the signal length has changed according to the stretch factor, i.e. is
+ approximately input_length / factor (and that it stays fixed if the fix_length flag is True)
+ """
+ from tonic.audio_augmentations import RandomTimeStretch
+
+ np.random.seed(123)
+
+ sr = 16_000 # sample rate
+ sl = 1 # sample length
+ data = np.random.rand(1, sr * sl)
+
+ for fix_length in [False, True]:
+ # verify length of stretched signal
+ slowing_down = RandomTimeStretch(
+ samplerate=sr, sample_length=sl, factors=[0.5], fix_length=fix_length
+ )
+ slow = slowing_down(data)
+
+ assert slow is not data
+
+ if fix_length:
+ assert slow.shape[1] == data.shape[1]
+ else:
+ # expected length is input length / factor, compared with a relative tolerance
+ assert np.allclose(
+ slow.shape[1],
+ data.shape[1] / (slowing_down.factors[0]),
+ rtol=1e-2,
+ atol=1e-3,
+ )
+
+ speeding_up = RandomTimeStretch(
+ samplerate=sr, sample_length=sl, factors=[1.5], fix_length=fix_length
+ )
+ fast = speeding_up(data)
+
+ assert fast is not data
+
+ if fix_length:
+ assert fast.shape[1] == data.shape[1]
+ else:
+ # expected length is input length / factor, compared with a relative tolerance
+ assert np.allclose(
+ fast.shape[1],
+ data.shape[1] / (speeding_up.factors[0]),
+ rtol=1e-2,
+ atol=1e-3,
+ )
+
+
+ def test_random_pitch_shift():
+ """Tests the RandomPitchShift augmentation with synthetic data.
+
+ - verifies that the returned object is not the input array itself (identity check only)
+ - verifies that the number of samples (axis 1) has not changed
+ """
+ from tonic.audio_augmentations import RandomPitchShift
+
+ np.random.seed(123)
+
+ sr = 16_000 # sample rate
+ sl = 1 # sample length
+ data = np.random.rand(1, sr * sl)
+
+ aug = RandomPitchShift(samplerate=sr)
+ pitch_shifted = aug(data)
+
+ assert pitch_shifted is not data
+
+ assert pitch_shifted.shape[1] == data.shape[1]
+
+
+ def test_random_amplitude_scale():
+ """Tests the RandomAmplitudeScale augmentation with synthetic data (an all-ones signal).
+
+ - verifies that the returned object is not the input array itself (identity check only)
+ - verifies that the number of samples (axis 1) has not changed
+ - verifies that the maximum amplitude does not exceed max_amp (the lower bound min_amp
+ is not checked here)
+ """
+ from tonic.audio_augmentations import RandomAmplitudeScale
+
+ np.random.seed(123)
+
+ sr = 16_000 # sample rate
+ sl = 1 # sample length
+ data = np.ones((1, sr * sl))
+ min_amp, max_amp = 0.05, 0.15
+
+ aug = RandomAmplitudeScale(samplerate=sr, min_amp=min_amp, max_amp=max_amp)
+ amp_scaled = aug(data)
+
+ assert amp_scaled is not data
+ assert amp_scaled.shape[1] == data.shape[1]
+ assert amp_scaled.max() <= max_amp
+
+
+ def test_add_white_noise():
+ """Tests the AddWhiteNoise augmentation with synthetic data.
+
+ - verifies that the returned object is not the input array itself (identity check only)
+ - verifies that the number of samples (axis 1) has not changed
+ """
+ from tonic.audio_augmentations import AddWhiteNoise
+
+ np.random.seed(123)
+
+ sr = 16_000 # sample rate
+ sl = 1 # sample length
+ data = np.random.rand(1, sr * sl)
+
+ aug = AddWhiteNoise(samplerate=sr)
+ noisy = aug(data)
+ assert noisy is not data
+ assert noisy.shape[1] == data.shape[1]
+
+
+ def test_RIR():
+ """Tests the RIR augmentation with synthetic float32 data.
+
+ - verifies that the returned object is not the input array itself (identity check only)
+ - verifies that the number of samples (axis 1) has not changed
+ """
+ from tonic.audio_augmentations import RIR
+
+ np.random.seed(123)
+
+ sr = 16_000 # sample rate
+ sl = 1 # sample length
+ data = np.random.rand(1, sr * sl).astype("float32")
+ # NOTE(review): relative path; presumably resolved against the directory pytest is
+ # started from. Confirm the asset is available there.
+ rir_audio_path = (
+ "tutorial-assets/Lab41-SRI-VOiCES-rm1-impulse-mc01-stu-clo-8000hz.wav"
+ )
+ aug = RIR(samplerate=sr, rir_audio=rir_audio_path)
+ RIR_augmented = aug(data)
+ assert RIR_augmented is not data
+ assert RIR_augmented.shape[1] == data.shape[1]
diff --git a/test/test_audio_transforms.py b/test/test_audio_transforms.py
@@ -80,3 +80,60 @@ def __getitem__(self, item):
 
  signal = add_noise(data)
  assert signal.shape == (1, 16_000)
+
+
+ def test_swap_axes():
+ """Tests the SwapAxes transform with synthetic data.
+
+ Swaps axes 0 and 1 of a (1, 16000) array and verifies that the two axis lengths
+ are exchanged in the output.
+ """
+ from tonic.audio_transforms import SwapAxes
+
+ np.random.seed(123)
+ sr = 16_000 # sample rate
+ sl = 1 # sample length
+ data = np.random.rand(1, sr * sl)
+ ax1, ax2 = 0, 1
+ swap_ax = SwapAxes(ax1=ax1, ax2=ax2)
+ swaped = swap_ax(data)
+
+ assert swaped.shape[0] == data.shape[1]
+ assert swaped.shape[1] == data.shape[0]
+
+
+ def test_amplitude_scale():
+     """Tests the AmplitudeScale transform with synthetic data.
+
+     For ten random target amplitudes, verifies that the number of samples (axis 1)
+     is unchanged and that the maximum of the output matches the requested
+     ``max_amplitude`` within floating-point tolerance.
+     """
+     from tonic.audio_transforms import AmplitudeScale
+
+     np.random.seed(123)
+     sr = 16_000  # sample rate
+     sl = 1  # sample length
+     data = np.random.rand(1, sr * sl)
+     max_amps = np.random.rand(10)
+
+     for amp in max_amps:
+         amp_scale = AmplitudeScale(max_amplitude=amp)
+         transformed = amp_scale(data)
+         assert data.shape[1] == transformed.shape[1]
+         # The output is floating point, so compare with a tolerance instead of
+         # requiring bit-exact equality with the target amplitude.
+         assert np.isclose(transformed.max(), amp)
+
+
+ def test_robust_amplitude_scale():
+     """Tests the RobustAmplitudeScale transform with synthetic data.
+
+     For ten random target amplitudes, verifies that the number of samples (axis 1)
+     is unchanged and that every magnitude outside the largest ``outlier_percent``
+     fraction of the output is at most ``max_robust_amplitude``.
+     """
+     from tonic.audio_transforms import RobustAmplitudeScale
+
+     np.random.seed(123)
+     sr = 16_000  # sample rate
+     sl = 1  # sample length
+     data = np.random.rand(1, sr * sl)
+     max_amps = np.random.rand(10)
+     percent = 0.01
+     for amp in max_amps:
+         robust_amp_scale = RobustAmplitudeScale(
+             max_robust_amplitude=amp, outlier_percent=percent
+         )
+         transformed = robust_amp_scale(data)
+         sorted_transformed = np.sort(np.abs(transformed.ravel()))
+         # Keep the smallest (1 - percent) fraction of magnitudes. The floor applies to
+         # the product, so a fractional sample count rounds down.
+         n_non_outlier = int(np.floor(len(sorted_transformed) * (1 - percent)))
+         non_outlier = sorted_transformed[:n_non_outlier]
+         assert data.shape[1] == transformed.shape[1]
+         assert np.all(non_outlier <= amp)
diff --git a/test/torch_requirements.txt b/test/torch_requirements.txt
@@ -0,0 +1,5 @@
+ --index-url https://download.pytorch.org/whl/cpu
+ torch==2.1.0
+ torchaudio==2.1.0
+ torchvision==0.16.0
+ torchdata==0.7.0  # pinned to the torchdata release paired with torch 2.1.0