noise related augmentations removed

neuromorphs · Dec 5, 2023 · 628bb55 · 628bb55
1 parent 04aac21
commit 628bb55
Showing 1 changed file with 0 additions and 197 deletions.
diff --git a/tonic/audio_augmentations.py b/tonic/audio_augmentations.py
@@ -1,14 +1,10 @@
-import os
 import random
 from dataclasses import dataclass, field
-from typing import Optional
 
 import librosa
 import numpy as np
 import torch
 import torchaudio
-
-# from qut_noise import QUTNoise
 from torchaudio.utils import download_asset
 
 from tonic.audio_transforms import FixLength
@@ -18,8 +14,6 @@
  "RandomPitchShift",
  "RandomAmplitudeScale",
  "AddWhiteNoise",
- "AddHomeNoise",
- "EmbeddedHomeNoise",
  "RIR",
 ]
 
@@ -174,197 +168,6 @@ def __call__(self, audio: np.ndarray):
  return noisy_audio
 
 
-# @dataclass
-# class AddHomeNoise:
-# """Add a home background noise (from QUTNOise dataset) to the audio sample with a known snr
-# (signal to noise ratio).
-
-# Parameters:
-# sample_length (int): sample length in seconds
-# target_sr (float): the target sample rate of the mixed final signal (default to the higher sample rate, between sample rates of noise and data )
-# params_dataset (dict): containing other parameters of the noise dataset
-# orig_sr (float): original sample rate of data
-# factors (float): range of desired snrs
-# partition (str): partition of the QUTNoise dataset that is used for noise augmentation
-# aug_index (int): index of the chosen factor for snr. It will be randomly chosen from the desired range (if not passed while initilization)
-# caching (bool): if we are caching the DiskCached dataset will dynamically pass copy index of data item to the transform (to set aug_index). Otherwise the aug_index will be chosen randomly in every call of transform
-# seed (int): a fixed seed for reproducibility
-# Args:
-# audio (np.ndarray): data sample
-# Returns:
-# np.ndarray: data sample with added noise
-# """
-
-# sample_length: int
-# params_dataset: dict
-# target_sr: float = 48000
-# orig_sr: float = 16000
-# factors: list = field(default_factory=lambda: [0, 10, 20])
-# partition: str = "test"
-# aug_index: int = 0
-# caching: bool = False
-# seed: int = 123
-
-# def __post_init__(self):
-# random.seed(self.seed)
-
-# noises = QUTNoise(
-# classes=["HOME"],
-# create_splits=False,
-# duration_split=[self.sample_length],
-# partition=self.partition,
-# **self.params_dataset,
-# )
-
-# split_qutnoise_path = noises.config_path
-
-# self.wave_files_path = (
-# str(split_qutnoise_path)
-# + "/splits_"
-# + str(self.sample_length)
-# + "s"
-# + "/"
-# + self.partition
-# + "/"
-# )
-
-# self.home_noises = os.listdir(self.wave_files_path)
-
-# def resample(self, audio):
-# audio_resampled = librosa.resample(
-# audio, orig_sr=self.orig_sr, target_sr=self.target_sr
-# )
-# return audio_resampled
-
-# def get_noise(self):
-# self.noise_wave = random.choice(self.home_noises)
-
-# noise, _ = librosa.core.load(
-# self.wave_files_path + self.noise_wave, sr=self.target_sr
-# )
-# self.noise = noise[0 : int(self.target_sr) * self.sample_length]
-# return self.noise
-
-# def add_noise(
-# self,
-# waveform: torch.Tensor,
-# noise: torch.Tensor,
-# snr: torch.Tensor,
-# ) -> torch.Tensor:
-# """Scales and adds noise to waveform per signal-to-noise ratio.
-
-# Specifically, for each pair of waveform vector :math:`x \in \mathbb{R}^L` and noise vector
-# :math:`n \in \mathbb{R}^L`, the function computes output :math:`y` as
-# .. math::
-# y = x + a n \, \text{,}
-# where
-# .. math::
-# a = \sqrt{ \frac{ ||x||_{2}^{2} }{ ||n||_{2}^{2} } \cdot 10^{-\frac{\text{SNR}}{10}} } \, \text{,}
-# with :math:`\text{SNR}` being the desired signal-to-noise ratio between :math:`x` and :math:`n`, in dB.
-# Note that this function broadcasts singleton leading dimensions in its inputs in a manner that is
-# consistent with the above formulae and PyTorch's broadcasting semantics.
-# .. devices:: CPU CUDA
-# .. properties:: Autograd TorchScript
-# Args:
-# waveform (torch.Tensor): Input waveform, with shape `(..., L)`.
-# noise (torch.Tensor): Noise, with shape `(..., L)` (same shape as ``waveform``).
-# snr (torch.Tensor): Signal-to-noise ratios in dB, with shape `(...,)`.
-# Returns:
-# torch.Tensor: Result of scaling and adding ``noise`` to ``waveform``, with shape `(..., L)`
-# (same shape as ``waveform``).
-# """
-
-# L = waveform.size(-1)
-
-# if L != noise.size(-1):
-# raise ValueError(
-# f"Length dimensions of waveform and noise don't match (got {L} and {noise.size(-1)})."
-# )
-
-# # compute scale, second by second
-# noisy_audio = torch.zeros_like(waveform)
-# for i in range(0, self.sample_length):
-# start, end = int(i * self.target_sr), int((i + 1) * self.target_sr)
-# sig, noise_ = waveform[:, start:end], noise[:, start:end]
-
-# energy_signal = torch.linalg.vector_norm(sig, ord=2, dim=-1) ** 2 # (*,)
-# energy_noise = torch.linalg.vector_norm(noise_, ord=2, dim=-1) ** 2 # (*,)
-# original_snr_db = 10 * (
-# torch.log10(energy_signal) - torch.log10(energy_noise)
-# )
-# scale = 10 ** ((original_snr_db - snr) / 20.0) # (*,)
-
-# # scale noise
-# self.scaled_noise = scale.unsqueeze(-1) * noise_ # (*, 1) * (*, L) = (*, L)
-# noisy_audio[:, start:end] = sig + self.scaled_noise
-
-# return noisy_audio
-
-# def __call__(self, audio: np.ndarray):
-# if not self.caching:
-# self.aug_index = random.choice(range(0, len(self.factors)))
-# snr_db = torch.tensor([self.factors[self.aug_index]])
-# self.noise = torch.from_numpy(self.get_noise())
-# self.noise = torch.unsqueeze(self.noise, dim=0)
-# self.resampled_audio = torch.from_numpy(self.resample(audio))
-# noisy_audio = self.add_noise(self.resampled_audio, self.noise, snr_db)
-
-# return noisy_audio.detach().numpy()
-
-
-# @dataclass
-# class EmbeddedHomeNoise(AddHomeNoise):
-# """Add a home background noise (from QUTNOise dataset) to the data sample with a known snr_db
-# (signal to noise ratio).
-
-# The difference with AddHomeNoise is that a leading (/and trainling) noise will be added to the augmented sample.
-# Parameters:
-# noise_length (int): the length of noise (in seconds) that will be added to the sample
-# two_sided (bool): if True the augmented signal will be encompassed between leading and trailing noises
-# Args:
-# audio (np.ndarray): data sample
-# Returns:
-# np.ndarray: data sample with added noise at the begining
-# """
-
-# noise_length: int = None
-# two_sided: bool = False
-
-# def __post_init__(self):
-# super().__post_init__()
-
-# if self.noise_length is None:
-# raise ValueError("noise length is not specified")
-# elif self.noise_length > self.sample_length:
-# raise ValueError(
-# "in the current implementation length of noise can't exceed sample length"
-# )
-
-# def __call__(self, audio: np.ndarray):
-# if not self.caching:
-# self.aug_index = random.choice(range(0, len(self.factors)))
-# snr_db = torch.tensor([self.factors[self.aug_index]])
-
-# self.noise = torch.from_numpy(self.get_noise())
-# self.noise = torch.unsqueeze(self.noise, dim=0)
-# self.resampled_audio = torch.from_numpy(self.resample(audio))
-# noisy_audio = (
-# self.add_noise(self.resampled_audio, self.noise, snr_db).detach().numpy()
-# )
-
-# initial_noise = self.scaled_noise[
-# :, 0 : int(self.target_sr * self.noise_length)
-# ]
-# if self.two_sided:
-# noise_then_audio = np.concatenate(
-# (initial_noise, noisy_audio, initial_noise), axis=1
-# )
-# else:
-# noise_then_audio = np.concatenate((initial_noise, noisy_audio), axis=1)
-
-# return noise_then_audio
-
-
 @dataclass
 class RIR:
  """Convolves a RIR (room impluse response) to the data sample.