Skip to content

Commit

Permalink
noise related augmentations removed
Browse files Browse the repository at this point in the history
  • Loading branch information
MinaKh committed Dec 5, 2023
1 parent 04aac21 commit 628bb55
Showing 1 changed file with 0 additions and 197 deletions.
197 changes: 0 additions & 197 deletions tonic/audio_augmentations.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,10 @@
import os
import random
from dataclasses import dataclass, field
from typing import Optional

import librosa
import numpy as np
import torch
import torchaudio

# from qut_noise import QUTNoise
from torchaudio.utils import download_asset

from tonic.audio_transforms import FixLength
Expand All @@ -18,8 +14,6 @@
"RandomPitchShift",
"RandomAmplitudeScale",
"AddWhiteNoise",
"AddHomeNoise",
"EmbeddedHomeNoise",
"RIR",
]

Expand Down Expand Up @@ -174,197 +168,6 @@ def __call__(self, audio: np.ndarray):
return noisy_audio


# @dataclass
# class AddHomeNoise:
# """Add a home background noise (from QUTNOise dataset) to the audio sample with a known snr
# (signal to noise ratio).

# Parameters:
# sample_length (int): sample length in seconds
# target_sr (float): the target sample rate of the mixed final signal (default to the higher sample rate, between sample rates of noise and data )
# params_dataset (dict): containing other parameters of the noise dataset
# orig_sr (float): original sample rate of data
# factors (float): range of desired snrs
# partition (str): partition of the QUTNoise dataset that is used for noise augmentation
# aug_index (int): index of the chosen factor for snr. It will be randomly chosen from the desired range (if not passed while initilization)
# caching (bool): if we are caching the DiskCached dataset will dynamically pass copy index of data item to the transform (to set aug_index). Otherwise the aug_index will be chosen randomly in every call of transform
# seed (int): a fixed seed for reproducibility
# Args:
# audio (np.ndarray): data sample
# Returns:
# np.ndarray: data sample with added noise
# """

# sample_length: int
# params_dataset: dict
# target_sr: float = 48000
# orig_sr: float = 16000
# factors: list = field(default_factory=lambda: [0, 10, 20])
# partition: str = "test"
# aug_index: int = 0
# caching: bool = False
# seed: int = 123

# def __post_init__(self):
# random.seed(self.seed)

# noises = QUTNoise(
# classes=["HOME"],
# create_splits=False,
# duration_split=[self.sample_length],
# partition=self.partition,
# **self.params_dataset,
# )

# split_qutnoise_path = noises.config_path

# self.wave_files_path = (
# str(split_qutnoise_path)
# + "/splits_"
# + str(self.sample_length)
# + "s"
# + "/"
# + self.partition
# + "/"
# )

# self.home_noises = os.listdir(self.wave_files_path)

# def resample(self, audio):
# audio_resampled = librosa.resample(
# audio, orig_sr=self.orig_sr, target_sr=self.target_sr
# )
# return audio_resampled

# def get_noise(self):
# self.noise_wave = random.choice(self.home_noises)

# noise, _ = librosa.core.load(
# self.wave_files_path + self.noise_wave, sr=self.target_sr
# )
# self.noise = noise[0 : int(self.target_sr) * self.sample_length]
# return self.noise

# def add_noise(
# self,
# waveform: torch.Tensor,
# noise: torch.Tensor,
# snr: torch.Tensor,
# ) -> torch.Tensor:
# """Scales and adds noise to waveform per signal-to-noise ratio.

# Specifically, for each pair of waveform vector :math:`x \in \mathbb{R}^L` and noise vector
# :math:`n \in \mathbb{R}^L`, the function computes output :math:`y` as
# .. math::
# y = x + a n \, \text{,}
# where
# .. math::
# a = \sqrt{ \frac{ ||x||_{2}^{2} }{ ||n||_{2}^{2} } \cdot 10^{-\frac{\text{SNR}}{10}} } \, \text{,}
# with :math:`\text{SNR}` being the desired signal-to-noise ratio between :math:`x` and :math:`n`, in dB.
# Note that this function broadcasts singleton leading dimensions in its inputs in a manner that is
# consistent with the above formulae and PyTorch's broadcasting semantics.
# .. devices:: CPU CUDA
# .. properties:: Autograd TorchScript
# Args:
# waveform (torch.Tensor): Input waveform, with shape `(..., L)`.
# noise (torch.Tensor): Noise, with shape `(..., L)` (same shape as ``waveform``).
# snr (torch.Tensor): Signal-to-noise ratios in dB, with shape `(...,)`.
# Returns:
# torch.Tensor: Result of scaling and adding ``noise`` to ``waveform``, with shape `(..., L)`
# (same shape as ``waveform``).
# """

# L = waveform.size(-1)

# if L != noise.size(-1):
# raise ValueError(
# f"Length dimensions of waveform and noise don't match (got {L} and {noise.size(-1)})."
# )

# # compute scale, second by second
# noisy_audio = torch.zeros_like(waveform)
# for i in range(0, self.sample_length):
# start, end = int(i * self.target_sr), int((i + 1) * self.target_sr)
# sig, noise_ = waveform[:, start:end], noise[:, start:end]

# energy_signal = torch.linalg.vector_norm(sig, ord=2, dim=-1) ** 2 # (*,)
# energy_noise = torch.linalg.vector_norm(noise_, ord=2, dim=-1) ** 2 # (*,)
# original_snr_db = 10 * (
# torch.log10(energy_signal) - torch.log10(energy_noise)
# )
# scale = 10 ** ((original_snr_db - snr) / 20.0) # (*,)

# # scale noise
# self.scaled_noise = scale.unsqueeze(-1) * noise_ # (*, 1) * (*, L) = (*, L)
# noisy_audio[:, start:end] = sig + self.scaled_noise

# return noisy_audio

# def __call__(self, audio: np.ndarray):
# if not self.caching:
# self.aug_index = random.choice(range(0, len(self.factors)))
# snr_db = torch.tensor([self.factors[self.aug_index]])
# self.noise = torch.from_numpy(self.get_noise())
# self.noise = torch.unsqueeze(self.noise, dim=0)
# self.resampled_audio = torch.from_numpy(self.resample(audio))
# noisy_audio = self.add_noise(self.resampled_audio, self.noise, snr_db)

# return noisy_audio.detach().numpy()


# @dataclass
# class EmbeddedHomeNoise(AddHomeNoise):
# """Add a home background noise (from QUTNOise dataset) to the data sample with a known snr_db
# (signal to noise ratio).

# The difference with AddHomeNoise is that a leading (/and trainling) noise will be added to the augmented sample.
# Parameters:
# noise_length (int): the length of noise (in seconds) that will be added to the sample
# two_sided (bool): if True the augmented signal will be encompassed between leading and trailing noises
# Args:
# audio (np.ndarray): data sample
# Returns:
# np.ndarray: data sample with added noise at the begining
# """

# noise_length: int = None
# two_sided: bool = False

# def __post_init__(self):
# super().__post_init__()

# if self.noise_length is None:
# raise ValueError("noise length is not specified")
# elif self.noise_length > self.sample_length:
# raise ValueError(
# "in the current implementation length of noise can't exceed sample length"
# )

# def __call__(self, audio: np.ndarray):
# if not self.caching:
# self.aug_index = random.choice(range(0, len(self.factors)))
# snr_db = torch.tensor([self.factors[self.aug_index]])

# self.noise = torch.from_numpy(self.get_noise())
# self.noise = torch.unsqueeze(self.noise, dim=0)
# self.resampled_audio = torch.from_numpy(self.resample(audio))
# noisy_audio = (
# self.add_noise(self.resampled_audio, self.noise, snr_db).detach().numpy()
# )

# initial_noise = self.scaled_noise[
# :, 0 : int(self.target_sr * self.noise_length)
# ]
# if self.two_sided:
# noise_then_audio = np.concatenate(
# (initial_noise, noisy_audio, initial_noise), axis=1
# )
# else:
# noise_then_audio = np.concatenate((initial_noise, noisy_audio), axis=1)

# return noise_then_audio


@dataclass
class RIR:
"""Convolves a RIR (room impluse response) to the data sample.
Expand Down

0 comments on commit 628bb55

Please sign in to comment.