Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add GPU acceleration #14

Merged
merged 16 commits into from
Jun 11, 2019
5 changes: 4 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
@@ -6,4 +6,7 @@ python:
script:
- pytest --cov=noisereduce/
after_success:
- coveralls
- coveralls
install:
- pip install -r requirements.txt
- pip install -r requirements-test.txt
7 changes: 6 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -3,7 +3,8 @@
[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/timsainb/noisereduce/master?filepath=notebooks%2F1.0-test-noise-reduction.ipynb)
[![PyPI version](https://badge.fury.io/py/noisereduce.svg)](https://badge.fury.io/py/noisereduce)

# Noise reduction in python using

# Noise reduction in python using spectral gating
- This algorithm is based on (but does not completely reproduce) the one [outlined by Audacity](https://wiki.audacityteam.org/wiki/How_Audacity_Noise_Reduction_Works) for the **noise reduction effect** ([Link to C++ code](https://github.com/audacity/audacity/blob/master/src/effects/NoiseReduction.cpp))
- The algorithm requires two inputs:
1. A *noise* audio clip containing prototypical noise of the audio clip
@@ -21,6 +22,8 @@
## Installation
`pip install noisereduce`

*noisereduce optionally uses Tensorflow as a backend to speed up FFT and gaussian convolution. It is not listed in the requirements.txt because (1) it is optional and (2) tensorflow-gpu and tensorflow (cpu) are both compatible with this package. The package requires Tensorflow 2+ for all tensorflow operations.*

## Usage
(see notebooks)

@@ -43,6 +46,8 @@ win_length (int): Each frame of audio is windowed by `window()`. The window will
hop_length (int): number of audio frames between STFT columns.
n_std_thresh (int): how many standard deviations louder than the mean dB of the noise (at each frequency level) to be considered signal
prop_decrease (float): To what extent should you decrease noise (1 = all, 0 = none)
pad_clipping (bool): Pad the signals with zeros to ensure that the reconstructed data is equal length to the data
use_tensorflow (bool): Use tensorflow as a backend for convolution and fft to speed up computation
verbose (bool): Whether to plot the steps of the algorithm
```
<div style="text-align:center">
2 changes: 1 addition & 1 deletion environment.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: birdbrain
name: noisereduce
channels:
- conda-forge
- defaults
Binary file modified noisereduce/._noisereduce.py
Binary file not shown.
141 changes: 127 additions & 14 deletions noisereduce/noisereduce.py
Original file line number Diff line number Diff line change
@@ -3,16 +3,73 @@
import librosa
from noisereduce.plotting import plot_reduction_steps
from tqdm.autonotebook import tqdm
import warnings

# TensorFlow is an optional dependency: when it cannot be imported only a
# warning is emitted and the module continues to load.
try:
    import tensorflow as tf
except ImportError:
    warnings.warn(
        "Tensorflow is not installed and cannot be used for GPU accelerated STFT"
    )
else:
    print(
        "GPUs available: {}".format(tf.config.experimental.list_physical_devices("GPU"))
    )
    # Parse the whole major-version component instead of only the first
    # character of the version string (which would misread "10.1" as 1).
    if int(tf.__version__.split(".")[0]) < 2:
        warnings.warn(
            "Tensorflow version is below 2.0, some GPU accelerated functionality may not work"
        )


def _stft(y, n_fft, hop_length, win_length, use_tensorflow=False):
    """Compute the short-time Fourier transform of ``y`` with the chosen backend.

    Arguments:
        y -- signal to transform
        n_fft -- FFT size
        hop_length -- hop between successive frames
        win_length -- analysis window length

    Keyword Arguments:
        use_tensorflow {bool} -- dispatch to ``_stft_tensorflow`` instead of
            ``librosa.stft`` (default: {False})
    """
    if not use_tensorflow:
        return librosa.stft(
            y=y, n_fft=n_fft, hop_length=hop_length, win_length=win_length, center=True
        )
    return _stft_tensorflow(y, n_fft, hop_length, win_length)


def _istft(y, n_fft, hop_length, win_length, use_tensorflow=False):
    """Invert an STFT matrix back to a time-domain signal with the chosen backend.

    Arguments:
        y -- STFT matrix to invert
        n_fft -- FFT size (only forwarded to the tensorflow backend)
        hop_length -- hop between successive frames
        win_length -- window length

    Keyword Arguments:
        use_tensorflow {bool} -- dispatch to ``_istft_tensorflow`` instead of
            ``librosa.istft`` (default: {False})
    """
    if use_tensorflow:
        # The tensorflow helper receives the transposed matrix.
        return _istft_tensorflow(y.T, n_fft, hop_length, win_length)
    # Pass by keyword: newer librosa releases make these parameters
    # keyword-only, so the positional form raises a TypeError there.
    return librosa.istft(y, hop_length=hop_length, win_length=win_length)

def _stft(y, n_fft, hop_length, win_length):
    """Return ``librosa.stft`` of ``y`` for the given FFT, hop and window sizes."""
    stft_kwargs = {
        "y": y,
        "n_fft": n_fft,
        "hop_length": hop_length,
        "win_length": win_length,
    }
    return librosa.stft(**stft_kwargs)

def _stft_librosa(y, n_fft, hop_length, win_length):
    """Compute the STFT of ``y`` through ``librosa.stft`` with ``center=True``."""
    stft_kwargs = {
        "y": y,
        "n_fft": n_fft,
        "hop_length": hop_length,
        "win_length": win_length,
        "center": True,
    }
    return librosa.stft(**stft_kwargs)


def _istft(y, hop_length, win_length):
def _istft_librosa(y, hop_length, win_length):
    """Invert the STFT matrix ``y`` through ``librosa.istft``.

    Arguments:
        y -- STFT matrix to invert
        hop_length -- hop between successive frames
        win_length -- window length
    """
    # Pass by keyword: newer librosa releases make these parameters
    # keyword-only, so the positional form raises a TypeError there.
    return librosa.istft(y, hop_length=hop_length, win_length=win_length)


def _stft_tensorflow(y, n_fft, hop_length, win_length):
    """Compute the STFT of ``y`` with ``tf.signal.stft`` and return it as numpy.

    A Hann window is used and the end of the signal is padded
    (``pad_end=True``). The tensorflow result is converted to a numpy array
    and transposed before being returned.
    """
    spectrogram = tf.signal.stft(
        y,
        frame_length=win_length,
        frame_step=hop_length,
        fft_length=n_fft,
        window_fn=tf.signal.hann_window,
        pad_end=True,
    )
    return spectrogram.numpy().T


def _istft_tensorflow(y, n_fft, hop_length, win_length):
    """Invert an STFT with ``tf.signal.inverse_stft`` and return a numpy array.

    Arguments:
        y -- numpy STFT matrix, cast to complex64 before inversion
        n_fft -- FFT size
        hop_length -- hop between successive frames
        win_length -- window length
    """
    stfts = y.astype(np.complex64)
    # Use the synthesis window complementary to the forward Hann window.
    # With the default (plain Hann) window, overlap-add does not sum to
    # unity and the reconstructed waveform comes back with the wrong gain.
    synthesis_window = tf.signal.inverse_stft_window_fn(
        hop_length, forward_window_fn=tf.signal.hann_window
    )
    return tf.signal.inverse_stft(
        stfts,
        frame_length=win_length,
        frame_step=hop_length,
        fft_length=n_fft,
        window_fn=synthesis_window,
    ).numpy()


def _amp_to_db(x):
    """Convert amplitudes to decibels with librosa (ref=1.0, amin=1e-20, top_db=80.0)."""
    db_options = {"ref": 1.0, "amin": 1e-20, "top_db": 80.0}
    return librosa.core.amplitude_to_db(x, **db_options)

@@ -31,9 +88,7 @@ def update_pbar(pbar, message):

def _smoothing_filter(n_grad_freq, n_grad_time):
"""Generates a filter to smooth the mask for the spectrogram

[description]


Arguments:
n_grad_freq {[type]} -- [how many frequency channels to smooth over with the mask.]
n_grad_time {[type]} -- [how many time channels to smooth over with the mask.]
@@ -58,10 +113,8 @@ def _smoothing_filter(n_grad_freq, n_grad_time):


def mask_signal(sig_stft_db, sig_mask, mask_gain_dB, sig_stft):
"""[summary]

[description]

""" Reduces amplitude of time/frequency regions of a spectrogram based upon a mask

Arguments:
sig_stft_db {[type]} -- spectrogram of signal in dB
sig_mask {[type]} -- mask to apply to signal
@@ -83,6 +136,33 @@ def mask_signal(sig_stft_db, sig_mask, mask_gain_dB, sig_stft):
return sig_stft_amp, sig_stft_db_masked


def convolve_gaussian(sig_mask, smoothing_filter, use_tensorflow=False):
    """ Convolves a smoothing filter with a mask (or any 2-D image)

    Both backends return a "same"-sized result, i.e. an array with the shape
    of ``sig_mask``.

    Arguments:
        sig_mask {[type]} -- The 2-D signal mask (numpy array)
        smoothing_filter {[type]} -- the 2-D filter to convolve (numpy array)

    Keyword Arguments:
        use_tensorflow {bool} -- use tensorflow.nn.conv2d or scipy.signal (default: {False})
    """
    if not use_tensorflow:
        return scipy.signal.fftconvolve(sig_mask, smoothing_filter, mode="same")

    # tf.nn.conv2d computes a cross-correlation, so flip the kernel on both
    # axes to obtain a true convolution like the scipy branch.
    # Kernel layout: [filter_height, filter_width, in_channels, out_channels].
    kernel = np.ascontiguousarray(
        smoothing_filter[::-1, ::-1, np.newaxis, np.newaxis]
    ).astype("float32")
    # Image layout is NHWC: a single image with a single channel. Placing the
    # new axes last would make every mask row a separate width-1 image, so
    # only the kernel's centre column would ever be applied.
    img = sig_mask[np.newaxis, :, :, np.newaxis].astype("float32")
    smoothed = tf.nn.conv2d(img, kernel, strides=[1, 1, 1, 1], padding="SAME")
    # Index rather than squeeze so a mask with a length-1 axis keeps its shape.
    # NOTE(review): zero "SAME" padding lines up with fftconvolve(mode="same")
    # for odd-sized kernels; confirm _smoothing_filter always yields odd sizes.
    return smoothed.numpy()[0, :, :, 0]


def reduce_noise(
audio_clip,
noise_clip,
@@ -93,6 +173,8 @@ def reduce_noise(
hop_length=512,
n_std_thresh=1.5,
prop_decrease=1.0,
pad_clipping=True,
use_tensorflow=False,
verbose=False,
):
"""Remove noise from audio based upon a clip containing only noise
@@ -107,6 +189,8 @@ def reduce_noise(
hop_length (int):number audio of frames between STFT columns.
n_std_thresh (int): how many standard deviations louder than the mean dB of the noise (at each frequency level) to be considered signal
prop_decrease (float): To what extent should you decrease noise (1 = all, 0 = none)
pad_clipping (bool): Pad the signals with zeros to ensure that the reconstructed data is equal length to the data
use_tensorflow (bool): Use tensorflow as a backend for convolution and fft to speed up computation
verbose (bool): Whether to plot the steps of the algorithm

Returns:
@@ -120,7 +204,9 @@ def reduce_noise(

update_pbar(pbar, "STFT on noise")
# STFT over noise
noise_stft = _stft(noise_clip, n_fft, hop_length, win_length)
noise_stft = _stft(
noise_clip, n_fft, hop_length, win_length, use_tensorflow=use_tensorflow
)
noise_stft_db = _amp_to_db(np.abs(noise_stft)) # convert to dB
# Calculate statistics over noise
update_pbar(pbar, "STFT on signal")
@@ -129,7 +215,15 @@ def reduce_noise(
noise_thresh = mean_freq_noise + std_freq_noise * n_std_thresh
# STFT over signal
update_pbar(pbar, "STFT on signal")
sig_stft = _stft(audio_clip, n_fft, hop_length, win_length)

# pad signal with zeros to avoid extra frames being clipped if desired
if pad_clipping:
nsamp = len(audio_clip)
audio_clip = np.pad(audio_clip, [0, hop_length], mode="constant")

sig_stft = _stft(
audio_clip, n_fft, hop_length, win_length, use_tensorflow=use_tensorflow
)
sig_stft_db = _amp_to_db(np.abs(sig_stft))
update_pbar(pbar, "Generate mask")
# Calculate value to mask dB to
@@ -145,19 +239,38 @@ def reduce_noise(
update_pbar(pbar, "Smooth mask")
# Create a smoothing filter for the mask in time and frequency
smoothing_filter = _smoothing_filter(n_grad_freq, n_grad_time)

# convolve the mask with a smoothing filter
sig_mask = convolve_gaussian(sig_mask, smoothing_filter, use_tensorflow)

sig_mask = scipy.signal.fftconvolve(sig_mask, smoothing_filter, mode="same")
sig_mask = sig_mask * prop_decrease
update_pbar(pbar, "Apply mask")
# mask the signal

sig_stft_amp, sig_stft_db_masked = mask_signal(
sig_stft_db, sig_mask, mask_gain_dB, sig_stft
)

update_pbar(pbar, "Recover signal")
# recover the signal
recovered_signal = _istft(sig_stft_amp, hop_length, win_length)
recovered_signal = _istft(
sig_stft_amp, n_fft, hop_length, win_length, use_tensorflow=use_tensorflow
)
# fix the recovered signal length if padding signal
if pad_clipping:
recovered_signal = librosa.util.fix_length(recovered_signal, nsamp)

recovered_spec = _amp_to_db(
np.abs(_stft(recovered_signal, n_fft, hop_length, win_length))
np.abs(
_stft(
recovered_signal,
n_fft,
hop_length,
win_length,
use_tensorflow=use_tensorflow,
)
)
)
if verbose:
plot_reduction_steps(
15 changes: 15 additions & 0 deletions noisereduce/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import numpy as np


def int16_to_float32(data):
    """ Converts from int16 wav samples to float32 wav samples

    Samples are divided by 32768 so that the int16 range maps onto [-1, 1).

    Arguments:
        data -- array-like of samples in the int16 range

    Returns:
        numpy float32 array of the same shape

    Raises:
        ValueError -- if any sample magnitude exceeds 32768
    """
    data = np.asarray(data)
    if data.size == 0:
        # np.max raises on empty input; an empty clip converts to an empty clip.
        return data.astype("float32")
    # Widen before taking the absolute value: abs(-32768) overflows in int16.
    if np.max(np.abs(data.astype(np.float64))) > 32768:
        raise ValueError("Data has values above 32768")
    return (data / 32768.0).astype("float32")


def float32_to_int16(data):
    """ Converts from float wav samples to int16 wav samples

    If the peak magnitude exceeds 1 the data is first normalised by that
    peak, then scaled by 32767 and cast to int16.

    Arguments:
        data -- array-like of float samples

    Returns:
        numpy int16 array of the same shape
    """
    data = np.asarray(data)
    if data.size == 0:
        # np.max raises on empty input; an empty clip converts to an empty clip.
        return data.astype("int16")
    # Test the absolute peak, not just the positive maximum, so that samples
    # below -1 also trigger normalisation instead of wrapping around in int16.
    peak = np.max(np.abs(data))
    if peak > 1:
        data = data / peak
    return np.array(data * 32767).astype("int16")
Loading