timsainb · timsainb · Jun 11, 2019 · Jun 11, 2019 · Jun 11, 2019 · Jun 11, 2019
diff --git a/.travis.yml b/.travis.yml
@@ -6,4 +6,7 @@ python:
 script:
   - pytest --cov=noisereduce/
 after_success:
-  - coveralls
+  - coveralls
+install:
+  - pip install -r requirements.txt
+  - pip install -r requirements-test.txt
diff --git a/README.md b/README.md
@@ -3,7 +3,8 @@
 [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/timsainb/noisereduce/master?filepath=notebooks%2F1.0-test-noise-reduction.ipynb)
 [![PyPI version](https://badge.fury.io/py/noisereduce.svg)](https://badge.fury.io/py/noisereduce)
 
-# Noise reduction in python using 
+
+# Noise reduction in python using spectral gating
 - This algorithm is based on (but does not completely reproduce) the one [outlined by Audacity](https://wiki.audacityteam.org/wiki/How_Audacity_Noise_Reduction_Works) for the **noise reduction effect** ([Link to C++ code](https://github.com/audacity/audacity/blob/master/src/effects/NoiseReduction.cpp))
 - The algorithm requires two inputs: 
     1. A *noise* audio clip containing prototypical noise of the audio clip
@@ -21,6 +22,8 @@
 ## Installation
 `pip install noisereduce`
 
+*noisereduce optionally uses Tensorflow as a backend to speed up FFT and gaussian convolution. It is not listed in requirements.txt because (1) it is optional and (2) tensorflow-gpu and tensorflow (cpu) are both compatible with this package. The package requires Tensorflow 2+ for all tensorflow operations.* 
+
 ## Usage
 (see notebooks)
 
@@ -43,6 +46,8 @@ win_length (int): Each frame of audio is windowed by `window()`. The window will
 hop_length (int): number of audio frames between STFT columns.
 n_std_thresh (int): how many standard deviations louder than the mean dB of the noise (at each frequency level) to be considered signal
 prop_decrease (float): To what extent should you decrease noise (1 = all, 0 = none)
+pad_clipping (bool): Pad the signals with zeros to ensure that the reconstructed data is equal length to the data
+use_tensorflow (bool): Use tensorflow as a backend for convolution and fft to speed up computation
 verbose (bool): Whether to plot the steps of the algorithm
 ```
 <div style="text-align:center">

diff --git a/environment.yml b/environment.yml
@@ -1,4 +1,4 @@
-name: birdbrain
+name: noisereduce
 channels:
   - conda-forge
   - defaults

diff --git a/noisereduce/._noisereduce.py b/noisereduce/._noisereduce.py
diff --git a/noisereduce/noisereduce.py b/noisereduce/noisereduce.py
@@ -3,16 +3,73 @@
 import librosa
 from noisereduce.plotting import plot_reduction_steps
 from tqdm.autonotebook import tqdm
+import warnings
 
+# Tensorflow is an optional backend: import it when available, otherwise warn
+# and leave the librosa/scipy code paths as the only usable ones.
+try:
+    import tensorflow as tf
+except ImportError:
+    warnings.warn(
+        "Tensorflow is not installed and cannot be used for GPU accelerated STFT"
+    )
+else:
+    # Compare the whole major-version component so that e.g. "10.1" is not
+    # misread as major version 1.
+    if int(tf.__version__.split(".")[0]) < 2:
+        warnings.warn(
+            "Tensorflow version is below 2.0, some GPU accelerated functionality may not work"
+        )
+    # Older tensorflow releases may not expose tf.config.experimental; the GPU
+    # listing is purely informational and must not break importing this module.
+    try:
+        _gpus = tf.config.experimental.list_physical_devices("GPU")
+    except AttributeError:
+        _gpus = []
+    print("GPUs available: {}".format(_gpus))
+
+
+def _stft(y, n_fft, hop_length, win_length, use_tensorflow=False):
+    """Compute the STFT of `y` with the selected backend.
+
+    Dispatches to `_stft_tensorflow` when `use_tensorflow` is true and to
+    `librosa.stft` (called with `center=True`) otherwise.
+    """
+    if not use_tensorflow:
+        return librosa.stft(
+            y=y, n_fft=n_fft, hop_length=hop_length, win_length=win_length, center=True
+        )
+    return _stft_tensorflow(y, n_fft, hop_length, win_length)
+
+
+def _istft(y, n_fft, hop_length, win_length, use_tensorflow=False):
+    """Invert the STFT matrix `y` with the selected backend.
+
+    When `use_tensorflow` is true, the transposed matrix is handed to
+    `_istft_tensorflow`; otherwise `librosa.istft` is used. `n_fft` is only
+    consumed by the tensorflow path.
+    """
+    if use_tensorflow:
+        # the tensorflow helper receives the transpose of the matrix
+        return _istft_tensorflow(y.T, n_fft, hop_length, win_length)
+    # Pass by keyword: newer librosa releases accept hop_length/win_length as
+    # keyword-only arguments, and keywords work on older releases as well.
+    return librosa.istft(y, hop_length=hop_length, win_length=win_length)
 
-def _stft(y, n_fft, hop_length, win_length):
-    return librosa.stft(y=y, n_fft=n_fft, hop_length=hop_length, win_length=win_length)
+
+def _stft_librosa(y, n_fft, hop_length, win_length):
+    """Compute the STFT of `y` using `librosa.stft` with `center=True`."""
+    stft_kwargs = dict(
+        n_fft=n_fft, hop_length=hop_length, win_length=win_length, center=True
+    )
+    return librosa.stft(y=y, **stft_kwargs)
 
 
-def _istft(y, hop_length, win_length):
+def _istft_librosa(y, hop_length, win_length):
+    """Invert the STFT matrix `y` back to a signal using `librosa.istft`."""
+    # NOTE(review): hop_length and win_length are passed positionally; newer
+    # librosa releases may require them as keywords -- confirm.
     return librosa.istft(y, hop_length, win_length)
 
 
+def _stft_tensorflow(y, n_fft, hop_length, win_length):
+    """Compute the STFT of `y` with `tf.signal.stft` and return it as numpy.
+
+    Uses a Hann window with `pad_end=True`; the result is converted to a
+    numpy array and transposed before being returned.
+    """
+    spectrogram = tf.signal.stft(
+        y,
+        frame_length=win_length,
+        frame_step=hop_length,
+        fft_length=n_fft,
+        window_fn=tf.signal.hann_window,
+        pad_end=True,
+    )
+    return spectrogram.numpy().T
+
+
+def _istft_tensorflow(y, n_fft, hop_length, win_length):
+    """Invert the STFT matrix `y` with `tf.signal.inverse_stft`.
+
+    The input is cast to complex64 first and the result is returned as a
+    numpy array.
+    """
+    stft_c64 = y.astype(np.complex64)
+    recovered = tf.signal.inverse_stft(
+        stft_c64, frame_length=win_length, frame_step=hop_length, fft_length=n_fft
+    )
+    return recovered.numpy()
+
+
 def _amp_to_db(x):
+    """Convert amplitudes `x` to dB via librosa (ref=1.0, amin=1e-20, top_db=80.0)."""
     return librosa.core.amplitude_to_db(x, ref=1.0, amin=1e-20, top_db=80.0)
 
@@ -31,9 +88,7 @@ def update_pbar(pbar, message):
 
 def _smoothing_filter(n_grad_freq, n_grad_time):
     """Generates a filter to smooth the mask for the spectrogram
-
-    [description]
-
+
     Arguments:
         n_grad_freq {[type]} -- [how many frequency channels to smooth over with the mask.]
         n_grad_time {[type]} -- [how many time channels to smooth over with the mask.]
@@ -58,10 +113,8 @@ def _smoothing_filter(n_grad_freq, n_grad_time):
 
 
 def mask_signal(sig_stft_db, sig_mask, mask_gain_dB, sig_stft):
-    """[summary]
-
-    [description]
-
+    """ Reduces amplitude of time/frequency regions of a spectrogram based upon a mask 
+
     Arguments:
         sig_stft_db {[type]} -- spectrogram of signal in dB
         sig_mask {[type]} -- mask to apply to signal
@@ -83,6 +136,33 @@ def mask_signal(sig_stft_db, sig_mask, mask_gain_dB, sig_stft):
     return sig_stft_amp, sig_stft_db_masked
 
 
+def convolve_gaussian(sig_mask, smoothing_filter, use_tensorflow=False):
+    """ Convolves a gaussian filter with a mask (or any image)
+
+    Both backends return an array with the same shape as `sig_mask`
+    ("same"-style output).
+
+    Arguments:
+        sig_mask {np.ndarray} -- The 2-D signal mask
+        smoothing_filter {np.ndarray} -- the 2-D filter to convolve
+
+    Keyword Arguments:
+        use_tensorflow {bool} -- use tensorflow (tf.nn.conv2d) or
+            scipy.signal.fftconvolve (default: {False})
+
+    Returns:
+        The smoothed mask (float32 when computed with tensorflow)
+    """
+    if not use_tensorflow:
+        return scipy.signal.fftconvolve(sig_mask, smoothing_filter, mode="same")
+
+    # tf.nn.conv2d takes NHWC input and HWIO filters, so the whole mask must be
+    # ONE single-channel image of shape (1, rows, cols, 1). Placing the rows on
+    # the batch axis would convolve each row on its own and never smooth across
+    # rows, which is why no ad-hoc rescaling of the filter is needed here.
+    img = np.asarray(sig_mask, dtype="float32")[np.newaxis, :, :, np.newaxis]
+    # conv2d computes a cross-correlation; flip the kernel so the result is a
+    # true convolution like fftconvolve (identical for symmetric filters).
+    # NOTE(review): for even-sized filters the "SAME" alignment may differ by
+    # one sample from scipy's mode="same" -- confirm if such filters are used.
+    kernel = np.ascontiguousarray(
+        np.asarray(smoothing_filter, dtype="float32")[::-1, ::-1]
+    )[:, :, np.newaxis, np.newaxis]
+    smoothed = tf.nn.conv2d(img, kernel, strides=[1, 1, 1, 1], padding="SAME")
+    # index explicitly instead of squeeze() so size-1 mask dimensions survive
+    return smoothed.numpy()[0, :, :, 0]
+
+
 def reduce_noise(
     audio_clip,
     noise_clip,
@@ -93,6 +173,8 @@ def reduce_noise(
     hop_length=512,
     n_std_thresh=1.5,
     prop_decrease=1.0,
+    pad_clipping=True,
+    use_tensorflow=False,
     verbose=False,
 ):
     """Remove noise from audio based upon a clip containing only noise
@@ -107,6 +189,8 @@ def reduce_noise(
         hop_length (int):number audio of frames between STFT columns.
         n_std_thresh (int): how many standard deviations louder than the mean dB of the noise (at each frequency level) to be considered signal
         prop_decrease (float): To what extent should you decrease noise (1 = all, 0 = none)
+        pad_clipping (bool): Pad the signals with zeros to ensure that the reconstructed data is equal length to the data
+        use_tensorflow (bool): Use tensorflow as a backend for convolution and fft to speed up computation
         verbose (bool): Whether to plot the steps of the algorithm
 
     Returns:
@@ -120,7 +204,9 @@ def reduce_noise(
 
     update_pbar(pbar, "STFT on noise")
     # STFT over noise
-    noise_stft = _stft(noise_clip, n_fft, hop_length, win_length)
+    noise_stft = _stft(
+        noise_clip, n_fft, hop_length, win_length, use_tensorflow=use_tensorflow
+    )
     noise_stft_db = _amp_to_db(np.abs(noise_stft))  # convert to dB
     # Calculate statistics over noise
     update_pbar(pbar, "STFT on signal")
@@ -129,7 +215,15 @@ def reduce_noise(
     noise_thresh = mean_freq_noise + std_freq_noise * n_std_thresh
     # STFT over signal
     update_pbar(pbar, "STFT on signal")
-    sig_stft = _stft(audio_clip, n_fft, hop_length, win_length)
+
+    # pad signal with zeros to avoid extra frames being clipped if desired
+    if pad_clipping:
+        nsamp = len(audio_clip)
+        audio_clip = np.pad(audio_clip, [0, hop_length], mode="constant")
+
+    sig_stft = _stft(
+        audio_clip, n_fft, hop_length, win_length, use_tensorflow=use_tensorflow
+    )
     sig_stft_db = _amp_to_db(np.abs(sig_stft))
     update_pbar(pbar, "Generate mask")
     # Calculate value to mask dB to
@@ -145,19 +239,38 @@ def reduce_noise(
     update_pbar(pbar, "Smooth mask")
     # Create a smoothing filter for the mask in time and frequency
     smoothing_filter = _smoothing_filter(n_grad_freq, n_grad_time)
+
     # convolve the mask with a smoothing filter
+    sig_mask = convolve_gaussian(sig_mask, smoothing_filter, use_tensorflow)
+
     sig_mask = scipy.signal.fftconvolve(sig_mask, smoothing_filter, mode="same")
     sig_mask = sig_mask * prop_decrease
     update_pbar(pbar, "Apply mask")
     # mask the signal
+
     sig_stft_amp, sig_stft_db_masked = mask_signal(
         sig_stft_db, sig_mask, mask_gain_dB, sig_stft
     )
+
     update_pbar(pbar, "Recover signal")
     # recover the signal
-    recovered_signal = _istft(sig_stft_amp, hop_length, win_length)
+    recovered_signal = _istft(
+        sig_stft_amp, n_fft, hop_length, win_length, use_tensorflow=use_tensorflow
+    )
+    # fix the recovered signal length if padding signal
+    if pad_clipping:
+        recovered_signal = librosa.util.fix_length(recovered_signal, nsamp)
+
     recovered_spec = _amp_to_db(
-        np.abs(_stft(recovered_signal, n_fft, hop_length, win_length))
+        np.abs(
+            _stft(
+                recovered_signal,
+                n_fft,
+                hop_length,
+                win_length,
+                use_tensorflow=use_tensorflow,
+            )
+        )
     )
     if verbose:
         plot_reduction_steps(

diff --git a/noisereduce/utils.py b/noisereduce/utils.py
@@ -0,0 +1,15 @@
+import numpy as np
+
+
+def int16_to_float32(data):
+    """ Converts from int16 wav to float32 wav
+
+    Samples are divided by 32768.0 and returned as a float32 array. An empty
+    input yields an empty float32 array.
+
+    Raises:
+        ValueError: if any sample has a magnitude above 32768
+    """
+    data = np.asarray(data)
+    # np.max raises on zero-size arrays, so only range-check non-empty input
+    if data.size and np.max(np.abs(data)) > 32768:
+        raise ValueError("Data has values above 32768")
+    return (data / 32768.0).astype("float32")
+
+
+def float32_to_int16(data):
+    """ Converts from float32 wav to int16 wav
+
+    If the peak magnitude of `data` exceeds 1, the data is first divided by
+    that peak; the samples are then scaled by 32767 and cast to int16.
+    """
+    data = np.asarray(data)
+    if data.size:
+        # Use the absolute peak: a clip whose negative excursion is below -1
+        # must be normalized too, otherwise it overflows the int16 range.
+        peak = np.max(np.abs(data))
+        if peak > 1:
+            data = data / peak
+    return np.array(data * 32767).astype("int16")