# wavenet_util.py

import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

import librosa.display as dsp
import numpy as np
import tensorflow as tf


def _assert_valid_input_type(s):
	"""Assert that ``s`` names one of the supported input types.

	The accepted values are ``'mulaw-quantize'``, ``'mulaw'`` and ``'raw'``;
	any other value fails the assertion.
	"""
	supported = ('mulaw-quantize', 'mulaw', 'raw')
	assert any(s == kind for kind in supported)

def is_mulaw_quantize(s):
	"""Tell whether the input type ``s`` is ``'mulaw-quantize'``.

	``s`` is validated with ``_assert_valid_input_type`` before the comparison.
	"""
	_assert_valid_input_type(s)
	matches = (s == 'mulaw-quantize')
	return matches

def is_mulaw(s):
	"""Tell whether the input type ``s`` is ``'mulaw'``.

	``s`` is validated with ``_assert_valid_input_type`` before the comparison.
	"""
	_assert_valid_input_type(s)
	matches = (s == 'mulaw')
	return matches

def is_raw(s):
	"""Tell whether the input type ``s`` is ``'raw'``.

	``s`` is validated with ``_assert_valid_input_type`` before the comparison.
	"""
	_assert_valid_input_type(s)
	matches = (s == 'raw')
	return matches

def is_scalar_input(s):
	"""Tell whether the input type ``s`` is one of the scalar kinds.

	An input type counts as scalar when it is ``'raw'`` or ``'mulaw'``
	(i.e. when ``is_raw`` or ``is_mulaw`` accepts it).
	"""
	if is_raw(s):
		return True
	return is_mulaw(s)


#From https://github.com/r9y9/nnmnkwii/blob/master/nnmnkwii/preprocessing/generic.py
def mulaw(x, mu=256):
	"""Mu-law companding.

	Method described in paper [1]_.

	.. math::
		f(x) = sign(x) ln(1 + mu |x|) / ln(1 + mu)

	Args:
		x (array-like): Input signal. Each value is expected to lie in
		  [-1, 1]. NumPy arrays, scalars and TensorFlow tensors are
		  supported through the ``_sign`` / ``_log1p`` / ``_abs`` wrappers.
		mu (number): Compression parameter ``μ``. NOTE: the value passed by
		  the caller is currently ignored; the body always uses 255.

	Returns:
		array-like: Compressed signal (in [-1, 1] for inputs in [-1, 1]).

	.. [1] Brokish, Charles W., and Michele Lewis. "A-law and mu-law companding
		implementations using the tms320c54x." SPRA163 (1997).
	"""
	# The argument is deliberately overridden: compression is fixed at mu = 255.
	mu = 255
	signed_log = _sign(x) * _log1p(mu * _abs(x))
	return signed_log / _log1p(mu)


def inv_mulaw(y, mu=256):
	"""Inverse of mu-law companding (mu-law expansion).

	.. math::
		f^{-1}(y) = sign(y) (1 / mu) ((1 + mu)^{|y|} - 1)

	Args:
		y (array-like): Compressed signal. Each value is expected to lie in
		  [-1, 1]. NumPy arrays, scalars and TensorFlow tensors are
		  supported through the ``_sign`` / ``_abs`` wrappers.
		mu (number): Compression parameter ``μ``. NOTE: the value passed by
		  the caller is currently ignored; the body always uses 255.

	Returns:
		array-like: Uncompressed signal (-1 <= x <= 1 for inputs in [-1, 1]).
	"""
	# The argument is deliberately overridden: expansion is fixed at mu = 255.
	mu = 255
	scaled_sign = _sign(y) * (1.0 / mu)
	expanded = (1.0 + mu)**_abs(y) - 1.0
	return scaled_sign * expanded


def mulaw_quantize(x, mu=256):
	"""Mu-law companding followed by quantization to integer levels.

	The signal is first compressed with ``mulaw`` and the result is rescaled
	from [-1, 1] to [0, mu] before being cast to an integer type by
	``_asint``.

	Args:
		x (array-like): Input signal. Each value is expected to lie in
		  [-1, 1].
		mu (number): Compression parameter ``μ``. NOTE: the value passed by
		  the caller is currently ignored; the body always uses 255, so the
		  output levels span [0, 255].

	Returns:
		array-like: Quantized signal (integer dtype)
		  - y ∈ [0, mu] if x ∈ [-1, 1]
		  - y ∈ [0, mu) if x ∈ [-1, 1)
	"""
	# The argument is deliberately overridden: quantization is fixed at mu = 255.
	mu = 255
	compressed = mulaw(x, mu)
	# Shift and scale the compressed signal from [-1, 1] onto [0, mu].
	levels = (compressed + 1) / 2 * mu
	return _asint(levels)


def inv_mulaw_quantize(y, mu=256):
	"""Inverse of mu-law companding + quantization.

	The quantized levels are converted to floating point with ``_asfloat``,
	rescaled from [0, mu] to [-1, 1] and then expanded with ``inv_mulaw``.

	Args:
		y (array-like): Quantized signal (∈ [0, mu]).
		mu (number): Compression parameter ``μ``. NOTE: the value passed by
		  the caller is currently ignored; the body always uses 255.

	Returns:
		array-like: Uncompressed signal ([-1, 1]).
	"""
	# The argument is deliberately overridden: de-quantization is fixed at mu = 255.
	mu = 255
	# Map the integer levels [0, mu] back onto the companded range [-1, 1].
	companded = 2 * _asfloat(y) / mu - 1
	return inv_mulaw(companded, mu)

def _sign(x):
	"""Element-wise sign that works on NumPy and TensorFlow inputs.

	NumPy arrays and scalars (as judged by ``np.isscalar``) are handled by
	``np.sign``; any other input is passed to ``tf.sign``.
	"""
	if isinstance(x, np.ndarray) or np.isscalar(x):
		return np.sign(x)
	return tf.sign(x)


def _log1p(x):
	"""Element-wise ``log(1 + x)`` that works on NumPy and TensorFlow inputs.

	NumPy arrays and scalars (as judged by ``np.isscalar``) are handled by
	``np.log1p``; any other input is passed to ``tf.log1p``.
	"""
	if isinstance(x, np.ndarray) or np.isscalar(x):
		return np.log1p(x)
	return tf.log1p(x)


def _abs(x):
	"""Element-wise absolute value that works on NumPy and TensorFlow inputs.

	NumPy arrays and scalars (as judged by ``np.isscalar``) are handled by
	``np.abs``; any other input is passed to ``tf.abs``.
	"""
	if isinstance(x, np.ndarray) or np.isscalar(x):
		return np.abs(x)
	return tf.abs(x)


def _asint(x):
	"""Cast ``x`` to an integer type, for NumPy and TensorFlow inputs.

	Args:
		x: A NumPy array, a scalar (as judged by ``np.isscalar``), or any
		  other value accepted by ``tf.cast``.

	Returns:
		- ``x.astype(int)`` for NumPy arrays,
		- ``int(x)`` for scalars,
		- ``tf.cast(x, tf.int32)`` for everything else.
	"""
	if isinstance(x, np.ndarray):
		# ``np.int`` was only an alias of the builtin ``int``; it was deprecated
		# in NumPy 1.20 and removed in 1.24, so use the builtin directly.
		return x.astype(int)
	if np.isscalar(x):
		return int(x)
	return tf.cast(x, tf.int32)


def _asfloat(x):
	"""Cast ``x`` to a floating-point type, for NumPy and TensorFlow inputs.

	NumPy arrays are converted with ``astype(np.float32)``, scalars (as judged
	by ``np.isscalar``) with the builtin ``float``, and any other input with
	``tf.cast(x, tf.float32)``.
	"""
	if isinstance(x, np.ndarray):
		return x.astype(np.float32)
	if np.isscalar(x):
		return float(x)
	return tf.cast(x, tf.float32)

def sequence_mask(input_lengths, max_len=None, expand=True):
	"""Build a float32 mask from sequence lengths with ``tf.sequence_mask``.

	Args:
		input_lengths: Lengths passed to ``tf.sequence_mask``.
		max_len: Mask width. When None, ``tf.reduce_max(input_lengths)`` is
		  used.
		expand: When True, a trailing axis of size 1 is appended to the mask
		  with ``tf.expand_dims(..., axis=-1)``.

	Returns:
		The float32 mask tensor, with the extra trailing axis if ``expand``.
	"""
	if max_len is None:
		max_len = tf.reduce_max(input_lengths)

	mask = tf.sequence_mask(input_lengths, max_len, dtype=tf.float32)
	return tf.expand_dims(mask, axis=-1) if expand else mask


def waveplot(path, y_hat, y_target, hparams, title=None):
	"""Plot waveform(s) with librosa and save the figure as a PNG.

	When ``y_target`` is given, the target and the predicted waveform are
	drawn in the first two rows of a three-row layout; otherwise only the
	generated waveform is drawn in the first row of a two-row layout. The
	remaining row leaves room for the optional ``title`` text.

	Args:
		path: Destination passed to ``plt.savefig`` (written in PNG format).
		y_hat: Predicted / generated waveform.
		y_target: Target waveform, or None to plot ``y_hat`` alone.
		hparams: Object whose ``sample_rate`` attribute is used as the
		  sampling rate for plotting.
		title: Optional text drawn near the bottom of the figure.
	"""
	sr = hparams.sample_rate
	# librosa 0.10 removed ``waveplot`` in favour of ``waveshow``. Keep using the
	# original function whenever it exists so older setups render unchanged.
	draw_wave = getattr(dsp, 'waveplot', None) or dsp.waveshow

	fig = plt.figure(figsize=(12, 4))
	try:
		if y_target is not None:
			ax = plt.subplot(3, 1, 1)
			draw_wave(y_target, sr=sr)
			ax.set_title('Target waveform')
			ax = plt.subplot(3, 1, 2)
			draw_wave(y_hat, sr=sr)
			ax.set_title('Predicted waveform')
		else:
			ax = plt.subplot(2, 1, 1)
			draw_wave(y_hat, sr=sr)
			ax.set_title('Generated waveform')

		if title is not None:
			# Set common labels
			fig.text(0.5, 0.18, title, horizontalalignment='center', fontsize=16)

		plt.tight_layout()
		plt.savefig(path, format="png")
	finally:
		# Always release this figure, even if plotting or saving failed;
		# otherwise figures pile up in pyplot's registry across calls.
		plt.close(fig)

def plot_spectrogram(pred_spectrogram, path, title=None, split_title=False, target_spectrogram=None, max_len=None, auto_aspect=False):
	"""Save a PNG of the predicted (and optionally the target) spectrogram.

	With a target, the target is drawn in the first and the prediction in the
	second row of a three-row layout; without one, the prediction is drawn in
	the first row of a two-row layout. Each image gets a horizontal colorbar.
	Spectrograms are rotated with ``np.rot90`` before display (presumably so
	that time runs along the x-axis -- confirm against the caller's layout).

	Args:
		pred_spectrogram: Predicted spectrogram array.
		path: Destination of the PNG file.
		title: Optional text drawn near the bottom of the figure.
		split_title: When True, ``title`` is first passed through
		  ``split_title_line``. NOTE(review): that helper is not defined in
		  the visible part of this module -- confirm it is in scope.
		target_spectrogram: Optional ground-truth spectrogram array.
		max_len: When not None, spectrograms are truncated to their first
		  ``max_len`` entries along the first axis.
		auto_aspect: When True, images are drawn with ``aspect='auto'``.
	"""
	if max_len is not None:
		# The target is optional, so only truncate it when one was supplied.
		if target_spectrogram is not None:
			target_spectrogram = target_spectrogram[:max_len]
		pred_spectrogram = pred_spectrogram[:max_len]

	if split_title:
		title = split_title_line(title)

	# Shared imshow options; 'aspect' is only overridden on request.
	imshow_kwargs = {'interpolation': 'none'}
	if auto_aspect:
		imshow_kwargs['aspect'] = 'auto'

	fig = plt.figure(figsize=(10, 8))
	try:
		if title is not None:
			# Set common labels
			fig.text(0.5, 0.18, title, horizontalalignment='center', fontsize=16)

		#target spectrogram subplot
		if target_spectrogram is not None:
			ax1 = fig.add_subplot(311)
			ax2 = fig.add_subplot(312)

			im = ax1.imshow(np.rot90(target_spectrogram), **imshow_kwargs)
			ax1.set_title('Target Mel-Spectrogram')
			fig.colorbar(mappable=im, shrink=0.65, orientation='horizontal', ax=ax1)
			ax2.set_title('Predicted Mel-Spectrogram')
		else:
			ax2 = fig.add_subplot(211)

		im = ax2.imshow(np.rot90(pred_spectrogram), **imshow_kwargs)
		fig.colorbar(mappable=im, shrink=0.65, orientation='horizontal', ax=ax2)

		fig.tight_layout()
		fig.savefig(path, format='png')
	finally:
		# Always release this figure, even if plotting or saving failed;
		# otherwise figures pile up in pyplot's registry across calls.
		plt.close(fig)