Prepare for v1.0.1 (#24)

* prepare for next release
* fixed style and removed unused file
IBM · Feb 14, 2019 · a090174 · a090174
1 parent f2f8b4d
commit a090174
Show file tree

Hide file tree

Showing 8 changed files with 358 additions and 457 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -6,8 +6,9 @@ services:
 install:
   - docker build -t max-audio-classifier .
   - docker run -it -d -p 5000:5000 max-audio-classifier
-  - sleep 30
+  - pip install pytest requests flake8
 before_script:
-  - pip install pytest requests
+  - flake8 . --max-line-length=127
+  - sleep 30
 script:
   - pytest tests/test.py
diff --git a/Dockerfile b/Dockerfile
@@ -12,7 +12,9 @@ COPY requirements.txt /workspace
 RUN pip install -r requirements.txt
 
 COPY . /workspace
-RUN md5sum -c md5sums.txt # check file integrity
+
+# check file integrity
+RUN md5sum -c md5sums.txt
 
 EXPOSE 5000
 

diff --git a/config.py b/config.py
@@ -7,8 +7,8 @@
 # Application settings
 
 # API metadata
-API_TITLE = 'Model Asset Exchange Server'
-API_DESC = 'An API for serving models'
+API_TITLE = 'MAX Audio Classifier'
+API_DESC = 'Identify sounds in short audio clips.'
 API_VERSION = '0.1'
 
 # default model

diff --git a/core/mel_features.py b/core/mel_features.py
diff --git a/core/vggish_inference_demo.py b/core/vggish_inference_demo.py
diff --git a/core/vggish_input.py b/core/vggish_input.py
@@ -23,51 +23,52 @@
 from . import vggish_params
 import sys
 
+
 def waveform_to_examples(data, sample_rate):
-  """Converts audio waveform into an array of examples for VGGish.
+    """Converts audio waveform into an array of examples for VGGish.
 
-  Args:
-    data: np.array of either one dimension (mono) or two dimensions
-      (multi-channel, with the outer dimension representing channels).
-      Each sample is generally expected to lie in the range [-1.0, +1.0],
-      although this is not required.
-    sample_rate: Sample rate of data.
+    Args:
+      data: np.array of either one dimension (mono) or two dimensions
+        (multi-channel, with the outer dimension representing channels).
+        Each sample is generally expected to lie in the range [-1.0, +1.0],
+        although this is not required.
+      sample_rate: Sample rate of data.
 
-  Returns:
-    3-D np.array of shape [num_examples, num_frames, num_bands] which represents
-    a sequence of examples, each of which contains a patch of log mel
-    spectrogram, covering num_frames frames of audio and num_bands mel frequency
-    bands, where the frame length is vggish_params.STFT_HOP_LENGTH_SECONDS.
-  """
-  # Convert to mono.
-  if len(data.shape) > 1:
-    data = np.mean(data, axis=1)
-  # Resample to the rate assumed by VGGish.
-  if sample_rate != vggish_params.SAMPLE_RATE:
-    data = resampy.resample(data, sample_rate, vggish_params.SAMPLE_RATE)
+    Returns:
+      3-D np.array of shape [num_examples, num_frames, num_bands] which represents
+      a sequence of examples, each of which contains a patch of log mel
+      spectrogram, covering num_frames frames of audio and num_bands mel frequency
+      bands, where the frame length is vggish_params.STFT_HOP_LENGTH_SECONDS.
+    """
+    # Convert to mono.
+    if len(data.shape) > 1:
+        data = np.mean(data, axis=1)
+    # Resample to the rate assumed by VGGish.
+    if sample_rate != vggish_params.SAMPLE_RATE:
+        data = resampy.resample(data, sample_rate, vggish_params.SAMPLE_RATE)
 
-  # Compute log mel spectrogram features.
-  log_mel = mel_features.log_mel_spectrogram(
-      data,
-      audio_sample_rate=vggish_params.SAMPLE_RATE,
-      log_offset=vggish_params.LOG_OFFSET,
-      window_length_secs=vggish_params.STFT_WINDOW_LENGTH_SECONDS,
-      hop_length_secs=vggish_params.STFT_HOP_LENGTH_SECONDS,
-      num_mel_bins=vggish_params.NUM_MEL_BINS,
-      lower_edge_hertz=vggish_params.MEL_MIN_HZ,
-      upper_edge_hertz=vggish_params.MEL_MAX_HZ)
+    # Compute log mel spectrogram features.
+    log_mel = mel_features.log_mel_spectrogram(
+        data,
+        audio_sample_rate=vggish_params.SAMPLE_RATE,
+        log_offset=vggish_params.LOG_OFFSET,
+        window_length_secs=vggish_params.STFT_WINDOW_LENGTH_SECONDS,
+        hop_length_secs=vggish_params.STFT_HOP_LENGTH_SECONDS,
+        num_mel_bins=vggish_params.NUM_MEL_BINS,
+        lower_edge_hertz=vggish_params.MEL_MIN_HZ,
+        upper_edge_hertz=vggish_params.MEL_MAX_HZ)
 
-  # Frame features into examples.
-  features_sample_rate = 1.0 / vggish_params.STFT_HOP_LENGTH_SECONDS
-  example_window_length = int(round(
-      vggish_params.EXAMPLE_WINDOW_SECONDS * features_sample_rate))
-  example_hop_length = int(round(
-      vggish_params.EXAMPLE_HOP_SECONDS * features_sample_rate))
-  log_mel_examples = mel_features.frame(
-      log_mel,
-      window_length=example_window_length,
-      hop_length=example_hop_length)
-  return log_mel_examples
+    # Frame features into examples.
+    features_sample_rate = 1.0 / vggish_params.STFT_HOP_LENGTH_SECONDS
+    example_window_length = int(round(
+        vggish_params.EXAMPLE_WINDOW_SECONDS * features_sample_rate))
+    example_hop_length = int(round(
+        vggish_params.EXAMPLE_HOP_SECONDS * features_sample_rate))
+    log_mel_examples = mel_features.frame(
+        log_mel,
+        window_length=example_window_length,
+        hop_length=example_hop_length)
+    return log_mel_examples
 
 
 def wavfile_to_examples(wav_file):
@@ -82,7 +83,7 @@ def wavfile_to_examples(wav_file):
     """
     try:
         sr, wav_data = wavfile.read(wav_file)
-    except:
+    except IOError:
         print("Error reading WAV file!")
         print("The specified WAV file type is not supported by scipy.io.wavfile.read()")
         sys.exit(1)

diff --git a/core/vggish_postprocess.py b/core/vggish_postprocess.py
@@ -21,71 +21,71 @@
 
 
 class Postprocessor(object):
-  """Post-processes VGGish embeddings.
+    """Post-processes VGGish embeddings.
 
-  The initial release of AudioSet included 128-D VGGish embeddings for each
-  segment of AudioSet. These released embeddings were produced by applying
-  a PCA transformation (technically, a whitening transform is included as well)
-  and 8-bit quantization to the raw embedding output from VGGish, in order to
-  stay compatible with the YouTube-8M project which provides visual embeddings
-  in the same format for a large set of YouTube videos. This class implements
-  the same PCA (with whitening) and quantization transformations.
-  """
+    The initial release of AudioSet included 128-D VGGish embeddings for each
+    segment of AudioSet. These released embeddings were produced by applying
+    a PCA transformation (technically, a whitening transform is included as well)
+    and 8-bit quantization to the raw embedding output from VGGish, in order to
+    stay compatible with the YouTube-8M project which provides visual embeddings
+    in the same format for a large set of YouTube videos. This class implements
+    the same PCA (with whitening) and quantization transformations.
+    """
 
-  def __init__(self, pca_params_npz_path):
-    """Constructs a postprocessor.
+    def __init__(self, pca_params_npz_path):
+        """Constructs a postprocessor.
 
-    Args:
-      pca_params_npz_path: Path to a NumPy-format .npz file that
-        contains the PCA parameters used in postprocessing.
-    """
-    params = np.load(pca_params_npz_path)
-    self._pca_matrix = params[vggish_params.PCA_EIGEN_VECTORS_NAME]
-    # Load means into a column vector for easier broadcasting later.
-    self._pca_means = params[vggish_params.PCA_MEANS_NAME].reshape(-1, 1)
-    assert self._pca_matrix.shape == (
-        vggish_params.EMBEDDING_SIZE, vggish_params.EMBEDDING_SIZE), (
-            'Bad PCA matrix shape: %r' % (self._pca_matrix.shape,))
-    assert self._pca_means.shape == (vggish_params.EMBEDDING_SIZE, 1), (
-        'Bad PCA means shape: %r' % (self._pca_means.shape,))
+        Args:
+          pca_params_npz_path: Path to a NumPy-format .npz file that
+            contains the PCA parameters used in postprocessing.
+        """
+        params = np.load(pca_params_npz_path)
+        self._pca_matrix = params[vggish_params.PCA_EIGEN_VECTORS_NAME]
+        # Load means into a column vector for easier broadcasting later.
+        self._pca_means = params[vggish_params.PCA_MEANS_NAME].reshape(-1, 1)
+        assert self._pca_matrix.shape == (
+            vggish_params.EMBEDDING_SIZE, vggish_params.EMBEDDING_SIZE), (
+                'Bad PCA matrix shape: %r' % (self._pca_matrix.shape,))
+        assert self._pca_means.shape == (vggish_params.EMBEDDING_SIZE, 1), (
+                'Bad PCA means shape: %r' % (self._pca_means.shape,))
 
-  def postprocess(self, embeddings_batch):
-    """Applies postprocessing to a batch of embeddings.
+    def postprocess(self, embeddings_batch):
+        """Applies postprocessing to a batch of embeddings.
 
-    Args:
-      embeddings_batch: An nparray of shape [batch_size, embedding_size]
-        containing output from the embedding layer of VGGish.
+        Args:
+          embeddings_batch: An nparray of shape [batch_size, embedding_size]
+            containing output from the embedding layer of VGGish.
 
-    Returns:
-      An nparray of the same shape as the input but of type uint8,
-      containing the PCA-transformed and quantized version of the input.
-    """
-    assert len(embeddings_batch.shape) == 2, (
-        'Expected 2-d batch, got %r' % (embeddings_batch.shape,))
-    assert embeddings_batch.shape[1] == vggish_params.EMBEDDING_SIZE, (
-        'Bad batch shape: %r' % (embeddings_batch.shape,))
+        Returns:
+          An nparray of the same shape as the input but of type uint8,
+          containing the PCA-transformed and quantized version of the input.
+        """
+        assert len(embeddings_batch.shape) == 2, (
+                'Expected 2-d batch, got %r' % (embeddings_batch.shape,))
+        assert embeddings_batch.shape[1] == vggish_params.EMBEDDING_SIZE, (
+                'Bad batch shape: %r' % (embeddings_batch.shape,))
 
-    # Apply PCA.
-    # - Embeddings come in as [batch_size, embedding_size].
-    # - Transpose to [embedding_size, batch_size].
-    # - Subtract pca_means column vector from each column.
-    # - Premultiply by PCA matrix of shape [output_dims, input_dims]
-    #   where both are are equal to embedding_size in our case.
-    # - Transpose result back to [batch_size, embedding_size].
-    pca_applied = np.dot(self._pca_matrix,
-                         (embeddings_batch.T - self._pca_means)).T
+        # Apply PCA.
+        # - Embeddings come in as [batch_size, embedding_size].
+        # - Transpose to [embedding_size, batch_size].
+        # - Subtract pca_means column vector from each column.
+        # - Premultiply by PCA matrix of shape [output_dims, input_dims]
+        #   where both are equal to embedding_size in our case.
+        # - Transpose result back to [batch_size, embedding_size].
+        pca_applied = np.dot(self._pca_matrix,
+                             (embeddings_batch.T - self._pca_means)).T
 
-    # Quantize by:
-    # - clipping to [min, max] range
-    clipped_embeddings = np.clip(
-        pca_applied, vggish_params.QUANTIZE_MIN_VAL,
-        vggish_params.QUANTIZE_MAX_VAL)
-    # - convert to 8-bit in range [0.0, 255.0]
-    quantized_embeddings = (
-        (clipped_embeddings - vggish_params.QUANTIZE_MIN_VAL) *
-        (255.0 /
-         (vggish_params.QUANTIZE_MAX_VAL - vggish_params.QUANTIZE_MIN_VAL)))
-    # - cast 8-bit float to uint8
-    quantized_embeddings = quantized_embeddings.astype(np.uint8)
+        # Quantize by:
+        # - clipping to [min, max] range
+        clipped_embeddings = np.clip(
+            pca_applied, vggish_params.QUANTIZE_MIN_VAL,
+            vggish_params.QUANTIZE_MAX_VAL)
+        # - convert to 8-bit in range [0.0, 255.0]
+        quantized_embeddings = (
+                (clipped_embeddings - vggish_params.QUANTIZE_MIN_VAL) *
+                (255.0 /
+                 (vggish_params.QUANTIZE_MAX_VAL - vggish_params.QUANTIZE_MIN_VAL)))
+        # - cast 8-bit float to uint8
+        quantized_embeddings = quantized_embeddings.astype(np.uint8)
 
-    return quantized_embeddings
+        return quantized_embeddings