Fix preprocessing for audio input pipelines in external.py (#2779)

* Fix bug in loading audio
* Changelog
* Add test
* Fail if error doesn't come from rate limit
* lint
* update model examples

Co-authored-by: Abubakar Abid <abubakar@huggingface.co>
gradio-app · Dec 9, 2022 · 2a773d5 · 2a773d5
1 parent 76c4a55
commit 2a773d5
Show file tree

Hide file tree

Showing 4 changed files with 30 additions and 4 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -79,6 +79,7 @@ Add Brazilian Portuguese translation (pt-BR.json) by [@pstwh](http://github.com/
 ## Bug Fixes:
 * Fixed issue where image thumbnails were not showing when an example directory was provided
 by [@abidlabs](https://github.com/abidlabs) in [PR 2745](https://github.com/gradio-app/gradio/pull/2745)
+* Fixed bug loading audio input models from the hub by [@freddyaboulton](https://github.com/freddyaboulton) in [PR 2779](https://github.com/gradio-app/gradio/pull/2779). 
 
 ## Documentation Changes:
 No changes to highlight.

diff --git a/gradio/external.py b/gradio/external.py
@@ -84,14 +84,14 @@ def from_model(model_name: str, api_key: str | None, alias: str, **kwargs):
             ),
         },
         "audio-to-audio": {
-            # example model: speechbrain/mtl-mimic-voicebank
+            # example model: facebook/xm_transformer_sm_all-en
             "inputs": components.Audio(source="upload", type="filepath", label="Input"),
             "outputs": components.Audio(label="Output"),
             "preprocess": to_binary,
             "postprocess": encode_to_base64,
         },
         "automatic-speech-recognition": {
-            # example model: jonatasgrosman/wav2vec2-large-xlsr-53-english
+            # example model: facebook/wav2vec2-base-960h
             "inputs": components.Audio(source="upload", type="filepath", label="Input"),
             "outputs": components.Textbox(label="Output"),
             "preprocess": to_binary,

diff --git a/gradio/processing_utils.py b/gradio/processing_utils.py
@@ -31,8 +31,10 @@
 
 def to_binary(x: str | Dict) -> bytes:
     """Converts a base64 string or dictionary to a binary string that can be sent in a POST."""
-    if isinstance(x, dict):
+    if isinstance(x, dict) and not x.get("data"):
         x = encode_url_or_file_to_base64(x["name"])
+    elif isinstance(x, dict) and x.get("data"):
+        x = x["data"]
     return base64.b64decode(x.split(",")[1])
 
 

diff --git a/test/test_external.py b/test/test_external.py
@@ -3,13 +3,15 @@
 import pathlib
 import sys
 import textwrap
+import warnings
 from unittest.mock import MagicMock, patch
 
 import pytest
+from fastapi.testclient import TestClient
 
 import gradio
 import gradio as gr
-from gradio import utils
+from gradio import media_data, utils
 from gradio.exceptions import InvalidApiName
 from gradio.external import (
     TooManyRequestsError,
@@ -234,6 +236,27 @@ def test_speech_recognition_model(self):
         except TooManyRequestsError:
             pass
 
+        app, _, _ = io.launch(prevent_thread_lock=True, show_error=True)
+        client = TestClient(app)
+        resp = client.post(
+            "api/predict",
+            json={"fn_index": 0, "data": [media_data.BASE64_AUDIO], "name": "sample"},
+        )
+        try:
+            if resp.status_code != 200:
+                warnings.warn("Request for speech recognition model failed!")
+                if (
+                    "Could not complete request to HuggingFace API"
+                    in resp.json()["error"]
+                ):
+                    pass
+                else:
+                    assert False
+            else:
+                assert resp.json()["data"] is not None
+        finally:
+            io.close()
+
     def test_text_to_image_model(self):
         io = gr.Interface.load("models/osanseviero/BigGAN-deep-128")
         try: