Skip to content

Commit

Permalink
Fix preprocessing for audio input pipelines in external.py (#2779)
Browse files Browse the repository at this point in the history
* Fix bug in loading audio

* Changelog

* Add test

* Fail if error doesn't come from rate limit

* lint

* update model examples

Co-authored-by: Abubakar Abid <abubakar@huggingface.co>
  • Loading branch information
freddyaboulton and abidlabs authored Dec 9, 2022
1 parent 76c4a55 commit 2a773d5
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 4 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ Add Brazilian Portuguese translation (pt-BR.json) by [@pstwh](http://github.com/
## Bug Fixes:
* Fixed issue where image thumbnails were not showing when an example directory was provided
by [@abidlabs](https://github.com/abidlabs) in [PR 2745](https://github.com/gradio-app/gradio/pull/2745)
* Fixed bug loading audio input models from the hub by [@freddyaboulton](https://github.com/freddyaboulton) in [PR 2779](https://github.com/gradio-app/gradio/pull/2779).

## Documentation Changes:
No changes to highlight.
Expand Down
4 changes: 2 additions & 2 deletions gradio/external.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,14 +84,14 @@ def from_model(model_name: str, api_key: str | None, alias: str, **kwargs):
),
},
"audio-to-audio": {
# example model: speechbrain/mtl-mimic-voicebank
# example model: facebook/xm_transformer_sm_all-en
"inputs": components.Audio(source="upload", type="filepath", label="Input"),
"outputs": components.Audio(label="Output"),
"preprocess": to_binary,
"postprocess": encode_to_base64,
},
"automatic-speech-recognition": {
# example model: jonatasgrosman/wav2vec2-large-xlsr-53-english
# example model: facebook/wav2vec2-base-960h
"inputs": components.Audio(source="upload", type="filepath", label="Input"),
"outputs": components.Textbox(label="Output"),
"preprocess": to_binary,
Expand Down
4 changes: 3 additions & 1 deletion gradio/processing_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,10 @@

def to_binary(x: str | Dict) -> bytes:
"""Converts a base64 string or dictionary to a binary string that can be sent in a POST."""
if isinstance(x, dict):
if isinstance(x, dict) and not x.get("data"):
x = encode_url_or_file_to_base64(x["name"])
elif isinstance(x, dict) and x.get("data"):
x = x["data"]
return base64.b64decode(x.split(",")[1])


Expand Down
25 changes: 24 additions & 1 deletion test/test_external.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,15 @@
import pathlib
import sys
import textwrap
import warnings
from unittest.mock import MagicMock, patch

import pytest
from fastapi.testclient import TestClient

import gradio
import gradio as gr
from gradio import utils
from gradio import media_data, utils
from gradio.exceptions import InvalidApiName
from gradio.external import (
TooManyRequestsError,
Expand Down Expand Up @@ -234,6 +236,27 @@ def test_speech_recognition_model(self):
except TooManyRequestsError:
pass

app, _, _ = io.launch(prevent_thread_lock=True, show_error=True)
client = TestClient(app)
resp = client.post(
"api/predict",
json={"fn_index": 0, "data": [media_data.BASE64_AUDIO], "name": "sample"},
)
try:
if resp.status_code != 200:
warnings.warn("Request for speech recognition model failed!")
if (
"Could not complete request to HuggingFace API"
in resp.json()["error"]
):
pass
else:
assert False
else:
assert resp.json()["data"] is not None
finally:
io.close()

def test_text_to_image_model(self):
io = gr.Interface.load("models/osanseviero/BigGAN-deep-128")
try:
Expand Down

0 comments on commit 2a773d5

Please sign in to comment.