-
Notifications
You must be signed in to change notification settings - Fork 77
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Set Working Default Models in HF Parsers #1221
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -43,6 +43,13 @@ def refine_completion_params(model_settings: dict[Any, Any]) -> dict[str, Any]: | |
if key.lower() in supported_keys: | ||
completion_data[key.lower()] = model_settings[key] | ||
|
||
# The default model is openai/whisper-large-v3, which does not work as of | ||
# 02/13/2024. Instead, default to a free model (which supports remote | ||
# inference) with the next most "likes" in HF | ||
# https://huggingface.co/models?pipeline_tag=automatic-speech-recognition&sort=likes | ||
Comment on lines
+46
to
+49
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Thanks for documenting. |
||
if completion_data.get("model") is None: | ||
completion_data["model"] = "openai/whisper-large-v2" | ||
|
||
return completion_data | ||
|
||
|
||
|
@@ -299,7 +306,7 @@ def get_output_text( | |
output_data = output.data | ||
if isinstance(output_data, str): | ||
return output_data | ||
|
||
else: | ||
raise ValueError( | ||
f"Invalid output data type {type(output_data)} for prompt '{prompt.name}'. Expected string." | ||
|
@@ -347,7 +354,7 @@ def validate_and_retrieve_audio_from_attachments(prompt: Prompt) -> str: | |
raise ValueError( | ||
"Multiple audio inputs are not supported for the HF Automatic Speech Recognition Inference api. Please specify a single audio input attachment for Prompt: {prompt.name}." | ||
) | ||
|
||
attachment = prompt.input.attachments[0] | ||
|
||
validate_attachment_type_is_audio(attachment) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -47,6 +47,13 @@ def refine_completion_params(model_settings: dict[Any, Any]) -> dict[str, Any]: | |
if key.lower() in supported_keys: | ||
completion_data[key.lower()] = model_settings[key] | ||
|
||
# The default model is suno/bark, which requires HF Pro subscription | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Nit: I feel we should remove these as default from the input schema? Similar issue we've had where people can't use GPT-4 because that requires paid access. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Just so I understand, this comment is not relevant after the next PR, right? I just did them as separate PRs to separate concerns. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yea, I guess my point is more that this comment "default model is suno/bark" won't apply anymore to this after the next PR, but not a blocker. |
||
# Instead, default to a free model (which supports remote inference) with | ||
# the next most "likes" in HF | ||
# https://huggingface.co/models?pipeline_tag=text-to-speech&sort=likes | ||
if completion_data.get("model") is None: | ||
completion_data["model"] = "facebook/fastspeech2-en-ljspeech" | ||
|
||
return completion_data | ||
|
||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Same comment as below