Set Working Default Models in HF Parsers #1221

Merged 1 commit on Feb 13, 2024
@@ -43,6 +43,13 @@ def refine_completion_params(model_settings: dict[Any, Any]) -> dict[str, Any]:
        if key.lower() in supported_keys:
            completion_data[key.lower()] = model_settings[key]

    # The default model is openai/whisper-large-v3, which does not work as of
    # 02/13/2024. Instead, default to a free model (which supports remote
    # inference) with the next most "likes" in HF:
    # https://huggingface.co/models?pipeline_tag=automatic-speech-recognition&sort=likes

Contributor: Same comment as below.
Contributor (on lines +46 to +49): Thanks for documenting.

    if completion_data.get("model") is None:
        completion_data["model"] = "openai/whisper-large-v2"

    return completion_data
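The fallback pattern added in this hunk can be sketched standalone. This is a hypothetical minimal version, not the actual parser: the `SUPPORTED_KEYS` set and the example calls are assumptions; only the key-lowering loop and the `openai/whisper-large-v2` default come from the diff.

```python
from typing import Any

# Assumed for illustration; the real parser defines its own supported keys.
SUPPORTED_KEYS = {"model"}

def refine_completion_params(model_settings: dict[Any, Any]) -> dict[str, Any]:
    completion_data: dict[str, Any] = {}
    for key in model_settings:
        if key.lower() in SUPPORTED_KEYS:
            completion_data[key.lower()] = model_settings[key]

    # openai/whisper-large-v3 (the HF default) did not work for remote
    # inference as of 02/13/2024, so fall back to whisper-large-v2, the
    # next most-"liked" free ASR model on the Hub.
    if completion_data.get("model") is None:
        completion_data["model"] = "openai/whisper-large-v2"
    return completion_data

print(refine_completion_params({"Model": None}))
# → {'model': 'openai/whisper-large-v2'}
print(refine_completion_params({"model": "facebook/wav2vec2-base-960h"}))
# → {'model': 'facebook/wav2vec2-base-960h'}
```

Note that `.get("model") is None` also covers the case where the key is present but explicitly set to `None`, not just a missing key.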


@@ -299,7 +306,7 @@ def get_output_text(
    output_data = output.data
    if isinstance(output_data, str):
        return output_data
    else:
        raise ValueError(
            f"Invalid output data type {type(output_data)} for prompt '{prompt.name}'. Expected string."
@@ -347,7 +354,7 @@ def validate_and_retrieve_audio_from_attachments(prompt: Prompt) -> str:
    raise ValueError(
        f"Multiple audio inputs are not supported for the HF Automatic Speech Recognition Inference API. Please specify a single audio input attachment for Prompt: {prompt.name}."
    )

    attachment = prompt.input.attachments[0]

    validate_attachment_type_is_audio(attachment)
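The attachment validation in this hunk can be exercised in isolation. A minimal sketch, assuming hypothetical stand-in dataclasses for the real aiconfig `Prompt`/`Attachment` types (the field names and the inlined audio-MIME check are assumptions; the diff calls a separate `validate_attachment_type_is_audio` helper):

```python
from dataclasses import dataclass, field

# Hypothetical stand-ins for the real aiconfig types, just enough to
# exercise the single-audio-attachment validation shown above.
@dataclass
class Attachment:
    data: str
    mime_type: str = "audio/mpeg"

@dataclass
class PromptInput:
    attachments: list = field(default_factory=list)

@dataclass
class Prompt:
    name: str
    input: PromptInput

def validate_and_retrieve_audio_from_attachments(prompt: Prompt) -> str:
    # Mirrors the check in the hunk: exactly one audio attachment is allowed.
    if len(prompt.input.attachments) != 1:
        raise ValueError(
            f"Expected exactly one audio input attachment for Prompt: {prompt.name}."
        )
    attachment = prompt.input.attachments[0]
    # Inlined stand-in for validate_attachment_type_is_audio from the diff.
    if not attachment.mime_type.startswith("audio/"):
        raise ValueError(f"Attachment for Prompt {prompt.name} must be audio.")
    return attachment.data

prompt = Prompt("p1", PromptInput([Attachment("clip.wav", "audio/wav")]))
print(validate_and_retrieve_audio_from_attachments(prompt))
# → clip.wav
```

Raising early here keeps the downstream inference call from receiving ambiguous input: the HF ASR endpoint accepts a single audio payload per request.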
@@ -47,6 +47,13 @@ def refine_completion_params(model_settings: dict[Any, Any]) -> dict[str, Any]:
        if key.lower() in supported_keys:
            completion_data[key.lower()] = model_settings[key]

    # The default model is suno/bark, which requires an HF Pro subscription.
    # Instead, default to a free model (which supports remote inference) with
    # the next most "likes" in HF:
    # https://huggingface.co/models?pipeline_tag=text-to-speech&sort=likes

Contributor: Nit: I feel we should remove these as defaults from the input schema. Similar issue we've had where people can't use GPT-4 because that requires paid access.

Contributor (Author): Just so I understand, this comment is not relevant after the next PR, right? I just did them as separate PRs to separate concerns.

Contributor: Yeah, I guess my point is more that the comment "default model is suno/bark" won't apply anymore after the next PR, but not a blocker.
    if completion_data.get("model") is None:
        completion_data["model"] = "facebook/fastspeech2-en-ljspeech"

    return completion_data

