Skip to content

Commit

Permalink
feat: add tts and speech2text model for gpustack
Browse files Browse the repository at this point in the history
  • Loading branch information
alexcodelf committed Jan 7, 2025
1 parent 2dbc5ff commit 9d1b7cf
Show file tree
Hide file tree
Showing 8 changed files with 120 additions and 19 deletions.
4 changes: 2 additions & 2 deletions models/gpustack/manifest.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,9 @@ resource:
llm: true
moderation: false
rerank: true
speech2text: false
speech2text: true
text_embedding: true
tts: false
tts: true
tool:
enabled: true
type: plugin
Expand Down
18 changes: 10 additions & 8 deletions models/gpustack/models/llm/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
from dify_plugin import OAICompatLargeLanguageModel
from dify_plugin.entities.model.llm import LLMResult
from dify_plugin.entities.model.message import PromptMessage, PromptMessageTool
from yarl import URL


class GPUStackLanguageModel(OAICompatLargeLanguageModel):
Expand All @@ -17,10 +16,10 @@ def _invoke(
stream: bool = True,
user: str | None = None,
) -> LLMResult | Generator:
self._add_custom_parameters(credentials)
compatible_credentials = self._get_compatible_credentials(credentials)
return super()._invoke(
model,
credentials,
compatible_credentials,
prompt_messages,
model_parameters,
tools,
Expand All @@ -30,11 +29,14 @@ def _invoke(
)

def validate_credentials(self, model: str, credentials: dict) -> None:
self._add_custom_parameters(credentials)
super().validate_credentials(model, credentials)
compatible_credentials = self._get_compatible_credentials(credentials)
super().validate_credentials(model, compatible_credentials)

def _add_custom_parameters(self, credentials: dict) -> None:
credentials["endpoint_url"] = str(
URL(credentials["endpoint_url"]) / "v1-openai"
)
credentials["mode"] = "chat"

def _get_compatible_credentials(self, credentials: dict) -> dict:
credentials = credentials.copy()
base_url = credentials["endpoint_url"].rstrip("/").removesuffix("/v1-openai")
credentials["endpoint_url"] = f"{base_url}/v1-openai"
return credentials
Empty file.
38 changes: 38 additions & 0 deletions models/gpustack/models/speech2text/speech2text.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from typing import Optional

from dify_plugin import OAICompatSpeechToTextModel
from dify_plugin.entities.model.speech2text import SpeechToTextResult


class GPUStackSpeechToTextModel(OAICompatSpeechToTextModel):
    """
    Speech-to-text model backed by a GPUStack server.

    Each call is forwarded to the OpenAI-compatible base class after the
    endpoint URL in the credentials has been normalized to end with
    ``/v1-openai``.
    """

    def _invoke(
        self,
        model: str,
        credentials: dict,
        audio: bytes,
        user: Optional[str] = None,
    ) -> SpeechToTextResult:
        """
        Invoke the speech-to-text model
        :param model: model name
        :param credentials: model credentials
        :param audio: audio payload
        :param user: unique user id
        :return: result produced by the base class ``_invoke``
        """
        return super()._invoke(
            model,
            self._get_compatible_credentials(credentials),
            audio,
            user,
        )

    def validate_credentials(self, model: str, credentials: dict) -> None:
        """
        Validate model credentials against the normalized endpoint
        :param model: model name
        :param credentials: model credentials
        """
        normalized = self._get_compatible_credentials(credentials)
        super().validate_credentials(model, normalized)

    def _get_compatible_credentials(self, credentials: dict) -> dict:
        """
        Build a copy of the credentials whose endpoint ends with ``/v1-openai``
        :param credentials: model credentials (left unmodified)
        :return: shallow copy with a normalized ``endpoint_url``
        """
        api_suffix = "/v1-openai"
        patched = credentials.copy()
        endpoint = patched["endpoint_url"].rstrip("/")
        # Append the suffix only when it is not already there, so a URL that
        # already points at the OpenAI-compatible path is not doubled up.
        if not endpoint.endswith(api_suffix):
            endpoint = endpoint + api_suffix
        patched["endpoint_url"] = endpoint
        return patched


18 changes: 9 additions & 9 deletions models/gpustack/models/text_embedding/text_embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,15 @@ def _invoke(
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
self._add_custom_parameters(credentials)
return super()._invoke(model, credentials, texts, user, input_type)
compatible_credentials = self._get_compatible_credentials(credentials)
return super()._invoke(model, compatible_credentials, texts, user, input_type)

def validate_credentials(self, model: str, credentials: dict) -> None:
self._add_custom_parameters(credentials)
super().validate_credentials(model, credentials)
compatible_credentials = self._get_compatible_credentials(credentials)
super().validate_credentials(model, compatible_credentials)

@staticmethod
def _add_custom_parameters(credentials: dict) -> None:
credentials["endpoint_url"] = str(
URL(credentials["endpoint_url"]) / "v1-openai"
)
def _get_compatible_credentials(self, credentials: dict) -> dict:
credentials = credentials.copy()
base_url = credentials["endpoint_url"].rstrip("/").removesuffix("/v1-openai")
credentials["endpoint_url"] = f"{base_url}/v1-openai"
return credentials
Empty file.
43 changes: 43 additions & 0 deletions models/gpustack/models/tts/tts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
from collections.abc import Generator
from dify_plugin import OAICompatTextToSpeechModel
from dify_plugin.entities.model.tts import TTSResult


class GPUStackTextToSpeechModel(OAICompatTextToSpeechModel):
    """
    Model class for GPUStack Text to Speech model.

    Each call is delegated to the OpenAI-compatible base class after the
    endpoint URL in the credentials has been normalized to end with
    ``/v1-openai``.
    """

    def _invoke(
        self,
        model: str,
        credentials: dict,
        text: str,
        user: str | None = None,
    ) -> TTSResult | Generator:
        """
        Invoke the text-to-speech model
        :param model: model name
        :param credentials: model credentials
        :param text: input text
        :param user: unique user id
        :return: whatever the base class ``_invoke`` returns
        """
        compatible_credentials = self._get_compatible_credentials(credentials)
        return super()._invoke(model, compatible_credentials, text, user)

    # ``str | None`` is used instead of ``Optional[str]`` because this module
    # does not import ``Optional``; the unresolved name would raise NameError
    # as soon as the class body is evaluated.
    def validate_credentials(self, model: str, credentials: dict, user: str | None = None) -> None:
        """
        Validate model credentials
        :param model: model name
        :param credentials: model credentials
        :param user: unique user id (accepted but not forwarded to the base class)
        """
        compatible_credentials = self._get_compatible_credentials(credentials)
        super().validate_credentials(model, compatible_credentials)

    def _get_compatible_credentials(self, credentials: dict) -> dict:
        """
        Get compatible credentials
        :param credentials: model credentials (left unmodified)
        :return: shallow copy whose ``endpoint_url`` ends with ``/v1-openai``
        """
        compatible_credentials = credentials.copy()
        # Strip trailing slashes and any existing suffix first so the suffix
        # is appended exactly once.
        base_url = credentials["endpoint_url"].rstrip("/").removesuffix("/v1-openai")
        compatible_credentials["endpoint_url"] = f"{base_url}/v1-openai"

        return compatible_credentials
18 changes: 18 additions & 0 deletions models/gpustack/provider/gpustack.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,22 @@ model_credential_schema:
variable: __model_type
type: select
variable: vision_support
- variable: voices
show_on:
- variable: __model_type
value: tts
label:
en_US: Available Voices (comma-separated)
zh_Hans: 可用声音(用英文逗号分隔)
type: text-input
required: false
default: "Chinese Female"
placeholder:
en_US: "Chinese Female, Chinese Male, Japanese Male, Cantonese Female, English Female, English Male, Korean Female"
zh_Hans: "Chinese Female, Chinese Male, Japanese Male, Cantonese Female, English Female, English Male, Korean Female"
help:
en_US: "List voice names separated by commas. First voice will be used as default."
zh_Hans: "用英文逗号分隔的声音列表。第一个声音将作为默认值。"
model:
label:
en_US: Model Name
Expand All @@ -125,3 +141,5 @@ supported_model_types:
- llm
- text-embedding
- rerank
- speech2text
- tts

0 comments on commit 9d1b7cf

Please sign in to comment.