4 changes: 2 additions & 2 deletions src/llama_stack_client/_client.py
@@ -135,7 +135,7 @@ def __init__(
         if base_url is None:
             base_url = os.environ.get("LLAMA_STACK_BASE_URL")
         if base_url is None:
-            base_url = "http://any-hosted-llama-stack.com"
+            base_url = f"http://any-hosted-llama-stack.com"
 
         custom_headers = default_headers or {}
         custom_headers["X-LlamaStack-Client-Version"] = __version__
@@ -351,7 +351,7 @@ def __init__(
         if base_url is None:
             base_url = os.environ.get("LLAMA_STACK_BASE_URL")
         if base_url is None:
-            base_url = "http://any-hosted-llama-stack.com"
+            base_url = f"http://any-hosted-llama-stack.com"
 
         custom_headers = default_headers or {}
         custom_headers["X-LlamaStack-Client-Version"] = __version__
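A minimal sketch of the base URL fallback these two hunks touch: an explicit argument wins, then the LLAMA_STACK_BASE_URL environment variable, then the hosted placeholder. The localhost address below is an assumption for illustration, not something the diff prescribes.

import os

from llama_stack_client import LlamaStackClient

# Assumed local server address; any reachable Llama Stack endpoint works here.
os.environ["LLAMA_STACK_BASE_URL"] = "http://localhost:8321"

client = LlamaStackClient()  # no base_url argument: falls back to the env var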
4 changes: 2 additions & 2 deletions src/llama_stack_client/_files.py
@@ -71,7 +71,7 @@ def _transform_file(file: FileTypes) -> HttpxFileTypes:
     if is_tuple_t(file):
         return (file[0], _read_file_content(file[1]), *file[2:])
 
-    raise TypeError("Expected file types input to be a FileContent type or to be a tuple")
+    raise TypeError(f"Expected file types input to be a FileContent type or to be a tuple")
 
 
 def _read_file_content(file: FileContent) -> HttpxFileContent:
@@ -113,7 +113,7 @@ async def _async_transform_file(file: FileTypes) -> HttpxFileTypes:
     if is_tuple_t(file):
         return (file[0], await _async_read_file_content(file[1]), *file[2:])
 
-    raise TypeError("Expected file types input to be a FileContent type or to be a tuple")
+    raise TypeError(f"Expected file types input to be a FileContent type or to be a tuple")
 
 
 async def _async_read_file_content(file: FileContent) -> HttpxFileContent:
8 changes: 4 additions & 4 deletions src/llama_stack_client/_response.py
@@ -229,7 +229,7 @@ def _parse(self, *, to: type[_T] | None = None) -> R | _T:
             # the response class ourselves but that is something that should be supported directly in httpx
             # as it would be easy to incorrectly construct the Response object due to the multitude of arguments.
             if cast_to != httpx.Response:
-                raise ValueError("Subclasses of httpx.Response cannot be passed to `cast_to`")
+                raise ValueError(f"Subclasses of httpx.Response cannot be passed to `cast_to`")
             return cast(R, response)
 
         if (
@@ -245,9 +245,9 @@ def _parse(self, *, to: type[_T] | None = None) -> R | _T:
 
         if (
             cast_to is not object
-            and origin is not list
-            and origin is not dict
-            and origin is not Union
+            and not origin is list
+            and not origin is dict
+            and not origin is Union
             and not issubclass(origin, BaseModel)
         ):
            raise RuntimeError(
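The condition rewrite above is behavior-neutral: `not origin is list` parses as `not (origin is list)`, which always yields the same truth value as `origin is not list`. A quick sanity check:

# Both spellings produce identical truth values for any object.
for origin in (list, dict, int):
    assert (origin is not list) == (not origin is list)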
72 changes: 66 additions & 6 deletions src/llama_stack_client/resources/agents/turn.py
@@ -247,8 +247,18 @@ def resume(
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
     ) -> Turn:
-        """
+        """Resume an agent turn with executed tool call responses.
+
+        When a Turn has the
+        status `awaiting_input` due to pending input from client side tool calls, this
+        endpoint can be used to submit the outputs from the tool calls once they are
+        ready.
+
         Args:
+          tool_responses: The tool call responses to resume the turn with.
+
+          stream: Whether to stream the response.
+
           extra_headers: Send extra headers
 
           extra_query: Add additional query parameters to the request
@@ -275,8 +285,18 @@ def resume(
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
     ) -> Stream[AgentTurnResponseStreamChunk]:
-        """
+        """Resume an agent turn with executed tool call responses.
+
+        When a Turn has the
+        status `awaiting_input` due to pending input from client side tool calls, this
+        endpoint can be used to submit the outputs from the tool calls once they are
+        ready.
+
         Args:
+          stream: Whether to stream the response.
+
+          tool_responses: The tool call responses to resume the turn with.
+
           extra_headers: Send extra headers
 
           extra_query: Add additional query parameters to the request
@@ -303,8 +323,18 @@ def resume(
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
     ) -> Turn | Stream[AgentTurnResponseStreamChunk]:
-        """
+        """Resume an agent turn with executed tool call responses.
+
+        When a Turn has the
+        status `awaiting_input` due to pending input from client side tool calls, this
+        endpoint can be used to submit the outputs from the tool calls once they are
+        ready.
+
         Args:
+          stream: Whether to stream the response.
+
+          tool_responses: The tool call responses to resume the turn with.
+
           extra_headers: Send extra headers
 
           extra_query: Add additional query parameters to the request
@@ -571,8 +601,18 @@ async def resume(
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
     ) -> Turn:
-        """
+        """Resume an agent turn with executed tool call responses.
+
+        When a Turn has the
+        status `awaiting_input` due to pending input from client side tool calls, this
+        endpoint can be used to submit the outputs from the tool calls once they are
+        ready.
+
         Args:
+          tool_responses: The tool call responses to resume the turn with.
+
+          stream: Whether to stream the response.
+
           extra_headers: Send extra headers
 
           extra_query: Add additional query parameters to the request
@@ -599,8 +639,18 @@ async def resume(
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
     ) -> AsyncStream[AgentTurnResponseStreamChunk]:
-        """
+        """Resume an agent turn with executed tool call responses.
+
+        When a Turn has the
+        status `awaiting_input` due to pending input from client side tool calls, this
+        endpoint can be used to submit the outputs from the tool calls once they are
+        ready.
+
         Args:
+          stream: Whether to stream the response.
+
+          tool_responses: The tool call responses to resume the turn with.
+
           extra_headers: Send extra headers
 
           extra_query: Add additional query parameters to the request
@@ -627,8 +677,18 @@ async def resume(
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
     ) -> Turn | AsyncStream[AgentTurnResponseStreamChunk]:
-        """
+        """Resume an agent turn with executed tool call responses.
+
+        When a Turn has the
+        status `awaiting_input` due to pending input from client side tool calls, this
+        endpoint can be used to submit the outputs from the tool calls once they are
+        ready.
+
         Args:
+          stream: Whether to stream the response.
+
+          tool_responses: The tool call responses to resume the turn with.
+
           extra_headers: Send extra headers
 
           extra_query: Add additional query parameters to the request
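A hedged sketch of the resume flow these docstrings describe, once a turn reports status `awaiting_input`. All identifiers and the tool-response payload shape below are hypothetical, not taken from the diff.

from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # assumed address

turn = client.agents.turn.resume(
    turn_id="turn-123",  # hypothetical: comes from the turn that paused
    agent_id="agent-456",
    session_id="session-789",
    tool_responses=[
        {
            "role": "tool",
            "call_id": "call-1",         # matches the pending client-side tool call
            "tool_name": "get_weather",  # hypothetical tool
            "content": '{"temperature_f": 72}',
        }
    ],
    stream=False,  # stream=True instead yields AgentTurnResponseStreamChunk events
)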
49 changes: 40 additions & 9 deletions src/llama_stack_client/resources/inference.py
@@ -2,7 +2,7 @@
 
 from __future__ import annotations
 
-from typing import List, Iterable
+from typing import List, Union, Iterable
 from typing_extensions import Literal, overload
 
 import httpx
@@ -36,6 +36,7 @@
 from ..types.shared.chat_completion_response import ChatCompletionResponse
 from ..types.shared_params.interleaved_content import InterleavedContent
 from ..types.chat_completion_response_stream_chunk import ChatCompletionResponseStreamChunk
+from ..types.shared_params.interleaved_content_item import InterleavedContentItem
 
 __all__ = ["InferenceResource", "AsyncInferenceResource"]
 
@@ -493,8 +494,11 @@ def embeddings(
     def embeddings(
         self,
         *,
-        contents: List[InterleavedContent],
+        contents: Union[List[str], Iterable[InterleavedContentItem]],
         model_id: str,
+        output_dimension: int | NotGiven = NOT_GIVEN,
+        task_type: Literal["query", "document"] | NotGiven = NOT_GIVEN,
+        text_truncation: Literal["none", "start", "end"] | NotGiven = NOT_GIVEN,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
         extra_headers: Headers | None = None,
@@ -506,13 +510,22 @@ def embeddings(
         Generate embeddings for content pieces using the specified model.
 
         Args:
-          contents: List of contents to generate embeddings for. Note that content can be
-              multimodal. The behavior depends on the model and provider. Some models may only
-              support text.
+          contents: List of contents to generate embeddings for. Each content can be a string or an
+              InterleavedContentItem (and hence can be multimodal). The behavior depends on
+              the model and provider. Some models may only support text.
 
           model_id: The identifier of the model to use. The model must be an embedding model
               registered with Llama Stack and available via the /models endpoint.
 
+          output_dimension: (Optional) Output dimensionality for the embeddings. Only supported by
+              Matryoshka models.
+
+          task_type: (Optional) How is the embedding being used? This is only supported by asymmetric
+              embedding models.
+
+          text_truncation: (Optional) Config for how to truncate text for embedding when text is longer
+              than the model's max sequence length.
+
           extra_headers: Send extra headers
 
           extra_query: Add additional query parameters to the request
@@ -527,6 +540,9 @@ def embeddings(
                 {
                     "contents": contents,
                     "model_id": model_id,
+                    "output_dimension": output_dimension,
+                    "task_type": task_type,
+                    "text_truncation": text_truncation,
                 },
                 inference_embeddings_params.InferenceEmbeddingsParams,
             ),
@@ -990,8 +1006,11 @@ async def embeddings(
     async def embeddings(
         self,
         *,
-        contents: List[InterleavedContent],
+        contents: Union[List[str], Iterable[InterleavedContentItem]],
         model_id: str,
+        output_dimension: int | NotGiven = NOT_GIVEN,
+        task_type: Literal["query", "document"] | NotGiven = NOT_GIVEN,
+        text_truncation: Literal["none", "start", "end"] | NotGiven = NOT_GIVEN,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
         extra_headers: Headers | None = None,
@@ -1003,13 +1022,22 @@ async def embeddings(
         Generate embeddings for content pieces using the specified model.
 
         Args:
-          contents: List of contents to generate embeddings for. Note that content can be
-              multimodal. The behavior depends on the model and provider. Some models may only
-              support text.
+          contents: List of contents to generate embeddings for. Each content can be a string or an
+              InterleavedContentItem (and hence can be multimodal). The behavior depends on
+              the model and provider. Some models may only support text.
 
           model_id: The identifier of the model to use. The model must be an embedding model
               registered with Llama Stack and available via the /models endpoint.
 
+          output_dimension: (Optional) Output dimensionality for the embeddings. Only supported by
+              Matryoshka models.
+
+          task_type: (Optional) How is the embedding being used? This is only supported by asymmetric
+              embedding models.
+
+          text_truncation: (Optional) Config for how to truncate text for embedding when text is longer
+              than the model's max sequence length.
+
           extra_headers: Send extra headers
 
           extra_query: Add additional query parameters to the request
@@ -1024,6 +1052,9 @@ async def embeddings(
                 {
                     "contents": contents,
                     "model_id": model_id,
+                    "output_dimension": output_dimension,
+                    "task_type": task_type,
+                    "text_truncation": text_truncation,
                 },
                 inference_embeddings_params.InferenceEmbeddingsParams,
             ),
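With `contents` widened to accept plain strings, a sketch of the new embeddings parameters; the model id and dimension are assumptions, and each optional argument only applies where its docstring note says so.

from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # assumed address

response = client.inference.embeddings(
    model_id="all-MiniLM-L6-v2",  # assumed: an embedding model registered via /models
    contents=["first passage", "second passage"],  # plain strings now type-check
    output_dimension=256,  # honored only by Matryoshka models
    task_type="document",  # only meaningful for asymmetric embedding models
    text_truncation="end",  # truncate overlong inputs from the end
)
print(len(response.embeddings))  # one vector per input content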
3 changes: 3 additions & 0 deletions src/llama_stack_client/types/agents/turn_resume_params.py
@@ -16,14 +16,17 @@ class TurnResumeParamsBase(TypedDict, total=False):
     session_id: Required[str]
 
     tool_responses: Required[Iterable[ToolResponseMessage]]
+    """The tool call responses to resume the turn with."""
 
 
 class TurnResumeParamsNonStreaming(TurnResumeParamsBase, total=False):
     stream: Literal[False]
+    """Whether to stream the response."""
 
 
 class TurnResumeParamsStreaming(TurnResumeParamsBase):
     stream: Required[Literal[True]]
+    """Whether to stream the response."""
 
 
 TurnResumeParams = Union[TurnResumeParamsNonStreaming, TurnResumeParamsStreaming]
31 changes: 25 additions & 6 deletions src/llama_stack_client/types/inference_embeddings_params.py
@@ -2,20 +2,21 @@
 
 from __future__ import annotations
 
-from typing import List
-from typing_extensions import Required, TypedDict
+from typing import List, Union, Iterable
+from typing_extensions import Literal, Required, TypedDict
 
-from .shared_params.interleaved_content import InterleavedContent
+from .shared_params.interleaved_content_item import InterleavedContentItem
 
 __all__ = ["InferenceEmbeddingsParams"]
 
 
 class InferenceEmbeddingsParams(TypedDict, total=False):
-    contents: Required[List[InterleavedContent]]
+    contents: Required[Union[List[str], Iterable[InterleavedContentItem]]]
     """List of contents to generate embeddings for.
 
-    Note that content can be multimodal. The behavior depends on the model and
-    provider. Some models may only support text.
+    Each content can be a string or an InterleavedContentItem (and hence can be
+    multimodal). The behavior depends on the model and provider. Some models may
+    only support text.
     """
 
     model_id: Required[str]
@@ -24,3 +25,21 @@ class InferenceEmbeddingsParams(TypedDict, total=False):
     The model must be an embedding model registered with Llama Stack and available
     via the /models endpoint.
     """
+
+    output_dimension: int
+    """(Optional) Output dimensionality for the embeddings.
+
+    Only supported by Matryoshka models.
+    """
+
+    task_type: Literal["query", "document"]
+    """
+    (Optional) How is the embedding being used? This is only supported by asymmetric
+    embedding models.
+    """
+
+    text_truncation: Literal["none", "start", "end"]
+    """
+    (Optional) Config for how to truncate text for embedding when text is longer
+    than the model's max sequence length.
+    """
4 changes: 3 additions & 1 deletion src/llama_stack_client/types/shared/query_result.py
@@ -1,6 +1,6 @@
 # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
 
-from typing import Optional
+from typing import Dict, List, Union, Optional
 
 from ..._models import BaseModel
 from .interleaved_content import InterleavedContent
@@ -9,5 +9,7 @@
 
 
 class QueryResult(BaseModel):
+    metadata: Dict[str, Union[bool, float, str, List[object], object, None]]
+
     content: Optional[InterleavedContent] = None
     """A image content item"""
4 changes: 3 additions & 1 deletion src/llama_stack_client/types/tool_invocation_result.py
@@ -1,6 +1,6 @@
 # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
 
-from typing import Optional
+from typing import Dict, List, Union, Optional
 
 from .._models import BaseModel
 from .shared.interleaved_content import InterleavedContent
@@ -15,3 +15,5 @@ class ToolInvocationResult(BaseModel):
     error_code: Optional[int] = None
 
     error_message: Optional[str] = None
+
+    metadata: Optional[Dict[str, Union[bool, float, str, List[object], object, None]]] = None
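`ToolInvocationResult` gains the same mapping as an optional field; a sketch, assuming its `content` field accepts a plain string (InterleavedContent includes str):

from llama_stack_client.types.tool_invocation_result import ToolInvocationResult

result = ToolInvocationResult(content="42", metadata={"cache_hit": False})
print(result.metadata)  # None whenever a tool reports no metadata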