diff --git a/src/llama_stack_client/_base_client.py b/src/llama_stack_client/_base_client.py index bd7fe8ce..c8b0b413 100644 --- a/src/llama_stack_client/_base_client.py +++ b/src/llama_stack_client/_base_client.py @@ -418,10 +418,17 @@ def _build_headers(self, options: FinalRequestOptions, *, retries_taken: int = 0 if idempotency_header and options.method.lower() != "get" and idempotency_header not in headers: headers[idempotency_header] = options.idempotency_key or self._idempotency_key() - # Don't set the retry count header if it was already set or removed by the caller. We check + # Don't set these headers if they were already set or removed by the caller. We check # `custom_headers`, which can contain `Omit()`, instead of `headers` to account for the removal case. - if "x-stainless-retry-count" not in (header.lower() for header in custom_headers): + lower_custom_headers = [header.lower() for header in custom_headers] + if "x-stainless-retry-count" not in lower_custom_headers: headers["x-stainless-retry-count"] = str(retries_taken) + if "x-stainless-read-timeout" not in lower_custom_headers: + timeout = self.timeout if isinstance(options.timeout, NotGiven) else options.timeout + if isinstance(timeout, Timeout): + timeout = timeout.read + if timeout is not None: + headers["x-stainless-read-timeout"] = str(timeout) return headers diff --git a/src/llama_stack_client/_client.py b/src/llama_stack_client/_client.py index 584da069..35d993d8 100644 --- a/src/llama_stack_client/_client.py +++ b/src/llama_stack_client/_client.py @@ -98,12 +98,13 @@ class LlamaStackClient(SyncAPIClient): with_streaming_response: LlamaStackClientWithStreamedResponse # client options + api_key: str | None def __init__( self, *, - base_url: str | httpx.URL | None = None, api_key: str | None = None, + base_url: str | httpx.URL | None = None, timeout: Union[float, Timeout, None, NotGiven] = NOT_GIVEN, max_retries: int = DEFAULT_MAX_RETRIES, default_headers: Mapping[str, str] | None = None, @@ -123,19 +124,20 @@ def __init__( _strict_response_validation: bool = False, provider_data: Mapping[str, Any] | None = None, ) -> None: - """Construct a new synchronous llama-stack-client client instance.""" - if base_url is None: - base_url = os.environ.get("LLAMA_STACK_CLIENT_BASE_URL") - if base_url is None: - base_url = f"http://any-hosted-llama-stack.com" + """Construct a new synchronous llama-stack-client client instance. + This automatically infers the `api_key` argument from the `LLAMA_STACK_CLIENT_API_KEY` environment variable if it is not provided. 
+ """ if api_key is None: api_key = os.environ.get("LLAMA_STACK_CLIENT_API_KEY") self.api_key = api_key + if base_url is None: + base_url = os.environ.get("LLAMA_STACK_CLIENT_BASE_URL") + if base_url is None: + base_url = f"http://any-hosted-llama-stack.com" + custom_headers = default_headers or {} - if api_key is not None: - custom_headers["Authorization"] = f"Bearer {api_key}" custom_headers["X-LlamaStack-Client-Version"] = __version__ if provider_data is not None: custom_headers["X-LlamaStack-Provider-Data"] = json.dumps(provider_data) @@ -182,6 +184,14 @@ def __init__( def qs(self) -> Querystring: return Querystring(array_format="comma") + @property + @override + def auth_headers(self) -> dict[str, str]: + api_key = self.api_key + if api_key is None: + return {} + return {"Authorization": f"Bearer {api_key}"} + @property @override def default_headers(self) -> dict[str, str | Omit]: @@ -194,8 +204,8 @@ def default_headers(self) -> dict[str, str | Omit]: def copy( self, *, - base_url: str | httpx.URL | None = None, api_key: str | None = None, + base_url: str | httpx.URL | None = None, timeout: float | Timeout | None | NotGiven = NOT_GIVEN, http_client: httpx.Client | None = None, max_retries: int | NotGiven = NOT_GIVEN, @@ -228,8 +238,8 @@ def copy( http_client = http_client or self._client return self.__class__( - base_url=base_url or self.base_url, api_key=api_key or self.api_key, + base_url=base_url or self.base_url, timeout=self.timeout if isinstance(timeout, NotGiven) else timeout, http_client=http_client, max_retries=max_retries if is_given(max_retries) else self.max_retries, @@ -304,12 +314,13 @@ class AsyncLlamaStackClient(AsyncAPIClient): with_streaming_response: AsyncLlamaStackClientWithStreamedResponse # client options + api_key: str | None def __init__( self, *, - base_url: str | httpx.URL | None = None, api_key: str | None = None, + base_url: str | httpx.URL | None = None, timeout: Union[float, Timeout, None, NotGiven] = NOT_GIVEN, max_retries: int = DEFAULT_MAX_RETRIES, default_headers: Mapping[str, str] | None = None, @@ -329,19 +340,20 @@ def __init__( _strict_response_validation: bool = False, provider_data: Mapping[str, Any] | None = None, ) -> None: - """Construct a new async llama-stack-client client instance.""" - if base_url is None: - base_url = os.environ.get("LLAMA_STACK_CLIENT_BASE_URL") - if base_url is None: - base_url = f"http://any-hosted-llama-stack.com" + """Construct a new async llama-stack-client client instance. + This automatically infers the `api_key` argument from the `LLAMA_STACK_CLIENT_API_KEY` environment variable if it is not provided. 
+ """ if api_key is None: api_key = os.environ.get("LLAMA_STACK_CLIENT_API_KEY") self.api_key = api_key + if base_url is None: + base_url = os.environ.get("LLAMA_STACK_CLIENT_BASE_URL") + if base_url is None: + base_url = f"http://any-hosted-llama-stack.com" + custom_headers = default_headers or {} - if api_key is not None: - custom_headers["Authorization"] = f"Bearer {api_key}" custom_headers["X-LlamaStack-Client-Version"] = __version__ if provider_data is not None: custom_headers["X-LlamaStack-Provider-Data"] = json.dumps(provider_data) @@ -388,6 +400,14 @@ def __init__( def qs(self) -> Querystring: return Querystring(array_format="comma") + @property + @override + def auth_headers(self) -> dict[str, str]: + api_key = self.api_key + if api_key is None: + return {} + return {"Authorization": f"Bearer {api_key}"} + @property @override def default_headers(self) -> dict[str, str | Omit]: @@ -400,8 +420,8 @@ def default_headers(self) -> dict[str, str | Omit]: def copy( self, *, - base_url: str | httpx.URL | None = None, api_key: str | None = None, + base_url: str | httpx.URL | None = None, timeout: float | Timeout | None | NotGiven = NOT_GIVEN, http_client: httpx.AsyncClient | None = None, max_retries: int | NotGiven = NOT_GIVEN, @@ -434,8 +454,8 @@ def copy( http_client = http_client or self._client return self.__class__( - base_url=base_url or self.base_url, api_key=api_key or self.api_key, + base_url=base_url or self.base_url, timeout=self.timeout if isinstance(timeout, NotGiven) else timeout, http_client=http_client, max_retries=max_retries if is_given(max_retries) else self.max_retries, diff --git a/src/llama_stack_client/_constants.py b/src/llama_stack_client/_constants.py index a2ac3b6f..6ddf2c71 100644 --- a/src/llama_stack_client/_constants.py +++ b/src/llama_stack_client/_constants.py @@ -6,7 +6,7 @@ OVERRIDE_CAST_TO_HEADER = "____stainless_override_cast_to" # default timeout is 1 minute -DEFAULT_TIMEOUT = httpx.Timeout(timeout=60.0, connect=5.0) +DEFAULT_TIMEOUT = httpx.Timeout(timeout=60, connect=5.0) DEFAULT_MAX_RETRIES = 2 DEFAULT_CONNECTION_LIMITS = httpx.Limits(max_connections=100, max_keepalive_connections=20) diff --git a/src/llama_stack_client/resources/agents/turn.py b/src/llama_stack_client/resources/agents/turn.py index 272ea4d9..da659e26 100644 --- a/src/llama_stack_client/resources/agents/turn.py +++ b/src/llama_stack_client/resources/agents/turn.py @@ -59,6 +59,7 @@ def create( messages: Iterable[turn_create_params.Message], documents: Iterable[turn_create_params.Document] | NotGiven = NOT_GIVEN, stream: Literal[False] | NotGiven = NOT_GIVEN, + tool_config: turn_create_params.ToolConfig | NotGiven = NOT_GIVEN, toolgroups: List[turn_create_params.Toolgroup] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. @@ -69,6 +70,8 @@ def create( ) -> Turn: """ Args: + tool_config: Configuration for tool use. 
+ extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -88,6 +91,7 @@ def create( messages: Iterable[turn_create_params.Message], stream: Literal[True], documents: Iterable[turn_create_params.Document] | NotGiven = NOT_GIVEN, + tool_config: turn_create_params.ToolConfig | NotGiven = NOT_GIVEN, toolgroups: List[turn_create_params.Toolgroup] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. @@ -98,6 +102,8 @@ def create( ) -> Stream[AgentTurnResponseStreamChunk]: """ Args: + tool_config: Configuration for tool use. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -117,6 +123,7 @@ def create( messages: Iterable[turn_create_params.Message], stream: bool, documents: Iterable[turn_create_params.Document] | NotGiven = NOT_GIVEN, + tool_config: turn_create_params.ToolConfig | NotGiven = NOT_GIVEN, toolgroups: List[turn_create_params.Toolgroup] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. @@ -127,6 +134,8 @@ def create( ) -> Turn | Stream[AgentTurnResponseStreamChunk]: """ Args: + tool_config: Configuration for tool use. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -146,6 +155,7 @@ def create( messages: Iterable[turn_create_params.Message], documents: Iterable[turn_create_params.Document] | NotGiven = NOT_GIVEN, stream: Literal[False] | Literal[True] | NotGiven = NOT_GIVEN, + tool_config: turn_create_params.ToolConfig | NotGiven = NOT_GIVEN, toolgroups: List[turn_create_params.Toolgroup] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. @@ -165,6 +175,7 @@ def create( "messages": messages, "documents": documents, "stream": stream, + "tool_config": tool_config, "toolgroups": toolgroups, }, turn_create_params.TurnCreateParams, @@ -244,6 +255,7 @@ async def create( messages: Iterable[turn_create_params.Message], documents: Iterable[turn_create_params.Document] | NotGiven = NOT_GIVEN, stream: Literal[False] | NotGiven = NOT_GIVEN, + tool_config: turn_create_params.ToolConfig | NotGiven = NOT_GIVEN, toolgroups: List[turn_create_params.Toolgroup] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. @@ -254,6 +266,8 @@ async def create( ) -> Turn: """ Args: + tool_config: Configuration for tool use. 
+ extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -273,6 +287,7 @@ async def create( messages: Iterable[turn_create_params.Message], stream: Literal[True], documents: Iterable[turn_create_params.Document] | NotGiven = NOT_GIVEN, + tool_config: turn_create_params.ToolConfig | NotGiven = NOT_GIVEN, toolgroups: List[turn_create_params.Toolgroup] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. @@ -283,6 +298,8 @@ async def create( ) -> AsyncStream[AgentTurnResponseStreamChunk]: """ Args: + tool_config: Configuration for tool use. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -302,6 +319,7 @@ async def create( messages: Iterable[turn_create_params.Message], stream: bool, documents: Iterable[turn_create_params.Document] | NotGiven = NOT_GIVEN, + tool_config: turn_create_params.ToolConfig | NotGiven = NOT_GIVEN, toolgroups: List[turn_create_params.Toolgroup] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. @@ -312,6 +330,8 @@ async def create( ) -> Turn | AsyncStream[AgentTurnResponseStreamChunk]: """ Args: + tool_config: Configuration for tool use. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -331,6 +351,7 @@ async def create( messages: Iterable[turn_create_params.Message], documents: Iterable[turn_create_params.Document] | NotGiven = NOT_GIVEN, stream: Literal[False] | Literal[True] | NotGiven = NOT_GIVEN, + tool_config: turn_create_params.ToolConfig | NotGiven = NOT_GIVEN, toolgroups: List[turn_create_params.Toolgroup] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. @@ -350,6 +371,7 @@ async def create( "messages": messages, "documents": documents, "stream": stream, + "tool_config": tool_config, "toolgroups": toolgroups, }, turn_create_params.TurnCreateParams, diff --git a/src/llama_stack_client/resources/batch_inference.py b/src/llama_stack_client/resources/batch_inference.py index 39ddc1ee..fdae58fa 100644 --- a/src/llama_stack_client/resources/batch_inference.py +++ b/src/llama_stack_client/resources/batch_inference.py @@ -72,6 +72,14 @@ def chat_completion( ) -> BatchInferenceChatCompletionResponse: """ Args: + response_format: Configuration for JSON schema-guided response generation. + + tool_choice: Whether tool use is required or automatic. This is a hint to the model which may + not be followed. It depends on the Instruction Following capabilities of the + model. + + tool_prompt_format: Prompt format for calling custom / zero shot tools. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -118,6 +126,8 @@ def completion( ) -> BatchCompletion: """ Args: + response_format: Configuration for JSON schema-guided response generation. 
+ extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -185,6 +195,14 @@ async def chat_completion( ) -> BatchInferenceChatCompletionResponse: """ Args: + response_format: Configuration for JSON schema-guided response generation. + + tool_choice: Whether tool use is required or automatic. This is a hint to the model which may + not be followed. It depends on the Instruction Following capabilities of the + model. + + tool_prompt_format: Prompt format for calling custom / zero shot tools. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -231,6 +249,8 @@ async def completion( ) -> BatchCompletion: """ Args: + response_format: Configuration for JSON schema-guided response generation. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request diff --git a/src/llama_stack_client/resources/inference.py b/src/llama_stack_client/resources/inference.py index c837a6e4..9990b208 100644 --- a/src/llama_stack_client/resources/inference.py +++ b/src/llama_stack_client/resources/inference.py @@ -71,6 +71,7 @@ def chat_completion( sampling_params: SamplingParams | NotGiven = NOT_GIVEN, stream: Literal[False] | NotGiven = NOT_GIVEN, tool_choice: Literal["auto", "required"] | NotGiven = NOT_GIVEN, + tool_config: inference_chat_completion_params.ToolConfig | NotGiven = NOT_GIVEN, tool_prompt_format: Literal["json", "function_tag", "python_list"] | NotGiven = NOT_GIVEN, tools: Iterable[inference_chat_completion_params.Tool] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. @@ -103,14 +104,17 @@ def chat_completion( False. tool_choice: (Optional) Whether tool use is required or automatic. Defaults to - ToolChoice.auto. + ToolChoice.auto. .. deprecated:: Use tool_config instead. + + tool_config: (Optional) Configuration for tool use. tool_prompt_format: (Optional) Instructs the model how to format tool calls. By default, Llama Stack will attempt to use a format that is best adapted to the model. - `ToolPromptFormat.json`: The tool calls are formatted as a JSON object. - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a tag. - `ToolPromptFormat.python_list`: The tool calls - are output as Python syntax -- a list of function calls. + are output as Python syntax -- a list of function calls. .. deprecated:: Use + tool_config instead. tools: (Optional) List of tool definitions available to the model @@ -135,6 +139,7 @@ def chat_completion( response_format: ResponseFormat | NotGiven = NOT_GIVEN, sampling_params: SamplingParams | NotGiven = NOT_GIVEN, tool_choice: Literal["auto", "required"] | NotGiven = NOT_GIVEN, + tool_config: inference_chat_completion_params.ToolConfig | NotGiven = NOT_GIVEN, tool_prompt_format: Literal["json", "function_tag", "python_list"] | NotGiven = NOT_GIVEN, tools: Iterable[inference_chat_completion_params.Tool] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. @@ -167,14 +172,17 @@ def chat_completion( sampling_params: Parameters to control the sampling strategy tool_choice: (Optional) Whether tool use is required or automatic. Defaults to - ToolChoice.auto. + ToolChoice.auto. .. deprecated:: Use tool_config instead. + + tool_config: (Optional) Configuration for tool use. tool_prompt_format: (Optional) Instructs the model how to format tool calls. 
By default, Llama Stack will attempt to use a format that is best adapted to the model. - `ToolPromptFormat.json`: The tool calls are formatted as a JSON object. - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a tag. - `ToolPromptFormat.python_list`: The tool calls - are output as Python syntax -- a list of function calls. + are output as Python syntax -- a list of function calls. .. deprecated:: Use + tool_config instead. tools: (Optional) List of tool definitions available to the model @@ -199,6 +207,7 @@ def chat_completion( response_format: ResponseFormat | NotGiven = NOT_GIVEN, sampling_params: SamplingParams | NotGiven = NOT_GIVEN, tool_choice: Literal["auto", "required"] | NotGiven = NOT_GIVEN, + tool_config: inference_chat_completion_params.ToolConfig | NotGiven = NOT_GIVEN, tool_prompt_format: Literal["json", "function_tag", "python_list"] | NotGiven = NOT_GIVEN, tools: Iterable[inference_chat_completion_params.Tool] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. @@ -231,14 +240,17 @@ def chat_completion( sampling_params: Parameters to control the sampling strategy tool_choice: (Optional) Whether tool use is required or automatic. Defaults to - ToolChoice.auto. + ToolChoice.auto. .. deprecated:: Use tool_config instead. + + tool_config: (Optional) Configuration for tool use. tool_prompt_format: (Optional) Instructs the model how to format tool calls. By default, Llama Stack will attempt to use a format that is best adapted to the model. - `ToolPromptFormat.json`: The tool calls are formatted as a JSON object. - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a tag. - `ToolPromptFormat.python_list`: The tool calls - are output as Python syntax -- a list of function calls. + are output as Python syntax -- a list of function calls. .. deprecated:: Use + tool_config instead. tools: (Optional) List of tool definitions available to the model @@ -263,6 +275,7 @@ def chat_completion( sampling_params: SamplingParams | NotGiven = NOT_GIVEN, stream: Literal[False] | Literal[True] | NotGiven = NOT_GIVEN, tool_choice: Literal["auto", "required"] | NotGiven = NOT_GIVEN, + tool_config: inference_chat_completion_params.ToolConfig | NotGiven = NOT_GIVEN, tool_prompt_format: Literal["json", "function_tag", "python_list"] | NotGiven = NOT_GIVEN, tools: Iterable[inference_chat_completion_params.Tool] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. @@ -285,6 +298,7 @@ def chat_completion( "sampling_params": sampling_params, "stream": stream, "tool_choice": tool_choice, + "tool_config": tool_config, "tool_prompt_format": tool_prompt_format, "tools": tools, }, @@ -554,6 +568,7 @@ async def chat_completion( sampling_params: SamplingParams | NotGiven = NOT_GIVEN, stream: Literal[False] | NotGiven = NOT_GIVEN, tool_choice: Literal["auto", "required"] | NotGiven = NOT_GIVEN, + tool_config: inference_chat_completion_params.ToolConfig | NotGiven = NOT_GIVEN, tool_prompt_format: Literal["json", "function_tag", "python_list"] | NotGiven = NOT_GIVEN, tools: Iterable[inference_chat_completion_params.Tool] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. @@ -586,14 +601,17 @@ async def chat_completion( False. tool_choice: (Optional) Whether tool use is required or automatic. Defaults to - ToolChoice.auto. 
+ ToolChoice.auto. .. deprecated:: Use tool_config instead. + + tool_config: (Optional) Configuration for tool use. tool_prompt_format: (Optional) Instructs the model how to format tool calls. By default, Llama Stack will attempt to use a format that is best adapted to the model. - `ToolPromptFormat.json`: The tool calls are formatted as a JSON object. - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a tag. - `ToolPromptFormat.python_list`: The tool calls - are output as Python syntax -- a list of function calls. + are output as Python syntax -- a list of function calls. .. deprecated:: Use + tool_config instead. tools: (Optional) List of tool definitions available to the model @@ -618,6 +636,7 @@ async def chat_completion( response_format: ResponseFormat | NotGiven = NOT_GIVEN, sampling_params: SamplingParams | NotGiven = NOT_GIVEN, tool_choice: Literal["auto", "required"] | NotGiven = NOT_GIVEN, + tool_config: inference_chat_completion_params.ToolConfig | NotGiven = NOT_GIVEN, tool_prompt_format: Literal["json", "function_tag", "python_list"] | NotGiven = NOT_GIVEN, tools: Iterable[inference_chat_completion_params.Tool] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. @@ -650,14 +669,17 @@ async def chat_completion( sampling_params: Parameters to control the sampling strategy tool_choice: (Optional) Whether tool use is required or automatic. Defaults to - ToolChoice.auto. + ToolChoice.auto. .. deprecated:: Use tool_config instead. + + tool_config: (Optional) Configuration for tool use. tool_prompt_format: (Optional) Instructs the model how to format tool calls. By default, Llama Stack will attempt to use a format that is best adapted to the model. - `ToolPromptFormat.json`: The tool calls are formatted as a JSON object. - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a tag. - `ToolPromptFormat.python_list`: The tool calls - are output as Python syntax -- a list of function calls. + are output as Python syntax -- a list of function calls. .. deprecated:: Use + tool_config instead. tools: (Optional) List of tool definitions available to the model @@ -682,6 +704,7 @@ async def chat_completion( response_format: ResponseFormat | NotGiven = NOT_GIVEN, sampling_params: SamplingParams | NotGiven = NOT_GIVEN, tool_choice: Literal["auto", "required"] | NotGiven = NOT_GIVEN, + tool_config: inference_chat_completion_params.ToolConfig | NotGiven = NOT_GIVEN, tool_prompt_format: Literal["json", "function_tag", "python_list"] | NotGiven = NOT_GIVEN, tools: Iterable[inference_chat_completion_params.Tool] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. @@ -714,14 +737,17 @@ async def chat_completion( sampling_params: Parameters to control the sampling strategy tool_choice: (Optional) Whether tool use is required or automatic. Defaults to - ToolChoice.auto. + ToolChoice.auto. .. deprecated:: Use tool_config instead. + + tool_config: (Optional) Configuration for tool use. tool_prompt_format: (Optional) Instructs the model how to format tool calls. By default, Llama Stack will attempt to use a format that is best adapted to the model. - `ToolPromptFormat.json`: The tool calls are formatted as a JSON object. - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a tag. - `ToolPromptFormat.python_list`: The tool calls - are output as Python syntax -- a list of function calls. 
+ are output as Python syntax -- a list of function calls. .. deprecated:: Use + tool_config instead. tools: (Optional) List of tool definitions available to the model @@ -746,6 +772,7 @@ async def chat_completion( sampling_params: SamplingParams | NotGiven = NOT_GIVEN, stream: Literal[False] | Literal[True] | NotGiven = NOT_GIVEN, tool_choice: Literal["auto", "required"] | NotGiven = NOT_GIVEN, + tool_config: inference_chat_completion_params.ToolConfig | NotGiven = NOT_GIVEN, tool_prompt_format: Literal["json", "function_tag", "python_list"] | NotGiven = NOT_GIVEN, tools: Iterable[inference_chat_completion_params.Tool] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. @@ -768,6 +795,7 @@ async def chat_completion( "sampling_params": sampling_params, "stream": stream, "tool_choice": tool_choice, + "tool_config": tool_config, "tool_prompt_format": tool_prompt_format, "tools": tools, }, diff --git a/src/llama_stack_client/resources/synthetic_data_generation.py b/src/llama_stack_client/resources/synthetic_data_generation.py index 91d6ee72..3c848575 100644 --- a/src/llama_stack_client/resources/synthetic_data_generation.py +++ b/src/llama_stack_client/resources/synthetic_data_generation.py @@ -63,6 +63,8 @@ def generate( ) -> SyntheticDataGenerationResponse: """ Args: + filtering_function: The type of filtering function. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -123,6 +125,8 @@ async def generate( ) -> SyntheticDataGenerationResponse: """ Args: + filtering_function: The type of filtering function. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request diff --git a/src/llama_stack_client/resources/tool_runtime/rag_tool.py b/src/llama_stack_client/resources/tool_runtime/rag_tool.py index da0ce761..14ea8454 100644 --- a/src/llama_stack_client/resources/tool_runtime/rag_tool.py +++ b/src/llama_stack_client/resources/tool_runtime/rag_tool.py @@ -108,6 +108,8 @@ def query( Query the RAG system for context; typically invoked by the agent Args: + content: A image content item + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -212,6 +214,8 @@ async def query( Query the RAG system for context; typically invoked by the agent Args: + content: A image content item + extra_headers: Send extra headers extra_query: Add additional query parameters to the request diff --git a/src/llama_stack_client/resources/vector_io.py b/src/llama_stack_client/resources/vector_io.py index e71f8f1d..a432ea40 100644 --- a/src/llama_stack_client/resources/vector_io.py +++ b/src/llama_stack_client/resources/vector_io.py @@ -102,6 +102,8 @@ def query( ) -> QueryChunksResponse: """ Args: + query: A image content item + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -202,6 +204,8 @@ async def query( ) -> QueryChunksResponse: """ Args: + query: A image content item + extra_headers: Send extra headers extra_query: Add additional query parameters to the request diff --git a/src/llama_stack_client/types/agents/turn.py b/src/llama_stack_client/types/agents/turn.py index c34efa08..2ead7bbe 100644 --- a/src/llama_stack_client/types/agents/turn.py +++ b/src/llama_stack_client/types/agents/turn.py @@ -19,16 +19,21 @@ __all__ = [ "Turn", "InputMessage", + "Step", "OutputAttachment", "OutputAttachmentContent", "OutputAttachmentContentImageContentItem", 
"OutputAttachmentContentImageContentItemImage", "OutputAttachmentContentTextContentItem", - "Step", ] InputMessage: TypeAlias = Union[UserMessage, ToolResponseMessage] +Step: TypeAlias = Annotated[ + Union[InferenceStep, ToolExecutionStep, ShieldCallStep, MemoryRetrievalStep], + PropertyInfo(discriminator="step_type"), +] + class OutputAttachmentContentImageContentItemImage(BaseModel): data: Optional[str] = None @@ -68,22 +73,16 @@ class OutputAttachmentContentTextContentItem(BaseModel): class OutputAttachment(BaseModel): content: OutputAttachmentContent + """A image content item""" mime_type: str -Step: TypeAlias = Annotated[ - Union[InferenceStep, ToolExecutionStep, ShieldCallStep, MemoryRetrievalStep], - PropertyInfo(discriminator="step_type"), -] - - class Turn(BaseModel): input_messages: List[InputMessage] - output_attachments: List[OutputAttachment] - output_message: CompletionMessage + """A message containing the model's (assistant) response in a chat conversation.""" session_id: str @@ -94,3 +93,5 @@ class Turn(BaseModel): turn_id: str completed_at: Optional[datetime] = None + + output_attachments: Optional[List[OutputAttachment]] = None diff --git a/src/llama_stack_client/types/agents/turn_create_params.py b/src/llama_stack_client/types/agents/turn_create_params.py index 23bfc6a3..fee300dd 100644 --- a/src/llama_stack_client/types/agents/turn_create_params.py +++ b/src/llama_stack_client/types/agents/turn_create_params.py @@ -18,6 +18,7 @@ "DocumentContentImageContentItem", "DocumentContentImageContentItemImage", "DocumentContentTextContentItem", + "ToolConfig", "Toolgroup", "ToolgroupUnionMember1", "TurnCreateParamsNonStreaming", @@ -32,6 +33,9 @@ class TurnCreateParamsBase(TypedDict, total=False): documents: Iterable[Document] + tool_config: ToolConfig + """Configuration for tool use.""" + toolgroups: List[Toolgroup] @@ -72,10 +76,39 @@ class DocumentContentTextContentItem(TypedDict, total=False): class Document(TypedDict, total=False): content: Required[DocumentContent] + """A image content item""" mime_type: Required[str] +class ToolConfig(TypedDict, total=False): + system_message_behavior: Required[Literal["append", "replace"]] + """(Optional) Config for how to override the default system prompt. + + - `SystemMessageBehavior.append`: Appends the provided system message to the + default system prompt. - `SystemMessageBehavior.replace`: Replaces the default + system prompt with the provided system message. The system message can include + the string '{{function_definitions}}' to indicate where the function + definitions should be inserted. + """ + + tool_choice: Literal["auto", "required"] + """(Optional) Whether tool use is required or automatic. + + Defaults to ToolChoice.auto. + """ + + tool_prompt_format: Literal["json", "function_tag", "python_list"] + """(Optional) Instructs the model how to format tool calls. + + By default, Llama Stack will attempt to use a format that is best adapted to the + model. - `ToolPromptFormat.json`: The tool calls are formatted as a JSON + object. - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a + tag. - `ToolPromptFormat.python_list`: The tool calls + are output as Python syntax -- a list of function calls. 
+ """ + + class ToolgroupUnionMember1(TypedDict, total=False): args: Required[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] diff --git a/src/llama_stack_client/types/agents/turn_response_event_payload.py b/src/llama_stack_client/types/agents/turn_response_event_payload.py index d320a9b3..f12f8b03 100644 --- a/src/llama_stack_client/types/agents/turn_response_event_payload.py +++ b/src/llama_stack_client/types/agents/turn_response_event_payload.py @@ -69,6 +69,7 @@ class AgentTurnResponseTurnCompletePayload(BaseModel): event_type: Literal["turn_complete"] turn: Turn + """A single turn in an interaction with an Agentic System.""" TurnResponseEventPayload: TypeAlias = Annotated[ diff --git a/src/llama_stack_client/types/batch_inference_chat_completion_params.py b/src/llama_stack_client/types/batch_inference_chat_completion_params.py index b0b5ce51..fcdbb166 100644 --- a/src/llama_stack_client/types/batch_inference_chat_completion_params.py +++ b/src/llama_stack_client/types/batch_inference_chat_completion_params.py @@ -21,12 +21,19 @@ class BatchInferenceChatCompletionParams(TypedDict, total=False): logprobs: Logprobs response_format: ResponseFormat + """Configuration for JSON schema-guided response generation.""" sampling_params: SamplingParams tool_choice: Literal["auto", "required"] + """Whether tool use is required or automatic. + + This is a hint to the model which may not be followed. It depends on the + Instruction Following capabilities of the model. + """ tool_prompt_format: Literal["json", "function_tag", "python_list"] + """Prompt format for calling custom / zero shot tools.""" tools: Iterable[Tool] diff --git a/src/llama_stack_client/types/batch_inference_completion_params.py b/src/llama_stack_client/types/batch_inference_completion_params.py index 0a33c5b3..3f80d625 100644 --- a/src/llama_stack_client/types/batch_inference_completion_params.py +++ b/src/llama_stack_client/types/batch_inference_completion_params.py @@ -20,6 +20,7 @@ class BatchInferenceCompletionParams(TypedDict, total=False): logprobs: Logprobs response_format: ResponseFormat + """Configuration for JSON schema-guided response generation.""" sampling_params: SamplingParams diff --git a/src/llama_stack_client/types/eval_candidate_param.py b/src/llama_stack_client/types/eval_candidate_param.py index d9483b5b..77c3c806 100644 --- a/src/llama_stack_client/types/eval_candidate_param.py +++ b/src/llama_stack_client/types/eval_candidate_param.py @@ -20,6 +20,7 @@ class ModelCandidate(TypedDict, total=False): type: Required[Literal["model"]] system_message: SystemMessage + """A system message providing instructions or context to the model.""" class AgentCandidate(TypedDict, total=False): diff --git a/src/llama_stack_client/types/inference_chat_completion_params.py b/src/llama_stack_client/types/inference_chat_completion_params.py index 6382696f..4407cfa7 100644 --- a/src/llama_stack_client/types/inference_chat_completion_params.py +++ b/src/llama_stack_client/types/inference_chat_completion_params.py @@ -13,6 +13,7 @@ __all__ = [ "InferenceChatCompletionParamsBase", "Logprobs", + "ToolConfig", "Tool", "InferenceChatCompletionParamsNonStreaming", "InferenceChatCompletionParamsStreaming", @@ -51,9 +52,12 @@ class InferenceChatCompletionParamsBase(TypedDict, total=False): tool_choice: Literal["auto", "required"] """(Optional) Whether tool use is required or automatic. - Defaults to ToolChoice.auto. + Defaults to ToolChoice.auto. .. deprecated:: Use tool_config instead. 
""" + tool_config: ToolConfig + """(Optional) Configuration for tool use.""" + tool_prompt_format: Literal["json", "function_tag", "python_list"] """(Optional) Instructs the model how to format tool calls. @@ -61,7 +65,8 @@ class InferenceChatCompletionParamsBase(TypedDict, total=False): model. - `ToolPromptFormat.json`: The tool calls are formatted as a JSON object. - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a tag. - `ToolPromptFormat.python_list`: The tool calls - are output as Python syntax -- a list of function calls. + are output as Python syntax -- a list of function calls. .. deprecated:: Use + tool_config instead. """ tools: Iterable[Tool] @@ -73,6 +78,34 @@ class Logprobs(TypedDict, total=False): """How many tokens (for each position) to return log probabilities for.""" +class ToolConfig(TypedDict, total=False): + system_message_behavior: Required[Literal["append", "replace"]] + """(Optional) Config for how to override the default system prompt. + + - `SystemMessageBehavior.append`: Appends the provided system message to the + default system prompt. - `SystemMessageBehavior.replace`: Replaces the default + system prompt with the provided system message. The system message can include + the string '{{function_definitions}}' to indicate where the function + definitions should be inserted. + """ + + tool_choice: Literal["auto", "required"] + """(Optional) Whether tool use is required or automatic. + + Defaults to ToolChoice.auto. + """ + + tool_prompt_format: Literal["json", "function_tag", "python_list"] + """(Optional) Instructs the model how to format tool calls. + + By default, Llama Stack will attempt to use a format that is best adapted to the + model. - `ToolPromptFormat.json`: The tool calls are formatted as a JSON + object. - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a + tag. - `ToolPromptFormat.python_list`: The tool calls + are output as Python syntax -- a list of function calls. 
+ """ + + class Tool(TypedDict, total=False): tool_name: Required[Union[Literal["brave_search", "wolfram_alpha", "photogen", "code_interpreter"], str]] diff --git a/src/llama_stack_client/types/inference_step.py b/src/llama_stack_client/types/inference_step.py index ba429fa3..d2c1ed8e 100644 --- a/src/llama_stack_client/types/inference_step.py +++ b/src/llama_stack_client/types/inference_step.py @@ -14,6 +14,7 @@ class InferenceStep(BaseModel): api_model_response: CompletionMessage = FieldInfo(alias="model_response") + """A message containing the model's (assistant) response in a chat conversation.""" step_id: str diff --git a/src/llama_stack_client/types/memory_retrieval_step.py b/src/llama_stack_client/types/memory_retrieval_step.py index 2d5840c6..6f37c93a 100644 --- a/src/llama_stack_client/types/memory_retrieval_step.py +++ b/src/llama_stack_client/types/memory_retrieval_step.py @@ -12,6 +12,7 @@ class MemoryRetrievalStep(BaseModel): inserted_context: InterleavedContent + """A image content item""" step_id: str diff --git a/src/llama_stack_client/types/query_chunks_response.py b/src/llama_stack_client/types/query_chunks_response.py index 4fb11316..d90e464e 100644 --- a/src/llama_stack_client/types/query_chunks_response.py +++ b/src/llama_stack_client/types/query_chunks_response.py @@ -10,6 +10,7 @@ class Chunk(BaseModel): content: InterleavedContent + """A image content item""" metadata: Dict[str, Union[bool, float, str, List[object], object, None]] diff --git a/src/llama_stack_client/types/shared/agent_config.py b/src/llama_stack_client/types/shared/agent_config.py index ad94e3f1..273a98db 100644 --- a/src/llama_stack_client/types/shared/agent_config.py +++ b/src/llama_stack_client/types/shared/agent_config.py @@ -8,7 +8,35 @@ from .response_format import ResponseFormat from .sampling_params import SamplingParams -__all__ = ["AgentConfig", "Toolgroup", "ToolgroupUnionMember1"] +__all__ = ["AgentConfig", "ToolConfig", "Toolgroup", "ToolgroupUnionMember1"] + + +class ToolConfig(BaseModel): + system_message_behavior: Literal["append", "replace"] + """(Optional) Config for how to override the default system prompt. + + - `SystemMessageBehavior.append`: Appends the provided system message to the + default system prompt. - `SystemMessageBehavior.replace`: Replaces the default + system prompt with the provided system message. The system message can include + the string '{{function_definitions}}' to indicate where the function + definitions should be inserted. + """ + + tool_choice: Optional[Literal["auto", "required"]] = None + """(Optional) Whether tool use is required or automatic. + + Defaults to ToolChoice.auto. + """ + + tool_prompt_format: Optional[Literal["json", "function_tag", "python_list"]] = None + """(Optional) Instructs the model how to format tool calls. + + By default, Llama Stack will attempt to use a format that is best adapted to the + model. - `ToolPromptFormat.json`: The tool calls are formatted as a JSON + object. - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a + tag. - `ToolPromptFormat.python_list`: The tool calls + are output as Python syntax -- a list of function calls. 
+ """ class ToolgroupUnionMember1(BaseModel): @@ -25,22 +53,32 @@ class AgentConfig(BaseModel): instructions: str - max_infer_iters: int - model: str client_tools: Optional[List[ToolDef]] = None input_shields: Optional[List[str]] = None + max_infer_iters: Optional[int] = None + output_shields: Optional[List[str]] = None response_format: Optional[ResponseFormat] = None + """Configuration for JSON schema-guided response generation.""" sampling_params: Optional[SamplingParams] = None tool_choice: Optional[Literal["auto", "required"]] = None + """Whether tool use is required or automatic. + + This is a hint to the model which may not be followed. It depends on the + Instruction Following capabilities of the model. + """ + + tool_config: Optional[ToolConfig] = None + """Configuration for tool use.""" tool_prompt_format: Optional[Literal["json", "function_tag", "python_list"]] = None + """Prompt format for calling custom / zero shot tools.""" toolgroups: Optional[List[Toolgroup]] = None diff --git a/src/llama_stack_client/types/shared/completion_message.py b/src/llama_stack_client/types/shared/completion_message.py index 373d6b1d..61c10a5b 100644 --- a/src/llama_stack_client/types/shared/completion_message.py +++ b/src/llama_stack_client/types/shared/completion_message.py @@ -1,6 +1,6 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. -from typing import List +from typing import List, Optional from typing_extensions import Literal from ..._models import BaseModel @@ -27,5 +27,5 @@ class CompletionMessage(BaseModel): `StopReason.out_of_tokens`: The model ran out of token budget. """ - tool_calls: List[ToolCall] + tool_calls: Optional[List[ToolCall]] = None """List of tool calls. Each tool call is a ToolCall object.""" diff --git a/src/llama_stack_client/types/shared/document.py b/src/llama_stack_client/types/shared/document.py index c0ac8b9d..1282bd0a 100644 --- a/src/llama_stack_client/types/shared/document.py +++ b/src/llama_stack_client/types/shared/document.py @@ -42,6 +42,7 @@ class ContentTextContentItem(BaseModel): class Document(BaseModel): content: Content + """A image content item""" document_id: str diff --git a/src/llama_stack_client/types/shared/query_result.py b/src/llama_stack_client/types/shared/query_result.py index 5a0156c8..dc20becf 100644 --- a/src/llama_stack_client/types/shared/query_result.py +++ b/src/llama_stack_client/types/shared/query_result.py @@ -10,3 +10,4 @@ class QueryResult(BaseModel): content: Optional[InterleavedContent] = None + """A image content item""" diff --git a/src/llama_stack_client/types/shared_params/agent_config.py b/src/llama_stack_client/types/shared_params/agent_config.py index 186eff85..fe62bc24 100644 --- a/src/llama_stack_client/types/shared_params/agent_config.py +++ b/src/llama_stack_client/types/shared_params/agent_config.py @@ -9,7 +9,35 @@ from .response_format import ResponseFormat from .sampling_params import SamplingParams -__all__ = ["AgentConfig", "Toolgroup", "ToolgroupUnionMember1"] +__all__ = ["AgentConfig", "ToolConfig", "Toolgroup", "ToolgroupUnionMember1"] + + +class ToolConfig(TypedDict, total=False): + system_message_behavior: Required[Literal["append", "replace"]] + """(Optional) Config for how to override the default system prompt. + + - `SystemMessageBehavior.append`: Appends the provided system message to the + default system prompt. - `SystemMessageBehavior.replace`: Replaces the default + system prompt with the provided system message. 
The system message can include + the string '{{function_definitions}}' to indicate where the function + definitions should be inserted. + """ + + tool_choice: Literal["auto", "required"] + """(Optional) Whether tool use is required or automatic. + + Defaults to ToolChoice.auto. + """ + + tool_prompt_format: Literal["json", "function_tag", "python_list"] + """(Optional) Instructs the model how to format tool calls. + + By default, Llama Stack will attempt to use a format that is best adapted to the + model. - `ToolPromptFormat.json`: The tool calls are formatted as a JSON + object. - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a + tag. - `ToolPromptFormat.python_list`: The tool calls + are output as Python syntax -- a list of function calls. + """ class ToolgroupUnionMember1(TypedDict, total=False): @@ -26,22 +54,32 @@ class AgentConfig(TypedDict, total=False): instructions: Required[str] - max_infer_iters: Required[int] - model: Required[str] client_tools: Iterable[ToolDefParam] input_shields: List[str] + max_infer_iters: int + output_shields: List[str] response_format: ResponseFormat + """Configuration for JSON schema-guided response generation.""" sampling_params: SamplingParams tool_choice: Literal["auto", "required"] + """Whether tool use is required or automatic. + + This is a hint to the model which may not be followed. It depends on the + Instruction Following capabilities of the model. + """ + + tool_config: ToolConfig + """Configuration for tool use.""" tool_prompt_format: Literal["json", "function_tag", "python_list"] + """Prompt format for calling custom / zero shot tools.""" toolgroups: List[Toolgroup] diff --git a/src/llama_stack_client/types/shared_params/completion_message.py b/src/llama_stack_client/types/shared_params/completion_message.py index 4c480807..43b2529e 100644 --- a/src/llama_stack_client/types/shared_params/completion_message.py +++ b/src/llama_stack_client/types/shared_params/completion_message.py @@ -28,5 +28,5 @@ class CompletionMessage(TypedDict, total=False): `StopReason.out_of_tokens`: The model ran out of token budget. """ - tool_calls: Required[Iterable[ToolCall]] + tool_calls: Iterable[ToolCall] """List of tool calls. 
Each tool call is a ToolCall object.""" diff --git a/src/llama_stack_client/types/shared_params/document.py b/src/llama_stack_client/types/shared_params/document.py index 1b5d5f09..fd464554 100644 --- a/src/llama_stack_client/types/shared_params/document.py +++ b/src/llama_stack_client/types/shared_params/document.py @@ -43,6 +43,7 @@ class ContentTextContentItem(TypedDict, total=False): class Document(TypedDict, total=False): content: Required[Content] + """A image content item""" document_id: Required[str] diff --git a/src/llama_stack_client/types/synthetic_data_generation_generate_params.py b/src/llama_stack_client/types/synthetic_data_generation_generate_params.py index e10842bd..abf51059 100644 --- a/src/llama_stack_client/types/synthetic_data_generation_generate_params.py +++ b/src/llama_stack_client/types/synthetic_data_generation_generate_params.py @@ -14,5 +14,6 @@ class SyntheticDataGenerationGenerateParams(TypedDict, total=False): dialogs: Required[Iterable[Message]] filtering_function: Required[Literal["none", "random", "top_k", "top_p", "top_k_top_p", "sigmoid"]] + """The type of filtering function.""" model: str diff --git a/src/llama_stack_client/types/tool_invocation_result.py b/src/llama_stack_client/types/tool_invocation_result.py index 5c286b2c..4ecc3d03 100644 --- a/src/llama_stack_client/types/tool_invocation_result.py +++ b/src/llama_stack_client/types/tool_invocation_result.py @@ -10,6 +10,7 @@ class ToolInvocationResult(BaseModel): content: InterleavedContent + """A image content item""" error_code: Optional[int] = None diff --git a/src/llama_stack_client/types/tool_response.py b/src/llama_stack_client/types/tool_response.py index 721111ff..2617f6e3 100644 --- a/src/llama_stack_client/types/tool_response.py +++ b/src/llama_stack_client/types/tool_response.py @@ -13,5 +13,6 @@ class ToolResponse(BaseModel): call_id: str content: InterleavedContent + """A image content item""" tool_name: Union[Literal["brave_search", "wolfram_alpha", "photogen", "code_interpreter"], str] diff --git a/src/llama_stack_client/types/tool_runtime/rag_tool_query_params.py b/src/llama_stack_client/types/tool_runtime/rag_tool_query_params.py index b4e7c003..08208b77 100644 --- a/src/llama_stack_client/types/tool_runtime/rag_tool_query_params.py +++ b/src/llama_stack_client/types/tool_runtime/rag_tool_query_params.py @@ -13,6 +13,7 @@ class RagToolQueryParams(TypedDict, total=False): content: Required[InterleavedContent] + """A image content item""" vector_db_ids: Required[List[str]] diff --git a/src/llama_stack_client/types/vector_io_insert_params.py b/src/llama_stack_client/types/vector_io_insert_params.py index faac744b..5ac67c10 100644 --- a/src/llama_stack_client/types/vector_io_insert_params.py +++ b/src/llama_stack_client/types/vector_io_insert_params.py @@ -20,5 +20,6 @@ class VectorIoInsertParams(TypedDict, total=False): class Chunk(TypedDict, total=False): content: Required[InterleavedContent] + """A image content item""" metadata: Required[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] diff --git a/src/llama_stack_client/types/vector_io_query_params.py b/src/llama_stack_client/types/vector_io_query_params.py index 97b48ddc..2fe675d1 100644 --- a/src/llama_stack_client/types/vector_io_query_params.py +++ b/src/llama_stack_client/types/vector_io_query_params.py @@ -12,6 +12,7 @@ class VectorIoQueryParams(TypedDict, total=False): query: Required[InterleavedContent] + """A image content item""" vector_db_id: Required[str] diff --git 
a/tests/api_resources/agents/test_turn.py b/tests/api_resources/agents/test_turn.py index b4bc87b3..b64bf957 100644 --- a/tests/api_resources/agents/test_turn.py +++ b/tests/api_resources/agents/test_turn.py @@ -50,6 +50,11 @@ def test_method_create_with_all_params_overload_1(self, client: LlamaStackClient } ], stream=False, + tool_config={ + "system_message_behavior": "append", + "tool_choice": "auto", + "tool_prompt_format": "json", + }, toolgroups=["string"], ) assert_matches_type(Turn, turn, path=["response"]) @@ -152,6 +157,11 @@ def test_method_create_with_all_params_overload_2(self, client: LlamaStackClient "mime_type": "mime_type", } ], + tool_config={ + "system_message_behavior": "append", + "tool_choice": "auto", + "tool_prompt_format": "json", + }, toolgroups=["string"], ) turn_stream.response.close() @@ -320,6 +330,11 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn } ], stream=False, + tool_config={ + "system_message_behavior": "append", + "tool_choice": "auto", + "tool_prompt_format": "json", + }, toolgroups=["string"], ) assert_matches_type(Turn, turn, path=["response"]) @@ -422,6 +437,11 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn "mime_type": "mime_type", } ], + tool_config={ + "system_message_behavior": "append", + "tool_choice": "auto", + "tool_prompt_format": "json", + }, toolgroups=["string"], ) await turn_stream.response.aclose() diff --git a/tests/api_resources/test_agents.py b/tests/api_resources/test_agents.py index 30dd9ac9..54006114 100644 --- a/tests/api_resources/test_agents.py +++ b/tests/api_resources/test_agents.py @@ -23,7 +23,6 @@ def test_method_create(self, client: LlamaStackClient) -> None: agent_config={ "enable_session_persistence": True, "instructions": "instructions", - "max_infer_iters": 0, "model": "model", }, ) @@ -35,7 +34,6 @@ def test_method_create_with_all_params(self, client: LlamaStackClient) -> None: agent_config={ "enable_session_persistence": True, "instructions": "instructions", - "max_infer_iters": 0, "model": "model", "client_tools": [ { @@ -54,6 +52,7 @@ def test_method_create_with_all_params(self, client: LlamaStackClient) -> None: } ], "input_shields": ["string"], + "max_infer_iters": 0, "output_shields": ["string"], "response_format": { "json_schema": {"foo": True}, @@ -65,6 +64,11 @@ def test_method_create_with_all_params(self, client: LlamaStackClient) -> None: "repetition_penalty": 0, }, "tool_choice": "auto", + "tool_config": { + "system_message_behavior": "append", + "tool_choice": "auto", + "tool_prompt_format": "json", + }, "tool_prompt_format": "json", "toolgroups": ["string"], }, @@ -77,7 +81,6 @@ def test_raw_response_create(self, client: LlamaStackClient) -> None: agent_config={ "enable_session_persistence": True, "instructions": "instructions", - "max_infer_iters": 0, "model": "model", }, ) @@ -93,7 +96,6 @@ def test_streaming_response_create(self, client: LlamaStackClient) -> None: agent_config={ "enable_session_persistence": True, "instructions": "instructions", - "max_infer_iters": 0, "model": "model", }, ) as response: @@ -153,7 +155,6 @@ async def test_method_create(self, async_client: AsyncLlamaStackClient) -> None: agent_config={ "enable_session_persistence": True, "instructions": "instructions", - "max_infer_iters": 0, "model": "model", }, ) @@ -165,7 +166,6 @@ async def test_method_create_with_all_params(self, async_client: AsyncLlamaStack agent_config={ "enable_session_persistence": True, "instructions": "instructions", - 
"max_infer_iters": 0, "model": "model", "client_tools": [ { @@ -184,6 +184,7 @@ async def test_method_create_with_all_params(self, async_client: AsyncLlamaStack } ], "input_shields": ["string"], + "max_infer_iters": 0, "output_shields": ["string"], "response_format": { "json_schema": {"foo": True}, @@ -195,6 +196,11 @@ async def test_method_create_with_all_params(self, async_client: AsyncLlamaStack "repetition_penalty": 0, }, "tool_choice": "auto", + "tool_config": { + "system_message_behavior": "append", + "tool_choice": "auto", + "tool_prompt_format": "json", + }, "tool_prompt_format": "json", "toolgroups": ["string"], }, @@ -207,7 +213,6 @@ async def test_raw_response_create(self, async_client: AsyncLlamaStackClient) -> agent_config={ "enable_session_persistence": True, "instructions": "instructions", - "max_infer_iters": 0, "model": "model", }, ) @@ -223,7 +228,6 @@ async def test_streaming_response_create(self, async_client: AsyncLlamaStackClie agent_config={ "enable_session_persistence": True, "instructions": "instructions", - "max_infer_iters": 0, "model": "model", }, ) as response: diff --git a/tests/api_resources/test_inference.py b/tests/api_resources/test_inference.py index 64c912d2..6cf7c8ba 100644 --- a/tests/api_resources/test_inference.py +++ b/tests/api_resources/test_inference.py @@ -57,6 +57,11 @@ def test_method_chat_completion_with_all_params_overload_1(self, client: LlamaSt }, stream=False, tool_choice="auto", + tool_config={ + "system_message_behavior": "append", + "tool_choice": "auto", + "tool_prompt_format": "json", + }, tool_prompt_format="json", tools=[ { @@ -148,6 +153,11 @@ def test_method_chat_completion_with_all_params_overload_2(self, client: LlamaSt "repetition_penalty": 0, }, tool_choice="auto", + tool_config={ + "system_message_behavior": "append", + "tool_choice": "auto", + "tool_prompt_format": "json", + }, tool_prompt_format="json", tools=[ { @@ -385,6 +395,11 @@ async def test_method_chat_completion_with_all_params_overload_1(self, async_cli }, stream=False, tool_choice="auto", + tool_config={ + "system_message_behavior": "append", + "tool_choice": "auto", + "tool_prompt_format": "json", + }, tool_prompt_format="json", tools=[ { @@ -476,6 +491,11 @@ async def test_method_chat_completion_with_all_params_overload_2(self, async_cli "repetition_penalty": 0, }, tool_choice="auto", + tool_config={ + "system_message_behavior": "append", + "tool_choice": "auto", + "tool_prompt_format": "json", + }, tool_prompt_format="json", tools=[ { diff --git a/tests/test_client.py b/tests/test_client.py index 38796e2c..3ea5f0b7 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -311,6 +311,9 @@ def test_default_headers_option(self) -> None: assert request.headers.get("x-foo") == "stainless" assert request.headers.get("x-stainless-lang") == "my-overriding-header" + def test_validate_headers(self) -> None: + client = LlamaStackClient(base_url=base_url, _strict_response_validation=True) + def test_default_query_option(self) -> None: client = LlamaStackClient( base_url=base_url, _strict_response_validation=True, default_query={"query_param": "bar"} @@ -1092,6 +1095,9 @@ def test_default_headers_option(self) -> None: assert request.headers.get("x-foo") == "stainless" assert request.headers.get("x-stainless-lang") == "my-overriding-header" + def test_validate_headers(self) -> None: + client = AsyncLlamaStackClient(base_url=base_url, _strict_response_validation=True) + def test_default_query_option(self) -> None: client = AsyncLlamaStackClient( base_url=base_url, 
_strict_response_validation=True, default_query={"query_param": "bar"} @@ -1628,7 +1634,7 @@ def test_get_platform(self) -> None: import threading from llama_stack_client._utils import asyncify - from llama_stack_client._base_client import get_platform + from llama_stack_client._base_client import get_platform async def test_main() -> None: result = await asyncify(get_platform)()
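
The `x-stainless-read-timeout` header added in `_build_headers` collapses an `httpx.Timeout` to its read component before stringifying it, and is skipped entirely when no read timeout is configured or the caller already set/omitted the header. A minimal sketch of that derivation as a standalone helper (the function name is ours, not part of the SDK):

```python
from __future__ import annotations

import httpx


def read_timeout_header(timeout: float | httpx.Timeout | None) -> dict[str, str]:
    # Collapse an httpx.Timeout to its read component, as _build_headers now does,
    # and only emit the header when a concrete value remains.
    if isinstance(timeout, httpx.Timeout):
        timeout = timeout.read
    if timeout is None:
        return {}
    return {"x-stainless-read-timeout": str(timeout)}


print(read_timeout_header(httpx.Timeout(timeout=60, connect=5.0)))  # header carries the read component
print(read_timeout_header(None))                                    # {} – no header when unset
```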
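
With `api_key` promoted to a client option, the `Authorization` header is now produced per request by the new `auth_headers` property rather than being baked into `default_headers` at construction time. A minimal sketch, assuming a locally hosted distribution; the URL and key below are placeholders, and when the arguments are omitted the client falls back to `LLAMA_STACK_CLIENT_API_KEY` and `LLAMA_STACK_CLIENT_BASE_URL`:

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(
    base_url="http://localhost:8321",  # placeholder; LLAMA_STACK_CLIENT_BASE_URL is used when omitted
    api_key="sk-example",              # placeholder; LLAMA_STACK_CLIENT_API_KEY is used when omitted
)

print(client.api_key)       # the key is now a public attribute on the client
print(client.auth_headers)  # {"Authorization": "Bearer sk-example"}, attached per request
```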
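
`tool_config` groups `system_message_behavior`, `tool_choice`, and `tool_prompt_format` into a single parameter on `inference.chat_completion` (and on agent turn creation), with the standalone `tool_choice` / `tool_prompt_format` arguments now documented as deprecated. A sketch of the non-streaming call, assuming the usual `model_id` / `messages` arguments; the URL and model id are placeholders:

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # placeholder URL

response = client.inference.chat_completion(
    model_id="meta-llama/Llama-3.1-8B-Instruct",  # placeholder model id
    messages=[{"role": "user", "content": "What is the capital of France?"}],
    tool_config={
        # Keys mirror inference_chat_completion_params.ToolConfig / turn_create_params.ToolConfig.
        "system_message_behavior": "append",
        "tool_choice": "auto",
        "tool_prompt_format": "json",
    },
)
print(response)
```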
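
`AgentConfig` no longer requires `max_infer_iters`, and it accepts the same `tool_config` block. A sketch of agent creation with the slimmer config, mirroring the shape used in `tests/api_resources/test_agents.py`; the URL and model id are placeholders:

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # placeholder URL

agent = client.agents.create(
    agent_config={
        "enable_session_persistence": True,
        "instructions": "You are a helpful assistant.",
        "model": "meta-llama/Llama-3.1-8B-Instruct",  # placeholder model id
        # max_infer_iters is now optional; tool behaviour can be grouped here
        # instead of the separate top-level tool_choice / tool_prompt_format fields.
        "tool_config": {
            "system_message_behavior": "append",
            "tool_choice": "auto",
            "tool_prompt_format": "json",
        },
    },
)
print(agent)
```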