diff --git a/src/llama_stack_client/_base_client.py b/src/llama_stack_client/_base_client.py index bd7fe8ce..c8b0b413 100644 --- a/src/llama_stack_client/_base_client.py +++ b/src/llama_stack_client/_base_client.py @@ -418,10 +418,17 @@ def _build_headers(self, options: FinalRequestOptions, *, retries_taken: int = 0 if idempotency_header and options.method.lower() != "get" and idempotency_header not in headers: headers[idempotency_header] = options.idempotency_key or self._idempotency_key() - # Don't set the retry count header if it was already set or removed by the caller. We check + # Don't set these headers if they were already set or removed by the caller. We check # `custom_headers`, which can contain `Omit()`, instead of `headers` to account for the removal case. - if "x-stainless-retry-count" not in (header.lower() for header in custom_headers): + lower_custom_headers = [header.lower() for header in custom_headers] + if "x-stainless-retry-count" not in lower_custom_headers: headers["x-stainless-retry-count"] = str(retries_taken) + if "x-stainless-read-timeout" not in lower_custom_headers: + timeout = self.timeout if isinstance(options.timeout, NotGiven) else options.timeout + if isinstance(timeout, Timeout): + timeout = timeout.read + if timeout is not None: + headers["x-stainless-read-timeout"] = str(timeout) return headers diff --git a/src/llama_stack_client/_client.py b/src/llama_stack_client/_client.py index 584da069..35d993d8 100644 --- a/src/llama_stack_client/_client.py +++ b/src/llama_stack_client/_client.py @@ -98,12 +98,13 @@ class LlamaStackClient(SyncAPIClient): with_streaming_response: LlamaStackClientWithStreamedResponse # client options + api_key: str | None def __init__( self, *, - base_url: str | httpx.URL | None = None, api_key: str | None = None, + base_url: str | httpx.URL | None = None, timeout: Union[float, Timeout, None, NotGiven] = NOT_GIVEN, max_retries: int = DEFAULT_MAX_RETRIES, default_headers: Mapping[str, str] | None = None, @@ -123,19 +124,20 @@ def __init__( _strict_response_validation: bool = False, provider_data: Mapping[str, Any] | None = None, ) -> None: - """Construct a new synchronous llama-stack-client client instance.""" - if base_url is None: - base_url = os.environ.get("LLAMA_STACK_CLIENT_BASE_URL") - if base_url is None: - base_url = f"http://any-hosted-llama-stack.com" + """Construct a new synchronous llama-stack-client client instance. + This automatically infers the `api_key` argument from the `LLAMA_STACK_CLIENT_API_KEY` environment variable if it is not provided. 
+ """ if api_key is None: api_key = os.environ.get("LLAMA_STACK_CLIENT_API_KEY") self.api_key = api_key + if base_url is None: + base_url = os.environ.get("LLAMA_STACK_CLIENT_BASE_URL") + if base_url is None: + base_url = f"http://any-hosted-llama-stack.com" + custom_headers = default_headers or {} - if api_key is not None: - custom_headers["Authorization"] = f"Bearer {api_key}" custom_headers["X-LlamaStack-Client-Version"] = __version__ if provider_data is not None: custom_headers["X-LlamaStack-Provider-Data"] = json.dumps(provider_data) @@ -182,6 +184,14 @@ def __init__( def qs(self) -> Querystring: return Querystring(array_format="comma") + @property + @override + def auth_headers(self) -> dict[str, str]: + api_key = self.api_key + if api_key is None: + return {} + return {"Authorization": f"Bearer {api_key}"} + @property @override def default_headers(self) -> dict[str, str | Omit]: @@ -194,8 +204,8 @@ def default_headers(self) -> dict[str, str | Omit]: def copy( self, *, - base_url: str | httpx.URL | None = None, api_key: str | None = None, + base_url: str | httpx.URL | None = None, timeout: float | Timeout | None | NotGiven = NOT_GIVEN, http_client: httpx.Client | None = None, max_retries: int | NotGiven = NOT_GIVEN, @@ -228,8 +238,8 @@ def copy( http_client = http_client or self._client return self.__class__( - base_url=base_url or self.base_url, api_key=api_key or self.api_key, + base_url=base_url or self.base_url, timeout=self.timeout if isinstance(timeout, NotGiven) else timeout, http_client=http_client, max_retries=max_retries if is_given(max_retries) else self.max_retries, @@ -304,12 +314,13 @@ class AsyncLlamaStackClient(AsyncAPIClient): with_streaming_response: AsyncLlamaStackClientWithStreamedResponse # client options + api_key: str | None def __init__( self, *, - base_url: str | httpx.URL | None = None, api_key: str | None = None, + base_url: str | httpx.URL | None = None, timeout: Union[float, Timeout, None, NotGiven] = NOT_GIVEN, max_retries: int = DEFAULT_MAX_RETRIES, default_headers: Mapping[str, str] | None = None, @@ -329,19 +340,20 @@ def __init__( _strict_response_validation: bool = False, provider_data: Mapping[str, Any] | None = None, ) -> None: - """Construct a new async llama-stack-client client instance.""" - if base_url is None: - base_url = os.environ.get("LLAMA_STACK_CLIENT_BASE_URL") - if base_url is None: - base_url = f"http://any-hosted-llama-stack.com" + """Construct a new async llama-stack-client client instance. + This automatically infers the `api_key` argument from the `LLAMA_STACK_CLIENT_API_KEY` environment variable if it is not provided. 
+ """ if api_key is None: api_key = os.environ.get("LLAMA_STACK_CLIENT_API_KEY") self.api_key = api_key + if base_url is None: + base_url = os.environ.get("LLAMA_STACK_CLIENT_BASE_URL") + if base_url is None: + base_url = f"http://any-hosted-llama-stack.com" + custom_headers = default_headers or {} - if api_key is not None: - custom_headers["Authorization"] = f"Bearer {api_key}" custom_headers["X-LlamaStack-Client-Version"] = __version__ if provider_data is not None: custom_headers["X-LlamaStack-Provider-Data"] = json.dumps(provider_data) @@ -388,6 +400,14 @@ def __init__( def qs(self) -> Querystring: return Querystring(array_format="comma") + @property + @override + def auth_headers(self) -> dict[str, str]: + api_key = self.api_key + if api_key is None: + return {} + return {"Authorization": f"Bearer {api_key}"} + @property @override def default_headers(self) -> dict[str, str | Omit]: @@ -400,8 +420,8 @@ def default_headers(self) -> dict[str, str | Omit]: def copy( self, *, - base_url: str | httpx.URL | None = None, api_key: str | None = None, + base_url: str | httpx.URL | None = None, timeout: float | Timeout | None | NotGiven = NOT_GIVEN, http_client: httpx.AsyncClient | None = None, max_retries: int | NotGiven = NOT_GIVEN, @@ -434,8 +454,8 @@ def copy( http_client = http_client or self._client return self.__class__( - base_url=base_url or self.base_url, api_key=api_key or self.api_key, + base_url=base_url or self.base_url, timeout=self.timeout if isinstance(timeout, NotGiven) else timeout, http_client=http_client, max_retries=max_retries if is_given(max_retries) else self.max_retries, diff --git a/src/llama_stack_client/_constants.py b/src/llama_stack_client/_constants.py index a2ac3b6f..6ddf2c71 100644 --- a/src/llama_stack_client/_constants.py +++ b/src/llama_stack_client/_constants.py @@ -6,7 +6,7 @@ OVERRIDE_CAST_TO_HEADER = "____stainless_override_cast_to" # default timeout is 1 minute -DEFAULT_TIMEOUT = httpx.Timeout(timeout=60.0, connect=5.0) +DEFAULT_TIMEOUT = httpx.Timeout(timeout=60, connect=5.0) DEFAULT_MAX_RETRIES = 2 DEFAULT_CONNECTION_LIMITS = httpx.Limits(max_connections=100, max_keepalive_connections=20) diff --git a/src/llama_stack_client/resources/agents/turn.py b/src/llama_stack_client/resources/agents/turn.py index 272ea4d9..da659e26 100644 --- a/src/llama_stack_client/resources/agents/turn.py +++ b/src/llama_stack_client/resources/agents/turn.py @@ -59,6 +59,7 @@ def create( messages: Iterable[turn_create_params.Message], documents: Iterable[turn_create_params.Document] | NotGiven = NOT_GIVEN, stream: Literal[False] | NotGiven = NOT_GIVEN, + tool_config: turn_create_params.ToolConfig | NotGiven = NOT_GIVEN, toolgroups: List[turn_create_params.Toolgroup] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. @@ -69,6 +70,8 @@ def create( ) -> Turn: """ Args: + tool_config: Configuration for tool use. 
+ extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -88,6 +91,7 @@ def create( messages: Iterable[turn_create_params.Message], stream: Literal[True], documents: Iterable[turn_create_params.Document] | NotGiven = NOT_GIVEN, + tool_config: turn_create_params.ToolConfig | NotGiven = NOT_GIVEN, toolgroups: List[turn_create_params.Toolgroup] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. @@ -98,6 +102,8 @@ def create( ) -> Stream[AgentTurnResponseStreamChunk]: """ Args: + tool_config: Configuration for tool use. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -117,6 +123,7 @@ def create( messages: Iterable[turn_create_params.Message], stream: bool, documents: Iterable[turn_create_params.Document] | NotGiven = NOT_GIVEN, + tool_config: turn_create_params.ToolConfig | NotGiven = NOT_GIVEN, toolgroups: List[turn_create_params.Toolgroup] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. @@ -127,6 +134,8 @@ def create( ) -> Turn | Stream[AgentTurnResponseStreamChunk]: """ Args: + tool_config: Configuration for tool use. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -146,6 +155,7 @@ def create( messages: Iterable[turn_create_params.Message], documents: Iterable[turn_create_params.Document] | NotGiven = NOT_GIVEN, stream: Literal[False] | Literal[True] | NotGiven = NOT_GIVEN, + tool_config: turn_create_params.ToolConfig | NotGiven = NOT_GIVEN, toolgroups: List[turn_create_params.Toolgroup] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. @@ -165,6 +175,7 @@ def create( "messages": messages, "documents": documents, "stream": stream, + "tool_config": tool_config, "toolgroups": toolgroups, }, turn_create_params.TurnCreateParams, @@ -244,6 +255,7 @@ async def create( messages: Iterable[turn_create_params.Message], documents: Iterable[turn_create_params.Document] | NotGiven = NOT_GIVEN, stream: Literal[False] | NotGiven = NOT_GIVEN, + tool_config: turn_create_params.ToolConfig | NotGiven = NOT_GIVEN, toolgroups: List[turn_create_params.Toolgroup] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. @@ -254,6 +266,8 @@ async def create( ) -> Turn: """ Args: + tool_config: Configuration for tool use. 
+ extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -273,6 +287,7 @@ async def create( messages: Iterable[turn_create_params.Message], stream: Literal[True], documents: Iterable[turn_create_params.Document] | NotGiven = NOT_GIVEN, + tool_config: turn_create_params.ToolConfig | NotGiven = NOT_GIVEN, toolgroups: List[turn_create_params.Toolgroup] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. @@ -283,6 +298,8 @@ async def create( ) -> AsyncStream[AgentTurnResponseStreamChunk]: """ Args: + tool_config: Configuration for tool use. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -302,6 +319,7 @@ async def create( messages: Iterable[turn_create_params.Message], stream: bool, documents: Iterable[turn_create_params.Document] | NotGiven = NOT_GIVEN, + tool_config: turn_create_params.ToolConfig | NotGiven = NOT_GIVEN, toolgroups: List[turn_create_params.Toolgroup] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. @@ -312,6 +330,8 @@ async def create( ) -> Turn | AsyncStream[AgentTurnResponseStreamChunk]: """ Args: + tool_config: Configuration for tool use. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -331,6 +351,7 @@ async def create( messages: Iterable[turn_create_params.Message], documents: Iterable[turn_create_params.Document] | NotGiven = NOT_GIVEN, stream: Literal[False] | Literal[True] | NotGiven = NOT_GIVEN, + tool_config: turn_create_params.ToolConfig | NotGiven = NOT_GIVEN, toolgroups: List[turn_create_params.Toolgroup] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. @@ -350,6 +371,7 @@ async def create( "messages": messages, "documents": documents, "stream": stream, + "tool_config": tool_config, "toolgroups": toolgroups, }, turn_create_params.TurnCreateParams, diff --git a/src/llama_stack_client/resources/batch_inference.py b/src/llama_stack_client/resources/batch_inference.py index 39ddc1ee..fdae58fa 100644 --- a/src/llama_stack_client/resources/batch_inference.py +++ b/src/llama_stack_client/resources/batch_inference.py @@ -72,6 +72,14 @@ def chat_completion( ) -> BatchInferenceChatCompletionResponse: """ Args: + response_format: Configuration for JSON schema-guided response generation. + + tool_choice: Whether tool use is required or automatic. This is a hint to the model which may + not be followed. It depends on the Instruction Following capabilities of the + model. + + tool_prompt_format: Prompt format for calling custom / zero shot tools. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -118,6 +126,8 @@ def completion( ) -> BatchCompletion: """ Args: + response_format: Configuration for JSON schema-guided response generation. 
+ extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -185,6 +195,14 @@ async def chat_completion( ) -> BatchInferenceChatCompletionResponse: """ Args: + response_format: Configuration for JSON schema-guided response generation. + + tool_choice: Whether tool use is required or automatic. This is a hint to the model which may + not be followed. It depends on the Instruction Following capabilities of the + model. + + tool_prompt_format: Prompt format for calling custom / zero shot tools. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -231,6 +249,8 @@ async def completion( ) -> BatchCompletion: """ Args: + response_format: Configuration for JSON schema-guided response generation. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request diff --git a/src/llama_stack_client/resources/inference.py b/src/llama_stack_client/resources/inference.py index c837a6e4..9990b208 100644 --- a/src/llama_stack_client/resources/inference.py +++ b/src/llama_stack_client/resources/inference.py @@ -71,6 +71,7 @@ def chat_completion( sampling_params: SamplingParams | NotGiven = NOT_GIVEN, stream: Literal[False] | NotGiven = NOT_GIVEN, tool_choice: Literal["auto", "required"] | NotGiven = NOT_GIVEN, + tool_config: inference_chat_completion_params.ToolConfig | NotGiven = NOT_GIVEN, tool_prompt_format: Literal["json", "function_tag", "python_list"] | NotGiven = NOT_GIVEN, tools: Iterable[inference_chat_completion_params.Tool] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. @@ -103,14 +104,17 @@ def chat_completion( False. tool_choice: (Optional) Whether tool use is required or automatic. Defaults to - ToolChoice.auto. + ToolChoice.auto. .. deprecated:: Use tool_config instead. + + tool_config: (Optional) Configuration for tool use. tool_prompt_format: (Optional) Instructs the model how to format tool calls. By default, Llama Stack will attempt to use a format that is best adapted to the model. - `ToolPromptFormat.json`: The tool calls are formatted as a JSON object. - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a tag. - `ToolPromptFormat.python_list`: The tool calls - are output as Python syntax -- a list of function calls. + are output as Python syntax -- a list of function calls. .. deprecated:: Use + tool_config instead. tools: (Optional) List of tool definitions available to the model @@ -135,6 +139,7 @@ def chat_completion( response_format: ResponseFormat | NotGiven = NOT_GIVEN, sampling_params: SamplingParams | NotGiven = NOT_GIVEN, tool_choice: Literal["auto", "required"] | NotGiven = NOT_GIVEN, + tool_config: inference_chat_completion_params.ToolConfig | NotGiven = NOT_GIVEN, tool_prompt_format: Literal["json", "function_tag", "python_list"] | NotGiven = NOT_GIVEN, tools: Iterable[inference_chat_completion_params.Tool] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. @@ -167,14 +172,17 @@ def chat_completion( sampling_params: Parameters to control the sampling strategy tool_choice: (Optional) Whether tool use is required or automatic. Defaults to - ToolChoice.auto. + ToolChoice.auto. .. deprecated:: Use tool_config instead. + + tool_config: (Optional) Configuration for tool use. tool_prompt_format: (Optional) Instructs the model how to format tool calls. 
By default, Llama Stack will attempt to use a format that is best adapted to the model. - `ToolPromptFormat.json`: The tool calls are formatted as a JSON object. - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a tag. - `ToolPromptFormat.python_list`: The tool calls - are output as Python syntax -- a list of function calls. + are output as Python syntax -- a list of function calls. .. deprecated:: Use + tool_config instead. tools: (Optional) List of tool definitions available to the model @@ -199,6 +207,7 @@ def chat_completion( response_format: ResponseFormat | NotGiven = NOT_GIVEN, sampling_params: SamplingParams | NotGiven = NOT_GIVEN, tool_choice: Literal["auto", "required"] | NotGiven = NOT_GIVEN, + tool_config: inference_chat_completion_params.ToolConfig | NotGiven = NOT_GIVEN, tool_prompt_format: Literal["json", "function_tag", "python_list"] | NotGiven = NOT_GIVEN, tools: Iterable[inference_chat_completion_params.Tool] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. @@ -231,14 +240,17 @@ def chat_completion( sampling_params: Parameters to control the sampling strategy tool_choice: (Optional) Whether tool use is required or automatic. Defaults to - ToolChoice.auto. + ToolChoice.auto. .. deprecated:: Use tool_config instead. + + tool_config: (Optional) Configuration for tool use. tool_prompt_format: (Optional) Instructs the model how to format tool calls. By default, Llama Stack will attempt to use a format that is best adapted to the model. - `ToolPromptFormat.json`: The tool calls are formatted as a JSON object. - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a tag. - `ToolPromptFormat.python_list`: The tool calls - are output as Python syntax -- a list of function calls. + are output as Python syntax -- a list of function calls. .. deprecated:: Use + tool_config instead. tools: (Optional) List of tool definitions available to the model @@ -263,6 +275,7 @@ def chat_completion( sampling_params: SamplingParams | NotGiven = NOT_GIVEN, stream: Literal[False] | Literal[True] | NotGiven = NOT_GIVEN, tool_choice: Literal["auto", "required"] | NotGiven = NOT_GIVEN, + tool_config: inference_chat_completion_params.ToolConfig | NotGiven = NOT_GIVEN, tool_prompt_format: Literal["json", "function_tag", "python_list"] | NotGiven = NOT_GIVEN, tools: Iterable[inference_chat_completion_params.Tool] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. @@ -285,6 +298,7 @@ def chat_completion( "sampling_params": sampling_params, "stream": stream, "tool_choice": tool_choice, + "tool_config": tool_config, "tool_prompt_format": tool_prompt_format, "tools": tools, }, @@ -554,6 +568,7 @@ async def chat_completion( sampling_params: SamplingParams | NotGiven = NOT_GIVEN, stream: Literal[False] | NotGiven = NOT_GIVEN, tool_choice: Literal["auto", "required"] | NotGiven = NOT_GIVEN, + tool_config: inference_chat_completion_params.ToolConfig | NotGiven = NOT_GIVEN, tool_prompt_format: Literal["json", "function_tag", "python_list"] | NotGiven = NOT_GIVEN, tools: Iterable[inference_chat_completion_params.Tool] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. @@ -586,14 +601,17 @@ async def chat_completion( False. tool_choice: (Optional) Whether tool use is required or automatic. Defaults to - ToolChoice.auto. 
+ ToolChoice.auto. .. deprecated:: Use tool_config instead. + + tool_config: (Optional) Configuration for tool use. tool_prompt_format: (Optional) Instructs the model how to format tool calls. By default, Llama Stack will attempt to use a format that is best adapted to the model. - `ToolPromptFormat.json`: The tool calls are formatted as a JSON object. - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a tag. - `ToolPromptFormat.python_list`: The tool calls - are output as Python syntax -- a list of function calls. + are output as Python syntax -- a list of function calls. .. deprecated:: Use + tool_config instead. tools: (Optional) List of tool definitions available to the model @@ -618,6 +636,7 @@ async def chat_completion( response_format: ResponseFormat | NotGiven = NOT_GIVEN, sampling_params: SamplingParams | NotGiven = NOT_GIVEN, tool_choice: Literal["auto", "required"] | NotGiven = NOT_GIVEN, + tool_config: inference_chat_completion_params.ToolConfig | NotGiven = NOT_GIVEN, tool_prompt_format: Literal["json", "function_tag", "python_list"] | NotGiven = NOT_GIVEN, tools: Iterable[inference_chat_completion_params.Tool] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. @@ -650,14 +669,17 @@ async def chat_completion( sampling_params: Parameters to control the sampling strategy tool_choice: (Optional) Whether tool use is required or automatic. Defaults to - ToolChoice.auto. + ToolChoice.auto. .. deprecated:: Use tool_config instead. + + tool_config: (Optional) Configuration for tool use. tool_prompt_format: (Optional) Instructs the model how to format tool calls. By default, Llama Stack will attempt to use a format that is best adapted to the model. - `ToolPromptFormat.json`: The tool calls are formatted as a JSON object. - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a tag. - `ToolPromptFormat.python_list`: The tool calls - are output as Python syntax -- a list of function calls. + are output as Python syntax -- a list of function calls. .. deprecated:: Use + tool_config instead. tools: (Optional) List of tool definitions available to the model @@ -682,6 +704,7 @@ async def chat_completion( response_format: ResponseFormat | NotGiven = NOT_GIVEN, sampling_params: SamplingParams | NotGiven = NOT_GIVEN, tool_choice: Literal["auto", "required"] | NotGiven = NOT_GIVEN, + tool_config: inference_chat_completion_params.ToolConfig | NotGiven = NOT_GIVEN, tool_prompt_format: Literal["json", "function_tag", "python_list"] | NotGiven = NOT_GIVEN, tools: Iterable[inference_chat_completion_params.Tool] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. @@ -714,14 +737,17 @@ async def chat_completion( sampling_params: Parameters to control the sampling strategy tool_choice: (Optional) Whether tool use is required or automatic. Defaults to - ToolChoice.auto. + ToolChoice.auto. .. deprecated:: Use tool_config instead. + + tool_config: (Optional) Configuration for tool use. tool_prompt_format: (Optional) Instructs the model how to format tool calls. By default, Llama Stack will attempt to use a format that is best adapted to the model. - `ToolPromptFormat.json`: The tool calls are formatted as a JSON object. - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a tag. - `ToolPromptFormat.python_list`: The tool calls - are output as Python syntax -- a list of function calls. 
+ are output as Python syntax -- a list of function calls. .. deprecated:: Use + tool_config instead. tools: (Optional) List of tool definitions available to the model @@ -746,6 +772,7 @@ async def chat_completion( sampling_params: SamplingParams | NotGiven = NOT_GIVEN, stream: Literal[False] | Literal[True] | NotGiven = NOT_GIVEN, tool_choice: Literal["auto", "required"] | NotGiven = NOT_GIVEN, + tool_config: inference_chat_completion_params.ToolConfig | NotGiven = NOT_GIVEN, tool_prompt_format: Literal["json", "function_tag", "python_list"] | NotGiven = NOT_GIVEN, tools: Iterable[inference_chat_completion_params.Tool] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. @@ -768,6 +795,7 @@ async def chat_completion( "sampling_params": sampling_params, "stream": stream, "tool_choice": tool_choice, + "tool_config": tool_config, "tool_prompt_format": tool_prompt_format, "tools": tools, }, diff --git a/src/llama_stack_client/resources/synthetic_data_generation.py b/src/llama_stack_client/resources/synthetic_data_generation.py index 91d6ee72..3c848575 100644 --- a/src/llama_stack_client/resources/synthetic_data_generation.py +++ b/src/llama_stack_client/resources/synthetic_data_generation.py @@ -63,6 +63,8 @@ def generate( ) -> SyntheticDataGenerationResponse: """ Args: + filtering_function: The type of filtering function. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -123,6 +125,8 @@ async def generate( ) -> SyntheticDataGenerationResponse: """ Args: + filtering_function: The type of filtering function. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request diff --git a/src/llama_stack_client/resources/tool_runtime/rag_tool.py b/src/llama_stack_client/resources/tool_runtime/rag_tool.py index da0ce761..14ea8454 100644 --- a/src/llama_stack_client/resources/tool_runtime/rag_tool.py +++ b/src/llama_stack_client/resources/tool_runtime/rag_tool.py @@ -108,6 +108,8 @@ def query( Query the RAG system for context; typically invoked by the agent Args: + content: A image content item + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -212,6 +214,8 @@ async def query( Query the RAG system for context; typically invoked by the agent Args: + content: A image content item + extra_headers: Send extra headers extra_query: Add additional query parameters to the request diff --git a/src/llama_stack_client/resources/vector_io.py b/src/llama_stack_client/resources/vector_io.py index e71f8f1d..a432ea40 100644 --- a/src/llama_stack_client/resources/vector_io.py +++ b/src/llama_stack_client/resources/vector_io.py @@ -102,6 +102,8 @@ def query( ) -> QueryChunksResponse: """ Args: + query: A image content item + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -202,6 +204,8 @@ async def query( ) -> QueryChunksResponse: """ Args: + query: A image content item + extra_headers: Send extra headers extra_query: Add additional query parameters to the request diff --git a/src/llama_stack_client/types/agents/turn.py b/src/llama_stack_client/types/agents/turn.py index c34efa08..2ead7bbe 100644 --- a/src/llama_stack_client/types/agents/turn.py +++ b/src/llama_stack_client/types/agents/turn.py @@ -19,16 +19,21 @@ __all__ = [ "Turn", "InputMessage", + "Step", "OutputAttachment", "OutputAttachmentContent", "OutputAttachmentContentImageContentItem", 
"OutputAttachmentContentImageContentItemImage", "OutputAttachmentContentTextContentItem", - "Step", ] InputMessage: TypeAlias = Union[UserMessage, ToolResponseMessage] +Step: TypeAlias = Annotated[ + Union[InferenceStep, ToolExecutionStep, ShieldCallStep, MemoryRetrievalStep], + PropertyInfo(discriminator="step_type"), +] + class OutputAttachmentContentImageContentItemImage(BaseModel): data: Optional[str] = None @@ -68,22 +73,16 @@ class OutputAttachmentContentTextContentItem(BaseModel): class OutputAttachment(BaseModel): content: OutputAttachmentContent + """A image content item""" mime_type: str -Step: TypeAlias = Annotated[ - Union[InferenceStep, ToolExecutionStep, ShieldCallStep, MemoryRetrievalStep], - PropertyInfo(discriminator="step_type"), -] - - class Turn(BaseModel): input_messages: List[InputMessage] - output_attachments: List[OutputAttachment] - output_message: CompletionMessage + """A message containing the model's (assistant) response in a chat conversation.""" session_id: str @@ -94,3 +93,5 @@ class Turn(BaseModel): turn_id: str completed_at: Optional[datetime] = None + + output_attachments: Optional[List[OutputAttachment]] = None diff --git a/src/llama_stack_client/types/agents/turn_create_params.py b/src/llama_stack_client/types/agents/turn_create_params.py index 23bfc6a3..fee300dd 100644 --- a/src/llama_stack_client/types/agents/turn_create_params.py +++ b/src/llama_stack_client/types/agents/turn_create_params.py @@ -18,6 +18,7 @@ "DocumentContentImageContentItem", "DocumentContentImageContentItemImage", "DocumentContentTextContentItem", + "ToolConfig", "Toolgroup", "ToolgroupUnionMember1", "TurnCreateParamsNonStreaming", @@ -32,6 +33,9 @@ class TurnCreateParamsBase(TypedDict, total=False): documents: Iterable[Document] + tool_config: ToolConfig + """Configuration for tool use.""" + toolgroups: List[Toolgroup] @@ -72,10 +76,39 @@ class DocumentContentTextContentItem(TypedDict, total=False): class Document(TypedDict, total=False): content: Required[DocumentContent] + """A image content item""" mime_type: Required[str] +class ToolConfig(TypedDict, total=False): + system_message_behavior: Required[Literal["append", "replace"]] + """(Optional) Config for how to override the default system prompt. + + - `SystemMessageBehavior.append`: Appends the provided system message to the + default system prompt. - `SystemMessageBehavior.replace`: Replaces the default + system prompt with the provided system message. The system message can include + the string '{{function_definitions}}' to indicate where the function + definitions should be inserted. + """ + + tool_choice: Literal["auto", "required"] + """(Optional) Whether tool use is required or automatic. + + Defaults to ToolChoice.auto. + """ + + tool_prompt_format: Literal["json", "function_tag", "python_list"] + """(Optional) Instructs the model how to format tool calls. + + By default, Llama Stack will attempt to use a format that is best adapted to the + model. - `ToolPromptFormat.json`: The tool calls are formatted as a JSON + object. - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a + tag. - `ToolPromptFormat.python_list`: The tool calls + are output as Python syntax -- a list of function calls. 
+ """ + + class ToolgroupUnionMember1(TypedDict, total=False): args: Required[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] diff --git a/src/llama_stack_client/types/agents/turn_response_event_payload.py b/src/llama_stack_client/types/agents/turn_response_event_payload.py index d320a9b3..f12f8b03 100644 --- a/src/llama_stack_client/types/agents/turn_response_event_payload.py +++ b/src/llama_stack_client/types/agents/turn_response_event_payload.py @@ -69,6 +69,7 @@ class AgentTurnResponseTurnCompletePayload(BaseModel): event_type: Literal["turn_complete"] turn: Turn + """A single turn in an interaction with an Agentic System.""" TurnResponseEventPayload: TypeAlias = Annotated[ diff --git a/src/llama_stack_client/types/batch_inference_chat_completion_params.py b/src/llama_stack_client/types/batch_inference_chat_completion_params.py index b0b5ce51..fcdbb166 100644 --- a/src/llama_stack_client/types/batch_inference_chat_completion_params.py +++ b/src/llama_stack_client/types/batch_inference_chat_completion_params.py @@ -21,12 +21,19 @@ class BatchInferenceChatCompletionParams(TypedDict, total=False): logprobs: Logprobs response_format: ResponseFormat + """Configuration for JSON schema-guided response generation.""" sampling_params: SamplingParams tool_choice: Literal["auto", "required"] + """Whether tool use is required or automatic. + + This is a hint to the model which may not be followed. It depends on the + Instruction Following capabilities of the model. + """ tool_prompt_format: Literal["json", "function_tag", "python_list"] + """Prompt format for calling custom / zero shot tools.""" tools: Iterable[Tool] diff --git a/src/llama_stack_client/types/batch_inference_completion_params.py b/src/llama_stack_client/types/batch_inference_completion_params.py index 0a33c5b3..3f80d625 100644 --- a/src/llama_stack_client/types/batch_inference_completion_params.py +++ b/src/llama_stack_client/types/batch_inference_completion_params.py @@ -20,6 +20,7 @@ class BatchInferenceCompletionParams(TypedDict, total=False): logprobs: Logprobs response_format: ResponseFormat + """Configuration for JSON schema-guided response generation.""" sampling_params: SamplingParams diff --git a/src/llama_stack_client/types/eval_candidate_param.py b/src/llama_stack_client/types/eval_candidate_param.py index d9483b5b..77c3c806 100644 --- a/src/llama_stack_client/types/eval_candidate_param.py +++ b/src/llama_stack_client/types/eval_candidate_param.py @@ -20,6 +20,7 @@ class ModelCandidate(TypedDict, total=False): type: Required[Literal["model"]] system_message: SystemMessage + """A system message providing instructions or context to the model.""" class AgentCandidate(TypedDict, total=False): diff --git a/src/llama_stack_client/types/inference_chat_completion_params.py b/src/llama_stack_client/types/inference_chat_completion_params.py index 6382696f..4407cfa7 100644 --- a/src/llama_stack_client/types/inference_chat_completion_params.py +++ b/src/llama_stack_client/types/inference_chat_completion_params.py @@ -13,6 +13,7 @@ __all__ = [ "InferenceChatCompletionParamsBase", "Logprobs", + "ToolConfig", "Tool", "InferenceChatCompletionParamsNonStreaming", "InferenceChatCompletionParamsStreaming", @@ -51,9 +52,12 @@ class InferenceChatCompletionParamsBase(TypedDict, total=False): tool_choice: Literal["auto", "required"] """(Optional) Whether tool use is required or automatic. - Defaults to ToolChoice.auto. + Defaults to ToolChoice.auto. .. deprecated:: Use tool_config instead. 
""" + tool_config: ToolConfig + """(Optional) Configuration for tool use.""" + tool_prompt_format: Literal["json", "function_tag", "python_list"] """(Optional) Instructs the model how to format tool calls. @@ -61,7 +65,8 @@ class InferenceChatCompletionParamsBase(TypedDict, total=False): model. - `ToolPromptFormat.json`: The tool calls are formatted as a JSON object. - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a tag. - `ToolPromptFormat.python_list`: The tool calls - are output as Python syntax -- a list of function calls. + are output as Python syntax -- a list of function calls. .. deprecated:: Use + tool_config instead. """ tools: Iterable[Tool] @@ -73,6 +78,34 @@ class Logprobs(TypedDict, total=False): """How many tokens (for each position) to return log probabilities for.""" +class ToolConfig(TypedDict, total=False): + system_message_behavior: Required[Literal["append", "replace"]] + """(Optional) Config for how to override the default system prompt. + + - `SystemMessageBehavior.append`: Appends the provided system message to the + default system prompt. - `SystemMessageBehavior.replace`: Replaces the default + system prompt with the provided system message. The system message can include + the string '{{function_definitions}}' to indicate where the function + definitions should be inserted. + """ + + tool_choice: Literal["auto", "required"] + """(Optional) Whether tool use is required or automatic. + + Defaults to ToolChoice.auto. + """ + + tool_prompt_format: Literal["json", "function_tag", "python_list"] + """(Optional) Instructs the model how to format tool calls. + + By default, Llama Stack will attempt to use a format that is best adapted to the + model. - `ToolPromptFormat.json`: The tool calls are formatted as a JSON + object. - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a + tag. - `ToolPromptFormat.python_list`: The tool calls + are output as Python syntax -- a list of function calls. 
+ """ + + class Tool(TypedDict, total=False): tool_name: Required[Union[Literal["brave_search", "wolfram_alpha", "photogen", "code_interpreter"], str]] diff --git a/src/llama_stack_client/types/inference_step.py b/src/llama_stack_client/types/inference_step.py index ba429fa3..d2c1ed8e 100644 --- a/src/llama_stack_client/types/inference_step.py +++ b/src/llama_stack_client/types/inference_step.py @@ -14,6 +14,7 @@ class InferenceStep(BaseModel): api_model_response: CompletionMessage = FieldInfo(alias="model_response") + """A message containing the model's (assistant) response in a chat conversation.""" step_id: str diff --git a/src/llama_stack_client/types/memory_retrieval_step.py b/src/llama_stack_client/types/memory_retrieval_step.py index 2d5840c6..6f37c93a 100644 --- a/src/llama_stack_client/types/memory_retrieval_step.py +++ b/src/llama_stack_client/types/memory_retrieval_step.py @@ -12,6 +12,7 @@ class MemoryRetrievalStep(BaseModel): inserted_context: InterleavedContent + """A image content item""" step_id: str diff --git a/src/llama_stack_client/types/query_chunks_response.py b/src/llama_stack_client/types/query_chunks_response.py index 4fb11316..d90e464e 100644 --- a/src/llama_stack_client/types/query_chunks_response.py +++ b/src/llama_stack_client/types/query_chunks_response.py @@ -10,6 +10,7 @@ class Chunk(BaseModel): content: InterleavedContent + """A image content item""" metadata: Dict[str, Union[bool, float, str, List[object], object, None]] diff --git a/src/llama_stack_client/types/shared/agent_config.py b/src/llama_stack_client/types/shared/agent_config.py index ad94e3f1..273a98db 100644 --- a/src/llama_stack_client/types/shared/agent_config.py +++ b/src/llama_stack_client/types/shared/agent_config.py @@ -8,7 +8,35 @@ from .response_format import ResponseFormat from .sampling_params import SamplingParams -__all__ = ["AgentConfig", "Toolgroup", "ToolgroupUnionMember1"] +__all__ = ["AgentConfig", "ToolConfig", "Toolgroup", "ToolgroupUnionMember1"] + + +class ToolConfig(BaseModel): + system_message_behavior: Literal["append", "replace"] + """(Optional) Config for how to override the default system prompt. + + - `SystemMessageBehavior.append`: Appends the provided system message to the + default system prompt. - `SystemMessageBehavior.replace`: Replaces the default + system prompt with the provided system message. The system message can include + the string '{{function_definitions}}' to indicate where the function + definitions should be inserted. + """ + + tool_choice: Optional[Literal["auto", "required"]] = None + """(Optional) Whether tool use is required or automatic. + + Defaults to ToolChoice.auto. + """ + + tool_prompt_format: Optional[Literal["json", "function_tag", "python_list"]] = None + """(Optional) Instructs the model how to format tool calls. + + By default, Llama Stack will attempt to use a format that is best adapted to the + model. - `ToolPromptFormat.json`: The tool calls are formatted as a JSON + object. - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a + tag. - `ToolPromptFormat.python_list`: The tool calls + are output as Python syntax -- a list of function calls. 
+ """ class ToolgroupUnionMember1(BaseModel): @@ -25,22 +53,32 @@ class AgentConfig(BaseModel): instructions: str - max_infer_iters: int - model: str client_tools: Optional[List[ToolDef]] = None input_shields: Optional[List[str]] = None + max_infer_iters: Optional[int] = None + output_shields: Optional[List[str]] = None response_format: Optional[ResponseFormat] = None + """Configuration for JSON schema-guided response generation.""" sampling_params: Optional[SamplingParams] = None tool_choice: Optional[Literal["auto", "required"]] = None + """Whether tool use is required or automatic. + + This is a hint to the model which may not be followed. It depends on the + Instruction Following capabilities of the model. + """ + + tool_config: Optional[ToolConfig] = None + """Configuration for tool use.""" tool_prompt_format: Optional[Literal["json", "function_tag", "python_list"]] = None + """Prompt format for calling custom / zero shot tools.""" toolgroups: Optional[List[Toolgroup]] = None diff --git a/src/llama_stack_client/types/shared/completion_message.py b/src/llama_stack_client/types/shared/completion_message.py index 373d6b1d..61c10a5b 100644 --- a/src/llama_stack_client/types/shared/completion_message.py +++ b/src/llama_stack_client/types/shared/completion_message.py @@ -1,6 +1,6 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. -from typing import List +from typing import List, Optional from typing_extensions import Literal from ..._models import BaseModel @@ -27,5 +27,5 @@ class CompletionMessage(BaseModel): `StopReason.out_of_tokens`: The model ran out of token budget. """ - tool_calls: List[ToolCall] + tool_calls: Optional[List[ToolCall]] = None """List of tool calls. Each tool call is a ToolCall object.""" diff --git a/src/llama_stack_client/types/shared/document.py b/src/llama_stack_client/types/shared/document.py index c0ac8b9d..1282bd0a 100644 --- a/src/llama_stack_client/types/shared/document.py +++ b/src/llama_stack_client/types/shared/document.py @@ -42,6 +42,7 @@ class ContentTextContentItem(BaseModel): class Document(BaseModel): content: Content + """A image content item""" document_id: str diff --git a/src/llama_stack_client/types/shared/query_result.py b/src/llama_stack_client/types/shared/query_result.py index 5a0156c8..dc20becf 100644 --- a/src/llama_stack_client/types/shared/query_result.py +++ b/src/llama_stack_client/types/shared/query_result.py @@ -10,3 +10,4 @@ class QueryResult(BaseModel): content: Optional[InterleavedContent] = None + """A image content item""" diff --git a/src/llama_stack_client/types/shared_params/agent_config.py b/src/llama_stack_client/types/shared_params/agent_config.py index 186eff85..fe62bc24 100644 --- a/src/llama_stack_client/types/shared_params/agent_config.py +++ b/src/llama_stack_client/types/shared_params/agent_config.py @@ -9,7 +9,35 @@ from .response_format import ResponseFormat from .sampling_params import SamplingParams -__all__ = ["AgentConfig", "Toolgroup", "ToolgroupUnionMember1"] +__all__ = ["AgentConfig", "ToolConfig", "Toolgroup", "ToolgroupUnionMember1"] + + +class ToolConfig(TypedDict, total=False): + system_message_behavior: Required[Literal["append", "replace"]] + """(Optional) Config for how to override the default system prompt. + + - `SystemMessageBehavior.append`: Appends the provided system message to the + default system prompt. - `SystemMessageBehavior.replace`: Replaces the default + system prompt with the provided system message. 
The system message can include + the string '{{function_definitions}}' to indicate where the function + definitions should be inserted. + """ + + tool_choice: Literal["auto", "required"] + """(Optional) Whether tool use is required or automatic. + + Defaults to ToolChoice.auto. + """ + + tool_prompt_format: Literal["json", "function_tag", "python_list"] + """(Optional) Instructs the model how to format tool calls. + + By default, Llama Stack will attempt to use a format that is best adapted to the + model. - `ToolPromptFormat.json`: The tool calls are formatted as a JSON + object. - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a + tag. - `ToolPromptFormat.python_list`: The tool calls + are output as Python syntax -- a list of function calls. + """ class ToolgroupUnionMember1(TypedDict, total=False): @@ -26,22 +54,32 @@ class AgentConfig(TypedDict, total=False): instructions: Required[str] - max_infer_iters: Required[int] - model: Required[str] client_tools: Iterable[ToolDefParam] input_shields: List[str] + max_infer_iters: int + output_shields: List[str] response_format: ResponseFormat + """Configuration for JSON schema-guided response generation.""" sampling_params: SamplingParams tool_choice: Literal["auto", "required"] + """Whether tool use is required or automatic. + + This is a hint to the model which may not be followed. It depends on the + Instruction Following capabilities of the model. + """ + + tool_config: ToolConfig + """Configuration for tool use.""" tool_prompt_format: Literal["json", "function_tag", "python_list"] + """Prompt format for calling custom / zero shot tools.""" toolgroups: List[Toolgroup] diff --git a/src/llama_stack_client/types/shared_params/completion_message.py b/src/llama_stack_client/types/shared_params/completion_message.py index 4c480807..43b2529e 100644 --- a/src/llama_stack_client/types/shared_params/completion_message.py +++ b/src/llama_stack_client/types/shared_params/completion_message.py @@ -28,5 +28,5 @@ class CompletionMessage(TypedDict, total=False): `StopReason.out_of_tokens`: The model ran out of token budget. """ - tool_calls: Required[Iterable[ToolCall]] + tool_calls: Iterable[ToolCall] """List of tool calls. 
Each tool call is a ToolCall object.""" diff --git a/src/llama_stack_client/types/shared_params/document.py b/src/llama_stack_client/types/shared_params/document.py index 1b5d5f09..fd464554 100644 --- a/src/llama_stack_client/types/shared_params/document.py +++ b/src/llama_stack_client/types/shared_params/document.py @@ -43,6 +43,7 @@ class ContentTextContentItem(TypedDict, total=False): class Document(TypedDict, total=False): content: Required[Content] + """A image content item""" document_id: Required[str] diff --git a/src/llama_stack_client/types/synthetic_data_generation_generate_params.py b/src/llama_stack_client/types/synthetic_data_generation_generate_params.py index e10842bd..abf51059 100644 --- a/src/llama_stack_client/types/synthetic_data_generation_generate_params.py +++ b/src/llama_stack_client/types/synthetic_data_generation_generate_params.py @@ -14,5 +14,6 @@ class SyntheticDataGenerationGenerateParams(TypedDict, total=False): dialogs: Required[Iterable[Message]] filtering_function: Required[Literal["none", "random", "top_k", "top_p", "top_k_top_p", "sigmoid"]] + """The type of filtering function.""" model: str diff --git a/src/llama_stack_client/types/tool_invocation_result.py b/src/llama_stack_client/types/tool_invocation_result.py index 5c286b2c..4ecc3d03 100644 --- a/src/llama_stack_client/types/tool_invocation_result.py +++ b/src/llama_stack_client/types/tool_invocation_result.py @@ -10,6 +10,7 @@ class ToolInvocationResult(BaseModel): content: InterleavedContent + """A image content item""" error_code: Optional[int] = None diff --git a/src/llama_stack_client/types/tool_response.py b/src/llama_stack_client/types/tool_response.py index 721111ff..2617f6e3 100644 --- a/src/llama_stack_client/types/tool_response.py +++ b/src/llama_stack_client/types/tool_response.py @@ -13,5 +13,6 @@ class ToolResponse(BaseModel): call_id: str content: InterleavedContent + """A image content item""" tool_name: Union[Literal["brave_search", "wolfram_alpha", "photogen", "code_interpreter"], str] diff --git a/src/llama_stack_client/types/tool_runtime/rag_tool_query_params.py b/src/llama_stack_client/types/tool_runtime/rag_tool_query_params.py index b4e7c003..08208b77 100644 --- a/src/llama_stack_client/types/tool_runtime/rag_tool_query_params.py +++ b/src/llama_stack_client/types/tool_runtime/rag_tool_query_params.py @@ -13,6 +13,7 @@ class RagToolQueryParams(TypedDict, total=False): content: Required[InterleavedContent] + """A image content item""" vector_db_ids: Required[List[str]] diff --git a/src/llama_stack_client/types/vector_io_insert_params.py b/src/llama_stack_client/types/vector_io_insert_params.py index faac744b..5ac67c10 100644 --- a/src/llama_stack_client/types/vector_io_insert_params.py +++ b/src/llama_stack_client/types/vector_io_insert_params.py @@ -20,5 +20,6 @@ class VectorIoInsertParams(TypedDict, total=False): class Chunk(TypedDict, total=False): content: Required[InterleavedContent] + """A image content item""" metadata: Required[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] diff --git a/src/llama_stack_client/types/vector_io_query_params.py b/src/llama_stack_client/types/vector_io_query_params.py index 97b48ddc..2fe675d1 100644 --- a/src/llama_stack_client/types/vector_io_query_params.py +++ b/src/llama_stack_client/types/vector_io_query_params.py @@ -12,6 +12,7 @@ class VectorIoQueryParams(TypedDict, total=False): query: Required[InterleavedContent] + """A image content item""" vector_db_id: Required[str] diff --git 
a/tests/api_resources/agents/test_turn.py b/tests/api_resources/agents/test_turn.py index b4bc87b3..b64bf957 100644 --- a/tests/api_resources/agents/test_turn.py +++ b/tests/api_resources/agents/test_turn.py @@ -50,6 +50,11 @@ def test_method_create_with_all_params_overload_1(self, client: LlamaStackClient } ], stream=False, + tool_config={ + "system_message_behavior": "append", + "tool_choice": "auto", + "tool_prompt_format": "json", + }, toolgroups=["string"], ) assert_matches_type(Turn, turn, path=["response"]) @@ -152,6 +157,11 @@ def test_method_create_with_all_params_overload_2(self, client: LlamaStackClient "mime_type": "mime_type", } ], + tool_config={ + "system_message_behavior": "append", + "tool_choice": "auto", + "tool_prompt_format": "json", + }, toolgroups=["string"], ) turn_stream.response.close() @@ -320,6 +330,11 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn } ], stream=False, + tool_config={ + "system_message_behavior": "append", + "tool_choice": "auto", + "tool_prompt_format": "json", + }, toolgroups=["string"], ) assert_matches_type(Turn, turn, path=["response"]) @@ -422,6 +437,11 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn "mime_type": "mime_type", } ], + tool_config={ + "system_message_behavior": "append", + "tool_choice": "auto", + "tool_prompt_format": "json", + }, toolgroups=["string"], ) await turn_stream.response.aclose() diff --git a/tests/api_resources/test_agents.py b/tests/api_resources/test_agents.py index 30dd9ac9..54006114 100644 --- a/tests/api_resources/test_agents.py +++ b/tests/api_resources/test_agents.py @@ -23,7 +23,6 @@ def test_method_create(self, client: LlamaStackClient) -> None: agent_config={ "enable_session_persistence": True, "instructions": "instructions", - "max_infer_iters": 0, "model": "model", }, ) @@ -35,7 +34,6 @@ def test_method_create_with_all_params(self, client: LlamaStackClient) -> None: agent_config={ "enable_session_persistence": True, "instructions": "instructions", - "max_infer_iters": 0, "model": "model", "client_tools": [ { @@ -54,6 +52,7 @@ def test_method_create_with_all_params(self, client: LlamaStackClient) -> None: } ], "input_shields": ["string"], + "max_infer_iters": 0, "output_shields": ["string"], "response_format": { "json_schema": {"foo": True}, @@ -65,6 +64,11 @@ def test_method_create_with_all_params(self, client: LlamaStackClient) -> None: "repetition_penalty": 0, }, "tool_choice": "auto", + "tool_config": { + "system_message_behavior": "append", + "tool_choice": "auto", + "tool_prompt_format": "json", + }, "tool_prompt_format": "json", "toolgroups": ["string"], }, @@ -77,7 +81,6 @@ def test_raw_response_create(self, client: LlamaStackClient) -> None: agent_config={ "enable_session_persistence": True, "instructions": "instructions", - "max_infer_iters": 0, "model": "model", }, ) @@ -93,7 +96,6 @@ def test_streaming_response_create(self, client: LlamaStackClient) -> None: agent_config={ "enable_session_persistence": True, "instructions": "instructions", - "max_infer_iters": 0, "model": "model", }, ) as response: @@ -153,7 +155,6 @@ async def test_method_create(self, async_client: AsyncLlamaStackClient) -> None: agent_config={ "enable_session_persistence": True, "instructions": "instructions", - "max_infer_iters": 0, "model": "model", }, ) @@ -165,7 +166,6 @@ async def test_method_create_with_all_params(self, async_client: AsyncLlamaStack agent_config={ "enable_session_persistence": True, "instructions": "instructions", - 
"max_infer_iters": 0, "model": "model", "client_tools": [ { @@ -184,6 +184,7 @@ async def test_method_create_with_all_params(self, async_client: AsyncLlamaStack } ], "input_shields": ["string"], + "max_infer_iters": 0, "output_shields": ["string"], "response_format": { "json_schema": {"foo": True}, @@ -195,6 +196,11 @@ async def test_method_create_with_all_params(self, async_client: AsyncLlamaStack "repetition_penalty": 0, }, "tool_choice": "auto", + "tool_config": { + "system_message_behavior": "append", + "tool_choice": "auto", + "tool_prompt_format": "json", + }, "tool_prompt_format": "json", "toolgroups": ["string"], }, @@ -207,7 +213,6 @@ async def test_raw_response_create(self, async_client: AsyncLlamaStackClient) -> agent_config={ "enable_session_persistence": True, "instructions": "instructions", - "max_infer_iters": 0, "model": "model", }, ) @@ -223,7 +228,6 @@ async def test_streaming_response_create(self, async_client: AsyncLlamaStackClie agent_config={ "enable_session_persistence": True, "instructions": "instructions", - "max_infer_iters": 0, "model": "model", }, ) as response: diff --git a/tests/api_resources/test_inference.py b/tests/api_resources/test_inference.py index 64c912d2..6cf7c8ba 100644 --- a/tests/api_resources/test_inference.py +++ b/tests/api_resources/test_inference.py @@ -57,6 +57,11 @@ def test_method_chat_completion_with_all_params_overload_1(self, client: LlamaSt }, stream=False, tool_choice="auto", + tool_config={ + "system_message_behavior": "append", + "tool_choice": "auto", + "tool_prompt_format": "json", + }, tool_prompt_format="json", tools=[ { @@ -148,6 +153,11 @@ def test_method_chat_completion_with_all_params_overload_2(self, client: LlamaSt "repetition_penalty": 0, }, tool_choice="auto", + tool_config={ + "system_message_behavior": "append", + "tool_choice": "auto", + "tool_prompt_format": "json", + }, tool_prompt_format="json", tools=[ { @@ -385,6 +395,11 @@ async def test_method_chat_completion_with_all_params_overload_1(self, async_cli }, stream=False, tool_choice="auto", + tool_config={ + "system_message_behavior": "append", + "tool_choice": "auto", + "tool_prompt_format": "json", + }, tool_prompt_format="json", tools=[ { @@ -476,6 +491,11 @@ async def test_method_chat_completion_with_all_params_overload_2(self, async_cli "repetition_penalty": 0, }, tool_choice="auto", + tool_config={ + "system_message_behavior": "append", + "tool_choice": "auto", + "tool_prompt_format": "json", + }, tool_prompt_format="json", tools=[ { diff --git a/tests/test_client.py b/tests/test_client.py index 38796e2c..3ea5f0b7 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -311,6 +311,9 @@ def test_default_headers_option(self) -> None: assert request.headers.get("x-foo") == "stainless" assert request.headers.get("x-stainless-lang") == "my-overriding-header" + def test_validate_headers(self) -> None: + client = LlamaStackClient(base_url=base_url, _strict_response_validation=True) + def test_default_query_option(self) -> None: client = LlamaStackClient( base_url=base_url, _strict_response_validation=True, default_query={"query_param": "bar"} @@ -1092,6 +1095,9 @@ def test_default_headers_option(self) -> None: assert request.headers.get("x-foo") == "stainless" assert request.headers.get("x-stainless-lang") == "my-overriding-header" + def test_validate_headers(self) -> None: + client = AsyncLlamaStackClient(base_url=base_url, _strict_response_validation=True) + def test_default_query_option(self) -> None: client = AsyncLlamaStackClient( base_url=base_url, 
_strict_response_validation=True, default_query={"query_param": "bar"} @@ -1628,7 +1634,7 @@ def test_get_platform(self) -> None: import threading from llama_stack_client._utils import asyncify - from llama_stack_client._base_client import get_platform + from llama_stack_client._base_client import get_platform async def test_main() -> None: result = await asyncify(get_platform)()
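
The `x-stainless-read-timeout` header added in `_build_headers` collapses an `httpx.Timeout` to its read component before stringifying it, and is skipped entirely when no read timeout is configured or the caller already set/omitted the header. A minimal sketch of that derivation as a standalone helper (the function name is ours, not part of the SDK):

```python
from __future__ import annotations

import httpx


def read_timeout_header(timeout: float | httpx.Timeout | None) -> dict[str, str]:
    # Collapse an httpx.Timeout to its read component, as _build_headers now does,
    # and only emit the header when a concrete value remains.
    if isinstance(timeout, httpx.Timeout):
        timeout = timeout.read
    if timeout is None:
        return {}
    return {"x-stainless-read-timeout": str(timeout)}


print(read_timeout_header(httpx.Timeout(timeout=60, connect=5.0)))  # header carries the read component
print(read_timeout_header(None))                                    # {} – no header when unset
```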
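
With `api_key` promoted to a client option, the `Authorization` header is now produced per request by the new `auth_headers` property rather than being baked into `default_headers` at construction time. A minimal sketch, assuming a locally hosted distribution; the URL and key below are placeholders, and when the arguments are omitted the client falls back to `LLAMA_STACK_CLIENT_API_KEY` and `LLAMA_STACK_CLIENT_BASE_URL`:

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(
    base_url="http://localhost:8321",  # placeholder; LLAMA_STACK_CLIENT_BASE_URL is used when omitted
    api_key="sk-example",              # placeholder; LLAMA_STACK_CLIENT_API_KEY is used when omitted
)

print(client.api_key)       # the key is now a public attribute on the client
print(client.auth_headers)  # {"Authorization": "Bearer sk-example"}, attached per request
```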
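
`tool_config` groups `system_message_behavior`, `tool_choice`, and `tool_prompt_format` into a single parameter on `inference.chat_completion` (and on agent turn creation), with the standalone `tool_choice` / `tool_prompt_format` arguments now documented as deprecated. A sketch of the non-streaming call, assuming the usual `model_id` / `messages` arguments; the URL and model id are placeholders:

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # placeholder URL

response = client.inference.chat_completion(
    model_id="meta-llama/Llama-3.1-8B-Instruct",  # placeholder model id
    messages=[{"role": "user", "content": "What is the capital of France?"}],
    tool_config={
        # Keys mirror inference_chat_completion_params.ToolConfig / turn_create_params.ToolConfig.
        "system_message_behavior": "append",
        "tool_choice": "auto",
        "tool_prompt_format": "json",
    },
)
print(response)
```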
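
`AgentConfig` no longer requires `max_infer_iters`, and it accepts the same `tool_config` block. A sketch of agent creation with the slimmer config, mirroring the shape used in `tests/api_resources/test_agents.py`; the URL and model id are placeholders:

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # placeholder URL

agent = client.agents.create(
    agent_config={
        "enable_session_persistence": True,
        "instructions": "You are a helpful assistant.",
        "model": "meta-llama/Llama-3.1-8B-Instruct",  # placeholder model id
        # max_infer_iters is now optional; tool behaviour can be grouped here
        # instead of the separate top-level tool_choice / tool_prompt_format fields.
        "tool_config": {
            "system_message_behavior": "append",
            "tool_choice": "auto",
            "tool_prompt_format": "json",
        },
    },
)
print(agent)
```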