From 78f75433e2690fd9749b6b186df464d88e5ca587 Mon Sep 17 00:00:00 2001 From: Xi Yan Date: Tue, 19 Nov 2024 00:12:57 -0800 Subject: [PATCH] sync --- pyproject.toml | 2 +- src/llama_stack_client/_client.py | 16 +- src/llama_stack_client/_utils/_sync.py | 90 ++-- src/llama_stack_client/resources/__init__.py | 26 +- .../resources/agents/agents.py | 8 +- .../resources/agents/session.py | 12 +- .../resources/agents/steps.py | 4 +- .../resources/agents/turn.py | 8 +- .../resources/batch_inference.py | 18 +- src/llama_stack_client/resources/datasetio.py | 4 +- src/llama_stack_client/resources/datasets.py | 12 +- src/llama_stack_client/resources/eval/eval.py | 8 +- src/llama_stack_client/resources/eval/jobs.py | 12 +- .../resources/eval_tasks.py | 12 +- src/llama_stack_client/resources/inference.py | 12 +- src/llama_stack_client/resources/inspect.py | 4 +- src/llama_stack_client/resources/memory.py | 8 +- .../resources/memory_banks.py | 16 +- src/llama_stack_client/resources/models.py | 16 +- .../resources/post_training/job.py | 20 +- .../resources/post_training/post_training.py | 8 +- src/llama_stack_client/resources/providers.py | 4 +- src/llama_stack_client/resources/routes.py | 4 +- src/llama_stack_client/resources/safety.py | 4 +- src/llama_stack_client/resources/scoring.py | 8 +- .../resources/scoring_functions.py | 12 +- src/llama_stack_client/resources/shields.py | 12 +- .../resources/synthetic_data_generation.py | 4 +- src/llama_stack_client/resources/telemetry.py | 8 +- .../code_interpreter_tool_definition.py | 1 + .../shared/function_call_tool_definition.py | 3 +- .../types/shared/photogen_tool_definition.py | 1 + .../types/shared/search_tool_definition.py | 1 + .../shared/wolfram_alpha_tool_definition.py | 1 + tests/api_resources/agents/test_session.py | 4 +- tests/api_resources/agents/test_turn.py | 208 +-------- tests/api_resources/test_agents.py | 76 +--- tests/api_resources/test_batch_inference.py | 422 ++---------------- tests/api_resources/test_eval.py | 32 +- tests/api_resources/test_eval_tasks.py | 16 +- tests/api_resources/test_inference.py | 152 +------ tests/api_resources/test_memory.py | 100 +---- tests/api_resources/test_post_training.py | 16 +- tests/api_resources/test_safety.py | 84 +--- tests/api_resources/test_scoring.py | 24 +- tests/api_resources/test_scoring_functions.py | 4 +- .../test_synthetic_data_generation.py | 84 +--- tests/test_client.py | 66 ++- 48 files changed, 394 insertions(+), 1273 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 0ce8fb4d..19c5603a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "llama_stack_client" -version = "0.0.49" +version = "0.0.53rc4" description = "The official Python library for the llama-stack-client API" dynamic = ["readme"] license = "Apache-2.0" diff --git a/src/llama_stack_client/_client.py b/src/llama_stack_client/_client.py index bbd2dcfa..944712e5 100644 --- a/src/llama_stack_client/_client.py +++ b/src/llama_stack_client/_client.py @@ -48,7 +48,7 @@ class LlamaStackClient(SyncAPIClient): agents: resources.AgentsResource - batch_inferences: resources.BatchInferencesResource + batch_inference: resources.BatchInferenceResource datasets: resources.DatasetsResource eval: resources.EvalResource inspect: resources.InspectResource @@ -117,7 +117,7 @@ def __init__( ) self.agents = resources.AgentsResource(self) - self.batch_inferences = resources.BatchInferencesResource(self) + self.batch_inference = resources.BatchInferenceResource(self) self.datasets = 
resources.DatasetsResource(self) self.eval = resources.EvalResource(self) self.inspect = resources.InspectResource(self) @@ -238,7 +238,7 @@ def _make_status_error( class AsyncLlamaStackClient(AsyncAPIClient): agents: resources.AsyncAgentsResource - batch_inferences: resources.AsyncBatchInferencesResource + batch_inference: resources.AsyncBatchInferenceResource datasets: resources.AsyncDatasetsResource eval: resources.AsyncEvalResource inspect: resources.AsyncInspectResource @@ -307,7 +307,7 @@ def __init__( ) self.agents = resources.AsyncAgentsResource(self) - self.batch_inferences = resources.AsyncBatchInferencesResource(self) + self.batch_inference = resources.AsyncBatchInferenceResource(self) self.datasets = resources.AsyncDatasetsResource(self) self.eval = resources.AsyncEvalResource(self) self.inspect = resources.AsyncInspectResource(self) @@ -429,7 +429,7 @@ def _make_status_error( class LlamaStackClientWithRawResponse: def __init__(self, client: LlamaStackClient) -> None: self.agents = resources.AgentsResourceWithRawResponse(client.agents) - self.batch_inferences = resources.BatchInferencesResourceWithRawResponse(client.batch_inferences) + self.batch_inference = resources.BatchInferenceResourceWithRawResponse(client.batch_inference) self.datasets = resources.DatasetsResourceWithRawResponse(client.datasets) self.eval = resources.EvalResourceWithRawResponse(client.eval) self.inspect = resources.InspectResourceWithRawResponse(client.inspect) @@ -455,7 +455,7 @@ def __init__(self, client: LlamaStackClient) -> None: class AsyncLlamaStackClientWithRawResponse: def __init__(self, client: AsyncLlamaStackClient) -> None: self.agents = resources.AsyncAgentsResourceWithRawResponse(client.agents) - self.batch_inferences = resources.AsyncBatchInferencesResourceWithRawResponse(client.batch_inferences) + self.batch_inference = resources.AsyncBatchInferenceResourceWithRawResponse(client.batch_inference) self.datasets = resources.AsyncDatasetsResourceWithRawResponse(client.datasets) self.eval = resources.AsyncEvalResourceWithRawResponse(client.eval) self.inspect = resources.AsyncInspectResourceWithRawResponse(client.inspect) @@ -481,7 +481,7 @@ def __init__(self, client: AsyncLlamaStackClient) -> None: class LlamaStackClientWithStreamedResponse: def __init__(self, client: LlamaStackClient) -> None: self.agents = resources.AgentsResourceWithStreamingResponse(client.agents) - self.batch_inferences = resources.BatchInferencesResourceWithStreamingResponse(client.batch_inferences) + self.batch_inference = resources.BatchInferenceResourceWithStreamingResponse(client.batch_inference) self.datasets = resources.DatasetsResourceWithStreamingResponse(client.datasets) self.eval = resources.EvalResourceWithStreamingResponse(client.eval) self.inspect = resources.InspectResourceWithStreamingResponse(client.inspect) @@ -507,7 +507,7 @@ def __init__(self, client: LlamaStackClient) -> None: class AsyncLlamaStackClientWithStreamedResponse: def __init__(self, client: AsyncLlamaStackClient) -> None: self.agents = resources.AsyncAgentsResourceWithStreamingResponse(client.agents) - self.batch_inferences = resources.AsyncBatchInferencesResourceWithStreamingResponse(client.batch_inferences) + self.batch_inference = resources.AsyncBatchInferenceResourceWithStreamingResponse(client.batch_inference) self.datasets = resources.AsyncDatasetsResourceWithStreamingResponse(client.datasets) self.eval = resources.AsyncEvalResourceWithStreamingResponse(client.eval) self.inspect = 
resources.AsyncInspectResourceWithStreamingResponse(client.inspect) diff --git a/src/llama_stack_client/_utils/_sync.py b/src/llama_stack_client/_utils/_sync.py index d0d81033..8b3aaf2b 100644 --- a/src/llama_stack_client/_utils/_sync.py +++ b/src/llama_stack_client/_utils/_sync.py @@ -1,56 +1,62 @@ from __future__ import annotations +import sys +import asyncio import functools -from typing import TypeVar, Callable, Awaitable +import contextvars +from typing import Any, TypeVar, Callable, Awaitable from typing_extensions import ParamSpec -import anyio -import anyio.to_thread - -from ._reflection import function_has_argument - T_Retval = TypeVar("T_Retval") T_ParamSpec = ParamSpec("T_ParamSpec") -# copied from `asyncer`, https://github.com/tiangolo/asyncer -def asyncify( - function: Callable[T_ParamSpec, T_Retval], - *, - cancellable: bool = False, - limiter: anyio.CapacityLimiter | None = None, -) -> Callable[T_ParamSpec, Awaitable[T_Retval]]: +if sys.version_info >= (3, 9): + to_thread = asyncio.to_thread +else: + # backport of https://docs.python.org/3/library/asyncio-task.html#asyncio.to_thread + # for Python 3.8 support + async def to_thread( + func: Callable[T_ParamSpec, T_Retval], /, *args: T_ParamSpec.args, **kwargs: T_ParamSpec.kwargs + ) -> Any: + """Asynchronously run function *func* in a separate thread. + + Any *args and **kwargs supplied for this function are directly passed + to *func*. Also, the current :class:`contextvars.Context` is propagated, + allowing context variables from the main thread to be accessed in the + separate thread. + + Returns a coroutine that can be awaited to get the eventual result of *func*. + """ + loop = asyncio.events.get_running_loop() + ctx = contextvars.copy_context() + func_call = functools.partial(ctx.run, func, *args, **kwargs) + return await loop.run_in_executor(None, func_call) + + +# inspired by `asyncer`, https://github.com/tiangolo/asyncer +def asyncify(function: Callable[T_ParamSpec, T_Retval]) -> Callable[T_ParamSpec, Awaitable[T_Retval]]: """ Take a blocking function and create an async one that receives the same - positional and keyword arguments, and that when called, calls the original function - in a worker thread using `anyio.to_thread.run_sync()`. Internally, - `asyncer.asyncify()` uses the same `anyio.to_thread.run_sync()`, but it supports - keyword arguments additional to positional arguments and it adds better support for - autocompletion and inline errors for the arguments of the function called and the - return value. - - If the `cancellable` option is enabled and the task waiting for its completion is - cancelled, the thread will still run its course but its return value (or any raised - exception) will be ignored. + positional and keyword arguments. For python version 3.9 and above, it uses + asyncio.to_thread to run the function in a separate thread. For python version + 3.8, it uses locally defined copy of the asyncio.to_thread function which was + introduced in python 3.9. - Use it like this: + Usage: - ```Python - def do_work(arg1, arg2, kwarg1="", kwarg2="") -> str: - # Do work - return "Some result" + ```python + def blocking_func(arg1, arg2, kwarg1=None): + # blocking code + return result - result = await to_thread.asyncify(do_work)("spam", "ham", kwarg1="a", kwarg2="b") - print(result) + result = asyncify(blocking_function)(arg1, arg2, kwarg1=value1) ``` ## Arguments `function`: a blocking regular callable (e.g. 
a function) - `cancellable`: `True` to allow cancellation of the operation - `limiter`: capacity limiter to use to limit the total amount of threads running - (if omitted, the default limiter is used) ## Return @@ -60,22 +66,6 @@ def do_work(arg1, arg2, kwarg1="", kwarg2="") -> str: """ async def wrapper(*args: T_ParamSpec.args, **kwargs: T_ParamSpec.kwargs) -> T_Retval: - partial_f = functools.partial(function, *args, **kwargs) - - # In `v4.1.0` anyio added the `abandon_on_cancel` argument and deprecated the old - # `cancellable` argument, so we need to use the new `abandon_on_cancel` to avoid - # surfacing deprecation warnings. - if function_has_argument(anyio.to_thread.run_sync, "abandon_on_cancel"): - return await anyio.to_thread.run_sync( - partial_f, - abandon_on_cancel=cancellable, - limiter=limiter, - ) - - return await anyio.to_thread.run_sync( - partial_f, - cancellable=cancellable, - limiter=limiter, - ) + return await to_thread(function, *args, **kwargs) return wrapper diff --git a/src/llama_stack_client/resources/__init__.py b/src/llama_stack_client/resources/__init__.py index 8f3c4abb..be72e181 100644 --- a/src/llama_stack_client/resources/__init__.py +++ b/src/llama_stack_client/resources/__init__.py @@ -136,13 +136,13 @@ PostTrainingResourceWithStreamingResponse, AsyncPostTrainingResourceWithStreamingResponse, ) -from .batch_inferences import ( - BatchInferencesResource, - AsyncBatchInferencesResource, - BatchInferencesResourceWithRawResponse, - AsyncBatchInferencesResourceWithRawResponse, - BatchInferencesResourceWithStreamingResponse, - AsyncBatchInferencesResourceWithStreamingResponse, +from .batch_inference import ( + BatchInferenceResource, + AsyncBatchInferenceResource, + BatchInferenceResourceWithRawResponse, + AsyncBatchInferenceResourceWithRawResponse, + BatchInferenceResourceWithStreamingResponse, + AsyncBatchInferenceResourceWithStreamingResponse, ) from .scoring_functions import ( ScoringFunctionsResource, @@ -168,12 +168,12 @@ "AsyncAgentsResourceWithRawResponse", "AgentsResourceWithStreamingResponse", "AsyncAgentsResourceWithStreamingResponse", - "BatchInferencesResource", - "AsyncBatchInferencesResource", - "BatchInferencesResourceWithRawResponse", - "AsyncBatchInferencesResourceWithRawResponse", - "BatchInferencesResourceWithStreamingResponse", - "AsyncBatchInferencesResourceWithStreamingResponse", + "BatchInferenceResource", + "AsyncBatchInferenceResource", + "BatchInferenceResourceWithRawResponse", + "AsyncBatchInferenceResourceWithRawResponse", + "BatchInferenceResourceWithStreamingResponse", + "AsyncBatchInferenceResourceWithStreamingResponse", "DatasetsResource", "AsyncDatasetsResource", "DatasetsResourceWithRawResponse", diff --git a/src/llama_stack_client/resources/agents/agents.py b/src/llama_stack_client/resources/agents/agents.py index 9945a92a..6908ddde 100644 --- a/src/llama_stack_client/resources/agents/agents.py +++ b/src/llama_stack_client/resources/agents/agents.py @@ -109,7 +109,7 @@ def create( **(extra_headers or {}), } return self._post( - "/agents/create", + "/alpha/agents/create", body=maybe_transform({"agent_config": agent_config}, agent_create_params.AgentCreateParams), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout @@ -145,7 +145,7 @@ def delete( **(extra_headers or {}), } return self._post( - "/agents/delete", + "/alpha/agents/delete", body=maybe_transform({"agent_id": agent_id}, agent_delete_params.AgentDeleteParams), options=make_request_options( 
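For context on the `_utils/_sync.py` change above, here is a minimal, hedged sketch of how the new `asyncify` helper behaves. It assumes the private import path `llama_stack_client._utils._sync` (an internal module, so subject to change) and Python 3.9+, where `asyncio.to_thread` is available; on 3.8 the backported `to_thread` defined above is used instead.

```python
import asyncio
import time

# Internal helper added in this patch; private import path, subject to change.
from llama_stack_client._utils._sync import asyncify


def blocking_work(seconds: float) -> str:
    # Stand-in for a blocking call (e.g. a synchronous HTTP request).
    time.sleep(seconds)
    return f"slept {seconds}s"


async def main() -> None:
    # asyncify(...) wraps the blocking function; awaiting the wrapper runs it
    # in a worker thread via asyncio.to_thread (or the 3.8 backport above).
    result = await asyncify(blocking_work)(0.1)
    print(result)


asyncio.run(main())
```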
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout @@ -213,7 +213,7 @@ async def create( **(extra_headers or {}), } return await self._post( - "/agents/create", + "/alpha/agents/create", body=await async_maybe_transform({"agent_config": agent_config}, agent_create_params.AgentCreateParams), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout @@ -249,7 +249,7 @@ async def delete( **(extra_headers or {}), } return await self._post( - "/agents/delete", + "/alpha/agents/delete", body=await async_maybe_transform({"agent_id": agent_id}, agent_delete_params.AgentDeleteParams), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout diff --git a/src/llama_stack_client/resources/agents/session.py b/src/llama_stack_client/resources/agents/session.py index ca7f7570..e107af97 100644 --- a/src/llama_stack_client/resources/agents/session.py +++ b/src/llama_stack_client/resources/agents/session.py @@ -76,7 +76,7 @@ def create( **(extra_headers or {}), } return self._post( - "/agents/session/create", + "/alpha/agents/session/create", body=maybe_transform( { "agent_id": agent_id, @@ -119,7 +119,7 @@ def retrieve( **(extra_headers or {}), } return self._post( - "/agents/session/get", + "/alpha/agents/session/get", body=maybe_transform({"turn_ids": turn_ids}, session_retrieve_params.SessionRetrieveParams), options=make_request_options( extra_headers=extra_headers, @@ -166,7 +166,7 @@ def delete( **(extra_headers or {}), } return self._post( - "/agents/session/delete", + "/alpha/agents/session/delete", body=maybe_transform( { "agent_id": agent_id, @@ -229,7 +229,7 @@ async def create( **(extra_headers or {}), } return await self._post( - "/agents/session/create", + "/alpha/agents/session/create", body=await async_maybe_transform( { "agent_id": agent_id, @@ -272,7 +272,7 @@ async def retrieve( **(extra_headers or {}), } return await self._post( - "/agents/session/get", + "/alpha/agents/session/get", body=await async_maybe_transform({"turn_ids": turn_ids}, session_retrieve_params.SessionRetrieveParams), options=make_request_options( extra_headers=extra_headers, @@ -319,7 +319,7 @@ async def delete( **(extra_headers or {}), } return await self._post( - "/agents/session/delete", + "/alpha/agents/session/delete", body=await async_maybe_transform( { "agent_id": agent_id, diff --git a/src/llama_stack_client/resources/agents/steps.py b/src/llama_stack_client/resources/agents/steps.py index 80f5db43..68dad226 100644 --- a/src/llama_stack_client/resources/agents/steps.py +++ b/src/llama_stack_client/resources/agents/steps.py @@ -75,7 +75,7 @@ def retrieve( **(extra_headers or {}), } return self._get( - "/agents/step/get", + "/alpha/agents/step/get", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, @@ -145,7 +145,7 @@ async def retrieve( **(extra_headers or {}), } return await self._get( - "/agents/step/get", + "/alpha/agents/step/get", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, diff --git a/src/llama_stack_client/resources/agents/turn.py b/src/llama_stack_client/resources/agents/turn.py index 4738b0ff..540d778e 100644 --- a/src/llama_stack_client/resources/agents/turn.py +++ b/src/llama_stack_client/resources/agents/turn.py @@ -164,7 +164,7 @@ def create( return cast( TurnCreateResponse, self._post( - "/agents/turn/create", + "/alpha/agents/turn/create", 
body=maybe_transform( { "agent_id": agent_id, @@ -215,7 +215,7 @@ def retrieve( **(extra_headers or {}), } return self._get( - "/agents/turn/get", + "/alpha/agents/turn/get", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, @@ -366,7 +366,7 @@ async def create( return cast( TurnCreateResponse, await self._post( - "/agents/turn/create", + "/alpha/agents/turn/create", body=await async_maybe_transform( { "agent_id": agent_id, @@ -417,7 +417,7 @@ async def retrieve( **(extra_headers or {}), } return await self._get( - "/agents/turn/get", + "/alpha/agents/turn/get", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, diff --git a/src/llama_stack_client/resources/batch_inference.py b/src/llama_stack_client/resources/batch_inference.py index 9b5ab542..135311e0 100644 --- a/src/llama_stack_client/resources/batch_inference.py +++ b/src/llama_stack_client/resources/batch_inference.py @@ -23,9 +23,9 @@ async_to_streamed_response_wrapper, ) from .._base_client import make_request_options -from ..types.batch_chat_completion import BatchChatCompletion from ..types.shared.batch_completion import BatchCompletion from ..types.shared_params.sampling_params import SamplingParams +from ..types.batch_inference_chat_completion_response import BatchInferenceChatCompletionResponse __all__ = ["BatchInferenceResource", "AsyncBatchInferenceResource"] @@ -67,7 +67,7 @@ def chat_completion( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> BatchChatCompletion: + ) -> BatchInferenceChatCompletionResponse: """ Args: tool_prompt_format: `json` -- Refers to the json format for calling tools. The json format takes the @@ -93,7 +93,7 @@ def chat_completion( **(extra_headers or {}), } return self._post( - "/batch_inference/chat_completion", + "/alpha/batch-inference/chat-completion", body=maybe_transform( { "messages_batch": messages_batch, @@ -109,7 +109,7 @@ def chat_completion( options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), - cast_to=BatchChatCompletion, + cast_to=BatchInferenceChatCompletionResponse, ) def completion( @@ -142,7 +142,7 @@ def completion( **(extra_headers or {}), } return self._post( - "/batch_inference/completion", + "/alpha/batch-inference/completion", body=maybe_transform( { "content_batch": content_batch, @@ -196,7 +196,7 @@ async def chat_completion( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> BatchChatCompletion: + ) -> BatchInferenceChatCompletionResponse: """ Args: tool_prompt_format: `json` -- Refers to the json format for calling tools. 
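As a usage sketch of the renamed batch-inference route and the `tool_prompt_format` option documented here — the base URL, model id, and tool list below are placeholders, not values taken from this patch:

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:5000")  # placeholder endpoint

# Hits POST /alpha/batch-inference/chat-completion and returns a
# BatchInferenceChatCompletionResponse (both renamed in this patch).
response = client.batch_inference.chat_completion(
    messages_batch=[
        [{"role": "user", "content": "What is the capital of France?"}],
    ],
    model="example-model",  # placeholder model id
    tools=[{"tool_name": "brave_search"}],
    tool_prompt_format="json",  # describe tools to the model as JSON
)
print(response)
```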
The json format takes the @@ -222,7 +222,7 @@ async def chat_completion( **(extra_headers or {}), } return await self._post( - "/batch_inference/chat_completion", + "/alpha/batch-inference/chat-completion", body=await async_maybe_transform( { "messages_batch": messages_batch, @@ -238,7 +238,7 @@ async def chat_completion( options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), - cast_to=BatchChatCompletion, + cast_to=BatchInferenceChatCompletionResponse, ) async def completion( @@ -271,7 +271,7 @@ async def completion( **(extra_headers or {}), } return await self._post( - "/batch_inference/completion", + "/alpha/batch-inference/completion", body=await async_maybe_transform( { "content_batch": content_batch, diff --git a/src/llama_stack_client/resources/datasetio.py b/src/llama_stack_client/resources/datasetio.py index 92dafdbf..7187f085 100644 --- a/src/llama_stack_client/resources/datasetio.py +++ b/src/llama_stack_client/resources/datasetio.py @@ -75,7 +75,7 @@ def get_rows_paginated( **(extra_headers or {}), } return self._get( - "/datasetio/get_rows_paginated", + "/alpha/datasetio/get-rows-paginated", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, @@ -145,7 +145,7 @@ async def get_rows_paginated( **(extra_headers or {}), } return await self._get( - "/datasetio/get_rows_paginated", + "/alpha/datasetio/get-rows-paginated", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, diff --git a/src/llama_stack_client/resources/datasets.py b/src/llama_stack_client/resources/datasets.py index cd8e35dc..3952c3b2 100644 --- a/src/llama_stack_client/resources/datasets.py +++ b/src/llama_stack_client/resources/datasets.py @@ -75,7 +75,7 @@ def retrieve( **(extra_headers or {}), } return self._get( - "/datasets/get", + "/alpha/datasets/get", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, @@ -113,7 +113,7 @@ def list( **(extra_headers or {}), } return self._get( - "/datasets/list", + "/alpha/datasets/list", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -153,7 +153,7 @@ def register( **(extra_headers or {}), } return self._post( - "/datasets/register", + "/alpha/datasets/register", body=maybe_transform( { "dataset_id": dataset_id, @@ -219,7 +219,7 @@ async def retrieve( **(extra_headers or {}), } return await self._get( - "/datasets/get", + "/alpha/datasets/get", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, @@ -259,7 +259,7 @@ async def list( **(extra_headers or {}), } return await self._get( - "/datasets/list", + "/alpha/datasets/list", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -299,7 +299,7 @@ async def register( **(extra_headers or {}), } return await self._post( - "/datasets/register", + "/alpha/datasets/register", body=await async_maybe_transform( { "dataset_id": dataset_id, diff --git a/src/llama_stack_client/resources/eval/eval.py b/src/llama_stack_client/resources/eval/eval.py index 8ba6debe..d33a3843 100644 --- a/src/llama_stack_client/resources/eval/eval.py +++ b/src/llama_stack_client/resources/eval/eval.py @@ -90,7 +90,7 @@ def evaluate_rows( **(extra_headers or {}), } return self._post( - "/eval/evaluate_rows", + "/alpha/eval/evaluate-rows", body=maybe_transform( { "input_rows": input_rows, @@ -134,7 +134,7 @@ def 
run_eval( **(extra_headers or {}), } return self._post( - "/eval/run_eval", + "/alpha/eval/run-eval", body=maybe_transform( { "task_config": task_config, @@ -203,7 +203,7 @@ async def evaluate_rows( **(extra_headers or {}), } return await self._post( - "/eval/evaluate_rows", + "/alpha/eval/evaluate-rows", body=await async_maybe_transform( { "input_rows": input_rows, @@ -247,7 +247,7 @@ async def run_eval( **(extra_headers or {}), } return await self._post( - "/eval/run_eval", + "/alpha/eval/run-eval", body=await async_maybe_transform( { "task_config": task_config, diff --git a/src/llama_stack_client/resources/eval/jobs.py b/src/llama_stack_client/resources/eval/jobs.py index 53154d27..fd95ad41 100644 --- a/src/llama_stack_client/resources/eval/jobs.py +++ b/src/llama_stack_client/resources/eval/jobs.py @@ -76,7 +76,7 @@ def retrieve( **(extra_headers or {}), } return self._get( - "/eval/job/result", + "/alpha/eval/job/result", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, @@ -122,7 +122,7 @@ def cancel( **(extra_headers or {}), } return self._post( - "/eval/job/cancel", + "/alpha/eval/job/cancel", body=maybe_transform( { "job_id": job_id, @@ -164,7 +164,7 @@ def status( **(extra_headers or {}), } return self._get( - "/eval/job/status", + "/alpha/eval/job/status", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, @@ -230,7 +230,7 @@ async def retrieve( **(extra_headers or {}), } return await self._get( - "/eval/job/result", + "/alpha/eval/job/result", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, @@ -276,7 +276,7 @@ async def cancel( **(extra_headers or {}), } return await self._post( - "/eval/job/cancel", + "/alpha/eval/job/cancel", body=await async_maybe_transform( { "job_id": job_id, @@ -318,7 +318,7 @@ async def status( **(extra_headers or {}), } return await self._get( - "/eval/job/status", + "/alpha/eval/job/status", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, diff --git a/src/llama_stack_client/resources/eval_tasks.py b/src/llama_stack_client/resources/eval_tasks.py index 558886ff..93b5e4ad 100644 --- a/src/llama_stack_client/resources/eval_tasks.py +++ b/src/llama_stack_client/resources/eval_tasks.py @@ -74,7 +74,7 @@ def retrieve( **(extra_headers or {}), } return self._get( - "/eval_tasks/get", + "/alpha/eval-tasks/get", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, @@ -112,7 +112,7 @@ def list( **(extra_headers or {}), } return self._get( - "/eval_tasks/list", + "/alpha/eval-tasks/list", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -152,7 +152,7 @@ def register( **(extra_headers or {}), } return self._post( - "/eval_tasks/register", + "/alpha/eval-tasks/register", body=maybe_transform( { "dataset_id": dataset_id, @@ -218,7 +218,7 @@ async def retrieve( **(extra_headers or {}), } return await self._get( - "/eval_tasks/get", + "/alpha/eval-tasks/get", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, @@ -256,7 +256,7 @@ async def list( **(extra_headers or {}), } return await self._get( - "/eval_tasks/list", + "/alpha/eval-tasks/list", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -296,7 +296,7 @@ async def register( **(extra_headers or {}), } return await self._post( - "/eval_tasks/register", + 
"/alpha/eval-tasks/register", body=await async_maybe_transform( { "dataset_id": dataset_id, diff --git a/src/llama_stack_client/resources/inference.py b/src/llama_stack_client/resources/inference.py index 98cdc359..248c00e8 100644 --- a/src/llama_stack_client/resources/inference.py +++ b/src/llama_stack_client/resources/inference.py @@ -103,7 +103,7 @@ def chat_completion( return cast( InferenceChatCompletionResponse, self._post( - "/inference/chat_completion", + "/alpha/inference/chat-completion", body=maybe_transform( { "messages": messages, @@ -162,7 +162,7 @@ def completion( return cast( InferenceCompletionResponse, self._post( - "/inference/completion", + "/alpha/inference/completion", body=maybe_transform( { "content": content, @@ -211,7 +211,7 @@ def embeddings( **(extra_headers or {}), } return self._post( - "/inference/embeddings", + "/alpha/inference/embeddings", body=maybe_transform( { "contents": contents, @@ -294,7 +294,7 @@ async def chat_completion( return cast( InferenceChatCompletionResponse, await self._post( - "/inference/chat_completion", + "/alpha/inference/chat-completion", body=await async_maybe_transform( { "messages": messages, @@ -353,7 +353,7 @@ async def completion( return cast( InferenceCompletionResponse, await self._post( - "/inference/completion", + "/alpha/inference/completion", body=await async_maybe_transform( { "content": content, @@ -402,7 +402,7 @@ async def embeddings( **(extra_headers or {}), } return await self._post( - "/inference/embeddings", + "/alpha/inference/embeddings", body=await async_maybe_transform( { "contents": contents, diff --git a/src/llama_stack_client/resources/inspect.py b/src/llama_stack_client/resources/inspect.py index 3eb54981..9a4a0e9e 100644 --- a/src/llama_stack_client/resources/inspect.py +++ b/src/llama_stack_client/resources/inspect.py @@ -66,7 +66,7 @@ def health( **(extra_headers or {}), } return self._get( - "/health", + "/alpha/health", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -120,7 +120,7 @@ async def health( **(extra_headers or {}), } return await self._get( - "/health", + "/alpha/health", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), diff --git a/src/llama_stack_client/resources/memory.py b/src/llama_stack_client/resources/memory.py index 6f400752..8a120ae5 100644 --- a/src/llama_stack_client/resources/memory.py +++ b/src/llama_stack_client/resources/memory.py @@ -77,7 +77,7 @@ def insert( **(extra_headers or {}), } return self._post( - "/memory/insert", + "/alpha/memory/insert", body=maybe_transform( { "bank_id": bank_id, @@ -121,7 +121,7 @@ def query( **(extra_headers or {}), } return self._post( - "/memory/query", + "/alpha/memory/query", body=maybe_transform( { "bank_id": bank_id, @@ -187,7 +187,7 @@ async def insert( **(extra_headers or {}), } return await self._post( - "/memory/insert", + "/alpha/memory/insert", body=await async_maybe_transform( { "bank_id": bank_id, @@ -231,7 +231,7 @@ async def query( **(extra_headers or {}), } return await self._post( - "/memory/query", + "/alpha/memory/query", body=await async_maybe_transform( { "bank_id": bank_id, diff --git a/src/llama_stack_client/resources/memory_banks.py b/src/llama_stack_client/resources/memory_banks.py index 78de27f8..1b47ee9a 100644 --- a/src/llama_stack_client/resources/memory_banks.py +++ b/src/llama_stack_client/resources/memory_banks.py @@ -81,7 +81,7 @@ def retrieve( return 
cast( Optional[MemoryBankRetrieveResponse], self._get( - "/memory_banks/get", + "/alpha/memory-banks/get", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, @@ -126,7 +126,7 @@ def list( return cast( MemoryBankListResponse, self._get( - "/memory_banks/list", + "/alpha/memory-banks/list", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -167,7 +167,7 @@ def register( **(extra_headers or {}), } return self._post( - "/memory_banks/register", + "/alpha/memory-banks/register", body=maybe_transform( { "memory_bank_id": memory_bank_id, @@ -211,7 +211,7 @@ def unregister( **(extra_headers or {}), } return self._post( - "/memory_banks/unregister", + "/alpha/memory-banks/unregister", body=maybe_transform( {"memory_bank_id": memory_bank_id}, memory_bank_unregister_params.MemoryBankUnregisterParams ), @@ -271,7 +271,7 @@ async def retrieve( return cast( Optional[MemoryBankRetrieveResponse], await self._get( - "/memory_banks/get", + "/alpha/memory-banks/get", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, @@ -316,7 +316,7 @@ async def list( return cast( MemoryBankListResponse, await self._get( - "/memory_banks/list", + "/alpha/memory-banks/list", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -357,7 +357,7 @@ async def register( **(extra_headers or {}), } return await self._post( - "/memory_banks/register", + "/alpha/memory-banks/register", body=await async_maybe_transform( { "memory_bank_id": memory_bank_id, @@ -401,7 +401,7 @@ async def unregister( **(extra_headers or {}), } return await self._post( - "/memory_banks/unregister", + "/alpha/memory-banks/unregister", body=await async_maybe_transform( {"memory_bank_id": memory_bank_id}, memory_bank_unregister_params.MemoryBankUnregisterParams ), diff --git a/src/llama_stack_client/resources/models.py b/src/llama_stack_client/resources/models.py index 1a7e8ecc..bdd39e6b 100644 --- a/src/llama_stack_client/resources/models.py +++ b/src/llama_stack_client/resources/models.py @@ -74,7 +74,7 @@ def retrieve( **(extra_headers or {}), } return self._get( - "/models/get", + "/alpha/models/get", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, @@ -112,7 +112,7 @@ def list( **(extra_headers or {}), } return self._get( - "/models/list", + "/alpha/models/list", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -149,7 +149,7 @@ def register( **(extra_headers or {}), } return self._post( - "/models/register", + "/alpha/models/register", body=maybe_transform( { "model_id": model_id, @@ -193,7 +193,7 @@ def unregister( **(extra_headers or {}), } return self._post( - "/models/unregister", + "/alpha/models/unregister", body=maybe_transform({"model_id": model_id}, model_unregister_params.ModelUnregisterParams), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout @@ -249,7 +249,7 @@ async def retrieve( **(extra_headers or {}), } return await self._get( - "/models/get", + "/alpha/models/get", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, @@ -289,7 +289,7 @@ async def list( **(extra_headers or {}), } return await self._get( - "/models/list", + "/alpha/models/list", options=make_request_options( extra_headers=extra_headers, 
extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -326,7 +326,7 @@ async def register( **(extra_headers or {}), } return await self._post( - "/models/register", + "/alpha/models/register", body=await async_maybe_transform( { "model_id": model_id, @@ -370,7 +370,7 @@ async def unregister( **(extra_headers or {}), } return await self._post( - "/models/unregister", + "/alpha/models/unregister", body=await async_maybe_transform({"model_id": model_id}, model_unregister_params.ModelUnregisterParams), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout diff --git a/src/llama_stack_client/resources/post_training/job.py b/src/llama_stack_client/resources/post_training/job.py index 984b6720..79e8e2f1 100644 --- a/src/llama_stack_client/resources/post_training/job.py +++ b/src/llama_stack_client/resources/post_training/job.py @@ -75,7 +75,7 @@ def list( **(extra_headers or {}), } return self._get( - "/post_training/jobs", + "/alpha/post-training/jobs", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -109,7 +109,7 @@ def artifacts( **(extra_headers or {}), } return self._get( - "/post_training/job/artifacts", + "/alpha/post-training/job/artifacts", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, @@ -148,7 +148,7 @@ def cancel( **(extra_headers or {}), } return self._post( - "/post_training/job/cancel", + "/alpha/post-training/job/cancel", body=maybe_transform({"job_uuid": job_uuid}, job_cancel_params.JobCancelParams), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout @@ -183,7 +183,7 @@ def logs( **(extra_headers or {}), } return self._get( - "/post_training/job/logs", + "/alpha/post-training/job/logs", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, @@ -221,7 +221,7 @@ def status( **(extra_headers or {}), } return self._get( - "/post_training/job/status", + "/alpha/post-training/job/status", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, @@ -280,7 +280,7 @@ async def list( **(extra_headers or {}), } return await self._get( - "/post_training/jobs", + "/alpha/post-training/jobs", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -314,7 +314,7 @@ async def artifacts( **(extra_headers or {}), } return await self._get( - "/post_training/job/artifacts", + "/alpha/post-training/job/artifacts", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, @@ -353,7 +353,7 @@ async def cancel( **(extra_headers or {}), } return await self._post( - "/post_training/job/cancel", + "/alpha/post-training/job/cancel", body=await async_maybe_transform({"job_uuid": job_uuid}, job_cancel_params.JobCancelParams), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout @@ -388,7 +388,7 @@ async def logs( **(extra_headers or {}), } return await self._get( - "/post_training/job/logs", + "/alpha/post-training/job/logs", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, @@ -426,7 +426,7 @@ async def status( **(extra_headers or {}), } return await self._get( - "/post_training/job/status", + "/alpha/post-training/job/status", options=make_request_options( 
extra_headers=extra_headers, extra_query=extra_query, diff --git a/src/llama_stack_client/resources/post_training/post_training.py b/src/llama_stack_client/resources/post_training/post_training.py index c54cfb4e..29b2d77c 100644 --- a/src/llama_stack_client/resources/post_training/post_training.py +++ b/src/llama_stack_client/resources/post_training/post_training.py @@ -96,7 +96,7 @@ def preference_optimize( **(extra_headers or {}), } return self._post( - "/post_training/preference_optimize", + "/alpha/post-training/preference-optimize", body=maybe_transform( { "algorithm": algorithm, @@ -154,7 +154,7 @@ def supervised_fine_tune( **(extra_headers or {}), } return self._post( - "/post_training/supervised_fine_tune", + "/alpha/post-training/supervised-fine-tune", body=maybe_transform( { "algorithm": algorithm, @@ -237,7 +237,7 @@ async def preference_optimize( **(extra_headers or {}), } return await self._post( - "/post_training/preference_optimize", + "/alpha/post-training/preference-optimize", body=await async_maybe_transform( { "algorithm": algorithm, @@ -295,7 +295,7 @@ async def supervised_fine_tune( **(extra_headers or {}), } return await self._post( - "/post_training/supervised_fine_tune", + "/alpha/post-training/supervised-fine-tune", body=await async_maybe_transform( { "algorithm": algorithm, diff --git a/src/llama_stack_client/resources/providers.py b/src/llama_stack_client/resources/providers.py index b0aae344..c31bb4a6 100644 --- a/src/llama_stack_client/resources/providers.py +++ b/src/llama_stack_client/resources/providers.py @@ -66,7 +66,7 @@ def list( **(extra_headers or {}), } return self._get( - "/providers/list", + "/alpha/providers/list", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -120,7 +120,7 @@ async def list( **(extra_headers or {}), } return await self._get( - "/providers/list", + "/alpha/providers/list", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), diff --git a/src/llama_stack_client/resources/routes.py b/src/llama_stack_client/resources/routes.py index 8ef4cfc4..8b4df0f4 100644 --- a/src/llama_stack_client/resources/routes.py +++ b/src/llama_stack_client/resources/routes.py @@ -66,7 +66,7 @@ def list( **(extra_headers or {}), } return self._get( - "/routes/list", + "/alpha/routes/list", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -120,7 +120,7 @@ async def list( **(extra_headers or {}), } return await self._get( - "/routes/list", + "/alpha/routes/list", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), diff --git a/src/llama_stack_client/resources/safety.py b/src/llama_stack_client/resources/safety.py index 12b20a72..e51aa99c 100644 --- a/src/llama_stack_client/resources/safety.py +++ b/src/llama_stack_client/resources/safety.py @@ -76,7 +76,7 @@ def run_shield( **(extra_headers or {}), } return self._post( - "/safety/run_shield", + "/alpha/safety/run-shield", body=maybe_transform( { "messages": messages, @@ -141,7 +141,7 @@ async def run_shield( **(extra_headers or {}), } return await self._post( - "/safety/run_shield", + "/alpha/safety/run-shield", body=await async_maybe_transform( { "messages": messages, diff --git a/src/llama_stack_client/resources/scoring.py b/src/llama_stack_client/resources/scoring.py index 6d39faa1..96f4416f 100644 --- 
a/src/llama_stack_client/resources/scoring.py +++ b/src/llama_stack_client/resources/scoring.py @@ -76,7 +76,7 @@ def score( **(extra_headers or {}), } return self._post( - "/scoring/score", + "/alpha/scoring/score", body=maybe_transform( { "input_rows": input_rows, @@ -119,7 +119,7 @@ def score_batch( **(extra_headers or {}), } return self._post( - "/scoring/score_batch", + "/alpha/scoring/score-batch", body=maybe_transform( { "dataset_id": dataset_id, @@ -183,7 +183,7 @@ async def score( **(extra_headers or {}), } return await self._post( - "/scoring/score", + "/alpha/scoring/score", body=await async_maybe_transform( { "input_rows": input_rows, @@ -226,7 +226,7 @@ async def score_batch( **(extra_headers or {}), } return await self._post( - "/scoring/score_batch", + "/alpha/scoring/score-batch", body=await async_maybe_transform( { "dataset_id": dataset_id, diff --git a/src/llama_stack_client/resources/scoring_functions.py b/src/llama_stack_client/resources/scoring_functions.py index 30ec27d2..def46b90 100644 --- a/src/llama_stack_client/resources/scoring_functions.py +++ b/src/llama_stack_client/resources/scoring_functions.py @@ -74,7 +74,7 @@ def retrieve( **(extra_headers or {}), } return self._get( - "/scoring_functions/get", + "/alpha/scoring-functions/get", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, @@ -114,7 +114,7 @@ def list( **(extra_headers or {}), } return self._get( - "/scoring_functions/list", + "/alpha/scoring-functions/list", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -154,7 +154,7 @@ def register( **(extra_headers or {}), } return self._post( - "/scoring_functions/register", + "/alpha/scoring-functions/register", body=maybe_transform( { "description": description, @@ -220,7 +220,7 @@ async def retrieve( **(extra_headers or {}), } return await self._get( - "/scoring_functions/get", + "/alpha/scoring-functions/get", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, @@ -260,7 +260,7 @@ async def list( **(extra_headers or {}), } return await self._get( - "/scoring_functions/list", + "/alpha/scoring-functions/list", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -300,7 +300,7 @@ async def register( **(extra_headers or {}), } return await self._post( - "/scoring_functions/register", + "/alpha/scoring-functions/register", body=await async_maybe_transform( { "description": description, diff --git a/src/llama_stack_client/resources/shields.py b/src/llama_stack_client/resources/shields.py index 28a3b259..80c85900 100644 --- a/src/llama_stack_client/resources/shields.py +++ b/src/llama_stack_client/resources/shields.py @@ -74,7 +74,7 @@ def retrieve( **(extra_headers or {}), } return self._get( - "/shields/get", + "/alpha/shields/get", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, @@ -112,7 +112,7 @@ def list( **(extra_headers or {}), } return self._get( - "/shields/list", + "/alpha/shields/list", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -149,7 +149,7 @@ def register( **(extra_headers or {}), } return self._post( - "/shields/register", + "/alpha/shields/register", body=maybe_transform( { "shield_id": shield_id, @@ -213,7 +213,7 @@ async def retrieve( **(extra_headers or {}), } return await self._get( - "/shields/get", + 
"/alpha/shields/get", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, @@ -253,7 +253,7 @@ async def list( **(extra_headers or {}), } return await self._get( - "/shields/list", + "/alpha/shields/list", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -290,7 +290,7 @@ async def register( **(extra_headers or {}), } return await self._post( - "/shields/register", + "/alpha/shields/register", body=await async_maybe_transform( { "shield_id": shield_id, diff --git a/src/llama_stack_client/resources/synthetic_data_generation.py b/src/llama_stack_client/resources/synthetic_data_generation.py index e8971b15..9174abd1 100644 --- a/src/llama_stack_client/resources/synthetic_data_generation.py +++ b/src/llama_stack_client/resources/synthetic_data_generation.py @@ -77,7 +77,7 @@ def generate( **(extra_headers or {}), } return self._post( - "/synthetic_data_generation/generate", + "/alpha/synthetic-data-generation/generate", body=maybe_transform( { "dialogs": dialogs, @@ -142,7 +142,7 @@ async def generate( **(extra_headers or {}), } return await self._post( - "/synthetic_data_generation/generate", + "/alpha/synthetic-data-generation/generate", body=await async_maybe_transform( { "dialogs": dialogs, diff --git a/src/llama_stack_client/resources/telemetry.py b/src/llama_stack_client/resources/telemetry.py index 8c48850a..d417bf38 100644 --- a/src/llama_stack_client/resources/telemetry.py +++ b/src/llama_stack_client/resources/telemetry.py @@ -72,7 +72,7 @@ def get_trace( **(extra_headers or {}), } return self._get( - "/telemetry/get_trace", + "/alpha/telemetry/get-trace", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, @@ -111,7 +111,7 @@ def log_event( **(extra_headers or {}), } return self._post( - "/telemetry/log_event", + "/alpha/telemetry/log-event", body=maybe_transform({"event": event}, telemetry_log_event_params.TelemetryLogEventParams), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout @@ -167,7 +167,7 @@ async def get_trace( **(extra_headers or {}), } return await self._get( - "/telemetry/get_trace", + "/alpha/telemetry/get-trace", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, @@ -208,7 +208,7 @@ async def log_event( **(extra_headers or {}), } return await self._post( - "/telemetry/log_event", + "/alpha/telemetry/log-event", body=await async_maybe_transform({"event": event}, telemetry_log_event_params.TelemetryLogEventParams), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout diff --git a/src/llama_stack_client/types/shared/code_interpreter_tool_definition.py b/src/llama_stack_client/types/shared/code_interpreter_tool_definition.py index e6839537..e9031b9a 100644 --- a/src/llama_stack_client/types/shared/code_interpreter_tool_definition.py +++ b/src/llama_stack_client/types/shared/code_interpreter_tool_definition.py @@ -1,6 +1,7 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
from typing import List, Optional + from typing_extensions import Literal from ..._models import BaseModel diff --git a/src/llama_stack_client/types/shared/function_call_tool_definition.py b/src/llama_stack_client/types/shared/function_call_tool_definition.py index f25b762d..6ebfa983 100644 --- a/src/llama_stack_client/types/shared/function_call_tool_definition.py +++ b/src/llama_stack_client/types/shared/function_call_tool_definition.py @@ -1,11 +1,12 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing import Dict, List, Optional + from typing_extensions import Literal from ..._models import BaseModel -from .tool_param_definition import ToolParamDefinition from ..rest_api_execution_config_param import RestAPIExecutionConfigParam +from .tool_param_definition import ToolParamDefinition __all__ = ["FunctionCallToolDefinition"] diff --git a/src/llama_stack_client/types/shared/photogen_tool_definition.py b/src/llama_stack_client/types/shared/photogen_tool_definition.py index 80690a03..0b8b643d 100644 --- a/src/llama_stack_client/types/shared/photogen_tool_definition.py +++ b/src/llama_stack_client/types/shared/photogen_tool_definition.py @@ -1,6 +1,7 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing import List, Optional + from typing_extensions import Literal from ..._models import BaseModel diff --git a/src/llama_stack_client/types/shared/search_tool_definition.py b/src/llama_stack_client/types/shared/search_tool_definition.py index 7b32ce3f..8186c324 100644 --- a/src/llama_stack_client/types/shared/search_tool_definition.py +++ b/src/llama_stack_client/types/shared/search_tool_definition.py @@ -1,6 +1,7 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing import List, Optional + from typing_extensions import Literal from ..._models import BaseModel diff --git a/src/llama_stack_client/types/shared/wolfram_alpha_tool_definition.py b/src/llama_stack_client/types/shared/wolfram_alpha_tool_definition.py index f214b9bb..ccee3738 100644 --- a/src/llama_stack_client/types/shared/wolfram_alpha_tool_definition.py +++ b/src/llama_stack_client/types/shared/wolfram_alpha_tool_definition.py @@ -1,6 +1,7 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
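One practical note on the pattern running through the hunks above: every server route gains an `/alpha` prefix and uses kebab-case segments where underscores were used before, while the Python client surface is unchanged. A hedged sketch, assuming a locally running stack:

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:5000")  # assumed local server

# Same SDK call as before this patch; it now issues GET /alpha/models/list
# rather than GET /models/list.
models = client.models.list()
print(models)
```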
from typing import List, Optional + from typing_extensions import Literal from ..._models import BaseModel diff --git a/tests/api_resources/agents/test_session.py b/tests/api_resources/agents/test_session.py index 9c3a0364..da150a8b 100644 --- a/tests/api_resources/agents/test_session.py +++ b/tests/api_resources/agents/test_session.py @@ -76,7 +76,7 @@ def test_method_retrieve_with_all_params(self, client: LlamaStackClient) -> None session = client.agents.session.retrieve( agent_id="agent_id", session_id="session_id", - turn_ids=["string", "string", "string"], + turn_ids=["string"], x_llama_stack_provider_data="X-LlamaStack-ProviderData", ) assert_matches_type(Session, session, path=["response"]) @@ -210,7 +210,7 @@ async def test_method_retrieve_with_all_params(self, async_client: AsyncLlamaSta session = await async_client.agents.session.retrieve( agent_id="agent_id", session_id="session_id", - turn_ids=["string", "string", "string"], + turn_ids=["string"], x_llama_stack_provider_data="X-LlamaStack-ProviderData", ) assert_matches_type(Session, session, path=["response"]) diff --git a/tests/api_resources/agents/test_turn.py b/tests/api_resources/agents/test_turn.py index 5e51e7fe..88e049cd 100644 --- a/tests/api_resources/agents/test_turn.py +++ b/tests/api_resources/agents/test_turn.py @@ -28,15 +28,7 @@ def test_method_create_overload_1(self, client: LlamaStackClient) -> None: { "content": "string", "role": "user", - }, - { - "content": "string", - "role": "user", - }, - { - "content": "string", - "role": "user", - }, + } ], session_id="session_id", ) @@ -54,32 +46,14 @@ def test_method_create_with_all_params_overload_1(self, client: LlamaStackClient "content": "string", "role": "user", "context": "string", - }, - { - "content": "string", - "role": "user", - "context": "string", - }, - { - "content": "string", - "role": "user", - "context": "string", - }, + } ], session_id="session_id", attachments=[ { "content": "string", "mime_type": "mime_type", - }, - { - "content": "string", - "mime_type": "mime_type", - }, - { - "content": "string", - "mime_type": "mime_type", - }, + } ], stream=False, x_llama_stack_provider_data="X-LlamaStack-ProviderData", @@ -97,15 +71,7 @@ def test_raw_response_create_overload_1(self, client: LlamaStackClient) -> None: { "content": "string", "role": "user", - }, - { - "content": "string", - "role": "user", - }, - { - "content": "string", - "role": "user", - }, + } ], session_id="session_id", ) @@ -126,15 +92,7 @@ def test_streaming_response_create_overload_1(self, client: LlamaStackClient) -> { "content": "string", "role": "user", - }, - { - "content": "string", - "role": "user", - }, - { - "content": "string", - "role": "user", - }, + } ], session_id="session_id", ) as response: @@ -157,15 +115,7 @@ def test_method_create_overload_2(self, client: LlamaStackClient) -> None: { "content": "string", "role": "user", - }, - { - "content": "string", - "role": "user", - }, - { - "content": "string", - "role": "user", - }, + } ], session_id="session_id", stream=True, @@ -184,17 +134,7 @@ def test_method_create_with_all_params_overload_2(self, client: LlamaStackClient "content": "string", "role": "user", "context": "string", - }, - { - "content": "string", - "role": "user", - "context": "string", - }, - { - "content": "string", - "role": "user", - "context": "string", - }, + } ], session_id="session_id", stream=True, @@ -202,15 +142,7 @@ def test_method_create_with_all_params_overload_2(self, client: LlamaStackClient { "content": "string", "mime_type": "mime_type", - 
}, - { - "content": "string", - "mime_type": "mime_type", - }, - { - "content": "string", - "mime_type": "mime_type", - }, + } ], x_llama_stack_provider_data="X-LlamaStack-ProviderData", ) @@ -227,15 +159,7 @@ def test_raw_response_create_overload_2(self, client: LlamaStackClient) -> None: { "content": "string", "role": "user", - }, - { - "content": "string", - "role": "user", - }, - { - "content": "string", - "role": "user", - }, + } ], session_id="session_id", stream=True, @@ -256,15 +180,7 @@ def test_streaming_response_create_overload_2(self, client: LlamaStackClient) -> { "content": "string", "role": "user", - }, - { - "content": "string", - "role": "user", - }, - { - "content": "string", - "role": "user", - }, + } ], session_id="session_id", stream=True, @@ -339,15 +255,7 @@ async def test_method_create_overload_1(self, async_client: AsyncLlamaStackClien { "content": "string", "role": "user", - }, - { - "content": "string", - "role": "user", - }, - { - "content": "string", - "role": "user", - }, + } ], session_id="session_id", ) @@ -365,32 +273,14 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn "content": "string", "role": "user", "context": "string", - }, - { - "content": "string", - "role": "user", - "context": "string", - }, - { - "content": "string", - "role": "user", - "context": "string", - }, + } ], session_id="session_id", attachments=[ { "content": "string", "mime_type": "mime_type", - }, - { - "content": "string", - "mime_type": "mime_type", - }, - { - "content": "string", - "mime_type": "mime_type", - }, + } ], stream=False, x_llama_stack_provider_data="X-LlamaStack-ProviderData", @@ -408,15 +298,7 @@ async def test_raw_response_create_overload_1(self, async_client: AsyncLlamaStac { "content": "string", "role": "user", - }, - { - "content": "string", - "role": "user", - }, - { - "content": "string", - "role": "user", - }, + } ], session_id="session_id", ) @@ -437,15 +319,7 @@ async def test_streaming_response_create_overload_1(self, async_client: AsyncLla { "content": "string", "role": "user", - }, - { - "content": "string", - "role": "user", - }, - { - "content": "string", - "role": "user", - }, + } ], session_id="session_id", ) as response: @@ -468,15 +342,7 @@ async def test_method_create_overload_2(self, async_client: AsyncLlamaStackClien { "content": "string", "role": "user", - }, - { - "content": "string", - "role": "user", - }, - { - "content": "string", - "role": "user", - }, + } ], session_id="session_id", stream=True, @@ -495,17 +361,7 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn "content": "string", "role": "user", "context": "string", - }, - { - "content": "string", - "role": "user", - "context": "string", - }, - { - "content": "string", - "role": "user", - "context": "string", - }, + } ], session_id="session_id", stream=True, @@ -513,15 +369,7 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn { "content": "string", "mime_type": "mime_type", - }, - { - "content": "string", - "mime_type": "mime_type", - }, - { - "content": "string", - "mime_type": "mime_type", - }, + } ], x_llama_stack_provider_data="X-LlamaStack-ProviderData", ) @@ -538,15 +386,7 @@ async def test_raw_response_create_overload_2(self, async_client: AsyncLlamaStac { "content": "string", "role": "user", - }, - { - "content": "string", - "role": "user", - }, - { - "content": "string", - "role": "user", - }, + } ], session_id="session_id", stream=True, @@ -567,15 +407,7 @@ async def 
test_streaming_response_create_overload_2(self, async_client: AsyncLla { "content": "string", "role": "user", - }, - { - "content": "string", - "role": "user", - }, - { - "content": "string", - "role": "user", - }, + } ], session_id="session_id", stream=True, diff --git a/tests/api_resources/test_agents.py b/tests/api_resources/test_agents.py index db3dac0a..835b61dd 100644 --- a/tests/api_resources/test_agents.py +++ b/tests/api_resources/test_agents.py @@ -37,8 +37,8 @@ def test_method_create_with_all_params(self, client: LlamaStackClient) -> None: "instructions": "instructions", "max_infer_iters": 0, "model": "model", - "input_shields": ["string", "string", "string"], - "output_shields": ["string", "string", "string"], + "input_shields": ["string"], + "output_shields": ["string"], "sampling_params": { "strategy": "greedy", "max_tokens": 0, @@ -54,8 +54,8 @@ def test_method_create_with_all_params(self, client: LlamaStackClient) -> None: "api_key": "api_key", "engine": "bing", "type": "brave_search", - "input_shields": ["string", "string", "string"], - "output_shields": ["string", "string", "string"], + "input_shields": ["string"], + "output_shields": ["string"], "remote_execution": { "method": "GET", "url": "https://example.com", @@ -63,35 +63,7 @@ def test_method_create_with_all_params(self, client: LlamaStackClient) -> None: "headers": {"foo": True}, "params": {"foo": True}, }, - }, - { - "api_key": "api_key", - "engine": "bing", - "type": "brave_search", - "input_shields": ["string", "string", "string"], - "output_shields": ["string", "string", "string"], - "remote_execution": { - "method": "GET", - "url": "https://example.com", - "body": {"foo": True}, - "headers": {"foo": True}, - "params": {"foo": True}, - }, - }, - { - "api_key": "api_key", - "engine": "bing", - "type": "brave_search", - "input_shields": ["string", "string", "string"], - "output_shields": ["string", "string", "string"], - "remote_execution": { - "method": "GET", - "url": "https://example.com", - "body": {"foo": True}, - "headers": {"foo": True}, - "params": {"foo": True}, - }, - }, + } ], }, x_llama_stack_provider_data="X-LlamaStack-ProviderData", @@ -195,8 +167,8 @@ async def test_method_create_with_all_params(self, async_client: AsyncLlamaStack "instructions": "instructions", "max_infer_iters": 0, "model": "model", - "input_shields": ["string", "string", "string"], - "output_shields": ["string", "string", "string"], + "input_shields": ["string"], + "output_shields": ["string"], "sampling_params": { "strategy": "greedy", "max_tokens": 0, @@ -212,36 +184,8 @@ async def test_method_create_with_all_params(self, async_client: AsyncLlamaStack "api_key": "api_key", "engine": "bing", "type": "brave_search", - "input_shields": ["string", "string", "string"], - "output_shields": ["string", "string", "string"], - "remote_execution": { - "method": "GET", - "url": "https://example.com", - "body": {"foo": True}, - "headers": {"foo": True}, - "params": {"foo": True}, - }, - }, - { - "api_key": "api_key", - "engine": "bing", - "type": "brave_search", - "input_shields": ["string", "string", "string"], - "output_shields": ["string", "string", "string"], - "remote_execution": { - "method": "GET", - "url": "https://example.com", - "body": {"foo": True}, - "headers": {"foo": True}, - "params": {"foo": True}, - }, - }, - { - "api_key": "api_key", - "engine": "bing", - "type": "brave_search", - "input_shields": ["string", "string", "string"], - "output_shields": ["string", "string", "string"], + "input_shields": ["string"], + 
"output_shields": ["string"], "remote_execution": { "method": "GET", "url": "https://example.com", @@ -249,7 +193,7 @@ async def test_method_create_with_all_params(self, async_client: AsyncLlamaStack "headers": {"foo": True}, "params": {"foo": True}, }, - }, + } ], }, x_llama_stack_provider_data="X-LlamaStack-ProviderData", diff --git a/tests/api_resources/test_batch_inference.py b/tests/api_resources/test_batch_inference.py index f01640f5..efe1481f 100644 --- a/tests/api_resources/test_batch_inference.py +++ b/tests/api_resources/test_batch_inference.py @@ -10,7 +10,7 @@ from tests.utils import assert_matches_type from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient from llama_stack_client.types import ( - BatchChatCompletion, + BatchInferenceChatCompletionResponse, ) from llama_stack_client.types.shared import BatchCompletion @@ -28,48 +28,12 @@ def test_method_chat_completion(self, client: LlamaStackClient) -> None: { "content": "string", "role": "user", - }, - { - "content": "string", - "role": "user", - }, - { - "content": "string", - "role": "user", - }, - ], - [ - { - "content": "string", - "role": "user", - }, - { - "content": "string", - "role": "user", - }, - { - "content": "string", - "role": "user", - }, - ], - [ - { - "content": "string", - "role": "user", - }, - { - "content": "string", - "role": "user", - }, - { - "content": "string", - "role": "user", - }, - ], + } + ] ], model="model", ) - assert_matches_type(BatchChatCompletion, batch_inference, path=["response"]) + assert_matches_type(BatchInferenceChatCompletionResponse, batch_inference, path=["response"]) @parametrize def test_method_chat_completion_with_all_params(self, client: LlamaStackClient) -> None: @@ -80,52 +44,8 @@ def test_method_chat_completion_with_all_params(self, client: LlamaStackClient) "content": "string", "role": "user", "context": "string", - }, - { - "content": "string", - "role": "user", - "context": "string", - }, - { - "content": "string", - "role": "user", - "context": "string", - }, - ], - [ - { - "content": "string", - "role": "user", - "context": "string", - }, - { - "content": "string", - "role": "user", - "context": "string", - }, - { - "content": "string", - "role": "user", - "context": "string", - }, - ], - [ - { - "content": "string", - "role": "user", - "context": "string", - }, - { - "content": "string", - "role": "user", - "context": "string", - }, - { - "content": "string", - "role": "user", - "context": "string", - }, - ], + } + ] ], model="model", logprobs={"top_k": 0}, @@ -151,35 +71,11 @@ def test_method_chat_completion_with_all_params(self, client: LlamaStackClient) "required": True, } }, - }, - { - "tool_name": "brave_search", - "description": "description", - "parameters": { - "foo": { - "param_type": "param_type", - "default": True, - "description": "description", - "required": True, - } - }, - }, - { - "tool_name": "brave_search", - "description": "description", - "parameters": { - "foo": { - "param_type": "param_type", - "default": True, - "description": "description", - "required": True, - } - }, - }, + } ], x_llama_stack_provider_data="X-LlamaStack-ProviderData", ) - assert_matches_type(BatchChatCompletion, batch_inference, path=["response"]) + assert_matches_type(BatchInferenceChatCompletionResponse, batch_inference, path=["response"]) @parametrize def test_raw_response_chat_completion(self, client: LlamaStackClient) -> None: @@ -189,44 +85,8 @@ def test_raw_response_chat_completion(self, client: LlamaStackClient) -> None: { "content": "string", "role": 
"user", - }, - { - "content": "string", - "role": "user", - }, - { - "content": "string", - "role": "user", - }, - ], - [ - { - "content": "string", - "role": "user", - }, - { - "content": "string", - "role": "user", - }, - { - "content": "string", - "role": "user", - }, - ], - [ - { - "content": "string", - "role": "user", - }, - { - "content": "string", - "role": "user", - }, - { - "content": "string", - "role": "user", - }, - ], + } + ] ], model="model", ) @@ -234,7 +94,7 @@ def test_raw_response_chat_completion(self, client: LlamaStackClient) -> None: assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" batch_inference = response.parse() - assert_matches_type(BatchChatCompletion, batch_inference, path=["response"]) + assert_matches_type(BatchInferenceChatCompletionResponse, batch_inference, path=["response"]) @parametrize def test_streaming_response_chat_completion(self, client: LlamaStackClient) -> None: @@ -244,44 +104,8 @@ def test_streaming_response_chat_completion(self, client: LlamaStackClient) -> N { "content": "string", "role": "user", - }, - { - "content": "string", - "role": "user", - }, - { - "content": "string", - "role": "user", - }, - ], - [ - { - "content": "string", - "role": "user", - }, - { - "content": "string", - "role": "user", - }, - { - "content": "string", - "role": "user", - }, - ], - [ - { - "content": "string", - "role": "user", - }, - { - "content": "string", - "role": "user", - }, - { - "content": "string", - "role": "user", - }, - ], + } + ] ], model="model", ) as response: @@ -289,14 +113,14 @@ def test_streaming_response_chat_completion(self, client: LlamaStackClient) -> N assert response.http_request.headers.get("X-Stainless-Lang") == "python" batch_inference = response.parse() - assert_matches_type(BatchChatCompletion, batch_inference, path=["response"]) + assert_matches_type(BatchInferenceChatCompletionResponse, batch_inference, path=["response"]) assert cast(Any, response.is_closed) is True @parametrize def test_method_completion(self, client: LlamaStackClient) -> None: batch_inference = client.batch_inference.completion( - content_batch=["string", "string", "string"], + content_batch=["string"], model="model", ) assert_matches_type(BatchCompletion, batch_inference, path=["response"]) @@ -304,7 +128,7 @@ def test_method_completion(self, client: LlamaStackClient) -> None: @parametrize def test_method_completion_with_all_params(self, client: LlamaStackClient) -> None: batch_inference = client.batch_inference.completion( - content_batch=["string", "string", "string"], + content_batch=["string"], model="model", logprobs={"top_k": 0}, sampling_params={ @@ -322,7 +146,7 @@ def test_method_completion_with_all_params(self, client: LlamaStackClient) -> No @parametrize def test_raw_response_completion(self, client: LlamaStackClient) -> None: response = client.batch_inference.with_raw_response.completion( - content_batch=["string", "string", "string"], + content_batch=["string"], model="model", ) @@ -334,7 +158,7 @@ def test_raw_response_completion(self, client: LlamaStackClient) -> None: @parametrize def test_streaming_response_completion(self, client: LlamaStackClient) -> None: with client.batch_inference.with_streaming_response.completion( - content_batch=["string", "string", "string"], + content_batch=["string"], model="model", ) as response: assert not response.is_closed @@ -357,48 +181,12 @@ async def test_method_chat_completion(self, async_client: AsyncLlamaStackClient) { "content": "string", "role": 
"user", - }, - { - "content": "string", - "role": "user", - }, - { - "content": "string", - "role": "user", - }, - ], - [ - { - "content": "string", - "role": "user", - }, - { - "content": "string", - "role": "user", - }, - { - "content": "string", - "role": "user", - }, - ], - [ - { - "content": "string", - "role": "user", - }, - { - "content": "string", - "role": "user", - }, - { - "content": "string", - "role": "user", - }, - ], + } + ] ], model="model", ) - assert_matches_type(BatchChatCompletion, batch_inference, path=["response"]) + assert_matches_type(BatchInferenceChatCompletionResponse, batch_inference, path=["response"]) @parametrize async def test_method_chat_completion_with_all_params(self, async_client: AsyncLlamaStackClient) -> None: @@ -409,52 +197,8 @@ async def test_method_chat_completion_with_all_params(self, async_client: AsyncL "content": "string", "role": "user", "context": "string", - }, - { - "content": "string", - "role": "user", - "context": "string", - }, - { - "content": "string", - "role": "user", - "context": "string", - }, - ], - [ - { - "content": "string", - "role": "user", - "context": "string", - }, - { - "content": "string", - "role": "user", - "context": "string", - }, - { - "content": "string", - "role": "user", - "context": "string", - }, - ], - [ - { - "content": "string", - "role": "user", - "context": "string", - }, - { - "content": "string", - "role": "user", - "context": "string", - }, - { - "content": "string", - "role": "user", - "context": "string", - }, - ], + } + ] ], model="model", logprobs={"top_k": 0}, @@ -480,35 +224,11 @@ async def test_method_chat_completion_with_all_params(self, async_client: AsyncL "required": True, } }, - }, - { - "tool_name": "brave_search", - "description": "description", - "parameters": { - "foo": { - "param_type": "param_type", - "default": True, - "description": "description", - "required": True, - } - }, - }, - { - "tool_name": "brave_search", - "description": "description", - "parameters": { - "foo": { - "param_type": "param_type", - "default": True, - "description": "description", - "required": True, - } - }, - }, + } ], x_llama_stack_provider_data="X-LlamaStack-ProviderData", ) - assert_matches_type(BatchChatCompletion, batch_inference, path=["response"]) + assert_matches_type(BatchInferenceChatCompletionResponse, batch_inference, path=["response"]) @parametrize async def test_raw_response_chat_completion(self, async_client: AsyncLlamaStackClient) -> None: @@ -518,44 +238,8 @@ async def test_raw_response_chat_completion(self, async_client: AsyncLlamaStackC { "content": "string", "role": "user", - }, - { - "content": "string", - "role": "user", - }, - { - "content": "string", - "role": "user", - }, - ], - [ - { - "content": "string", - "role": "user", - }, - { - "content": "string", - "role": "user", - }, - { - "content": "string", - "role": "user", - }, - ], - [ - { - "content": "string", - "role": "user", - }, - { - "content": "string", - "role": "user", - }, - { - "content": "string", - "role": "user", - }, - ], + } + ] ], model="model", ) @@ -563,7 +247,7 @@ async def test_raw_response_chat_completion(self, async_client: AsyncLlamaStackC assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" batch_inference = await response.parse() - assert_matches_type(BatchChatCompletion, batch_inference, path=["response"]) + assert_matches_type(BatchInferenceChatCompletionResponse, batch_inference, path=["response"]) @parametrize async def 
test_streaming_response_chat_completion(self, async_client: AsyncLlamaStackClient) -> None: @@ -573,44 +257,8 @@ async def test_streaming_response_chat_completion(self, async_client: AsyncLlama { "content": "string", "role": "user", - }, - { - "content": "string", - "role": "user", - }, - { - "content": "string", - "role": "user", - }, - ], - [ - { - "content": "string", - "role": "user", - }, - { - "content": "string", - "role": "user", - }, - { - "content": "string", - "role": "user", - }, - ], - [ - { - "content": "string", - "role": "user", - }, - { - "content": "string", - "role": "user", - }, - { - "content": "string", - "role": "user", - }, - ], + } + ] ], model="model", ) as response: @@ -618,14 +266,14 @@ async def test_streaming_response_chat_completion(self, async_client: AsyncLlama assert response.http_request.headers.get("X-Stainless-Lang") == "python" batch_inference = await response.parse() - assert_matches_type(BatchChatCompletion, batch_inference, path=["response"]) + assert_matches_type(BatchInferenceChatCompletionResponse, batch_inference, path=["response"]) assert cast(Any, response.is_closed) is True @parametrize async def test_method_completion(self, async_client: AsyncLlamaStackClient) -> None: batch_inference = await async_client.batch_inference.completion( - content_batch=["string", "string", "string"], + content_batch=["string"], model="model", ) assert_matches_type(BatchCompletion, batch_inference, path=["response"]) @@ -633,7 +281,7 @@ async def test_method_completion(self, async_client: AsyncLlamaStackClient) -> N @parametrize async def test_method_completion_with_all_params(self, async_client: AsyncLlamaStackClient) -> None: batch_inference = await async_client.batch_inference.completion( - content_batch=["string", "string", "string"], + content_batch=["string"], model="model", logprobs={"top_k": 0}, sampling_params={ @@ -651,7 +299,7 @@ async def test_method_completion_with_all_params(self, async_client: AsyncLlamaS @parametrize async def test_raw_response_completion(self, async_client: AsyncLlamaStackClient) -> None: response = await async_client.batch_inference.with_raw_response.completion( - content_batch=["string", "string", "string"], + content_batch=["string"], model="model", ) @@ -663,7 +311,7 @@ async def test_raw_response_completion(self, async_client: AsyncLlamaStackClient @parametrize async def test_streaming_response_completion(self, async_client: AsyncLlamaStackClient) -> None: async with async_client.batch_inference.with_streaming_response.completion( - content_batch=["string", "string", "string"], + content_batch=["string"], model="model", ) as response: assert not response.is_closed diff --git a/tests/api_resources/test_eval.py b/tests/api_resources/test_eval.py index 10508c12..24928940 100644 --- a/tests/api_resources/test_eval.py +++ b/tests/api_resources/test_eval.py @@ -20,8 +20,8 @@ class TestEval: @parametrize def test_method_evaluate_rows(self, client: LlamaStackClient) -> None: eval = client.eval.evaluate_rows( - input_rows=[{"foo": True}, {"foo": True}, {"foo": True}], - scoring_functions=["string", "string", "string"], + input_rows=[{"foo": True}], + scoring_functions=["string"], task_config={ "eval_candidate": { "model": "model", @@ -37,8 +37,8 @@ def test_method_evaluate_rows(self, client: LlamaStackClient) -> None: @parametrize def test_method_evaluate_rows_with_all_params(self, client: LlamaStackClient) -> None: eval = client.eval.evaluate_rows( - input_rows=[{"foo": True}, {"foo": True}, {"foo": True}], - 
scoring_functions=["string", "string", "string"], + input_rows=[{"foo": True}], + scoring_functions=["string"], task_config={ "eval_candidate": { "model": "model", @@ -67,8 +67,8 @@ def test_method_evaluate_rows_with_all_params(self, client: LlamaStackClient) -> @parametrize def test_raw_response_evaluate_rows(self, client: LlamaStackClient) -> None: response = client.eval.with_raw_response.evaluate_rows( - input_rows=[{"foo": True}, {"foo": True}, {"foo": True}], - scoring_functions=["string", "string", "string"], + input_rows=[{"foo": True}], + scoring_functions=["string"], task_config={ "eval_candidate": { "model": "model", @@ -88,8 +88,8 @@ def test_raw_response_evaluate_rows(self, client: LlamaStackClient) -> None: @parametrize def test_streaming_response_evaluate_rows(self, client: LlamaStackClient) -> None: with client.eval.with_streaming_response.evaluate_rows( - input_rows=[{"foo": True}, {"foo": True}, {"foo": True}], - scoring_functions=["string", "string", "string"], + input_rows=[{"foo": True}], + scoring_functions=["string"], task_config={ "eval_candidate": { "model": "model", @@ -198,8 +198,8 @@ class TestAsyncEval: @parametrize async def test_method_evaluate_rows(self, async_client: AsyncLlamaStackClient) -> None: eval = await async_client.eval.evaluate_rows( - input_rows=[{"foo": True}, {"foo": True}, {"foo": True}], - scoring_functions=["string", "string", "string"], + input_rows=[{"foo": True}], + scoring_functions=["string"], task_config={ "eval_candidate": { "model": "model", @@ -215,8 +215,8 @@ async def test_method_evaluate_rows(self, async_client: AsyncLlamaStackClient) - @parametrize async def test_method_evaluate_rows_with_all_params(self, async_client: AsyncLlamaStackClient) -> None: eval = await async_client.eval.evaluate_rows( - input_rows=[{"foo": True}, {"foo": True}, {"foo": True}], - scoring_functions=["string", "string", "string"], + input_rows=[{"foo": True}], + scoring_functions=["string"], task_config={ "eval_candidate": { "model": "model", @@ -245,8 +245,8 @@ async def test_method_evaluate_rows_with_all_params(self, async_client: AsyncLla @parametrize async def test_raw_response_evaluate_rows(self, async_client: AsyncLlamaStackClient) -> None: response = await async_client.eval.with_raw_response.evaluate_rows( - input_rows=[{"foo": True}, {"foo": True}, {"foo": True}], - scoring_functions=["string", "string", "string"], + input_rows=[{"foo": True}], + scoring_functions=["string"], task_config={ "eval_candidate": { "model": "model", @@ -266,8 +266,8 @@ async def test_raw_response_evaluate_rows(self, async_client: AsyncLlamaStackCli @parametrize async def test_streaming_response_evaluate_rows(self, async_client: AsyncLlamaStackClient) -> None: async with async_client.eval.with_streaming_response.evaluate_rows( - input_rows=[{"foo": True}, {"foo": True}, {"foo": True}], - scoring_functions=["string", "string", "string"], + input_rows=[{"foo": True}], + scoring_functions=["string"], task_config={ "eval_candidate": { "model": "model", diff --git a/tests/api_resources/test_eval_tasks.py b/tests/api_resources/test_eval_tasks.py index 20aba389..5d0bec9d 100644 --- a/tests/api_resources/test_eval_tasks.py +++ b/tests/api_resources/test_eval_tasks.py @@ -105,7 +105,7 @@ def test_method_register(self, client: LlamaStackClient) -> None: eval_task = client.eval_tasks.register( dataset_id="dataset_id", eval_task_id="eval_task_id", - scoring_functions=["string", "string", "string"], + scoring_functions=["string"], ) assert eval_task is None @@ -114,7 +114,7 @@ def 
test_method_register_with_all_params(self, client: LlamaStackClient) -> None eval_task = client.eval_tasks.register( dataset_id="dataset_id", eval_task_id="eval_task_id", - scoring_functions=["string", "string", "string"], + scoring_functions=["string"], metadata={"foo": True}, provider_eval_task_id="provider_eval_task_id", provider_id="provider_id", @@ -127,7 +127,7 @@ def test_raw_response_register(self, client: LlamaStackClient) -> None: response = client.eval_tasks.with_raw_response.register( dataset_id="dataset_id", eval_task_id="eval_task_id", - scoring_functions=["string", "string", "string"], + scoring_functions=["string"], ) assert response.is_closed is True @@ -140,7 +140,7 @@ def test_streaming_response_register(self, client: LlamaStackClient) -> None: with client.eval_tasks.with_streaming_response.register( dataset_id="dataset_id", eval_task_id="eval_task_id", - scoring_functions=["string", "string", "string"], + scoring_functions=["string"], ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -242,7 +242,7 @@ async def test_method_register(self, async_client: AsyncLlamaStackClient) -> Non eval_task = await async_client.eval_tasks.register( dataset_id="dataset_id", eval_task_id="eval_task_id", - scoring_functions=["string", "string", "string"], + scoring_functions=["string"], ) assert eval_task is None @@ -251,7 +251,7 @@ async def test_method_register_with_all_params(self, async_client: AsyncLlamaSta eval_task = await async_client.eval_tasks.register( dataset_id="dataset_id", eval_task_id="eval_task_id", - scoring_functions=["string", "string", "string"], + scoring_functions=["string"], metadata={"foo": True}, provider_eval_task_id="provider_eval_task_id", provider_id="provider_id", @@ -264,7 +264,7 @@ async def test_raw_response_register(self, async_client: AsyncLlamaStackClient) response = await async_client.eval_tasks.with_raw_response.register( dataset_id="dataset_id", eval_task_id="eval_task_id", - scoring_functions=["string", "string", "string"], + scoring_functions=["string"], ) assert response.is_closed is True @@ -277,7 +277,7 @@ async def test_streaming_response_register(self, async_client: AsyncLlamaStackCl async with async_client.eval_tasks.with_streaming_response.register( dataset_id="dataset_id", eval_task_id="eval_task_id", - scoring_functions=["string", "string", "string"], + scoring_functions=["string"], ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" diff --git a/tests/api_resources/test_inference.py b/tests/api_resources/test_inference.py index 3a811f36..60895ef3 100644 --- a/tests/api_resources/test_inference.py +++ b/tests/api_resources/test_inference.py @@ -31,15 +31,7 @@ def test_method_chat_completion(self, client: LlamaStackClient) -> None: { "content": "string", "role": "user", - }, - { - "content": "string", - "role": "user", - }, - { - "content": "string", - "role": "user", - }, + } ], model_id="model_id", ) @@ -56,17 +48,7 @@ def test_method_chat_completion_with_all_params(self, client: LlamaStackClient) "content": "string", "role": "user", "context": "string", - }, - { - "content": "string", - "role": "user", - "context": "string", - }, - { - "content": "string", - "role": "user", - "context": "string", - }, + } ], model_id="model_id", logprobs={"top_k": 0}, @@ -97,31 +79,7 @@ def test_method_chat_completion_with_all_params(self, client: LlamaStackClient) "required": True, } }, - }, - { - "tool_name": 
"brave_search", - "description": "description", - "parameters": { - "foo": { - "param_type": "param_type", - "default": True, - "description": "description", - "required": True, - } - }, - }, - { - "tool_name": "brave_search", - "description": "description", - "parameters": { - "foo": { - "param_type": "param_type", - "default": True, - "description": "description", - "required": True, - } - }, - }, + } ], x_llama_stack_provider_data="X-LlamaStack-ProviderData", ) @@ -137,15 +95,7 @@ def test_raw_response_chat_completion(self, client: LlamaStackClient) -> None: { "content": "string", "role": "user", - }, - { - "content": "string", - "role": "user", - }, - { - "content": "string", - "role": "user", - }, + } ], model_id="model_id", ) @@ -165,15 +115,7 @@ def test_streaming_response_chat_completion(self, client: LlamaStackClient) -> N { "content": "string", "role": "user", - }, - { - "content": "string", - "role": "user", - }, - { - "content": "string", - "role": "user", - }, + } ], model_id="model_id", ) as response: @@ -257,7 +199,7 @@ def test_streaming_response_completion(self, client: LlamaStackClient) -> None: @parametrize def test_method_embeddings(self, client: LlamaStackClient) -> None: inference = client.inference.embeddings( - contents=["string", "string", "string"], + contents=["string"], model_id="model_id", ) assert_matches_type(EmbeddingsResponse, inference, path=["response"]) @@ -265,7 +207,7 @@ def test_method_embeddings(self, client: LlamaStackClient) -> None: @parametrize def test_method_embeddings_with_all_params(self, client: LlamaStackClient) -> None: inference = client.inference.embeddings( - contents=["string", "string", "string"], + contents=["string"], model_id="model_id", x_llama_stack_provider_data="X-LlamaStack-ProviderData", ) @@ -274,7 +216,7 @@ def test_method_embeddings_with_all_params(self, client: LlamaStackClient) -> No @parametrize def test_raw_response_embeddings(self, client: LlamaStackClient) -> None: response = client.inference.with_raw_response.embeddings( - contents=["string", "string", "string"], + contents=["string"], model_id="model_id", ) @@ -286,7 +228,7 @@ def test_raw_response_embeddings(self, client: LlamaStackClient) -> None: @parametrize def test_streaming_response_embeddings(self, client: LlamaStackClient) -> None: with client.inference.with_streaming_response.embeddings( - contents=["string", "string", "string"], + contents=["string"], model_id="model_id", ) as response: assert not response.is_closed @@ -311,15 +253,7 @@ async def test_method_chat_completion(self, async_client: AsyncLlamaStackClient) { "content": "string", "role": "user", - }, - { - "content": "string", - "role": "user", - }, - { - "content": "string", - "role": "user", - }, + } ], model_id="model_id", ) @@ -336,17 +270,7 @@ async def test_method_chat_completion_with_all_params(self, async_client: AsyncL "content": "string", "role": "user", "context": "string", - }, - { - "content": "string", - "role": "user", - "context": "string", - }, - { - "content": "string", - "role": "user", - "context": "string", - }, + } ], model_id="model_id", logprobs={"top_k": 0}, @@ -377,31 +301,7 @@ async def test_method_chat_completion_with_all_params(self, async_client: AsyncL "required": True, } }, - }, - { - "tool_name": "brave_search", - "description": "description", - "parameters": { - "foo": { - "param_type": "param_type", - "default": True, - "description": "description", - "required": True, - } - }, - }, - { - "tool_name": "brave_search", - "description": "description", - 
"parameters": { - "foo": { - "param_type": "param_type", - "default": True, - "description": "description", - "required": True, - } - }, - }, + } ], x_llama_stack_provider_data="X-LlamaStack-ProviderData", ) @@ -417,15 +317,7 @@ async def test_raw_response_chat_completion(self, async_client: AsyncLlamaStackC { "content": "string", "role": "user", - }, - { - "content": "string", - "role": "user", - }, - { - "content": "string", - "role": "user", - }, + } ], model_id="model_id", ) @@ -445,15 +337,7 @@ async def test_streaming_response_chat_completion(self, async_client: AsyncLlama { "content": "string", "role": "user", - }, - { - "content": "string", - "role": "user", - }, - { - "content": "string", - "role": "user", - }, + } ], model_id="model_id", ) as response: @@ -537,7 +421,7 @@ async def test_streaming_response_completion(self, async_client: AsyncLlamaStack @parametrize async def test_method_embeddings(self, async_client: AsyncLlamaStackClient) -> None: inference = await async_client.inference.embeddings( - contents=["string", "string", "string"], + contents=["string"], model_id="model_id", ) assert_matches_type(EmbeddingsResponse, inference, path=["response"]) @@ -545,7 +429,7 @@ async def test_method_embeddings(self, async_client: AsyncLlamaStackClient) -> N @parametrize async def test_method_embeddings_with_all_params(self, async_client: AsyncLlamaStackClient) -> None: inference = await async_client.inference.embeddings( - contents=["string", "string", "string"], + contents=["string"], model_id="model_id", x_llama_stack_provider_data="X-LlamaStack-ProviderData", ) @@ -554,7 +438,7 @@ async def test_method_embeddings_with_all_params(self, async_client: AsyncLlamaS @parametrize async def test_raw_response_embeddings(self, async_client: AsyncLlamaStackClient) -> None: response = await async_client.inference.with_raw_response.embeddings( - contents=["string", "string", "string"], + contents=["string"], model_id="model_id", ) @@ -566,7 +450,7 @@ async def test_raw_response_embeddings(self, async_client: AsyncLlamaStackClient @parametrize async def test_streaming_response_embeddings(self, async_client: AsyncLlamaStackClient) -> None: async with async_client.inference.with_streaming_response.embeddings( - contents=["string", "string", "string"], + contents=["string"], model_id="model_id", ) as response: assert not response.is_closed diff --git a/tests/api_resources/test_memory.py b/tests/api_resources/test_memory.py index cbe16a14..05ccbd4a 100644 --- a/tests/api_resources/test_memory.py +++ b/tests/api_resources/test_memory.py @@ -26,17 +26,7 @@ def test_method_insert(self, client: LlamaStackClient) -> None: "content": "string", "document_id": "document_id", "metadata": {"foo": True}, - }, - { - "content": "string", - "document_id": "document_id", - "metadata": {"foo": True}, - }, - { - "content": "string", - "document_id": "document_id", - "metadata": {"foo": True}, - }, + } ], ) assert memory is None @@ -51,19 +41,7 @@ def test_method_insert_with_all_params(self, client: LlamaStackClient) -> None: "document_id": "document_id", "metadata": {"foo": True}, "mime_type": "mime_type", - }, - { - "content": "string", - "document_id": "document_id", - "metadata": {"foo": True}, - "mime_type": "mime_type", - }, - { - "content": "string", - "document_id": "document_id", - "metadata": {"foo": True}, - "mime_type": "mime_type", - }, + } ], ttl_seconds=0, x_llama_stack_provider_data="X-LlamaStack-ProviderData", @@ -79,17 +57,7 @@ def test_raw_response_insert(self, client: LlamaStackClient) -> None: 
"content": "string", "document_id": "document_id", "metadata": {"foo": True}, - }, - { - "content": "string", - "document_id": "document_id", - "metadata": {"foo": True}, - }, - { - "content": "string", - "document_id": "document_id", - "metadata": {"foo": True}, - }, + } ], ) @@ -107,17 +75,7 @@ def test_streaming_response_insert(self, client: LlamaStackClient) -> None: "content": "string", "document_id": "document_id", "metadata": {"foo": True}, - }, - { - "content": "string", - "document_id": "document_id", - "metadata": {"foo": True}, - }, - { - "content": "string", - "document_id": "document_id", - "metadata": {"foo": True}, - }, + } ], ) as response: assert not response.is_closed @@ -185,17 +143,7 @@ async def test_method_insert(self, async_client: AsyncLlamaStackClient) -> None: "content": "string", "document_id": "document_id", "metadata": {"foo": True}, - }, - { - "content": "string", - "document_id": "document_id", - "metadata": {"foo": True}, - }, - { - "content": "string", - "document_id": "document_id", - "metadata": {"foo": True}, - }, + } ], ) assert memory is None @@ -210,19 +158,7 @@ async def test_method_insert_with_all_params(self, async_client: AsyncLlamaStack "document_id": "document_id", "metadata": {"foo": True}, "mime_type": "mime_type", - }, - { - "content": "string", - "document_id": "document_id", - "metadata": {"foo": True}, - "mime_type": "mime_type", - }, - { - "content": "string", - "document_id": "document_id", - "metadata": {"foo": True}, - "mime_type": "mime_type", - }, + } ], ttl_seconds=0, x_llama_stack_provider_data="X-LlamaStack-ProviderData", @@ -238,17 +174,7 @@ async def test_raw_response_insert(self, async_client: AsyncLlamaStackClient) -> "content": "string", "document_id": "document_id", "metadata": {"foo": True}, - }, - { - "content": "string", - "document_id": "document_id", - "metadata": {"foo": True}, - }, - { - "content": "string", - "document_id": "document_id", - "metadata": {"foo": True}, - }, + } ], ) @@ -266,17 +192,7 @@ async def test_streaming_response_insert(self, async_client: AsyncLlamaStackClie "content": "string", "document_id": "document_id", "metadata": {"foo": True}, - }, - { - "content": "string", - "document_id": "document_id", - "metadata": {"foo": True}, - }, - { - "content": "string", - "document_id": "document_id", - "metadata": {"foo": True}, - }, + } ], ) as response: assert not response.is_closed diff --git a/tests/api_resources/test_post_training.py b/tests/api_resources/test_post_training.py index ab79e75d..99cb8b56 100644 --- a/tests/api_resources/test_post_training.py +++ b/tests/api_resources/test_post_training.py @@ -174,7 +174,7 @@ def test_method_supervised_fine_tune(self, client: LlamaStackClient) -> None: "alpha": 0, "apply_lora_to_mlp": True, "apply_lora_to_output": True, - "lora_attn_modules": ["string", "string", "string"], + "lora_attn_modules": ["string"], "rank": 0, }, dataset_id="dataset_id", @@ -209,7 +209,7 @@ def test_method_supervised_fine_tune_with_all_params(self, client: LlamaStackCli "alpha": 0, "apply_lora_to_mlp": True, "apply_lora_to_output": True, - "lora_attn_modules": ["string", "string", "string"], + "lora_attn_modules": ["string"], "rank": 0, }, dataset_id="dataset_id", @@ -245,7 +245,7 @@ def test_raw_response_supervised_fine_tune(self, client: LlamaStackClient) -> No "alpha": 0, "apply_lora_to_mlp": True, "apply_lora_to_output": True, - "lora_attn_modules": ["string", "string", "string"], + "lora_attn_modules": ["string"], "rank": 0, }, dataset_id="dataset_id", @@ -284,7 +284,7 @@ 
def test_streaming_response_supervised_fine_tune(self, client: LlamaStackClient) "alpha": 0, "apply_lora_to_mlp": True, "apply_lora_to_output": True, - "lora_attn_modules": ["string", "string", "string"], + "lora_attn_modules": ["string"], "rank": 0, }, dataset_id="dataset_id", @@ -476,7 +476,7 @@ async def test_method_supervised_fine_tune(self, async_client: AsyncLlamaStackCl "alpha": 0, "apply_lora_to_mlp": True, "apply_lora_to_output": True, - "lora_attn_modules": ["string", "string", "string"], + "lora_attn_modules": ["string"], "rank": 0, }, dataset_id="dataset_id", @@ -511,7 +511,7 @@ async def test_method_supervised_fine_tune_with_all_params(self, async_client: A "alpha": 0, "apply_lora_to_mlp": True, "apply_lora_to_output": True, - "lora_attn_modules": ["string", "string", "string"], + "lora_attn_modules": ["string"], "rank": 0, }, dataset_id="dataset_id", @@ -547,7 +547,7 @@ async def test_raw_response_supervised_fine_tune(self, async_client: AsyncLlamaS "alpha": 0, "apply_lora_to_mlp": True, "apply_lora_to_output": True, - "lora_attn_modules": ["string", "string", "string"], + "lora_attn_modules": ["string"], "rank": 0, }, dataset_id="dataset_id", @@ -586,7 +586,7 @@ async def test_streaming_response_supervised_fine_tune(self, async_client: Async "alpha": 0, "apply_lora_to_mlp": True, "apply_lora_to_output": True, - "lora_attn_modules": ["string", "string", "string"], + "lora_attn_modules": ["string"], "rank": 0, }, dataset_id="dataset_id", diff --git a/tests/api_resources/test_safety.py b/tests/api_resources/test_safety.py index 7e54c34d..f1c340eb 100644 --- a/tests/api_resources/test_safety.py +++ b/tests/api_resources/test_safety.py @@ -24,15 +24,7 @@ def test_method_run_shield(self, client: LlamaStackClient) -> None: { "content": "string", "role": "user", - }, - { - "content": "string", - "role": "user", - }, - { - "content": "string", - "role": "user", - }, + } ], params={"foo": True}, shield_id="shield_id", @@ -47,17 +39,7 @@ def test_method_run_shield_with_all_params(self, client: LlamaStackClient) -> No "content": "string", "role": "user", "context": "string", - }, - { - "content": "string", - "role": "user", - "context": "string", - }, - { - "content": "string", - "role": "user", - "context": "string", - }, + } ], params={"foo": True}, shield_id="shield_id", @@ -72,15 +54,7 @@ def test_raw_response_run_shield(self, client: LlamaStackClient) -> None: { "content": "string", "role": "user", - }, - { - "content": "string", - "role": "user", - }, - { - "content": "string", - "role": "user", - }, + } ], params={"foo": True}, shield_id="shield_id", @@ -98,15 +72,7 @@ def test_streaming_response_run_shield(self, client: LlamaStackClient) -> None: { "content": "string", "role": "user", - }, - { - "content": "string", - "role": "user", - }, - { - "content": "string", - "role": "user", - }, + } ], params={"foo": True}, shield_id="shield_id", @@ -130,15 +96,7 @@ async def test_method_run_shield(self, async_client: AsyncLlamaStackClient) -> N { "content": "string", "role": "user", - }, - { - "content": "string", - "role": "user", - }, - { - "content": "string", - "role": "user", - }, + } ], params={"foo": True}, shield_id="shield_id", @@ -153,17 +111,7 @@ async def test_method_run_shield_with_all_params(self, async_client: AsyncLlamaS "content": "string", "role": "user", "context": "string", - }, - { - "content": "string", - "role": "user", - "context": "string", - }, - { - "content": "string", - "role": "user", - "context": "string", - }, + } ], params={"foo": True}, 
shield_id="shield_id", @@ -178,15 +126,7 @@ async def test_raw_response_run_shield(self, async_client: AsyncLlamaStackClient { "content": "string", "role": "user", - }, - { - "content": "string", - "role": "user", - }, - { - "content": "string", - "role": "user", - }, + } ], params={"foo": True}, shield_id="shield_id", @@ -204,15 +144,7 @@ async def test_streaming_response_run_shield(self, async_client: AsyncLlamaStack { "content": "string", "role": "user", - }, - { - "content": "string", - "role": "user", - }, - { - "content": "string", - "role": "user", - }, + } ], params={"foo": True}, shield_id="shield_id", diff --git a/tests/api_resources/test_scoring.py b/tests/api_resources/test_scoring.py index 34c6fcee..06dc159d 100644 --- a/tests/api_resources/test_scoring.py +++ b/tests/api_resources/test_scoring.py @@ -23,7 +23,7 @@ class TestScoring: @parametrize def test_method_score(self, client: LlamaStackClient) -> None: scoring = client.scoring.score( - input_rows=[{"foo": True}, {"foo": True}, {"foo": True}], + input_rows=[{"foo": True}], scoring_functions={ "foo": { "judge_model": "judge_model", @@ -36,12 +36,12 @@ def test_method_score(self, client: LlamaStackClient) -> None: @parametrize def test_method_score_with_all_params(self, client: LlamaStackClient) -> None: scoring = client.scoring.score( - input_rows=[{"foo": True}, {"foo": True}, {"foo": True}], + input_rows=[{"foo": True}], scoring_functions={ "foo": { "judge_model": "judge_model", "type": "llm_as_judge", - "judge_score_regexes": ["string", "string", "string"], + "judge_score_regexes": ["string"], "prompt_template": "prompt_template", } }, @@ -52,7 +52,7 @@ def test_method_score_with_all_params(self, client: LlamaStackClient) -> None: @parametrize def test_raw_response_score(self, client: LlamaStackClient) -> None: response = client.scoring.with_raw_response.score( - input_rows=[{"foo": True}, {"foo": True}, {"foo": True}], + input_rows=[{"foo": True}], scoring_functions={ "foo": { "judge_model": "judge_model", @@ -69,7 +69,7 @@ def test_raw_response_score(self, client: LlamaStackClient) -> None: @parametrize def test_streaming_response_score(self, client: LlamaStackClient) -> None: with client.scoring.with_streaming_response.score( - input_rows=[{"foo": True}, {"foo": True}, {"foo": True}], + input_rows=[{"foo": True}], scoring_functions={ "foo": { "judge_model": "judge_model", @@ -108,7 +108,7 @@ def test_method_score_batch_with_all_params(self, client: LlamaStackClient) -> N "foo": { "judge_model": "judge_model", "type": "llm_as_judge", - "judge_score_regexes": ["string", "string", "string"], + "judge_score_regexes": ["string"], "prompt_template": "prompt_template", } }, @@ -161,7 +161,7 @@ class TestAsyncScoring: @parametrize async def test_method_score(self, async_client: AsyncLlamaStackClient) -> None: scoring = await async_client.scoring.score( - input_rows=[{"foo": True}, {"foo": True}, {"foo": True}], + input_rows=[{"foo": True}], scoring_functions={ "foo": { "judge_model": "judge_model", @@ -174,12 +174,12 @@ async def test_method_score(self, async_client: AsyncLlamaStackClient) -> None: @parametrize async def test_method_score_with_all_params(self, async_client: AsyncLlamaStackClient) -> None: scoring = await async_client.scoring.score( - input_rows=[{"foo": True}, {"foo": True}, {"foo": True}], + input_rows=[{"foo": True}], scoring_functions={ "foo": { "judge_model": "judge_model", "type": "llm_as_judge", - "judge_score_regexes": ["string", "string", "string"], + "judge_score_regexes": ["string"], 
"prompt_template": "prompt_template", } }, @@ -190,7 +190,7 @@ async def test_method_score_with_all_params(self, async_client: AsyncLlamaStackC @parametrize async def test_raw_response_score(self, async_client: AsyncLlamaStackClient) -> None: response = await async_client.scoring.with_raw_response.score( - input_rows=[{"foo": True}, {"foo": True}, {"foo": True}], + input_rows=[{"foo": True}], scoring_functions={ "foo": { "judge_model": "judge_model", @@ -207,7 +207,7 @@ async def test_raw_response_score(self, async_client: AsyncLlamaStackClient) -> @parametrize async def test_streaming_response_score(self, async_client: AsyncLlamaStackClient) -> None: async with async_client.scoring.with_streaming_response.score( - input_rows=[{"foo": True}, {"foo": True}, {"foo": True}], + input_rows=[{"foo": True}], scoring_functions={ "foo": { "judge_model": "judge_model", @@ -246,7 +246,7 @@ async def test_method_score_batch_with_all_params(self, async_client: AsyncLlama "foo": { "judge_model": "judge_model", "type": "llm_as_judge", - "judge_score_regexes": ["string", "string", "string"], + "judge_score_regexes": ["string"], "prompt_template": "prompt_template", } }, diff --git a/tests/api_resources/test_scoring_functions.py b/tests/api_resources/test_scoring_functions.py index 31c0e2fb..48b2336d 100644 --- a/tests/api_resources/test_scoring_functions.py +++ b/tests/api_resources/test_scoring_functions.py @@ -118,7 +118,7 @@ def test_method_register_with_all_params(self, client: LlamaStackClient) -> None params={ "judge_model": "judge_model", "type": "llm_as_judge", - "judge_score_regexes": ["string", "string", "string"], + "judge_score_regexes": ["string"], "prompt_template": "prompt_template", }, provider_id="provider_id", @@ -260,7 +260,7 @@ async def test_method_register_with_all_params(self, async_client: AsyncLlamaSta params={ "judge_model": "judge_model", "type": "llm_as_judge", - "judge_score_regexes": ["string", "string", "string"], + "judge_score_regexes": ["string"], "prompt_template": "prompt_template", }, provider_id="provider_id", diff --git a/tests/api_resources/test_synthetic_data_generation.py b/tests/api_resources/test_synthetic_data_generation.py index 1343e693..203a9ce5 100644 --- a/tests/api_resources/test_synthetic_data_generation.py +++ b/tests/api_resources/test_synthetic_data_generation.py @@ -24,15 +24,7 @@ def test_method_generate(self, client: LlamaStackClient) -> None: { "content": "string", "role": "user", - }, - { - "content": "string", - "role": "user", - }, - { - "content": "string", - "role": "user", - }, + } ], filtering_function="none", ) @@ -46,17 +38,7 @@ def test_method_generate_with_all_params(self, client: LlamaStackClient) -> None "content": "string", "role": "user", "context": "string", - }, - { - "content": "string", - "role": "user", - "context": "string", - }, - { - "content": "string", - "role": "user", - "context": "string", - }, + } ], filtering_function="none", model="model", @@ -71,15 +53,7 @@ def test_raw_response_generate(self, client: LlamaStackClient) -> None: { "content": "string", "role": "user", - }, - { - "content": "string", - "role": "user", - }, - { - "content": "string", - "role": "user", - }, + } ], filtering_function="none", ) @@ -96,15 +70,7 @@ def test_streaming_response_generate(self, client: LlamaStackClient) -> None: { "content": "string", "role": "user", - }, - { - "content": "string", - "role": "user", - }, - { - "content": "string", - "role": "user", - }, + } ], filtering_function="none", ) as response: @@ -127,15 +93,7 @@ async 
def test_method_generate(self, async_client: AsyncLlamaStackClient) -> Non { "content": "string", "role": "user", - }, - { - "content": "string", - "role": "user", - }, - { - "content": "string", - "role": "user", - }, + } ], filtering_function="none", ) @@ -149,17 +107,7 @@ async def test_method_generate_with_all_params(self, async_client: AsyncLlamaSta "content": "string", "role": "user", "context": "string", - }, - { - "content": "string", - "role": "user", - "context": "string", - }, - { - "content": "string", - "role": "user", - "context": "string", - }, + } ], filtering_function="none", model="model", @@ -174,15 +122,7 @@ async def test_raw_response_generate(self, async_client: AsyncLlamaStackClient) { "content": "string", "role": "user", - }, - { - "content": "string", - "role": "user", - }, - { - "content": "string", - "role": "user", - }, + } ], filtering_function="none", ) @@ -199,15 +139,7 @@ async def test_streaming_response_generate(self, async_client: AsyncLlamaStackCl { "content": "string", "role": "user", - }, - { - "content": "string", - "role": "user", - }, - { - "content": "string", - "role": "user", - }, + } ], filtering_function="none", ) as response: diff --git a/tests/test_client.py b/tests/test_client.py index 3b377b5b..45d816e3 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -4,11 +4,14 @@ import gc import os +import sys import json import asyncio import inspect +import subprocess import tracemalloc from typing import Any, Union, cast +from textwrap import dedent from unittest import mock from typing_extensions import Literal @@ -672,11 +675,11 @@ def test_parse_retry_after_header(self, remaining_retries: int, retry_after: str @mock.patch("llama_stack_client._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) @pytest.mark.respx(base_url=base_url) def test_retrying_timeout_errors_doesnt_leak(self, respx_mock: MockRouter) -> None: - respx_mock.post("/models/register").mock(side_effect=httpx.TimeoutException("Test timeout error")) + respx_mock.post("/alpha/models/register").mock(side_effect=httpx.TimeoutException("Test timeout error")) with pytest.raises(APITimeoutError): self.client.post( - "/models/register", + "/alpha/models/register", body=cast(object, dict(model_id="model_id")), cast_to=httpx.Response, options={"headers": {RAW_RESPONSE_HEADER: "stream"}}, @@ -687,11 +690,11 @@ def test_retrying_timeout_errors_doesnt_leak(self, respx_mock: MockRouter) -> No @mock.patch("llama_stack_client._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) @pytest.mark.respx(base_url=base_url) def test_retrying_status_errors_doesnt_leak(self, respx_mock: MockRouter) -> None: - respx_mock.post("/models/register").mock(return_value=httpx.Response(500)) + respx_mock.post("/alpha/models/register").mock(return_value=httpx.Response(500)) with pytest.raises(APIStatusError): self.client.post( - "/models/register", + "/alpha/models/register", body=cast(object, dict(model_id="model_id")), cast_to=httpx.Response, options={"headers": {RAW_RESPONSE_HEADER: "stream"}}, @@ -723,7 +726,7 @@ def retry_handler(_request: httpx.Request) -> httpx.Response: return httpx.Response(500) return httpx.Response(200) - respx_mock.post("/models/register").mock(side_effect=retry_handler) + respx_mock.post("/alpha/models/register").mock(side_effect=retry_handler) response = client.models.with_raw_response.register(model_id="model_id") @@ -747,7 +750,7 @@ def retry_handler(_request: httpx.Request) -> httpx.Response: return httpx.Response(500) return 
httpx.Response(200) - respx_mock.post("/models/register").mock(side_effect=retry_handler) + respx_mock.post("/alpha/models/register").mock(side_effect=retry_handler) response = client.models.with_raw_response.register( model_id="model_id", extra_headers={"x-stainless-retry-count": Omit()} @@ -772,7 +775,7 @@ def retry_handler(_request: httpx.Request) -> httpx.Response: return httpx.Response(500) return httpx.Response(200) - respx_mock.post("/models/register").mock(side_effect=retry_handler) + respx_mock.post("/alpha/models/register").mock(side_effect=retry_handler) response = client.models.with_raw_response.register( model_id="model_id", extra_headers={"x-stainless-retry-count": "42"} @@ -1413,11 +1416,11 @@ async def test_parse_retry_after_header(self, remaining_retries: int, retry_afte @mock.patch("llama_stack_client._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) @pytest.mark.respx(base_url=base_url) async def test_retrying_timeout_errors_doesnt_leak(self, respx_mock: MockRouter) -> None: - respx_mock.post("/models/register").mock(side_effect=httpx.TimeoutException("Test timeout error")) + respx_mock.post("/alpha/models/register").mock(side_effect=httpx.TimeoutException("Test timeout error")) with pytest.raises(APITimeoutError): await self.client.post( - "/models/register", + "/alpha/models/register", body=cast(object, dict(model_id="model_id")), cast_to=httpx.Response, options={"headers": {RAW_RESPONSE_HEADER: "stream"}}, @@ -1428,11 +1431,11 @@ async def test_retrying_timeout_errors_doesnt_leak(self, respx_mock: MockRouter) @mock.patch("llama_stack_client._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) @pytest.mark.respx(base_url=base_url) async def test_retrying_status_errors_doesnt_leak(self, respx_mock: MockRouter) -> None: - respx_mock.post("/models/register").mock(return_value=httpx.Response(500)) + respx_mock.post("/alpha/models/register").mock(return_value=httpx.Response(500)) with pytest.raises(APIStatusError): await self.client.post( - "/models/register", + "/alpha/models/register", body=cast(object, dict(model_id="model_id")), cast_to=httpx.Response, options={"headers": {RAW_RESPONSE_HEADER: "stream"}}, @@ -1465,7 +1468,7 @@ def retry_handler(_request: httpx.Request) -> httpx.Response: return httpx.Response(500) return httpx.Response(200) - respx_mock.post("/models/register").mock(side_effect=retry_handler) + respx_mock.post("/alpha/models/register").mock(side_effect=retry_handler) response = await client.models.with_raw_response.register(model_id="model_id") @@ -1490,7 +1493,7 @@ def retry_handler(_request: httpx.Request) -> httpx.Response: return httpx.Response(500) return httpx.Response(200) - respx_mock.post("/models/register").mock(side_effect=retry_handler) + respx_mock.post("/alpha/models/register").mock(side_effect=retry_handler) response = await client.models.with_raw_response.register( model_id="model_id", extra_headers={"x-stainless-retry-count": Omit()} @@ -1516,10 +1519,45 @@ def retry_handler(_request: httpx.Request) -> httpx.Response: return httpx.Response(500) return httpx.Response(200) - respx_mock.post("/models/register").mock(side_effect=retry_handler) + respx_mock.post("/alpha/models/register").mock(side_effect=retry_handler) response = await client.models.with_raw_response.register( model_id="model_id", extra_headers={"x-stainless-retry-count": "42"} ) assert response.http_request.headers.get("x-stainless-retry-count") == "42" + + def test_get_platform(self) -> None: + # A previous implementation of asyncify 
could leave threads unterminated when + # used with nest_asyncio. + # + # Since nest_asyncio.apply() is global and cannot be un-applied, this + # test is run in a separate process to avoid affecting other tests. + test_code = dedent(""" + import asyncio + import nest_asyncio + import threading + + from llama_stack_client._utils import asyncify + from llama_stack_client._base_client import get_platform + + async def test_main() -> None: + result = await asyncify(get_platform)() + print(result) + for thread in threading.enumerate(): + print(thread.name) + + nest_asyncio.apply() + asyncio.run(test_main()) + """) + with subprocess.Popen( + [sys.executable, "-c", test_code], + text=True, + ) as process: + try: + process.wait(2) + if process.returncode: + raise AssertionError("calling get_platform using asyncify resulted in a non-zero exit code") + except subprocess.TimeoutExpired as e: + process.kill() + raise AssertionError("calling get_platform using asyncify resulted in a hung process") from e
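
For context, a minimal sketch of the subprocess-isolation pattern the new test_get_platform test relies on: because nest_asyncio.apply() patches the event loop globally and cannot be undone, the probe is run in a fresh interpreter and only its exit status is checked. The snippet body below is a placeholder of my own; only the Popen/wait/timeout handling mirrors the test above, and the real probe additionally prints the platform string and the names of any threads still alive after asyncify() runs.

    import subprocess
    import sys
    from textwrap import dedent

    # Placeholder probe: any self-contained script can go here. In the real
    # test it imports asyncify and get_platform and enumerates live threads.
    snippet = dedent("""
        print("ok")
    """)

    with subprocess.Popen([sys.executable, "-c", snippet], text=True) as proc:
        try:
            proc.wait(2)  # give the child a couple of seconds before treating it as hung
            if proc.returncode:
                raise AssertionError("probe exited with a non-zero status")
        except subprocess.TimeoutExpired as exc:
            proc.kill()
            raise AssertionError("probe hung instead of exiting") from exc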