From 4af8f35cffaf2b3d00a38a8fc5f8ca5a0b266786 Mon Sep 17 00:00:00 2001
From: "stainless-app[bot]"
<142633134+stainless-app[bot]@users.noreply.github.com>
Date: Fri, 31 Oct 2025 02:32:44 +0000
Subject: [PATCH 1/3] chore(internal/tests): avoid race condition with implicit
client cleanup
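
The class-scoped `client` attribute left the underlying httpx transport to be closed implicitly
during garbage collection, which could race with test teardown; the tests now take function-scoped
`client` / `async_client` fixtures and explicitly `close()` any clients they construct locally.
A rough sketch of the kind of conftest fixtures this assumes (the real definitions live in
tests/conftest.py and may differ):

    import pytest
    from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient

    base_url = "http://127.0.0.1:4010"  # assumed mock-server URL; the real suite may read it from the env

    @pytest.fixture
    def client():
        # Entering/exiting the context manager closes the underlying httpx client
        # deterministically instead of leaving cleanup to the garbage collector.
        with LlamaStackClient(base_url=base_url, _strict_response_validation=True) as c:
            yield c

    @pytest.fixture
    async def async_client():
        # Assumes pytest-asyncio (or equivalent) is configured to run async fixtures.
        async with AsyncLlamaStackClient(base_url=base_url, _strict_response_validation=True) as c:
            yield c
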
---
tests/test_client.py | 362 ++++++++++++++++++++++++-------------------
1 file changed, 202 insertions(+), 160 deletions(-)
diff --git a/tests/test_client.py b/tests/test_client.py
index 3ccb4d91..2d39d32a 100644
--- a/tests/test_client.py
+++ b/tests/test_client.py
@@ -64,47 +64,45 @@ def _get_open_connections(client: LlamaStackClient | AsyncLlamaStackClient) -> i
class TestLlamaStackClient:
- client = LlamaStackClient(base_url=base_url, _strict_response_validation=True)
-
@pytest.mark.respx(base_url=base_url)
- def test_raw_response(self, respx_mock: MockRouter) -> None:
+ def test_raw_response(self, respx_mock: MockRouter, client: LlamaStackClient) -> None:
respx_mock.post("/foo").mock(return_value=httpx.Response(200, json={"foo": "bar"}))
- response = self.client.post("/foo", cast_to=httpx.Response)
+ response = client.post("/foo", cast_to=httpx.Response)
assert response.status_code == 200
assert isinstance(response, httpx.Response)
assert response.json() == {"foo": "bar"}
@pytest.mark.respx(base_url=base_url)
- def test_raw_response_for_binary(self, respx_mock: MockRouter) -> None:
+ def test_raw_response_for_binary(self, respx_mock: MockRouter, client: LlamaStackClient) -> None:
respx_mock.post("/foo").mock(
return_value=httpx.Response(200, headers={"Content-Type": "application/binary"}, content='{"foo": "bar"}')
)
- response = self.client.post("/foo", cast_to=httpx.Response)
+ response = client.post("/foo", cast_to=httpx.Response)
assert response.status_code == 200
assert isinstance(response, httpx.Response)
assert response.json() == {"foo": "bar"}
- def test_copy(self) -> None:
- copied = self.client.copy()
- assert id(copied) != id(self.client)
+ def test_copy(self, client: LlamaStackClient) -> None:
+ copied = client.copy()
+ assert id(copied) != id(client)
- def test_copy_default_options(self) -> None:
+ def test_copy_default_options(self, client: LlamaStackClient) -> None:
# options that have a default are overridden correctly
- copied = self.client.copy(max_retries=7)
+ copied = client.copy(max_retries=7)
assert copied.max_retries == 7
- assert self.client.max_retries == 2
+ assert client.max_retries == 2
copied2 = copied.copy(max_retries=6)
assert copied2.max_retries == 6
assert copied.max_retries == 7
# timeout
- assert isinstance(self.client.timeout, httpx.Timeout)
- copied = self.client.copy(timeout=None)
+ assert isinstance(client.timeout, httpx.Timeout)
+ copied = client.copy(timeout=None)
assert copied.timeout is None
- assert isinstance(self.client.timeout, httpx.Timeout)
+ assert isinstance(client.timeout, httpx.Timeout)
def test_copy_default_headers(self) -> None:
client = LlamaStackClient(base_url=base_url, _strict_response_validation=True, default_headers={"X-Foo": "bar"})
@@ -137,6 +135,7 @@ def test_copy_default_headers(self) -> None:
match="`default_headers` and `set_default_headers` arguments are mutually exclusive",
):
client.copy(set_default_headers={}, default_headers={"X-Foo": "Bar"})
+ client.close()
def test_copy_default_query(self) -> None:
client = LlamaStackClient(base_url=base_url, _strict_response_validation=True, default_query={"foo": "bar"})
@@ -172,13 +171,15 @@ def test_copy_default_query(self) -> None:
):
client.copy(set_default_query={}, default_query={"foo": "Bar"})
- def test_copy_signature(self) -> None:
+ client.close()
+
+ def test_copy_signature(self, client: LlamaStackClient) -> None:
# ensure the same parameters that can be passed to the client are defined in the `.copy()` method
init_signature = inspect.signature(
# mypy doesn't like that we access the `__init__` property.
- self.client.__init__, # type: ignore[misc]
+ client.__init__, # type: ignore[misc]
)
- copy_signature = inspect.signature(self.client.copy)
+ copy_signature = inspect.signature(client.copy)
exclude_params = {"transport", "proxies", "_strict_response_validation"}
for name in init_signature.parameters.keys():
@@ -189,12 +190,12 @@ def test_copy_signature(self) -> None:
assert copy_param is not None, f"copy() signature is missing the {name} param"
@pytest.mark.skipif(sys.version_info >= (3, 10), reason="fails because of a memory leak that started from 3.12")
- def test_copy_build_request(self) -> None:
+ def test_copy_build_request(self, client: LlamaStackClient) -> None:
options = FinalRequestOptions(method="get", url="/foo")
def build_request(options: FinalRequestOptions) -> None:
- client = self.client.copy()
- client._build_request(options)
+ client_copy = client.copy()
+ client_copy._build_request(options)
# ensure that the machinery is warmed up before tracing starts.
build_request(options)
@@ -251,14 +252,12 @@ def add_leak(leaks: list[tracemalloc.StatisticDiff], diff: tracemalloc.Statistic
print(frame)
raise AssertionError()
- def test_request_timeout(self) -> None:
- request = self.client._build_request(FinalRequestOptions(method="get", url="/foo"))
+ def test_request_timeout(self, client: LlamaStackClient) -> None:
+ request = client._build_request(FinalRequestOptions(method="get", url="/foo"))
timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore
assert timeout == DEFAULT_TIMEOUT
- request = self.client._build_request(
- FinalRequestOptions(method="get", url="/foo", timeout=httpx.Timeout(100.0))
- )
+ request = client._build_request(FinalRequestOptions(method="get", url="/foo", timeout=httpx.Timeout(100.0)))
timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore
assert timeout == httpx.Timeout(100.0)
@@ -269,6 +268,8 @@ def test_client_timeout_option(self) -> None:
timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore
assert timeout == httpx.Timeout(0)
+ client.close()
+
def test_http_client_timeout_option(self) -> None:
# custom timeout given to the httpx client should be used
with httpx.Client(timeout=None) as http_client:
@@ -278,6 +279,8 @@ def test_http_client_timeout_option(self) -> None:
timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore
assert timeout == httpx.Timeout(None)
+ client.close()
+
# no timeout given to the httpx client should not use the httpx default
with httpx.Client() as http_client:
client = LlamaStackClient(base_url=base_url, _strict_response_validation=True, http_client=http_client)
@@ -286,6 +289,8 @@ def test_http_client_timeout_option(self) -> None:
timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore
assert timeout == DEFAULT_TIMEOUT
+ client.close()
+
# explicitly passing the default timeout currently results in it being ignored
with httpx.Client(timeout=HTTPX_DEFAULT_TIMEOUT) as http_client:
client = LlamaStackClient(base_url=base_url, _strict_response_validation=True, http_client=http_client)
@@ -294,6 +299,8 @@ def test_http_client_timeout_option(self) -> None:
timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore
assert timeout == DEFAULT_TIMEOUT # our default
+ client.close()
+
async def test_invalid_http_client(self) -> None:
with pytest.raises(TypeError, match="Invalid `http_client` arg"):
async with httpx.AsyncClient() as http_client:
@@ -302,12 +309,14 @@ async def test_invalid_http_client(self) -> None:
)
def test_default_headers_option(self) -> None:
- client = LlamaStackClient(base_url=base_url, _strict_response_validation=True, default_headers={"X-Foo": "bar"})
- request = client._build_request(FinalRequestOptions(method="get", url="/foo"))
+ test_client = LlamaStackClient(
+ base_url=base_url, _strict_response_validation=True, default_headers={"X-Foo": "bar"}
+ )
+ request = test_client._build_request(FinalRequestOptions(method="get", url="/foo"))
assert request.headers.get("x-foo") == "bar"
assert request.headers.get("x-stainless-lang") == "python"
- client2 = LlamaStackClient(
+ test_client2 = LlamaStackClient(
base_url=base_url,
_strict_response_validation=True,
default_headers={
@@ -315,10 +324,13 @@ def test_default_headers_option(self) -> None:
"X-Stainless-Lang": "my-overriding-header",
},
)
- request = client2._build_request(FinalRequestOptions(method="get", url="/foo"))
+ request = test_client2._build_request(FinalRequestOptions(method="get", url="/foo"))
assert request.headers.get("x-foo") == "stainless"
assert request.headers.get("x-stainless-lang") == "my-overriding-header"
+ test_client.close()
+ test_client2.close()
+
def test_default_query_option(self) -> None:
client = LlamaStackClient(
base_url=base_url, _strict_response_validation=True, default_query={"query_param": "bar"}
@@ -337,8 +349,10 @@ def test_default_query_option(self) -> None:
url = httpx.URL(request.url)
assert dict(url.params) == {"foo": "baz", "query_param": "overridden"}
- def test_request_extra_json(self) -> None:
- request = self.client._build_request(
+ client.close()
+
+ def test_request_extra_json(self, client: LlamaStackClient) -> None:
+ request = client._build_request(
FinalRequestOptions(
method="post",
url="/foo",
@@ -349,7 +363,7 @@ def test_request_extra_json(self) -> None:
data = json.loads(request.content.decode("utf-8"))
assert data == {"foo": "bar", "baz": False}
- request = self.client._build_request(
+ request = client._build_request(
FinalRequestOptions(
method="post",
url="/foo",
@@ -360,7 +374,7 @@ def test_request_extra_json(self) -> None:
assert data == {"baz": False}
# `extra_json` takes priority over `json_data` when keys clash
- request = self.client._build_request(
+ request = client._build_request(
FinalRequestOptions(
method="post",
url="/foo",
@@ -371,8 +385,8 @@ def test_request_extra_json(self) -> None:
data = json.loads(request.content.decode("utf-8"))
assert data == {"foo": "bar", "baz": None}
- def test_request_extra_headers(self) -> None:
- request = self.client._build_request(
+ def test_request_extra_headers(self, client: LlamaStackClient) -> None:
+ request = client._build_request(
FinalRequestOptions(
method="post",
url="/foo",
@@ -382,7 +396,7 @@ def test_request_extra_headers(self) -> None:
assert request.headers.get("X-Foo") == "Foo"
# `extra_headers` takes priority over `default_headers` when keys clash
- request = self.client.with_options(default_headers={"X-Bar": "true"})._build_request(
+ request = client.with_options(default_headers={"X-Bar": "true"})._build_request(
FinalRequestOptions(
method="post",
url="/foo",
@@ -393,8 +407,8 @@ def test_request_extra_headers(self) -> None:
)
assert request.headers.get("X-Bar") == "false"
- def test_request_extra_query(self) -> None:
- request = self.client._build_request(
+ def test_request_extra_query(self, client: LlamaStackClient) -> None:
+ request = client._build_request(
FinalRequestOptions(
method="post",
url="/foo",
@@ -407,7 +421,7 @@ def test_request_extra_query(self) -> None:
assert params == {"my_query_param": "Foo"}
# if both `query` and `extra_query` are given, they are merged
- request = self.client._build_request(
+ request = client._build_request(
FinalRequestOptions(
method="post",
url="/foo",
@@ -421,7 +435,7 @@ def test_request_extra_query(self) -> None:
assert params == {"bar": "1", "foo": "2"}
# `extra_query` takes priority over `query` when keys clash
- request = self.client._build_request(
+ request = client._build_request(
FinalRequestOptions(
method="post",
url="/foo",
@@ -464,7 +478,7 @@ def test_multipart_repeating_array(self, client: LlamaStackClient) -> None:
]
@pytest.mark.respx(base_url=base_url)
- def test_basic_union_response(self, respx_mock: MockRouter) -> None:
+ def test_basic_union_response(self, respx_mock: MockRouter, client: LlamaStackClient) -> None:
class Model1(BaseModel):
name: str
@@ -473,12 +487,12 @@ class Model2(BaseModel):
respx_mock.get("/foo").mock(return_value=httpx.Response(200, json={"foo": "bar"}))
- response = self.client.get("/foo", cast_to=cast(Any, Union[Model1, Model2]))
+ response = client.get("/foo", cast_to=cast(Any, Union[Model1, Model2]))
assert isinstance(response, Model2)
assert response.foo == "bar"
@pytest.mark.respx(base_url=base_url)
- def test_union_response_different_types(self, respx_mock: MockRouter) -> None:
+ def test_union_response_different_types(self, respx_mock: MockRouter, client: LlamaStackClient) -> None:
"""Union of objects with the same field name using a different type"""
class Model1(BaseModel):
@@ -489,18 +503,20 @@ class Model2(BaseModel):
respx_mock.get("/foo").mock(return_value=httpx.Response(200, json={"foo": "bar"}))
- response = self.client.get("/foo", cast_to=cast(Any, Union[Model1, Model2]))
+ response = client.get("/foo", cast_to=cast(Any, Union[Model1, Model2]))
assert isinstance(response, Model2)
assert response.foo == "bar"
respx_mock.get("/foo").mock(return_value=httpx.Response(200, json={"foo": 1}))
- response = self.client.get("/foo", cast_to=cast(Any, Union[Model1, Model2]))
+ response = client.get("/foo", cast_to=cast(Any, Union[Model1, Model2]))
assert isinstance(response, Model1)
assert response.foo == 1
@pytest.mark.respx(base_url=base_url)
- def test_non_application_json_content_type_for_json_data(self, respx_mock: MockRouter) -> None:
+ def test_non_application_json_content_type_for_json_data(
+ self, respx_mock: MockRouter, client: LlamaStackClient
+ ) -> None:
"""
Response that sets Content-Type to something other than application/json but returns json data
"""
@@ -516,7 +532,7 @@ class Model(BaseModel):
)
)
- response = self.client.get("/foo", cast_to=Model)
+ response = client.get("/foo", cast_to=Model)
assert isinstance(response, Model)
assert response.foo == 2
@@ -528,6 +544,8 @@ def test_base_url_setter(self) -> None:
assert client.base_url == "https://example.com/from_setter/"
+ client.close()
+
def test_base_url_env(self) -> None:
with update_env(LLAMA_STACK_CLIENT_BASE_URL="http://localhost:5000/from/env"):
client = LlamaStackClient(_strict_response_validation=True)
@@ -554,6 +572,7 @@ def test_base_url_trailing_slash(self, client: LlamaStackClient) -> None:
),
)
assert request.url == "http://localhost:5000/custom/path/foo"
+ client.close()
@pytest.mark.parametrize(
"client",
@@ -576,6 +595,7 @@ def test_base_url_no_trailing_slash(self, client: LlamaStackClient) -> None:
),
)
assert request.url == "http://localhost:5000/custom/path/foo"
+ client.close()
@pytest.mark.parametrize(
"client",
@@ -598,35 +618,36 @@ def test_absolute_request_url(self, client: LlamaStackClient) -> None:
),
)
assert request.url == "https://myapi.com/foo"
+ client.close()
def test_copied_client_does_not_close_http(self) -> None:
- client = LlamaStackClient(base_url=base_url, _strict_response_validation=True)
- assert not client.is_closed()
+ test_client = LlamaStackClient(base_url=base_url, _strict_response_validation=True)
+ assert not test_client.is_closed()
- copied = client.copy()
- assert copied is not client
+ copied = test_client.copy()
+ assert copied is not test_client
del copied
- assert not client.is_closed()
+ assert not test_client.is_closed()
def test_client_context_manager(self) -> None:
- client = LlamaStackClient(base_url=base_url, _strict_response_validation=True)
- with client as c2:
- assert c2 is client
+ test_client = LlamaStackClient(base_url=base_url, _strict_response_validation=True)
+ with test_client as c2:
+ assert c2 is test_client
assert not c2.is_closed()
- assert not client.is_closed()
- assert client.is_closed()
+ assert not test_client.is_closed()
+ assert test_client.is_closed()
@pytest.mark.respx(base_url=base_url)
- def test_client_response_validation_error(self, respx_mock: MockRouter) -> None:
+ def test_client_response_validation_error(self, respx_mock: MockRouter, client: LlamaStackClient) -> None:
class Model(BaseModel):
foo: str
respx_mock.get("/foo").mock(return_value=httpx.Response(200, json={"foo": {"invalid": True}}))
with pytest.raises(APIResponseValidationError) as exc:
- self.client.get("/foo", cast_to=Model)
+ client.get("/foo", cast_to=Model)
assert isinstance(exc.value.__cause__, ValidationError)
@@ -646,11 +667,14 @@ class Model(BaseModel):
with pytest.raises(APIResponseValidationError):
strict_client.get("/foo", cast_to=Model)
- client = LlamaStackClient(base_url=base_url, _strict_response_validation=False)
+ non_strict_client = LlamaStackClient(base_url=base_url, _strict_response_validation=False)
- response = client.get("/foo", cast_to=Model)
+ response = non_strict_client.get("/foo", cast_to=Model)
assert isinstance(response, str) # type: ignore[unreachable]
+ strict_client.close()
+ non_strict_client.close()
+
@pytest.mark.parametrize(
"remaining_retries,retry_after,timeout",
[
@@ -673,9 +697,9 @@ class Model(BaseModel):
],
)
@mock.patch("time.time", mock.MagicMock(return_value=1696004797))
- def test_parse_retry_after_header(self, remaining_retries: int, retry_after: str, timeout: float) -> None:
- client = LlamaStackClient(base_url=base_url, _strict_response_validation=True)
-
+ def test_parse_retry_after_header(
+ self, remaining_retries: int, retry_after: str, timeout: float, client: LlamaStackClient
+ ) -> None:
headers = httpx.Headers({"retry-after": retry_after})
options = FinalRequestOptions(method="get", url="/foo", max_retries=3)
calculated = client._calculate_retry_timeout(remaining_retries, options, headers)
@@ -697,7 +721,7 @@ def test_retrying_timeout_errors_doesnt_leak(self, respx_mock: MockRouter, clien
model="model",
).__enter__()
- assert _get_open_connections(self.client) == 0
+ assert _get_open_connections(client) == 0
@mock.patch("llama_stack_client._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout)
@pytest.mark.respx(base_url=base_url)
@@ -714,7 +738,7 @@ def test_retrying_status_errors_doesnt_leak(self, respx_mock: MockRouter, client
],
model="model",
).__enter__()
- assert _get_open_connections(self.client) == 0
+ assert _get_open_connections(client) == 0
@pytest.mark.parametrize("failures_before_success", [0, 2, 4])
@mock.patch("llama_stack_client._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout)
@@ -842,79 +866,73 @@ def test_default_client_creation(self) -> None:
)
@pytest.mark.respx(base_url=base_url)
- def test_follow_redirects(self, respx_mock: MockRouter) -> None:
+ def test_follow_redirects(self, respx_mock: MockRouter, client: LlamaStackClient) -> None:
# Test that the default follow_redirects=True allows following redirects
respx_mock.post("/redirect").mock(
return_value=httpx.Response(302, headers={"Location": f"{base_url}/redirected"})
)
respx_mock.get("/redirected").mock(return_value=httpx.Response(200, json={"status": "ok"}))
- response = self.client.post("/redirect", body={"key": "value"}, cast_to=httpx.Response)
+ response = client.post("/redirect", body={"key": "value"}, cast_to=httpx.Response)
assert response.status_code == 200
assert response.json() == {"status": "ok"}
@pytest.mark.respx(base_url=base_url)
- def test_follow_redirects_disabled(self, respx_mock: MockRouter) -> None:
+ def test_follow_redirects_disabled(self, respx_mock: MockRouter, client: LlamaStackClient) -> None:
# Test that follow_redirects=False prevents following redirects
respx_mock.post("/redirect").mock(
return_value=httpx.Response(302, headers={"Location": f"{base_url}/redirected"})
)
with pytest.raises(APIStatusError) as exc_info:
- self.client.post(
- "/redirect", body={"key": "value"}, options={"follow_redirects": False}, cast_to=httpx.Response
- )
+ client.post("/redirect", body={"key": "value"}, options={"follow_redirects": False}, cast_to=httpx.Response)
assert exc_info.value.response.status_code == 302
assert exc_info.value.response.headers["Location"] == f"{base_url}/redirected"
class TestAsyncLlamaStackClient:
- client = AsyncLlamaStackClient(base_url=base_url, _strict_response_validation=True)
-
@pytest.mark.respx(base_url=base_url)
- @pytest.mark.asyncio
- async def test_raw_response(self, respx_mock: MockRouter) -> None:
+ async def test_raw_response(self, respx_mock: MockRouter, async_client: AsyncLlamaStackClient) -> None:
respx_mock.post("/foo").mock(return_value=httpx.Response(200, json={"foo": "bar"}))
- response = await self.client.post("/foo", cast_to=httpx.Response)
+ response = await async_client.post("/foo", cast_to=httpx.Response)
assert response.status_code == 200
assert isinstance(response, httpx.Response)
assert response.json() == {"foo": "bar"}
@pytest.mark.respx(base_url=base_url)
- @pytest.mark.asyncio
- async def test_raw_response_for_binary(self, respx_mock: MockRouter) -> None:
+ async def test_raw_response_for_binary(self, respx_mock: MockRouter, async_client: AsyncLlamaStackClient) -> None:
respx_mock.post("/foo").mock(
return_value=httpx.Response(200, headers={"Content-Type": "application/binary"}, content='{"foo": "bar"}')
)
- response = await self.client.post("/foo", cast_to=httpx.Response)
+ response = await async_client.post("/foo", cast_to=httpx.Response)
assert response.status_code == 200
assert isinstance(response, httpx.Response)
assert response.json() == {"foo": "bar"}
- def test_copy(self) -> None:
- copied = self.client.copy()
- assert id(copied) != id(self.client)
+ def test_copy(self, async_client: AsyncLlamaStackClient) -> None:
+ copied = async_client.copy()
+ assert id(copied) != id(async_client)
- def test_copy_default_options(self) -> None:
+ def test_copy_default_options(self, async_client: AsyncLlamaStackClient) -> None:
# options that have a default are overridden correctly
- copied = self.client.copy(max_retries=7)
+ copied = async_client.copy(max_retries=7)
assert copied.max_retries == 7
- assert self.client.max_retries == 2
+ assert async_client.max_retries == 2
copied2 = copied.copy(max_retries=6)
assert copied2.max_retries == 6
assert copied.max_retries == 7
# timeout
- assert isinstance(self.client.timeout, httpx.Timeout)
- copied = self.client.copy(timeout=None)
+ assert isinstance(async_client.timeout, httpx.Timeout)
+ copied = async_client.copy(timeout=None)
assert copied.timeout is None
- assert isinstance(self.client.timeout, httpx.Timeout)
+ assert isinstance(async_client.timeout, httpx.Timeout)
- def test_copy_default_headers(self) -> None:
+ async def test_copy_default_headers(self) -> None:
client = AsyncLlamaStackClient(
base_url=base_url, _strict_response_validation=True, default_headers={"X-Foo": "bar"}
)
@@ -947,8 +965,9 @@ def test_copy_default_headers(self) -> None:
match="`default_headers` and `set_default_headers` arguments are mutually exclusive",
):
client.copy(set_default_headers={}, default_headers={"X-Foo": "Bar"})
+ await client.close()
- def test_copy_default_query(self) -> None:
+ async def test_copy_default_query(self) -> None:
client = AsyncLlamaStackClient(
base_url=base_url, _strict_response_validation=True, default_query={"foo": "bar"}
)
@@ -984,13 +1003,15 @@ def test_copy_default_query(self) -> None:
):
client.copy(set_default_query={}, default_query={"foo": "Bar"})
- def test_copy_signature(self) -> None:
+ await client.close()
+
+ def test_copy_signature(self, async_client: AsyncLlamaStackClient) -> None:
# ensure the same parameters that can be passed to the client are defined in the `.copy()` method
init_signature = inspect.signature(
# mypy doesn't like that we access the `__init__` property.
- self.client.__init__, # type: ignore[misc]
+ async_client.__init__, # type: ignore[misc]
)
- copy_signature = inspect.signature(self.client.copy)
+ copy_signature = inspect.signature(async_client.copy)
exclude_params = {"transport", "proxies", "_strict_response_validation"}
for name in init_signature.parameters.keys():
@@ -1001,12 +1022,12 @@ def test_copy_signature(self) -> None:
assert copy_param is not None, f"copy() signature is missing the {name} param"
@pytest.mark.skipif(sys.version_info >= (3, 10), reason="fails because of a memory leak that started from 3.12")
- def test_copy_build_request(self) -> None:
+ def test_copy_build_request(self, async_client: AsyncLlamaStackClient) -> None:
options = FinalRequestOptions(method="get", url="/foo")
def build_request(options: FinalRequestOptions) -> None:
- client = self.client.copy()
- client._build_request(options)
+ client_copy = async_client.copy()
+ client_copy._build_request(options)
# ensure that the machinery is warmed up before tracing starts.
build_request(options)
@@ -1063,12 +1084,12 @@ def add_leak(leaks: list[tracemalloc.StatisticDiff], diff: tracemalloc.Statistic
print(frame)
raise AssertionError()
- async def test_request_timeout(self) -> None:
- request = self.client._build_request(FinalRequestOptions(method="get", url="/foo"))
+ async def test_request_timeout(self, async_client: AsyncLlamaStackClient) -> None:
+ request = async_client._build_request(FinalRequestOptions(method="get", url="/foo"))
timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore
assert timeout == DEFAULT_TIMEOUT
- request = self.client._build_request(
+ request = async_client._build_request(
FinalRequestOptions(method="get", url="/foo", timeout=httpx.Timeout(100.0))
)
timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore
@@ -1081,6 +1102,8 @@ async def test_client_timeout_option(self) -> None:
timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore
assert timeout == httpx.Timeout(0)
+ await client.close()
+
async def test_http_client_timeout_option(self) -> None:
# custom timeout given to the httpx client should be used
async with httpx.AsyncClient(timeout=None) as http_client:
@@ -1090,6 +1113,8 @@ async def test_http_client_timeout_option(self) -> None:
timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore
assert timeout == httpx.Timeout(None)
+ await client.close()
+
# no timeout given to the httpx client should not use the httpx default
async with httpx.AsyncClient() as http_client:
client = AsyncLlamaStackClient(base_url=base_url, _strict_response_validation=True, http_client=http_client)
@@ -1098,6 +1123,8 @@ async def test_http_client_timeout_option(self) -> None:
timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore
assert timeout == DEFAULT_TIMEOUT
+ await client.close()
+
# explicitly passing the default timeout currently results in it being ignored
async with httpx.AsyncClient(timeout=HTTPX_DEFAULT_TIMEOUT) as http_client:
client = AsyncLlamaStackClient(base_url=base_url, _strict_response_validation=True, http_client=http_client)
@@ -1106,6 +1133,8 @@ async def test_http_client_timeout_option(self) -> None:
timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore
assert timeout == DEFAULT_TIMEOUT # our default
+ await client.close()
+
def test_invalid_http_client(self) -> None:
with pytest.raises(TypeError, match="Invalid `http_client` arg"):
with httpx.Client() as http_client:
@@ -1113,15 +1142,15 @@ def test_invalid_http_client(self) -> None:
base_url=base_url, _strict_response_validation=True, http_client=cast(Any, http_client)
)
- def test_default_headers_option(self) -> None:
- client = AsyncLlamaStackClient(
+ async def test_default_headers_option(self) -> None:
+ test_client = AsyncLlamaStackClient(
base_url=base_url, _strict_response_validation=True, default_headers={"X-Foo": "bar"}
)
- request = client._build_request(FinalRequestOptions(method="get", url="/foo"))
+ request = test_client._build_request(FinalRequestOptions(method="get", url="/foo"))
assert request.headers.get("x-foo") == "bar"
assert request.headers.get("x-stainless-lang") == "python"
- client2 = AsyncLlamaStackClient(
+ test_client2 = AsyncLlamaStackClient(
base_url=base_url,
_strict_response_validation=True,
default_headers={
@@ -1129,11 +1158,14 @@ def test_default_headers_option(self) -> None:
"X-Stainless-Lang": "my-overriding-header",
},
)
- request = client2._build_request(FinalRequestOptions(method="get", url="/foo"))
+ request = test_client2._build_request(FinalRequestOptions(method="get", url="/foo"))
assert request.headers.get("x-foo") == "stainless"
assert request.headers.get("x-stainless-lang") == "my-overriding-header"
- def test_default_query_option(self) -> None:
+ await test_client.close()
+ await test_client2.close()
+
+ async def test_default_query_option(self) -> None:
client = AsyncLlamaStackClient(
base_url=base_url, _strict_response_validation=True, default_query={"query_param": "bar"}
)
@@ -1151,8 +1183,10 @@ def test_default_query_option(self) -> None:
url = httpx.URL(request.url)
assert dict(url.params) == {"foo": "baz", "query_param": "overridden"}
- def test_request_extra_json(self) -> None:
- request = self.client._build_request(
+ await client.close()
+
+ def test_request_extra_json(self, client: LlamaStackClient) -> None:
+ request = client._build_request(
FinalRequestOptions(
method="post",
url="/foo",
@@ -1163,7 +1197,7 @@ def test_request_extra_json(self) -> None:
data = json.loads(request.content.decode("utf-8"))
assert data == {"foo": "bar", "baz": False}
- request = self.client._build_request(
+ request = client._build_request(
FinalRequestOptions(
method="post",
url="/foo",
@@ -1174,7 +1208,7 @@ def test_request_extra_json(self) -> None:
assert data == {"baz": False}
# `extra_json` takes priority over `json_data` when keys clash
- request = self.client._build_request(
+ request = client._build_request(
FinalRequestOptions(
method="post",
url="/foo",
@@ -1185,8 +1219,8 @@ def test_request_extra_json(self) -> None:
data = json.loads(request.content.decode("utf-8"))
assert data == {"foo": "bar", "baz": None}
- def test_request_extra_headers(self) -> None:
- request = self.client._build_request(
+ def test_request_extra_headers(self, client: LlamaStackClient) -> None:
+ request = client._build_request(
FinalRequestOptions(
method="post",
url="/foo",
@@ -1196,7 +1230,7 @@ def test_request_extra_headers(self) -> None:
assert request.headers.get("X-Foo") == "Foo"
# `extra_headers` takes priority over `default_headers` when keys clash
- request = self.client.with_options(default_headers={"X-Bar": "true"})._build_request(
+ request = client.with_options(default_headers={"X-Bar": "true"})._build_request(
FinalRequestOptions(
method="post",
url="/foo",
@@ -1207,8 +1241,8 @@ def test_request_extra_headers(self) -> None:
)
assert request.headers.get("X-Bar") == "false"
- def test_request_extra_query(self) -> None:
- request = self.client._build_request(
+ def test_request_extra_query(self, client: LlamaStackClient) -> None:
+ request = client._build_request(
FinalRequestOptions(
method="post",
url="/foo",
@@ -1221,7 +1255,7 @@ def test_request_extra_query(self) -> None:
assert params == {"my_query_param": "Foo"}
# if both `query` and `extra_query` are given, they are merged
- request = self.client._build_request(
+ request = client._build_request(
FinalRequestOptions(
method="post",
url="/foo",
@@ -1235,7 +1269,7 @@ def test_request_extra_query(self) -> None:
assert params == {"bar": "1", "foo": "2"}
# `extra_query` takes priority over `query` when keys clash
- request = self.client._build_request(
+ request = client._build_request(
FinalRequestOptions(
method="post",
url="/foo",
@@ -1278,7 +1312,7 @@ def test_multipart_repeating_array(self, async_client: AsyncLlamaStackClient) ->
]
@pytest.mark.respx(base_url=base_url)
- async def test_basic_union_response(self, respx_mock: MockRouter) -> None:
+ async def test_basic_union_response(self, respx_mock: MockRouter, async_client: AsyncLlamaStackClient) -> None:
class Model1(BaseModel):
name: str
@@ -1287,12 +1321,14 @@ class Model2(BaseModel):
respx_mock.get("/foo").mock(return_value=httpx.Response(200, json={"foo": "bar"}))
- response = await self.client.get("/foo", cast_to=cast(Any, Union[Model1, Model2]))
+ response = await async_client.get("/foo", cast_to=cast(Any, Union[Model1, Model2]))
assert isinstance(response, Model2)
assert response.foo == "bar"
@pytest.mark.respx(base_url=base_url)
- async def test_union_response_different_types(self, respx_mock: MockRouter) -> None:
+ async def test_union_response_different_types(
+ self, respx_mock: MockRouter, async_client: AsyncLlamaStackClient
+ ) -> None:
"""Union of objects with the same field name using a different type"""
class Model1(BaseModel):
@@ -1303,18 +1339,20 @@ class Model2(BaseModel):
respx_mock.get("/foo").mock(return_value=httpx.Response(200, json={"foo": "bar"}))
- response = await self.client.get("/foo", cast_to=cast(Any, Union[Model1, Model2]))
+ response = await async_client.get("/foo", cast_to=cast(Any, Union[Model1, Model2]))
assert isinstance(response, Model2)
assert response.foo == "bar"
respx_mock.get("/foo").mock(return_value=httpx.Response(200, json={"foo": 1}))
- response = await self.client.get("/foo", cast_to=cast(Any, Union[Model1, Model2]))
+ response = await async_client.get("/foo", cast_to=cast(Any, Union[Model1, Model2]))
assert isinstance(response, Model1)
assert response.foo == 1
@pytest.mark.respx(base_url=base_url)
- async def test_non_application_json_content_type_for_json_data(self, respx_mock: MockRouter) -> None:
+ async def test_non_application_json_content_type_for_json_data(
+ self, respx_mock: MockRouter, async_client: AsyncLlamaStackClient
+ ) -> None:
"""
Response that sets Content-Type to something other than application/json but returns json data
"""
@@ -1330,11 +1368,11 @@ class Model(BaseModel):
)
)
- response = await self.client.get("/foo", cast_to=Model)
+ response = await async_client.get("/foo", cast_to=Model)
assert isinstance(response, Model)
assert response.foo == 2
- def test_base_url_setter(self) -> None:
+ async def test_base_url_setter(self) -> None:
client = AsyncLlamaStackClient(base_url="https://example.com/from_init", _strict_response_validation=True)
assert client.base_url == "https://example.com/from_init/"
@@ -1342,7 +1380,9 @@ def test_base_url_setter(self) -> None:
assert client.base_url == "https://example.com/from_setter/"
- def test_base_url_env(self) -> None:
+ await client.close()
+
+ async def test_base_url_env(self) -> None:
with update_env(LLAMA_STACK_CLIENT_BASE_URL="http://localhost:5000/from/env"):
client = AsyncLlamaStackClient(_strict_response_validation=True)
assert client.base_url == "http://localhost:5000/from/env/"
@@ -1359,7 +1399,7 @@ def test_base_url_env(self) -> None:
],
ids=["standard", "custom http client"],
)
- def test_base_url_trailing_slash(self, client: AsyncLlamaStackClient) -> None:
+ async def test_base_url_trailing_slash(self, client: AsyncLlamaStackClient) -> None:
request = client._build_request(
FinalRequestOptions(
method="post",
@@ -1368,6 +1408,7 @@ def test_base_url_trailing_slash(self, client: AsyncLlamaStackClient) -> None:
),
)
assert request.url == "http://localhost:5000/custom/path/foo"
+ await client.close()
@pytest.mark.parametrize(
"client",
@@ -1381,7 +1422,7 @@ def test_base_url_trailing_slash(self, client: AsyncLlamaStackClient) -> None:
],
ids=["standard", "custom http client"],
)
- def test_base_url_no_trailing_slash(self, client: AsyncLlamaStackClient) -> None:
+ async def test_base_url_no_trailing_slash(self, client: AsyncLlamaStackClient) -> None:
request = client._build_request(
FinalRequestOptions(
method="post",
@@ -1390,6 +1431,7 @@ def test_base_url_no_trailing_slash(self, client: AsyncLlamaStackClient) -> None
),
)
assert request.url == "http://localhost:5000/custom/path/foo"
+ await client.close()
@pytest.mark.parametrize(
"client",
@@ -1403,7 +1445,7 @@ def test_base_url_no_trailing_slash(self, client: AsyncLlamaStackClient) -> None
],
ids=["standard", "custom http client"],
)
- def test_absolute_request_url(self, client: AsyncLlamaStackClient) -> None:
+ async def test_absolute_request_url(self, client: AsyncLlamaStackClient) -> None:
request = client._build_request(
FinalRequestOptions(
method="post",
@@ -1412,37 +1454,39 @@ def test_absolute_request_url(self, client: AsyncLlamaStackClient) -> None:
),
)
assert request.url == "https://myapi.com/foo"
+ await client.close()
async def test_copied_client_does_not_close_http(self) -> None:
- client = AsyncLlamaStackClient(base_url=base_url, _strict_response_validation=True)
- assert not client.is_closed()
+ test_client = AsyncLlamaStackClient(base_url=base_url, _strict_response_validation=True)
+ assert not test_client.is_closed()
- copied = client.copy()
- assert copied is not client
+ copied = test_client.copy()
+ assert copied is not test_client
del copied
await asyncio.sleep(0.2)
- assert not client.is_closed()
+ assert not test_client.is_closed()
async def test_client_context_manager(self) -> None:
- client = AsyncLlamaStackClient(base_url=base_url, _strict_response_validation=True)
- async with client as c2:
- assert c2 is client
+ test_client = AsyncLlamaStackClient(base_url=base_url, _strict_response_validation=True)
+ async with test_client as c2:
+ assert c2 is test_client
assert not c2.is_closed()
- assert not client.is_closed()
- assert client.is_closed()
+ assert not test_client.is_closed()
+ assert test_client.is_closed()
@pytest.mark.respx(base_url=base_url)
- @pytest.mark.asyncio
- async def test_client_response_validation_error(self, respx_mock: MockRouter) -> None:
+ async def test_client_response_validation_error(
+ self, respx_mock: MockRouter, async_client: AsyncLlamaStackClient
+ ) -> None:
class Model(BaseModel):
foo: str
respx_mock.get("/foo").mock(return_value=httpx.Response(200, json={"foo": {"invalid": True}}))
with pytest.raises(APIResponseValidationError) as exc:
- await self.client.get("/foo", cast_to=Model)
+ await async_client.get("/foo", cast_to=Model)
assert isinstance(exc.value.__cause__, ValidationError)
@@ -1451,7 +1495,6 @@ async def test_client_max_retries_validation(self) -> None:
AsyncLlamaStackClient(base_url=base_url, _strict_response_validation=True, max_retries=cast(Any, None))
@pytest.mark.respx(base_url=base_url)
- @pytest.mark.asyncio
async def test_received_text_for_expected_json(self, respx_mock: MockRouter) -> None:
class Model(BaseModel):
name: str
@@ -1463,11 +1506,14 @@ class Model(BaseModel):
with pytest.raises(APIResponseValidationError):
await strict_client.get("/foo", cast_to=Model)
- client = AsyncLlamaStackClient(base_url=base_url, _strict_response_validation=False)
+ non_strict_client = AsyncLlamaStackClient(base_url=base_url, _strict_response_validation=False)
- response = await client.get("/foo", cast_to=Model)
+ response = await non_strict_client.get("/foo", cast_to=Model)
assert isinstance(response, str) # type: ignore[unreachable]
+ await strict_client.close()
+ await non_strict_client.close()
+
@pytest.mark.parametrize(
"remaining_retries,retry_after,timeout",
[
@@ -1490,13 +1536,12 @@ class Model(BaseModel):
],
)
@mock.patch("time.time", mock.MagicMock(return_value=1696004797))
- @pytest.mark.asyncio
- async def test_parse_retry_after_header(self, remaining_retries: int, retry_after: str, timeout: float) -> None:
- client = AsyncLlamaStackClient(base_url=base_url, _strict_response_validation=True)
-
+ async def test_parse_retry_after_header(
+ self, remaining_retries: int, retry_after: str, timeout: float, async_client: AsyncLlamaStackClient
+ ) -> None:
headers = httpx.Headers({"retry-after": retry_after})
options = FinalRequestOptions(method="get", url="/foo", max_retries=3)
- calculated = client._calculate_retry_timeout(remaining_retries, options, headers)
+ calculated = async_client._calculate_retry_timeout(remaining_retries, options, headers)
assert calculated == pytest.approx(timeout, 0.5 * 0.875) # pyright: ignore[reportUnknownMemberType]
@mock.patch("llama_stack_client._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout)
@@ -1517,7 +1562,7 @@ async def test_retrying_timeout_errors_doesnt_leak(
model="model",
).__aenter__()
- assert _get_open_connections(self.client) == 0
+ assert _get_open_connections(async_client) == 0
@mock.patch("llama_stack_client._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout)
@pytest.mark.respx(base_url=base_url)
@@ -1536,12 +1581,11 @@ async def test_retrying_status_errors_doesnt_leak(
],
model="model",
).__aenter__()
- assert _get_open_connections(self.client) == 0
+ assert _get_open_connections(async_client) == 0
@pytest.mark.parametrize("failures_before_success", [0, 2, 4])
@mock.patch("llama_stack_client._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout)
@pytest.mark.respx(base_url=base_url)
- @pytest.mark.asyncio
@pytest.mark.parametrize("failure_mode", ["status", "exception"])
async def test_retries_taken(
self,
@@ -1581,7 +1625,6 @@ def retry_handler(_request: httpx.Request) -> httpx.Response:
@pytest.mark.parametrize("failures_before_success", [0, 2, 4])
@mock.patch("llama_stack_client._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout)
@pytest.mark.respx(base_url=base_url)
- @pytest.mark.asyncio
async def test_omit_retry_count_header(
self, async_client: AsyncLlamaStackClient, failures_before_success: int, respx_mock: MockRouter
) -> None:
@@ -1614,7 +1657,6 @@ def retry_handler(_request: httpx.Request) -> httpx.Response:
@pytest.mark.parametrize("failures_before_success", [0, 2, 4])
@mock.patch("llama_stack_client._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout)
@pytest.mark.respx(base_url=base_url)
- @pytest.mark.asyncio
async def test_overwrite_retry_count_header(
self, async_client: AsyncLlamaStackClient, failures_before_success: int, respx_mock: MockRouter
) -> None:
@@ -1671,26 +1713,26 @@ async def test_default_client_creation(self) -> None:
)
@pytest.mark.respx(base_url=base_url)
- async def test_follow_redirects(self, respx_mock: MockRouter) -> None:
+ async def test_follow_redirects(self, respx_mock: MockRouter, async_client: AsyncLlamaStackClient) -> None:
# Test that the default follow_redirects=True allows following redirects
respx_mock.post("/redirect").mock(
return_value=httpx.Response(302, headers={"Location": f"{base_url}/redirected"})
)
respx_mock.get("/redirected").mock(return_value=httpx.Response(200, json={"status": "ok"}))
- response = await self.client.post("/redirect", body={"key": "value"}, cast_to=httpx.Response)
+ response = await async_client.post("/redirect", body={"key": "value"}, cast_to=httpx.Response)
assert response.status_code == 200
assert response.json() == {"status": "ok"}
@pytest.mark.respx(base_url=base_url)
- async def test_follow_redirects_disabled(self, respx_mock: MockRouter) -> None:
+ async def test_follow_redirects_disabled(self, respx_mock: MockRouter, async_client: AsyncLlamaStackClient) -> None:
# Test that follow_redirects=False prevents following redirects
respx_mock.post("/redirect").mock(
return_value=httpx.Response(302, headers={"Location": f"{base_url}/redirected"})
)
with pytest.raises(APIStatusError) as exc_info:
- await self.client.post(
+ await async_client.post(
"/redirect", body={"key": "value"}, options={"follow_redirects": False}, cast_to=httpx.Response
)
From efdf1be41243be5107f4863de99c5dce8504bba9 Mon Sep 17 00:00:00 2001
From: "stainless-app[bot]"
<142633134+stainless-app[bot]@users.noreply.github.com>
Date: Mon, 3 Nov 2025 23:54:56 +0000
Subject: [PATCH 2/3] feat(api): point models.list() to /v1/openai/v1/models
A step towards OpenAI compatibility for the models endpoint.
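
Call sites do not change: `client.models.list()` keeps its signature and simply targets the
OpenAI-compatible route, `client.models.openai.list()` now returns the new `OpenAIListResponse`
type, and `client.routes.list()` gains an optional `api_filter` argument. A rough usage sketch
(the base URL and the list-like behaviour of the responses are assumptions, not taken from this
patch):

    from llama_stack_client import LlamaStackClient

    client = LlamaStackClient(base_url="http://localhost:8321")  # assumed local server address

    models = client.models.list()                 # now issues GET /v1/openai/v1/models
    legacy = client.models.openai.list()          # still GET /v1/models, typed as OpenAIListResponse
    routes = client.routes.list(api_filter="v1")  # new optional filter added in this patch

    # The SDK unwraps the `data` envelope, so these behave as plain lists (assumption).
    print(len(models), len(legacy), len(routes))
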
---
.stats.yml | 8 +-
api.md | 12 +-
.../resources/models/models.py | 8 +-
.../resources/models/openai.py | 14 +--
.../resources/responses/responses.py | 22 ++++
src/llama_stack_client/resources/routes.py | 37 +++++-
src/llama_stack_client/types/__init__.py | 1 +
.../types/conversation_create_params.py | 22 ++++
.../types/conversations/item_create_params.py | 22 ++++
.../conversations/item_create_response.py | 22 ++++
.../types/conversations/item_get_response.py | 22 ++++
.../types/conversations/item_list_response.py | 22 ++++
.../types/model_list_response.py | 24 +++-
.../types/models/__init__.py | 2 +
.../types/models/openai_list_response.py | 10 ++
.../types/query_chunks_response.py | 9 +-
.../types/response_create_params.py | 93 ++++++++++++++
.../types/response_list_response.py | 116 ++++++++++++++++++
.../types/response_object.py | 94 ++++++++++++++
.../types/response_object_stream.py | 48 ++++++++
.../responses/input_item_list_response.py | 22 ++++
.../types/route_list_params.py | 17 +++
.../types/vector_io_insert_params.py | 9 +-
tests/api_resources/models/test_openai.py | 14 +--
tests/api_resources/test_responses.py | 40 ++++++
tests/api_resources/test_routes.py | 14 +++
tests/api_resources/test_vector_io.py | 10 +-
27 files changed, 689 insertions(+), 45 deletions(-)
create mode 100644 src/llama_stack_client/types/models/openai_list_response.py
create mode 100644 src/llama_stack_client/types/route_list_params.py
diff --git a/.stats.yml b/.stats.yml
index 60e64c3c..29bc5044 100644
--- a/.stats.yml
+++ b/.stats.yml
@@ -1,4 +1,4 @@
-configured_endpoints: 111
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/llamastack%2Fllama-stack-client-35c6569e5e9fcc85084c9728eb7fc7c5908297fcc77043d621d25de3c850a990.yml
-openapi_spec_hash: 0f95bbeee16f3205d36ec34cfa62c711
-config_hash: ef275cc002a89629459fd73d0cf9cba9
+configured_endpoints: 112
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/llamastack%2Fllama-stack-client-a9f69d4a5f5d9bf957497cac83fdad1f72c8a44614098447762c53883e8bd987.yml
+openapi_spec_hash: 75de5bdff8e70591d6033b609fc24e5d
+config_hash: 34558d5f6e265184d712d43e231eb693
diff --git a/api.md b/api.md
index 57ecd092..bd1949f8 100644
--- a/api.md
+++ b/api.md
@@ -306,15 +306,21 @@ from llama_stack_client.types import ListModelsResponse, Model, ModelListRespons
Methods:
- client.models.retrieve(model_id) -> Model
-- client.models.list() -> ModelListResponse
+- client.models.list() -> ModelListResponse
- client.models.register(\*\*params) -> Model
- client.models.unregister(model_id) -> None
## OpenAI
+Types:
+
+```python
+from llama_stack_client.types.models import OpenAIListResponse
+```
+
Methods:
-- client.models.openai.list() -> ModelListResponse
+- client.models.openai.list() -> OpenAIListResponse
# Providers
@@ -339,7 +345,7 @@ from llama_stack_client.types import ListRoutesResponse, RouteListResponse
Methods:
-- client.routes.list() -> RouteListResponse
+- client.routes.list(\*\*params) -> RouteListResponse
# Moderations
diff --git a/src/llama_stack_client/resources/models/models.py b/src/llama_stack_client/resources/models/models.py
index 99ebccdd..dc7e0f4d 100644
--- a/src/llama_stack_client/resources/models/models.py
+++ b/src/llama_stack_client/resources/models/models.py
@@ -108,9 +108,9 @@ def list(
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = not_given,
) -> ModelListResponse:
- """List all models."""
+ """List models using the OpenAI API."""
return self._get(
- "/v1/models",
+ "/v1/openai/v1/models",
options=make_request_options(
extra_headers=extra_headers,
extra_query=extra_query,
@@ -281,9 +281,9 @@ async def list(
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = not_given,
) -> ModelListResponse:
- """List all models."""
+ """List models using the OpenAI API."""
return await self._get(
- "/v1/models",
+ "/v1/openai/v1/models",
options=make_request_options(
extra_headers=extra_headers,
extra_query=extra_query,
diff --git a/src/llama_stack_client/resources/models/openai.py b/src/llama_stack_client/resources/models/openai.py
index c5ff1738..c581f714 100644
--- a/src/llama_stack_client/resources/models/openai.py
+++ b/src/llama_stack_client/resources/models/openai.py
@@ -23,7 +23,7 @@
)
from ..._wrappers import DataWrapper
from ..._base_client import make_request_options
-from ...types.model_list_response import ModelListResponse
+from ...types.models.openai_list_response import OpenAIListResponse
__all__ = ["OpenAIResource", "AsyncOpenAIResource"]
@@ -57,7 +57,7 @@ def list(
extra_query: Query | None = None,
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> ModelListResponse:
+ ) -> OpenAIListResponse:
"""List all models."""
return self._get(
"/v1/models",
@@ -66,9 +66,9 @@ def list(
extra_query=extra_query,
extra_body=extra_body,
timeout=timeout,
- post_parser=DataWrapper[ModelListResponse]._unwrapper,
+ post_parser=DataWrapper[OpenAIListResponse]._unwrapper,
),
- cast_to=cast(Type[ModelListResponse], DataWrapper[ModelListResponse]),
+ cast_to=cast(Type[OpenAIListResponse], DataWrapper[OpenAIListResponse]),
)
@@ -101,7 +101,7 @@ async def list(
extra_query: Query | None = None,
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> ModelListResponse:
+ ) -> OpenAIListResponse:
"""List all models."""
return await self._get(
"/v1/models",
@@ -110,9 +110,9 @@ async def list(
extra_query=extra_query,
extra_body=extra_body,
timeout=timeout,
- post_parser=DataWrapper[ModelListResponse]._unwrapper,
+ post_parser=DataWrapper[OpenAIListResponse]._unwrapper,
),
- cast_to=cast(Type[ModelListResponse], DataWrapper[ModelListResponse]),
+ cast_to=cast(Type[OpenAIListResponse], DataWrapper[OpenAIListResponse]),
)
diff --git a/src/llama_stack_client/resources/responses/responses.py b/src/llama_stack_client/resources/responses/responses.py
index 6bc29a62..e0109583 100644
--- a/src/llama_stack_client/resources/responses/responses.py
+++ b/src/llama_stack_client/resources/responses/responses.py
@@ -78,6 +78,7 @@ def create(
instructions: str | Omit = omit,
max_infer_iters: int | Omit = omit,
previous_response_id: str | Omit = omit,
+ prompt: response_create_params.Prompt | Omit = omit,
store: bool | Omit = omit,
stream: Literal[False] | Omit = omit,
temperature: float | Omit = omit,
@@ -108,6 +109,8 @@ def create(
response. This can be used to easily fork-off new responses from existing
responses.
+ prompt: (Optional) Prompt object with ID, version, and variables.
+
text: Text response configuration for OpenAI responses.
extra_headers: Send extra headers
@@ -132,6 +135,7 @@ def create(
instructions: str | Omit = omit,
max_infer_iters: int | Omit = omit,
previous_response_id: str | Omit = omit,
+ prompt: response_create_params.Prompt | Omit = omit,
store: bool | Omit = omit,
temperature: float | Omit = omit,
text: response_create_params.Text | Omit = omit,
@@ -161,6 +165,8 @@ def create(
response. This can be used to easily fork-off new responses from existing
responses.
+ prompt: (Optional) Prompt object with ID, version, and variables.
+
text: Text response configuration for OpenAI responses.
extra_headers: Send extra headers
@@ -185,6 +191,7 @@ def create(
instructions: str | Omit = omit,
max_infer_iters: int | Omit = omit,
previous_response_id: str | Omit = omit,
+ prompt: response_create_params.Prompt | Omit = omit,
store: bool | Omit = omit,
temperature: float | Omit = omit,
text: response_create_params.Text | Omit = omit,
@@ -214,6 +221,8 @@ def create(
response. This can be used to easily fork-off new responses from existing
responses.
+ prompt: (Optional) Prompt object with ID, version, and variables.
+
text: Text response configuration for OpenAI responses.
extra_headers: Send extra headers
@@ -237,6 +246,7 @@ def create(
instructions: str | Omit = omit,
max_infer_iters: int | Omit = omit,
previous_response_id: str | Omit = omit,
+ prompt: response_create_params.Prompt | Omit = omit,
store: bool | Omit = omit,
stream: Literal[False] | Literal[True] | Omit = omit,
temperature: float | Omit = omit,
@@ -260,6 +270,7 @@ def create(
"instructions": instructions,
"max_infer_iters": max_infer_iters,
"previous_response_id": previous_response_id,
+ "prompt": prompt,
"store": store,
"stream": stream,
"temperature": temperature,
@@ -435,6 +446,7 @@ async def create(
instructions: str | Omit = omit,
max_infer_iters: int | Omit = omit,
previous_response_id: str | Omit = omit,
+ prompt: response_create_params.Prompt | Omit = omit,
store: bool | Omit = omit,
stream: Literal[False] | Omit = omit,
temperature: float | Omit = omit,
@@ -465,6 +477,8 @@ async def create(
response. This can be used to easily fork-off new responses from existing
responses.
+ prompt: (Optional) Prompt object with ID, version, and variables.
+
text: Text response configuration for OpenAI responses.
extra_headers: Send extra headers
@@ -489,6 +503,7 @@ async def create(
instructions: str | Omit = omit,
max_infer_iters: int | Omit = omit,
previous_response_id: str | Omit = omit,
+ prompt: response_create_params.Prompt | Omit = omit,
store: bool | Omit = omit,
temperature: float | Omit = omit,
text: response_create_params.Text | Omit = omit,
@@ -518,6 +533,8 @@ async def create(
response. This can be used to easily fork-off new responses from existing
responses.
+ prompt: (Optional) Prompt object with ID, version, and variables.
+
text: Text response configuration for OpenAI responses.
extra_headers: Send extra headers
@@ -542,6 +559,7 @@ async def create(
instructions: str | Omit = omit,
max_infer_iters: int | Omit = omit,
previous_response_id: str | Omit = omit,
+ prompt: response_create_params.Prompt | Omit = omit,
store: bool | Omit = omit,
temperature: float | Omit = omit,
text: response_create_params.Text | Omit = omit,
@@ -571,6 +589,8 @@ async def create(
response. This can be used to easily fork-off new responses from existing
responses.
+ prompt: (Optional) Prompt object with ID, version, and variables.
+
text: Text response configuration for OpenAI responses.
extra_headers: Send extra headers
@@ -594,6 +614,7 @@ async def create(
instructions: str | Omit = omit,
max_infer_iters: int | Omit = omit,
previous_response_id: str | Omit = omit,
+ prompt: response_create_params.Prompt | Omit = omit,
store: bool | Omit = omit,
stream: Literal[False] | Literal[True] | Omit = omit,
temperature: float | Omit = omit,
@@ -617,6 +638,7 @@ async def create(
"instructions": instructions,
"max_infer_iters": max_infer_iters,
"previous_response_id": previous_response_id,
+ "prompt": prompt,
"store": store,
"stream": stream,
"temperature": temperature,
diff --git a/src/llama_stack_client/resources/routes.py b/src/llama_stack_client/resources/routes.py
index 0797d00f..ff9b2c59 100644
--- a/src/llama_stack_client/resources/routes.py
+++ b/src/llama_stack_client/resources/routes.py
@@ -9,10 +9,13 @@
from __future__ import annotations
from typing import Type, cast
+from typing_extensions import Literal
import httpx
-from .._types import Body, Query, Headers, NotGiven, not_given
+from ..types import route_list_params
+from .._types import Body, Omit, Query, Headers, NotGiven, omit, not_given
+from .._utils import maybe_transform, async_maybe_transform
from .._compat import cached_property
from .._resource import SyncAPIResource, AsyncAPIResource
from .._response import (
@@ -51,6 +54,7 @@ def with_streaming_response(self) -> RoutesResourceWithStreamingResponse:
def list(
self,
*,
+ api_filter: Literal["v1", "v1alpha", "v1beta", "deprecated"] | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -62,6 +66,20 @@ def list(
List all available API routes with their methods and implementing
providers.
+
+ Args:
+ api_filter: Optional filter to control which routes are returned. Can be an API level ('v1',
+ 'v1alpha', 'v1beta') to show non-deprecated routes at that level, or
+ 'deprecated' to show deprecated routes across all levels. If not specified,
+ returns only non-deprecated v1 routes.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
"""
return self._get(
"/v1/inspect/routes",
@@ -70,6 +88,7 @@ def list(
extra_query=extra_query,
extra_body=extra_body,
timeout=timeout,
+ query=maybe_transform({"api_filter": api_filter}, route_list_params.RouteListParams),
post_parser=DataWrapper[RouteListResponse]._unwrapper,
),
cast_to=cast(Type[RouteListResponse], DataWrapper[RouteListResponse]),
@@ -99,6 +118,7 @@ def with_streaming_response(self) -> AsyncRoutesResourceWithStreamingResponse:
async def list(
self,
*,
+ api_filter: Literal["v1", "v1alpha", "v1beta", "deprecated"] | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -110,6 +130,20 @@ async def list(
List all available API routes with their methods and implementing
providers.
+
+ Args:
+ api_filter: Optional filter to control which routes are returned. Can be an API level ('v1',
+ 'v1alpha', 'v1beta') to show non-deprecated routes at that level, or
+ 'deprecated' to show deprecated routes across all levels. If not specified,
+ returns only non-deprecated v1 routes.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
"""
return await self._get(
"/v1/inspect/routes",
@@ -118,6 +152,7 @@ async def list(
extra_query=extra_query,
extra_body=extra_body,
timeout=timeout,
+ query=await async_maybe_transform({"api_filter": api_filter}, route_list_params.RouteListParams),
post_parser=DataWrapper[RouteListResponse]._unwrapper,
),
cast_to=cast(Type[RouteListResponse], DataWrapper[RouteListResponse]),
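And the async counterpart, under the same placeholder assumptions:

import asyncio

from llama_stack_client import AsyncLlamaStackClient


async def main() -> None:
    async_client = AsyncLlamaStackClient(base_url="http://localhost:8321")  # placeholder URL
    routes = await async_client.routes.list(api_filter="v1beta")
    print(routes)


asyncio.run(main())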
diff --git a/src/llama_stack_client/types/__init__.py b/src/llama_stack_client/types/__init__.py
index 4b6a2b84..1a67f6c4 100644
--- a/src/llama_stack_client/types/__init__.py
+++ b/src/llama_stack_client/types/__init__.py
@@ -44,6 +44,7 @@
from .response_object import ResponseObject as ResponseObject
from .file_list_params import FileListParams as FileListParams
from .tool_list_params import ToolListParams as ToolListParams
+from .route_list_params import RouteListParams as RouteListParams
from .scoring_fn_params import ScoringFnParams as ScoringFnParams
from .file_create_params import FileCreateParams as FileCreateParams
from .tool_list_response import ToolListResponse as ToolListResponse
diff --git a/src/llama_stack_client/types/conversation_create_params.py b/src/llama_stack_client/types/conversation_create_params.py
index c51245dd..96fbb82e 100644
--- a/src/llama_stack_client/types/conversation_create_params.py
+++ b/src/llama_stack_client/types/conversation_create_params.py
@@ -20,6 +20,7 @@
"ItemOpenAIResponseMessageContentUnionMember1",
"ItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText",
"ItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage",
+ "ItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile",
"ItemOpenAIResponseMessageContentUnionMember2",
"ItemOpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputText",
"ItemOpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputTextAnnotation",
@@ -64,13 +65,34 @@ class ItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageCont
type: Required[Literal["input_image"]]
"""Content type identifier, always "input_image" """
+ file_id: str
+ """(Optional) The ID of the file to be sent to the model."""
+
image_url: str
"""(Optional) URL of the image content"""
+class ItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile(TypedDict, total=False):
+ type: Required[Literal["input_file"]]
+ """The type of the input item. Always `input_file`."""
+
+ file_data: str
+ """The data of the file to be sent to the model."""
+
+ file_id: str
+ """(Optional) The ID of the file to be sent to the model."""
+
+ file_url: str
+ """The URL of the file to be sent to the model."""
+
+ filename: str
+ """The name of the file to be sent to the model."""
+
+
ItemOpenAIResponseMessageContentUnionMember1: TypeAlias = Union[
ItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText,
ItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage,
+ ItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile,
]
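A sketch of a conversation item using the new `input_file` content part; the file ID, the filename, and the `items=` usage noted in the trailing comment are placeholder assumptions rather than values taken from this patch:

# A user message whose content mixes text with the new "input_file" part.
file_part = {
    "type": "input_file",
    "file_id": "file-abc123",          # placeholder ID of a previously uploaded file
    "filename": "quarterly_report.pdf",
}

user_message = {
    "type": "message",
    "role": "user",
    "content": [
        {"type": "input_text", "text": "Summarize the attached report."},
        file_part,
    ],
}

# This dict would then be supplied through the conversation create `items`
# parameter defined by this module, e.g. client.conversations.create(items=[user_message]).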
diff --git a/src/llama_stack_client/types/conversations/item_create_params.py b/src/llama_stack_client/types/conversations/item_create_params.py
index 8df31144..111c39fb 100644
--- a/src/llama_stack_client/types/conversations/item_create_params.py
+++ b/src/llama_stack_client/types/conversations/item_create_params.py
@@ -20,6 +20,7 @@
"ItemOpenAIResponseMessageContentUnionMember1",
"ItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText",
"ItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage",
+ "ItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile",
"ItemOpenAIResponseMessageContentUnionMember2",
"ItemOpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputText",
"ItemOpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputTextAnnotation",
@@ -61,13 +62,34 @@ class ItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageCont
type: Required[Literal["input_image"]]
"""Content type identifier, always "input_image" """
+ file_id: str
+ """(Optional) The ID of the file to be sent to the model."""
+
image_url: str
"""(Optional) URL of the image content"""
+class ItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile(TypedDict, total=False):
+ type: Required[Literal["input_file"]]
+ """The type of the input item. Always `input_file`."""
+
+ file_data: str
+ """The data of the file to be sent to the model."""
+
+ file_id: str
+ """(Optional) The ID of the file to be sent to the model."""
+
+ file_url: str
+ """The URL of the file to be sent to the model."""
+
+ filename: str
+ """The name of the file to be sent to the model."""
+
+
ItemOpenAIResponseMessageContentUnionMember1: TypeAlias = Union[
ItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText,
ItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage,
+ ItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile,
]
diff --git a/src/llama_stack_client/types/conversations/item_create_response.py b/src/llama_stack_client/types/conversations/item_create_response.py
index c382e2b9..580aaf23 100644
--- a/src/llama_stack_client/types/conversations/item_create_response.py
+++ b/src/llama_stack_client/types/conversations/item_create_response.py
@@ -19,6 +19,7 @@
"DataOpenAIResponseMessageContentUnionMember1",
"DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText",
"DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage",
+ "DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile",
"DataOpenAIResponseMessageContentUnionMember2",
"DataOpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputText",
"DataOpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputTextAnnotation",
@@ -55,14 +56,35 @@ class DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageCont
type: Literal["input_image"]
"""Content type identifier, always "input_image" """
+ file_id: Optional[str] = None
+ """(Optional) The ID of the file to be sent to the model."""
+
image_url: Optional[str] = None
"""(Optional) URL of the image content"""
+class DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile(BaseModel):
+ type: Literal["input_file"]
+ """The type of the input item. Always `input_file`."""
+
+ file_data: Optional[str] = None
+ """The data of the file to be sent to the model."""
+
+ file_id: Optional[str] = None
+ """(Optional) The ID of the file to be sent to the model."""
+
+ file_url: Optional[str] = None
+ """The URL of the file to be sent to the model."""
+
+ filename: Optional[str] = None
+ """The name of the file to be sent to the model."""
+
+
DataOpenAIResponseMessageContentUnionMember1: TypeAlias = Annotated[
Union[
DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText,
DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage,
+ DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile,
],
PropertyInfo(discriminator="type"),
]
diff --git a/src/llama_stack_client/types/conversations/item_get_response.py b/src/llama_stack_client/types/conversations/item_get_response.py
index 9f8d4bda..434e4639 100644
--- a/src/llama_stack_client/types/conversations/item_get_response.py
+++ b/src/llama_stack_client/types/conversations/item_get_response.py
@@ -18,6 +18,7 @@
"OpenAIResponseMessageContentUnionMember1",
"OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText",
"OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage",
+ "OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile",
"OpenAIResponseMessageContentUnionMember2",
"OpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputText",
"OpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputTextAnnotation",
@@ -54,14 +55,35 @@ class OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentI
type: Literal["input_image"]
"""Content type identifier, always "input_image" """
+ file_id: Optional[str] = None
+ """(Optional) The ID of the file to be sent to the model."""
+
image_url: Optional[str] = None
"""(Optional) URL of the image content"""
+class OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile(BaseModel):
+ type: Literal["input_file"]
+ """The type of the input item. Always `input_file`."""
+
+ file_data: Optional[str] = None
+ """The data of the file to be sent to the model."""
+
+ file_id: Optional[str] = None
+ """(Optional) The ID of the file to be sent to the model."""
+
+ file_url: Optional[str] = None
+ """The URL of the file to be sent to the model."""
+
+ filename: Optional[str] = None
+ """The name of the file to be sent to the model."""
+
+
OpenAIResponseMessageContentUnionMember1: TypeAlias = Annotated[
Union[
OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText,
OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage,
+ OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile,
],
PropertyInfo(discriminator="type"),
]
diff --git a/src/llama_stack_client/types/conversations/item_list_response.py b/src/llama_stack_client/types/conversations/item_list_response.py
index b95f56fb..d6ba4735 100644
--- a/src/llama_stack_client/types/conversations/item_list_response.py
+++ b/src/llama_stack_client/types/conversations/item_list_response.py
@@ -18,6 +18,7 @@
"OpenAIResponseMessageContentUnionMember1",
"OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText",
"OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage",
+ "OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile",
"OpenAIResponseMessageContentUnionMember2",
"OpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputText",
"OpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputTextAnnotation",
@@ -54,14 +55,35 @@ class OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentI
type: Literal["input_image"]
"""Content type identifier, always "input_image" """
+ file_id: Optional[str] = None
+ """(Optional) The ID of the file to be sent to the model."""
+
image_url: Optional[str] = None
"""(Optional) URL of the image content"""
+class OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile(BaseModel):
+ type: Literal["input_file"]
+ """The type of the input item. Always `input_file`."""
+
+ file_data: Optional[str] = None
+ """The data of the file to be sent to the model."""
+
+ file_id: Optional[str] = None
+ """(Optional) The ID of the file to be sent to the model."""
+
+ file_url: Optional[str] = None
+ """The URL of the file to be sent to the model."""
+
+ filename: Optional[str] = None
+ """The name of the file to be sent to the model."""
+
+
OpenAIResponseMessageContentUnionMember1: TypeAlias = Annotated[
Union[
OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText,
OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage,
+ OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile,
],
PropertyInfo(discriminator="type"),
]
diff --git a/src/llama_stack_client/types/model_list_response.py b/src/llama_stack_client/types/model_list_response.py
index b53ae421..c42b3310 100644
--- a/src/llama_stack_client/types/model_list_response.py
+++ b/src/llama_stack_client/types/model_list_response.py
@@ -6,11 +6,25 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-from typing import List
-from typing_extensions import TypeAlias
+import builtins
+from typing import Dict, List, Union, Optional
+from typing_extensions import Literal, TypeAlias
-from .model import Model
+from .._models import BaseModel
-__all__ = ["ModelListResponse"]
+__all__ = ["ModelListResponse", "ModelListResponseItem"]
-ModelListResponse: TypeAlias = List[Model]
+
+class ModelListResponseItem(BaseModel):
+ id: str
+
+ created: int
+
+ object: Literal["model"]
+
+ owned_by: str
+
+ custom_metadata: Optional[Dict[str, Union[bool, float, str, List[builtins.object], builtins.object, None]]] = None
+
+
+ModelListResponse: TypeAlias = List[ModelListResponseItem]
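A sketch of iterating the reshaped `models.list()` result, assuming a placeholder base URL:

from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # placeholder URL

for model in client.models.list():
    # OpenAI-style fields replace the previous Model shape for this endpoint.
    print(model.id, model.object, model.owned_by, model.created)
    if model.custom_metadata:
        print("  custom_metadata:", model.custom_metadata)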
diff --git a/src/llama_stack_client/types/models/__init__.py b/src/llama_stack_client/types/models/__init__.py
index d14ed874..bba1f3e9 100644
--- a/src/llama_stack_client/types/models/__init__.py
+++ b/src/llama_stack_client/types/models/__init__.py
@@ -7,3 +7,5 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from __future__ import annotations
+
+from .openai_list_response import OpenAIListResponse as OpenAIListResponse
diff --git a/src/llama_stack_client/types/models/openai_list_response.py b/src/llama_stack_client/types/models/openai_list_response.py
new file mode 100644
index 00000000..5b6c0358
--- /dev/null
+++ b/src/llama_stack_client/types/models/openai_list_response.py
@@ -0,0 +1,10 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List
+from typing_extensions import TypeAlias
+
+from ..model import Model
+
+__all__ = ["OpenAIListResponse"]
+
+OpenAIListResponse: TypeAlias = List[Model]
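A sketch of the companion `models.openai.list()` call, which continues to return the original `Model` objects via the new `OpenAIListResponse` alias; the base URL is a placeholder:

from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # placeholder URL

legacy_models = client.models.openai.list()  # returns OpenAIListResponse, i.e. List[Model]
print(len(legacy_models))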
diff --git a/src/llama_stack_client/types/query_chunks_response.py b/src/llama_stack_client/types/query_chunks_response.py
index ab5fdb61..cb0181a2 100644
--- a/src/llama_stack_client/types/query_chunks_response.py
+++ b/src/llama_stack_client/types/query_chunks_response.py
@@ -53,6 +53,9 @@ class ChunkChunkMetadata(BaseModel):
class Chunk(BaseModel):
+ chunk_id: str
+ """Unique identifier for the chunk. Must be provided explicitly."""
+
content: InterleavedContent
"""
The content of the chunk, which can be interleaved text, images, or other types.
@@ -73,12 +76,6 @@ class Chunk(BaseModel):
embedding: Optional[List[float]] = None
"""Optional embedding for the chunk. If not provided, it will be computed later."""
- stored_chunk_id: Optional[str] = None
- """The chunk ID that is stored in the vector database.
-
- Used for backend functionality.
- """
-
class QueryChunksResponse(BaseModel):
chunks: List[Chunk]
diff --git a/src/llama_stack_client/types/response_create_params.py b/src/llama_stack_client/types/response_create_params.py
index c8b48657..f99cd037 100644
--- a/src/llama_stack_client/types/response_create_params.py
+++ b/src/llama_stack_client/types/response_create_params.py
@@ -20,6 +20,7 @@
"InputUnionMember1OpenAIResponseMessageContentUnionMember1",
"InputUnionMember1OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText",
"InputUnionMember1OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage",
+ "InputUnionMember1OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile",
"InputUnionMember1OpenAIResponseMessageContentUnionMember2",
"InputUnionMember1OpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputText",
"InputUnionMember1OpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputTextAnnotation",
@@ -38,6 +39,11 @@
"InputUnionMember1OpenAIResponseMcpApprovalRequest",
"InputUnionMember1OpenAIResponseInputFunctionToolCallOutput",
"InputUnionMember1OpenAIResponseMcpApprovalResponse",
+ "Prompt",
+ "PromptVariables",
+ "PromptVariablesOpenAIResponseInputMessageContentText",
+ "PromptVariablesOpenAIResponseInputMessageContentImage",
+ "PromptVariablesOpenAIResponseInputMessageContentFile",
"Text",
"TextFormat",
"Tool",
@@ -83,6 +89,9 @@ class ResponseCreateParamsBase(TypedDict, total=False):
responses.
"""
+ prompt: Prompt
+ """(Optional) Prompt object with ID, version, and variables."""
+
store: bool
temperature: float
@@ -112,13 +121,36 @@ class InputUnionMember1OpenAIResponseMessageContentUnionMember1OpenAIResponseInp
type: Required[Literal["input_image"]]
"""Content type identifier, always "input_image" """
+ file_id: str
+ """(Optional) The ID of the file to be sent to the model."""
+
image_url: str
"""(Optional) URL of the image content"""
+class InputUnionMember1OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile(
+ TypedDict, total=False
+):
+ type: Required[Literal["input_file"]]
+ """The type of the input item. Always `input_file`."""
+
+ file_data: str
+ """The data of the file to be sent to the model."""
+
+ file_id: str
+ """(Optional) The ID of the file to be sent to the model."""
+
+ file_url: str
+ """The URL of the file to be sent to the model."""
+
+ filename: str
+ """The name of the file to be sent to the model."""
+
+
InputUnionMember1OpenAIResponseMessageContentUnionMember1: TypeAlias = Union[
InputUnionMember1OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText,
InputUnionMember1OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage,
+ InputUnionMember1OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile,
]
@@ -400,6 +432,67 @@ class InputUnionMember1OpenAIResponseMcpApprovalResponse(TypedDict, total=False)
]
+class PromptVariablesOpenAIResponseInputMessageContentText(TypedDict, total=False):
+ text: Required[str]
+ """The text content of the input message"""
+
+ type: Required[Literal["input_text"]]
+ """Content type identifier, always "input_text" """
+
+
+class PromptVariablesOpenAIResponseInputMessageContentImage(TypedDict, total=False):
+ detail: Required[Literal["low", "high", "auto"]]
+ """Level of detail for image processing, can be "low", "high", or "auto" """
+
+ type: Required[Literal["input_image"]]
+ """Content type identifier, always "input_image" """
+
+ file_id: str
+ """(Optional) The ID of the file to be sent to the model."""
+
+ image_url: str
+ """(Optional) URL of the image content"""
+
+
+class PromptVariablesOpenAIResponseInputMessageContentFile(TypedDict, total=False):
+ type: Required[Literal["input_file"]]
+ """The type of the input item. Always `input_file`."""
+
+ file_data: str
+ """The data of the file to be sent to the model."""
+
+ file_id: str
+ """(Optional) The ID of the file to be sent to the model."""
+
+ file_url: str
+ """The URL of the file to be sent to the model."""
+
+ filename: str
+ """The name of the file to be sent to the model."""
+
+
+PromptVariables: TypeAlias = Union[
+ PromptVariablesOpenAIResponseInputMessageContentText,
+ PromptVariablesOpenAIResponseInputMessageContentImage,
+ PromptVariablesOpenAIResponseInputMessageContentFile,
+]
+
+
+class Prompt(TypedDict, total=False):
+ id: Required[str]
+ """Unique identifier of the prompt template"""
+
+ variables: Dict[str, PromptVariables]
+ """
+ Dictionary of variable names to OpenAIResponseInputMessageContent structure for
+ template substitution. The substitution values can either be strings, or other
+ Response input types like images or files.
+ """
+
+ version: str
+ """Version number of the prompt to use (defaults to latest if not specified)"""
+
+
class TextFormat(TypedDict, total=False):
type: Required[Literal["text", "json_schema", "json_object"]]
"""Must be "text", "json_schema", or "json_object" to identify the format type"""
diff --git a/src/llama_stack_client/types/response_list_response.py b/src/llama_stack_client/types/response_list_response.py
index 78c683b4..ccd9a3d7 100644
--- a/src/llama_stack_client/types/response_list_response.py
+++ b/src/llama_stack_client/types/response_list_response.py
@@ -21,6 +21,7 @@
"InputOpenAIResponseMessageContentUnionMember1",
"InputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText",
"InputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage",
+ "InputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile",
"InputOpenAIResponseMessageContentUnionMember2",
"InputOpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputText",
"InputOpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputTextAnnotation",
@@ -44,6 +45,7 @@
"OutputOpenAIResponseMessageContentUnionMember1",
"OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText",
"OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage",
+ "OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile",
"OutputOpenAIResponseMessageContentUnionMember2",
"OutputOpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputText",
"OutputOpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputTextAnnotation",
@@ -63,6 +65,11 @@
"Text",
"TextFormat",
"Error",
+ "Prompt",
+ "PromptVariables",
+ "PromptVariablesOpenAIResponseInputMessageContentText",
+ "PromptVariablesOpenAIResponseInputMessageContentImage",
+ "PromptVariablesOpenAIResponseInputMessageContentFile",
"Tool",
"ToolOpenAIResponseInputToolWebSearch",
"ToolOpenAIResponseInputToolFileSearch",
@@ -92,14 +99,35 @@ class InputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageCon
type: Literal["input_image"]
"""Content type identifier, always "input_image" """
+ file_id: Optional[str] = None
+ """(Optional) The ID of the file to be sent to the model."""
+
image_url: Optional[str] = None
"""(Optional) URL of the image content"""
+class InputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile(BaseModel):
+ type: Literal["input_file"]
+ """The type of the input item. Always `input_file`."""
+
+ file_data: Optional[str] = None
+ """The data of the file to be sent to the model."""
+
+ file_id: Optional[str] = None
+ """(Optional) The ID of the file to be sent to the model."""
+
+ file_url: Optional[str] = None
+ """The URL of the file to be sent to the model."""
+
+ filename: Optional[str] = None
+ """The name of the file to be sent to the model."""
+
+
InputOpenAIResponseMessageContentUnionMember1: TypeAlias = Annotated[
Union[
InputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText,
InputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage,
+ InputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile,
],
PropertyInfo(discriminator="type"),
]
@@ -396,14 +424,35 @@ class OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageCo
type: Literal["input_image"]
"""Content type identifier, always "input_image" """
+ file_id: Optional[str] = None
+ """(Optional) The ID of the file to be sent to the model."""
+
image_url: Optional[str] = None
"""(Optional) URL of the image content"""
+class OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile(BaseModel):
+ type: Literal["input_file"]
+ """The type of the input item. Always `input_file`."""
+
+ file_data: Optional[str] = None
+ """The data of the file to be sent to the model."""
+
+ file_id: Optional[str] = None
+ """(Optional) The ID of the file to be sent to the model."""
+
+ file_url: Optional[str] = None
+ """The URL of the file to be sent to the model."""
+
+ filename: Optional[str] = None
+ """The name of the file to be sent to the model."""
+
+
OutputOpenAIResponseMessageContentUnionMember1: TypeAlias = Annotated[
Union[
OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText,
OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage,
+ OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile,
],
PropertyInfo(discriminator="type"),
]
@@ -699,6 +748,70 @@ class Error(BaseModel):
"""Human-readable error message describing the failure"""
+class PromptVariablesOpenAIResponseInputMessageContentText(BaseModel):
+ text: str
+ """The text content of the input message"""
+
+ type: Literal["input_text"]
+ """Content type identifier, always "input_text" """
+
+
+class PromptVariablesOpenAIResponseInputMessageContentImage(BaseModel):
+ detail: Literal["low", "high", "auto"]
+ """Level of detail for image processing, can be "low", "high", or "auto" """
+
+ type: Literal["input_image"]
+ """Content type identifier, always "input_image" """
+
+ file_id: Optional[str] = None
+ """(Optional) The ID of the file to be sent to the model."""
+
+ image_url: Optional[str] = None
+ """(Optional) URL of the image content"""
+
+
+class PromptVariablesOpenAIResponseInputMessageContentFile(BaseModel):
+ type: Literal["input_file"]
+ """The type of the input item. Always `input_file`."""
+
+ file_data: Optional[str] = None
+ """The data of the file to be sent to the model."""
+
+ file_id: Optional[str] = None
+ """(Optional) The ID of the file to be sent to the model."""
+
+ file_url: Optional[str] = None
+ """The URL of the file to be sent to the model."""
+
+ filename: Optional[str] = None
+ """The name of the file to be sent to the model."""
+
+
+PromptVariables: TypeAlias = Annotated[
+ Union[
+ PromptVariablesOpenAIResponseInputMessageContentText,
+ PromptVariablesOpenAIResponseInputMessageContentImage,
+ PromptVariablesOpenAIResponseInputMessageContentFile,
+ ],
+ PropertyInfo(discriminator="type"),
+]
+
+
+class Prompt(BaseModel):
+ id: str
+ """Unique identifier of the prompt template"""
+
+ variables: Optional[Dict[str, PromptVariables]] = None
+ """
+ Dictionary of variable names to OpenAIResponseInputMessageContent structure for
+ template substitution. The substitution values can either be strings, or other
+ Response input types like images or files.
+ """
+
+ version: Optional[str] = None
+ """Version number of the prompt to use (defaults to latest if not specified)"""
+
+
class ToolOpenAIResponseInputToolWebSearch(BaseModel):
type: Literal["web_search", "web_search_preview", "web_search_preview_2025_03_11"]
"""Web search tool type variant to use"""
@@ -842,6 +955,9 @@ class ResponseListResponse(BaseModel):
previous_response_id: Optional[str] = None
"""(Optional) ID of the previous response in a conversation"""
+ prompt: Optional[Prompt] = None
+ """(Optional) Reference to a prompt template and its variables."""
+
temperature: Optional[float] = None
"""(Optional) Sampling temperature used for generation"""
diff --git a/src/llama_stack_client/types/response_object.py b/src/llama_stack_client/types/response_object.py
index 57f708ce..706f50e2 100644
--- a/src/llama_stack_client/types/response_object.py
+++ b/src/llama_stack_client/types/response_object.py
@@ -21,6 +21,7 @@
"OutputOpenAIResponseMessageContentUnionMember1",
"OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText",
"OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage",
+ "OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile",
"OutputOpenAIResponseMessageContentUnionMember2",
"OutputOpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputText",
"OutputOpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputTextAnnotation",
@@ -40,6 +41,11 @@
"Text",
"TextFormat",
"Error",
+ "Prompt",
+ "PromptVariables",
+ "PromptVariablesOpenAIResponseInputMessageContentText",
+ "PromptVariablesOpenAIResponseInputMessageContentImage",
+ "PromptVariablesOpenAIResponseInputMessageContentFile",
"Tool",
"ToolOpenAIResponseInputToolWebSearch",
"ToolOpenAIResponseInputToolFileSearch",
@@ -69,14 +75,35 @@ class OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageCo
type: Literal["input_image"]
"""Content type identifier, always "input_image" """
+ file_id: Optional[str] = None
+ """(Optional) The ID of the file to be sent to the model."""
+
image_url: Optional[str] = None
"""(Optional) URL of the image content"""
+class OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile(BaseModel):
+ type: Literal["input_file"]
+ """The type of the input item. Always `input_file`."""
+
+ file_data: Optional[str] = None
+ """The data of the file to be sent to the model."""
+
+ file_id: Optional[str] = None
+ """(Optional) The ID of the file to be sent to the model."""
+
+ file_url: Optional[str] = None
+ """The URL of the file to be sent to the model."""
+
+ filename: Optional[str] = None
+ """The name of the file to be sent to the model."""
+
+
OutputOpenAIResponseMessageContentUnionMember1: TypeAlias = Annotated[
Union[
OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText,
OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage,
+ OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile,
],
PropertyInfo(discriminator="type"),
]
@@ -372,6 +399,70 @@ class Error(BaseModel):
"""Human-readable error message describing the failure"""
+class PromptVariablesOpenAIResponseInputMessageContentText(BaseModel):
+ text: str
+ """The text content of the input message"""
+
+ type: Literal["input_text"]
+ """Content type identifier, always "input_text" """
+
+
+class PromptVariablesOpenAIResponseInputMessageContentImage(BaseModel):
+ detail: Literal["low", "high", "auto"]
+ """Level of detail for image processing, can be "low", "high", or "auto" """
+
+ type: Literal["input_image"]
+ """Content type identifier, always "input_image" """
+
+ file_id: Optional[str] = None
+ """(Optional) The ID of the file to be sent to the model."""
+
+ image_url: Optional[str] = None
+ """(Optional) URL of the image content"""
+
+
+class PromptVariablesOpenAIResponseInputMessageContentFile(BaseModel):
+ type: Literal["input_file"]
+ """The type of the input item. Always `input_file`."""
+
+ file_data: Optional[str] = None
+ """The data of the file to be sent to the model."""
+
+ file_id: Optional[str] = None
+ """(Optional) The ID of the file to be sent to the model."""
+
+ file_url: Optional[str] = None
+ """The URL of the file to be sent to the model."""
+
+ filename: Optional[str] = None
+ """The name of the file to be sent to the model."""
+
+
+PromptVariables: TypeAlias = Annotated[
+ Union[
+ PromptVariablesOpenAIResponseInputMessageContentText,
+ PromptVariablesOpenAIResponseInputMessageContentImage,
+ PromptVariablesOpenAIResponseInputMessageContentFile,
+ ],
+ PropertyInfo(discriminator="type"),
+]
+
+
+class Prompt(BaseModel):
+ id: str
+ """Unique identifier of the prompt template"""
+
+ variables: Optional[Dict[str, PromptVariables]] = None
+ """
+ Dictionary of variable names to OpenAIResponseInputMessageContent structure for
+ template substitution. The substitution values can either be strings, or other
+ Response input types like images or files.
+ """
+
+ version: Optional[str] = None
+ """Version number of the prompt to use (defaults to latest if not specified)"""
+
+
class ToolOpenAIResponseInputToolWebSearch(BaseModel):
type: Literal["web_search", "web_search_preview", "web_search_preview_2025_03_11"]
"""Web search tool type variant to use"""
@@ -522,6 +613,9 @@ def output_text(self) -> str:
previous_response_id: Optional[str] = None
"""(Optional) ID of the previous response in a conversation"""
+ prompt: Optional[Prompt] = None
+ """(Optional) Reference to a prompt template and its variables."""
+
temperature: Optional[float] = None
"""(Optional) Sampling temperature used for generation"""
diff --git a/src/llama_stack_client/types/response_object_stream.py b/src/llama_stack_client/types/response_object_stream.py
index a75ac721..16fe6c6d 100644
--- a/src/llama_stack_client/types/response_object_stream.py
+++ b/src/llama_stack_client/types/response_object_stream.py
@@ -23,6 +23,7 @@
"OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember1",
"OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText",
"OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage",
+ "OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile",
"OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember2",
"OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputText",
"OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputTextAnnotation",
@@ -45,6 +46,7 @@
"OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember1",
"OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText",
"OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage",
+ "OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile",
"OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember2",
"OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputText",
"OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputTextAnnotation",
@@ -159,14 +161,37 @@ class OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessage
type: Literal["input_image"]
"""Content type identifier, always "input_image" """
+ file_id: Optional[str] = None
+ """(Optional) The ID of the file to be sent to the model."""
+
image_url: Optional[str] = None
"""(Optional) URL of the image content"""
+class OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile(
+ BaseModel
+):
+ type: Literal["input_file"]
+ """The type of the input item. Always `input_file`."""
+
+ file_data: Optional[str] = None
+ """The data of the file to be sent to the model."""
+
+ file_id: Optional[str] = None
+ """(Optional) The ID of the file to be sent to the model."""
+
+ file_url: Optional[str] = None
+ """The URL of the file to be sent to the model."""
+
+ filename: Optional[str] = None
+ """The name of the file to be sent to the model."""
+
+
OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember1: TypeAlias = Annotated[
Union[
OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText,
OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage,
+ OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile,
],
PropertyInfo(discriminator="type"),
]
@@ -470,14 +495,37 @@ class OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageC
type: Literal["input_image"]
"""Content type identifier, always "input_image" """
+ file_id: Optional[str] = None
+ """(Optional) The ID of the file to be sent to the model."""
+
image_url: Optional[str] = None
"""(Optional) URL of the image content"""
+class OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile(
+ BaseModel
+):
+ type: Literal["input_file"]
+ """The type of the input item. Always `input_file`."""
+
+ file_data: Optional[str] = None
+ """The data of the file to be sent to the model."""
+
+ file_id: Optional[str] = None
+ """(Optional) The ID of the file to be sent to the model."""
+
+ file_url: Optional[str] = None
+ """The URL of the file to be sent to the model."""
+
+ filename: Optional[str] = None
+ """The name of the file to be sent to the model."""
+
+
OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember1: TypeAlias = Annotated[
Union[
OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText,
OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage,
+ OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile,
],
PropertyInfo(discriminator="type"),
]
diff --git a/src/llama_stack_client/types/responses/input_item_list_response.py b/src/llama_stack_client/types/responses/input_item_list_response.py
index b812ee62..71a59f50 100644
--- a/src/llama_stack_client/types/responses/input_item_list_response.py
+++ b/src/llama_stack_client/types/responses/input_item_list_response.py
@@ -19,6 +19,7 @@
"DataOpenAIResponseMessageContentUnionMember1",
"DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText",
"DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage",
+ "DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile",
"DataOpenAIResponseMessageContentUnionMember2",
"DataOpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputText",
"DataOpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputTextAnnotation",
@@ -55,14 +56,35 @@ class DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageCont
type: Literal["input_image"]
"""Content type identifier, always "input_image" """
+ file_id: Optional[str] = None
+ """(Optional) The ID of the file to be sent to the model."""
+
image_url: Optional[str] = None
"""(Optional) URL of the image content"""
+class DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile(BaseModel):
+ type: Literal["input_file"]
+ """The type of the input item. Always `input_file`."""
+
+ file_data: Optional[str] = None
+ """The data of the file to be sent to the model."""
+
+ file_id: Optional[str] = None
+ """(Optional) The ID of the file to be sent to the model."""
+
+ file_url: Optional[str] = None
+ """The URL of the file to be sent to the model."""
+
+ filename: Optional[str] = None
+ """The name of the file to be sent to the model."""
+
+
DataOpenAIResponseMessageContentUnionMember1: TypeAlias = Annotated[
Union[
DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText,
DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage,
+ DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile,
],
PropertyInfo(discriminator="type"),
]
diff --git a/src/llama_stack_client/types/route_list_params.py b/src/llama_stack_client/types/route_list_params.py
new file mode 100644
index 00000000..764b13c7
--- /dev/null
+++ b/src/llama_stack_client/types/route_list_params.py
@@ -0,0 +1,17 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, TypedDict
+
+__all__ = ["RouteListParams"]
+
+
+class RouteListParams(TypedDict, total=False):
+ api_filter: Literal["v1", "v1alpha", "v1beta", "deprecated"]
+ """Optional filter to control which routes are returned.
+
+ Can be an API level ('v1', 'v1alpha', 'v1beta') to show non-deprecated routes at
+ that level, or 'deprecated' to show deprecated routes across all levels. If not
+ specified, returns only non-deprecated v1 routes.
+ """
diff --git a/src/llama_stack_client/types/vector_io_insert_params.py b/src/llama_stack_client/types/vector_io_insert_params.py
index 1584f807..520ad24b 100644
--- a/src/llama_stack_client/types/vector_io_insert_params.py
+++ b/src/llama_stack_client/types/vector_io_insert_params.py
@@ -73,6 +73,9 @@ class ChunkChunkMetadata(TypedDict, total=False):
class Chunk(TypedDict, total=False):
+ chunk_id: Required[str]
+ """Unique identifier for the chunk. Must be provided explicitly."""
+
content: Required[InterleavedContent]
"""
The content of the chunk, which can be interleaved text, images, or other types.
@@ -92,9 +95,3 @@ class Chunk(TypedDict, total=False):
embedding: Iterable[float]
"""Optional embedding for the chunk. If not provided, it will be computed later."""
-
- stored_chunk_id: str
- """The chunk ID that is stored in the vector database.
-
- Used for backend functionality.
- """
diff --git a/tests/api_resources/models/test_openai.py b/tests/api_resources/models/test_openai.py
index 6a9acf23..96955333 100644
--- a/tests/api_resources/models/test_openai.py
+++ b/tests/api_resources/models/test_openai.py
@@ -15,7 +15,7 @@
from tests.utils import assert_matches_type
from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient
-from llama_stack_client.types import ModelListResponse
+from llama_stack_client.types.models import OpenAIListResponse
base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
@@ -26,7 +26,7 @@ class TestOpenAI:
@parametrize
def test_method_list(self, client: LlamaStackClient) -> None:
openai = client.models.openai.list()
- assert_matches_type(ModelListResponse, openai, path=["response"])
+ assert_matches_type(OpenAIListResponse, openai, path=["response"])
@parametrize
def test_raw_response_list(self, client: LlamaStackClient) -> None:
@@ -35,7 +35,7 @@ def test_raw_response_list(self, client: LlamaStackClient) -> None:
assert response.is_closed is True
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
openai = response.parse()
- assert_matches_type(ModelListResponse, openai, path=["response"])
+ assert_matches_type(OpenAIListResponse, openai, path=["response"])
@parametrize
def test_streaming_response_list(self, client: LlamaStackClient) -> None:
@@ -44,7 +44,7 @@ def test_streaming_response_list(self, client: LlamaStackClient) -> None:
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
openai = response.parse()
- assert_matches_type(ModelListResponse, openai, path=["response"])
+ assert_matches_type(OpenAIListResponse, openai, path=["response"])
assert cast(Any, response.is_closed) is True
@@ -57,7 +57,7 @@ class TestAsyncOpenAI:
@parametrize
async def test_method_list(self, async_client: AsyncLlamaStackClient) -> None:
openai = await async_client.models.openai.list()
- assert_matches_type(ModelListResponse, openai, path=["response"])
+ assert_matches_type(OpenAIListResponse, openai, path=["response"])
@parametrize
async def test_raw_response_list(self, async_client: AsyncLlamaStackClient) -> None:
@@ -66,7 +66,7 @@ async def test_raw_response_list(self, async_client: AsyncLlamaStackClient) -> N
assert response.is_closed is True
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
openai = await response.parse()
- assert_matches_type(ModelListResponse, openai, path=["response"])
+ assert_matches_type(OpenAIListResponse, openai, path=["response"])
@parametrize
async def test_streaming_response_list(self, async_client: AsyncLlamaStackClient) -> None:
@@ -75,6 +75,6 @@ async def test_streaming_response_list(self, async_client: AsyncLlamaStackClient
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
openai = await response.parse()
- assert_matches_type(ModelListResponse, openai, path=["response"])
+ assert_matches_type(OpenAIListResponse, openai, path=["response"])
assert cast(Any, response.is_closed) is True
diff --git a/tests/api_resources/test_responses.py b/tests/api_resources/test_responses.py
index 5ef731fd..3bdafe3c 100644
--- a/tests/api_resources/test_responses.py
+++ b/tests/api_resources/test_responses.py
@@ -46,6 +46,16 @@ def test_method_create_with_all_params_overload_1(self, client: LlamaStackClient
instructions="instructions",
max_infer_iters=0,
previous_response_id="previous_response_id",
+ prompt={
+ "id": "id",
+ "variables": {
+ "foo": {
+ "text": "text",
+ "type": "input_text",
+ }
+ },
+ "version": "version",
+ },
store=True,
stream=False,
temperature=0,
@@ -113,6 +123,16 @@ def test_method_create_with_all_params_overload_2(self, client: LlamaStackClient
instructions="instructions",
max_infer_iters=0,
previous_response_id="previous_response_id",
+ prompt={
+ "id": "id",
+ "variables": {
+ "foo": {
+ "text": "text",
+ "type": "input_text",
+ }
+ },
+ "version": "version",
+ },
store=True,
temperature=0,
text={
@@ -295,6 +315,16 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn
instructions="instructions",
max_infer_iters=0,
previous_response_id="previous_response_id",
+ prompt={
+ "id": "id",
+ "variables": {
+ "foo": {
+ "text": "text",
+ "type": "input_text",
+ }
+ },
+ "version": "version",
+ },
store=True,
stream=False,
temperature=0,
@@ -362,6 +392,16 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn
instructions="instructions",
max_infer_iters=0,
previous_response_id="previous_response_id",
+ prompt={
+ "id": "id",
+ "variables": {
+ "foo": {
+ "text": "text",
+ "type": "input_text",
+ }
+ },
+ "version": "version",
+ },
store=True,
temperature=0,
text={
diff --git a/tests/api_resources/test_routes.py b/tests/api_resources/test_routes.py
index 9c863f26..58ab8ad9 100644
--- a/tests/api_resources/test_routes.py
+++ b/tests/api_resources/test_routes.py
@@ -28,6 +28,13 @@ def test_method_list(self, client: LlamaStackClient) -> None:
route = client.routes.list()
assert_matches_type(RouteListResponse, route, path=["response"])
+ @parametrize
+ def test_method_list_with_all_params(self, client: LlamaStackClient) -> None:
+ route = client.routes.list(
+ api_filter="v1",
+ )
+ assert_matches_type(RouteListResponse, route, path=["response"])
+
@parametrize
def test_raw_response_list(self, client: LlamaStackClient) -> None:
response = client.routes.with_raw_response.list()
@@ -59,6 +66,13 @@ async def test_method_list(self, async_client: AsyncLlamaStackClient) -> None:
route = await async_client.routes.list()
assert_matches_type(RouteListResponse, route, path=["response"])
+ @parametrize
+ async def test_method_list_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
+ route = await async_client.routes.list(
+ api_filter="v1",
+ )
+ assert_matches_type(RouteListResponse, route, path=["response"])
+
@parametrize
async def test_raw_response_list(self, async_client: AsyncLlamaStackClient) -> None:
response = await async_client.routes.with_raw_response.list()
diff --git a/tests/api_resources/test_vector_io.py b/tests/api_resources/test_vector_io.py
index 9adf721a..b5ae697f 100644
--- a/tests/api_resources/test_vector_io.py
+++ b/tests/api_resources/test_vector_io.py
@@ -28,6 +28,7 @@ def test_method_insert(self, client: LlamaStackClient) -> None:
vector_io = client.vector_io.insert(
chunks=[
{
+ "chunk_id": "chunk_id",
"content": "string",
"metadata": {"foo": True},
}
@@ -41,6 +42,7 @@ def test_method_insert_with_all_params(self, client: LlamaStackClient) -> None:
vector_io = client.vector_io.insert(
chunks=[
{
+ "chunk_id": "chunk_id",
"content": "string",
"metadata": {"foo": True},
"chunk_metadata": {
@@ -57,7 +59,6 @@ def test_method_insert_with_all_params(self, client: LlamaStackClient) -> None:
"updated_timestamp": 0,
},
"embedding": [0],
- "stored_chunk_id": "stored_chunk_id",
}
],
vector_store_id="vector_store_id",
@@ -70,6 +71,7 @@ def test_raw_response_insert(self, client: LlamaStackClient) -> None:
response = client.vector_io.with_raw_response.insert(
chunks=[
{
+ "chunk_id": "chunk_id",
"content": "string",
"metadata": {"foo": True},
}
@@ -87,6 +89,7 @@ def test_streaming_response_insert(self, client: LlamaStackClient) -> None:
with client.vector_io.with_streaming_response.insert(
chunks=[
{
+ "chunk_id": "chunk_id",
"content": "string",
"metadata": {"foo": True},
}
@@ -155,6 +158,7 @@ async def test_method_insert(self, async_client: AsyncLlamaStackClient) -> None:
vector_io = await async_client.vector_io.insert(
chunks=[
{
+ "chunk_id": "chunk_id",
"content": "string",
"metadata": {"foo": True},
}
@@ -168,6 +172,7 @@ async def test_method_insert_with_all_params(self, async_client: AsyncLlamaStack
vector_io = await async_client.vector_io.insert(
chunks=[
{
+ "chunk_id": "chunk_id",
"content": "string",
"metadata": {"foo": True},
"chunk_metadata": {
@@ -184,7 +189,6 @@ async def test_method_insert_with_all_params(self, async_client: AsyncLlamaStack
"updated_timestamp": 0,
},
"embedding": [0],
- "stored_chunk_id": "stored_chunk_id",
}
],
vector_store_id="vector_store_id",
@@ -197,6 +201,7 @@ async def test_raw_response_insert(self, async_client: AsyncLlamaStackClient) ->
response = await async_client.vector_io.with_raw_response.insert(
chunks=[
{
+ "chunk_id": "chunk_id",
"content": "string",
"metadata": {"foo": True},
}
@@ -214,6 +219,7 @@ async def test_streaming_response_insert(self, async_client: AsyncLlamaStackClie
async with async_client.vector_io.with_streaming_response.insert(
chunks=[
{
+ "chunk_id": "chunk_id",
"content": "string",
"metadata": {"foo": True},
}
From f0dc940c50b26706f1c3f6ee0bba7c545fce965b Mon Sep 17 00:00:00 2001
From: "stainless-app[bot]"
<142633134+stainless-app[bot]@users.noreply.github.com>
Date: Mon, 3 Nov 2025 23:55:20 +0000
Subject: [PATCH 3/3] release: 0.4.0-alpha.2
---
.release-please-manifest.json | 2 +-
CHANGELOG.md | 14 ++++++++++++++
pyproject.toml | 2 +-
3 files changed, 16 insertions(+), 2 deletions(-)
diff --git a/.release-please-manifest.json b/.release-please-manifest.json
index a1e0736b..24b05bc4 100644
--- a/.release-please-manifest.json
+++ b/.release-please-manifest.json
@@ -1,3 +1,3 @@
{
- ".": "0.4.0-alpha.1"
+ ".": "0.4.0-alpha.2"
}
diff --git a/CHANGELOG.md b/CHANGELOG.md
index ab7d3936..236e5da7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,19 @@
# Changelog
+## 0.4.0-alpha.2 (2025-11-03)
+
+Full Changelog: [v0.4.0-alpha.1...v0.4.0-alpha.2](https://github.com/llamastack/llama-stack-client-python/compare/v0.4.0-alpha.1...v0.4.0-alpha.2)
+
+### Features
+
+* **api:** point models.list() to /v1/openai/v1/models ([efdf1be](https://github.com/llamastack/llama-stack-client-python/commit/efdf1be41243be5107f4863de99c5dce8504bba9))
+
+
+### Chores
+
+* bump version to 0.3.2.dev0 ([#292](https://github.com/llamastack/llama-stack-client-python/issues/292)) ([fb91556](https://github.com/llamastack/llama-stack-client-python/commit/fb915569d1b07bbbc1202e3142447807f6d42436))
+* **internal/tests:** avoid race condition with implicit client cleanup ([4af8f35](https://github.com/llamastack/llama-stack-client-python/commit/4af8f35cffaf2b3d00a38a8fc5f8ca5a0b266786))
+
## 0.4.0-alpha.1 (2025-10-30)
Full Changelog: [v0.3.1-alpha.2...v0.4.0-alpha.1](https://github.com/llamastack/llama-stack-client-python/compare/v0.3.1-alpha.2...v0.4.0-alpha.1)
diff --git a/pyproject.toml b/pyproject.toml
index 1b1f5563..c75fa9fe 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "llama_stack_client"
-version = "0.4.0-alpha.1"
+version = "0.4.0-alpha.2"
description = "The official Python library for the llama-stack-client API"
dynamic = ["readme"]
license = "MIT"