From 4af8f35cffaf2b3d00a38a8fc5f8ca5a0b266786 Mon Sep 17 00:00:00 2001
From: "stainless-app[bot]"
<142633134+stainless-app[bot]@users.noreply.github.com>
Date: Fri, 31 Oct 2025 02:32:44 +0000
Subject: [PATCH 1/3] chore(internal/tests): avoid race condition with implicit
client cleanup
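
The class-scoped `client` attribute left the underlying httpx transport to be closed implicitly
during garbage collection, which could race with test teardown; the tests now take function-scoped
`client` / `async_client` fixtures and explicitly `close()` any clients they construct locally.
A rough sketch of the kind of conftest fixtures this assumes (the real definitions live in
tests/conftest.py and may differ):

    import pytest
    from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient

    base_url = "http://127.0.0.1:4010"  # assumed mock-server URL; the real suite may read it from the env

    @pytest.fixture
    def client():
        # Entering/exiting the context manager closes the underlying httpx client
        # deterministically instead of leaving cleanup to the garbage collector.
        with LlamaStackClient(base_url=base_url, _strict_response_validation=True) as c:
            yield c

    @pytest.fixture
    async def async_client():
        # Assumes pytest-asyncio (or equivalent) is configured to run async fixtures.
        async with AsyncLlamaStackClient(base_url=base_url, _strict_response_validation=True) as c:
            yield c
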
---
tests/test_client.py | 362 ++++++++++++++++++++++++-------------------
1 file changed, 202 insertions(+), 160 deletions(-)
diff --git a/tests/test_client.py b/tests/test_client.py
index 3ccb4d91..2d39d32a 100644
--- a/tests/test_client.py
+++ b/tests/test_client.py
@@ -64,47 +64,45 @@ def _get_open_connections(client: LlamaStackClient | AsyncLlamaStackClient) -> i
class TestLlamaStackClient:
- client = LlamaStackClient(base_url=base_url, _strict_response_validation=True)
-
@pytest.mark.respx(base_url=base_url)
- def test_raw_response(self, respx_mock: MockRouter) -> None:
+ def test_raw_response(self, respx_mock: MockRouter, client: LlamaStackClient) -> None:
respx_mock.post("/foo").mock(return_value=httpx.Response(200, json={"foo": "bar"}))
- response = self.client.post("/foo", cast_to=httpx.Response)
+ response = client.post("/foo", cast_to=httpx.Response)
assert response.status_code == 200
assert isinstance(response, httpx.Response)
assert response.json() == {"foo": "bar"}
@pytest.mark.respx(base_url=base_url)
- def test_raw_response_for_binary(self, respx_mock: MockRouter) -> None:
+ def test_raw_response_for_binary(self, respx_mock: MockRouter, client: LlamaStackClient) -> None:
respx_mock.post("/foo").mock(
return_value=httpx.Response(200, headers={"Content-Type": "application/binary"}, content='{"foo": "bar"}')
)
- response = self.client.post("/foo", cast_to=httpx.Response)
+ response = client.post("/foo", cast_to=httpx.Response)
assert response.status_code == 200
assert isinstance(response, httpx.Response)
assert response.json() == {"foo": "bar"}
- def test_copy(self) -> None:
- copied = self.client.copy()
- assert id(copied) != id(self.client)
+ def test_copy(self, client: LlamaStackClient) -> None:
+ copied = client.copy()
+ assert id(copied) != id(client)
- def test_copy_default_options(self) -> None:
+ def test_copy_default_options(self, client: LlamaStackClient) -> None:
# options that have a default are overridden correctly
- copied = self.client.copy(max_retries=7)
+ copied = client.copy(max_retries=7)
assert copied.max_retries == 7
- assert self.client.max_retries == 2
+ assert client.max_retries == 2
copied2 = copied.copy(max_retries=6)
assert copied2.max_retries == 6
assert copied.max_retries == 7
# timeout
- assert isinstance(self.client.timeout, httpx.Timeout)
- copied = self.client.copy(timeout=None)
+ assert isinstance(client.timeout, httpx.Timeout)
+ copied = client.copy(timeout=None)
assert copied.timeout is None
- assert isinstance(self.client.timeout, httpx.Timeout)
+ assert isinstance(client.timeout, httpx.Timeout)
def test_copy_default_headers(self) -> None:
client = LlamaStackClient(base_url=base_url, _strict_response_validation=True, default_headers={"X-Foo": "bar"})
@@ -137,6 +135,7 @@ def test_copy_default_headers(self) -> None:
match="`default_headers` and `set_default_headers` arguments are mutually exclusive",
):
client.copy(set_default_headers={}, default_headers={"X-Foo": "Bar"})
+ client.close()
def test_copy_default_query(self) -> None:
client = LlamaStackClient(base_url=base_url, _strict_response_validation=True, default_query={"foo": "bar"})
@@ -172,13 +171,15 @@ def test_copy_default_query(self) -> None:
):
client.copy(set_default_query={}, default_query={"foo": "Bar"})
- def test_copy_signature(self) -> None:
+ client.close()
+
+ def test_copy_signature(self, client: LlamaStackClient) -> None:
# ensure the same parameters that can be passed to the client are defined in the `.copy()` method
init_signature = inspect.signature(
# mypy doesn't like that we access the `__init__` property.
- self.client.__init__, # type: ignore[misc]
+ client.__init__, # type: ignore[misc]
)
- copy_signature = inspect.signature(self.client.copy)
+ copy_signature = inspect.signature(client.copy)
exclude_params = {"transport", "proxies", "_strict_response_validation"}
for name in init_signature.parameters.keys():
@@ -189,12 +190,12 @@ def test_copy_signature(self) -> None:
assert copy_param is not None, f"copy() signature is missing the {name} param"
@pytest.mark.skipif(sys.version_info >= (3, 10), reason="fails because of a memory leak that started from 3.12")
- def test_copy_build_request(self) -> None:
+ def test_copy_build_request(self, client: LlamaStackClient) -> None:
options = FinalRequestOptions(method="get", url="/foo")
def build_request(options: FinalRequestOptions) -> None:
- client = self.client.copy()
- client._build_request(options)
+ client_copy = client.copy()
+ client_copy._build_request(options)
# ensure that the machinery is warmed up before tracing starts.
build_request(options)
@@ -251,14 +252,12 @@ def add_leak(leaks: list[tracemalloc.StatisticDiff], diff: tracemalloc.Statistic
print(frame)
raise AssertionError()
- def test_request_timeout(self) -> None:
- request = self.client._build_request(FinalRequestOptions(method="get", url="/foo"))
+ def test_request_timeout(self, client: LlamaStackClient) -> None:
+ request = client._build_request(FinalRequestOptions(method="get", url="/foo"))
timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore
assert timeout == DEFAULT_TIMEOUT
- request = self.client._build_request(
- FinalRequestOptions(method="get", url="/foo", timeout=httpx.Timeout(100.0))
- )
+ request = client._build_request(FinalRequestOptions(method="get", url="/foo", timeout=httpx.Timeout(100.0)))
timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore
assert timeout == httpx.Timeout(100.0)
@@ -269,6 +268,8 @@ def test_client_timeout_option(self) -> None:
timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore
assert timeout == httpx.Timeout(0)
+ client.close()
+
def test_http_client_timeout_option(self) -> None:
# custom timeout given to the httpx client should be used
with httpx.Client(timeout=None) as http_client:
@@ -278,6 +279,8 @@ def test_http_client_timeout_option(self) -> None:
timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore
assert timeout == httpx.Timeout(None)
+ client.close()
+
# no timeout given to the httpx client should not use the httpx default
with httpx.Client() as http_client:
client = LlamaStackClient(base_url=base_url, _strict_response_validation=True, http_client=http_client)
@@ -286,6 +289,8 @@ def test_http_client_timeout_option(self) -> None:
timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore
assert timeout == DEFAULT_TIMEOUT
+ client.close()
+
# explicitly passing the default timeout currently results in it being ignored
with httpx.Client(timeout=HTTPX_DEFAULT_TIMEOUT) as http_client:
client = LlamaStackClient(base_url=base_url, _strict_response_validation=True, http_client=http_client)
@@ -294,6 +299,8 @@ def test_http_client_timeout_option(self) -> None:
timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore
assert timeout == DEFAULT_TIMEOUT # our default
+ client.close()
+
async def test_invalid_http_client(self) -> None:
with pytest.raises(TypeError, match="Invalid `http_client` arg"):
async with httpx.AsyncClient() as http_client:
@@ -302,12 +309,14 @@ async def test_invalid_http_client(self) -> None:
)
def test_default_headers_option(self) -> None:
- client = LlamaStackClient(base_url=base_url, _strict_response_validation=True, default_headers={"X-Foo": "bar"})
- request = client._build_request(FinalRequestOptions(method="get", url="/foo"))
+ test_client = LlamaStackClient(
+ base_url=base_url, _strict_response_validation=True, default_headers={"X-Foo": "bar"}
+ )
+ request = test_client._build_request(FinalRequestOptions(method="get", url="/foo"))
assert request.headers.get("x-foo") == "bar"
assert request.headers.get("x-stainless-lang") == "python"
- client2 = LlamaStackClient(
+ test_client2 = LlamaStackClient(
base_url=base_url,
_strict_response_validation=True,
default_headers={
@@ -315,10 +324,13 @@ def test_default_headers_option(self) -> None:
"X-Stainless-Lang": "my-overriding-header",
},
)
- request = client2._build_request(FinalRequestOptions(method="get", url="/foo"))
+ request = test_client2._build_request(FinalRequestOptions(method="get", url="/foo"))
assert request.headers.get("x-foo") == "stainless"
assert request.headers.get("x-stainless-lang") == "my-overriding-header"
+ test_client.close()
+ test_client2.close()
+
def test_default_query_option(self) -> None:
client = LlamaStackClient(
base_url=base_url, _strict_response_validation=True, default_query={"query_param": "bar"}
@@ -337,8 +349,10 @@ def test_default_query_option(self) -> None:
url = httpx.URL(request.url)
assert dict(url.params) == {"foo": "baz", "query_param": "overridden"}
- def test_request_extra_json(self) -> None:
- request = self.client._build_request(
+ client.close()
+
+ def test_request_extra_json(self, client: LlamaStackClient) -> None:
+ request = client._build_request(
FinalRequestOptions(
method="post",
url="/foo",
@@ -349,7 +363,7 @@ def test_request_extra_json(self) -> None:
data = json.loads(request.content.decode("utf-8"))
assert data == {"foo": "bar", "baz": False}
- request = self.client._build_request(
+ request = client._build_request(
FinalRequestOptions(
method="post",
url="/foo",
@@ -360,7 +374,7 @@ def test_request_extra_json(self) -> None:
assert data == {"baz": False}
# `extra_json` takes priority over `json_data` when keys clash
- request = self.client._build_request(
+ request = client._build_request(
FinalRequestOptions(
method="post",
url="/foo",
@@ -371,8 +385,8 @@ def test_request_extra_json(self) -> None:
data = json.loads(request.content.decode("utf-8"))
assert data == {"foo": "bar", "baz": None}
- def test_request_extra_headers(self) -> None:
- request = self.client._build_request(
+ def test_request_extra_headers(self, client: LlamaStackClient) -> None:
+ request = client._build_request(
FinalRequestOptions(
method="post",
url="/foo",
@@ -382,7 +396,7 @@ def test_request_extra_headers(self) -> None:
assert request.headers.get("X-Foo") == "Foo"
# `extra_headers` takes priority over `default_headers` when keys clash
- request = self.client.with_options(default_headers={"X-Bar": "true"})._build_request(
+ request = client.with_options(default_headers={"X-Bar": "true"})._build_request(
FinalRequestOptions(
method="post",
url="/foo",
@@ -393,8 +407,8 @@ def test_request_extra_headers(self) -> None:
)
assert request.headers.get("X-Bar") == "false"
- def test_request_extra_query(self) -> None:
- request = self.client._build_request(
+ def test_request_extra_query(self, client: LlamaStackClient) -> None:
+ request = client._build_request(
FinalRequestOptions(
method="post",
url="/foo",
@@ -407,7 +421,7 @@ def test_request_extra_query(self) -> None:
assert params == {"my_query_param": "Foo"}
# if both `query` and `extra_query` are given, they are merged
- request = self.client._build_request(
+ request = client._build_request(
FinalRequestOptions(
method="post",
url="/foo",
@@ -421,7 +435,7 @@ def test_request_extra_query(self) -> None:
assert params == {"bar": "1", "foo": "2"}
# `extra_query` takes priority over `query` when keys clash
- request = self.client._build_request(
+ request = client._build_request(
FinalRequestOptions(
method="post",
url="/foo",
@@ -464,7 +478,7 @@ def test_multipart_repeating_array(self, client: LlamaStackClient) -> None:
]
@pytest.mark.respx(base_url=base_url)
- def test_basic_union_response(self, respx_mock: MockRouter) -> None:
+ def test_basic_union_response(self, respx_mock: MockRouter, client: LlamaStackClient) -> None:
class Model1(BaseModel):
name: str
@@ -473,12 +487,12 @@ class Model2(BaseModel):
respx_mock.get("/foo").mock(return_value=httpx.Response(200, json={"foo": "bar"}))
- response = self.client.get("/foo", cast_to=cast(Any, Union[Model1, Model2]))
+ response = client.get("/foo", cast_to=cast(Any, Union[Model1, Model2]))
assert isinstance(response, Model2)
assert response.foo == "bar"
@pytest.mark.respx(base_url=base_url)
- def test_union_response_different_types(self, respx_mock: MockRouter) -> None:
+ def test_union_response_different_types(self, respx_mock: MockRouter, client: LlamaStackClient) -> None:
"""Union of objects with the same field name using a different type"""
class Model1(BaseModel):
@@ -489,18 +503,20 @@ class Model2(BaseModel):
respx_mock.get("/foo").mock(return_value=httpx.Response(200, json={"foo": "bar"}))
- response = self.client.get("/foo", cast_to=cast(Any, Union[Model1, Model2]))
+ response = client.get("/foo", cast_to=cast(Any, Union[Model1, Model2]))
assert isinstance(response, Model2)
assert response.foo == "bar"
respx_mock.get("/foo").mock(return_value=httpx.Response(200, json={"foo": 1}))
- response = self.client.get("/foo", cast_to=cast(Any, Union[Model1, Model2]))
+ response = client.get("/foo", cast_to=cast(Any, Union[Model1, Model2]))
assert isinstance(response, Model1)
assert response.foo == 1
@pytest.mark.respx(base_url=base_url)
- def test_non_application_json_content_type_for_json_data(self, respx_mock: MockRouter) -> None:
+ def test_non_application_json_content_type_for_json_data(
+ self, respx_mock: MockRouter, client: LlamaStackClient
+ ) -> None:
"""
Response that sets Content-Type to something other than application/json but returns json data
"""
@@ -516,7 +532,7 @@ class Model(BaseModel):
)
)
- response = self.client.get("/foo", cast_to=Model)
+ response = client.get("/foo", cast_to=Model)
assert isinstance(response, Model)
assert response.foo == 2
@@ -528,6 +544,8 @@ def test_base_url_setter(self) -> None:
assert client.base_url == "https://example.com/from_setter/"
+ client.close()
+
def test_base_url_env(self) -> None:
with update_env(LLAMA_STACK_CLIENT_BASE_URL="http://localhost:5000/from/env"):
client = LlamaStackClient(_strict_response_validation=True)
@@ -554,6 +572,7 @@ def test_base_url_trailing_slash(self, client: LlamaStackClient) -> None:
),
)
assert request.url == "http://localhost:5000/custom/path/foo"
+ client.close()
@pytest.mark.parametrize(
"client",
@@ -576,6 +595,7 @@ def test_base_url_no_trailing_slash(self, client: LlamaStackClient) -> None:
),
)
assert request.url == "http://localhost:5000/custom/path/foo"
+ client.close()
@pytest.mark.parametrize(
"client",
@@ -598,35 +618,36 @@ def test_absolute_request_url(self, client: LlamaStackClient) -> None:
),
)
assert request.url == "https://myapi.com/foo"
+ client.close()
def test_copied_client_does_not_close_http(self) -> None:
- client = LlamaStackClient(base_url=base_url, _strict_response_validation=True)
- assert not client.is_closed()
+ test_client = LlamaStackClient(base_url=base_url, _strict_response_validation=True)
+ assert not test_client.is_closed()
- copied = client.copy()
- assert copied is not client
+ copied = test_client.copy()
+ assert copied is not test_client
del copied
- assert not client.is_closed()
+ assert not test_client.is_closed()
def test_client_context_manager(self) -> None:
- client = LlamaStackClient(base_url=base_url, _strict_response_validation=True)
- with client as c2:
- assert c2 is client
+ test_client = LlamaStackClient(base_url=base_url, _strict_response_validation=True)
+ with test_client as c2:
+ assert c2 is test_client
assert not c2.is_closed()
- assert not client.is_closed()
- assert client.is_closed()
+ assert not test_client.is_closed()
+ assert test_client.is_closed()
@pytest.mark.respx(base_url=base_url)
- def test_client_response_validation_error(self, respx_mock: MockRouter) -> None:
+ def test_client_response_validation_error(self, respx_mock: MockRouter, client: LlamaStackClient) -> None:
class Model(BaseModel):
foo: str
respx_mock.get("/foo").mock(return_value=httpx.Response(200, json={"foo": {"invalid": True}}))
with pytest.raises(APIResponseValidationError) as exc:
- self.client.get("/foo", cast_to=Model)
+ client.get("/foo", cast_to=Model)
assert isinstance(exc.value.__cause__, ValidationError)
@@ -646,11 +667,14 @@ class Model(BaseModel):
with pytest.raises(APIResponseValidationError):
strict_client.get("/foo", cast_to=Model)
- client = LlamaStackClient(base_url=base_url, _strict_response_validation=False)
+ non_strict_client = LlamaStackClient(base_url=base_url, _strict_response_validation=False)
- response = client.get("/foo", cast_to=Model)
+ response = non_strict_client.get("/foo", cast_to=Model)
assert isinstance(response, str) # type: ignore[unreachable]
+ strict_client.close()
+ non_strict_client.close()
+
@pytest.mark.parametrize(
"remaining_retries,retry_after,timeout",
[
@@ -673,9 +697,9 @@ class Model(BaseModel):
],
)
@mock.patch("time.time", mock.MagicMock(return_value=1696004797))
- def test_parse_retry_after_header(self, remaining_retries: int, retry_after: str, timeout: float) -> None:
- client = LlamaStackClient(base_url=base_url, _strict_response_validation=True)
-
+ def test_parse_retry_after_header(
+ self, remaining_retries: int, retry_after: str, timeout: float, client: LlamaStackClient
+ ) -> None:
headers = httpx.Headers({"retry-after": retry_after})
options = FinalRequestOptions(method="get", url="/foo", max_retries=3)
calculated = client._calculate_retry_timeout(remaining_retries, options, headers)
@@ -697,7 +721,7 @@ def test_retrying_timeout_errors_doesnt_leak(self, respx_mock: MockRouter, clien
model="model",
).__enter__()
- assert _get_open_connections(self.client) == 0
+ assert _get_open_connections(client) == 0
@mock.patch("llama_stack_client._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout)
@pytest.mark.respx(base_url=base_url)
@@ -714,7 +738,7 @@ def test_retrying_status_errors_doesnt_leak(self, respx_mock: MockRouter, client
],
model="model",
).__enter__()
- assert _get_open_connections(self.client) == 0
+ assert _get_open_connections(client) == 0
@pytest.mark.parametrize("failures_before_success", [0, 2, 4])
@mock.patch("llama_stack_client._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout)
@@ -842,79 +866,73 @@ def test_default_client_creation(self) -> None:
)
@pytest.mark.respx(base_url=base_url)
- def test_follow_redirects(self, respx_mock: MockRouter) -> None:
+ def test_follow_redirects(self, respx_mock: MockRouter, client: LlamaStackClient) -> None:
# Test that the default follow_redirects=True allows following redirects
respx_mock.post("/redirect").mock(
return_value=httpx.Response(302, headers={"Location": f"{base_url}/redirected"})
)
respx_mock.get("/redirected").mock(return_value=httpx.Response(200, json={"status": "ok"}))
- response = self.client.post("/redirect", body={"key": "value"}, cast_to=httpx.Response)
+ response = client.post("/redirect", body={"key": "value"}, cast_to=httpx.Response)
assert response.status_code == 200
assert response.json() == {"status": "ok"}
@pytest.mark.respx(base_url=base_url)
- def test_follow_redirects_disabled(self, respx_mock: MockRouter) -> None:
+ def test_follow_redirects_disabled(self, respx_mock: MockRouter, client: LlamaStackClient) -> None:
# Test that follow_redirects=False prevents following redirects
respx_mock.post("/redirect").mock(
return_value=httpx.Response(302, headers={"Location": f"{base_url}/redirected"})
)
with pytest.raises(APIStatusError) as exc_info:
- self.client.post(
- "/redirect", body={"key": "value"}, options={"follow_redirects": False}, cast_to=httpx.Response
- )
+ client.post("/redirect", body={"key": "value"}, options={"follow_redirects": False}, cast_to=httpx.Response)
assert exc_info.value.response.status_code == 302
assert exc_info.value.response.headers["Location"] == f"{base_url}/redirected"
class TestAsyncLlamaStackClient:
- client = AsyncLlamaStackClient(base_url=base_url, _strict_response_validation=True)
-
@pytest.mark.respx(base_url=base_url)
- @pytest.mark.asyncio
- async def test_raw_response(self, respx_mock: MockRouter) -> None:
+ async def test_raw_response(self, respx_mock: MockRouter, async_client: AsyncLlamaStackClient) -> None:
respx_mock.post("/foo").mock(return_value=httpx.Response(200, json={"foo": "bar"}))
- response = await self.client.post("/foo", cast_to=httpx.Response)
+ response = await async_client.post("/foo", cast_to=httpx.Response)
assert response.status_code == 200
assert isinstance(response, httpx.Response)
assert response.json() == {"foo": "bar"}
@pytest.mark.respx(base_url=base_url)
- @pytest.mark.asyncio
- async def test_raw_response_for_binary(self, respx_mock: MockRouter) -> None:
+ async def test_raw_response_for_binary(self, respx_mock: MockRouter, async_client: AsyncLlamaStackClient) -> None:
respx_mock.post("/foo").mock(
return_value=httpx.Response(200, headers={"Content-Type": "application/binary"}, content='{"foo": "bar"}')
)
- response = await self.client.post("/foo", cast_to=httpx.Response)
+ response = await async_client.post("/foo", cast_to=httpx.Response)
assert response.status_code == 200
assert isinstance(response, httpx.Response)
assert response.json() == {"foo": "bar"}
- def test_copy(self) -> None:
- copied = self.client.copy()
- assert id(copied) != id(self.client)
+ def test_copy(self, async_client: AsyncLlamaStackClient) -> None:
+ copied = async_client.copy()
+ assert id(copied) != id(async_client)
- def test_copy_default_options(self) -> None:
+ def test_copy_default_options(self, async_client: AsyncLlamaStackClient) -> None:
# options that have a default are overridden correctly
- copied = self.client.copy(max_retries=7)
+ copied = async_client.copy(max_retries=7)
assert copied.max_retries == 7
- assert self.client.max_retries == 2
+ assert async_client.max_retries == 2
copied2 = copied.copy(max_retries=6)
assert copied2.max_retries == 6
assert copied.max_retries == 7
# timeout
- assert isinstance(self.client.timeout, httpx.Timeout)
- copied = self.client.copy(timeout=None)
+ assert isinstance(async_client.timeout, httpx.Timeout)
+ copied = async_client.copy(timeout=None)
assert copied.timeout is None
- assert isinstance(self.client.timeout, httpx.Timeout)
+ assert isinstance(async_client.timeout, httpx.Timeout)
- def test_copy_default_headers(self) -> None:
+ async def test_copy_default_headers(self) -> None:
client = AsyncLlamaStackClient(
base_url=base_url, _strict_response_validation=True, default_headers={"X-Foo": "bar"}
)
@@ -947,8 +965,9 @@ def test_copy_default_headers(self) -> None:
match="`default_headers` and `set_default_headers` arguments are mutually exclusive",
):
client.copy(set_default_headers={}, default_headers={"X-Foo": "Bar"})
+ await client.close()
- def test_copy_default_query(self) -> None:
+ async def test_copy_default_query(self) -> None:
client = AsyncLlamaStackClient(
base_url=base_url, _strict_response_validation=True, default_query={"foo": "bar"}
)
@@ -984,13 +1003,15 @@ def test_copy_default_query(self) -> None:
):
client.copy(set_default_query={}, default_query={"foo": "Bar"})
- def test_copy_signature(self) -> None:
+ await client.close()
+
+ def test_copy_signature(self, async_client: AsyncLlamaStackClient) -> None:
# ensure the same parameters that can be passed to the client are defined in the `.copy()` method
init_signature = inspect.signature(
# mypy doesn't like that we access the `__init__` property.
- self.client.__init__, # type: ignore[misc]
+ async_client.__init__, # type: ignore[misc]
)
- copy_signature = inspect.signature(self.client.copy)
+ copy_signature = inspect.signature(async_client.copy)
exclude_params = {"transport", "proxies", "_strict_response_validation"}
for name in init_signature.parameters.keys():
@@ -1001,12 +1022,12 @@ def test_copy_signature(self) -> None:
assert copy_param is not None, f"copy() signature is missing the {name} param"
@pytest.mark.skipif(sys.version_info >= (3, 10), reason="fails because of a memory leak that started from 3.12")
- def test_copy_build_request(self) -> None:
+ def test_copy_build_request(self, async_client: AsyncLlamaStackClient) -> None:
options = FinalRequestOptions(method="get", url="/foo")
def build_request(options: FinalRequestOptions) -> None:
- client = self.client.copy()
- client._build_request(options)
+ client_copy = async_client.copy()
+ client_copy._build_request(options)
# ensure that the machinery is warmed up before tracing starts.
build_request(options)
@@ -1063,12 +1084,12 @@ def add_leak(leaks: list[tracemalloc.StatisticDiff], diff: tracemalloc.Statistic
print(frame)
raise AssertionError()
- async def test_request_timeout(self) -> None:
- request = self.client._build_request(FinalRequestOptions(method="get", url="/foo"))
+ async def test_request_timeout(self, async_client: AsyncLlamaStackClient) -> None:
+ request = async_client._build_request(FinalRequestOptions(method="get", url="/foo"))
timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore
assert timeout == DEFAULT_TIMEOUT
- request = self.client._build_request(
+ request = async_client._build_request(
FinalRequestOptions(method="get", url="/foo", timeout=httpx.Timeout(100.0))
)
timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore
@@ -1081,6 +1102,8 @@ async def test_client_timeout_option(self) -> None:
timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore
assert timeout == httpx.Timeout(0)
+ await client.close()
+
async def test_http_client_timeout_option(self) -> None:
# custom timeout given to the httpx client should be used
async with httpx.AsyncClient(timeout=None) as http_client:
@@ -1090,6 +1113,8 @@ async def test_http_client_timeout_option(self) -> None:
timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore
assert timeout == httpx.Timeout(None)
+ await client.close()
+
# no timeout given to the httpx client should not use the httpx default
async with httpx.AsyncClient() as http_client:
client = AsyncLlamaStackClient(base_url=base_url, _strict_response_validation=True, http_client=http_client)
@@ -1098,6 +1123,8 @@ async def test_http_client_timeout_option(self) -> None:
timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore
assert timeout == DEFAULT_TIMEOUT
+ await client.close()
+
# explicitly passing the default timeout currently results in it being ignored
async with httpx.AsyncClient(timeout=HTTPX_DEFAULT_TIMEOUT) as http_client:
client = AsyncLlamaStackClient(base_url=base_url, _strict_response_validation=True, http_client=http_client)
@@ -1106,6 +1133,8 @@ async def test_http_client_timeout_option(self) -> None:
timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore
assert timeout == DEFAULT_TIMEOUT # our default
+ await client.close()
+
def test_invalid_http_client(self) -> None:
with pytest.raises(TypeError, match="Invalid `http_client` arg"):
with httpx.Client() as http_client:
@@ -1113,15 +1142,15 @@ def test_invalid_http_client(self) -> None:
base_url=base_url, _strict_response_validation=True, http_client=cast(Any, http_client)
)
- def test_default_headers_option(self) -> None:
- client = AsyncLlamaStackClient(
+ async def test_default_headers_option(self) -> None:
+ test_client = AsyncLlamaStackClient(
base_url=base_url, _strict_response_validation=True, default_headers={"X-Foo": "bar"}
)
- request = client._build_request(FinalRequestOptions(method="get", url="/foo"))
+ request = test_client._build_request(FinalRequestOptions(method="get", url="/foo"))
assert request.headers.get("x-foo") == "bar"
assert request.headers.get("x-stainless-lang") == "python"
- client2 = AsyncLlamaStackClient(
+ test_client2 = AsyncLlamaStackClient(
base_url=base_url,
_strict_response_validation=True,
default_headers={
@@ -1129,11 +1158,14 @@ def test_default_headers_option(self) -> None:
"X-Stainless-Lang": "my-overriding-header",
},
)
- request = client2._build_request(FinalRequestOptions(method="get", url="/foo"))
+ request = test_client2._build_request(FinalRequestOptions(method="get", url="/foo"))
assert request.headers.get("x-foo") == "stainless"
assert request.headers.get("x-stainless-lang") == "my-overriding-header"
- def test_default_query_option(self) -> None:
+ await test_client.close()
+ await test_client2.close()
+
+ async def test_default_query_option(self) -> None:
client = AsyncLlamaStackClient(
base_url=base_url, _strict_response_validation=True, default_query={"query_param": "bar"}
)
@@ -1151,8 +1183,10 @@ def test_default_query_option(self) -> None:
url = httpx.URL(request.url)
assert dict(url.params) == {"foo": "baz", "query_param": "overridden"}
- def test_request_extra_json(self) -> None:
- request = self.client._build_request(
+ await client.close()
+
+ def test_request_extra_json(self, client: LlamaStackClient) -> None:
+ request = client._build_request(
FinalRequestOptions(
method="post",
url="/foo",
@@ -1163,7 +1197,7 @@ def test_request_extra_json(self) -> None:
data = json.loads(request.content.decode("utf-8"))
assert data == {"foo": "bar", "baz": False}
- request = self.client._build_request(
+ request = client._build_request(
FinalRequestOptions(
method="post",
url="/foo",
@@ -1174,7 +1208,7 @@ def test_request_extra_json(self) -> None:
assert data == {"baz": False}
# `extra_json` takes priority over `json_data` when keys clash
- request = self.client._build_request(
+ request = client._build_request(
FinalRequestOptions(
method="post",
url="/foo",
@@ -1185,8 +1219,8 @@ def test_request_extra_json(self) -> None:
data = json.loads(request.content.decode("utf-8"))
assert data == {"foo": "bar", "baz": None}
- def test_request_extra_headers(self) -> None:
- request = self.client._build_request(
+ def test_request_extra_headers(self, client: LlamaStackClient) -> None:
+ request = client._build_request(
FinalRequestOptions(
method="post",
url="/foo",
@@ -1196,7 +1230,7 @@ def test_request_extra_headers(self) -> None:
assert request.headers.get("X-Foo") == "Foo"
# `extra_headers` takes priority over `default_headers` when keys clash
- request = self.client.with_options(default_headers={"X-Bar": "true"})._build_request(
+ request = client.with_options(default_headers={"X-Bar": "true"})._build_request(
FinalRequestOptions(
method="post",
url="/foo",
@@ -1207,8 +1241,8 @@ def test_request_extra_headers(self) -> None:
)
assert request.headers.get("X-Bar") == "false"
- def test_request_extra_query(self) -> None:
- request = self.client._build_request(
+ def test_request_extra_query(self, client: LlamaStackClient) -> None:
+ request = client._build_request(
FinalRequestOptions(
method="post",
url="/foo",
@@ -1221,7 +1255,7 @@ def test_request_extra_query(self) -> None:
assert params == {"my_query_param": "Foo"}
# if both `query` and `extra_query` are given, they are merged
- request = self.client._build_request(
+ request = client._build_request(
FinalRequestOptions(
method="post",
url="/foo",
@@ -1235,7 +1269,7 @@ def test_request_extra_query(self) -> None:
assert params == {"bar": "1", "foo": "2"}
# `extra_query` takes priority over `query` when keys clash
- request = self.client._build_request(
+ request = client._build_request(
FinalRequestOptions(
method="post",
url="/foo",
@@ -1278,7 +1312,7 @@ def test_multipart_repeating_array(self, async_client: AsyncLlamaStackClient) ->
]
@pytest.mark.respx(base_url=base_url)
- async def test_basic_union_response(self, respx_mock: MockRouter) -> None:
+ async def test_basic_union_response(self, respx_mock: MockRouter, async_client: AsyncLlamaStackClient) -> None:
class Model1(BaseModel):
name: str
@@ -1287,12 +1321,14 @@ class Model2(BaseModel):
respx_mock.get("/foo").mock(return_value=httpx.Response(200, json={"foo": "bar"}))
- response = await self.client.get("/foo", cast_to=cast(Any, Union[Model1, Model2]))
+ response = await async_client.get("/foo", cast_to=cast(Any, Union[Model1, Model2]))
assert isinstance(response, Model2)
assert response.foo == "bar"
@pytest.mark.respx(base_url=base_url)
- async def test_union_response_different_types(self, respx_mock: MockRouter) -> None:
+ async def test_union_response_different_types(
+ self, respx_mock: MockRouter, async_client: AsyncLlamaStackClient
+ ) -> None:
"""Union of objects with the same field name using a different type"""
class Model1(BaseModel):
@@ -1303,18 +1339,20 @@ class Model2(BaseModel):
respx_mock.get("/foo").mock(return_value=httpx.Response(200, json={"foo": "bar"}))
- response = await self.client.get("/foo", cast_to=cast(Any, Union[Model1, Model2]))
+ response = await async_client.get("/foo", cast_to=cast(Any, Union[Model1, Model2]))
assert isinstance(response, Model2)
assert response.foo == "bar"
respx_mock.get("/foo").mock(return_value=httpx.Response(200, json={"foo": 1}))
- response = await self.client.get("/foo", cast_to=cast(Any, Union[Model1, Model2]))
+ response = await async_client.get("/foo", cast_to=cast(Any, Union[Model1, Model2]))
assert isinstance(response, Model1)
assert response.foo == 1
@pytest.mark.respx(base_url=base_url)
- async def test_non_application_json_content_type_for_json_data(self, respx_mock: MockRouter) -> None:
+ async def test_non_application_json_content_type_for_json_data(
+ self, respx_mock: MockRouter, async_client: AsyncLlamaStackClient
+ ) -> None:
"""
Response that sets Content-Type to something other than application/json but returns json data
"""
@@ -1330,11 +1368,11 @@ class Model(BaseModel):
)
)
- response = await self.client.get("/foo", cast_to=Model)
+ response = await async_client.get("/foo", cast_to=Model)
assert isinstance(response, Model)
assert response.foo == 2
- def test_base_url_setter(self) -> None:
+ async def test_base_url_setter(self) -> None:
client = AsyncLlamaStackClient(base_url="https://example.com/from_init", _strict_response_validation=True)
assert client.base_url == "https://example.com/from_init/"
@@ -1342,7 +1380,9 @@ def test_base_url_setter(self) -> None:
assert client.base_url == "https://example.com/from_setter/"
- def test_base_url_env(self) -> None:
+ await client.close()
+
+ async def test_base_url_env(self) -> None:
with update_env(LLAMA_STACK_CLIENT_BASE_URL="http://localhost:5000/from/env"):
client = AsyncLlamaStackClient(_strict_response_validation=True)
assert client.base_url == "http://localhost:5000/from/env/"
@@ -1359,7 +1399,7 @@ def test_base_url_env(self) -> None:
],
ids=["standard", "custom http client"],
)
- def test_base_url_trailing_slash(self, client: AsyncLlamaStackClient) -> None:
+ async def test_base_url_trailing_slash(self, client: AsyncLlamaStackClient) -> None:
request = client._build_request(
FinalRequestOptions(
method="post",
@@ -1368,6 +1408,7 @@ def test_base_url_trailing_slash(self, client: AsyncLlamaStackClient) -> None:
),
)
assert request.url == "http://localhost:5000/custom/path/foo"
+ await client.close()
@pytest.mark.parametrize(
"client",
@@ -1381,7 +1422,7 @@ def test_base_url_trailing_slash(self, client: AsyncLlamaStackClient) -> None:
],
ids=["standard", "custom http client"],
)
- def test_base_url_no_trailing_slash(self, client: AsyncLlamaStackClient) -> None:
+ async def test_base_url_no_trailing_slash(self, client: AsyncLlamaStackClient) -> None:
request = client._build_request(
FinalRequestOptions(
method="post",
@@ -1390,6 +1431,7 @@ def test_base_url_no_trailing_slash(self, client: AsyncLlamaStackClient) -> None
),
)
assert request.url == "http://localhost:5000/custom/path/foo"
+ await client.close()
@pytest.mark.parametrize(
"client",
@@ -1403,7 +1445,7 @@ def test_base_url_no_trailing_slash(self, client: AsyncLlamaStackClient) -> None
],
ids=["standard", "custom http client"],
)
- def test_absolute_request_url(self, client: AsyncLlamaStackClient) -> None:
+ async def test_absolute_request_url(self, client: AsyncLlamaStackClient) -> None:
request = client._build_request(
FinalRequestOptions(
method="post",
@@ -1412,37 +1454,39 @@ def test_absolute_request_url(self, client: AsyncLlamaStackClient) -> None:
),
)
assert request.url == "https://myapi.com/foo"
+ await client.close()
async def test_copied_client_does_not_close_http(self) -> None:
- client = AsyncLlamaStackClient(base_url=base_url, _strict_response_validation=True)
- assert not client.is_closed()
+ test_client = AsyncLlamaStackClient(base_url=base_url, _strict_response_validation=True)
+ assert not test_client.is_closed()
- copied = client.copy()
- assert copied is not client
+ copied = test_client.copy()
+ assert copied is not test_client
del copied
await asyncio.sleep(0.2)
- assert not client.is_closed()
+ assert not test_client.is_closed()
async def test_client_context_manager(self) -> None:
- client = AsyncLlamaStackClient(base_url=base_url, _strict_response_validation=True)
- async with client as c2:
- assert c2 is client
+ test_client = AsyncLlamaStackClient(base_url=base_url, _strict_response_validation=True)
+ async with test_client as c2:
+ assert c2 is test_client
assert not c2.is_closed()
- assert not client.is_closed()
- assert client.is_closed()
+ assert not test_client.is_closed()
+ assert test_client.is_closed()
@pytest.mark.respx(base_url=base_url)
- @pytest.mark.asyncio
- async def test_client_response_validation_error(self, respx_mock: MockRouter) -> None:
+ async def test_client_response_validation_error(
+ self, respx_mock: MockRouter, async_client: AsyncLlamaStackClient
+ ) -> None:
class Model(BaseModel):
foo: str
respx_mock.get("/foo").mock(return_value=httpx.Response(200, json={"foo": {"invalid": True}}))
with pytest.raises(APIResponseValidationError) as exc:
- await self.client.get("/foo", cast_to=Model)
+ await async_client.get("/foo", cast_to=Model)
assert isinstance(exc.value.__cause__, ValidationError)
@@ -1451,7 +1495,6 @@ async def test_client_max_retries_validation(self) -> None:
AsyncLlamaStackClient(base_url=base_url, _strict_response_validation=True, max_retries=cast(Any, None))
@pytest.mark.respx(base_url=base_url)
- @pytest.mark.asyncio
async def test_received_text_for_expected_json(self, respx_mock: MockRouter) -> None:
class Model(BaseModel):
name: str
@@ -1463,11 +1506,14 @@ class Model(BaseModel):
with pytest.raises(APIResponseValidationError):
await strict_client.get("/foo", cast_to=Model)
- client = AsyncLlamaStackClient(base_url=base_url, _strict_response_validation=False)
+ non_strict_client = AsyncLlamaStackClient(base_url=base_url, _strict_response_validation=False)
- response = await client.get("/foo", cast_to=Model)
+ response = await non_strict_client.get("/foo", cast_to=Model)
assert isinstance(response, str) # type: ignore[unreachable]
+ await strict_client.close()
+ await non_strict_client.close()
+
@pytest.mark.parametrize(
"remaining_retries,retry_after,timeout",
[
@@ -1490,13 +1536,12 @@ class Model(BaseModel):
],
)
@mock.patch("time.time", mock.MagicMock(return_value=1696004797))
- @pytest.mark.asyncio
- async def test_parse_retry_after_header(self, remaining_retries: int, retry_after: str, timeout: float) -> None:
- client = AsyncLlamaStackClient(base_url=base_url, _strict_response_validation=True)
-
+ async def test_parse_retry_after_header(
+ self, remaining_retries: int, retry_after: str, timeout: float, async_client: AsyncLlamaStackClient
+ ) -> None:
headers = httpx.Headers({"retry-after": retry_after})
options = FinalRequestOptions(method="get", url="/foo", max_retries=3)
- calculated = client._calculate_retry_timeout(remaining_retries, options, headers)
+ calculated = async_client._calculate_retry_timeout(remaining_retries, options, headers)
assert calculated == pytest.approx(timeout, 0.5 * 0.875) # pyright: ignore[reportUnknownMemberType]
@mock.patch("llama_stack_client._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout)
@@ -1517,7 +1562,7 @@ async def test_retrying_timeout_errors_doesnt_leak(
model="model",
).__aenter__()
- assert _get_open_connections(self.client) == 0
+ assert _get_open_connections(async_client) == 0
@mock.patch("llama_stack_client._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout)
@pytest.mark.respx(base_url=base_url)
@@ -1536,12 +1581,11 @@ async def test_retrying_status_errors_doesnt_leak(
],
model="model",
).__aenter__()
- assert _get_open_connections(self.client) == 0
+ assert _get_open_connections(async_client) == 0
@pytest.mark.parametrize("failures_before_success", [0, 2, 4])
@mock.patch("llama_stack_client._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout)
@pytest.mark.respx(base_url=base_url)
- @pytest.mark.asyncio
@pytest.mark.parametrize("failure_mode", ["status", "exception"])
async def test_retries_taken(
self,
@@ -1581,7 +1625,6 @@ def retry_handler(_request: httpx.Request) -> httpx.Response:
@pytest.mark.parametrize("failures_before_success", [0, 2, 4])
@mock.patch("llama_stack_client._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout)
@pytest.mark.respx(base_url=base_url)
- @pytest.mark.asyncio
async def test_omit_retry_count_header(
self, async_client: AsyncLlamaStackClient, failures_before_success: int, respx_mock: MockRouter
) -> None:
@@ -1614,7 +1657,6 @@ def retry_handler(_request: httpx.Request) -> httpx.Response:
@pytest.mark.parametrize("failures_before_success", [0, 2, 4])
@mock.patch("llama_stack_client._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout)
@pytest.mark.respx(base_url=base_url)
- @pytest.mark.asyncio
async def test_overwrite_retry_count_header(
self, async_client: AsyncLlamaStackClient, failures_before_success: int, respx_mock: MockRouter
) -> None:
@@ -1671,26 +1713,26 @@ async def test_default_client_creation(self) -> None:
)
@pytest.mark.respx(base_url=base_url)
- async def test_follow_redirects(self, respx_mock: MockRouter) -> None:
+ async def test_follow_redirects(self, respx_mock: MockRouter, async_client: AsyncLlamaStackClient) -> None:
# Test that the default follow_redirects=True allows following redirects
respx_mock.post("/redirect").mock(
return_value=httpx.Response(302, headers={"Location": f"{base_url}/redirected"})
)
respx_mock.get("/redirected").mock(return_value=httpx.Response(200, json={"status": "ok"}))
- response = await self.client.post("/redirect", body={"key": "value"}, cast_to=httpx.Response)
+ response = await async_client.post("/redirect", body={"key": "value"}, cast_to=httpx.Response)
assert response.status_code == 200
assert response.json() == {"status": "ok"}
@pytest.mark.respx(base_url=base_url)
- async def test_follow_redirects_disabled(self, respx_mock: MockRouter) -> None:
+ async def test_follow_redirects_disabled(self, respx_mock: MockRouter, async_client: AsyncLlamaStackClient) -> None:
# Test that follow_redirects=False prevents following redirects
respx_mock.post("/redirect").mock(
return_value=httpx.Response(302, headers={"Location": f"{base_url}/redirected"})
)
with pytest.raises(APIStatusError) as exc_info:
- await self.client.post(
+ await async_client.post(
"/redirect", body={"key": "value"}, options={"follow_redirects": False}, cast_to=httpx.Response
)
From efdf1be41243be5107f4863de99c5dce8504bba9 Mon Sep 17 00:00:00 2001
From: "stainless-app[bot]"
<142633134+stainless-app[bot]@users.noreply.github.com>
Date: Mon, 3 Nov 2025 23:54:56 +0000
Subject: [PATCH 2/3] feat(api): point models.list() to /v1/openai/v1/models
A step towards OpenAI compatibility for the models endpoint.
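
Call sites do not change: `client.models.list()` keeps its signature and simply targets the
OpenAI-compatible route, `client.models.openai.list()` now returns the new `OpenAIListResponse`
type, and `client.routes.list()` gains an optional `api_filter` argument. A rough usage sketch
(the base URL and the list-like behaviour of the responses are assumptions, not taken from this
patch):

    from llama_stack_client import LlamaStackClient

    client = LlamaStackClient(base_url="http://localhost:8321")  # assumed local server address

    models = client.models.list()                 # now issues GET /v1/openai/v1/models
    legacy = client.models.openai.list()          # still GET /v1/models, typed as OpenAIListResponse
    routes = client.routes.list(api_filter="v1")  # new optional filter added in this patch

    # The SDK unwraps the `data` envelope, so these behave as plain lists (assumption).
    print(len(models), len(legacy), len(routes))
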
---
.stats.yml | 8 +-
api.md | 12 +-
.../resources/models/models.py | 8 +-
.../resources/models/openai.py | 14 +--
.../resources/responses/responses.py | 22 ++++
src/llama_stack_client/resources/routes.py | 37 +++++-
src/llama_stack_client/types/__init__.py | 1 +
.../types/conversation_create_params.py | 22 ++++
.../types/conversations/item_create_params.py | 22 ++++
.../conversations/item_create_response.py | 22 ++++
.../types/conversations/item_get_response.py | 22 ++++
.../types/conversations/item_list_response.py | 22 ++++
.../types/model_list_response.py | 24 +++-
.../types/models/__init__.py | 2 +
.../types/models/openai_list_response.py | 10 ++
.../types/query_chunks_response.py | 9 +-
.../types/response_create_params.py | 93 ++++++++++++++
.../types/response_list_response.py | 116 ++++++++++++++++++
.../types/response_object.py | 94 ++++++++++++++
.../types/response_object_stream.py | 48 ++++++++
.../responses/input_item_list_response.py | 22 ++++
.../types/route_list_params.py | 17 +++
.../types/vector_io_insert_params.py | 9 +-
tests/api_resources/models/test_openai.py | 14 +--
tests/api_resources/test_responses.py | 40 ++++++
tests/api_resources/test_routes.py | 14 +++
tests/api_resources/test_vector_io.py | 10 +-
27 files changed, 689 insertions(+), 45 deletions(-)
create mode 100644 src/llama_stack_client/types/models/openai_list_response.py
create mode 100644 src/llama_stack_client/types/route_list_params.py
diff --git a/.stats.yml b/.stats.yml
index 60e64c3c..29bc5044 100644
--- a/.stats.yml
+++ b/.stats.yml
@@ -1,4 +1,4 @@
-configured_endpoints: 111
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/llamastack%2Fllama-stack-client-35c6569e5e9fcc85084c9728eb7fc7c5908297fcc77043d621d25de3c850a990.yml
-openapi_spec_hash: 0f95bbeee16f3205d36ec34cfa62c711
-config_hash: ef275cc002a89629459fd73d0cf9cba9
+configured_endpoints: 112
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/llamastack%2Fllama-stack-client-a9f69d4a5f5d9bf957497cac83fdad1f72c8a44614098447762c53883e8bd987.yml
+openapi_spec_hash: 75de5bdff8e70591d6033b609fc24e5d
+config_hash: 34558d5f6e265184d712d43e231eb693
diff --git a/api.md b/api.md
index 57ecd092..bd1949f8 100644
--- a/api.md
+++ b/api.md
@@ -306,15 +306,21 @@ from llama_stack_client.types import ListModelsResponse, Model, ModelListRespons
Methods:
- client.models.retrieve(model_id) -> Model
-- client.models.list() -> ModelListResponse
+- client.models.list() -> ModelListResponse
- client.models.register(\*\*params) -> Model
- client.models.unregister(model_id) -> None
## OpenAI
+Types:
+
+```python
+from llama_stack_client.types.models import OpenAIListResponse
+```
+
Methods:
-- client.models.openai.list() -> ModelListResponse
+- client.models.openai.list() -> OpenAIListResponse
# Providers
@@ -339,7 +345,7 @@ from llama_stack_client.types import ListRoutesResponse, RouteListResponse
Methods:
-- client.routes.list() -> RouteListResponse
+- client.routes.list(\*\*params) -> RouteListResponse
# Moderations
diff --git a/src/llama_stack_client/resources/models/models.py b/src/llama_stack_client/resources/models/models.py
index 99ebccdd..dc7e0f4d 100644
--- a/src/llama_stack_client/resources/models/models.py
+++ b/src/llama_stack_client/resources/models/models.py
@@ -108,9 +108,9 @@ def list(
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = not_given,
) -> ModelListResponse:
- """List all models."""
+ """List models using the OpenAI API."""
return self._get(
- "/v1/models",
+ "/v1/openai/v1/models",
options=make_request_options(
extra_headers=extra_headers,
extra_query=extra_query,
@@ -281,9 +281,9 @@ async def list(
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = not_given,
) -> ModelListResponse:
- """List all models."""
+ """List models using the OpenAI API."""
return await self._get(
- "/v1/models",
+ "/v1/openai/v1/models",
options=make_request_options(
extra_headers=extra_headers,
extra_query=extra_query,
diff --git a/src/llama_stack_client/resources/models/openai.py b/src/llama_stack_client/resources/models/openai.py
index c5ff1738..c581f714 100644
--- a/src/llama_stack_client/resources/models/openai.py
+++ b/src/llama_stack_client/resources/models/openai.py
@@ -23,7 +23,7 @@
)
from ..._wrappers import DataWrapper
from ..._base_client import make_request_options
-from ...types.model_list_response import ModelListResponse
+from ...types.models.openai_list_response import OpenAIListResponse
__all__ = ["OpenAIResource", "AsyncOpenAIResource"]
@@ -57,7 +57,7 @@ def list(
extra_query: Query | None = None,
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> ModelListResponse:
+ ) -> OpenAIListResponse:
"""List all models."""
return self._get(
"/v1/models",
@@ -66,9 +66,9 @@ def list(
extra_query=extra_query,
extra_body=extra_body,
timeout=timeout,
- post_parser=DataWrapper[ModelListResponse]._unwrapper,
+ post_parser=DataWrapper[OpenAIListResponse]._unwrapper,
),
- cast_to=cast(Type[ModelListResponse], DataWrapper[ModelListResponse]),
+ cast_to=cast(Type[OpenAIListResponse], DataWrapper[OpenAIListResponse]),
)
@@ -101,7 +101,7 @@ async def list(
extra_query: Query | None = None,
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> ModelListResponse:
+ ) -> OpenAIListResponse:
"""List all models."""
return await self._get(
"/v1/models",
@@ -110,9 +110,9 @@ async def list(
extra_query=extra_query,
extra_body=extra_body,
timeout=timeout,
- post_parser=DataWrapper[ModelListResponse]._unwrapper,
+ post_parser=DataWrapper[OpenAIListResponse]._unwrapper,
),
- cast_to=cast(Type[ModelListResponse], DataWrapper[ModelListResponse]),
+ cast_to=cast(Type[OpenAIListResponse], DataWrapper[OpenAIListResponse]),
)
diff --git a/src/llama_stack_client/resources/responses/responses.py b/src/llama_stack_client/resources/responses/responses.py
index 6bc29a62..e0109583 100644
--- a/src/llama_stack_client/resources/responses/responses.py
+++ b/src/llama_stack_client/resources/responses/responses.py
@@ -78,6 +78,7 @@ def create(
instructions: str | Omit = omit,
max_infer_iters: int | Omit = omit,
previous_response_id: str | Omit = omit,
+ prompt: response_create_params.Prompt | Omit = omit,
store: bool | Omit = omit,
stream: Literal[False] | Omit = omit,
temperature: float | Omit = omit,
@@ -108,6 +109,8 @@ def create(
response. This can be used to easily fork-off new responses from existing
responses.
+ prompt: (Optional) Prompt object with ID, version, and variables.
+
text: Text response configuration for OpenAI responses.
extra_headers: Send extra headers
@@ -132,6 +135,7 @@ def create(
instructions: str | Omit = omit,
max_infer_iters: int | Omit = omit,
previous_response_id: str | Omit = omit,
+ prompt: response_create_params.Prompt | Omit = omit,
store: bool | Omit = omit,
temperature: float | Omit = omit,
text: response_create_params.Text | Omit = omit,
@@ -161,6 +165,8 @@ def create(
response. This can be used to easily fork-off new responses from existing
responses.
+ prompt: (Optional) Prompt object with ID, version, and variables.
+
text: Text response configuration for OpenAI responses.
extra_headers: Send extra headers
@@ -185,6 +191,7 @@ def create(
instructions: str | Omit = omit,
max_infer_iters: int | Omit = omit,
previous_response_id: str | Omit = omit,
+ prompt: response_create_params.Prompt | Omit = omit,
store: bool | Omit = omit,
temperature: float | Omit = omit,
text: response_create_params.Text | Omit = omit,
@@ -214,6 +221,8 @@ def create(
response. This can be used to easily fork-off new responses from existing
responses.
+ prompt: (Optional) Prompt object with ID, version, and variables.
+
text: Text response configuration for OpenAI responses.
extra_headers: Send extra headers
@@ -237,6 +246,7 @@ def create(
instructions: str | Omit = omit,
max_infer_iters: int | Omit = omit,
previous_response_id: str | Omit = omit,
+ prompt: response_create_params.Prompt | Omit = omit,
store: bool | Omit = omit,
stream: Literal[False] | Literal[True] | Omit = omit,
temperature: float | Omit = omit,
@@ -260,6 +270,7 @@ def create(
"instructions": instructions,
"max_infer_iters": max_infer_iters,
"previous_response_id": previous_response_id,
+ "prompt": prompt,
"store": store,
"stream": stream,
"temperature": temperature,
@@ -435,6 +446,7 @@ async def create(
instructions: str | Omit = omit,
max_infer_iters: int | Omit = omit,
previous_response_id: str | Omit = omit,
+ prompt: response_create_params.Prompt | Omit = omit,
store: bool | Omit = omit,
stream: Literal[False] | Omit = omit,
temperature: float | Omit = omit,
@@ -465,6 +477,8 @@ async def create(
response. This can be used to easily fork-off new responses from existing
responses.
+ prompt: (Optional) Prompt object with ID, version, and variables.
+
text: Text response configuration for OpenAI responses.
extra_headers: Send extra headers
@@ -489,6 +503,7 @@ async def create(
instructions: str | Omit = omit,
max_infer_iters: int | Omit = omit,
previous_response_id: str | Omit = omit,
+ prompt: response_create_params.Prompt | Omit = omit,
store: bool | Omit = omit,
temperature: float | Omit = omit,
text: response_create_params.Text | Omit = omit,
@@ -518,6 +533,8 @@ async def create(
response. This can be used to easily fork-off new responses from existing
responses.
+ prompt: (Optional) Prompt object with ID, version, and variables.
+
text: Text response configuration for OpenAI responses.
extra_headers: Send extra headers
@@ -542,6 +559,7 @@ async def create(
instructions: str | Omit = omit,
max_infer_iters: int | Omit = omit,
previous_response_id: str | Omit = omit,
+ prompt: response_create_params.Prompt | Omit = omit,
store: bool | Omit = omit,
temperature: float | Omit = omit,
text: response_create_params.Text | Omit = omit,
@@ -571,6 +589,8 @@ async def create(
response. This can be used to easily fork-off new responses from existing
responses.
+ prompt: (Optional) Prompt object with ID, version, and variables.
+
text: Text response configuration for OpenAI responses.
extra_headers: Send extra headers
@@ -594,6 +614,7 @@ async def create(
instructions: str | Omit = omit,
max_infer_iters: int | Omit = omit,
previous_response_id: str | Omit = omit,
+ prompt: response_create_params.Prompt | Omit = omit,
store: bool | Omit = omit,
stream: Literal[False] | Literal[True] | Omit = omit,
temperature: float | Omit = omit,
@@ -617,6 +638,7 @@ async def create(
"instructions": instructions,
"max_infer_iters": max_infer_iters,
"previous_response_id": previous_response_id,
+ "prompt": prompt,
"store": store,
"stream": stream,
"temperature": temperature,
diff --git a/src/llama_stack_client/resources/routes.py b/src/llama_stack_client/resources/routes.py
index 0797d00f..ff9b2c59 100644
--- a/src/llama_stack_client/resources/routes.py
+++ b/src/llama_stack_client/resources/routes.py
@@ -9,10 +9,13 @@
from __future__ import annotations
from typing import Type, cast
+from typing_extensions import Literal
import httpx
-from .._types import Body, Query, Headers, NotGiven, not_given
+from ..types import route_list_params
+from .._types import Body, Omit, Query, Headers, NotGiven, omit, not_given
+from .._utils import maybe_transform, async_maybe_transform
from .._compat import cached_property
from .._resource import SyncAPIResource, AsyncAPIResource
from .._response import (
@@ -51,6 +54,7 @@ def with_streaming_response(self) -> RoutesResourceWithStreamingResponse:
def list(
self,
*,
+ api_filter: Literal["v1", "v1alpha", "v1beta", "deprecated"] | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -62,6 +66,20 @@ def list(
List all available API routes with their methods and implementing
providers.
+
+ Args:
+ api_filter: Optional filter to control which routes are returned. Can be an API level ('v1',
+ 'v1alpha', 'v1beta') to show non-deprecated routes at that level, or
+ 'deprecated' to show deprecated routes across all levels. If not specified,
+ returns only non-deprecated v1 routes.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
"""
return self._get(
"/v1/inspect/routes",
@@ -70,6 +88,7 @@ def list(
extra_query=extra_query,
extra_body=extra_body,
timeout=timeout,
+ query=maybe_transform({"api_filter": api_filter}, route_list_params.RouteListParams),
post_parser=DataWrapper[RouteListResponse]._unwrapper,
),
cast_to=cast(Type[RouteListResponse], DataWrapper[RouteListResponse]),
@@ -99,6 +118,7 @@ def with_streaming_response(self) -> AsyncRoutesResourceWithStreamingResponse:
async def list(
self,
*,
+ api_filter: Literal["v1", "v1alpha", "v1beta", "deprecated"] | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -110,6 +130,20 @@ async def list(
List all available API routes with their methods and implementing
providers.
+
+ Args:
+ api_filter: Optional filter to control which routes are returned. Can be an API level ('v1',
+ 'v1alpha', 'v1beta') to show non-deprecated routes at that level, or
+ 'deprecated' to show deprecated routes across all levels. If not specified,
+ returns only non-deprecated v1 routes.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
"""
return await self._get(
"/v1/inspect/routes",
@@ -118,6 +152,7 @@ async def list(
extra_query=extra_query,
extra_body=extra_body,
timeout=timeout,
+ query=await async_maybe_transform({"api_filter": api_filter}, route_list_params.RouteListParams),
post_parser=DataWrapper[RouteListResponse]._unwrapper,
),
cast_to=cast(Type[RouteListResponse], DataWrapper[RouteListResponse]),
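And the async counterpart, under the same placeholder assumptions:

import asyncio

from llama_stack_client import AsyncLlamaStackClient


async def main() -> None:
    async_client = AsyncLlamaStackClient(base_url="http://localhost:8321")  # placeholder URL
    routes = await async_client.routes.list(api_filter="v1beta")
    print(routes)


asyncio.run(main())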
diff --git a/src/llama_stack_client/types/__init__.py b/src/llama_stack_client/types/__init__.py
index 4b6a2b84..1a67f6c4 100644
--- a/src/llama_stack_client/types/__init__.py
+++ b/src/llama_stack_client/types/__init__.py
@@ -44,6 +44,7 @@
from .response_object import ResponseObject as ResponseObject
from .file_list_params import FileListParams as FileListParams
from .tool_list_params import ToolListParams as ToolListParams
+from .route_list_params import RouteListParams as RouteListParams
from .scoring_fn_params import ScoringFnParams as ScoringFnParams
from .file_create_params import FileCreateParams as FileCreateParams
from .tool_list_response import ToolListResponse as ToolListResponse
diff --git a/src/llama_stack_client/types/conversation_create_params.py b/src/llama_stack_client/types/conversation_create_params.py
index c51245dd..96fbb82e 100644
--- a/src/llama_stack_client/types/conversation_create_params.py
+++ b/src/llama_stack_client/types/conversation_create_params.py
@@ -20,6 +20,7 @@
"ItemOpenAIResponseMessageContentUnionMember1",
"ItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText",
"ItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage",
+ "ItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile",
"ItemOpenAIResponseMessageContentUnionMember2",
"ItemOpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputText",
"ItemOpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputTextAnnotation",
@@ -64,13 +65,34 @@ class ItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageCont
type: Required[Literal["input_image"]]
"""Content type identifier, always "input_image" """
+ file_id: str
+ """(Optional) The ID of the file to be sent to the model."""
+
image_url: str
"""(Optional) URL of the image content"""
+class ItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile(TypedDict, total=False):
+ type: Required[Literal["input_file"]]
+ """The type of the input item. Always `input_file`."""
+
+ file_data: str
+ """The data of the file to be sent to the model."""
+
+ file_id: str
+ """(Optional) The ID of the file to be sent to the model."""
+
+ file_url: str
+ """The URL of the file to be sent to the model."""
+
+ filename: str
+ """The name of the file to be sent to the model."""
+
+
ItemOpenAIResponseMessageContentUnionMember1: TypeAlias = Union[
ItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText,
ItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage,
+ ItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile,
]
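A sketch of a conversation item using the new `input_file` content part; the file ID, the filename, and the `items=` usage noted in the trailing comment are placeholder assumptions rather than values taken from this patch:

# A user message whose content mixes text with the new "input_file" part.
file_part = {
    "type": "input_file",
    "file_id": "file-abc123",          # placeholder ID of a previously uploaded file
    "filename": "quarterly_report.pdf",
}

user_message = {
    "type": "message",
    "role": "user",
    "content": [
        {"type": "input_text", "text": "Summarize the attached report."},
        file_part,
    ],
}

# This dict would then be supplied through the conversation create `items`
# parameter defined by this module, e.g. client.conversations.create(items=[user_message]).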
diff --git a/src/llama_stack_client/types/conversations/item_create_params.py b/src/llama_stack_client/types/conversations/item_create_params.py
index 8df31144..111c39fb 100644
--- a/src/llama_stack_client/types/conversations/item_create_params.py
+++ b/src/llama_stack_client/types/conversations/item_create_params.py
@@ -20,6 +20,7 @@
"ItemOpenAIResponseMessageContentUnionMember1",
"ItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText",
"ItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage",
+ "ItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile",
"ItemOpenAIResponseMessageContentUnionMember2",
"ItemOpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputText",
"ItemOpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputTextAnnotation",
@@ -61,13 +62,34 @@ class ItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageCont
type: Required[Literal["input_image"]]
"""Content type identifier, always "input_image" """
+ file_id: str
+ """(Optional) The ID of the file to be sent to the model."""
+
image_url: str
"""(Optional) URL of the image content"""
+class ItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile(TypedDict, total=False):
+ type: Required[Literal["input_file"]]
+ """The type of the input item. Always `input_file`."""
+
+ file_data: str
+ """The data of the file to be sent to the model."""
+
+ file_id: str
+ """(Optional) The ID of the file to be sent to the model."""
+
+ file_url: str
+ """The URL of the file to be sent to the model."""
+
+ filename: str
+ """The name of the file to be sent to the model."""
+
+
ItemOpenAIResponseMessageContentUnionMember1: TypeAlias = Union[
ItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText,
ItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage,
+ ItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile,
]
diff --git a/src/llama_stack_client/types/conversations/item_create_response.py b/src/llama_stack_client/types/conversations/item_create_response.py
index c382e2b9..580aaf23 100644
--- a/src/llama_stack_client/types/conversations/item_create_response.py
+++ b/src/llama_stack_client/types/conversations/item_create_response.py
@@ -19,6 +19,7 @@
"DataOpenAIResponseMessageContentUnionMember1",
"DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText",
"DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage",
+ "DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile",
"DataOpenAIResponseMessageContentUnionMember2",
"DataOpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputText",
"DataOpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputTextAnnotation",
@@ -55,14 +56,35 @@ class DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageCont
type: Literal["input_image"]
"""Content type identifier, always "input_image" """
+ file_id: Optional[str] = None
+ """(Optional) The ID of the file to be sent to the model."""
+
image_url: Optional[str] = None
"""(Optional) URL of the image content"""
+class DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile(BaseModel):
+ type: Literal["input_file"]
+ """The type of the input item. Always `input_file`."""
+
+ file_data: Optional[str] = None
+ """The data of the file to be sent to the model."""
+
+ file_id: Optional[str] = None
+ """(Optional) The ID of the file to be sent to the model."""
+
+ file_url: Optional[str] = None
+ """The URL of the file to be sent to the model."""
+
+ filename: Optional[str] = None
+ """The name of the file to be sent to the model."""
+
+
DataOpenAIResponseMessageContentUnionMember1: TypeAlias = Annotated[
Union[
DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText,
DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage,
+ DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile,
],
PropertyInfo(discriminator="type"),
]
diff --git a/src/llama_stack_client/types/conversations/item_get_response.py b/src/llama_stack_client/types/conversations/item_get_response.py
index 9f8d4bda..434e4639 100644
--- a/src/llama_stack_client/types/conversations/item_get_response.py
+++ b/src/llama_stack_client/types/conversations/item_get_response.py
@@ -18,6 +18,7 @@
"OpenAIResponseMessageContentUnionMember1",
"OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText",
"OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage",
+ "OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile",
"OpenAIResponseMessageContentUnionMember2",
"OpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputText",
"OpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputTextAnnotation",
@@ -54,14 +55,35 @@ class OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentI
type: Literal["input_image"]
"""Content type identifier, always "input_image" """
+ file_id: Optional[str] = None
+ """(Optional) The ID of the file to be sent to the model."""
+
image_url: Optional[str] = None
"""(Optional) URL of the image content"""
+class OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile(BaseModel):
+ type: Literal["input_file"]
+ """The type of the input item. Always `input_file`."""
+
+ file_data: Optional[str] = None
+ """The data of the file to be sent to the model."""
+
+ file_id: Optional[str] = None
+ """(Optional) The ID of the file to be sent to the model."""
+
+ file_url: Optional[str] = None
+ """The URL of the file to be sent to the model."""
+
+ filename: Optional[str] = None
+ """The name of the file to be sent to the model."""
+
+
OpenAIResponseMessageContentUnionMember1: TypeAlias = Annotated[
Union[
OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText,
OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage,
+ OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile,
],
PropertyInfo(discriminator="type"),
]
diff --git a/src/llama_stack_client/types/conversations/item_list_response.py b/src/llama_stack_client/types/conversations/item_list_response.py
index b95f56fb..d6ba4735 100644
--- a/src/llama_stack_client/types/conversations/item_list_response.py
+++ b/src/llama_stack_client/types/conversations/item_list_response.py
@@ -18,6 +18,7 @@
"OpenAIResponseMessageContentUnionMember1",
"OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText",
"OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage",
+ "OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile",
"OpenAIResponseMessageContentUnionMember2",
"OpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputText",
"OpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputTextAnnotation",
@@ -54,14 +55,35 @@ class OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentI
type: Literal["input_image"]
"""Content type identifier, always "input_image" """
+ file_id: Optional[str] = None
+ """(Optional) The ID of the file to be sent to the model."""
+
image_url: Optional[str] = None
"""(Optional) URL of the image content"""
+class OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile(BaseModel):
+ type: Literal["input_file"]
+ """The type of the input item. Always `input_file`."""
+
+ file_data: Optional[str] = None
+ """The data of the file to be sent to the model."""
+
+ file_id: Optional[str] = None
+ """(Optional) The ID of the file to be sent to the model."""
+
+ file_url: Optional[str] = None
+ """The URL of the file to be sent to the model."""
+
+ filename: Optional[str] = None
+ """The name of the file to be sent to the model."""
+
+
OpenAIResponseMessageContentUnionMember1: TypeAlias = Annotated[
Union[
OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText,
OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage,
+ OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile,
],
PropertyInfo(discriminator="type"),
]
diff --git a/src/llama_stack_client/types/model_list_response.py b/src/llama_stack_client/types/model_list_response.py
index b53ae421..c42b3310 100644
--- a/src/llama_stack_client/types/model_list_response.py
+++ b/src/llama_stack_client/types/model_list_response.py
@@ -6,11 +6,25 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-from typing import List
-from typing_extensions import TypeAlias
+import builtins
+from typing import Dict, List, Union, Optional
+from typing_extensions import Literal, TypeAlias
-from .model import Model
+from .._models import BaseModel
-__all__ = ["ModelListResponse"]
+__all__ = ["ModelListResponse", "ModelListResponseItem"]
-ModelListResponse: TypeAlias = List[Model]
+
+class ModelListResponseItem(BaseModel):
+ id: str
+
+ created: int
+
+ object: Literal["model"]
+
+ owned_by: str
+
+ custom_metadata: Optional[Dict[str, Union[bool, float, str, List[builtins.object], builtins.object, None]]] = None
+
+
+ModelListResponse: TypeAlias = List[ModelListResponseItem]
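A sketch of iterating the reshaped `models.list()` result, assuming a placeholder base URL:

from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # placeholder URL

for model in client.models.list():
    # OpenAI-style fields replace the previous Model shape for this endpoint.
    print(model.id, model.object, model.owned_by, model.created)
    if model.custom_metadata:
        print("  custom_metadata:", model.custom_metadata)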
diff --git a/src/llama_stack_client/types/models/__init__.py b/src/llama_stack_client/types/models/__init__.py
index d14ed874..bba1f3e9 100644
--- a/src/llama_stack_client/types/models/__init__.py
+++ b/src/llama_stack_client/types/models/__init__.py
@@ -7,3 +7,5 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from __future__ import annotations
+
+from .openai_list_response import OpenAIListResponse as OpenAIListResponse
diff --git a/src/llama_stack_client/types/models/openai_list_response.py b/src/llama_stack_client/types/models/openai_list_response.py
new file mode 100644
index 00000000..5b6c0358
--- /dev/null
+++ b/src/llama_stack_client/types/models/openai_list_response.py
@@ -0,0 +1,10 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List
+from typing_extensions import TypeAlias
+
+from ..model import Model
+
+__all__ = ["OpenAIListResponse"]
+
+OpenAIListResponse: TypeAlias = List[Model]
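A sketch of the companion `models.openai.list()` call, which continues to return the original `Model` objects via the new `OpenAIListResponse` alias; the base URL is a placeholder:

from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # placeholder URL

legacy_models = client.models.openai.list()  # returns OpenAIListResponse, i.e. List[Model]
print(len(legacy_models))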
diff --git a/src/llama_stack_client/types/query_chunks_response.py b/src/llama_stack_client/types/query_chunks_response.py
index ab5fdb61..cb0181a2 100644
--- a/src/llama_stack_client/types/query_chunks_response.py
+++ b/src/llama_stack_client/types/query_chunks_response.py
@@ -53,6 +53,9 @@ class ChunkChunkMetadata(BaseModel):
class Chunk(BaseModel):
+ chunk_id: str
+ """Unique identifier for the chunk. Must be provided explicitly."""
+
content: InterleavedContent
"""
The content of the chunk, which can be interleaved text, images, or other types.
@@ -73,12 +76,6 @@ class Chunk(BaseModel):
embedding: Optional[List[float]] = None
"""Optional embedding for the chunk. If not provided, it will be computed later."""
- stored_chunk_id: Optional[str] = None
- """The chunk ID that is stored in the vector database.
-
- Used for backend functionality.
- """
-
class QueryChunksResponse(BaseModel):
chunks: List[Chunk]
diff --git a/src/llama_stack_client/types/response_create_params.py b/src/llama_stack_client/types/response_create_params.py
index c8b48657..f99cd037 100644
--- a/src/llama_stack_client/types/response_create_params.py
+++ b/src/llama_stack_client/types/response_create_params.py
@@ -20,6 +20,7 @@
"InputUnionMember1OpenAIResponseMessageContentUnionMember1",
"InputUnionMember1OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText",
"InputUnionMember1OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage",
+ "InputUnionMember1OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile",
"InputUnionMember1OpenAIResponseMessageContentUnionMember2",
"InputUnionMember1OpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputText",
"InputUnionMember1OpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputTextAnnotation",
@@ -38,6 +39,11 @@
"InputUnionMember1OpenAIResponseMcpApprovalRequest",
"InputUnionMember1OpenAIResponseInputFunctionToolCallOutput",
"InputUnionMember1OpenAIResponseMcpApprovalResponse",
+ "Prompt",
+ "PromptVariables",
+ "PromptVariablesOpenAIResponseInputMessageContentText",
+ "PromptVariablesOpenAIResponseInputMessageContentImage",
+ "PromptVariablesOpenAIResponseInputMessageContentFile",
"Text",
"TextFormat",
"Tool",
@@ -83,6 +89,9 @@ class ResponseCreateParamsBase(TypedDict, total=False):
responses.
"""
+ prompt: Prompt
+ """(Optional) Prompt object with ID, version, and variables."""
+
store: bool
temperature: float
@@ -112,13 +121,36 @@ class InputUnionMember1OpenAIResponseMessageContentUnionMember1OpenAIResponseInp
type: Required[Literal["input_image"]]
"""Content type identifier, always "input_image" """
+ file_id: str
+ """(Optional) The ID of the file to be sent to the model."""
+
image_url: str
"""(Optional) URL of the image content"""
+class InputUnionMember1OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile(
+ TypedDict, total=False
+):
+ type: Required[Literal["input_file"]]
+ """The type of the input item. Always `input_file`."""
+
+ file_data: str
+ """The data of the file to be sent to the model."""
+
+ file_id: str
+ """(Optional) The ID of the file to be sent to the model."""
+
+ file_url: str
+ """The URL of the file to be sent to the model."""
+
+ filename: str
+ """The name of the file to be sent to the model."""
+
+
InputUnionMember1OpenAIResponseMessageContentUnionMember1: TypeAlias = Union[
InputUnionMember1OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText,
InputUnionMember1OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage,
+ InputUnionMember1OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile,
]
@@ -400,6 +432,67 @@ class InputUnionMember1OpenAIResponseMcpApprovalResponse(TypedDict, total=False)
]
+class PromptVariablesOpenAIResponseInputMessageContentText(TypedDict, total=False):
+ text: Required[str]
+ """The text content of the input message"""
+
+ type: Required[Literal["input_text"]]
+ """Content type identifier, always "input_text" """
+
+
+class PromptVariablesOpenAIResponseInputMessageContentImage(TypedDict, total=False):
+ detail: Required[Literal["low", "high", "auto"]]
+ """Level of detail for image processing, can be "low", "high", or "auto" """
+
+ type: Required[Literal["input_image"]]
+ """Content type identifier, always "input_image" """
+
+ file_id: str
+ """(Optional) The ID of the file to be sent to the model."""
+
+ image_url: str
+ """(Optional) URL of the image content"""
+
+
+class PromptVariablesOpenAIResponseInputMessageContentFile(TypedDict, total=False):
+ type: Required[Literal["input_file"]]
+ """The type of the input item. Always `input_file`."""
+
+ file_data: str
+ """The data of the file to be sent to the model."""
+
+ file_id: str
+ """(Optional) The ID of the file to be sent to the model."""
+
+ file_url: str
+ """The URL of the file to be sent to the model."""
+
+ filename: str
+ """The name of the file to be sent to the model."""
+
+
+PromptVariables: TypeAlias = Union[
+ PromptVariablesOpenAIResponseInputMessageContentText,
+ PromptVariablesOpenAIResponseInputMessageContentImage,
+ PromptVariablesOpenAIResponseInputMessageContentFile,
+]
+
+
+class Prompt(TypedDict, total=False):
+ id: Required[str]
+ """Unique identifier of the prompt template"""
+
+ variables: Dict[str, PromptVariables]
+ """
+ Dictionary of variable names to OpenAIResponseInputMessageContent structure for
+ template substitution. The substitution values can either be strings, or other
+ Response input types like images or files.
+ """
+
+ version: str
+ """Version number of the prompt to use (defaults to latest if not specified)"""
+
+
class TextFormat(TypedDict, total=False):
type: Required[Literal["text", "json_schema", "json_object"]]
"""Must be "text", "json_schema", or "json_object" to identify the format type"""
diff --git a/src/llama_stack_client/types/response_list_response.py b/src/llama_stack_client/types/response_list_response.py
index 78c683b4..ccd9a3d7 100644
--- a/src/llama_stack_client/types/response_list_response.py
+++ b/src/llama_stack_client/types/response_list_response.py
@@ -21,6 +21,7 @@
"InputOpenAIResponseMessageContentUnionMember1",
"InputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText",
"InputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage",
+ "InputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile",
"InputOpenAIResponseMessageContentUnionMember2",
"InputOpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputText",
"InputOpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputTextAnnotation",
@@ -44,6 +45,7 @@
"OutputOpenAIResponseMessageContentUnionMember1",
"OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText",
"OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage",
+ "OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile",
"OutputOpenAIResponseMessageContentUnionMember2",
"OutputOpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputText",
"OutputOpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputTextAnnotation",
@@ -63,6 +65,11 @@
"Text",
"TextFormat",
"Error",
+ "Prompt",
+ "PromptVariables",
+ "PromptVariablesOpenAIResponseInputMessageContentText",
+ "PromptVariablesOpenAIResponseInputMessageContentImage",
+ "PromptVariablesOpenAIResponseInputMessageContentFile",
"Tool",
"ToolOpenAIResponseInputToolWebSearch",
"ToolOpenAIResponseInputToolFileSearch",
@@ -92,14 +99,35 @@ class InputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageCon
type: Literal["input_image"]
"""Content type identifier, always "input_image" """
+ file_id: Optional[str] = None
+ """(Optional) The ID of the file to be sent to the model."""
+
image_url: Optional[str] = None
"""(Optional) URL of the image content"""
+class InputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile(BaseModel):
+ type: Literal["input_file"]
+ """The type of the input item. Always `input_file`."""
+
+ file_data: Optional[str] = None
+ """The data of the file to be sent to the model."""
+
+ file_id: Optional[str] = None
+ """(Optional) The ID of the file to be sent to the model."""
+
+ file_url: Optional[str] = None
+ """The URL of the file to be sent to the model."""
+
+ filename: Optional[str] = None
+ """The name of the file to be sent to the model."""
+
+
InputOpenAIResponseMessageContentUnionMember1: TypeAlias = Annotated[
Union[
InputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText,
InputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage,
+ InputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile,
],
PropertyInfo(discriminator="type"),
]
@@ -396,14 +424,35 @@ class OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageCo
type: Literal["input_image"]
"""Content type identifier, always "input_image" """
+ file_id: Optional[str] = None
+ """(Optional) The ID of the file to be sent to the model."""
+
image_url: Optional[str] = None
"""(Optional) URL of the image content"""
+class OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile(BaseModel):
+ type: Literal["input_file"]
+ """The type of the input item. Always `input_file`."""
+
+ file_data: Optional[str] = None
+ """The data of the file to be sent to the model."""
+
+ file_id: Optional[str] = None
+ """(Optional) The ID of the file to be sent to the model."""
+
+ file_url: Optional[str] = None
+ """The URL of the file to be sent to the model."""
+
+ filename: Optional[str] = None
+ """The name of the file to be sent to the model."""
+
+
OutputOpenAIResponseMessageContentUnionMember1: TypeAlias = Annotated[
Union[
OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText,
OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage,
+ OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile,
],
PropertyInfo(discriminator="type"),
]
@@ -699,6 +748,70 @@ class Error(BaseModel):
"""Human-readable error message describing the failure"""
+class PromptVariablesOpenAIResponseInputMessageContentText(BaseModel):
+ text: str
+ """The text content of the input message"""
+
+ type: Literal["input_text"]
+ """Content type identifier, always "input_text" """
+
+
+class PromptVariablesOpenAIResponseInputMessageContentImage(BaseModel):
+ detail: Literal["low", "high", "auto"]
+ """Level of detail for image processing, can be "low", "high", or "auto" """
+
+ type: Literal["input_image"]
+ """Content type identifier, always "input_image" """
+
+ file_id: Optional[str] = None
+ """(Optional) The ID of the file to be sent to the model."""
+
+ image_url: Optional[str] = None
+ """(Optional) URL of the image content"""
+
+
+class PromptVariablesOpenAIResponseInputMessageContentFile(BaseModel):
+ type: Literal["input_file"]
+ """The type of the input item. Always `input_file`."""
+
+ file_data: Optional[str] = None
+ """The data of the file to be sent to the model."""
+
+ file_id: Optional[str] = None
+ """(Optional) The ID of the file to be sent to the model."""
+
+ file_url: Optional[str] = None
+ """The URL of the file to be sent to the model."""
+
+ filename: Optional[str] = None
+ """The name of the file to be sent to the model."""
+
+
+PromptVariables: TypeAlias = Annotated[
+ Union[
+ PromptVariablesOpenAIResponseInputMessageContentText,
+ PromptVariablesOpenAIResponseInputMessageContentImage,
+ PromptVariablesOpenAIResponseInputMessageContentFile,
+ ],
+ PropertyInfo(discriminator="type"),
+]
+
+
+class Prompt(BaseModel):
+ id: str
+ """Unique identifier of the prompt template"""
+
+ variables: Optional[Dict[str, PromptVariables]] = None
+ """
+ Dictionary of variable names to OpenAIResponseInputMessageContent structure for
+ template substitution. The substitution values can either be strings, or other
+ Response input types like images or files.
+ """
+
+ version: Optional[str] = None
+ """Version number of the prompt to use (defaults to latest if not specified)"""
+
+
class ToolOpenAIResponseInputToolWebSearch(BaseModel):
type: Literal["web_search", "web_search_preview", "web_search_preview_2025_03_11"]
"""Web search tool type variant to use"""
@@ -842,6 +955,9 @@ class ResponseListResponse(BaseModel):
previous_response_id: Optional[str] = None
"""(Optional) ID of the previous response in a conversation"""
+ prompt: Optional[Prompt] = None
+ """(Optional) Reference to a prompt template and its variables."""
+
temperature: Optional[float] = None
"""(Optional) Sampling temperature used for generation"""
diff --git a/src/llama_stack_client/types/response_object.py b/src/llama_stack_client/types/response_object.py
index 57f708ce..706f50e2 100644
--- a/src/llama_stack_client/types/response_object.py
+++ b/src/llama_stack_client/types/response_object.py
@@ -21,6 +21,7 @@
"OutputOpenAIResponseMessageContentUnionMember1",
"OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText",
"OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage",
+ "OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile",
"OutputOpenAIResponseMessageContentUnionMember2",
"OutputOpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputText",
"OutputOpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputTextAnnotation",
@@ -40,6 +41,11 @@
"Text",
"TextFormat",
"Error",
+ "Prompt",
+ "PromptVariables",
+ "PromptVariablesOpenAIResponseInputMessageContentText",
+ "PromptVariablesOpenAIResponseInputMessageContentImage",
+ "PromptVariablesOpenAIResponseInputMessageContentFile",
"Tool",
"ToolOpenAIResponseInputToolWebSearch",
"ToolOpenAIResponseInputToolFileSearch",
@@ -69,14 +75,35 @@ class OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageCo
type: Literal["input_image"]
"""Content type identifier, always "input_image" """
+ file_id: Optional[str] = None
+ """(Optional) The ID of the file to be sent to the model."""
+
image_url: Optional[str] = None
"""(Optional) URL of the image content"""
+class OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile(BaseModel):
+ type: Literal["input_file"]
+ """The type of the input item. Always `input_file`."""
+
+ file_data: Optional[str] = None
+ """The data of the file to be sent to the model."""
+
+ file_id: Optional[str] = None
+ """(Optional) The ID of the file to be sent to the model."""
+
+ file_url: Optional[str] = None
+ """The URL of the file to be sent to the model."""
+
+ filename: Optional[str] = None
+ """The name of the file to be sent to the model."""
+
+
OutputOpenAIResponseMessageContentUnionMember1: TypeAlias = Annotated[
Union[
OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText,
OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage,
+ OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile,
],
PropertyInfo(discriminator="type"),
]
@@ -372,6 +399,70 @@ class Error(BaseModel):
"""Human-readable error message describing the failure"""
+class PromptVariablesOpenAIResponseInputMessageContentText(BaseModel):
+ text: str
+ """The text content of the input message"""
+
+ type: Literal["input_text"]
+ """Content type identifier, always "input_text" """
+
+
+class PromptVariablesOpenAIResponseInputMessageContentImage(BaseModel):
+ detail: Literal["low", "high", "auto"]
+ """Level of detail for image processing, can be "low", "high", or "auto" """
+
+ type: Literal["input_image"]
+ """Content type identifier, always "input_image" """
+
+ file_id: Optional[str] = None
+ """(Optional) The ID of the file to be sent to the model."""
+
+ image_url: Optional[str] = None
+ """(Optional) URL of the image content"""
+
+
+class PromptVariablesOpenAIResponseInputMessageContentFile(BaseModel):
+ type: Literal["input_file"]
+ """The type of the input item. Always `input_file`."""
+
+ file_data: Optional[str] = None
+ """The data of the file to be sent to the model."""
+
+ file_id: Optional[str] = None
+ """(Optional) The ID of the file to be sent to the model."""
+
+ file_url: Optional[str] = None
+ """The URL of the file to be sent to the model."""
+
+ filename: Optional[str] = None
+ """The name of the file to be sent to the model."""
+
+
+PromptVariables: TypeAlias = Annotated[
+ Union[
+ PromptVariablesOpenAIResponseInputMessageContentText,
+ PromptVariablesOpenAIResponseInputMessageContentImage,
+ PromptVariablesOpenAIResponseInputMessageContentFile,
+ ],
+ PropertyInfo(discriminator="type"),
+]
+
+
+class Prompt(BaseModel):
+ id: str
+ """Unique identifier of the prompt template"""
+
+ variables: Optional[Dict[str, PromptVariables]] = None
+ """
+ Dictionary of variable names to OpenAIResponseInputMessageContent structure for
+ template substitution. The substitution values can either be strings, or other
+ Response input types like images or files.
+ """
+
+ version: Optional[str] = None
+ """Version number of the prompt to use (defaults to latest if not specified)"""
+
+
class ToolOpenAIResponseInputToolWebSearch(BaseModel):
type: Literal["web_search", "web_search_preview", "web_search_preview_2025_03_11"]
"""Web search tool type variant to use"""
@@ -522,6 +613,9 @@ def output_text(self) -> str:
previous_response_id: Optional[str] = None
"""(Optional) ID of the previous response in a conversation"""
+ prompt: Optional[Prompt] = None
+ """(Optional) Reference to a prompt template and its variables."""
+
temperature: Optional[float] = None
"""(Optional) Sampling temperature used for generation"""
diff --git a/src/llama_stack_client/types/response_object_stream.py b/src/llama_stack_client/types/response_object_stream.py
index a75ac721..16fe6c6d 100644
--- a/src/llama_stack_client/types/response_object_stream.py
+++ b/src/llama_stack_client/types/response_object_stream.py
@@ -23,6 +23,7 @@
"OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember1",
"OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText",
"OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage",
+ "OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile",
"OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember2",
"OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputText",
"OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputTextAnnotation",
@@ -45,6 +46,7 @@
"OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember1",
"OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText",
"OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage",
+ "OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile",
"OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember2",
"OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputText",
"OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputTextAnnotation",
@@ -159,14 +161,37 @@ class OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessage
type: Literal["input_image"]
"""Content type identifier, always "input_image" """
+ file_id: Optional[str] = None
+ """(Optional) The ID of the file to be sent to the model."""
+
image_url: Optional[str] = None
"""(Optional) URL of the image content"""
+class OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile(
+ BaseModel
+):
+ type: Literal["input_file"]
+ """The type of the input item. Always `input_file`."""
+
+ file_data: Optional[str] = None
+ """The data of the file to be sent to the model."""
+
+ file_id: Optional[str] = None
+ """(Optional) The ID of the file to be sent to the model."""
+
+ file_url: Optional[str] = None
+ """The URL of the file to be sent to the model."""
+
+ filename: Optional[str] = None
+ """The name of the file to be sent to the model."""
+
+
OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember1: TypeAlias = Annotated[
Union[
OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText,
OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage,
+ OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile,
],
PropertyInfo(discriminator="type"),
]
@@ -470,14 +495,37 @@ class OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageC
type: Literal["input_image"]
"""Content type identifier, always "input_image" """
+ file_id: Optional[str] = None
+ """(Optional) The ID of the file to be sent to the model."""
+
image_url: Optional[str] = None
"""(Optional) URL of the image content"""
+class OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile(
+ BaseModel
+):
+ type: Literal["input_file"]
+ """The type of the input item. Always `input_file`."""
+
+ file_data: Optional[str] = None
+ """The data of the file to be sent to the model."""
+
+ file_id: Optional[str] = None
+ """(Optional) The ID of the file to be sent to the model."""
+
+ file_url: Optional[str] = None
+ """The URL of the file to be sent to the model."""
+
+ filename: Optional[str] = None
+ """The name of the file to be sent to the model."""
+
+
OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember1: TypeAlias = Annotated[
Union[
OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText,
OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage,
+ OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile,
],
PropertyInfo(discriminator="type"),
]
diff --git a/src/llama_stack_client/types/responses/input_item_list_response.py b/src/llama_stack_client/types/responses/input_item_list_response.py
index b812ee62..71a59f50 100644
--- a/src/llama_stack_client/types/responses/input_item_list_response.py
+++ b/src/llama_stack_client/types/responses/input_item_list_response.py
@@ -19,6 +19,7 @@
"DataOpenAIResponseMessageContentUnionMember1",
"DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText",
"DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage",
+ "DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile",
"DataOpenAIResponseMessageContentUnionMember2",
"DataOpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputText",
"DataOpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputTextAnnotation",
@@ -55,14 +56,35 @@ class DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageCont
type: Literal["input_image"]
"""Content type identifier, always "input_image" """
+ file_id: Optional[str] = None
+ """(Optional) The ID of the file to be sent to the model."""
+
image_url: Optional[str] = None
"""(Optional) URL of the image content"""
+class DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile(BaseModel):
+ type: Literal["input_file"]
+ """The type of the input item. Always `input_file`."""
+
+ file_data: Optional[str] = None
+ """The data of the file to be sent to the model."""
+
+ file_id: Optional[str] = None
+ """(Optional) The ID of the file to be sent to the model."""
+
+ file_url: Optional[str] = None
+ """The URL of the file to be sent to the model."""
+
+ filename: Optional[str] = None
+ """The name of the file to be sent to the model."""
+
+
DataOpenAIResponseMessageContentUnionMember1: TypeAlias = Annotated[
Union[
DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText,
DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage,
+ DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile,
],
PropertyInfo(discriminator="type"),
]
diff --git a/src/llama_stack_client/types/route_list_params.py b/src/llama_stack_client/types/route_list_params.py
new file mode 100644
index 00000000..764b13c7
--- /dev/null
+++ b/src/llama_stack_client/types/route_list_params.py
@@ -0,0 +1,17 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, TypedDict
+
+__all__ = ["RouteListParams"]
+
+
+class RouteListParams(TypedDict, total=False):
+ api_filter: Literal["v1", "v1alpha", "v1beta", "deprecated"]
+ """Optional filter to control which routes are returned.
+
+ Can be an API level ('v1', 'v1alpha', 'v1beta') to show non-deprecated routes at
+ that level, or 'deprecated' to show deprecated routes across all levels. If not
+ specified, returns only non-deprecated v1 routes.
+ """
diff --git a/src/llama_stack_client/types/vector_io_insert_params.py b/src/llama_stack_client/types/vector_io_insert_params.py
index 1584f807..520ad24b 100644
--- a/src/llama_stack_client/types/vector_io_insert_params.py
+++ b/src/llama_stack_client/types/vector_io_insert_params.py
@@ -73,6 +73,9 @@ class ChunkChunkMetadata(TypedDict, total=False):
class Chunk(TypedDict, total=False):
+ chunk_id: Required[str]
+ """Unique identifier for the chunk. Must be provided explicitly."""
+
content: Required[InterleavedContent]
"""
The content of the chunk, which can be interleaved text, images, or other types.
@@ -92,9 +95,3 @@ class Chunk(TypedDict, total=False):
embedding: Iterable[float]
"""Optional embedding for the chunk. If not provided, it will be computed later."""
-
- stored_chunk_id: str
- """The chunk ID that is stored in the vector database.
-
- Used for backend functionality.
- """
diff --git a/tests/api_resources/models/test_openai.py b/tests/api_resources/models/test_openai.py
index 6a9acf23..96955333 100644
--- a/tests/api_resources/models/test_openai.py
+++ b/tests/api_resources/models/test_openai.py
@@ -15,7 +15,7 @@
from tests.utils import assert_matches_type
from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient
-from llama_stack_client.types import ModelListResponse
+from llama_stack_client.types.models import OpenAIListResponse
base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
@@ -26,7 +26,7 @@ class TestOpenAI:
@parametrize
def test_method_list(self, client: LlamaStackClient) -> None:
openai = client.models.openai.list()
- assert_matches_type(ModelListResponse, openai, path=["response"])
+ assert_matches_type(OpenAIListResponse, openai, path=["response"])
@parametrize
def test_raw_response_list(self, client: LlamaStackClient) -> None:
@@ -35,7 +35,7 @@ def test_raw_response_list(self, client: LlamaStackClient) -> None:
assert response.is_closed is True
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
openai = response.parse()
- assert_matches_type(ModelListResponse, openai, path=["response"])
+ assert_matches_type(OpenAIListResponse, openai, path=["response"])
@parametrize
def test_streaming_response_list(self, client: LlamaStackClient) -> None:
@@ -44,7 +44,7 @@ def test_streaming_response_list(self, client: LlamaStackClient) -> None:
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
openai = response.parse()
- assert_matches_type(ModelListResponse, openai, path=["response"])
+ assert_matches_type(OpenAIListResponse, openai, path=["response"])
assert cast(Any, response.is_closed) is True
@@ -57,7 +57,7 @@ class TestAsyncOpenAI:
@parametrize
async def test_method_list(self, async_client: AsyncLlamaStackClient) -> None:
openai = await async_client.models.openai.list()
- assert_matches_type(ModelListResponse, openai, path=["response"])
+ assert_matches_type(OpenAIListResponse, openai, path=["response"])
@parametrize
async def test_raw_response_list(self, async_client: AsyncLlamaStackClient) -> None:
@@ -66,7 +66,7 @@ async def test_raw_response_list(self, async_client: AsyncLlamaStackClient) -> N
assert response.is_closed is True
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
openai = await response.parse()
- assert_matches_type(ModelListResponse, openai, path=["response"])
+ assert_matches_type(OpenAIListResponse, openai, path=["response"])
@parametrize
async def test_streaming_response_list(self, async_client: AsyncLlamaStackClient) -> None:
@@ -75,6 +75,6 @@ async def test_streaming_response_list(self, async_client: AsyncLlamaStackClient
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
openai = await response.parse()
- assert_matches_type(ModelListResponse, openai, path=["response"])
+ assert_matches_type(OpenAIListResponse, openai, path=["response"])
assert cast(Any, response.is_closed) is True
diff --git a/tests/api_resources/test_responses.py b/tests/api_resources/test_responses.py
index 5ef731fd..3bdafe3c 100644
--- a/tests/api_resources/test_responses.py
+++ b/tests/api_resources/test_responses.py
@@ -46,6 +46,16 @@ def test_method_create_with_all_params_overload_1(self, client: LlamaStackClient
instructions="instructions",
max_infer_iters=0,
previous_response_id="previous_response_id",
+ prompt={
+ "id": "id",
+ "variables": {
+ "foo": {
+ "text": "text",
+ "type": "input_text",
+ }
+ },
+ "version": "version",
+ },
store=True,
stream=False,
temperature=0,
@@ -113,6 +123,16 @@ def test_method_create_with_all_params_overload_2(self, client: LlamaStackClient
instructions="instructions",
max_infer_iters=0,
previous_response_id="previous_response_id",
+ prompt={
+ "id": "id",
+ "variables": {
+ "foo": {
+ "text": "text",
+ "type": "input_text",
+ }
+ },
+ "version": "version",
+ },
store=True,
temperature=0,
text={
@@ -295,6 +315,16 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn
instructions="instructions",
max_infer_iters=0,
previous_response_id="previous_response_id",
+ prompt={
+ "id": "id",
+ "variables": {
+ "foo": {
+ "text": "text",
+ "type": "input_text",
+ }
+ },
+ "version": "version",
+ },
store=True,
stream=False,
temperature=0,
@@ -362,6 +392,16 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn
instructions="instructions",
max_infer_iters=0,
previous_response_id="previous_response_id",
+ prompt={
+ "id": "id",
+ "variables": {
+ "foo": {
+ "text": "text",
+ "type": "input_text",
+ }
+ },
+ "version": "version",
+ },
store=True,
temperature=0,
text={
diff --git a/tests/api_resources/test_routes.py b/tests/api_resources/test_routes.py
index 9c863f26..58ab8ad9 100644
--- a/tests/api_resources/test_routes.py
+++ b/tests/api_resources/test_routes.py
@@ -28,6 +28,13 @@ def test_method_list(self, client: LlamaStackClient) -> None:
route = client.routes.list()
assert_matches_type(RouteListResponse, route, path=["response"])
+ @parametrize
+ def test_method_list_with_all_params(self, client: LlamaStackClient) -> None:
+ route = client.routes.list(
+ api_filter="v1",
+ )
+ assert_matches_type(RouteListResponse, route, path=["response"])
+
@parametrize
def test_raw_response_list(self, client: LlamaStackClient) -> None:
response = client.routes.with_raw_response.list()
@@ -59,6 +66,13 @@ async def test_method_list(self, async_client: AsyncLlamaStackClient) -> None:
route = await async_client.routes.list()
assert_matches_type(RouteListResponse, route, path=["response"])
+ @parametrize
+ async def test_method_list_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
+ route = await async_client.routes.list(
+ api_filter="v1",
+ )
+ assert_matches_type(RouteListResponse, route, path=["response"])
+
@parametrize
async def test_raw_response_list(self, async_client: AsyncLlamaStackClient) -> None:
response = await async_client.routes.with_raw_response.list()
diff --git a/tests/api_resources/test_vector_io.py b/tests/api_resources/test_vector_io.py
index 9adf721a..b5ae697f 100644
--- a/tests/api_resources/test_vector_io.py
+++ b/tests/api_resources/test_vector_io.py
@@ -28,6 +28,7 @@ def test_method_insert(self, client: LlamaStackClient) -> None:
vector_io = client.vector_io.insert(
chunks=[
{
+ "chunk_id": "chunk_id",
"content": "string",
"metadata": {"foo": True},
}
@@ -41,6 +42,7 @@ def test_method_insert_with_all_params(self, client: LlamaStackClient) -> None:
vector_io = client.vector_io.insert(
chunks=[
{
+ "chunk_id": "chunk_id",
"content": "string",
"metadata": {"foo": True},
"chunk_metadata": {
@@ -57,7 +59,6 @@ def test_method_insert_with_all_params(self, client: LlamaStackClient) -> None:
"updated_timestamp": 0,
},
"embedding": [0],
- "stored_chunk_id": "stored_chunk_id",
}
],
vector_store_id="vector_store_id",
@@ -70,6 +71,7 @@ def test_raw_response_insert(self, client: LlamaStackClient) -> None:
response = client.vector_io.with_raw_response.insert(
chunks=[
{
+ "chunk_id": "chunk_id",
"content": "string",
"metadata": {"foo": True},
}
@@ -87,6 +89,7 @@ def test_streaming_response_insert(self, client: LlamaStackClient) -> None:
with client.vector_io.with_streaming_response.insert(
chunks=[
{
+ "chunk_id": "chunk_id",
"content": "string",
"metadata": {"foo": True},
}
@@ -155,6 +158,7 @@ async def test_method_insert(self, async_client: AsyncLlamaStackClient) -> None:
vector_io = await async_client.vector_io.insert(
chunks=[
{
+ "chunk_id": "chunk_id",
"content": "string",
"metadata": {"foo": True},
}
@@ -168,6 +172,7 @@ async def test_method_insert_with_all_params(self, async_client: AsyncLlamaStack
vector_io = await async_client.vector_io.insert(
chunks=[
{
+ "chunk_id": "chunk_id",
"content": "string",
"metadata": {"foo": True},
"chunk_metadata": {
@@ -184,7 +189,6 @@ async def test_method_insert_with_all_params(self, async_client: AsyncLlamaStack
"updated_timestamp": 0,
},
"embedding": [0],
- "stored_chunk_id": "stored_chunk_id",
}
],
vector_store_id="vector_store_id",
@@ -197,6 +201,7 @@ async def test_raw_response_insert(self, async_client: AsyncLlamaStackClient) ->
response = await async_client.vector_io.with_raw_response.insert(
chunks=[
{
+ "chunk_id": "chunk_id",
"content": "string",
"metadata": {"foo": True},
}
@@ -214,6 +219,7 @@ async def test_streaming_response_insert(self, async_client: AsyncLlamaStackClie
async with async_client.vector_io.with_streaming_response.insert(
chunks=[
{
+ "chunk_id": "chunk_id",
"content": "string",
"metadata": {"foo": True},
}
From f0dc940c50b26706f1c3f6ee0bba7c545fce965b Mon Sep 17 00:00:00 2001
From: "stainless-app[bot]"
<142633134+stainless-app[bot]@users.noreply.github.com>
Date: Mon, 3 Nov 2025 23:55:20 +0000
Subject: [PATCH 3/3] release: 0.4.0-alpha.2
---
.release-please-manifest.json | 2 +-
CHANGELOG.md | 14 ++++++++++++++
pyproject.toml | 2 +-
3 files changed, 16 insertions(+), 2 deletions(-)
diff --git a/.release-please-manifest.json b/.release-please-manifest.json
index a1e0736b..24b05bc4 100644
--- a/.release-please-manifest.json
+++ b/.release-please-manifest.json
@@ -1,3 +1,3 @@
{
- ".": "0.4.0-alpha.1"
+ ".": "0.4.0-alpha.2"
}
diff --git a/CHANGELOG.md b/CHANGELOG.md
index ab7d3936..236e5da7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,19 @@
# Changelog
+## 0.4.0-alpha.2 (2025-11-03)
+
+Full Changelog: [v0.4.0-alpha.1...v0.4.0-alpha.2](https://github.com/llamastack/llama-stack-client-python/compare/v0.4.0-alpha.1...v0.4.0-alpha.2)
+
+### Features
+
+* **api:** point models.list() to /v1/openai/v1/models ([efdf1be](https://github.com/llamastack/llama-stack-client-python/commit/efdf1be41243be5107f4863de99c5dce8504bba9))
+
+
+### Chores
+
+* bump version to 0.3.2.dev0 ([#292](https://github.com/llamastack/llama-stack-client-python/issues/292)) ([fb91556](https://github.com/llamastack/llama-stack-client-python/commit/fb915569d1b07bbbc1202e3142447807f6d42436))
+* **internal/tests:** avoid race condition with implicit client cleanup ([4af8f35](https://github.com/llamastack/llama-stack-client-python/commit/4af8f35cffaf2b3d00a38a8fc5f8ca5a0b266786))
+
## 0.4.0-alpha.1 (2025-10-30)
Full Changelog: [v0.3.1-alpha.2...v0.4.0-alpha.1](https://github.com/llamastack/llama-stack-client-python/compare/v0.3.1-alpha.2...v0.4.0-alpha.1)
diff --git a/pyproject.toml b/pyproject.toml
index 1b1f5563..c75fa9fe 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "llama_stack_client"
-version = "0.4.0-alpha.1"
+version = "0.4.0-alpha.2"
description = "The official Python library for the llama-stack-client API"
dynamic = ["readme"]
license = "MIT"