Merged · Changes from all commits
4 changes: 3 additions & 1 deletion .github/workflows/ci.yml
@@ -36,7 +36,7 @@ jobs:
         run: ./scripts/lint
 
   build:
-    if: github.repository == 'stainless-sdks/llama-stack-client-python' && (github.event_name == 'push' || github.event.pull_request.head.repo.fork)
+    if: github.event_name == 'push' || github.event.pull_request.head.repo.fork
     timeout-minutes: 10
     name: build
     permissions:
@@ -61,12 +61,14 @@ jobs:
         run: rye build
 
       - name: Get GitHub OIDC Token
+        if: github.repository == 'stainless-sdks/llama-stack-client-python'
        id: github-oidc
        uses: actions/github-script@v6
        with:
          script: core.setOutput('github_token', await core.getIDToken());
 
      - name: Upload tarball
+       if: github.repository == 'stainless-sdks/llama-stack-client-python'
        env:
          URL: https://pkg.stainless.com/s
          AUTH: ${{ steps.github-oidc.outputs.github_token }}
2 changes: 1 addition & 1 deletion .release-please-manifest.json
@@ -1,3 +1,3 @@
 {
-  ".": "0.2.18-alpha.3"
+  ".": "0.2.19-alpha.1"
 }
8 changes: 4 additions & 4 deletions .stats.yml
@@ -1,4 +1,4 @@
-configured_endpoints: 106
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/llamastack%2Fllama-stack-client-4f6633567c1a079df49d0cf58f37251a4bb0ee2f2a496ac83c9fee26eb325f9c.yml
-openapi_spec_hash: af5b3d3bbecf48f15c90b982ccac852e
-config_hash: e67fd054e95c1e82f78f4b834e96bb65
+configured_endpoints: 107
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/llamastack%2Fllama-stack-client-f252873ea1e1f38fd207331ef2621c511154d5be3f4076e59cc15754fc58eee4.yml
+openapi_spec_hash: 10cbb4337a06a9fdd7d08612dd6044c3
+config_hash: 17fe64b23723fc54f2ee61c80223c3e3
26 changes: 26 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,31 @@
 # Changelog
 
+## 0.2.19-alpha.1 (2025-08-22)
+
+Full Changelog: [v0.2.18-alpha.3...v0.2.19-alpha.1](https://github.com/llamastack/llama-stack-client-python/compare/v0.2.18-alpha.3...v0.2.19-alpha.1)
+
+### Features
+
+* **api:** manual updates ([119bdb2](https://github.com/llamastack/llama-stack-client-python/commit/119bdb2a862fe772ca82770937aba49ffb039bf2))
+* **api:** query_metrics, batches, changes ([c935c79](https://github.com/llamastack/llama-stack-client-python/commit/c935c79c1117613c7e9413b87d19cfd010d89796))
+* **api:** some updates to query metrics ([8f0f7a5](https://github.com/llamastack/llama-stack-client-python/commit/8f0f7a5de82f1dd3404cedff599b8a33f6e5c755))
+
+
+### Bug Fixes
+
+* **agent:** fix wrong module import in ReAct agent ([#262](https://github.com/llamastack/llama-stack-client-python/issues/262)) ([c17f3d6](https://github.com/llamastack/llama-stack-client-python/commit/c17f3d65af17d282785623864661ef2d16fcb1fc)), closes [#261](https://github.com/llamastack/llama-stack-client-python/issues/261)
+* **build:** kill explicit listing of python3.13 for now ([5284b4a](https://github.com/llamastack/llama-stack-client-python/commit/5284b4a93822e8900c05f63ddf342aab3b603aa3))
+
+
+### Chores
+
+* update github action ([af6b97e](https://github.com/llamastack/llama-stack-client-python/commit/af6b97e6ec55473a03682ea45e4bac9429fbdf78))
+
+
+### Build System
+
+* Bump version to 0.2.18 ([53d95ba](https://github.com/llamastack/llama-stack-client-python/commit/53d95bad01e4aaa8fa27438618aaa6082cd60275))
+
 ## 0.2.18-alpha.3 (2025-08-14)
 
 Full Changelog: [v0.2.18-alpha.2...v0.2.18-alpha.3](https://github.com/llamastack/llama-stack-client-python/compare/v0.2.18-alpha.2...v0.2.18-alpha.3)
9 changes: 6 additions & 3 deletions api.md
@@ -20,6 +20,7 @@ from llama_stack_client.types import (
     SafetyViolation,
     SamplingParams,
     ScoringResult,
+    SharedTokenLogProbs,
     SystemMessage,
     ToolCall,
     ToolCallOrString,
@@ -62,7 +63,7 @@ Methods:
 Types:
 
 ```python
-from llama_stack_client.types import ToolDef, ToolInvocationResult, ToolRuntimeListToolsResponse
+from llama_stack_client.types import ToolInvocationResult, ToolRuntimeListToolsResponse
 ```
 
 Methods:
@@ -239,7 +240,6 @@ Types:
 ```python
 from llama_stack_client.types import (
     ChatCompletionResponseStreamChunk,
-    CompletionResponse,
     EmbeddingsResponse,
     TokenLogProbs,
     InferenceBatchChatCompletionResponse,
@@ -251,7 +251,7 @@ Methods:
 - <code title="post /v1/inference/batch-chat-completion">client.inference.<a href="./src/llama_stack_client/resources/inference.py">batch_chat_completion</a>(\*\*<a href="src/llama_stack_client/types/inference_batch_chat_completion_params.py">params</a>) -> <a href="./src/llama_stack_client/types/inference_batch_chat_completion_response.py">InferenceBatchChatCompletionResponse</a></code>
 - <code title="post /v1/inference/batch-completion">client.inference.<a href="./src/llama_stack_client/resources/inference.py">batch_completion</a>(\*\*<a href="src/llama_stack_client/types/inference_batch_completion_params.py">params</a>) -> <a href="./src/llama_stack_client/types/shared/batch_completion.py">BatchCompletion</a></code>
 - <code title="post /v1/inference/chat-completion">client.inference.<a href="./src/llama_stack_client/resources/inference.py">chat_completion</a>(\*\*<a href="src/llama_stack_client/types/inference_chat_completion_params.py">params</a>) -> <a href="./src/llama_stack_client/types/shared/chat_completion_response.py">ChatCompletionResponse</a></code>
-- <code title="post /v1/inference/completion">client.inference.<a href="./src/llama_stack_client/resources/inference.py">completion</a>(\*\*<a href="src/llama_stack_client/types/inference_completion_params.py">params</a>) -> <a href="./src/llama_stack_client/types/completion_response.py">CompletionResponse</a></code>
+- <code title="post /v1/inference/completion">client.inference.<a href="./src/llama_stack_client/resources/inference.py">completion</a>(\*\*<a href="src/llama_stack_client/types/inference_completion_params.py">params</a>) -> UnnamedTypeWithNoPropertyInfoOrParent0</code>
 - <code title="post /v1/inference/embeddings">client.inference.<a href="./src/llama_stack_client/resources/inference.py">embeddings</a>(\*\*<a href="src/llama_stack_client/types/inference_embeddings_params.py">params</a>) -> <a href="./src/llama_stack_client/types/embeddings_response.py">EmbeddingsResponse</a></code>
 
 # Embeddings
@@ -509,12 +509,14 @@ Types:
 ```python
 from llama_stack_client.types import (
     Event,
+    Metric,
     QueryCondition,
     QuerySpansResponse,
     SpanWithStatus,
     Trace,
     TelemetryGetSpanResponse,
     TelemetryGetSpanTreeResponse,
+    TelemetryQueryMetricsResponse,
     TelemetryQuerySpansResponse,
     TelemetryQueryTracesResponse,
 )
@@ -526,6 +528,7 @@ Methods:
 - <code title="post /v1/telemetry/spans/{span_id}/tree">client.telemetry.<a href="./src/llama_stack_client/resources/telemetry.py">get_span_tree</a>(span_id, \*\*<a href="src/llama_stack_client/types/telemetry_get_span_tree_params.py">params</a>) -> <a href="./src/llama_stack_client/types/telemetry_get_span_tree_response.py">TelemetryGetSpanTreeResponse</a></code>
 - <code title="get /v1/telemetry/traces/{trace_id}">client.telemetry.<a href="./src/llama_stack_client/resources/telemetry.py">get_trace</a>(trace_id) -> <a href="./src/llama_stack_client/types/trace.py">Trace</a></code>
 - <code title="post /v1/telemetry/events">client.telemetry.<a href="./src/llama_stack_client/resources/telemetry.py">log_event</a>(\*\*<a href="src/llama_stack_client/types/telemetry_log_event_params.py">params</a>) -> None</code>
+- <code title="post /v1/telemetry/metrics/{metric_name}">client.telemetry.<a href="./src/llama_stack_client/resources/telemetry.py">query_metrics</a>(metric_name, \*\*<a href="src/llama_stack_client/types/telemetry_query_metrics_params.py">params</a>) -> <a href="./src/llama_stack_client/types/telemetry_query_metrics_response.py">TelemetryQueryMetricsResponse</a></code>
 - <code title="post /v1/telemetry/spans">client.telemetry.<a href="./src/llama_stack_client/resources/telemetry.py">query_spans</a>(\*\*<a href="src/llama_stack_client/types/telemetry_query_spans_params.py">params</a>) -> <a href="./src/llama_stack_client/types/telemetry_query_spans_response.py">TelemetryQuerySpansResponse</a></code>
 - <code title="post /v1/telemetry/traces">client.telemetry.<a href="./src/llama_stack_client/resources/telemetry.py">query_traces</a>(\*\*<a href="src/llama_stack_client/types/telemetry_query_traces_params.py">params</a>) -> <a href="./src/llama_stack_client/types/telemetry_query_traces_response.py">TelemetryQueryTracesResponse</a></code>
 - <code title="post /v1/telemetry/spans/export">client.telemetry.<a href="./src/llama_stack_client/resources/telemetry.py">save_spans_to_dataset</a>(\*\*<a href="src/llama_stack_client/types/telemetry_save_spans_to_dataset_params.py">params</a>) -> None</code>
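The headline addition here is the `query_metrics` route. A minimal sketch of calling it through the generated client, assuming a locally running Llama Stack server; every parameter name other than `metric_name` is an assumption for illustration, since the real fields live in `telemetry_query_metrics_params.py`, which this diff does not show:

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # assumed local server

# POST /v1/telemetry/metrics/{metric_name} -> TelemetryQueryMetricsResponse
response = client.telemetry.query_metrics(
    metric_name="prompt_tokens",  # hypothetical metric name
    start_time=0,                 # assumed parameter, not shown in this diff
)
print(response)
```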
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "llama_stack_client"
-version = "0.2.18"
+version = "0.2.19-alpha.1"
 description = "The official Python library for the llama-stack-client API"
 dynamic = ["readme"]
 license = "MIT"
18 changes: 12 additions & 6 deletions src/llama_stack_client/pagination.py
@@ -24,10 +24,13 @@ def _get_page_items(self) -> List[_T]:
     @override
     def next_page_info(self) -> Optional[PageInfo]:
         next_index = self.next_index
-        if not next_index:
-            return None
+        if next_index is None:
+            return None  # type: ignore[unreachable]
+
+        length = len(self._get_page_items())
+        current_count = next_index + length
 
-        return PageInfo(params={"start_index": next_index})
+        return PageInfo(params={"start_index": current_count})
 
 
 class AsyncDatasetsIterrows(BaseAsyncPage[_T], BasePage[_T], Generic[_T]):
@@ -44,10 +47,13 @@ def _get_page_items(self) -> List[_T]:
     @override
     def next_page_info(self) -> Optional[PageInfo]:
         next_index = self.next_index
-        if not next_index:
-            return None
+        if next_index is None:
+            return None  # type: ignore[unreachable]
+
+        length = len(self._get_page_items())
+        current_count = next_index + length
 
-        return PageInfo(params={"start_index": next_index})
+        return PageInfo(params={"start_index": current_count})
 
 
 class SyncOpenAICursorPage(BaseSyncPage[_T], BasePage[_T], Generic[_T]):
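This pagination change fixes two distinct bugs: `if not next_index` treated a legitimate start index of 0 as "no more pages", and the old code passed `next_index` through unchanged rather than advancing it by the number of rows just fetched. A standalone sketch of the corrected arithmetic (not the SDK class itself):

```python
from typing import List, Optional

def next_start_index(next_index: Optional[int], page_rows: List[str]) -> Optional[int]:
    # `if not next_index` would also bail out when next_index == 0;
    # the explicit None check keeps index 0 paginating correctly.
    if next_index is None:
        return None
    # Advance past everything consumed so far: current offset + page length.
    return next_index + len(page_rows)

assert next_start_index(0, ["row1", "row2"]) == 2  # first page of two rows
assert next_start_index(2, ["row3"]) == 3          # next request starts after row3
assert next_start_index(None, []) is None          # server signalled the end
```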
8 changes: 4 additions & 4 deletions src/llama_stack_client/resources/files.py
@@ -50,7 +50,7 @@ def create(
         self,
         *,
         file: FileTypes,
-        purpose: Literal["assistants"],
+        purpose: Literal["assistants", "batch"],
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
         extra_headers: Headers | None = None,
@@ -137,7 +137,7 @@ def list(
         after: str | NotGiven = NOT_GIVEN,
         limit: int | NotGiven = NOT_GIVEN,
         order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
-        purpose: Literal["assistants"] | NotGiven = NOT_GIVEN,
+        purpose: Literal["assistants", "batch"] | NotGiven = NOT_GIVEN,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
         extra_headers: Headers | None = None,
@@ -282,7 +282,7 @@ async def create(
         self,
         *,
         file: FileTypes,
-        purpose: Literal["assistants"],
+        purpose: Literal["assistants", "batch"],
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
         extra_headers: Headers | None = None,
@@ -369,7 +369,7 @@ def list(
         after: str | NotGiven = NOT_GIVEN,
         limit: int | NotGiven = NOT_GIVEN,
         order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
-        purpose: Literal["assistants"] | NotGiven = NOT_GIVEN,
+        purpose: Literal["assistants", "batch"] | NotGiven = NOT_GIVEN,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
         extra_headers: Headers | None = None,
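Widening the literal means `purpose="batch"` now type-checks on both the sync and async clients, matching the new batches support in the API. A short usage sketch, assuming a reachable server and a local JSONL file (names are illustrative):

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # assumed local server

# Upload a file earmarked for batch processing; "assistants" remains valid too.
with open("batch_requests.jsonl", "rb") as f:
    uploaded = client.files.create(file=f, purpose="batch")

# The same literal widening applies to the list filter.
for file in client.files.list(purpose="batch"):
    print(file)
```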
26 changes: 13 additions & 13 deletions src/llama_stack_client/resources/inference.py
@@ -27,10 +27,10 @@
 )
 from .._streaming import Stream, AsyncStream
 from .._base_client import make_request_options
-from ..types.completion_response import CompletionResponse
 from ..types.embeddings_response import EmbeddingsResponse
 from ..types.shared_params.message import Message
 from ..types.shared.batch_completion import BatchCompletion
+from ..types.inference_completion_params import UnnamedTypeWithNoPropertyInfoOrParent0
 from ..types.shared_params.response_format import ResponseFormat
 from ..types.shared_params.sampling_params import SamplingParams
 from ..types.shared.chat_completion_response import ChatCompletionResponse
@@ -467,7 +467,7 @@ def completion(
         extra_query: Query | None = None,
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> CompletionResponse:
+    ) -> UnnamedTypeWithNoPropertyInfoOrParent0:
         """
         Generate a completion for the given content using the specified model.
 
@@ -514,7 +514,7 @@ def completion(
         extra_query: Query | None = None,
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Stream[CompletionResponse]:
+    ) -> Stream[UnnamedTypeWithNoPropertyInfoOrParent0]:
         """
         Generate a completion for the given content using the specified model.
 
@@ -561,7 +561,7 @@ def completion(
         extra_query: Query | None = None,
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> CompletionResponse | Stream[CompletionResponse]:
+    ) -> UnnamedTypeWithNoPropertyInfoOrParent0 | Stream[UnnamedTypeWithNoPropertyInfoOrParent0]:
         """
         Generate a completion for the given content using the specified model.
 
@@ -608,7 +608,7 @@ def completion(
         extra_query: Query | None = None,
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> CompletionResponse | Stream[CompletionResponse]:
+    ) -> UnnamedTypeWithNoPropertyInfoOrParent0 | Stream[UnnamedTypeWithNoPropertyInfoOrParent0]:
         if stream:
             extra_headers = {"Accept": "text/event-stream", **(extra_headers or {})}
         return self._post(
@@ -629,9 +629,9 @@ def completion(
             options=make_request_options(
                 extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
             ),
-            cast_to=CompletionResponse,
+            cast_to=UnnamedTypeWithNoPropertyInfoOrParent0,
             stream=stream or False,
-            stream_cls=Stream[CompletionResponse],
+            stream_cls=Stream[UnnamedTypeWithNoPropertyInfoOrParent0],
         )
 
     @typing_extensions.deprecated("/v1/inference/embeddings is deprecated. Please use /v1/openai/v1/embeddings.")
@@ -1122,7 +1122,7 @@ async def completion(
         extra_query: Query | None = None,
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> CompletionResponse:
+    ) -> UnnamedTypeWithNoPropertyInfoOrParent0:
         """
         Generate a completion for the given content using the specified model.
 
@@ -1169,7 +1169,7 @@ async def completion(
         extra_query: Query | None = None,
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> AsyncStream[CompletionResponse]:
+    ) -> AsyncStream[UnnamedTypeWithNoPropertyInfoOrParent0]:
         """
         Generate a completion for the given content using the specified model.
 
@@ -1216,7 +1216,7 @@ async def completion(
         extra_query: Query | None = None,
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> CompletionResponse | AsyncStream[CompletionResponse]:
+    ) -> UnnamedTypeWithNoPropertyInfoOrParent0 | AsyncStream[UnnamedTypeWithNoPropertyInfoOrParent0]:
         """
         Generate a completion for the given content using the specified model.
 
@@ -1263,7 +1263,7 @@ async def completion(
         extra_query: Query | None = None,
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> CompletionResponse | AsyncStream[CompletionResponse]:
+    ) -> UnnamedTypeWithNoPropertyInfoOrParent0 | AsyncStream[UnnamedTypeWithNoPropertyInfoOrParent0]:
         if stream:
             extra_headers = {"Accept": "text/event-stream", **(extra_headers or {})}
         return await self._post(
@@ -1284,9 +1284,9 @@ async def completion(
             options=make_request_options(
                 extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
             ),
-            cast_to=CompletionResponse,
+            cast_to=UnnamedTypeWithNoPropertyInfoOrParent0,
            stream=stream or False,
-            stream_cls=AsyncStream[CompletionResponse],
+            stream_cls=AsyncStream[UnnamedTypeWithNoPropertyInfoOrParent0],
        )
 
    @typing_extensions.deprecated("/v1/inference/embeddings is deprecated. Please use /v1/openai/v1/embeddings.")
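The renamed annotation is a static-typing change only; the request and response shapes on the wire are untouched, and the (already deprecated) endpoint is invoked exactly as before. A sketch, assuming the `model_id`/`content` parameters from earlier releases and a locally running server:

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # assumed local server

# Non-streaming: returns the (renamed) completion type.
result = client.inference.completion(
    model_id="meta-llama/Llama-3.1-8B-Instruct",  # assumed model id
    content="Say hello in one sentence.",
)
print(result)

# Streaming: returns Stream[...] and yields chunks over SSE.
for chunk in client.inference.completion(
    model_id="meta-llama/Llama-3.1-8B-Instruct",
    content="Say hello in one sentence.",
    stream=True,
):
    print(chunk)
```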