2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "llama_stack_client"
-version = "0.0.49"
+version = "0.0.53rc4"
 description = "The official Python library for the llama-stack-client API"
 dynamic = ["readme"]
 license = "Apache-2.0"
16 changes: 8 additions & 8 deletions src/llama_stack_client/_client.py
@@ -48,7 +48,7 @@
 
 class LlamaStackClient(SyncAPIClient):
     agents: resources.AgentsResource
-    batch_inferences: resources.BatchInferencesResource
+    batch_inference: resources.BatchInferenceResource
     datasets: resources.DatasetsResource
     eval: resources.EvalResource
     inspect: resources.InspectResource
@@ -117,7 +117,7 @@ def __init__(
         )
 
         self.agents = resources.AgentsResource(self)
-        self.batch_inferences = resources.BatchInferencesResource(self)
+        self.batch_inference = resources.BatchInferenceResource(self)
         self.datasets = resources.DatasetsResource(self)
         self.eval = resources.EvalResource(self)
         self.inspect = resources.InspectResource(self)
@@ -238,7 +238,7 @@ def _make_status_error(
 
 class AsyncLlamaStackClient(AsyncAPIClient):
     agents: resources.AsyncAgentsResource
-    batch_inferences: resources.AsyncBatchInferencesResource
+    batch_inference: resources.AsyncBatchInferenceResource
     datasets: resources.AsyncDatasetsResource
     eval: resources.AsyncEvalResource
     inspect: resources.AsyncInspectResource
@@ -307,7 +307,7 @@ def __init__(
         )
 
         self.agents = resources.AsyncAgentsResource(self)
-        self.batch_inferences = resources.AsyncBatchInferencesResource(self)
+        self.batch_inference = resources.AsyncBatchInferenceResource(self)
         self.datasets = resources.AsyncDatasetsResource(self)
         self.eval = resources.AsyncEvalResource(self)
         self.inspect = resources.AsyncInspectResource(self)
@@ -429,7 +429,7 @@ def _make_status_error(
 class LlamaStackClientWithRawResponse:
     def __init__(self, client: LlamaStackClient) -> None:
         self.agents = resources.AgentsResourceWithRawResponse(client.agents)
-        self.batch_inferences = resources.BatchInferencesResourceWithRawResponse(client.batch_inferences)
+        self.batch_inference = resources.BatchInferenceResourceWithRawResponse(client.batch_inference)
         self.datasets = resources.DatasetsResourceWithRawResponse(client.datasets)
         self.eval = resources.EvalResourceWithRawResponse(client.eval)
         self.inspect = resources.InspectResourceWithRawResponse(client.inspect)
@@ -455,7 +455,7 @@ def __init__(self, client: LlamaStackClient) -> None:
 class AsyncLlamaStackClientWithRawResponse:
     def __init__(self, client: AsyncLlamaStackClient) -> None:
         self.agents = resources.AsyncAgentsResourceWithRawResponse(client.agents)
-        self.batch_inferences = resources.AsyncBatchInferencesResourceWithRawResponse(client.batch_inferences)
+        self.batch_inference = resources.AsyncBatchInferenceResourceWithRawResponse(client.batch_inference)
         self.datasets = resources.AsyncDatasetsResourceWithRawResponse(client.datasets)
         self.eval = resources.AsyncEvalResourceWithRawResponse(client.eval)
         self.inspect = resources.AsyncInspectResourceWithRawResponse(client.inspect)
@@ -481,7 +481,7 @@ def __init__(self, client: AsyncLlamaStackClient) -> None:
 class LlamaStackClientWithStreamedResponse:
     def __init__(self, client: LlamaStackClient) -> None:
         self.agents = resources.AgentsResourceWithStreamingResponse(client.agents)
-        self.batch_inferences = resources.BatchInferencesResourceWithStreamingResponse(client.batch_inferences)
+        self.batch_inference = resources.BatchInferenceResourceWithStreamingResponse(client.batch_inference)
         self.datasets = resources.DatasetsResourceWithStreamingResponse(client.datasets)
         self.eval = resources.EvalResourceWithStreamingResponse(client.eval)
         self.inspect = resources.InspectResourceWithStreamingResponse(client.inspect)
@@ -507,7 +507,7 @@ def __init__(self, client: LlamaStackClient) -> None:
 class AsyncLlamaStackClientWithStreamedResponse:
     def __init__(self, client: AsyncLlamaStackClient) -> None:
         self.agents = resources.AsyncAgentsResourceWithStreamingResponse(client.agents)
-        self.batch_inferences = resources.AsyncBatchInferencesResourceWithStreamingResponse(client.batch_inferences)
+        self.batch_inference = resources.AsyncBatchInferenceResourceWithStreamingResponse(client.batch_inference)
         self.datasets = resources.AsyncDatasetsResourceWithStreamingResponse(client.datasets)
         self.eval = resources.AsyncEvalResourceWithStreamingResponse(client.eval)
         self.inspect = resources.AsyncInspectResourceWithStreamingResponse(client.inspect)
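For SDK users, the visible change in this file is the attribute rename: the resource is now exposed as `client.batch_inference` (singular). A minimal sketch of the before/after, assuming a placeholder `base_url` pointing at a running Llama Stack server (the resource's own methods are not part of this diff):

```python
from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient

client = LlamaStackClient(base_url="http://localhost:5000")  # placeholder URL

# Old attribute, removed by this change:
#   client.batch_inferences
# New attribute:
print(type(client.batch_inference))  # BatchInferenceResource

# The async client mirrors the rename:
async_client = AsyncLlamaStackClient(base_url="http://localhost:5000")
print(type(async_client.batch_inference))  # AsyncBatchInferenceResource
```

The raw-response and streaming wrappers follow suit, so `LlamaStackClientWithRawResponse(client).batch_inference` is the new spelling there as well.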
90 changes: 40 additions & 50 deletions src/llama_stack_client/_utils/_sync.py
@@ -1,56 +1,62 @@
 from __future__ import annotations
 
+import sys
+import asyncio
 import functools
-from typing import TypeVar, Callable, Awaitable
+import contextvars
+from typing import Any, TypeVar, Callable, Awaitable
 from typing_extensions import ParamSpec
 
-import anyio
-import anyio.to_thread
-
-from ._reflection import function_has_argument
 
 T_Retval = TypeVar("T_Retval")
 T_ParamSpec = ParamSpec("T_ParamSpec")
 
 
-# copied from `asyncer`, https://github.com/tiangolo/asyncer
-def asyncify(
-    function: Callable[T_ParamSpec, T_Retval],
-    *,
-    cancellable: bool = False,
-    limiter: anyio.CapacityLimiter | None = None,
-) -> Callable[T_ParamSpec, Awaitable[T_Retval]]:
+if sys.version_info >= (3, 9):
+    to_thread = asyncio.to_thread
+else:
+    # backport of https://docs.python.org/3/library/asyncio-task.html#asyncio.to_thread
+    # for Python 3.8 support
+    async def to_thread(
+        func: Callable[T_ParamSpec, T_Retval], /, *args: T_ParamSpec.args, **kwargs: T_ParamSpec.kwargs
+    ) -> Any:
+        """Asynchronously run function *func* in a separate thread.
+
+        Any *args and **kwargs supplied for this function are directly passed
+        to *func*. Also, the current :class:`contextvars.Context` is propagated,
+        allowing context variables from the main thread to be accessed in the
+        separate thread.
+
+        Returns a coroutine that can be awaited to get the eventual result of *func*.
+        """
+        loop = asyncio.events.get_running_loop()
+        ctx = contextvars.copy_context()
+        func_call = functools.partial(ctx.run, func, *args, **kwargs)
+        return await loop.run_in_executor(None, func_call)
+
+
+# inspired by `asyncer`, https://github.com/tiangolo/asyncer
+def asyncify(function: Callable[T_ParamSpec, T_Retval]) -> Callable[T_ParamSpec, Awaitable[T_Retval]]:
"""
Take a blocking function and create an async one that receives the same
positional and keyword arguments, and that when called, calls the original function
in a worker thread using `anyio.to_thread.run_sync()`. Internally,
`asyncer.asyncify()` uses the same `anyio.to_thread.run_sync()`, but it supports
keyword arguments additional to positional arguments and it adds better support for
autocompletion and inline errors for the arguments of the function called and the
return value.

If the `cancellable` option is enabled and the task waiting for its completion is
cancelled, the thread will still run its course but its return value (or any raised
exception) will be ignored.
positional and keyword arguments. For python version 3.9 and above, it uses
asyncio.to_thread to run the function in a separate thread. For python version
3.8, it uses locally defined copy of the asyncio.to_thread function which was
introduced in python 3.9.

Use it like this:
Usage:

```Python
def do_work(arg1, arg2, kwarg1="", kwarg2="") -> str:
# Do work
return "Some result"
```python
def blocking_func(arg1, arg2, kwarg1=None):
# blocking code
return result


result = await to_thread.asyncify(do_work)("spam", "ham", kwarg1="a", kwarg2="b")
print(result)
result = asyncify(blocking_function)(arg1, arg2, kwarg1=value1)
```

## Arguments

`function`: a blocking regular callable (e.g. a function)
`cancellable`: `True` to allow cancellation of the operation
`limiter`: capacity limiter to use to limit the total amount of threads running
(if omitted, the default limiter is used)

## Return

@@ -60,22 +66,6 @@ def do_work(arg1, arg2, kwarg1="", kwarg2="") -> str:
"""

async def wrapper(*args: T_ParamSpec.args, **kwargs: T_ParamSpec.kwargs) -> T_Retval:
partial_f = functools.partial(function, *args, **kwargs)

# In `v4.1.0` anyio added the `abandon_on_cancel` argument and deprecated the old
# `cancellable` argument, so we need to use the new `abandon_on_cancel` to avoid
# surfacing deprecation warnings.
if function_has_argument(anyio.to_thread.run_sync, "abandon_on_cancel"):
return await anyio.to_thread.run_sync(
partial_f,
abandon_on_cancel=cancellable,
limiter=limiter,
)

return await anyio.to_thread.run_sync(
partial_f,
cancellable=cancellable,
limiter=limiter,
)
return await to_thread(function, *args, **kwargs)

return wrapper
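A runnable sketch of the new behavior, assuming Python 3.9+ (where `to_thread` is simply `asyncio.to_thread`) and that `asyncify` is imported from the private module this diff edits:

```python
import asyncio
import time

from llama_stack_client._utils._sync import asyncify


def slow_add(a: int, b: int, *, delay: float = 0.1) -> int:
    # Blocking work; called directly it would stall the event loop.
    time.sleep(delay)
    return a + b


async def main() -> None:
    # asyncify(slow_add) returns an async wrapper that runs slow_add in a
    # worker thread via asyncio.to_thread, keeping the event loop free.
    result = await asyncify(slow_add)(2, 3, delay=0.05)
    print(result)  # 5


asyncio.run(main())
```

One behavioral note: the old anyio-based version accepted `cancellable` and `limiter` arguments; the new one drops them, so thread usage now follows the default executor policy of `asyncio.to_thread`.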
26 changes: 13 additions & 13 deletions src/llama_stack_client/resources/__init__.py
@@ -136,13 +136,13 @@
     PostTrainingResourceWithStreamingResponse,
     AsyncPostTrainingResourceWithStreamingResponse,
 )
-from .batch_inferences import (
-    BatchInferencesResource,
-    AsyncBatchInferencesResource,
-    BatchInferencesResourceWithRawResponse,
-    AsyncBatchInferencesResourceWithRawResponse,
-    BatchInferencesResourceWithStreamingResponse,
-    AsyncBatchInferencesResourceWithStreamingResponse,
+from .batch_inference import (
+    BatchInferenceResource,
+    AsyncBatchInferenceResource,
+    BatchInferenceResourceWithRawResponse,
+    AsyncBatchInferenceResourceWithRawResponse,
+    BatchInferenceResourceWithStreamingResponse,
+    AsyncBatchInferenceResourceWithStreamingResponse,
 )
 from .scoring_functions import (
     ScoringFunctionsResource,
@@ -168,12 +168,12 @@
"AsyncAgentsResourceWithRawResponse",
"AgentsResourceWithStreamingResponse",
"AsyncAgentsResourceWithStreamingResponse",
"BatchInferencesResource",
"AsyncBatchInferencesResource",
"BatchInferencesResourceWithRawResponse",
"AsyncBatchInferencesResourceWithRawResponse",
"BatchInferencesResourceWithStreamingResponse",
"AsyncBatchInferencesResourceWithStreamingResponse",
"BatchInferenceResource",
"AsyncBatchInferenceResource",
"BatchInferenceResourceWithRawResponse",
"AsyncBatchInferenceResourceWithRawResponse",
"BatchInferenceResourceWithStreamingResponse",
"AsyncBatchInferenceResourceWithStreamingResponse",
"DatasetsResource",
"AsyncDatasetsResource",
"DatasetsResourceWithRawResponse",
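The rename propagates to anything importing these names from `llama_stack_client.resources`. A before/after sketch, using only names shown in this `__init__.py`:

```python
# Before this change:
# from llama_stack_client.resources import BatchInferencesResource

# After this change, the module and every exported class use the singular form:
from llama_stack_client.resources import (
    BatchInferenceResource,
    AsyncBatchInferenceResource,
)
```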
8 changes: 4 additions & 4 deletions src/llama_stack_client/resources/agents/agents.py
@@ -109,7 +109,7 @@ def create(
             **(extra_headers or {}),
         }
         return self._post(
-            "/agents/create",
+            "/alpha/agents/create",
             body=maybe_transform({"agent_config": agent_config}, agent_create_params.AgentCreateParams),
             options=make_request_options(
                 extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
@@ -145,7 +145,7 @@ def delete(
             **(extra_headers or {}),
         }
         return self._post(
-            "/agents/delete",
+            "/alpha/agents/delete",
             body=maybe_transform({"agent_id": agent_id}, agent_delete_params.AgentDeleteParams),
             options=make_request_options(
                 extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
@@ -213,7 +213,7 @@ async def create(
             **(extra_headers or {}),
         }
         return await self._post(
-            "/agents/create",
+            "/alpha/agents/create",
             body=await async_maybe_transform({"agent_config": agent_config}, agent_create_params.AgentCreateParams),
             options=make_request_options(
                 extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
@@ -249,7 +249,7 @@ async def delete(
             **(extra_headers or {}),
         }
         return await self._post(
-            "/agents/delete",
+            "/alpha/agents/delete",
             body=await async_maybe_transform({"agent_id": agent_id}, agent_delete_params.AgentDeleteParams),
             options=make_request_options(
                 extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
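Every edit in this file is the new `/alpha/` route prefix; the method signatures are untouched, so callers only see different wire paths. A sketch, assuming a placeholder `base_url`, an illustrative `agent_config` (its schema is not shown in this diff), and that the create response exposes `agent_id`:

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:5000")  # placeholder URL

# Unchanged call for SDK users, but it now POSTs to /alpha/agents/create
# (previously /agents/create).
agent = client.agents.create(
    agent_config={"model": "Llama3.1-8B-Instruct"},  # illustrative config
)

# Deletion moved the same way: POST /alpha/agents/delete.
client.agents.delete(agent_id=agent.agent_id)  # assumes agent_id on the response
```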
12 changes: 6 additions & 6 deletions src/llama_stack_client/resources/agents/session.py
@@ -76,7 +76,7 @@ def create(
             **(extra_headers or {}),
         }
         return self._post(
-            "/agents/session/create",
+            "/alpha/agents/session/create",
             body=maybe_transform(
                 {
                     "agent_id": agent_id,
@@ -119,7 +119,7 @@ def retrieve(
             **(extra_headers or {}),
         }
         return self._post(
-            "/agents/session/get",
+            "/alpha/agents/session/get",
             body=maybe_transform({"turn_ids": turn_ids}, session_retrieve_params.SessionRetrieveParams),
             options=make_request_options(
                 extra_headers=extra_headers,
@@ -166,7 +166,7 @@ def delete(
             **(extra_headers or {}),
         }
         return self._post(
-            "/agents/session/delete",
+            "/alpha/agents/session/delete",
             body=maybe_transform(
                 {
                     "agent_id": agent_id,
@@ -229,7 +229,7 @@ async def create(
             **(extra_headers or {}),
         }
         return await self._post(
-            "/agents/session/create",
+            "/alpha/agents/session/create",
             body=await async_maybe_transform(
                 {
                     "agent_id": agent_id,
@@ -272,7 +272,7 @@ async def retrieve(
             **(extra_headers or {}),
         }
         return await self._post(
-            "/agents/session/get",
+            "/alpha/agents/session/get",
             body=await async_maybe_transform({"turn_ids": turn_ids}, session_retrieve_params.SessionRetrieveParams),
             options=make_request_options(
                 extra_headers=extra_headers,
@@ -319,7 +319,7 @@ async def delete(
             **(extra_headers or {}),
         }
         return await self._post(
-            "/agents/session/delete",
+            "/alpha/agents/session/delete",
             body=await async_maybe_transform(
                 {
                     "agent_id": agent_id,
4 changes: 2 additions & 2 deletions src/llama_stack_client/resources/agents/steps.py
@@ -75,7 +75,7 @@ def retrieve(
             **(extra_headers or {}),
         }
         return self._get(
-            "/agents/step/get",
+            "/alpha/agents/step/get",
             options=make_request_options(
                 extra_headers=extra_headers,
                 extra_query=extra_query,
@@ -145,7 +145,7 @@ async def retrieve(
             **(extra_headers or {}),
         }
         return await self._get(
-            "/agents/step/get",
+            "/alpha/agents/step/get",
             options=make_request_options(
                 extra_headers=extra_headers,
                 extra_query=extra_query,
8 changes: 4 additions & 4 deletions src/llama_stack_client/resources/agents/turn.py
@@ -164,7 +164,7 @@ def create(
         return cast(
             TurnCreateResponse,
             self._post(
-                "/agents/turn/create",
+                "/alpha/agents/turn/create",
                 body=maybe_transform(
                     {
                         "agent_id": agent_id,
@@ -215,7 +215,7 @@ def retrieve(
             **(extra_headers or {}),
         }
         return self._get(
-            "/agents/turn/get",
+            "/alpha/agents/turn/get",
             options=make_request_options(
                 extra_headers=extra_headers,
                 extra_query=extra_query,
@@ -366,7 +366,7 @@ async def create(
         return cast(
             TurnCreateResponse,
             await self._post(
-                "/agents/turn/create",
+                "/alpha/agents/turn/create",
                 body=await async_maybe_transform(
                     {
                         "agent_id": agent_id,
@@ -417,7 +417,7 @@ async def retrieve(
             **(extra_headers or {}),
         }
         return await self._get(
-            "/agents/turn/get",
+            "/alpha/agents/turn/get",
             options=make_request_options(
                 extra_headers=extra_headers,
                 extra_query=extra_query,
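session.py, steps.py, and turn.py receive the identical treatment: same SDK methods, new `/alpha/`-prefixed paths. A compact summary of the route mapping these three files introduce (paths taken verbatim from the diff; `to_alpha` is a hypothetical helper for illustration, not part of the SDK):

```python
# Old path -> new path, as changed across agents/session.py, agents/steps.py,
# and agents/turn.py in this PR:
ALPHA_ROUTE_MAP = {
    "/agents/session/create": "/alpha/agents/session/create",
    "/agents/session/get": "/alpha/agents/session/get",
    "/agents/session/delete": "/alpha/agents/session/delete",
    "/agents/step/get": "/alpha/agents/step/get",
    "/agents/turn/create": "/alpha/agents/turn/create",
    "/agents/turn/get": "/alpha/agents/turn/get",
}


def to_alpha(path: str) -> str:
    """Return the /alpha-prefixed route for a pre-change agents path."""
    return ALPHA_ROUTE_MAP.get(path, path)


assert to_alpha("/agents/turn/create") == "/alpha/agents/turn/create"
```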