diff --git a/src/llama_stack_client/_base_client.py b/src/llama_stack_client/_base_client.py index 5a0376e6..a0c4ea5b 100644 --- a/src/llama_stack_client/_base_client.py +++ b/src/llama_stack_client/_base_client.py @@ -98,7 +98,11 @@ _AsyncStreamT = TypeVar("_AsyncStreamT", bound=AsyncStream[Any]) if TYPE_CHECKING: - from httpx._config import DEFAULT_TIMEOUT_CONFIG as HTTPX_DEFAULT_TIMEOUT + from httpx._config import ( + DEFAULT_TIMEOUT_CONFIG, # pyright: ignore[reportPrivateImportUsage] + ) + + HTTPX_DEFAULT_TIMEOUT = DEFAULT_TIMEOUT_CONFIG else: try: from httpx._config import DEFAULT_TIMEOUT_CONFIG as HTTPX_DEFAULT_TIMEOUT @@ -115,6 +119,7 @@ class PageInfo: url: URL | NotGiven params: Query | NotGiven + json: Body | NotGiven @overload def __init__( @@ -130,19 +135,30 @@ def __init__( params: Query, ) -> None: ... + @overload + def __init__( + self, + *, + json: Body, + ) -> None: ... + def __init__( self, *, url: URL | NotGiven = NOT_GIVEN, + json: Body | NotGiven = NOT_GIVEN, params: Query | NotGiven = NOT_GIVEN, ) -> None: self.url = url + self.json = json self.params = params @override def __repr__(self) -> str: if self.url: return f"{self.__class__.__name__}(url={self.url})" + if self.json: + return f"{self.__class__.__name__}(json={self.json})" return f"{self.__class__.__name__}(params={self.params})" @@ -191,6 +207,19 @@ def _info_to_options(self, info: PageInfo) -> FinalRequestOptions: options.url = str(url) return options + if not isinstance(info.json, NotGiven): + if not is_mapping(info.json): + raise TypeError("Pagination is only supported with mappings") + + if not options.json_data: + options.json_data = {**info.json} + else: + if not is_mapping(options.json_data): + raise TypeError("Pagination is only supported with mappings") + + options.json_data = {**options.json_data, **info.json} + return options + raise ValueError("Unexpected PageInfo state") @@ -408,8 +437,8 @@ def _build_headers(self, options: FinalRequestOptions, *, retries_taken: int = 0 headers = httpx.Headers(headers_dict) idempotency_header = self._idempotency_header - if idempotency_header and options.method.lower() != "get" and idempotency_header not in headers: - headers[idempotency_header] = options.idempotency_key or self._idempotency_key() + if idempotency_header and options.idempotency_key and idempotency_header not in headers: + headers[idempotency_header] = options.idempotency_key # Don't set these headers if they were already set or removed by the caller. We check # `custom_headers`, which can contain `Omit()`, instead of `headers` to account for the removal case. @@ -873,7 +902,6 @@ def request( self, cast_to: Type[ResponseT], options: FinalRequestOptions, - remaining_retries: Optional[int] = None, *, stream: Literal[True], stream_cls: Type[_StreamT], @@ -884,7 +912,6 @@ def request( self, cast_to: Type[ResponseT], options: FinalRequestOptions, - remaining_retries: Optional[int] = None, *, stream: Literal[False] = False, ) -> ResponseT: ... 
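# Aside (not part of the diff): a minimal sketch of what the new `json` field on
# PageInfo enables. Query-based pagination keeps working as before; a pager can
# now also return PageInfo(json=...) and _info_to_options() will merge that
# mapping into the existing JSON request body. The "cursor" key below is a
# hypothetical example, not an actual Llama Stack parameter.
from llama_stack_client._base_client import PageInfo

next_by_params = PageInfo(params={"page": 2})       # existing query-param form
next_by_body = PageInfo(json={"cursor": "abc123"})  # new form, merged into options.json_data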
@@ -894,7 +921,6 @@ def request( self, cast_to: Type[ResponseT], options: FinalRequestOptions, - remaining_retries: Optional[int] = None, *, stream: bool = False, stream_cls: Type[_StreamT] | None = None, @@ -904,121 +930,109 @@ def request( self, cast_to: Type[ResponseT], options: FinalRequestOptions, - remaining_retries: Optional[int] = None, *, stream: bool = False, stream_cls: type[_StreamT] | None = None, ) -> ResponseT | _StreamT: - if remaining_retries is not None: - retries_taken = options.get_max_retries(self.max_retries) - remaining_retries - else: - retries_taken = 0 - - return self._request( - cast_to=cast_to, - options=options, - stream=stream, - stream_cls=stream_cls, - retries_taken=retries_taken, - ) + cast_to = self._maybe_override_cast_to(cast_to, options) - def _request( - self, - *, - cast_to: Type[ResponseT], - options: FinalRequestOptions, - retries_taken: int, - stream: bool, - stream_cls: type[_StreamT] | None, - ) -> ResponseT | _StreamT: # create a copy of the options we were given so that if the # options are mutated later & we then retry, the retries are # given the original options input_options = model_copy(options) + if input_options.idempotency_key is None and input_options.method.lower() != "get": + # ensure the idempotency key is reused between requests + input_options.idempotency_key = self._idempotency_key() - cast_to = self._maybe_override_cast_to(cast_to, options) - options = self._prepare_options(options) - - remaining_retries = options.get_max_retries(self.max_retries) - retries_taken - request = self._build_request(options, retries_taken=retries_taken) - self._prepare_request(request) - - kwargs: HttpxSendArgs = {} - if self.custom_auth is not None: - kwargs["auth"] = self.custom_auth + response: httpx.Response | None = None + max_retries = input_options.get_max_retries(self.max_retries) - log.debug("Sending HTTP Request: %s %s", request.method, request.url) + retries_taken = 0 + for retries_taken in range(max_retries + 1): + options = model_copy(input_options) + options = self._prepare_options(options) - try: - response = self._client.send( - request, - stream=stream or self._should_stream_response_body(request=request), - **kwargs, - ) - except httpx.TimeoutException as err: - log.debug("Encountered httpx.TimeoutException", exc_info=True) + remaining_retries = max_retries - retries_taken + request = self._build_request(options, retries_taken=retries_taken) + self._prepare_request(request) - if remaining_retries > 0: - return self._retry_request( - input_options, - cast_to, - retries_taken=retries_taken, - stream=stream, - stream_cls=stream_cls, - response_headers=None, - ) + kwargs: HttpxSendArgs = {} + if self.custom_auth is not None: + kwargs["auth"] = self.custom_auth - log.debug("Raising timeout error") - raise APITimeoutError(request=request) from err - except Exception as err: - log.debug("Encountered Exception", exc_info=True) + log.debug("Sending HTTP Request: %s %s", request.method, request.url) - if remaining_retries > 0: - return self._retry_request( - input_options, - cast_to, - retries_taken=retries_taken, - stream=stream, - stream_cls=stream_cls, - response_headers=None, + response = None + try: + response = self._client.send( + request, + stream=stream or self._should_stream_response_body(request=request), + **kwargs, ) + except httpx.TimeoutException as err: + log.debug("Encountered httpx.TimeoutException", exc_info=True) + + if remaining_retries > 0: + self._sleep_for_retry( + retries_taken=retries_taken, + 
max_retries=max_retries, + options=input_options, + response=None, + ) + continue + + log.debug("Raising timeout error") + raise APITimeoutError(request=request) from err + except Exception as err: + log.debug("Encountered Exception", exc_info=True) + + if remaining_retries > 0: + self._sleep_for_retry( + retries_taken=retries_taken, + max_retries=max_retries, + options=input_options, + response=None, + ) + continue + + log.debug("Raising connection error") + raise APIConnectionError(request=request) from err + + log.debug( + 'HTTP Response: %s %s "%i %s" %s', + request.method, + request.url, + response.status_code, + response.reason_phrase, + response.headers, + ) - log.debug("Raising connection error") - raise APIConnectionError(request=request) from err - - log.debug( - 'HTTP Response: %s %s "%i %s" %s', - request.method, - request.url, - response.status_code, - response.reason_phrase, - response.headers, - ) + try: + response.raise_for_status() + except httpx.HTTPStatusError as err: # thrown on 4xx and 5xx status code + log.debug("Encountered httpx.HTTPStatusError", exc_info=True) + + if remaining_retries > 0 and self._should_retry(err.response): + err.response.close() + self._sleep_for_retry( + retries_taken=retries_taken, + max_retries=max_retries, + options=input_options, + response=response, + ) + continue - try: - response.raise_for_status() - except httpx.HTTPStatusError as err: # thrown on 4xx and 5xx status code - log.debug("Encountered httpx.HTTPStatusError", exc_info=True) - - if remaining_retries > 0 and self._should_retry(err.response): - err.response.close() - return self._retry_request( - input_options, - cast_to, - retries_taken=retries_taken, - response_headers=err.response.headers, - stream=stream, - stream_cls=stream_cls, - ) + # If the response is streamed then we need to explicitly read the response + # to completion before attempting to access the response text. + if not err.response.is_closed: + err.response.read() - # If the response is streamed then we need to explicitly read the response - # to completion before attempting to access the response text. 
- if not err.response.is_closed: - err.response.read() + log.debug("Re-raising status error") + raise self._make_status_error_from_response(err.response) from None - log.debug("Re-raising status error") - raise self._make_status_error_from_response(err.response) from None + break + assert response is not None, "could not resolve response (should never happen)" return self._process_response( cast_to=cast_to, options=options, @@ -1028,37 +1042,20 @@ def _request( retries_taken=retries_taken, ) - def _retry_request( - self, - options: FinalRequestOptions, - cast_to: Type[ResponseT], - *, - retries_taken: int, - response_headers: httpx.Headers | None, - stream: bool, - stream_cls: type[_StreamT] | None, - ) -> ResponseT | _StreamT: - remaining_retries = options.get_max_retries(self.max_retries) - retries_taken + def _sleep_for_retry( + self, *, retries_taken: int, max_retries: int, options: FinalRequestOptions, response: httpx.Response | None + ) -> None: + remaining_retries = max_retries - retries_taken if remaining_retries == 1: log.debug("1 retry left") else: log.debug("%i retries left", remaining_retries) - timeout = self._calculate_retry_timeout(remaining_retries, options, response_headers) + timeout = self._calculate_retry_timeout(remaining_retries, options, response.headers if response else None) log.info("Retrying request to %s in %f seconds", options.url, timeout) - # In a synchronous context we are blocking the entire thread. Up to the library user to run the client in a - # different thread if necessary. time.sleep(timeout) - return self._request( - options=options, - cast_to=cast_to, - retries_taken=retries_taken + 1, - stream=stream, - stream_cls=stream_cls, - ) - def _process_response( self, *, @@ -1402,7 +1399,6 @@ async def request( options: FinalRequestOptions, *, stream: Literal[False] = False, - remaining_retries: Optional[int] = None, ) -> ResponseT: ... @overload @@ -1413,7 +1409,6 @@ async def request( *, stream: Literal[True], stream_cls: type[_AsyncStreamT], - remaining_retries: Optional[int] = None, ) -> _AsyncStreamT: ... @overload @@ -1424,7 +1419,6 @@ async def request( *, stream: bool, stream_cls: type[_AsyncStreamT] | None = None, - remaining_retries: Optional[int] = None, ) -> ResponseT | _AsyncStreamT: ... 
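# Aside (not part of the diff): a rough sketch, under simplified assumptions, of
# the shape the retry logic takes after this change -- a bounded loop around a
# single send path (instead of recursive _retry_request calls), sleeping between
# attempts. The real code also reuses one idempotency key across attempts and
# derives the backoff from response headers via _calculate_retry_timeout.
import time
from typing import Callable, Optional, TypeVar

T = TypeVar("T")

def send_with_retries(send: Callable[[], T], max_retries: int, backoff: float = 0.5) -> T:
    last_exc: Optional[Exception] = None
    for retries_taken in range(max_retries + 1):
        remaining_retries = max_retries - retries_taken
        try:
            return send()
        except Exception as exc:  # the real code treats timeouts and status errors separately
            last_exc = exc
            if remaining_retries > 0:
                time.sleep(backoff * (retries_taken + 1))  # placeholder backoff, not the SDK's
                continue
            raise
    raise AssertionError("unreachable") from last_exc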
async def request( @@ -1434,116 +1428,111 @@ async def request( *, stream: bool = False, stream_cls: type[_AsyncStreamT] | None = None, - remaining_retries: Optional[int] = None, - ) -> ResponseT | _AsyncStreamT: - if remaining_retries is not None: - retries_taken = options.get_max_retries(self.max_retries) - remaining_retries - else: - retries_taken = 0 - - return await self._request( - cast_to=cast_to, - options=options, - stream=stream, - stream_cls=stream_cls, - retries_taken=retries_taken, - ) - - async def _request( - self, - cast_to: Type[ResponseT], - options: FinalRequestOptions, - *, - stream: bool, - stream_cls: type[_AsyncStreamT] | None, - retries_taken: int, ) -> ResponseT | _AsyncStreamT: if self._platform is None: # `get_platform` can make blocking IO calls so we # execute it earlier while we are in an async context self._platform = await asyncify(get_platform)() + cast_to = self._maybe_override_cast_to(cast_to, options) + # create a copy of the options we were given so that if the # options are mutated later & we then retry, the retries are # given the original options input_options = model_copy(options) + if input_options.idempotency_key is None and input_options.method.lower() != "get": + # ensure the idempotency key is reused between requests + input_options.idempotency_key = self._idempotency_key() - cast_to = self._maybe_override_cast_to(cast_to, options) - options = await self._prepare_options(options) + response: httpx.Response | None = None + max_retries = input_options.get_max_retries(self.max_retries) - remaining_retries = options.get_max_retries(self.max_retries) - retries_taken - request = self._build_request(options, retries_taken=retries_taken) - await self._prepare_request(request) + retries_taken = 0 + for retries_taken in range(max_retries + 1): + options = model_copy(input_options) + options = await self._prepare_options(options) - kwargs: HttpxSendArgs = {} - if self.custom_auth is not None: - kwargs["auth"] = self.custom_auth + remaining_retries = max_retries - retries_taken + request = self._build_request(options, retries_taken=retries_taken) + await self._prepare_request(request) - try: - response = await self._client.send( - request, - stream=stream or self._should_stream_response_body(request=request), - **kwargs, - ) - except httpx.TimeoutException as err: - log.debug("Encountered httpx.TimeoutException", exc_info=True) - - if remaining_retries > 0: - return await self._retry_request( - input_options, - cast_to, - retries_taken=retries_taken, - stream=stream, - stream_cls=stream_cls, - response_headers=None, - ) + kwargs: HttpxSendArgs = {} + if self.custom_auth is not None: + kwargs["auth"] = self.custom_auth - log.debug("Raising timeout error") - raise APITimeoutError(request=request) from err - except Exception as err: - log.debug("Encountered Exception", exc_info=True) + log.debug("Sending HTTP Request: %s %s", request.method, request.url) - if remaining_retries > 0: - return await self._retry_request( - input_options, - cast_to, - retries_taken=retries_taken, - stream=stream, - stream_cls=stream_cls, - response_headers=None, + response = None + try: + response = await self._client.send( + request, + stream=stream or self._should_stream_response_body(request=request), + **kwargs, ) + except httpx.TimeoutException as err: + log.debug("Encountered httpx.TimeoutException", exc_info=True) + + if remaining_retries > 0: + await self._sleep_for_retry( + retries_taken=retries_taken, + max_retries=max_retries, + options=input_options, + response=None, + 
) + continue + + log.debug("Raising timeout error") + raise APITimeoutError(request=request) from err + except Exception as err: + log.debug("Encountered Exception", exc_info=True) + + if remaining_retries > 0: + await self._sleep_for_retry( + retries_taken=retries_taken, + max_retries=max_retries, + options=input_options, + response=None, + ) + continue + + log.debug("Raising connection error") + raise APIConnectionError(request=request) from err + + log.debug( + 'HTTP Response: %s %s "%i %s" %s', + request.method, + request.url, + response.status_code, + response.reason_phrase, + response.headers, + ) - log.debug("Raising connection error") - raise APIConnectionError(request=request) from err + try: + response.raise_for_status() + except httpx.HTTPStatusError as err: # thrown on 4xx and 5xx status code + log.debug("Encountered httpx.HTTPStatusError", exc_info=True) + + if remaining_retries > 0 and self._should_retry(err.response): + await err.response.aclose() + await self._sleep_for_retry( + retries_taken=retries_taken, + max_retries=max_retries, + options=input_options, + response=response, + ) + continue - log.debug( - 'HTTP Request: %s %s "%i %s"', request.method, request.url, response.status_code, response.reason_phrase - ) + # If the response is streamed then we need to explicitly read the response + # to completion before attempting to access the response text. + if not err.response.is_closed: + await err.response.aread() - try: - response.raise_for_status() - except httpx.HTTPStatusError as err: # thrown on 4xx and 5xx status code - log.debug("Encountered httpx.HTTPStatusError", exc_info=True) - - if remaining_retries > 0 and self._should_retry(err.response): - await err.response.aclose() - return await self._retry_request( - input_options, - cast_to, - retries_taken=retries_taken, - response_headers=err.response.headers, - stream=stream, - stream_cls=stream_cls, - ) + log.debug("Re-raising status error") + raise self._make_status_error_from_response(err.response) from None - # If the response is streamed then we need to explicitly read the response - # to completion before attempting to access the response text. 
- if not err.response.is_closed: - await err.response.aread() - - log.debug("Re-raising status error") - raise self._make_status_error_from_response(err.response) from None + break + assert response is not None, "could not resolve response (should never happen)" return await self._process_response( cast_to=cast_to, options=options, @@ -1553,35 +1542,20 @@ async def _request( retries_taken=retries_taken, ) - async def _retry_request( - self, - options: FinalRequestOptions, - cast_to: Type[ResponseT], - *, - retries_taken: int, - response_headers: httpx.Headers | None, - stream: bool, - stream_cls: type[_AsyncStreamT] | None, - ) -> ResponseT | _AsyncStreamT: - remaining_retries = options.get_max_retries(self.max_retries) - retries_taken + async def _sleep_for_retry( + self, *, retries_taken: int, max_retries: int, options: FinalRequestOptions, response: httpx.Response | None + ) -> None: + remaining_retries = max_retries - retries_taken if remaining_retries == 1: log.debug("1 retry left") else: log.debug("%i retries left", remaining_retries) - timeout = self._calculate_retry_timeout(remaining_retries, options, response_headers) + timeout = self._calculate_retry_timeout(remaining_retries, options, response.headers if response else None) log.info("Retrying request to %s in %f seconds", options.url, timeout) await anyio.sleep(timeout) - return await self._request( - options=options, - cast_to=cast_to, - retries_taken=retries_taken + 1, - stream=stream, - stream_cls=stream_cls, - ) - async def _process_response( self, *, diff --git a/src/llama_stack_client/_client.py b/src/llama_stack_client/_client.py index 7066ae2a..f9f22967 100644 --- a/src/llama_stack_client/_client.py +++ b/src/llama_stack_client/_client.py @@ -20,10 +20,7 @@ ProxiesTypes, RequestOptions, ) -from ._utils import ( - is_given, - get_async_library, -) +from ._utils import is_given, get_async_library from ._version import __version__ from .resources import ( tools, @@ -41,6 +38,7 @@ benchmarks, toolgroups, vector_dbs, + completions, scoring_functions, synthetic_data_generation, ) @@ -51,6 +49,7 @@ SyncAPIClient, AsyncAPIClient, ) +from .resources.chat import chat from .resources.eval import eval from .resources.agents import agents from .resources.tool_runtime import tool_runtime @@ -77,6 +76,8 @@ class LlamaStackClient(SyncAPIClient): eval: eval.EvalResource inspect: inspect.InspectResource inference: inference.InferenceResource + chat: chat.ChatResource + completions: completions.CompletionsResource vector_io: vector_io.VectorIoResource vector_dbs: vector_dbs.VectorDBsResource models: models.ModelsResource @@ -157,6 +158,8 @@ def __init__( self.eval = eval.EvalResource(self) self.inspect = inspect.InspectResource(self) self.inference = inference.InferenceResource(self) + self.chat = chat.ChatResource(self) + self.completions = completions.CompletionsResource(self) self.vector_io = vector_io.VectorIoResource(self) self.vector_dbs = vector_dbs.VectorDBsResource(self) self.models = models.ModelsResource(self) @@ -289,6 +292,8 @@ class AsyncLlamaStackClient(AsyncAPIClient): eval: eval.AsyncEvalResource inspect: inspect.AsyncInspectResource inference: inference.AsyncInferenceResource + chat: chat.AsyncChatResource + completions: completions.AsyncCompletionsResource vector_io: vector_io.AsyncVectorIoResource vector_dbs: vector_dbs.AsyncVectorDBsResource models: models.AsyncModelsResource @@ -369,6 +374,8 @@ def __init__( self.eval = eval.AsyncEvalResource(self) self.inspect = inspect.AsyncInspectResource(self) self.inference = 
inference.AsyncInferenceResource(self) + self.chat = chat.AsyncChatResource(self) + self.completions = completions.AsyncCompletionsResource(self) self.vector_io = vector_io.AsyncVectorIoResource(self) self.vector_dbs = vector_dbs.AsyncVectorDBsResource(self) self.models = models.AsyncModelsResource(self) @@ -502,6 +509,8 @@ def __init__(self, client: LlamaStackClient) -> None: self.eval = eval.EvalResourceWithRawResponse(client.eval) self.inspect = inspect.InspectResourceWithRawResponse(client.inspect) self.inference = inference.InferenceResourceWithRawResponse(client.inference) + self.chat = chat.ChatResourceWithRawResponse(client.chat) + self.completions = completions.CompletionsResourceWithRawResponse(client.completions) self.vector_io = vector_io.VectorIoResourceWithRawResponse(client.vector_io) self.vector_dbs = vector_dbs.VectorDBsResourceWithRawResponse(client.vector_dbs) self.models = models.ModelsResourceWithRawResponse(client.models) @@ -529,6 +538,8 @@ def __init__(self, client: AsyncLlamaStackClient) -> None: self.eval = eval.AsyncEvalResourceWithRawResponse(client.eval) self.inspect = inspect.AsyncInspectResourceWithRawResponse(client.inspect) self.inference = inference.AsyncInferenceResourceWithRawResponse(client.inference) + self.chat = chat.AsyncChatResourceWithRawResponse(client.chat) + self.completions = completions.AsyncCompletionsResourceWithRawResponse(client.completions) self.vector_io = vector_io.AsyncVectorIoResourceWithRawResponse(client.vector_io) self.vector_dbs = vector_dbs.AsyncVectorDBsResourceWithRawResponse(client.vector_dbs) self.models = models.AsyncModelsResourceWithRawResponse(client.models) @@ -558,6 +569,8 @@ def __init__(self, client: LlamaStackClient) -> None: self.eval = eval.EvalResourceWithStreamingResponse(client.eval) self.inspect = inspect.InspectResourceWithStreamingResponse(client.inspect) self.inference = inference.InferenceResourceWithStreamingResponse(client.inference) + self.chat = chat.ChatResourceWithStreamingResponse(client.chat) + self.completions = completions.CompletionsResourceWithStreamingResponse(client.completions) self.vector_io = vector_io.VectorIoResourceWithStreamingResponse(client.vector_io) self.vector_dbs = vector_dbs.VectorDBsResourceWithStreamingResponse(client.vector_dbs) self.models = models.ModelsResourceWithStreamingResponse(client.models) @@ -587,6 +600,8 @@ def __init__(self, client: AsyncLlamaStackClient) -> None: self.eval = eval.AsyncEvalResourceWithStreamingResponse(client.eval) self.inspect = inspect.AsyncInspectResourceWithStreamingResponse(client.inspect) self.inference = inference.AsyncInferenceResourceWithStreamingResponse(client.inference) + self.chat = chat.AsyncChatResourceWithStreamingResponse(client.chat) + self.completions = completions.AsyncCompletionsResourceWithStreamingResponse(client.completions) self.vector_io = vector_io.AsyncVectorIoResourceWithStreamingResponse(client.vector_io) self.vector_dbs = vector_dbs.AsyncVectorDBsResourceWithStreamingResponse(client.vector_dbs) self.models = models.AsyncModelsResourceWithStreamingResponse(client.models) diff --git a/src/llama_stack_client/_models.py b/src/llama_stack_client/_models.py index 34935716..798956f1 100644 --- a/src/llama_stack_client/_models.py +++ b/src/llama_stack_client/_models.py @@ -19,7 +19,6 @@ ) import pydantic -import pydantic.generics from pydantic.fields import FieldInfo from ._types import ( @@ -627,8 +626,8 @@ def _build_discriminated_union_meta(*, union: type, meta_annotations: tuple[Any, # Note: if one variant defines an 
alias then they all should discriminator_alias = field_info.alias - if field_info.annotation and is_literal_type(field_info.annotation): - for entry in get_args(field_info.annotation): + if (annotation := getattr(field_info, "annotation", None)) and is_literal_type(annotation): + for entry in get_args(annotation): if isinstance(entry, str): mapping[entry] = variant diff --git a/src/llama_stack_client/_response.py b/src/llama_stack_client/_response.py index 1938ae74..8486ab8e 100644 --- a/src/llama_stack_client/_response.py +++ b/src/llama_stack_client/_response.py @@ -235,7 +235,7 @@ def _parse(self, *, to: type[_T] | None = None) -> R | _T: # split is required to handle cases where additional information is included # in the response, e.g. application/json; charset=utf-8 content_type, *_ = response.headers.get("content-type", "*").split(";") - if content_type != "application/json": + if not content_type.endswith("json"): if is_basemodel(cast_to): try: data = response.json() diff --git a/src/llama_stack_client/_utils/_typing.py b/src/llama_stack_client/_utils/_typing.py index 1958820f..1bac9542 100644 --- a/src/llama_stack_client/_utils/_typing.py +++ b/src/llama_stack_client/_utils/_typing.py @@ -110,7 +110,7 @@ class MyResponse(Foo[_T]): ``` """ cls = cast(object, get_origin(typ) or typ) - if cls in generic_bases: + if cls in generic_bases: # pyright: ignore[reportUnnecessaryContains] # we're given the class directly return extract_type_arg(typ, index) diff --git a/src/llama_stack_client/_utils/_utils.py b/src/llama_stack_client/_utils/_utils.py index e5811bba..ea3cf3f2 100644 --- a/src/llama_stack_client/_utils/_utils.py +++ b/src/llama_stack_client/_utils/_utils.py @@ -72,8 +72,16 @@ def _extract_items( from .._files import assert_is_file_content # We have exhausted the path, return the entry we found. - assert_is_file_content(obj, key=flattened_key) assert flattened_key is not None + + if is_list(obj): + files: list[tuple[str, FileTypes]] = [] + for entry in obj: + assert_is_file_content(entry, key=flattened_key + "[]" if flattened_key else "") + files.append((flattened_key + "[]", cast(FileTypes, entry))) + return files + + assert_is_file_content(obj, key=flattened_key) return [(flattened_key, cast(FileTypes, obj))] index += 1 diff --git a/src/llama_stack_client/resources/__init__.py b/src/llama_stack_client/resources/__init__.py index 0e3373dc..ff5b3260 100644 --- a/src/llama_stack_client/resources/__init__.py +++ b/src/llama_stack_client/resources/__init__.py @@ -1,5 +1,13 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
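# Aside (not part of the diff): the relaxed content-type check in _response.py
# above means any media type ending in "json" (e.g. "application/vnd.api+json")
# is now parsed as JSON, not only the exact string "application/json". Same
# split-then-check logic as the changed line:
for value in ("application/json", "application/vnd.api+json; charset=utf-8", "text/plain"):
    content_type, *_ = value.split(";")
    print(value, "->", "json" if content_type.endswith("json") else "not json")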
+from .chat import ( + ChatResource, + AsyncChatResource, + ChatResourceWithRawResponse, + AsyncChatResourceWithRawResponse, + ChatResourceWithStreamingResponse, + AsyncChatResourceWithStreamingResponse, +) from .eval import ( EvalResource, AsyncEvalResource, @@ -136,6 +144,14 @@ VectorDBsResourceWithStreamingResponse, AsyncVectorDBsResourceWithStreamingResponse, ) +from .completions import ( + CompletionsResource, + AsyncCompletionsResource, + CompletionsResourceWithRawResponse, + AsyncCompletionsResourceWithRawResponse, + CompletionsResourceWithStreamingResponse, + AsyncCompletionsResourceWithStreamingResponse, +) from .tool_runtime import ( ToolRuntimeResource, AsyncToolRuntimeResource, @@ -218,6 +234,18 @@ "AsyncInferenceResourceWithRawResponse", "InferenceResourceWithStreamingResponse", "AsyncInferenceResourceWithStreamingResponse", + "ChatResource", + "AsyncChatResource", + "ChatResourceWithRawResponse", + "AsyncChatResourceWithRawResponse", + "ChatResourceWithStreamingResponse", + "AsyncChatResourceWithStreamingResponse", + "CompletionsResource", + "AsyncCompletionsResource", + "CompletionsResourceWithRawResponse", + "AsyncCompletionsResourceWithRawResponse", + "CompletionsResourceWithStreamingResponse", + "AsyncCompletionsResourceWithStreamingResponse", "VectorIoResource", "AsyncVectorIoResource", "VectorIoResourceWithRawResponse", diff --git a/src/llama_stack_client/resources/agents/agents.py b/src/llama_stack_client/resources/agents/agents.py index 314edd7e..ed03dde5 100644 --- a/src/llama_stack_client/resources/agents/agents.py +++ b/src/llama_stack_client/resources/agents/agents.py @@ -30,10 +30,7 @@ AsyncSessionResourceWithStreamingResponse, ) from ..._types import NOT_GIVEN, Body, Query, Headers, NoneType, NotGiven -from ..._utils import ( - maybe_transform, - async_maybe_transform, -) +from ..._utils import maybe_transform, async_maybe_transform from ..._compat import cached_property from ..._resource import SyncAPIResource, AsyncAPIResource from ..._response import ( diff --git a/src/llama_stack_client/resources/agents/session.py b/src/llama_stack_client/resources/agents/session.py index 592a94ba..0aec7449 100644 --- a/src/llama_stack_client/resources/agents/session.py +++ b/src/llama_stack_client/resources/agents/session.py @@ -7,10 +7,7 @@ import httpx from ..._types import NOT_GIVEN, Body, Query, Headers, NoneType, NotGiven -from ..._utils import ( - maybe_transform, - async_maybe_transform, -) +from ..._utils import maybe_transform, async_maybe_transform from ..._compat import cached_property from ..._resource import SyncAPIResource, AsyncAPIResource from ..._response import ( diff --git a/src/llama_stack_client/resources/agents/turn.py b/src/llama_stack_client/resources/agents/turn.py index 6b1b4ae2..8c48869e 100644 --- a/src/llama_stack_client/resources/agents/turn.py +++ b/src/llama_stack_client/resources/agents/turn.py @@ -8,11 +8,7 @@ import httpx from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from ..._utils import ( - required_args, - maybe_transform, - async_maybe_transform, -) +from ..._utils import required_args, maybe_transform, async_maybe_transform from ..._compat import cached_property from ..._resource import SyncAPIResource, AsyncAPIResource from ..._response import ( diff --git a/src/llama_stack_client/resources/benchmarks.py b/src/llama_stack_client/resources/benchmarks.py index f541a6ba..ff6af994 100644 --- a/src/llama_stack_client/resources/benchmarks.py +++ b/src/llama_stack_client/resources/benchmarks.py @@ -8,10 +8,7 @@ from 
..types import benchmark_register_params from .._types import NOT_GIVEN, Body, Query, Headers, NoneType, NotGiven -from .._utils import ( - maybe_transform, - async_maybe_transform, -) +from .._utils import maybe_transform, async_maybe_transform from .._compat import cached_property from .._resource import SyncAPIResource, AsyncAPIResource from .._response import ( diff --git a/src/llama_stack_client/resources/chat/__init__.py b/src/llama_stack_client/resources/chat/__init__.py new file mode 100644 index 00000000..ec960eb4 --- /dev/null +++ b/src/llama_stack_client/resources/chat/__init__.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from .chat import ( + ChatResource, + AsyncChatResource, + ChatResourceWithRawResponse, + AsyncChatResourceWithRawResponse, + ChatResourceWithStreamingResponse, + AsyncChatResourceWithStreamingResponse, +) +from .completions import ( + CompletionsResource, + AsyncCompletionsResource, + CompletionsResourceWithRawResponse, + AsyncCompletionsResourceWithRawResponse, + CompletionsResourceWithStreamingResponse, + AsyncCompletionsResourceWithStreamingResponse, +) + +__all__ = [ + "CompletionsResource", + "AsyncCompletionsResource", + "CompletionsResourceWithRawResponse", + "AsyncCompletionsResourceWithRawResponse", + "CompletionsResourceWithStreamingResponse", + "AsyncCompletionsResourceWithStreamingResponse", + "ChatResource", + "AsyncChatResource", + "ChatResourceWithRawResponse", + "AsyncChatResourceWithRawResponse", + "ChatResourceWithStreamingResponse", + "AsyncChatResourceWithStreamingResponse", +] diff --git a/src/llama_stack_client/resources/chat/chat.py b/src/llama_stack_client/resources/chat/chat.py new file mode 100644 index 00000000..681051f3 --- /dev/null +++ b/src/llama_stack_client/resources/chat/chat.py @@ -0,0 +1,102 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from ..._compat import cached_property +from ..._resource import SyncAPIResource, AsyncAPIResource +from .completions import ( + CompletionsResource, + AsyncCompletionsResource, + CompletionsResourceWithRawResponse, + AsyncCompletionsResourceWithRawResponse, + CompletionsResourceWithStreamingResponse, + AsyncCompletionsResourceWithStreamingResponse, +) + +__all__ = ["ChatResource", "AsyncChatResource"] + + +class ChatResource(SyncAPIResource): + @cached_property + def completions(self) -> CompletionsResource: + return CompletionsResource(self._client) + + @cached_property + def with_raw_response(self) -> ChatResourceWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers + """ + return ChatResourceWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> ChatResourceWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response + """ + return ChatResourceWithStreamingResponse(self) + + +class AsyncChatResource(AsyncAPIResource): + @cached_property + def completions(self) -> AsyncCompletionsResource: + return AsyncCompletionsResource(self._client) + + @cached_property + def with_raw_response(self) -> AsyncChatResourceWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers + """ + return AsyncChatResourceWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncChatResourceWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response + """ + return AsyncChatResourceWithStreamingResponse(self) + + +class ChatResourceWithRawResponse: + def __init__(self, chat: ChatResource) -> None: + self._chat = chat + + @cached_property + def completions(self) -> CompletionsResourceWithRawResponse: + return CompletionsResourceWithRawResponse(self._chat.completions) + + +class AsyncChatResourceWithRawResponse: + def __init__(self, chat: AsyncChatResource) -> None: + self._chat = chat + + @cached_property + def completions(self) -> AsyncCompletionsResourceWithRawResponse: + return AsyncCompletionsResourceWithRawResponse(self._chat.completions) + + +class ChatResourceWithStreamingResponse: + def __init__(self, chat: ChatResource) -> None: + self._chat = chat + + @cached_property + def completions(self) -> CompletionsResourceWithStreamingResponse: + return CompletionsResourceWithStreamingResponse(self._chat.completions) + + +class AsyncChatResourceWithStreamingResponse: + def __init__(self, chat: AsyncChatResource) -> None: + self._chat = chat + + @cached_property + def completions(self) -> AsyncCompletionsResourceWithStreamingResponse: + return AsyncCompletionsResourceWithStreamingResponse(self._chat.completions) diff --git a/src/llama_stack_client/resources/chat/completions.py b/src/llama_stack_client/resources/chat/completions.py new file mode 100644 index 00000000..7c449d41 --- /dev/null +++ b/src/llama_stack_client/resources/chat/completions.py @@ -0,0 +1,848 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from typing import Any, Dict, List, Union, Iterable, cast +from typing_extensions import Literal, overload + +import httpx + +from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from ..._utils import required_args, maybe_transform, async_maybe_transform +from ..._compat import cached_property +from ..._resource import SyncAPIResource, AsyncAPIResource +from ..._response import ( + to_raw_response_wrapper, + to_streamed_response_wrapper, + async_to_raw_response_wrapper, + async_to_streamed_response_wrapper, +) +from ..._streaming import Stream, AsyncStream +from ...types.chat import completion_create_params +from ..._base_client import make_request_options +from ...types.chat_completion_chunk import ChatCompletionChunk +from ...types.chat.completion_create_response import CompletionCreateResponse + +__all__ = ["CompletionsResource", "AsyncCompletionsResource"] + + +class CompletionsResource(SyncAPIResource): + @cached_property + def with_raw_response(self) -> CompletionsResourceWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers + """ + return CompletionsResourceWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> CompletionsResourceWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response + """ + return CompletionsResourceWithStreamingResponse(self) + + @overload + def create( + self, + *, + messages: Iterable[completion_create_params.Message], + model: str, + frequency_penalty: float | NotGiven = NOT_GIVEN, + function_call: Union[str, Dict[str, Union[bool, float, str, Iterable[object], object, None]]] + | NotGiven = NOT_GIVEN, + functions: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] | NotGiven = NOT_GIVEN, + logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, + logprobs: bool | NotGiven = NOT_GIVEN, + max_completion_tokens: int | NotGiven = NOT_GIVEN, + max_tokens: int | NotGiven = NOT_GIVEN, + n: int | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + presence_penalty: float | NotGiven = NOT_GIVEN, + response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, + seed: int | NotGiven = NOT_GIVEN, + stop: Union[str, List[str]] | NotGiven = NOT_GIVEN, + stream: Literal[False] | NotGiven = NOT_GIVEN, + stream_options: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + tool_choice: Union[str, Dict[str, Union[bool, float, str, Iterable[object], object, None]]] + | NotGiven = NOT_GIVEN, + tools: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] | NotGiven = NOT_GIVEN, + top_logprobs: int | NotGiven = NOT_GIVEN, + top_p: float | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> CompletionCreateResponse: + """ + Generate an OpenAI-compatible chat completion for the given messages using the + specified model. + + Args: + messages: List of messages in the conversation + + model: The identifier of the model to use. The model must be registered with Llama + Stack and available via the /models endpoint. + + frequency_penalty: (Optional) The penalty for repeated tokens + + function_call: (Optional) The function call to use + + functions: (Optional) List of functions to use + + logit_bias: (Optional) The logit bias to use + + logprobs: (Optional) The log probabilities to use + + max_completion_tokens: (Optional) The maximum number of tokens to generate + + max_tokens: (Optional) The maximum number of tokens to generate + + n: (Optional) The number of completions to generate + + parallel_tool_calls: (Optional) Whether to parallelize tool calls + + presence_penalty: (Optional) The penalty for repeated tokens + + response_format: (Optional) The response format to use + + seed: (Optional) The seed to use + + stop: (Optional) The stop tokens to use + + stream: (Optional) Whether to stream the response + + stream_options: (Optional) The stream options to use + + temperature: (Optional) The temperature to use + + tool_choice: (Optional) The tool choice to use + + tools: (Optional) The tools to use + + top_logprobs: (Optional) The top log probabilities to use + + top_p: (Optional) The top p to use + + user: (Optional) The user to use + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + def create( + self, + *, + messages: Iterable[completion_create_params.Message], + model: str, + stream: Literal[True], + frequency_penalty: float | NotGiven = NOT_GIVEN, + function_call: Union[str, Dict[str, Union[bool, float, str, Iterable[object], object, None]]] + | NotGiven = NOT_GIVEN, + functions: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] | NotGiven = NOT_GIVEN, + logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, + logprobs: bool | NotGiven = NOT_GIVEN, + max_completion_tokens: int | NotGiven = NOT_GIVEN, + max_tokens: int | NotGiven = NOT_GIVEN, + n: int | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + presence_penalty: float | NotGiven = NOT_GIVEN, + response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, + seed: int | NotGiven = NOT_GIVEN, + stop: Union[str, List[str]] | NotGiven = NOT_GIVEN, + stream_options: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + tool_choice: Union[str, Dict[str, Union[bool, float, str, Iterable[object], object, None]]] + | NotGiven = NOT_GIVEN, + tools: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] | NotGiven = NOT_GIVEN, + top_logprobs: int | NotGiven = NOT_GIVEN, + top_p: float | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
+ # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Stream[ChatCompletionChunk]: + """ + Generate an OpenAI-compatible chat completion for the given messages using the + specified model. + + Args: + messages: List of messages in the conversation + + model: The identifier of the model to use. The model must be registered with Llama + Stack and available via the /models endpoint. + + stream: (Optional) Whether to stream the response + + frequency_penalty: (Optional) The penalty for repeated tokens + + function_call: (Optional) The function call to use + + functions: (Optional) List of functions to use + + logit_bias: (Optional) The logit bias to use + + logprobs: (Optional) The log probabilities to use + + max_completion_tokens: (Optional) The maximum number of tokens to generate + + max_tokens: (Optional) The maximum number of tokens to generate + + n: (Optional) The number of completions to generate + + parallel_tool_calls: (Optional) Whether to parallelize tool calls + + presence_penalty: (Optional) The penalty for repeated tokens + + response_format: (Optional) The response format to use + + seed: (Optional) The seed to use + + stop: (Optional) The stop tokens to use + + stream_options: (Optional) The stream options to use + + temperature: (Optional) The temperature to use + + tool_choice: (Optional) The tool choice to use + + tools: (Optional) The tools to use + + top_logprobs: (Optional) The top log probabilities to use + + top_p: (Optional) The top p to use + + user: (Optional) The user to use + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... 
+ + @overload + def create( + self, + *, + messages: Iterable[completion_create_params.Message], + model: str, + stream: bool, + frequency_penalty: float | NotGiven = NOT_GIVEN, + function_call: Union[str, Dict[str, Union[bool, float, str, Iterable[object], object, None]]] + | NotGiven = NOT_GIVEN, + functions: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] | NotGiven = NOT_GIVEN, + logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, + logprobs: bool | NotGiven = NOT_GIVEN, + max_completion_tokens: int | NotGiven = NOT_GIVEN, + max_tokens: int | NotGiven = NOT_GIVEN, + n: int | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + presence_penalty: float | NotGiven = NOT_GIVEN, + response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, + seed: int | NotGiven = NOT_GIVEN, + stop: Union[str, List[str]] | NotGiven = NOT_GIVEN, + stream_options: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + tool_choice: Union[str, Dict[str, Union[bool, float, str, Iterable[object], object, None]]] + | NotGiven = NOT_GIVEN, + tools: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] | NotGiven = NOT_GIVEN, + top_logprobs: int | NotGiven = NOT_GIVEN, + top_p: float | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> CompletionCreateResponse | Stream[ChatCompletionChunk]: + """ + Generate an OpenAI-compatible chat completion for the given messages using the + specified model. + + Args: + messages: List of messages in the conversation + + model: The identifier of the model to use. The model must be registered with Llama + Stack and available via the /models endpoint. 
+ + stream: (Optional) Whether to stream the response + + frequency_penalty: (Optional) The penalty for repeated tokens + + function_call: (Optional) The function call to use + + functions: (Optional) List of functions to use + + logit_bias: (Optional) The logit bias to use + + logprobs: (Optional) The log probabilities to use + + max_completion_tokens: (Optional) The maximum number of tokens to generate + + max_tokens: (Optional) The maximum number of tokens to generate + + n: (Optional) The number of completions to generate + + parallel_tool_calls: (Optional) Whether to parallelize tool calls + + presence_penalty: (Optional) The penalty for repeated tokens + + response_format: (Optional) The response format to use + + seed: (Optional) The seed to use + + stop: (Optional) The stop tokens to use + + stream_options: (Optional) The stream options to use + + temperature: (Optional) The temperature to use + + tool_choice: (Optional) The tool choice to use + + tools: (Optional) The tools to use + + top_logprobs: (Optional) The top log probabilities to use + + top_p: (Optional) The top p to use + + user: (Optional) The user to use + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @required_args(["messages", "model"], ["messages", "model", "stream"]) + def create( + self, + *, + messages: Iterable[completion_create_params.Message], + model: str, + frequency_penalty: float | NotGiven = NOT_GIVEN, + function_call: Union[str, Dict[str, Union[bool, float, str, Iterable[object], object, None]]] + | NotGiven = NOT_GIVEN, + functions: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] | NotGiven = NOT_GIVEN, + logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, + logprobs: bool | NotGiven = NOT_GIVEN, + max_completion_tokens: int | NotGiven = NOT_GIVEN, + max_tokens: int | NotGiven = NOT_GIVEN, + n: int | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + presence_penalty: float | NotGiven = NOT_GIVEN, + response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, + seed: int | NotGiven = NOT_GIVEN, + stop: Union[str, List[str]] | NotGiven = NOT_GIVEN, + stream: Literal[False] | Literal[True] | NotGiven = NOT_GIVEN, + stream_options: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + tool_choice: Union[str, Dict[str, Union[bool, float, str, Iterable[object], object, None]]] + | NotGiven = NOT_GIVEN, + tools: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] | NotGiven = NOT_GIVEN, + top_logprobs: int | NotGiven = NOT_GIVEN, + top_p: float | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> CompletionCreateResponse | Stream[ChatCompletionChunk]: + return self._post( + "/v1/openai/v1/chat/completions", + body=maybe_transform( + { + "messages": messages, + "model": model, + "frequency_penalty": frequency_penalty, + "function_call": function_call, + "functions": functions, + "logit_bias": logit_bias, + "logprobs": logprobs, + "max_completion_tokens": max_completion_tokens, + "max_tokens": max_tokens, + "n": n, + "parallel_tool_calls": parallel_tool_calls, + "presence_penalty": presence_penalty, + "response_format": response_format, + "seed": seed, + "stop": stop, + "stream": stream, + "stream_options": stream_options, + "temperature": temperature, + "tool_choice": tool_choice, + "tools": tools, + "top_logprobs": top_logprobs, + "top_p": top_p, + "user": user, + }, + completion_create_params.CompletionCreateParamsStreaming + if stream + else completion_create_params.CompletionCreateParamsNonStreaming, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=cast( + Any, CompletionCreateResponse + ), # Union types cannot be passed in as arguments in the type system + stream=stream or False, + stream_cls=Stream[ChatCompletionChunk], + ) + + +class AsyncCompletionsResource(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncCompletionsResourceWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers + """ + return AsyncCompletionsResourceWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncCompletionsResourceWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response + """ + return AsyncCompletionsResourceWithStreamingResponse(self) + + @overload + async def create( + self, + *, + messages: Iterable[completion_create_params.Message], + model: str, + frequency_penalty: float | NotGiven = NOT_GIVEN, + function_call: Union[str, Dict[str, Union[bool, float, str, Iterable[object], object, None]]] + | NotGiven = NOT_GIVEN, + functions: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] | NotGiven = NOT_GIVEN, + logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, + logprobs: bool | NotGiven = NOT_GIVEN, + max_completion_tokens: int | NotGiven = NOT_GIVEN, + max_tokens: int | NotGiven = NOT_GIVEN, + n: int | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + presence_penalty: float | NotGiven = NOT_GIVEN, + response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, + seed: int | NotGiven = NOT_GIVEN, + stop: Union[str, List[str]] | NotGiven = NOT_GIVEN, + stream: Literal[False] | NotGiven = NOT_GIVEN, + stream_options: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + tool_choice: Union[str, Dict[str, Union[bool, float, str, Iterable[object], object, None]]] + | NotGiven = NOT_GIVEN, + tools: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] | NotGiven = NOT_GIVEN, + top_logprobs: int | NotGiven = NOT_GIVEN, + top_p: float | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> CompletionCreateResponse: + """ + Generate an OpenAI-compatible chat completion for the given messages using the + specified model. + + Args: + messages: List of messages in the conversation + + model: The identifier of the model to use. The model must be registered with Llama + Stack and available via the /models endpoint. 
+ + frequency_penalty: (Optional) The penalty for repeated tokens + + function_call: (Optional) The function call to use + + functions: (Optional) List of functions to use + + logit_bias: (Optional) The logit bias to use + + logprobs: (Optional) The log probabilities to use + + max_completion_tokens: (Optional) The maximum number of tokens to generate + + max_tokens: (Optional) The maximum number of tokens to generate + + n: (Optional) The number of completions to generate + + parallel_tool_calls: (Optional) Whether to parallelize tool calls + + presence_penalty: (Optional) The penalty for repeated tokens + + response_format: (Optional) The response format to use + + seed: (Optional) The seed to use + + stop: (Optional) The stop tokens to use + + stream: (Optional) Whether to stream the response + + stream_options: (Optional) The stream options to use + + temperature: (Optional) The temperature to use + + tool_choice: (Optional) The tool choice to use + + tools: (Optional) The tools to use + + top_logprobs: (Optional) The top log probabilities to use + + top_p: (Optional) The top p to use + + user: (Optional) The user to use + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + async def create( + self, + *, + messages: Iterable[completion_create_params.Message], + model: str, + stream: Literal[True], + frequency_penalty: float | NotGiven = NOT_GIVEN, + function_call: Union[str, Dict[str, Union[bool, float, str, Iterable[object], object, None]]] + | NotGiven = NOT_GIVEN, + functions: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] | NotGiven = NOT_GIVEN, + logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, + logprobs: bool | NotGiven = NOT_GIVEN, + max_completion_tokens: int | NotGiven = NOT_GIVEN, + max_tokens: int | NotGiven = NOT_GIVEN, + n: int | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + presence_penalty: float | NotGiven = NOT_GIVEN, + response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, + seed: int | NotGiven = NOT_GIVEN, + stop: Union[str, List[str]] | NotGiven = NOT_GIVEN, + stream_options: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + tool_choice: Union[str, Dict[str, Union[bool, float, str, Iterable[object], object, None]]] + | NotGiven = NOT_GIVEN, + tools: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] | NotGiven = NOT_GIVEN, + top_logprobs: int | NotGiven = NOT_GIVEN, + top_p: float | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncStream[ChatCompletionChunk]: + """ + Generate an OpenAI-compatible chat completion for the given messages using the + specified model. + + Args: + messages: List of messages in the conversation + + model: The identifier of the model to use. 
The model must be registered with Llama + Stack and available via the /models endpoint. + + stream: (Optional) Whether to stream the response + + frequency_penalty: (Optional) The penalty for repeated tokens + + function_call: (Optional) The function call to use + + functions: (Optional) List of functions to use + + logit_bias: (Optional) The logit bias to use + + logprobs: (Optional) The log probabilities to use + + max_completion_tokens: (Optional) The maximum number of tokens to generate + + max_tokens: (Optional) The maximum number of tokens to generate + + n: (Optional) The number of completions to generate + + parallel_tool_calls: (Optional) Whether to parallelize tool calls + + presence_penalty: (Optional) The penalty for repeated tokens + + response_format: (Optional) The response format to use + + seed: (Optional) The seed to use + + stop: (Optional) The stop tokens to use + + stream_options: (Optional) The stream options to use + + temperature: (Optional) The temperature to use + + tool_choice: (Optional) The tool choice to use + + tools: (Optional) The tools to use + + top_logprobs: (Optional) The top log probabilities to use + + top_p: (Optional) The top p to use + + user: (Optional) The user to use + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + async def create( + self, + *, + messages: Iterable[completion_create_params.Message], + model: str, + stream: bool, + frequency_penalty: float | NotGiven = NOT_GIVEN, + function_call: Union[str, Dict[str, Union[bool, float, str, Iterable[object], object, None]]] + | NotGiven = NOT_GIVEN, + functions: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] | NotGiven = NOT_GIVEN, + logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, + logprobs: bool | NotGiven = NOT_GIVEN, + max_completion_tokens: int | NotGiven = NOT_GIVEN, + max_tokens: int | NotGiven = NOT_GIVEN, + n: int | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + presence_penalty: float | NotGiven = NOT_GIVEN, + response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, + seed: int | NotGiven = NOT_GIVEN, + stop: Union[str, List[str]] | NotGiven = NOT_GIVEN, + stream_options: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + tool_choice: Union[str, Dict[str, Union[bool, float, str, Iterable[object], object, None]]] + | NotGiven = NOT_GIVEN, + tools: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] | NotGiven = NOT_GIVEN, + top_logprobs: int | NotGiven = NOT_GIVEN, + top_p: float | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> CompletionCreateResponse | AsyncStream[ChatCompletionChunk]: + """ + Generate an OpenAI-compatible chat completion for the given messages using the + specified model. 
+ + Args: + messages: List of messages in the conversation + + model: The identifier of the model to use. The model must be registered with Llama + Stack and available via the /models endpoint. + + stream: (Optional) Whether to stream the response + + frequency_penalty: (Optional) The penalty for repeated tokens + + function_call: (Optional) The function call to use + + functions: (Optional) List of functions to use + + logit_bias: (Optional) The logit bias to use + + logprobs: (Optional) The log probabilities to use + + max_completion_tokens: (Optional) The maximum number of tokens to generate + + max_tokens: (Optional) The maximum number of tokens to generate + + n: (Optional) The number of completions to generate + + parallel_tool_calls: (Optional) Whether to parallelize tool calls + + presence_penalty: (Optional) The penalty for repeated tokens + + response_format: (Optional) The response format to use + + seed: (Optional) The seed to use + + stop: (Optional) The stop tokens to use + + stream_options: (Optional) The stream options to use + + temperature: (Optional) The temperature to use + + tool_choice: (Optional) The tool choice to use + + tools: (Optional) The tools to use + + top_logprobs: (Optional) The top log probabilities to use + + top_p: (Optional) The top p to use + + user: (Optional) The user to use + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @required_args(["messages", "model"], ["messages", "model", "stream"]) + async def create( + self, + *, + messages: Iterable[completion_create_params.Message], + model: str, + frequency_penalty: float | NotGiven = NOT_GIVEN, + function_call: Union[str, Dict[str, Union[bool, float, str, Iterable[object], object, None]]] + | NotGiven = NOT_GIVEN, + functions: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] | NotGiven = NOT_GIVEN, + logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, + logprobs: bool | NotGiven = NOT_GIVEN, + max_completion_tokens: int | NotGiven = NOT_GIVEN, + max_tokens: int | NotGiven = NOT_GIVEN, + n: int | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + presence_penalty: float | NotGiven = NOT_GIVEN, + response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, + seed: int | NotGiven = NOT_GIVEN, + stop: Union[str, List[str]] | NotGiven = NOT_GIVEN, + stream: Literal[False] | Literal[True] | NotGiven = NOT_GIVEN, + stream_options: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + tool_choice: Union[str, Dict[str, Union[bool, float, str, Iterable[object], object, None]]] + | NotGiven = NOT_GIVEN, + tools: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] | NotGiven = NOT_GIVEN, + top_logprobs: int | NotGiven = NOT_GIVEN, + top_p: float | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> CompletionCreateResponse | AsyncStream[ChatCompletionChunk]: + return await self._post( + "/v1/openai/v1/chat/completions", + body=await async_maybe_transform( + { + "messages": messages, + "model": model, + "frequency_penalty": frequency_penalty, + "function_call": function_call, + "functions": functions, + "logit_bias": logit_bias, + "logprobs": logprobs, + "max_completion_tokens": max_completion_tokens, + "max_tokens": max_tokens, + "n": n, + "parallel_tool_calls": parallel_tool_calls, + "presence_penalty": presence_penalty, + "response_format": response_format, + "seed": seed, + "stop": stop, + "stream": stream, + "stream_options": stream_options, + "temperature": temperature, + "tool_choice": tool_choice, + "tools": tools, + "top_logprobs": top_logprobs, + "top_p": top_p, + "user": user, + }, + completion_create_params.CompletionCreateParamsStreaming + if stream + else completion_create_params.CompletionCreateParamsNonStreaming, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=cast( + Any, CompletionCreateResponse + ), # Union types cannot be passed in as arguments in the type system + stream=stream or False, + stream_cls=AsyncStream[ChatCompletionChunk], + ) + + +class CompletionsResourceWithRawResponse: + def __init__(self, completions: CompletionsResource) -> None: + self._completions = completions + + self.create = to_raw_response_wrapper( + completions.create, + ) + + +class AsyncCompletionsResourceWithRawResponse: + def __init__(self, completions: AsyncCompletionsResource) -> None: + self._completions = completions + + self.create = async_to_raw_response_wrapper( + completions.create, + ) + + +class CompletionsResourceWithStreamingResponse: + def __init__(self, completions: CompletionsResource) -> None: + self._completions = completions + + self.create = to_streamed_response_wrapper( + completions.create, + ) + + +class AsyncCompletionsResourceWithStreamingResponse: + def __init__(self, completions: AsyncCompletionsResource) -> None: + self._completions = completions + + self.create = async_to_streamed_response_wrapper( + completions.create, + ) diff --git a/src/llama_stack_client/resources/completions.py b/src/llama_stack_client/resources/completions.py new file mode 100644 index 00000000..8f57aeb4 --- /dev/null +++ b/src/llama_stack_client/resources/completions.py @@ -0,0 +1,715 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
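For orientation, a minimal usage sketch for the async chat-completions resource defined above. It assumes the AsyncLlamaStackClient entry point, that the resource is exposed as client.chat.completions, and a placeholder endpoint and model id; the returned objects are printed as-is rather than drilling into specific response fields.

import asyncio

from llama_stack_client import AsyncLlamaStackClient  # assumed async client entry point


async def main() -> None:
    # Hypothetical local endpoint; any model registered via /models works in place of the placeholder id.
    client = AsyncLlamaStackClient(base_url="http://localhost:8321")

    # Non-streaming call: resolves to a CompletionCreateResponse.
    response = await client.chat.completions.create(
        model="meta-llama/Llama-3.1-8B-Instruct",  # placeholder model id
        messages=[{"role": "user", "content": "Say hello in one sentence."}],
        temperature=0.2,
    )
    print(response)

    # Streaming call: stream=True selects the overload returning AsyncStream[ChatCompletionChunk].
    stream = await client.chat.completions.create(
        model="meta-llama/Llama-3.1-8B-Instruct",
        messages=[{"role": "user", "content": "Count to three."}],
        stream=True,
    )
    async for chunk in stream:
        print(chunk)  # each item is a ChatCompletionChunk


asyncio.run(main())

The with_raw_response and with_streaming_response wrappers defined above expose the same create method for cases where response headers or lazy body reading are needed.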
+ +from __future__ import annotations + +from typing import Dict, List, Union, Iterable +from typing_extensions import Literal, overload + +import httpx + +from ..types import completion_create_params +from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from .._utils import required_args, maybe_transform, async_maybe_transform +from .._compat import cached_property +from .._resource import SyncAPIResource, AsyncAPIResource +from .._response import ( + to_raw_response_wrapper, + to_streamed_response_wrapper, + async_to_raw_response_wrapper, + async_to_streamed_response_wrapper, +) +from .._streaming import Stream, AsyncStream +from .._base_client import make_request_options +from ..types.completion_create_response import CompletionCreateResponse + +__all__ = ["CompletionsResource", "AsyncCompletionsResource"] + + +class CompletionsResource(SyncAPIResource): + @cached_property + def with_raw_response(self) -> CompletionsResourceWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers + """ + return CompletionsResourceWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> CompletionsResourceWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response + """ + return CompletionsResourceWithStreamingResponse(self) + + @overload + def create( + self, + *, + model: str, + prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]]], + best_of: int | NotGiven = NOT_GIVEN, + echo: bool | NotGiven = NOT_GIVEN, + frequency_penalty: float | NotGiven = NOT_GIVEN, + guided_choice: List[str] | NotGiven = NOT_GIVEN, + logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, + logprobs: bool | NotGiven = NOT_GIVEN, + max_tokens: int | NotGiven = NOT_GIVEN, + n: int | NotGiven = NOT_GIVEN, + presence_penalty: float | NotGiven = NOT_GIVEN, + prompt_logprobs: int | NotGiven = NOT_GIVEN, + seed: int | NotGiven = NOT_GIVEN, + stop: Union[str, List[str]] | NotGiven = NOT_GIVEN, + stream: Literal[False] | NotGiven = NOT_GIVEN, + stream_options: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + top_p: float | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> CompletionCreateResponse: + """ + Generate an OpenAI-compatible completion for the given prompt using the + specified model. + + Args: + model: The identifier of the model to use. The model must be registered with Llama + Stack and available via the /models endpoint. 
+ + prompt: The prompt to generate a completion for + + best_of: (Optional) The number of completions to generate + + echo: (Optional) Whether to echo the prompt + + frequency_penalty: (Optional) The penalty for repeated tokens + + logit_bias: (Optional) The logit bias to use + + logprobs: (Optional) The log probabilities to use + + max_tokens: (Optional) The maximum number of tokens to generate + + n: (Optional) The number of completions to generate + + presence_penalty: (Optional) The penalty for repeated tokens + + seed: (Optional) The seed to use + + stop: (Optional) The stop tokens to use + + stream: (Optional) Whether to stream the response + + stream_options: (Optional) The stream options to use + + temperature: (Optional) The temperature to use + + top_p: (Optional) The top p to use + + user: (Optional) The user to use + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + def create( + self, + *, + model: str, + prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]]], + stream: Literal[True], + best_of: int | NotGiven = NOT_GIVEN, + echo: bool | NotGiven = NOT_GIVEN, + frequency_penalty: float | NotGiven = NOT_GIVEN, + guided_choice: List[str] | NotGiven = NOT_GIVEN, + logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, + logprobs: bool | NotGiven = NOT_GIVEN, + max_tokens: int | NotGiven = NOT_GIVEN, + n: int | NotGiven = NOT_GIVEN, + presence_penalty: float | NotGiven = NOT_GIVEN, + prompt_logprobs: int | NotGiven = NOT_GIVEN, + seed: int | NotGiven = NOT_GIVEN, + stop: Union[str, List[str]] | NotGiven = NOT_GIVEN, + stream_options: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + top_p: float | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Stream[CompletionCreateResponse]: + """ + Generate an OpenAI-compatible completion for the given prompt using the + specified model. + + Args: + model: The identifier of the model to use. The model must be registered with Llama + Stack and available via the /models endpoint. 
+ + prompt: The prompt to generate a completion for + + stream: (Optional) Whether to stream the response + + best_of: (Optional) The number of completions to generate + + echo: (Optional) Whether to echo the prompt + + frequency_penalty: (Optional) The penalty for repeated tokens + + logit_bias: (Optional) The logit bias to use + + logprobs: (Optional) The log probabilities to use + + max_tokens: (Optional) The maximum number of tokens to generate + + n: (Optional) The number of completions to generate + + presence_penalty: (Optional) The penalty for repeated tokens + + seed: (Optional) The seed to use + + stop: (Optional) The stop tokens to use + + stream_options: (Optional) The stream options to use + + temperature: (Optional) The temperature to use + + top_p: (Optional) The top p to use + + user: (Optional) The user to use + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + def create( + self, + *, + model: str, + prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]]], + stream: bool, + best_of: int | NotGiven = NOT_GIVEN, + echo: bool | NotGiven = NOT_GIVEN, + frequency_penalty: float | NotGiven = NOT_GIVEN, + guided_choice: List[str] | NotGiven = NOT_GIVEN, + logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, + logprobs: bool | NotGiven = NOT_GIVEN, + max_tokens: int | NotGiven = NOT_GIVEN, + n: int | NotGiven = NOT_GIVEN, + presence_penalty: float | NotGiven = NOT_GIVEN, + prompt_logprobs: int | NotGiven = NOT_GIVEN, + seed: int | NotGiven = NOT_GIVEN, + stop: Union[str, List[str]] | NotGiven = NOT_GIVEN, + stream_options: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + top_p: float | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> CompletionCreateResponse | Stream[CompletionCreateResponse]: + """ + Generate an OpenAI-compatible completion for the given prompt using the + specified model. + + Args: + model: The identifier of the model to use. The model must be registered with Llama + Stack and available via the /models endpoint. 
+ + prompt: The prompt to generate a completion for + + stream: (Optional) Whether to stream the response + + best_of: (Optional) The number of completions to generate + + echo: (Optional) Whether to echo the prompt + + frequency_penalty: (Optional) The penalty for repeated tokens + + logit_bias: (Optional) The logit bias to use + + logprobs: (Optional) The log probabilities to use + + max_tokens: (Optional) The maximum number of tokens to generate + + n: (Optional) The number of completions to generate + + presence_penalty: (Optional) The penalty for repeated tokens + + seed: (Optional) The seed to use + + stop: (Optional) The stop tokens to use + + stream_options: (Optional) The stream options to use + + temperature: (Optional) The temperature to use + + top_p: (Optional) The top p to use + + user: (Optional) The user to use + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @required_args(["model", "prompt"], ["model", "prompt", "stream"]) + def create( + self, + *, + model: str, + prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]]], + best_of: int | NotGiven = NOT_GIVEN, + echo: bool | NotGiven = NOT_GIVEN, + frequency_penalty: float | NotGiven = NOT_GIVEN, + guided_choice: List[str] | NotGiven = NOT_GIVEN, + logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, + logprobs: bool | NotGiven = NOT_GIVEN, + max_tokens: int | NotGiven = NOT_GIVEN, + n: int | NotGiven = NOT_GIVEN, + presence_penalty: float | NotGiven = NOT_GIVEN, + prompt_logprobs: int | NotGiven = NOT_GIVEN, + seed: int | NotGiven = NOT_GIVEN, + stop: Union[str, List[str]] | NotGiven = NOT_GIVEN, + stream: Literal[False] | Literal[True] | NotGiven = NOT_GIVEN, + stream_options: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + top_p: float | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> CompletionCreateResponse | Stream[CompletionCreateResponse]: + return self._post( + "/v1/openai/v1/completions", + body=maybe_transform( + { + "model": model, + "prompt": prompt, + "best_of": best_of, + "echo": echo, + "frequency_penalty": frequency_penalty, + "guided_choice": guided_choice, + "logit_bias": logit_bias, + "logprobs": logprobs, + "max_tokens": max_tokens, + "n": n, + "presence_penalty": presence_penalty, + "prompt_logprobs": prompt_logprobs, + "seed": seed, + "stop": stop, + "stream": stream, + "stream_options": stream_options, + "temperature": temperature, + "top_p": top_p, + "user": user, + }, + completion_create_params.CompletionCreateParamsStreaming + if stream + else completion_create_params.CompletionCreateParamsNonStreaming, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=CompletionCreateResponse, + stream=stream or False, + stream_cls=Stream[CompletionCreateResponse], + ) + + +class AsyncCompletionsResource(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncCompletionsResourceWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers + """ + return AsyncCompletionsResourceWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncCompletionsResourceWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response + """ + return AsyncCompletionsResourceWithStreamingResponse(self) + + @overload + async def create( + self, + *, + model: str, + prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]]], + best_of: int | NotGiven = NOT_GIVEN, + echo: bool | NotGiven = NOT_GIVEN, + frequency_penalty: float | NotGiven = NOT_GIVEN, + guided_choice: List[str] | NotGiven = NOT_GIVEN, + logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, + logprobs: bool | NotGiven = NOT_GIVEN, + max_tokens: int | NotGiven = NOT_GIVEN, + n: int | NotGiven = NOT_GIVEN, + presence_penalty: float | NotGiven = NOT_GIVEN, + prompt_logprobs: int | NotGiven = NOT_GIVEN, + seed: int | NotGiven = NOT_GIVEN, + stop: Union[str, List[str]] | NotGiven = NOT_GIVEN, + stream: Literal[False] | NotGiven = NOT_GIVEN, + stream_options: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + top_p: float | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> CompletionCreateResponse: + """ + Generate an OpenAI-compatible completion for the given prompt using the + specified model. 
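A minimal sketch of the synchronous CompletionsResource.create implemented above, covering the non-streaming and streaming forms; the LlamaStackClient entry point, the client.completions accessor, the endpoint, and the model id are assumptions, and the async resource that follows mirrors the same call shape.

from llama_stack_client import LlamaStackClient  # assumed sync client entry point

client = LlamaStackClient(base_url="http://localhost:8321")  # hypothetical endpoint

# Non-streaming prompt completion against POST /v1/openai/v1/completions.
completion = client.completions.create(
    model="meta-llama/Llama-3.1-8B-Instruct",  # placeholder; must be registered via /models
    prompt="Write a haiku about the sea.",
    max_tokens=64,
    temperature=0.7,
)
print(completion)

# Streaming form: stream=True returns a Stream[CompletionCreateResponse] to iterate over.
for chunk in client.completions.create(
    model="meta-llama/Llama-3.1-8B-Instruct",
    prompt="Write a haiku about the sea.",
    stream=True,
):
    print(chunk)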
+ + Args: + model: The identifier of the model to use. The model must be registered with Llama + Stack and available via the /models endpoint. + + prompt: The prompt to generate a completion for + + best_of: (Optional) The number of completions to generate + + echo: (Optional) Whether to echo the prompt + + frequency_penalty: (Optional) The penalty for repeated tokens + + logit_bias: (Optional) The logit bias to use + + logprobs: (Optional) The log probabilities to use + + max_tokens: (Optional) The maximum number of tokens to generate + + n: (Optional) The number of completions to generate + + presence_penalty: (Optional) The penalty for repeated tokens + + seed: (Optional) The seed to use + + stop: (Optional) The stop tokens to use + + stream: (Optional) Whether to stream the response + + stream_options: (Optional) The stream options to use + + temperature: (Optional) The temperature to use + + top_p: (Optional) The top p to use + + user: (Optional) The user to use + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + async def create( + self, + *, + model: str, + prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]]], + stream: Literal[True], + best_of: int | NotGiven = NOT_GIVEN, + echo: bool | NotGiven = NOT_GIVEN, + frequency_penalty: float | NotGiven = NOT_GIVEN, + guided_choice: List[str] | NotGiven = NOT_GIVEN, + logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, + logprobs: bool | NotGiven = NOT_GIVEN, + max_tokens: int | NotGiven = NOT_GIVEN, + n: int | NotGiven = NOT_GIVEN, + presence_penalty: float | NotGiven = NOT_GIVEN, + prompt_logprobs: int | NotGiven = NOT_GIVEN, + seed: int | NotGiven = NOT_GIVEN, + stop: Union[str, List[str]] | NotGiven = NOT_GIVEN, + stream_options: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + top_p: float | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncStream[CompletionCreateResponse]: + """ + Generate an OpenAI-compatible completion for the given prompt using the + specified model. + + Args: + model: The identifier of the model to use. The model must be registered with Llama + Stack and available via the /models endpoint. 
+ + prompt: The prompt to generate a completion for + + stream: (Optional) Whether to stream the response + + best_of: (Optional) The number of completions to generate + + echo: (Optional) Whether to echo the prompt + + frequency_penalty: (Optional) The penalty for repeated tokens + + logit_bias: (Optional) The logit bias to use + + logprobs: (Optional) The log probabilities to use + + max_tokens: (Optional) The maximum number of tokens to generate + + n: (Optional) The number of completions to generate + + presence_penalty: (Optional) The penalty for repeated tokens + + seed: (Optional) The seed to use + + stop: (Optional) The stop tokens to use + + stream_options: (Optional) The stream options to use + + temperature: (Optional) The temperature to use + + top_p: (Optional) The top p to use + + user: (Optional) The user to use + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + async def create( + self, + *, + model: str, + prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]]], + stream: bool, + best_of: int | NotGiven = NOT_GIVEN, + echo: bool | NotGiven = NOT_GIVEN, + frequency_penalty: float | NotGiven = NOT_GIVEN, + guided_choice: List[str] | NotGiven = NOT_GIVEN, + logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, + logprobs: bool | NotGiven = NOT_GIVEN, + max_tokens: int | NotGiven = NOT_GIVEN, + n: int | NotGiven = NOT_GIVEN, + presence_penalty: float | NotGiven = NOT_GIVEN, + prompt_logprobs: int | NotGiven = NOT_GIVEN, + seed: int | NotGiven = NOT_GIVEN, + stop: Union[str, List[str]] | NotGiven = NOT_GIVEN, + stream_options: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + top_p: float | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> CompletionCreateResponse | AsyncStream[CompletionCreateResponse]: + """ + Generate an OpenAI-compatible completion for the given prompt using the + specified model. + + Args: + model: The identifier of the model to use. The model must be registered with Llama + Stack and available via the /models endpoint. 
+ + prompt: The prompt to generate a completion for + + stream: (Optional) Whether to stream the response + + best_of: (Optional) The number of completions to generate + + echo: (Optional) Whether to echo the prompt + + frequency_penalty: (Optional) The penalty for repeated tokens + + logit_bias: (Optional) The logit bias to use + + logprobs: (Optional) The log probabilities to use + + max_tokens: (Optional) The maximum number of tokens to generate + + n: (Optional) The number of completions to generate + + presence_penalty: (Optional) The penalty for repeated tokens + + seed: (Optional) The seed to use + + stop: (Optional) The stop tokens to use + + stream_options: (Optional) The stream options to use + + temperature: (Optional) The temperature to use + + top_p: (Optional) The top p to use + + user: (Optional) The user to use + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @required_args(["model", "prompt"], ["model", "prompt", "stream"]) + async def create( + self, + *, + model: str, + prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]]], + best_of: int | NotGiven = NOT_GIVEN, + echo: bool | NotGiven = NOT_GIVEN, + frequency_penalty: float | NotGiven = NOT_GIVEN, + guided_choice: List[str] | NotGiven = NOT_GIVEN, + logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, + logprobs: bool | NotGiven = NOT_GIVEN, + max_tokens: int | NotGiven = NOT_GIVEN, + n: int | NotGiven = NOT_GIVEN, + presence_penalty: float | NotGiven = NOT_GIVEN, + prompt_logprobs: int | NotGiven = NOT_GIVEN, + seed: int | NotGiven = NOT_GIVEN, + stop: Union[str, List[str]] | NotGiven = NOT_GIVEN, + stream: Literal[False] | Literal[True] | NotGiven = NOT_GIVEN, + stream_options: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + top_p: float | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> CompletionCreateResponse | AsyncStream[CompletionCreateResponse]: + return await self._post( + "/v1/openai/v1/completions", + body=await async_maybe_transform( + { + "model": model, + "prompt": prompt, + "best_of": best_of, + "echo": echo, + "frequency_penalty": frequency_penalty, + "guided_choice": guided_choice, + "logit_bias": logit_bias, + "logprobs": logprobs, + "max_tokens": max_tokens, + "n": n, + "presence_penalty": presence_penalty, + "prompt_logprobs": prompt_logprobs, + "seed": seed, + "stop": stop, + "stream": stream, + "stream_options": stream_options, + "temperature": temperature, + "top_p": top_p, + "user": user, + }, + completion_create_params.CompletionCreateParamsStreaming + if stream + else completion_create_params.CompletionCreateParamsNonStreaming, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=CompletionCreateResponse, + stream=stream or False, + stream_cls=AsyncStream[CompletionCreateResponse], + ) + + +class CompletionsResourceWithRawResponse: + def __init__(self, completions: CompletionsResource) -> None: + self._completions = completions + + self.create = to_raw_response_wrapper( + completions.create, + ) + + +class AsyncCompletionsResourceWithRawResponse: + def __init__(self, completions: AsyncCompletionsResource) -> None: + self._completions = completions + + self.create = async_to_raw_response_wrapper( + completions.create, + ) + + +class CompletionsResourceWithStreamingResponse: + def __init__(self, completions: CompletionsResource) -> None: + self._completions = completions + + self.create = to_streamed_response_wrapper( + completions.create, + ) + + +class AsyncCompletionsResourceWithStreamingResponse: + def __init__(self, completions: AsyncCompletionsResource) -> None: + self._completions = completions + + self.create = async_to_streamed_response_wrapper( + completions.create, + ) diff --git a/src/llama_stack_client/resources/datasets.py b/src/llama_stack_client/resources/datasets.py index aaa27a5e..845f182b 100644 --- a/src/llama_stack_client/resources/datasets.py +++ b/src/llama_stack_client/resources/datasets.py @@ -9,10 +9,7 @@ from ..types import dataset_iterrows_params, dataset_register_params from .._types import NOT_GIVEN, Body, Query, Headers, NoneType, NotGiven -from .._utils import ( - maybe_transform, - async_maybe_transform, -) +from .._utils import maybe_transform, async_maybe_transform from .._compat import cached_property from .._resource import SyncAPIResource, AsyncAPIResource from .._response import ( diff --git a/src/llama_stack_client/resources/eval/eval.py b/src/llama_stack_client/resources/eval/eval.py index e73f7df1..23d1500c 100644 --- a/src/llama_stack_client/resources/eval/eval.py +++ b/src/llama_stack_client/resources/eval/eval.py @@ -21,10 +21,7 @@ eval_evaluate_rows_alpha_params, ) from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from ..._utils import ( - maybe_transform, - async_maybe_transform, -) +from ..._utils import maybe_transform, async_maybe_transform from ..._compat import cached_property from ..._resource import SyncAPIResource, AsyncAPIResource from ..._response import ( diff --git a/src/llama_stack_client/resources/inference.py b/src/llama_stack_client/resources/inference.py index 428956ce..be87eda4 100644 --- 
a/src/llama_stack_client/resources/inference.py +++ b/src/llama_stack_client/resources/inference.py @@ -15,11 +15,7 @@ inference_batch_chat_completion_params, ) from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from .._utils import ( - required_args, - maybe_transform, - async_maybe_transform, -) +from .._utils import required_args, maybe_transform, async_maybe_transform from .._compat import cached_property from .._resource import SyncAPIResource, AsyncAPIResource from .._response import ( diff --git a/src/llama_stack_client/resources/models.py b/src/llama_stack_client/resources/models.py index db08a9d5..02458d3b 100644 --- a/src/llama_stack_client/resources/models.py +++ b/src/llama_stack_client/resources/models.py @@ -9,10 +9,7 @@ from ..types import model_register_params from .._types import NOT_GIVEN, Body, Query, Headers, NoneType, NotGiven -from .._utils import ( - maybe_transform, - async_maybe_transform, -) +from .._utils import maybe_transform, async_maybe_transform from .._compat import cached_property from .._resource import SyncAPIResource, AsyncAPIResource from .._response import ( diff --git a/src/llama_stack_client/resources/post_training/job.py b/src/llama_stack_client/resources/post_training/job.py index bcd31952..a55ba7fa 100644 --- a/src/llama_stack_client/resources/post_training/job.py +++ b/src/llama_stack_client/resources/post_training/job.py @@ -7,10 +7,7 @@ import httpx from ..._types import NOT_GIVEN, Body, Query, Headers, NoneType, NotGiven -from ..._utils import ( - maybe_transform, - async_maybe_transform, -) +from ..._utils import maybe_transform, async_maybe_transform from ..._compat import cached_property from ..._resource import SyncAPIResource, AsyncAPIResource from ..._response import ( diff --git a/src/llama_stack_client/resources/post_training/post_training.py b/src/llama_stack_client/resources/post_training/post_training.py index a93a1ebb..fe0d2b7b 100644 --- a/src/llama_stack_client/resources/post_training/post_training.py +++ b/src/llama_stack_client/resources/post_training/post_training.py @@ -19,10 +19,7 @@ post_training_supervised_fine_tune_params, ) from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from ..._utils import ( - maybe_transform, - async_maybe_transform, -) +from ..._utils import maybe_transform, async_maybe_transform from ..._compat import cached_property from ..._resource import SyncAPIResource, AsyncAPIResource from ..._response import ( @@ -113,10 +110,10 @@ def supervised_fine_tune( hyperparam_search_config: Dict[str, Union[bool, float, str, Iterable[object], object, None]], job_uuid: str, logger_config: Dict[str, Union[bool, float, str, Iterable[object], object, None]], - model: str, training_config: post_training_supervised_fine_tune_params.TrainingConfig, algorithm_config: AlgorithmConfigParam | NotGiven = NOT_GIVEN, checkpoint_dir: str | NotGiven = NOT_GIVEN, + model: str | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. 
extra_headers: Headers | None = None, @@ -141,10 +138,10 @@ def supervised_fine_tune( "hyperparam_search_config": hyperparam_search_config, "job_uuid": job_uuid, "logger_config": logger_config, - "model": model, "training_config": training_config, "algorithm_config": algorithm_config, "checkpoint_dir": checkpoint_dir, + "model": model, }, post_training_supervised_fine_tune_params.PostTrainingSupervisedFineTuneParams, ), @@ -230,10 +227,10 @@ async def supervised_fine_tune( hyperparam_search_config: Dict[str, Union[bool, float, str, Iterable[object], object, None]], job_uuid: str, logger_config: Dict[str, Union[bool, float, str, Iterable[object], object, None]], - model: str, training_config: post_training_supervised_fine_tune_params.TrainingConfig, algorithm_config: AlgorithmConfigParam | NotGiven = NOT_GIVEN, checkpoint_dir: str | NotGiven = NOT_GIVEN, + model: str | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -258,10 +255,10 @@ async def supervised_fine_tune( "hyperparam_search_config": hyperparam_search_config, "job_uuid": job_uuid, "logger_config": logger_config, - "model": model, "training_config": training_config, "algorithm_config": algorithm_config, "checkpoint_dir": checkpoint_dir, + "model": model, }, post_training_supervised_fine_tune_params.PostTrainingSupervisedFineTuneParams, ), diff --git a/src/llama_stack_client/resources/safety.py b/src/llama_stack_client/resources/safety.py index 7382c81c..66646102 100644 --- a/src/llama_stack_client/resources/safety.py +++ b/src/llama_stack_client/resources/safety.py @@ -8,10 +8,7 @@ from ..types import safety_run_shield_params from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from .._utils import ( - maybe_transform, - async_maybe_transform, -) +from .._utils import maybe_transform, async_maybe_transform from .._compat import cached_property from .._resource import SyncAPIResource, AsyncAPIResource from .._response import ( diff --git a/src/llama_stack_client/resources/scoring.py b/src/llama_stack_client/resources/scoring.py index ebe42934..33ee8969 100644 --- a/src/llama_stack_client/resources/scoring.py +++ b/src/llama_stack_client/resources/scoring.py @@ -8,10 +8,7 @@ from ..types import scoring_score_params, scoring_score_batch_params from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from .._utils import ( - maybe_transform, - async_maybe_transform, -) +from .._utils import maybe_transform, async_maybe_transform from .._compat import cached_property from .._resource import SyncAPIResource, AsyncAPIResource from .._response import ( diff --git a/src/llama_stack_client/resources/scoring_functions.py b/src/llama_stack_client/resources/scoring_functions.py index c152c805..f01ff17b 100644 --- a/src/llama_stack_client/resources/scoring_functions.py +++ b/src/llama_stack_client/resources/scoring_functions.py @@ -8,10 +8,7 @@ from ..types import scoring_function_register_params from .._types import NOT_GIVEN, Body, Query, Headers, NoneType, NotGiven -from .._utils import ( - maybe_transform, - async_maybe_transform, -) +from .._utils import maybe_transform, async_maybe_transform from .._compat import cached_property from .._resource import SyncAPIResource, AsyncAPIResource from .._response import ( diff --git a/src/llama_stack_client/resources/shields.py 
b/src/llama_stack_client/resources/shields.py index 150455c3..4ef88ac7 100644 --- a/src/llama_stack_client/resources/shields.py +++ b/src/llama_stack_client/resources/shields.py @@ -8,10 +8,7 @@ from ..types import shield_register_params from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from .._utils import ( - maybe_transform, - async_maybe_transform, -) +from .._utils import maybe_transform, async_maybe_transform from .._compat import cached_property from .._resource import SyncAPIResource, AsyncAPIResource from .._response import ( diff --git a/src/llama_stack_client/resources/synthetic_data_generation.py b/src/llama_stack_client/resources/synthetic_data_generation.py index 3c848575..59df1b39 100644 --- a/src/llama_stack_client/resources/synthetic_data_generation.py +++ b/src/llama_stack_client/resources/synthetic_data_generation.py @@ -9,10 +9,7 @@ from ..types import synthetic_data_generation_generate_params from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from .._utils import ( - maybe_transform, - async_maybe_transform, -) +from .._utils import maybe_transform, async_maybe_transform from .._compat import cached_property from .._resource import SyncAPIResource, AsyncAPIResource from .._response import ( diff --git a/src/llama_stack_client/resources/telemetry.py b/src/llama_stack_client/resources/telemetry.py index cd93e775..12261eee 100644 --- a/src/llama_stack_client/resources/telemetry.py +++ b/src/llama_stack_client/resources/telemetry.py @@ -14,10 +14,7 @@ telemetry_save_spans_to_dataset_params, ) from .._types import NOT_GIVEN, Body, Query, Headers, NoneType, NotGiven -from .._utils import ( - maybe_transform, - async_maybe_transform, -) +from .._utils import maybe_transform, async_maybe_transform from .._compat import cached_property from .._resource import SyncAPIResource, AsyncAPIResource from .._response import ( diff --git a/src/llama_stack_client/resources/tool_runtime/rag_tool.py b/src/llama_stack_client/resources/tool_runtime/rag_tool.py index 14ea8454..048ea980 100644 --- a/src/llama_stack_client/resources/tool_runtime/rag_tool.py +++ b/src/llama_stack_client/resources/tool_runtime/rag_tool.py @@ -7,10 +7,7 @@ import httpx from ..._types import NOT_GIVEN, Body, Query, Headers, NoneType, NotGiven -from ..._utils import ( - maybe_transform, - async_maybe_transform, -) +from ..._utils import maybe_transform, async_maybe_transform from ..._compat import cached_property from ..._resource import SyncAPIResource, AsyncAPIResource from ..._response import ( diff --git a/src/llama_stack_client/resources/tool_runtime/tool_runtime.py b/src/llama_stack_client/resources/tool_runtime/tool_runtime.py index aa380f79..dda3f661 100644 --- a/src/llama_stack_client/resources/tool_runtime/tool_runtime.py +++ b/src/llama_stack_client/resources/tool_runtime/tool_runtime.py @@ -8,10 +8,7 @@ from ...types import tool_runtime_list_tools_params, tool_runtime_invoke_tool_params from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from ..._utils import ( - maybe_transform, - async_maybe_transform, -) +from ..._utils import maybe_transform, async_maybe_transform from .rag_tool import ( RagToolResource, AsyncRagToolResource, diff --git a/src/llama_stack_client/resources/toolgroups.py b/src/llama_stack_client/resources/toolgroups.py index 6a9b79d0..d882a6eb 100644 --- a/src/llama_stack_client/resources/toolgroups.py +++ b/src/llama_stack_client/resources/toolgroups.py @@ -8,10 +8,7 @@ from ..types import toolgroup_register_params from .._types import NOT_GIVEN, 
Body, Query, Headers, NoneType, NotGiven -from .._utils import ( - maybe_transform, - async_maybe_transform, -) +from .._utils import maybe_transform, async_maybe_transform from .._compat import cached_property from .._resource import SyncAPIResource, AsyncAPIResource from .._response import ( diff --git a/src/llama_stack_client/resources/tools.py b/src/llama_stack_client/resources/tools.py index 206389f3..8a9b91e8 100644 --- a/src/llama_stack_client/resources/tools.py +++ b/src/llama_stack_client/resources/tools.py @@ -8,10 +8,7 @@ from ..types import tool_list_params from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from .._utils import ( - maybe_transform, - async_maybe_transform, -) +from .._utils import maybe_transform, async_maybe_transform from .._compat import cached_property from .._resource import SyncAPIResource, AsyncAPIResource from .._response import ( diff --git a/src/llama_stack_client/resources/vector_dbs.py b/src/llama_stack_client/resources/vector_dbs.py index 79d7939d..c75d261d 100644 --- a/src/llama_stack_client/resources/vector_dbs.py +++ b/src/llama_stack_client/resources/vector_dbs.py @@ -8,10 +8,7 @@ from ..types import vector_db_register_params from .._types import NOT_GIVEN, Body, Query, Headers, NoneType, NotGiven -from .._utils import ( - maybe_transform, - async_maybe_transform, -) +from .._utils import maybe_transform, async_maybe_transform from .._compat import cached_property from .._resource import SyncAPIResource, AsyncAPIResource from .._response import ( diff --git a/src/llama_stack_client/resources/vector_io.py b/src/llama_stack_client/resources/vector_io.py index a432ea40..9b1e8822 100644 --- a/src/llama_stack_client/resources/vector_io.py +++ b/src/llama_stack_client/resources/vector_io.py @@ -8,10 +8,7 @@ from ..types import vector_io_query_params, vector_io_insert_params from .._types import NOT_GIVEN, Body, Query, Headers, NoneType, NotGiven -from .._utils import ( - maybe_transform, - async_maybe_transform, -) +from .._utils import maybe_transform, async_maybe_transform from .._compat import cached_property from .._resource import SyncAPIResource, AsyncAPIResource from .._response import ( diff --git a/src/llama_stack_client/types/__init__.py b/src/llama_stack_client/types/__init__.py index a78eae03..3db3080c 100644 --- a/src/llama_stack_client/types/__init__.py +++ b/src/llama_stack_client/types/__init__.py @@ -70,6 +70,7 @@ from .scoring_score_params import ScoringScoreParams as ScoringScoreParams from .shield_list_response import ShieldListResponse as ShieldListResponse from .agent_create_response import AgentCreateResponse as AgentCreateResponse +from .chat_completion_chunk import ChatCompletionChunk as ChatCompletionChunk from .dataset_list_response import DatasetListResponse as DatasetListResponse from .list_shields_response import ListShieldsResponse as ListShieldsResponse from .memory_retrieval_step import MemoryRetrievalStep as MemoryRetrievalStep @@ -92,6 +93,7 @@ from .toolgroup_list_response import ToolgroupListResponse as ToolgroupListResponse from .vector_db_list_response import VectorDBListResponse as VectorDBListResponse from .vector_io_insert_params import VectorIoInsertParams as VectorIoInsertParams +from .completion_create_params import CompletionCreateParams as CompletionCreateParams from .list_benchmarks_response import ListBenchmarksResponse as ListBenchmarksResponse from .list_vector_dbs_response import ListVectorDBsResponse as ListVectorDBsResponse from .safety_run_shield_params import SafetyRunShieldParams 
as SafetyRunShieldParams @@ -103,6 +105,7 @@ from .list_tool_groups_response import ListToolGroupsResponse as ListToolGroupsResponse from .toolgroup_register_params import ToolgroupRegisterParams as ToolgroupRegisterParams from .vector_db_register_params import VectorDBRegisterParams as VectorDBRegisterParams +from .completion_create_response import CompletionCreateResponse as CompletionCreateResponse from .eval_run_eval_alpha_params import EvalRunEvalAlphaParams as EvalRunEvalAlphaParams from .scoring_score_batch_params import ScoringScoreBatchParams as ScoringScoreBatchParams from .telemetry_log_event_params import TelemetryLogEventParams as TelemetryLogEventParams diff --git a/src/llama_stack_client/types/agent_create_response.py b/src/llama_stack_client/types/agent_create_response.py index 65d2275f..93651cb6 100644 --- a/src/llama_stack_client/types/agent_create_response.py +++ b/src/llama_stack_client/types/agent_create_response.py @@ -1,6 +1,5 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - from .._models import BaseModel __all__ = ["AgentCreateResponse"] diff --git a/src/llama_stack_client/types/agents/agent_turn_response_stream_chunk.py b/src/llama_stack_client/types/agents/agent_turn_response_stream_chunk.py index bda45d88..c488ba81 100644 --- a/src/llama_stack_client/types/agents/agent_turn_response_stream_chunk.py +++ b/src/llama_stack_client/types/agents/agent_turn_response_stream_chunk.py @@ -1,6 +1,5 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - from ..._models import BaseModel from .turn_response_event import TurnResponseEvent diff --git a/src/llama_stack_client/types/agents/session_create_response.py b/src/llama_stack_client/types/agents/session_create_response.py index 6adcf0b2..abf18665 100644 --- a/src/llama_stack_client/types/agents/session_create_response.py +++ b/src/llama_stack_client/types/agents/session_create_response.py @@ -1,6 +1,5 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - from ..._models import BaseModel __all__ = ["SessionCreateResponse"] diff --git a/src/llama_stack_client/types/agents/turn_response_event.py b/src/llama_stack_client/types/agents/turn_response_event.py index 1b9ad5a6..c6a42d75 100644 --- a/src/llama_stack_client/types/agents/turn_response_event.py +++ b/src/llama_stack_client/types/agents/turn_response_event.py @@ -1,6 +1,5 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - from ..._models import BaseModel from .turn_response_event_payload import TurnResponseEventPayload diff --git a/src/llama_stack_client/types/chat/__init__.py b/src/llama_stack_client/types/chat/__init__.py new file mode 100644 index 00000000..9384ac14 --- /dev/null +++ b/src/llama_stack_client/types/chat/__init__.py @@ -0,0 +1,6 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from .completion_create_params import CompletionCreateParams as CompletionCreateParams +from .completion_create_response import CompletionCreateResponse as CompletionCreateResponse diff --git a/src/llama_stack_client/types/chat/completion_create_params.py b/src/llama_stack_client/types/chat/completion_create_params.py new file mode 100644 index 00000000..0281420b --- /dev/null +++ b/src/llama_stack_client/types/chat/completion_create_params.py @@ -0,0 +1,401 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
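As a sketch of how the typed params defined in this module fit together, the following builds a user message with text and image parts plus a json_schema response format and passes them to chat.completions.create. The client construction, endpoint, and model id are assumptions; because these classes are TypedDicts, plain dicts of the same shape work equally well.

from llama_stack_client import LlamaStackClient  # assumed client entry point
from llama_stack_client.types.chat import completion_create_params as params

client = LlamaStackClient(base_url="http://localhost:8321")  # hypothetical endpoint

# TypedDict constructors return plain dicts, so literal dicts with the same keys are equivalent.
user_message = params.MessageOpenAIUserMessageParam(
    role="user",
    content=[
        params.MessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam(
            type="text",
            text="Describe this image in one sentence.",
        ),
        params.MessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam(
            type="image_url",
            image_url={"url": "https://example.com/cat.png"},  # placeholder URL
        ),
    ],
)

response_format = params.ResponseFormatOpenAIResponseFormatJsonSchema(
    type="json_schema",
    json_schema={"name": "caption", "schema": {"type": "object"}, "strict": True},
)

response = client.chat.completions.create(
    model="meta-llama/Llama-3.1-8B-Instruct",  # placeholder; must be registered via /models
    messages=[user_message],
    response_format=response_format,
)
print(response)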
+ +from __future__ import annotations + +from typing import Dict, List, Union, Iterable +from typing_extensions import Literal, Required, TypeAlias, TypedDict + +__all__ = [ + "CompletionCreateParamsBase", + "Message", + "MessageOpenAIUserMessageParam", + "MessageOpenAIUserMessageParamContentUnionMember1", + "MessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", + "MessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", + "MessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", + "MessageOpenAISystemMessageParam", + "MessageOpenAISystemMessageParamContentUnionMember1", + "MessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", + "MessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", + "MessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", + "MessageOpenAIAssistantMessageParam", + "MessageOpenAIAssistantMessageParamContentUnionMember1", + "MessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", + "MessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", + "MessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", + "MessageOpenAIAssistantMessageParamToolCall", + "MessageOpenAIAssistantMessageParamToolCallFunction", + "MessageOpenAIToolMessageParam", + "MessageOpenAIToolMessageParamContentUnionMember1", + "MessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", + "MessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", + "MessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", + "MessageOpenAIDeveloperMessageParam", + "MessageOpenAIDeveloperMessageParamContentUnionMember1", + "MessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", + "MessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", + "MessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", + "ResponseFormat", + "ResponseFormatOpenAIResponseFormatText", + "ResponseFormatOpenAIResponseFormatJsonSchema", + "ResponseFormatOpenAIResponseFormatJsonSchemaJsonSchema", + "ResponseFormatOpenAIResponseFormatJsonObject", + "CompletionCreateParamsNonStreaming", + "CompletionCreateParamsStreaming", +] + + +class CompletionCreateParamsBase(TypedDict, total=False): + messages: Required[Iterable[Message]] + """List of messages in the conversation""" + + model: Required[str] + """The identifier of the model to use. + + The model must be registered with Llama Stack and available via the /models + endpoint. 
+ """ + + frequency_penalty: float + """(Optional) The penalty for repeated tokens""" + + function_call: Union[str, Dict[str, Union[bool, float, str, Iterable[object], object, None]]] + """(Optional) The function call to use""" + + functions: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] + """(Optional) List of functions to use""" + + logit_bias: Dict[str, float] + """(Optional) The logit bias to use""" + + logprobs: bool + """(Optional) The log probabilities to use""" + + max_completion_tokens: int + """(Optional) The maximum number of tokens to generate""" + + max_tokens: int + """(Optional) The maximum number of tokens to generate""" + + n: int + """(Optional) The number of completions to generate""" + + parallel_tool_calls: bool + """(Optional) Whether to parallelize tool calls""" + + presence_penalty: float + """(Optional) The penalty for repeated tokens""" + + response_format: ResponseFormat + """(Optional) The response format to use""" + + seed: int + """(Optional) The seed to use""" + + stop: Union[str, List[str]] + """(Optional) The stop tokens to use""" + + stream_options: Dict[str, Union[bool, float, str, Iterable[object], object, None]] + """(Optional) The stream options to use""" + + temperature: float + """(Optional) The temperature to use""" + + tool_choice: Union[str, Dict[str, Union[bool, float, str, Iterable[object], object, None]]] + """(Optional) The tool choice to use""" + + tools: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] + """(Optional) The tools to use""" + + top_logprobs: int + """(Optional) The top log probabilities to use""" + + top_p: float + """(Optional) The top p to use""" + + user: str + """(Optional) The user to use""" + + +class MessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam(TypedDict, total=False): + text: Required[str] + + type: Required[Literal["text"]] + + +class MessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL( + TypedDict, total=False +): + url: Required[str] + + detail: str + + +class MessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam(TypedDict, total=False): + image_url: Required[ + MessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL + ] + + type: Required[Literal["image_url"]] + + +MessageOpenAIUserMessageParamContentUnionMember1: TypeAlias = Union[ + MessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, + MessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, +] + + +class MessageOpenAIUserMessageParam(TypedDict, total=False): + content: Required[Union[str, Iterable[MessageOpenAIUserMessageParamContentUnionMember1]]] + """The content of the message, which can include text and other media""" + + role: Required[Literal["user"]] + """Must be "user" to identify this as a user message""" + + name: str + """(Optional) The name of the user message participant.""" + + +class MessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam( + TypedDict, total=False +): + text: Required[str] + + type: Required[Literal["text"]] + + +class MessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL( + TypedDict, total=False +): + url: Required[str] + + detail: str + + +class MessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam( + TypedDict, 
total=False +): + image_url: Required[ + MessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL + ] + + type: Required[Literal["image_url"]] + + +MessageOpenAISystemMessageParamContentUnionMember1: TypeAlias = Union[ + MessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, + MessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, +] + + +class MessageOpenAISystemMessageParam(TypedDict, total=False): + content: Required[Union[str, Iterable[MessageOpenAISystemMessageParamContentUnionMember1]]] + """The content of the "system prompt". + + If multiple system messages are provided, they are concatenated. The underlying + Llama Stack code may also add other system messages (for example, for formatting + tool definitions). + """ + + role: Required[Literal["system"]] + """Must be "system" to identify this as a system message""" + + name: str + """(Optional) The name of the system message participant.""" + + +class MessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam( + TypedDict, total=False +): + text: Required[str] + + type: Required[Literal["text"]] + + +class MessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL( + TypedDict, total=False +): + url: Required[str] + + detail: str + + +class MessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam( + TypedDict, total=False +): + image_url: Required[ + MessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL + ] + + type: Required[Literal["image_url"]] + + +MessageOpenAIAssistantMessageParamContentUnionMember1: TypeAlias = Union[ + MessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, + MessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, +] + + +class MessageOpenAIAssistantMessageParamToolCallFunction(TypedDict, total=False): + arguments: str + + name: str + + +class MessageOpenAIAssistantMessageParamToolCall(TypedDict, total=False): + type: Required[Literal["function"]] + + id: str + + function: MessageOpenAIAssistantMessageParamToolCallFunction + + index: int + + +class MessageOpenAIAssistantMessageParam(TypedDict, total=False): + role: Required[Literal["assistant"]] + """Must be "assistant" to identify this as the model's response""" + + content: Union[str, Iterable[MessageOpenAIAssistantMessageParamContentUnionMember1]] + """The content of the model's response""" + + name: str + """(Optional) The name of the assistant message participant.""" + + tool_calls: Iterable[MessageOpenAIAssistantMessageParamToolCall] + """List of tool calls. 
Each tool call is an OpenAIChatCompletionToolCall object.""" + + +class MessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam(TypedDict, total=False): + text: Required[str] + + type: Required[Literal["text"]] + + +class MessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL( + TypedDict, total=False +): + url: Required[str] + + detail: str + + +class MessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam(TypedDict, total=False): + image_url: Required[ + MessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL + ] + + type: Required[Literal["image_url"]] + + +MessageOpenAIToolMessageParamContentUnionMember1: TypeAlias = Union[ + MessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, + MessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, +] + + +class MessageOpenAIToolMessageParam(TypedDict, total=False): + content: Required[Union[str, Iterable[MessageOpenAIToolMessageParamContentUnionMember1]]] + """The response content from the tool""" + + role: Required[Literal["tool"]] + """Must be "tool" to identify this as a tool response""" + + tool_call_id: Required[str] + """Unique identifier for the tool call this response is for""" + + +class MessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam( + TypedDict, total=False +): + text: Required[str] + + type: Required[Literal["text"]] + + +class MessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL( + TypedDict, total=False +): + url: Required[str] + + detail: str + + +class MessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam( + TypedDict, total=False +): + image_url: Required[ + MessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL + ] + + type: Required[Literal["image_url"]] + + +MessageOpenAIDeveloperMessageParamContentUnionMember1: TypeAlias = Union[ + MessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, + MessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, +] + + +class MessageOpenAIDeveloperMessageParam(TypedDict, total=False): + content: Required[Union[str, Iterable[MessageOpenAIDeveloperMessageParamContentUnionMember1]]] + """The content of the developer message""" + + role: Required[Literal["developer"]] + """Must be "developer" to identify this as a developer message""" + + name: str + """(Optional) The name of the developer message participant.""" + + +Message: TypeAlias = Union[ + MessageOpenAIUserMessageParam, + MessageOpenAISystemMessageParam, + MessageOpenAIAssistantMessageParam, + MessageOpenAIToolMessageParam, + MessageOpenAIDeveloperMessageParam, +] + + +class ResponseFormatOpenAIResponseFormatText(TypedDict, total=False): + type: Required[Literal["text"]] + + +class ResponseFormatOpenAIResponseFormatJsonSchemaJsonSchema(TypedDict, total=False): + name: Required[str] + + description: str + + schema: Dict[str, Union[bool, float, str, Iterable[object], object, None]] + + strict: bool + + +class ResponseFormatOpenAIResponseFormatJsonSchema(TypedDict, total=False): + json_schema: Required[ResponseFormatOpenAIResponseFormatJsonSchemaJsonSchema] + + type: Required[Literal["json_schema"]] + + +class ResponseFormatOpenAIResponseFormatJsonObject(TypedDict, 
total=False): + type: Required[Literal["json_object"]] + + +ResponseFormat: TypeAlias = Union[ + ResponseFormatOpenAIResponseFormatText, + ResponseFormatOpenAIResponseFormatJsonSchema, + ResponseFormatOpenAIResponseFormatJsonObject, +] + + +class CompletionCreateParamsNonStreaming(CompletionCreateParamsBase, total=False): + stream: Literal[False] + """(Optional) Whether to stream the response""" + + +class CompletionCreateParamsStreaming(CompletionCreateParamsBase): + stream: Required[Literal[True]] + """(Optional) Whether to stream the response""" + + +CompletionCreateParams = Union[CompletionCreateParamsNonStreaming, CompletionCreateParamsStreaming] diff --git a/src/llama_stack_client/types/chat/completion_create_response.py b/src/llama_stack_client/types/chat/completion_create_response.py new file mode 100644 index 00000000..5c8eb51c --- /dev/null +++ b/src/llama_stack_client/types/chat/completion_create_response.py @@ -0,0 +1,383 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Union, Optional +from typing_extensions import Literal, Annotated, TypeAlias + +from ..._utils import PropertyInfo +from ..._models import BaseModel +from ..chat_completion_chunk import ChatCompletionChunk + +__all__ = [ + "CompletionCreateResponse", + "OpenAIChatCompletion", + "OpenAIChatCompletionChoice", + "OpenAIChatCompletionChoiceMessage", + "OpenAIChatCompletionChoiceMessageOpenAIUserMessageParam", + "OpenAIChatCompletionChoiceMessageOpenAIUserMessageParamContentUnionMember1", + "OpenAIChatCompletionChoiceMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", + "OpenAIChatCompletionChoiceMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", + "OpenAIChatCompletionChoiceMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", + "OpenAIChatCompletionChoiceMessageOpenAISystemMessageParam", + "OpenAIChatCompletionChoiceMessageOpenAISystemMessageParamContentUnionMember1", + "OpenAIChatCompletionChoiceMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", + "OpenAIChatCompletionChoiceMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", + "OpenAIChatCompletionChoiceMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", + "OpenAIChatCompletionChoiceMessageOpenAIAssistantMessageParam", + "OpenAIChatCompletionChoiceMessageOpenAIAssistantMessageParamContentUnionMember1", + "OpenAIChatCompletionChoiceMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", + "OpenAIChatCompletionChoiceMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", + "OpenAIChatCompletionChoiceMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", + "OpenAIChatCompletionChoiceMessageOpenAIAssistantMessageParamToolCall", + "OpenAIChatCompletionChoiceMessageOpenAIAssistantMessageParamToolCallFunction", + "OpenAIChatCompletionChoiceMessageOpenAIToolMessageParam", + "OpenAIChatCompletionChoiceMessageOpenAIToolMessageParamContentUnionMember1", + "OpenAIChatCompletionChoiceMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", + "OpenAIChatCompletionChoiceMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", + 
"OpenAIChatCompletionChoiceMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", + "OpenAIChatCompletionChoiceMessageOpenAIDeveloperMessageParam", + "OpenAIChatCompletionChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1", + "OpenAIChatCompletionChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", + "OpenAIChatCompletionChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", + "OpenAIChatCompletionChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", + "OpenAIChatCompletionChoiceLogprobs", + "OpenAIChatCompletionChoiceLogprobsContent", + "OpenAIChatCompletionChoiceLogprobsContentTopLogprob", + "OpenAIChatCompletionChoiceLogprobsRefusal", + "OpenAIChatCompletionChoiceLogprobsRefusalTopLogprob", +] + + +class OpenAIChatCompletionChoiceMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam( + BaseModel +): + text: str + + type: Literal["text"] + + +class OpenAIChatCompletionChoiceMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL( + BaseModel +): + url: str + + detail: Optional[str] = None + + +class OpenAIChatCompletionChoiceMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam( + BaseModel +): + image_url: OpenAIChatCompletionChoiceMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL + + type: Literal["image_url"] + + +OpenAIChatCompletionChoiceMessageOpenAIUserMessageParamContentUnionMember1: TypeAlias = Annotated[ + Union[ + OpenAIChatCompletionChoiceMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, + OpenAIChatCompletionChoiceMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, + ], + PropertyInfo(discriminator="type"), +] + + +class OpenAIChatCompletionChoiceMessageOpenAIUserMessageParam(BaseModel): + content: Union[str, List[OpenAIChatCompletionChoiceMessageOpenAIUserMessageParamContentUnionMember1]] + """The content of the message, which can include text and other media""" + + role: Literal["user"] + """Must be "user" to identify this as a user message""" + + name: Optional[str] = None + """(Optional) The name of the user message participant.""" + + +class OpenAIChatCompletionChoiceMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam( + BaseModel +): + text: str + + type: Literal["text"] + + +class OpenAIChatCompletionChoiceMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL( + BaseModel +): + url: str + + detail: Optional[str] = None + + +class OpenAIChatCompletionChoiceMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam( + BaseModel +): + image_url: OpenAIChatCompletionChoiceMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL + + type: Literal["image_url"] + + +OpenAIChatCompletionChoiceMessageOpenAISystemMessageParamContentUnionMember1: TypeAlias = Annotated[ + Union[ + OpenAIChatCompletionChoiceMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, + OpenAIChatCompletionChoiceMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, + ], + PropertyInfo(discriminator="type"), +] + + +class 
OpenAIChatCompletionChoiceMessageOpenAISystemMessageParam(BaseModel): + content: Union[str, List[OpenAIChatCompletionChoiceMessageOpenAISystemMessageParamContentUnionMember1]] + """The content of the "system prompt". + + If multiple system messages are provided, they are concatenated. The underlying + Llama Stack code may also add other system messages (for example, for formatting + tool definitions). + """ + + role: Literal["system"] + """Must be "system" to identify this as a system message""" + + name: Optional[str] = None + """(Optional) The name of the system message participant.""" + + +class OpenAIChatCompletionChoiceMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam( + BaseModel +): + text: str + + type: Literal["text"] + + +class OpenAIChatCompletionChoiceMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL( + BaseModel +): + url: str + + detail: Optional[str] = None + + +class OpenAIChatCompletionChoiceMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam( + BaseModel +): + image_url: OpenAIChatCompletionChoiceMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL + + type: Literal["image_url"] + + +OpenAIChatCompletionChoiceMessageOpenAIAssistantMessageParamContentUnionMember1: TypeAlias = Annotated[ + Union[ + OpenAIChatCompletionChoiceMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, + OpenAIChatCompletionChoiceMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, + ], + PropertyInfo(discriminator="type"), +] + + +class OpenAIChatCompletionChoiceMessageOpenAIAssistantMessageParamToolCallFunction(BaseModel): + arguments: Optional[str] = None + + name: Optional[str] = None + + +class OpenAIChatCompletionChoiceMessageOpenAIAssistantMessageParamToolCall(BaseModel): + type: Literal["function"] + + id: Optional[str] = None + + function: Optional[OpenAIChatCompletionChoiceMessageOpenAIAssistantMessageParamToolCallFunction] = None + + index: Optional[int] = None + + +class OpenAIChatCompletionChoiceMessageOpenAIAssistantMessageParam(BaseModel): + role: Literal["assistant"] + """Must be "assistant" to identify this as the model's response""" + + content: Union[str, List[OpenAIChatCompletionChoiceMessageOpenAIAssistantMessageParamContentUnionMember1], None] = ( + None + ) + """The content of the model's response""" + + name: Optional[str] = None + """(Optional) The name of the assistant message participant.""" + + tool_calls: Optional[List[OpenAIChatCompletionChoiceMessageOpenAIAssistantMessageParamToolCall]] = None + """List of tool calls. 
Each tool call is an OpenAIChatCompletionToolCall object.""" + + +class OpenAIChatCompletionChoiceMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam( + BaseModel +): + text: str + + type: Literal["text"] + + +class OpenAIChatCompletionChoiceMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL( + BaseModel +): + url: str + + detail: Optional[str] = None + + +class OpenAIChatCompletionChoiceMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam( + BaseModel +): + image_url: OpenAIChatCompletionChoiceMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL + + type: Literal["image_url"] + + +OpenAIChatCompletionChoiceMessageOpenAIToolMessageParamContentUnionMember1: TypeAlias = Annotated[ + Union[ + OpenAIChatCompletionChoiceMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, + OpenAIChatCompletionChoiceMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, + ], + PropertyInfo(discriminator="type"), +] + + +class OpenAIChatCompletionChoiceMessageOpenAIToolMessageParam(BaseModel): + content: Union[str, List[OpenAIChatCompletionChoiceMessageOpenAIToolMessageParamContentUnionMember1]] + """The response content from the tool""" + + role: Literal["tool"] + """Must be "tool" to identify this as a tool response""" + + tool_call_id: str + """Unique identifier for the tool call this response is for""" + + +class OpenAIChatCompletionChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam( + BaseModel +): + text: str + + type: Literal["text"] + + +class OpenAIChatCompletionChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL( + BaseModel +): + url: str + + detail: Optional[str] = None + + +class OpenAIChatCompletionChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam( + BaseModel +): + image_url: OpenAIChatCompletionChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL + + type: Literal["image_url"] + + +OpenAIChatCompletionChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1: TypeAlias = Annotated[ + Union[ + OpenAIChatCompletionChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, + OpenAIChatCompletionChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, + ], + PropertyInfo(discriminator="type"), +] + + +class OpenAIChatCompletionChoiceMessageOpenAIDeveloperMessageParam(BaseModel): + content: Union[str, List[OpenAIChatCompletionChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1]] + """The content of the developer message""" + + role: Literal["developer"] + """Must be "developer" to identify this as a developer message""" + + name: Optional[str] = None + """(Optional) The name of the developer message participant.""" + + +OpenAIChatCompletionChoiceMessage: TypeAlias = Annotated[ + Union[ + OpenAIChatCompletionChoiceMessageOpenAIUserMessageParam, + OpenAIChatCompletionChoiceMessageOpenAISystemMessageParam, + OpenAIChatCompletionChoiceMessageOpenAIAssistantMessageParam, + OpenAIChatCompletionChoiceMessageOpenAIToolMessageParam, + OpenAIChatCompletionChoiceMessageOpenAIDeveloperMessageParam, + ], + PropertyInfo(discriminator="role"), +] + + +class 
OpenAIChatCompletionChoiceLogprobsContentTopLogprob(BaseModel): + token: str + + logprob: float + + bytes: Optional[List[int]] = None + + +class OpenAIChatCompletionChoiceLogprobsContent(BaseModel): + token: str + + logprob: float + + top_logprobs: List[OpenAIChatCompletionChoiceLogprobsContentTopLogprob] + + bytes: Optional[List[int]] = None + + +class OpenAIChatCompletionChoiceLogprobsRefusalTopLogprob(BaseModel): + token: str + + logprob: float + + bytes: Optional[List[int]] = None + + +class OpenAIChatCompletionChoiceLogprobsRefusal(BaseModel): + token: str + + logprob: float + + top_logprobs: List[OpenAIChatCompletionChoiceLogprobsRefusalTopLogprob] + + bytes: Optional[List[int]] = None + + +class OpenAIChatCompletionChoiceLogprobs(BaseModel): + content: Optional[List[OpenAIChatCompletionChoiceLogprobsContent]] = None + """(Optional) The log probabilities for the tokens in the message""" + + refusal: Optional[List[OpenAIChatCompletionChoiceLogprobsRefusal]] = None + """(Optional) The log probabilities for the tokens in the message""" + + +class OpenAIChatCompletionChoice(BaseModel): + finish_reason: str + """The reason the model stopped generating""" + + index: int + """The index of the choice""" + + message: OpenAIChatCompletionChoiceMessage + """The message from the model""" + + logprobs: Optional[OpenAIChatCompletionChoiceLogprobs] = None + """(Optional) The log probabilities for the tokens in the message""" + + +class OpenAIChatCompletion(BaseModel): + id: str + """The ID of the chat completion""" + + choices: List[OpenAIChatCompletionChoice] + """List of choices""" + + created: int + """The Unix timestamp in seconds when the chat completion was created""" + + model: str + """The model that was used to generate the chat completion""" + + object: Literal["chat.completion"] + """The object type, which will be "chat.completion" """ + + +CompletionCreateResponse: TypeAlias = Union[OpenAIChatCompletion, ChatCompletionChunk] diff --git a/src/llama_stack_client/types/chat_completion_chunk.py b/src/llama_stack_client/types/chat_completion_chunk.py new file mode 100644 index 00000000..7d74663a --- /dev/null +++ b/src/llama_stack_client/types/chat_completion_chunk.py @@ -0,0 +1,124 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
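The streaming overload of chat.completions.create pairs with the ChatCompletionChunk model defined in the new chat_completion_chunk.py below. As a hedged sketch (not part of the generated code), this is roughly how a caller might accumulate the per-choice deltas; it assumes the stream=True overload returns an iterable of ChatCompletionChunk objects and that the client constructor accepts a base_url argument, neither of which is spelled out in this diff.

from llama_stack_client import LlamaStackClient

# Placeholder endpoint; the real base_url depends on your deployment.
client = LlamaStackClient(base_url="http://127.0.0.1:8321")

stream = client.chat.completions.create(
    model="model",
    messages=[{"role": "user", "content": "Write a haiku about GPUs."}],
    stream=True,
)

parts: list[str] = []
for chunk in stream:  # assumed: each item is a ChatCompletionChunk
    delta = chunk.choices[0].delta
    if delta.content is not None:  # ChoiceDelta.content is Optional[str]
        parts.append(delta.content)
print("".join(parts))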
+ +from typing import List, Optional +from typing_extensions import Literal + +from .._models import BaseModel + +__all__ = [ + "ChatCompletionChunk", + "Choice", + "ChoiceDelta", + "ChoiceDeltaToolCall", + "ChoiceDeltaToolCallFunction", + "ChoiceLogprobs", + "ChoiceLogprobsContent", + "ChoiceLogprobsContentTopLogprob", + "ChoiceLogprobsRefusal", + "ChoiceLogprobsRefusalTopLogprob", +] + + +class ChoiceDeltaToolCallFunction(BaseModel): + arguments: Optional[str] = None + + name: Optional[str] = None + + +class ChoiceDeltaToolCall(BaseModel): + type: Literal["function"] + + id: Optional[str] = None + + function: Optional[ChoiceDeltaToolCallFunction] = None + + index: Optional[int] = None + + +class ChoiceDelta(BaseModel): + content: Optional[str] = None + """(Optional) The content of the delta""" + + refusal: Optional[str] = None + """(Optional) The refusal of the delta""" + + role: Optional[str] = None + """(Optional) The role of the delta""" + + tool_calls: Optional[List[ChoiceDeltaToolCall]] = None + """(Optional) The tool calls of the delta""" + + +class ChoiceLogprobsContentTopLogprob(BaseModel): + token: str + + logprob: float + + bytes: Optional[List[int]] = None + + +class ChoiceLogprobsContent(BaseModel): + token: str + + logprob: float + + top_logprobs: List[ChoiceLogprobsContentTopLogprob] + + bytes: Optional[List[int]] = None + + +class ChoiceLogprobsRefusalTopLogprob(BaseModel): + token: str + + logprob: float + + bytes: Optional[List[int]] = None + + +class ChoiceLogprobsRefusal(BaseModel): + token: str + + logprob: float + + top_logprobs: List[ChoiceLogprobsRefusalTopLogprob] + + bytes: Optional[List[int]] = None + + +class ChoiceLogprobs(BaseModel): + content: Optional[List[ChoiceLogprobsContent]] = None + """(Optional) The log probabilities for the tokens in the message""" + + refusal: Optional[List[ChoiceLogprobsRefusal]] = None + """(Optional) The log probabilities for the tokens in the message""" + + +class Choice(BaseModel): + delta: ChoiceDelta + """The delta from the chunk""" + + finish_reason: str + """The reason the model stopped generating""" + + index: int + """The index of the choice""" + + logprobs: Optional[ChoiceLogprobs] = None + """(Optional) The log probabilities for the tokens in the message""" + + +class ChatCompletionChunk(BaseModel): + id: str + """The ID of the chat completion""" + + choices: List[Choice] + """List of choices""" + + created: int + """The Unix timestamp in seconds when the chat completion was created""" + + model: str + """The model that was used to generate the chat completion""" + + object: Literal["chat.completion.chunk"] + """The object type, which will be "chat.completion.chunk" """ diff --git a/src/llama_stack_client/types/completion_create_params.py b/src/llama_stack_client/types/completion_create_params.py new file mode 100644 index 00000000..a92b733e --- /dev/null +++ b/src/llama_stack_client/types/completion_create_params.py @@ -0,0 +1,79 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Dict, List, Union, Iterable +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["CompletionCreateParamsBase", "CompletionCreateParamsNonStreaming", "CompletionCreateParamsStreaming"] + + +class CompletionCreateParamsBase(TypedDict, total=False): + model: Required[str] + """The identifier of the model to use. + + The model must be registered with Llama Stack and available via the /models + endpoint. 
+ """ + + prompt: Required[Union[str, List[str], Iterable[int], Iterable[Iterable[int]]]] + """The prompt to generate a completion for""" + + best_of: int + """(Optional) The number of completions to generate""" + + echo: bool + """(Optional) Whether to echo the prompt""" + + frequency_penalty: float + """(Optional) The penalty for repeated tokens""" + + guided_choice: List[str] + + logit_bias: Dict[str, float] + """(Optional) The logit bias to use""" + + logprobs: bool + """(Optional) The log probabilities to use""" + + max_tokens: int + """(Optional) The maximum number of tokens to generate""" + + n: int + """(Optional) The number of completions to generate""" + + presence_penalty: float + """(Optional) The penalty for repeated tokens""" + + prompt_logprobs: int + + seed: int + """(Optional) The seed to use""" + + stop: Union[str, List[str]] + """(Optional) The stop tokens to use""" + + stream_options: Dict[str, Union[bool, float, str, Iterable[object], object, None]] + """(Optional) The stream options to use""" + + temperature: float + """(Optional) The temperature to use""" + + top_p: float + """(Optional) The top p to use""" + + user: str + """(Optional) The user to use""" + + +class CompletionCreateParamsNonStreaming(CompletionCreateParamsBase, total=False): + stream: Literal[False] + """(Optional) Whether to stream the response""" + + +class CompletionCreateParamsStreaming(CompletionCreateParamsBase): + stream: Required[Literal[True]] + """(Optional) Whether to stream the response""" + + +CompletionCreateParams = Union[CompletionCreateParamsNonStreaming, CompletionCreateParamsStreaming] diff --git a/src/llama_stack_client/types/completion_create_response.py b/src/llama_stack_client/types/completion_create_response.py new file mode 100644 index 00000000..0c43e68a --- /dev/null +++ b/src/llama_stack_client/types/completion_create_response.py @@ -0,0 +1,86 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Optional +from typing_extensions import Literal + +from .._models import BaseModel + +__all__ = [ + "CompletionCreateResponse", + "Choice", + "ChoiceLogprobs", + "ChoiceLogprobsContent", + "ChoiceLogprobsContentTopLogprob", + "ChoiceLogprobsRefusal", + "ChoiceLogprobsRefusalTopLogprob", +] + + +class ChoiceLogprobsContentTopLogprob(BaseModel): + token: str + + logprob: float + + bytes: Optional[List[int]] = None + + +class ChoiceLogprobsContent(BaseModel): + token: str + + logprob: float + + top_logprobs: List[ChoiceLogprobsContentTopLogprob] + + bytes: Optional[List[int]] = None + + +class ChoiceLogprobsRefusalTopLogprob(BaseModel): + token: str + + logprob: float + + bytes: Optional[List[int]] = None + + +class ChoiceLogprobsRefusal(BaseModel): + token: str + + logprob: float + + top_logprobs: List[ChoiceLogprobsRefusalTopLogprob] + + bytes: Optional[List[int]] = None + + +class ChoiceLogprobs(BaseModel): + content: Optional[List[ChoiceLogprobsContent]] = None + """(Optional) The log probabilities for the tokens in the message""" + + refusal: Optional[List[ChoiceLogprobsRefusal]] = None + """(Optional) The log probabilities for the tokens in the message""" + + +class Choice(BaseModel): + finish_reason: str + + index: int + + text: str + + logprobs: Optional[ChoiceLogprobs] = None + """ + The log probabilities for the tokens in the message from an OpenAI-compatible + chat completion response. 
+ """ + + +class CompletionCreateResponse(BaseModel): + id: str + + choices: List[Choice] + + created: int + + model: str + + object: Literal["text_completion"] diff --git a/src/llama_stack_client/types/health_info.py b/src/llama_stack_client/types/health_info.py index f410c8d2..3441ddd1 100644 --- a/src/llama_stack_client/types/health_info.py +++ b/src/llama_stack_client/types/health_info.py @@ -1,5 +1,6 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. +from typing_extensions import Literal from .._models import BaseModel @@ -7,4 +8,4 @@ class HealthInfo(BaseModel): - status: str + status: Literal["OK", "Error", "Not Implemented"] diff --git a/src/llama_stack_client/types/list_benchmarks_response.py b/src/llama_stack_client/types/list_benchmarks_response.py index 4185f3d1..f265f130 100644 --- a/src/llama_stack_client/types/list_benchmarks_response.py +++ b/src/llama_stack_client/types/list_benchmarks_response.py @@ -1,6 +1,5 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - from .._models import BaseModel from .benchmark_list_response import BenchmarkListResponse diff --git a/src/llama_stack_client/types/list_datasets_response.py b/src/llama_stack_client/types/list_datasets_response.py index 635c9c88..5a897f78 100644 --- a/src/llama_stack_client/types/list_datasets_response.py +++ b/src/llama_stack_client/types/list_datasets_response.py @@ -1,6 +1,5 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - from .._models import BaseModel from .dataset_list_response import DatasetListResponse diff --git a/src/llama_stack_client/types/list_models_response.py b/src/llama_stack_client/types/list_models_response.py index 32dcc9d9..a36896b8 100644 --- a/src/llama_stack_client/types/list_models_response.py +++ b/src/llama_stack_client/types/list_models_response.py @@ -1,6 +1,5 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - from .._models import BaseModel from .model_list_response import ModelListResponse diff --git a/src/llama_stack_client/types/list_providers_response.py b/src/llama_stack_client/types/list_providers_response.py index cbe69e3b..4904c0b1 100644 --- a/src/llama_stack_client/types/list_providers_response.py +++ b/src/llama_stack_client/types/list_providers_response.py @@ -1,6 +1,5 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - from .._models import BaseModel from .provider_list_response import ProviderListResponse diff --git a/src/llama_stack_client/types/list_routes_response.py b/src/llama_stack_client/types/list_routes_response.py index 02cbd1e3..59e8392b 100644 --- a/src/llama_stack_client/types/list_routes_response.py +++ b/src/llama_stack_client/types/list_routes_response.py @@ -1,6 +1,5 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - from .._models import BaseModel from .route_list_response import RouteListResponse diff --git a/src/llama_stack_client/types/list_scoring_functions_response.py b/src/llama_stack_client/types/list_scoring_functions_response.py index 845c37be..2c044ba1 100644 --- a/src/llama_stack_client/types/list_scoring_functions_response.py +++ b/src/llama_stack_client/types/list_scoring_functions_response.py @@ -1,6 +1,5 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
- from .._models import BaseModel from .scoring_function_list_response import ScoringFunctionListResponse diff --git a/src/llama_stack_client/types/list_shields_response.py b/src/llama_stack_client/types/list_shields_response.py index 35d1650d..fabbc9da 100644 --- a/src/llama_stack_client/types/list_shields_response.py +++ b/src/llama_stack_client/types/list_shields_response.py @@ -1,6 +1,5 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - from .._models import BaseModel from .shield_list_response import ShieldListResponse diff --git a/src/llama_stack_client/types/list_tool_groups_response.py b/src/llama_stack_client/types/list_tool_groups_response.py index fec39d2f..6433b164 100644 --- a/src/llama_stack_client/types/list_tool_groups_response.py +++ b/src/llama_stack_client/types/list_tool_groups_response.py @@ -1,6 +1,5 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - from .._models import BaseModel from .toolgroup_list_response import ToolgroupListResponse diff --git a/src/llama_stack_client/types/list_tools_response.py b/src/llama_stack_client/types/list_tools_response.py index 02013c4f..c9b4ec6b 100644 --- a/src/llama_stack_client/types/list_tools_response.py +++ b/src/llama_stack_client/types/list_tools_response.py @@ -1,6 +1,5 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - from .._models import BaseModel from .tool_list_response import ToolListResponse diff --git a/src/llama_stack_client/types/list_vector_dbs_response.py b/src/llama_stack_client/types/list_vector_dbs_response.py index 7d64c3d6..fede6c42 100644 --- a/src/llama_stack_client/types/list_vector_dbs_response.py +++ b/src/llama_stack_client/types/list_vector_dbs_response.py @@ -1,6 +1,5 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - from .._models import BaseModel from .vector_db_list_response import VectorDBListResponse diff --git a/src/llama_stack_client/types/post_training_job.py b/src/llama_stack_client/types/post_training_job.py index 8cd98126..d0ba5fce 100644 --- a/src/llama_stack_client/types/post_training_job.py +++ b/src/llama_stack_client/types/post_training_job.py @@ -1,6 +1,5 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
- from .._models import BaseModel __all__ = ["PostTrainingJob"] diff --git a/src/llama_stack_client/types/post_training_preference_optimize_params.py b/src/llama_stack_client/types/post_training_preference_optimize_params.py index 0d79173a..11392907 100644 --- a/src/llama_stack_client/types/post_training_preference_optimize_params.py +++ b/src/llama_stack_client/types/post_training_preference_optimize_params.py @@ -10,8 +10,8 @@ "AlgorithmConfig", "TrainingConfig", "TrainingConfigDataConfig", - "TrainingConfigOptimizerConfig", "TrainingConfigEfficiencyConfig", + "TrainingConfigOptimizerConfig", ] @@ -55,16 +55,6 @@ class TrainingConfigDataConfig(TypedDict, total=False): validation_dataset_id: str -class TrainingConfigOptimizerConfig(TypedDict, total=False): - lr: Required[float] - - num_warmup_steps: Required[int] - - optimizer_type: Required[Literal["adam", "adamw", "sgd"]] - - weight_decay: Required[float] - - class TrainingConfigEfficiencyConfig(TypedDict, total=False): enable_activation_checkpointing: bool @@ -75,19 +65,29 @@ class TrainingConfigEfficiencyConfig(TypedDict, total=False): memory_efficient_fsdp_wrap: bool -class TrainingConfig(TypedDict, total=False): - data_config: Required[TrainingConfigDataConfig] +class TrainingConfigOptimizerConfig(TypedDict, total=False): + lr: Required[float] + + num_warmup_steps: Required[int] + optimizer_type: Required[Literal["adam", "adamw", "sgd"]] + + weight_decay: Required[float] + + +class TrainingConfig(TypedDict, total=False): gradient_accumulation_steps: Required[int] max_steps_per_epoch: Required[int] - max_validation_steps: Required[int] - n_epochs: Required[int] - optimizer_config: Required[TrainingConfigOptimizerConfig] + data_config: TrainingConfigDataConfig dtype: str efficiency_config: TrainingConfigEfficiencyConfig + + max_validation_steps: int + + optimizer_config: TrainingConfigOptimizerConfig diff --git a/src/llama_stack_client/types/post_training_supervised_fine_tune_params.py b/src/llama_stack_client/types/post_training_supervised_fine_tune_params.py index fa18742a..ad298817 100644 --- a/src/llama_stack_client/types/post_training_supervised_fine_tune_params.py +++ b/src/llama_stack_client/types/post_training_supervised_fine_tune_params.py @@ -11,8 +11,8 @@ "PostTrainingSupervisedFineTuneParams", "TrainingConfig", "TrainingConfigDataConfig", - "TrainingConfigOptimizerConfig", "TrainingConfigEfficiencyConfig", + "TrainingConfigOptimizerConfig", ] @@ -23,14 +23,14 @@ class PostTrainingSupervisedFineTuneParams(TypedDict, total=False): logger_config: Required[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] - model: Required[str] - training_config: Required[TrainingConfig] algorithm_config: AlgorithmConfigParam checkpoint_dir: str + model: str + class TrainingConfigDataConfig(TypedDict, total=False): batch_size: Required[int] @@ -48,16 +48,6 @@ class TrainingConfigDataConfig(TypedDict, total=False): validation_dataset_id: str -class TrainingConfigOptimizerConfig(TypedDict, total=False): - lr: Required[float] - - num_warmup_steps: Required[int] - - optimizer_type: Required[Literal["adam", "adamw", "sgd"]] - - weight_decay: Required[float] - - class TrainingConfigEfficiencyConfig(TypedDict, total=False): enable_activation_checkpointing: bool @@ -68,19 +58,29 @@ class TrainingConfigEfficiencyConfig(TypedDict, total=False): memory_efficient_fsdp_wrap: bool -class TrainingConfig(TypedDict, total=False): - data_config: Required[TrainingConfigDataConfig] +class TrainingConfigOptimizerConfig(TypedDict, total=False): + 
lr: Required[float] + + num_warmup_steps: Required[int] + + optimizer_type: Required[Literal["adam", "adamw", "sgd"]] + weight_decay: Required[float] + + +class TrainingConfig(TypedDict, total=False): gradient_accumulation_steps: Required[int] max_steps_per_epoch: Required[int] - max_validation_steps: Required[int] - n_epochs: Required[int] - optimizer_config: Required[TrainingConfigOptimizerConfig] + data_config: TrainingConfigDataConfig dtype: str efficiency_config: TrainingConfigEfficiencyConfig + + max_validation_steps: int + + optimizer_config: TrainingConfigOptimizerConfig diff --git a/src/llama_stack_client/types/provider_info.py b/src/llama_stack_client/types/provider_info.py index 3e0d0d85..c9c748cc 100644 --- a/src/llama_stack_client/types/provider_info.py +++ b/src/llama_stack_client/types/provider_info.py @@ -12,6 +12,8 @@ class ProviderInfo(BaseModel): config: Dict[str, Union[bool, float, str, List[object], object, None]] + health: Dict[str, Union[bool, float, str, List[object], object, None]] + provider_id: str provider_type: str diff --git a/src/llama_stack_client/types/query_spans_response.py b/src/llama_stack_client/types/query_spans_response.py index 5c54e623..488a4331 100644 --- a/src/llama_stack_client/types/query_spans_response.py +++ b/src/llama_stack_client/types/query_spans_response.py @@ -1,6 +1,5 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - from .._models import BaseModel from .telemetry_query_spans_response import TelemetryQuerySpansResponse diff --git a/src/llama_stack_client/types/shared/agent_config.py b/src/llama_stack_client/types/shared/agent_config.py index 04997ac4..eb116159 100644 --- a/src/llama_stack_client/types/shared/agent_config.py +++ b/src/llama_stack_client/types/shared/agent_config.py @@ -51,21 +51,27 @@ class ToolgroupAgentToolGroupWithArgs(BaseModel): class AgentConfig(BaseModel): instructions: str + """The system instructions for the agent""" model: str + """The model identifier to use for the agent""" client_tools: Optional[List[ToolDef]] = None enable_session_persistence: Optional[bool] = None + """Optional flag indicating whether session data has to be persisted""" input_shields: Optional[List[str]] = None max_infer_iters: Optional[int] = None + name: Optional[str] = None + """Optional name for the agent, used in telemetry and identification""" + output_shields: Optional[List[str]] = None response_format: Optional[ResponseFormat] = None - """Configuration for JSON schema-guided response generation.""" + """Optional response format configuration""" sampling_params: Optional[SamplingParams] = None """Sampling parameters.""" diff --git a/src/llama_stack_client/types/shared/query_config.py b/src/llama_stack_client/types/shared/query_config.py index 1bfd872a..679f7dcb 100644 --- a/src/llama_stack_client/types/shared/query_config.py +++ b/src/llama_stack_client/types/shared/query_config.py @@ -1,6 +1,5 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
- from ..._models import BaseModel from .query_generator_config import QueryGeneratorConfig diff --git a/src/llama_stack_client/types/shared_params/agent_config.py b/src/llama_stack_client/types/shared_params/agent_config.py index f07efa39..5cebec3f 100644 --- a/src/llama_stack_client/types/shared_params/agent_config.py +++ b/src/llama_stack_client/types/shared_params/agent_config.py @@ -52,21 +52,27 @@ class ToolgroupAgentToolGroupWithArgs(TypedDict, total=False): class AgentConfig(TypedDict, total=False): instructions: Required[str] + """The system instructions for the agent""" model: Required[str] + """The model identifier to use for the agent""" client_tools: Iterable[ToolDefParam] enable_session_persistence: bool + """Optional flag indicating whether session data has to be persisted""" input_shields: List[str] max_infer_iters: int + name: str + """Optional name for the agent, used in telemetry and identification""" + output_shields: List[str] response_format: ResponseFormat - """Configuration for JSON schema-guided response generation.""" + """Optional response format configuration""" sampling_params: SamplingParams """Sampling parameters.""" diff --git a/src/llama_stack_client/types/version_info.py b/src/llama_stack_client/types/version_info.py index 3e877545..5fc5bbb4 100644 --- a/src/llama_stack_client/types/version_info.py +++ b/src/llama_stack_client/types/version_info.py @@ -1,6 +1,5 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - from .._models import BaseModel __all__ = ["VersionInfo"] diff --git a/tests/api_resources/chat/__init__.py b/tests/api_resources/chat/__init__.py new file mode 100644 index 00000000..fd8019a9 --- /dev/null +++ b/tests/api_resources/chat/__init__.py @@ -0,0 +1 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. diff --git a/tests/api_resources/chat/test_completions.py b/tests/api_resources/chat/test_completions.py new file mode 100644 index 00000000..5c3d96c3 --- /dev/null +++ b/tests/api_resources/chat/test_completions.py @@ -0,0 +1,362 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
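The generated tests below drive chat.completions.create with plain dicts. For orientation, here is a hedged sketch of a typed, non-streaming call built from the TypedDicts in completion_create_params.py and the json_schema response format added earlier in this diff; the import paths simply mirror the file locations shown above, the base_url is a placeholder, and the schema is a made-up example.

from llama_stack_client import LlamaStackClient
from llama_stack_client.types.chat.completion_create_params import (
    MessageOpenAISystemMessageParam,
    MessageOpenAIUserMessageParam,
)
from llama_stack_client.types.chat.completion_create_response import OpenAIChatCompletion

client = LlamaStackClient(base_url="http://127.0.0.1:8321")  # placeholder endpoint

system: MessageOpenAISystemMessageParam = {
    "role": "system",
    "content": "You extract structured data from text.",
}
user: MessageOpenAIUserMessageParam = {
    "role": "user",
    "content": [{"type": "text", "text": "Alice is 30 years old."}],
}

completion = client.chat.completions.create(
    model="model",
    messages=[system, user],
    response_format={
        "type": "json_schema",
        "json_schema": {
            "name": "person",  # example schema, not defined anywhere in this diff
            "schema": {
                "type": "object",
                "properties": {"name": {"type": "string"}, "age": {"type": "integer"}},
            },
        },
    },
)

# The non-streaming overload is expected to resolve to OpenAIChatCompletion.
if isinstance(completion, OpenAIChatCompletion):
    print(completion.choices[0].message)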
+ +from __future__ import annotations + +import os +from typing import Any, cast + +import pytest + +from tests.utils import assert_matches_type +from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient +from llama_stack_client.types.chat import CompletionCreateResponse + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") + + +class TestCompletions: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + def test_method_create_overload_1(self, client: LlamaStackClient) -> None: + completion = client.chat.completions.create( + messages=[ + { + "content": "string", + "role": "user", + } + ], + model="model", + ) + assert_matches_type(CompletionCreateResponse, completion, path=["response"]) + + @parametrize + def test_method_create_with_all_params_overload_1(self, client: LlamaStackClient) -> None: + completion = client.chat.completions.create( + messages=[ + { + "content": "string", + "role": "user", + "name": "name", + } + ], + model="model", + frequency_penalty=0, + function_call="string", + functions=[{"foo": True}], + logit_bias={"foo": 0}, + logprobs=True, + max_completion_tokens=0, + max_tokens=0, + n=0, + parallel_tool_calls=True, + presence_penalty=0, + response_format={"type": "text"}, + seed=0, + stop="string", + stream=False, + stream_options={"foo": True}, + temperature=0, + tool_choice="string", + tools=[{"foo": True}], + top_logprobs=0, + top_p=0, + user="user", + ) + assert_matches_type(CompletionCreateResponse, completion, path=["response"]) + + @parametrize + def test_raw_response_create_overload_1(self, client: LlamaStackClient) -> None: + response = client.chat.completions.with_raw_response.create( + messages=[ + { + "content": "string", + "role": "user", + } + ], + model="model", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + completion = response.parse() + assert_matches_type(CompletionCreateResponse, completion, path=["response"]) + + @parametrize + def test_streaming_response_create_overload_1(self, client: LlamaStackClient) -> None: + with client.chat.completions.with_streaming_response.create( + messages=[ + { + "content": "string", + "role": "user", + } + ], + model="model", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + completion = response.parse() + assert_matches_type(CompletionCreateResponse, completion, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_method_create_overload_2(self, client: LlamaStackClient) -> None: + completion_stream = client.chat.completions.create( + messages=[ + { + "content": "string", + "role": "user", + } + ], + model="model", + stream=True, + ) + completion_stream.response.close() + + @parametrize + def test_method_create_with_all_params_overload_2(self, client: LlamaStackClient) -> None: + completion_stream = client.chat.completions.create( + messages=[ + { + "content": "string", + "role": "user", + "name": "name", + } + ], + model="model", + stream=True, + frequency_penalty=0, + function_call="string", + functions=[{"foo": True}], + logit_bias={"foo": 0}, + logprobs=True, + max_completion_tokens=0, + max_tokens=0, + n=0, + parallel_tool_calls=True, + presence_penalty=0, + response_format={"type": "text"}, + seed=0, + stop="string", + stream_options={"foo": True}, + temperature=0, + tool_choice="string", + tools=[{"foo": True}], + 
top_logprobs=0, + top_p=0, + user="user", + ) + completion_stream.response.close() + + @parametrize + def test_raw_response_create_overload_2(self, client: LlamaStackClient) -> None: + response = client.chat.completions.with_raw_response.create( + messages=[ + { + "content": "string", + "role": "user", + } + ], + model="model", + stream=True, + ) + + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + stream = response.parse() + stream.close() + + @parametrize + def test_streaming_response_create_overload_2(self, client: LlamaStackClient) -> None: + with client.chat.completions.with_streaming_response.create( + messages=[ + { + "content": "string", + "role": "user", + } + ], + model="model", + stream=True, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + stream = response.parse() + stream.close() + + assert cast(Any, response.is_closed) is True + + +class TestAsyncCompletions: + parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + async def test_method_create_overload_1(self, async_client: AsyncLlamaStackClient) -> None: + completion = await async_client.chat.completions.create( + messages=[ + { + "content": "string", + "role": "user", + } + ], + model="model", + ) + assert_matches_type(CompletionCreateResponse, completion, path=["response"]) + + @parametrize + async def test_method_create_with_all_params_overload_1(self, async_client: AsyncLlamaStackClient) -> None: + completion = await async_client.chat.completions.create( + messages=[ + { + "content": "string", + "role": "user", + "name": "name", + } + ], + model="model", + frequency_penalty=0, + function_call="string", + functions=[{"foo": True}], + logit_bias={"foo": 0}, + logprobs=True, + max_completion_tokens=0, + max_tokens=0, + n=0, + parallel_tool_calls=True, + presence_penalty=0, + response_format={"type": "text"}, + seed=0, + stop="string", + stream=False, + stream_options={"foo": True}, + temperature=0, + tool_choice="string", + tools=[{"foo": True}], + top_logprobs=0, + top_p=0, + user="user", + ) + assert_matches_type(CompletionCreateResponse, completion, path=["response"]) + + @parametrize + async def test_raw_response_create_overload_1(self, async_client: AsyncLlamaStackClient) -> None: + response = await async_client.chat.completions.with_raw_response.create( + messages=[ + { + "content": "string", + "role": "user", + } + ], + model="model", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + completion = await response.parse() + assert_matches_type(CompletionCreateResponse, completion, path=["response"]) + + @parametrize + async def test_streaming_response_create_overload_1(self, async_client: AsyncLlamaStackClient) -> None: + async with async_client.chat.completions.with_streaming_response.create( + messages=[ + { + "content": "string", + "role": "user", + } + ], + model="model", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + completion = await response.parse() + assert_matches_type(CompletionCreateResponse, completion, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_method_create_overload_2(self, async_client: AsyncLlamaStackClient) -> None: + completion_stream = await async_client.chat.completions.create( + messages=[ + { + "content": "string", + 
"role": "user", + } + ], + model="model", + stream=True, + ) + await completion_stream.response.aclose() + + @parametrize + async def test_method_create_with_all_params_overload_2(self, async_client: AsyncLlamaStackClient) -> None: + completion_stream = await async_client.chat.completions.create( + messages=[ + { + "content": "string", + "role": "user", + "name": "name", + } + ], + model="model", + stream=True, + frequency_penalty=0, + function_call="string", + functions=[{"foo": True}], + logit_bias={"foo": 0}, + logprobs=True, + max_completion_tokens=0, + max_tokens=0, + n=0, + parallel_tool_calls=True, + presence_penalty=0, + response_format={"type": "text"}, + seed=0, + stop="string", + stream_options={"foo": True}, + temperature=0, + tool_choice="string", + tools=[{"foo": True}], + top_logprobs=0, + top_p=0, + user="user", + ) + await completion_stream.response.aclose() + + @parametrize + async def test_raw_response_create_overload_2(self, async_client: AsyncLlamaStackClient) -> None: + response = await async_client.chat.completions.with_raw_response.create( + messages=[ + { + "content": "string", + "role": "user", + } + ], + model="model", + stream=True, + ) + + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + stream = await response.parse() + await stream.close() + + @parametrize + async def test_streaming_response_create_overload_2(self, async_client: AsyncLlamaStackClient) -> None: + async with async_client.chat.completions.with_streaming_response.create( + messages=[ + { + "content": "string", + "role": "user", + } + ], + model="model", + stream=True, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + stream = await response.parse() + await stream.close() + + assert cast(Any, response.is_closed) is True diff --git a/tests/api_resources/test_agents.py b/tests/api_resources/test_agents.py index 235d6258..1c0478a6 100644 --- a/tests/api_resources/test_agents.py +++ b/tests/api_resources/test_agents.py @@ -52,6 +52,7 @@ def test_method_create_with_all_params(self, client: LlamaStackClient) -> None: "enable_session_persistence": True, "input_shields": ["string"], "max_infer_iters": 0, + "name": "name", "output_shields": ["string"], "response_format": { "json_schema": {"foo": True}, @@ -182,6 +183,7 @@ async def test_method_create_with_all_params(self, async_client: AsyncLlamaStack "enable_session_persistence": True, "input_shields": ["string"], "max_infer_iters": 0, + "name": "name", "output_shields": ["string"], "response_format": { "json_schema": {"foo": True}, diff --git a/tests/api_resources/test_completions.py b/tests/api_resources/test_completions.py new file mode 100644 index 00000000..30e15b7b --- /dev/null +++ b/tests/api_resources/test_completions.py @@ -0,0 +1,262 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+
+from __future__ import annotations
+
+import os
+from typing import Any, cast
+
+import pytest
+
+from tests.utils import assert_matches_type
+from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient
+from llama_stack_client.types import CompletionCreateResponse
+
+base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
+
+
+class TestCompletions:
+    parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
+
+    @parametrize
+    def test_method_create_overload_1(self, client: LlamaStackClient) -> None:
+        completion = client.completions.create(
+            model="model",
+            prompt="string",
+        )
+        assert_matches_type(CompletionCreateResponse, completion, path=["response"])
+
+    @parametrize
+    def test_method_create_with_all_params_overload_1(self, client: LlamaStackClient) -> None:
+        completion = client.completions.create(
+            model="model",
+            prompt="string",
+            best_of=0,
+            echo=True,
+            frequency_penalty=0,
+            guided_choice=["string"],
+            logit_bias={"foo": 0},
+            logprobs=True,
+            max_tokens=0,
+            n=0,
+            presence_penalty=0,
+            prompt_logprobs=0,
+            seed=0,
+            stop="string",
+            stream=False,
+            stream_options={"foo": True},
+            temperature=0,
+            top_p=0,
+            user="user",
+        )
+        assert_matches_type(CompletionCreateResponse, completion, path=["response"])
+
+    @parametrize
+    def test_raw_response_create_overload_1(self, client: LlamaStackClient) -> None:
+        response = client.completions.with_raw_response.create(
+            model="model",
+            prompt="string",
+        )
+
+        assert response.is_closed is True
+        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+        completion = response.parse()
+        assert_matches_type(CompletionCreateResponse, completion, path=["response"])
+
+    @parametrize
+    def test_streaming_response_create_overload_1(self, client: LlamaStackClient) -> None:
+        with client.completions.with_streaming_response.create(
+            model="model",
+            prompt="string",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            completion = response.parse()
+            assert_matches_type(CompletionCreateResponse, completion, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
+    @parametrize
+    def test_method_create_overload_2(self, client: LlamaStackClient) -> None:
+        completion_stream = client.completions.create(
+            model="model",
+            prompt="string",
+            stream=True,
+        )
+        completion_stream.response.close()
+
+    @parametrize
+    def test_method_create_with_all_params_overload_2(self, client: LlamaStackClient) -> None:
+        completion_stream = client.completions.create(
+            model="model",
+            prompt="string",
+            stream=True,
+            best_of=0,
+            echo=True,
+            frequency_penalty=0,
+            guided_choice=["string"],
+            logit_bias={"foo": 0},
+            logprobs=True,
+            max_tokens=0,
+            n=0,
+            presence_penalty=0,
+            prompt_logprobs=0,
+            seed=0,
+            stop="string",
+            stream_options={"foo": True},
+            temperature=0,
+            top_p=0,
+            user="user",
+        )
+        completion_stream.response.close()
+
+    @parametrize
+    def test_raw_response_create_overload_2(self, client: LlamaStackClient) -> None:
+        response = client.completions.with_raw_response.create(
+            model="model",
+            prompt="string",
+            stream=True,
+        )
+
+        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+        stream = response.parse()
+        stream.close()
+
+    @parametrize
+    def test_streaming_response_create_overload_2(self, client: LlamaStackClient) -> None:
+        with client.completions.with_streaming_response.create(
+            model="model",
+            prompt="string",
+            stream=True,
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            stream = response.parse()
+            stream.close()
+
+        assert cast(Any, response.is_closed) is True
+
+
+class TestAsyncCompletions:
+    parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"])
+
+    @parametrize
+    async def test_method_create_overload_1(self, async_client: AsyncLlamaStackClient) -> None:
+        completion = await async_client.completions.create(
+            model="model",
+            prompt="string",
+        )
+        assert_matches_type(CompletionCreateResponse, completion, path=["response"])
+
+    @parametrize
+    async def test_method_create_with_all_params_overload_1(self, async_client: AsyncLlamaStackClient) -> None:
+        completion = await async_client.completions.create(
+            model="model",
+            prompt="string",
+            best_of=0,
+            echo=True,
+            frequency_penalty=0,
+            guided_choice=["string"],
+            logit_bias={"foo": 0},
+            logprobs=True,
+            max_tokens=0,
+            n=0,
+            presence_penalty=0,
+            prompt_logprobs=0,
+            seed=0,
+            stop="string",
+            stream=False,
+            stream_options={"foo": True},
+            temperature=0,
+            top_p=0,
+            user="user",
+        )
+        assert_matches_type(CompletionCreateResponse, completion, path=["response"])
+
+    @parametrize
+    async def test_raw_response_create_overload_1(self, async_client: AsyncLlamaStackClient) -> None:
+        response = await async_client.completions.with_raw_response.create(
+            model="model",
+            prompt="string",
+        )
+
+        assert response.is_closed is True
+        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+        completion = await response.parse()
+        assert_matches_type(CompletionCreateResponse, completion, path=["response"])
+
+    @parametrize
+    async def test_streaming_response_create_overload_1(self, async_client: AsyncLlamaStackClient) -> None:
+        async with async_client.completions.with_streaming_response.create(
+            model="model",
+            prompt="string",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            completion = await response.parse()
+            assert_matches_type(CompletionCreateResponse, completion, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
+    @parametrize
+    async def test_method_create_overload_2(self, async_client: AsyncLlamaStackClient) -> None:
+        completion_stream = await async_client.completions.create(
+            model="model",
+            prompt="string",
+            stream=True,
+        )
+        await completion_stream.response.aclose()
+
+    @parametrize
+    async def test_method_create_with_all_params_overload_2(self, async_client: AsyncLlamaStackClient) -> None:
+        completion_stream = await async_client.completions.create(
+            model="model",
+            prompt="string",
+            stream=True,
+            best_of=0,
+            echo=True,
+            frequency_penalty=0,
+            guided_choice=["string"],
+            logit_bias={"foo": 0},
+            logprobs=True,
+            max_tokens=0,
+            n=0,
+            presence_penalty=0,
+            prompt_logprobs=0,
+            seed=0,
+            stop="string",
+            stream_options={"foo": True},
+            temperature=0,
+            top_p=0,
+            user="user",
+        )
+        await completion_stream.response.aclose()
+
+    @parametrize
+    async def test_raw_response_create_overload_2(self, async_client: AsyncLlamaStackClient) -> None:
+        response = await async_client.completions.with_raw_response.create(
+            model="model",
+            prompt="string",
+            stream=True,
+        )
+
+        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+        stream = await response.parse()
+        await stream.close()
+
+    @parametrize
+    async def test_streaming_response_create_overload_2(self, async_client: AsyncLlamaStackClient) -> None:
+        async with async_client.completions.with_streaming_response.create(
+            model="model",
+            prompt="string",
+            stream=True,
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            stream = await response.parse()
+            await stream.close()
+
+        assert cast(Any, response.is_closed) is True
diff --git a/tests/api_resources/test_post_training.py b/tests/api_resources/test_post_training.py
index 98047e4c..1d0613da 100644
--- a/tests/api_resources/test_post_training.py
+++ b/tests/api_resources/test_post_training.py
@@ -33,22 +33,9 @@ def test_method_preference_optimize(self, client: LlamaStackClient) -> None:
             job_uuid="job_uuid",
             logger_config={"foo": True},
             training_config={
-                "data_config": {
-                    "batch_size": 0,
-                    "data_format": "instruct",
-                    "dataset_id": "dataset_id",
-                    "shuffle": True,
-                },
                 "gradient_accumulation_steps": 0,
                 "max_steps_per_epoch": 0,
-                "max_validation_steps": 0,
                 "n_epochs": 0,
-                "optimizer_config": {
-                    "lr": 0,
-                    "num_warmup_steps": 0,
-                    "optimizer_type": "adam",
-                    "weight_decay": 0,
-                },
             },
         )
         assert_matches_type(PostTrainingJob, post_training, path=["response"])
@@ -67,6 +54,9 @@ def test_method_preference_optimize_with_all_params(self, client: LlamaStackClie
             job_uuid="job_uuid",
             logger_config={"foo": True},
             training_config={
+                "gradient_accumulation_steps": 0,
+                "max_steps_per_epoch": 0,
+                "n_epochs": 0,
                 "data_config": {
                     "batch_size": 0,
                     "data_format": "instruct",
@@ -76,16 +66,6 @@ def test_method_preference_optimize_with_all_params(self, client: LlamaStackClie
                     "train_on_input": True,
                     "validation_dataset_id": "validation_dataset_id",
                 },
-                "gradient_accumulation_steps": 0,
-                "max_steps_per_epoch": 0,
-                "max_validation_steps": 0,
-                "n_epochs": 0,
-                "optimizer_config": {
-                    "lr": 0,
-                    "num_warmup_steps": 0,
-                    "optimizer_type": "adam",
-                    "weight_decay": 0,
-                },
                 "dtype": "dtype",
                 "efficiency_config": {
                     "enable_activation_checkpointing": True,
@@ -93,6 +73,13 @@ def test_method_preference_optimize_with_all_params(self, client: LlamaStackClie
                     "fsdp_cpu_offload": True,
                     "memory_efficient_fsdp_wrap": True,
                 },
+                "max_validation_steps": 0,
+                "optimizer_config": {
+                    "lr": 0,
+                    "num_warmup_steps": 0,
+                    "optimizer_type": "adam",
+                    "weight_decay": 0,
+                },
             },
         )
         assert_matches_type(PostTrainingJob, post_training, path=["response"])
@@ -111,22 +98,9 @@ def test_raw_response_preference_optimize(self, client: LlamaStackClient) -> Non
             job_uuid="job_uuid",
             logger_config={"foo": True},
             training_config={
-                "data_config": {
-                    "batch_size": 0,
-                    "data_format": "instruct",
-                    "dataset_id": "dataset_id",
-                    "shuffle": True,
-                },
                 "gradient_accumulation_steps": 0,
                 "max_steps_per_epoch": 0,
-                "max_validation_steps": 0,
                 "n_epochs": 0,
-                "optimizer_config": {
-                    "lr": 0,
-                    "num_warmup_steps": 0,
-                    "optimizer_type": "adam",
-                    "weight_decay": 0,
-                },
             },
         )

@@ -149,22 +123,9 @@ def test_streaming_response_preference_optimize(self, client: LlamaStackClient)
             job_uuid="job_uuid",
             logger_config={"foo": True},
             training_config={
-                "data_config": {
-                    "batch_size": 0,
-                    "data_format": "instruct",
-                    "dataset_id": "dataset_id",
-                    "shuffle": True,
-                },
                 "gradient_accumulation_steps": 0,
                 "max_steps_per_epoch": 0,
-                "max_validation_steps": 0,
                 "n_epochs": 0,
-                "optimizer_config": {
-                    "lr": 0,
-                    "num_warmup_steps": 0,
-                    "optimizer_type": "adam",
-                    "weight_decay": 0,
-                },
             },
         ) as response:
             assert not response.is_closed
@@ -181,24 +142,10 @@ def test_method_supervised_fine_tune(self, client: LlamaStackClient) -> None:
             hyperparam_search_config={"foo": True},
             job_uuid="job_uuid",
             logger_config={"foo": True},
-            model="model",
             training_config={
-                "data_config": {
-                    "batch_size": 0,
-                    "data_format": "instruct",
-                    "dataset_id": "dataset_id",
-                    "shuffle": True,
-                },
                 "gradient_accumulation_steps": 0,
                 "max_steps_per_epoch": 0,
-                "max_validation_steps": 0,
                 "n_epochs": 0,
-                "optimizer_config": {
-                    "lr": 0,
-                    "num_warmup_steps": 0,
-                    "optimizer_type": "adam",
-                    "weight_decay": 0,
-                },
             },
         )
         assert_matches_type(PostTrainingJob, post_training, path=["response"])
@@ -209,8 +156,10 @@ def test_method_supervised_fine_tune_with_all_params(self, client: LlamaStackCli
             hyperparam_search_config={"foo": True},
             job_uuid="job_uuid",
             logger_config={"foo": True},
-            model="model",
             training_config={
+                "gradient_accumulation_steps": 0,
+                "max_steps_per_epoch": 0,
+                "n_epochs": 0,
                 "data_config": {
                     "batch_size": 0,
                     "data_format": "instruct",
@@ -220,16 +169,6 @@ def test_method_supervised_fine_tune_with_all_params(self, client: LlamaStackCli
                     "train_on_input": True,
                     "validation_dataset_id": "validation_dataset_id",
                 },
-                "gradient_accumulation_steps": 0,
-                "max_steps_per_epoch": 0,
-                "max_validation_steps": 0,
-                "n_epochs": 0,
-                "optimizer_config": {
-                    "lr": 0,
-                    "num_warmup_steps": 0,
-                    "optimizer_type": "adam",
-                    "weight_decay": 0,
-                },
                 "dtype": "dtype",
                 "efficiency_config": {
                     "enable_activation_checkpointing": True,
@@ -237,6 +176,13 @@ def test_method_supervised_fine_tune_with_all_params(self, client: LlamaStackCli
                     "fsdp_cpu_offload": True,
                     "memory_efficient_fsdp_wrap": True,
                 },
+                "max_validation_steps": 0,
+                "optimizer_config": {
+                    "lr": 0,
+                    "num_warmup_steps": 0,
+                    "optimizer_type": "adam",
+                    "weight_decay": 0,
+                },
             },
             algorithm_config={
                 "alpha": 0,
@@ -249,6 +195,7 @@ def test_method_supervised_fine_tune_with_all_params(self, client: LlamaStackCli
                 "use_dora": True,
             },
             checkpoint_dir="checkpoint_dir",
+            model="model",
         )
         assert_matches_type(PostTrainingJob, post_training, path=["response"])

@@ -258,24 +205,10 @@ def test_raw_response_supervised_fine_tune(self, client: LlamaStackClient) -> No
             hyperparam_search_config={"foo": True},
             job_uuid="job_uuid",
             logger_config={"foo": True},
-            model="model",
             training_config={
-                "data_config": {
-                    "batch_size": 0,
-                    "data_format": "instruct",
-                    "dataset_id": "dataset_id",
-                    "shuffle": True,
-                },
                 "gradient_accumulation_steps": 0,
                 "max_steps_per_epoch": 0,
-                "max_validation_steps": 0,
                 "n_epochs": 0,
-                "optimizer_config": {
-                    "lr": 0,
-                    "num_warmup_steps": 0,
-                    "optimizer_type": "adam",
-                    "weight_decay": 0,
-                },
             },
         )

@@ -290,24 +223,10 @@ def test_streaming_response_supervised_fine_tune(self, client: LlamaStackClient)
             hyperparam_search_config={"foo": True},
             job_uuid="job_uuid",
             logger_config={"foo": True},
-            model="model",
             training_config={
-                "data_config": {
-                    "batch_size": 0,
-                    "data_format": "instruct",
-                    "dataset_id": "dataset_id",
-                    "shuffle": True,
-                },
                 "gradient_accumulation_steps": 0,
                 "max_steps_per_epoch": 0,
-                "max_validation_steps": 0,
                 "n_epochs": 0,
-                "optimizer_config": {
-                    "lr": 0,
-                    "num_warmup_steps": 0,
-                    "optimizer_type": "adam",
-                    "weight_decay": 0,
-                },
             },
         ) as response:
             assert not response.is_closed
@@ -336,22 +255,9 @@ async def test_method_preference_optimize(self, async_client: AsyncLlamaStackCli
             job_uuid="job_uuid",
             logger_config={"foo": True},
             training_config={
-                "data_config": {
-                    "batch_size": 0,
-                    "data_format": "instruct",
-                    "dataset_id": "dataset_id",
-                    "shuffle": True,
-                },
                 "gradient_accumulation_steps": 0,
                 "max_steps_per_epoch": 0,
-                "max_validation_steps": 0,
                 "n_epochs": 0,
-                "optimizer_config": {
-                    "lr": 0,
-                    "num_warmup_steps": 0,
-                    "optimizer_type": "adam",
-                    "weight_decay": 0,
-                },
             },
         )
         assert_matches_type(PostTrainingJob, post_training, path=["response"])
@@ -370,6 +276,9 @@ async def test_method_preference_optimize_with_all_params(self, async_client: As
             job_uuid="job_uuid",
             logger_config={"foo": True},
             training_config={
+                "gradient_accumulation_steps": 0,
+                "max_steps_per_epoch": 0,
+                "n_epochs": 0,
                 "data_config": {
                     "batch_size": 0,
                     "data_format": "instruct",
@@ -379,16 +288,6 @@ async def test_method_preference_optimize_with_all_params(self, async_client: As
                     "train_on_input": True,
                     "validation_dataset_id": "validation_dataset_id",
                 },
-                "gradient_accumulation_steps": 0,
-                "max_steps_per_epoch": 0,
-                "max_validation_steps": 0,
-                "n_epochs": 0,
-                "optimizer_config": {
-                    "lr": 0,
-                    "num_warmup_steps": 0,
-                    "optimizer_type": "adam",
-                    "weight_decay": 0,
-                },
                 "dtype": "dtype",
                 "efficiency_config": {
                     "enable_activation_checkpointing": True,
@@ -396,6 +295,13 @@ async def test_method_preference_optimize_with_all_params(self, async_client: As
                     "fsdp_cpu_offload": True,
                     "memory_efficient_fsdp_wrap": True,
                 },
+                "max_validation_steps": 0,
+                "optimizer_config": {
+                    "lr": 0,
+                    "num_warmup_steps": 0,
+                    "optimizer_type": "adam",
+                    "weight_decay": 0,
+                },
             },
         )
         assert_matches_type(PostTrainingJob, post_training, path=["response"])
@@ -414,22 +320,9 @@ async def test_raw_response_preference_optimize(self, async_client: AsyncLlamaSt
             job_uuid="job_uuid",
             logger_config={"foo": True},
             training_config={
-                "data_config": {
-                    "batch_size": 0,
-                    "data_format": "instruct",
-                    "dataset_id": "dataset_id",
-                    "shuffle": True,
-                },
                 "gradient_accumulation_steps": 0,
                 "max_steps_per_epoch": 0,
-                "max_validation_steps": 0,
                 "n_epochs": 0,
-                "optimizer_config": {
-                    "lr": 0,
-                    "num_warmup_steps": 0,
-                    "optimizer_type": "adam",
-                    "weight_decay": 0,
-                },
             },
         )

@@ -452,22 +345,9 @@ async def test_streaming_response_preference_optimize(self, async_client: AsyncL
             job_uuid="job_uuid",
             logger_config={"foo": True},
             training_config={
-                "data_config": {
-                    "batch_size": 0,
-                    "data_format": "instruct",
-                    "dataset_id": "dataset_id",
-                    "shuffle": True,
-                },
                 "gradient_accumulation_steps": 0,
                 "max_steps_per_epoch": 0,
-                "max_validation_steps": 0,
                 "n_epochs": 0,
-                "optimizer_config": {
-                    "lr": 0,
-                    "num_warmup_steps": 0,
-                    "optimizer_type": "adam",
-                    "weight_decay": 0,
-                },
             },
         ) as response:
             assert not response.is_closed
@@ -484,24 +364,10 @@ async def test_method_supervised_fine_tune(self, async_client: AsyncLlamaStackCl
             hyperparam_search_config={"foo": True},
             job_uuid="job_uuid",
             logger_config={"foo": True},
-            model="model",
             training_config={
-                "data_config": {
-                    "batch_size": 0,
-                    "data_format": "instruct",
-                    "dataset_id": "dataset_id",
-                    "shuffle": True,
-                },
                 "gradient_accumulation_steps": 0,
                 "max_steps_per_epoch": 0,
-                "max_validation_steps": 0,
                 "n_epochs": 0,
-                "optimizer_config": {
-                    "lr": 0,
-                    "num_warmup_steps": 0,
-                    "optimizer_type": "adam",
-                    "weight_decay": 0,
-                },
             },
         )
         assert_matches_type(PostTrainingJob, post_training, path=["response"])
@@ -512,8 +378,10 @@ async def test_method_supervised_fine_tune_with_all_params(self, async_client: A
             hyperparam_search_config={"foo": True},
             job_uuid="job_uuid",
             logger_config={"foo": True},
-            model="model",
             training_config={
+                "gradient_accumulation_steps": 0,
+                "max_steps_per_epoch": 0,
+                "n_epochs": 0,
                 "data_config": {
                     "batch_size": 0,
                     "data_format": "instruct",
@@ -523,16 +391,6 @@ async def test_method_supervised_fine_tune_with_all_params(self, async_client: A
                     "train_on_input": True,
                     "validation_dataset_id": "validation_dataset_id",
                 },
-                "gradient_accumulation_steps": 0,
-                "max_steps_per_epoch": 0,
-                "max_validation_steps": 0,
-                "n_epochs": 0,
-                "optimizer_config": {
-                    "lr": 0,
-                    "num_warmup_steps": 0,
-                    "optimizer_type": "adam",
-                    "weight_decay": 0,
-                },
                 "dtype": "dtype",
                 "efficiency_config": {
                     "enable_activation_checkpointing": True,
@@ -540,6 +398,13 @@ async def test_method_supervised_fine_tune_with_all_params(self, async_client: A
                     "fsdp_cpu_offload": True,
                     "memory_efficient_fsdp_wrap": True,
                 },
+                "max_validation_steps": 0,
+                "optimizer_config": {
+                    "lr": 0,
+                    "num_warmup_steps": 0,
+                    "optimizer_type": "adam",
+                    "weight_decay": 0,
+                },
             },
             algorithm_config={
                 "alpha": 0,
@@ -552,6 +417,7 @@ async def test_method_supervised_fine_tune_with_all_params(self, async_client: A
                 "use_dora": True,
             },
             checkpoint_dir="checkpoint_dir",
+            model="model",
        )
         assert_matches_type(PostTrainingJob, post_training, path=["response"])

@@ -561,24 +427,10 @@ async def test_raw_response_supervised_fine_tune(self, async_client: AsyncLlamaS
             hyperparam_search_config={"foo": True},
             job_uuid="job_uuid",
             logger_config={"foo": True},
-            model="model",
             training_config={
-                "data_config": {
-                    "batch_size": 0,
-                    "data_format": "instruct",
-                    "dataset_id": "dataset_id",
-                    "shuffle": True,
-                },
                 "gradient_accumulation_steps": 0,
                 "max_steps_per_epoch": 0,
-                "max_validation_steps": 0,
                 "n_epochs": 0,
-                "optimizer_config": {
-                    "lr": 0,
-                    "num_warmup_steps": 0,
-                    "optimizer_type": "adam",
-                    "weight_decay": 0,
-                },
             },
         )

@@ -593,24 +445,10 @@ async def test_streaming_response_supervised_fine_tune(self, async_client: Async
             hyperparam_search_config={"foo": True},
             job_uuid="job_uuid",
             logger_config={"foo": True},
-            model="model",
             training_config={
-                "data_config": {
-                    "batch_size": 0,
-                    "data_format": "instruct",
-                    "dataset_id": "dataset_id",
-                    "shuffle": True,
-                },
                 "gradient_accumulation_steps": 0,
                 "max_steps_per_epoch": 0,
-                "max_validation_steps": 0,
                 "n_epochs": 0,
-                "optimizer_config": {
-                    "lr": 0,
-                    "num_warmup_steps": 0,
-                    "optimizer_type": "adam",
-                    "weight_decay": 0,
-                },
             },
         ) as response:
             assert not response.is_closed
diff --git a/tests/conftest.py b/tests/conftest.py
index 645cbf63..dd04ad98 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -10,7 +10,7 @@
 from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient

 if TYPE_CHECKING:
-    from _pytest.fixtures import FixtureRequest
+    from _pytest.fixtures import FixtureRequest  # pyright: ignore[reportPrivateImportUsage]

 pytest.register_assert_rewrite("tests.utils")

diff --git a/tests/test_models.py b/tests/test_models.py
index 8b4c0bc9..a27dfa46 100644
--- a/tests/test_models.py
+++ b/tests/test_models.py
@@ -492,12 +492,15 @@ class Model(BaseModel):
         resource_id: Optional[str] = None

     m = Model.construct()
+    assert m.resource_id is None
     assert "resource_id" not in m.model_fields_set

     m = Model.construct(resource_id=None)
+    assert m.resource_id is None
     assert "resource_id" in m.model_fields_set

     m = Model.construct(resource_id="foo")
+    assert m.resource_id == "foo"
     assert "resource_id" in m.model_fields_set


@@ -832,7 +835,7 @@ class B(BaseModel):

 @pytest.mark.skipif(not PYDANTIC_V2, reason="TypeAliasType is not supported in Pydantic v1")
 def test_type_alias_type() -> None:
-    Alias = TypeAliasType("Alias", str)
+    Alias = TypeAliasType("Alias", str)  # pyright: ignore

     class Model(BaseModel):
         alias: Alias