diff --git a/docs/cli_reference.md b/docs/cli_reference.md
index be6ab1ad..20c06029 100644
--- a/docs/cli_reference.md
+++ b/docs/cli_reference.md
@@ -182,7 +182,7 @@ Options:
 - `--num-examples`: Optional. Number of examples to evaluate (useful for debugging)
 - `--visualize`: Optional flag. If set, visualizes evaluation results after completion
 
-Example eval_task_config.json:
+Example eval_benchmark_config.json:
 ```json
 {
   "type": "benchmark",
diff --git a/src/llama_stack_client/_base_client.py b/src/llama_stack_client/_base_client.py
index 90df64c4..5a0376e6 100644
--- a/src/llama_stack_client/_base_client.py
+++ b/src/llama_stack_client/_base_client.py
@@ -9,7 +9,6 @@
 import inspect
 import logging
 import platform
-import warnings
 import email.utils
 from types import TracebackType
 from random import random
@@ -36,7 +35,7 @@
 import httpx
 import distro
 import pydantic
-from httpx import URL, Limits
+from httpx import URL
 from pydantic import PrivateAttr
 
 from . import _exceptions
@@ -51,19 +50,16 @@
     Timeout,
     NotGiven,
     ResponseT,
-    Transport,
     AnyMapping,
     PostParser,
-    ProxiesTypes,
     RequestFiles,
     HttpxSendArgs,
-    AsyncTransport,
     RequestOptions,
     HttpxRequestFiles,
     ModelBuilderProtocol,
 )
 from ._utils import is_dict, is_list, asyncify, is_given, lru_cache, is_mapping
-from ._compat import model_copy, model_dump
+from ._compat import PYDANTIC_V2, model_copy, model_dump
 from ._models import GenericModel, FinalRequestOptions, validate_type, construct_type
 from ._response import (
     APIResponse,
@@ -207,6 +203,9 @@ def _set_private_attributes(
         model: Type[_T],
         options: FinalRequestOptions,
     ) -> None:
+        if PYDANTIC_V2 and getattr(self, "__pydantic_private__", None) is None:
+            self.__pydantic_private__ = {}
+
         self._model = model
         self._client = client
         self._options = options
@@ -292,6 +291,9 @@ def _set_private_attributes(
         client: AsyncAPIClient,
         options: FinalRequestOptions,
     ) -> None:
+        if PYDANTIC_V2 and getattr(self, "__pydantic_private__", None) is None:
+            self.__pydantic_private__ = {}
+
         self._model = model
         self._client = client
         self._options = options
@@ -331,9 +333,6 @@ class BaseClient(Generic[_HttpxClientT, _DefaultStreamT]):
     _base_url: URL
     max_retries: int
     timeout: Union[float, Timeout, None]
-    _limits: httpx.Limits
-    _proxies: ProxiesTypes | None
-    _transport: Transport | AsyncTransport | None
     _strict_response_validation: bool
     _idempotency_header: str | None
     _default_stream_cls: type[_DefaultStreamT] | None = None
@@ -346,9 +345,6 @@ def __init__(
         _strict_response_validation: bool,
         max_retries: int = DEFAULT_MAX_RETRIES,
         timeout: float | Timeout | None = DEFAULT_TIMEOUT,
-        limits: httpx.Limits,
-        transport: Transport | AsyncTransport | None,
-        proxies: ProxiesTypes | None,
         custom_headers: Mapping[str, str] | None = None,
         custom_query: Mapping[str, object] | None = None,
     ) -> None:
@@ -356,9 +352,6 @@
         self._base_url = self._enforce_trailing_slash(URL(base_url))
         self.max_retries = max_retries
         self.timeout = timeout
-        self._limits = limits
-        self._proxies = proxies
-        self._transport = transport
         self._custom_headers = custom_headers or {}
         self._custom_query = custom_query or {}
         self._strict_response_validation = _strict_response_validation
@@ -794,46 +787,11 @@ def __init__(
         base_url: str | URL,
         max_retries: int = DEFAULT_MAX_RETRIES,
         timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
-        transport: Transport | None = None,
-        proxies: ProxiesTypes | None = None,
-        limits: Limits | None = None,
         http_client: httpx.Client | None = None,
         custom_headers: Mapping[str, str] | None =
None, custom_query: Mapping[str, object] | None = None, _strict_response_validation: bool, ) -> None: - kwargs: dict[str, Any] = {} - if limits is not None: - warnings.warn( - "The `connection_pool_limits` argument is deprecated. The `http_client` argument should be passed instead", - category=DeprecationWarning, - stacklevel=3, - ) - if http_client is not None: - raise ValueError("The `http_client` argument is mutually exclusive with `connection_pool_limits`") - else: - limits = DEFAULT_CONNECTION_LIMITS - - if transport is not None: - kwargs["transport"] = transport - warnings.warn( - "The `transport` argument is deprecated. The `http_client` argument should be passed instead", - category=DeprecationWarning, - stacklevel=3, - ) - if http_client is not None: - raise ValueError("The `http_client` argument is mutually exclusive with `transport`") - - if proxies is not None: - kwargs["proxies"] = proxies - warnings.warn( - "The `proxies` argument is deprecated. The `http_client` argument should be passed instead", - category=DeprecationWarning, - stacklevel=3, - ) - if http_client is not None: - raise ValueError("The `http_client` argument is mutually exclusive with `proxies`") - if not is_given(timeout): # if the user passed in a custom http client with a non-default # timeout set then we use that timeout. @@ -854,12 +812,9 @@ def __init__( super().__init__( version=version, - limits=limits, # cast to a valid type because mypy doesn't understand our type narrowing timeout=cast(Timeout, timeout), - proxies=proxies, base_url=base_url, - transport=transport, max_retries=max_retries, custom_query=custom_query, custom_headers=custom_headers, @@ -869,9 +824,6 @@ def __init__( base_url=base_url, # cast to a valid type because mypy doesn't understand our type narrowing timeout=cast(Timeout, timeout), - limits=limits, - follow_redirects=True, - **kwargs, # type: ignore ) def is_closed(self) -> bool: @@ -1366,45 +1318,10 @@ def __init__( _strict_response_validation: bool, max_retries: int = DEFAULT_MAX_RETRIES, timeout: float | Timeout | None | NotGiven = NOT_GIVEN, - transport: AsyncTransport | None = None, - proxies: ProxiesTypes | None = None, - limits: Limits | None = None, http_client: httpx.AsyncClient | None = None, custom_headers: Mapping[str, str] | None = None, custom_query: Mapping[str, object] | None = None, ) -> None: - kwargs: dict[str, Any] = {} - if limits is not None: - warnings.warn( - "The `connection_pool_limits` argument is deprecated. The `http_client` argument should be passed instead", - category=DeprecationWarning, - stacklevel=3, - ) - if http_client is not None: - raise ValueError("The `http_client` argument is mutually exclusive with `connection_pool_limits`") - else: - limits = DEFAULT_CONNECTION_LIMITS - - if transport is not None: - kwargs["transport"] = transport - warnings.warn( - "The `transport` argument is deprecated. The `http_client` argument should be passed instead", - category=DeprecationWarning, - stacklevel=3, - ) - if http_client is not None: - raise ValueError("The `http_client` argument is mutually exclusive with `transport`") - - if proxies is not None: - kwargs["proxies"] = proxies - warnings.warn( - "The `proxies` argument is deprecated. 
The `http_client` argument should be passed instead", - category=DeprecationWarning, - stacklevel=3, - ) - if http_client is not None: - raise ValueError("The `http_client` argument is mutually exclusive with `proxies`") - if not is_given(timeout): # if the user passed in a custom http client with a non-default # timeout set then we use that timeout. @@ -1426,11 +1343,8 @@ def __init__( super().__init__( version=version, base_url=base_url, - limits=limits, # cast to a valid type because mypy doesn't understand our type narrowing timeout=cast(Timeout, timeout), - proxies=proxies, - transport=transport, max_retries=max_retries, custom_query=custom_query, custom_headers=custom_headers, @@ -1440,9 +1354,6 @@ def __init__( base_url=base_url, # cast to a valid type because mypy doesn't understand our type narrowing timeout=cast(Timeout, timeout), - limits=limits, - follow_redirects=True, - **kwargs, # type: ignore ) def is_closed(self) -> bool: diff --git a/src/llama_stack_client/_client.py b/src/llama_stack_client/_client.py index 760eaeee..e0094eda 100644 --- a/src/llama_stack_client/_client.py +++ b/src/llama_stack_client/_client.py @@ -124,7 +124,7 @@ def __init__( _strict_response_validation: bool = False, provider_data: Mapping[str, Any] | None = None, ) -> None: - """Construct a new synchronous llama-stack-client client instance. + """Construct a new synchronous LlamaStackClient client instance. This automatically infers the `api_key` argument from the `LLAMA_STACK_API_KEY` environment variable if it is not provided. """ @@ -340,7 +340,7 @@ def __init__( _strict_response_validation: bool = False, provider_data: Mapping[str, Any] | None = None, ) -> None: - """Construct a new async llama-stack-client client instance. + """Construct a new async AsyncLlamaStackClient client instance. This automatically infers the `api_key` argument from the `LLAMA_STACK_API_KEY` environment variable if it is not provided. """ diff --git a/src/llama_stack_client/lib/agents/agent.py b/src/llama_stack_client/lib/agents/agent.py index e323cde2..bb6bb26c 100644 --- a/src/llama_stack_client/lib/agents/agent.py +++ b/src/llama_stack_client/lib/agents/agent.py @@ -151,7 +151,6 @@ def _create_turn_streaming( stream=True, documents=documents, toolgroups=toolgroups, - allow_turn_resume=True, ) # 2. 
process turn and resume if there's a tool call diff --git a/src/llama_stack_client/lib/cli/eval/run_benchmark.py b/src/llama_stack_client/lib/cli/eval/run_benchmark.py index 933b1338..6d246292 100644 --- a/src/llama_stack_client/lib/cli/eval/run_benchmark.py +++ b/src/llama_stack_client/lib/cli/eval/run_benchmark.py @@ -13,12 +13,7 @@ from tqdm.rich import tqdm from ..common.utils import create_bar_chart -from .utils import ( - aggregate_accuracy, - aggregate_average, - aggregate_categorical_count, - aggregate_median, -) +from .utils import aggregate_accuracy, aggregate_average, aggregate_categorical_count, aggregate_median @click.command("run-benchmark") @@ -110,7 +105,7 @@ def run_benchmark( benchmark_id=benchmark_id, input_rows=[r], scoring_functions=scoring_functions, - task_config={ + benchmark_config={ "type": "benchmark", "eval_candidate": { "type": "model", diff --git a/src/llama_stack_client/resources/agents/turn.py b/src/llama_stack_client/resources/agents/turn.py index a17021ec..6c787e92 100644 --- a/src/llama_stack_client/resources/agents/turn.py +++ b/src/llama_stack_client/resources/agents/turn.py @@ -58,7 +58,6 @@ def create( *, agent_id: str, messages: Iterable[turn_create_params.Message], - allow_turn_resume: bool | NotGiven = NOT_GIVEN, documents: Iterable[turn_create_params.Document] | NotGiven = NOT_GIVEN, stream: Literal[False] | NotGiven = NOT_GIVEN, tool_config: turn_create_params.ToolConfig | NotGiven = NOT_GIVEN, @@ -92,7 +91,6 @@ def create( agent_id: str, messages: Iterable[turn_create_params.Message], stream: Literal[True], - allow_turn_resume: bool | NotGiven = NOT_GIVEN, documents: Iterable[turn_create_params.Document] | NotGiven = NOT_GIVEN, tool_config: turn_create_params.ToolConfig | NotGiven = NOT_GIVEN, toolgroups: List[turn_create_params.Toolgroup] | NotGiven = NOT_GIVEN, @@ -125,7 +123,6 @@ def create( agent_id: str, messages: Iterable[turn_create_params.Message], stream: bool, - allow_turn_resume: bool | NotGiven = NOT_GIVEN, documents: Iterable[turn_create_params.Document] | NotGiven = NOT_GIVEN, tool_config: turn_create_params.ToolConfig | NotGiven = NOT_GIVEN, toolgroups: List[turn_create_params.Toolgroup] | NotGiven = NOT_GIVEN, @@ -157,7 +154,6 @@ def create( *, agent_id: str, messages: Iterable[turn_create_params.Message], - allow_turn_resume: bool | NotGiven = NOT_GIVEN, documents: Iterable[turn_create_params.Document] | NotGiven = NOT_GIVEN, stream: Literal[False] | Literal[True] | NotGiven = NOT_GIVEN, tool_config: turn_create_params.ToolConfig | NotGiven = NOT_GIVEN, @@ -178,7 +174,6 @@ def create( body=maybe_transform( { "messages": messages, - "allow_turn_resume": allow_turn_resume, "documents": documents, "stream": stream, "tool_config": tool_config, @@ -412,7 +407,6 @@ async def create( *, agent_id: str, messages: Iterable[turn_create_params.Message], - allow_turn_resume: bool | NotGiven = NOT_GIVEN, documents: Iterable[turn_create_params.Document] | NotGiven = NOT_GIVEN, stream: Literal[False] | NotGiven = NOT_GIVEN, tool_config: turn_create_params.ToolConfig | NotGiven = NOT_GIVEN, @@ -446,7 +440,6 @@ async def create( agent_id: str, messages: Iterable[turn_create_params.Message], stream: Literal[True], - allow_turn_resume: bool | NotGiven = NOT_GIVEN, documents: Iterable[turn_create_params.Document] | NotGiven = NOT_GIVEN, tool_config: turn_create_params.ToolConfig | NotGiven = NOT_GIVEN, toolgroups: List[turn_create_params.Toolgroup] | NotGiven = NOT_GIVEN, @@ -479,7 +472,6 @@ async def create( agent_id: str, messages: 
Iterable[turn_create_params.Message], stream: bool, - allow_turn_resume: bool | NotGiven = NOT_GIVEN, documents: Iterable[turn_create_params.Document] | NotGiven = NOT_GIVEN, tool_config: turn_create_params.ToolConfig | NotGiven = NOT_GIVEN, toolgroups: List[turn_create_params.Toolgroup] | NotGiven = NOT_GIVEN, @@ -511,7 +503,6 @@ async def create( *, agent_id: str, messages: Iterable[turn_create_params.Message], - allow_turn_resume: bool | NotGiven = NOT_GIVEN, documents: Iterable[turn_create_params.Document] | NotGiven = NOT_GIVEN, stream: Literal[False] | Literal[True] | NotGiven = NOT_GIVEN, tool_config: turn_create_params.ToolConfig | NotGiven = NOT_GIVEN, @@ -532,7 +523,6 @@ async def create( body=await async_maybe_transform( { "messages": messages, - "allow_turn_resume": allow_turn_resume, "documents": documents, "stream": stream, "tool_config": tool_config, diff --git a/src/llama_stack_client/resources/eval/eval.py b/src/llama_stack_client/resources/eval/eval.py index 6ea1669c..2c2d8d9c 100644 --- a/src/llama_stack_client/resources/eval/eval.py +++ b/src/llama_stack_client/resources/eval/eval.py @@ -69,9 +69,9 @@ def evaluate_rows( self, benchmark_id: str, *, + benchmark_config: BenchmarkConfigParam, input_rows: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]], scoring_functions: List[str], - task_config: BenchmarkConfigParam, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -95,9 +95,9 @@ def evaluate_rows( f"/v1/eval/benchmarks/{benchmark_id}/evaluations", body=maybe_transform( { + "benchmark_config": benchmark_config, "input_rows": input_rows, "scoring_functions": scoring_functions, - "task_config": task_config, }, eval_evaluate_rows_params.EvalEvaluateRowsParams, ), @@ -111,9 +111,9 @@ def evaluate_rows_alpha( self, benchmark_id: str, *, + benchmark_config: BenchmarkConfigParam, input_rows: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]], scoring_functions: List[str], - task_config: BenchmarkConfigParam, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -137,9 +137,9 @@ def evaluate_rows_alpha( f"/v1/eval/benchmarks/{benchmark_id}/evaluations", body=maybe_transform( { + "benchmark_config": benchmark_config, "input_rows": input_rows, "scoring_functions": scoring_functions, - "task_config": task_config, }, eval_evaluate_rows_alpha_params.EvalEvaluateRowsAlphaParams, ), @@ -153,7 +153,7 @@ def run_eval( self, benchmark_id: str, *, - task_config: BenchmarkConfigParam, + benchmark_config: BenchmarkConfigParam, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. 
extra_headers: Headers | None = None, @@ -175,7 +175,7 @@ def run_eval( raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}") return self._post( f"/v1/eval/benchmarks/{benchmark_id}/jobs", - body=maybe_transform({"task_config": task_config}, eval_run_eval_params.EvalRunEvalParams), + body=maybe_transform({"benchmark_config": benchmark_config}, eval_run_eval_params.EvalRunEvalParams), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -186,7 +186,7 @@ def run_eval_alpha( self, benchmark_id: str, *, - task_config: BenchmarkConfigParam, + benchmark_config: BenchmarkConfigParam, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -208,7 +208,9 @@ def run_eval_alpha( raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}") return self._post( f"/v1/eval/benchmarks/{benchmark_id}/jobs", - body=maybe_transform({"task_config": task_config}, eval_run_eval_alpha_params.EvalRunEvalAlphaParams), + body=maybe_transform( + {"benchmark_config": benchmark_config}, eval_run_eval_alpha_params.EvalRunEvalAlphaParams + ), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -244,9 +246,9 @@ async def evaluate_rows( self, benchmark_id: str, *, + benchmark_config: BenchmarkConfigParam, input_rows: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]], scoring_functions: List[str], - task_config: BenchmarkConfigParam, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -270,9 +272,9 @@ async def evaluate_rows( f"/v1/eval/benchmarks/{benchmark_id}/evaluations", body=await async_maybe_transform( { + "benchmark_config": benchmark_config, "input_rows": input_rows, "scoring_functions": scoring_functions, - "task_config": task_config, }, eval_evaluate_rows_params.EvalEvaluateRowsParams, ), @@ -286,9 +288,9 @@ async def evaluate_rows_alpha( self, benchmark_id: str, *, + benchmark_config: BenchmarkConfigParam, input_rows: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]], scoring_functions: List[str], - task_config: BenchmarkConfigParam, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -312,9 +314,9 @@ async def evaluate_rows_alpha( f"/v1/eval/benchmarks/{benchmark_id}/evaluations", body=await async_maybe_transform( { + "benchmark_config": benchmark_config, "input_rows": input_rows, "scoring_functions": scoring_functions, - "task_config": task_config, }, eval_evaluate_rows_alpha_params.EvalEvaluateRowsAlphaParams, ), @@ -328,7 +330,7 @@ async def run_eval( self, benchmark_id: str, *, - task_config: BenchmarkConfigParam, + benchmark_config: BenchmarkConfigParam, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
# The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -350,7 +352,9 @@ async def run_eval( raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}") return await self._post( f"/v1/eval/benchmarks/{benchmark_id}/jobs", - body=await async_maybe_transform({"task_config": task_config}, eval_run_eval_params.EvalRunEvalParams), + body=await async_maybe_transform( + {"benchmark_config": benchmark_config}, eval_run_eval_params.EvalRunEvalParams + ), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -361,7 +365,7 @@ async def run_eval_alpha( self, benchmark_id: str, *, - task_config: BenchmarkConfigParam, + benchmark_config: BenchmarkConfigParam, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -384,7 +388,7 @@ async def run_eval_alpha( return await self._post( f"/v1/eval/benchmarks/{benchmark_id}/jobs", body=await async_maybe_transform( - {"task_config": task_config}, eval_run_eval_alpha_params.EvalRunEvalAlphaParams + {"benchmark_config": benchmark_config}, eval_run_eval_alpha_params.EvalRunEvalAlphaParams ), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout diff --git a/src/llama_stack_client/types/agents/turn_create_params.py b/src/llama_stack_client/types/agents/turn_create_params.py index 729ab74c..357f572c 100644 --- a/src/llama_stack_client/types/agents/turn_create_params.py +++ b/src/llama_stack_client/types/agents/turn_create_params.py @@ -32,8 +32,6 @@ class TurnCreateParamsBase(TypedDict, total=False): messages: Required[Iterable[Message]] - allow_turn_resume: bool - documents: Iterable[Document] tool_config: ToolConfig diff --git a/src/llama_stack_client/types/eval_evaluate_rows_alpha_params.py b/src/llama_stack_client/types/eval_evaluate_rows_alpha_params.py index 9758e814..125a0760 100644 --- a/src/llama_stack_client/types/eval_evaluate_rows_alpha_params.py +++ b/src/llama_stack_client/types/eval_evaluate_rows_alpha_params.py @@ -11,8 +11,8 @@ class EvalEvaluateRowsAlphaParams(TypedDict, total=False): + benchmark_config: Required[BenchmarkConfigParam] + input_rows: Required[Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]]] scoring_functions: Required[List[str]] - - task_config: Required[BenchmarkConfigParam] diff --git a/src/llama_stack_client/types/eval_evaluate_rows_params.py b/src/llama_stack_client/types/eval_evaluate_rows_params.py index 86cdde00..f0671298 100644 --- a/src/llama_stack_client/types/eval_evaluate_rows_params.py +++ b/src/llama_stack_client/types/eval_evaluate_rows_params.py @@ -11,8 +11,8 @@ class EvalEvaluateRowsParams(TypedDict, total=False): + benchmark_config: Required[BenchmarkConfigParam] + input_rows: Required[Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]]] scoring_functions: Required[List[str]] - - task_config: Required[BenchmarkConfigParam] diff --git a/src/llama_stack_client/types/eval_run_eval_alpha_params.py b/src/llama_stack_client/types/eval_run_eval_alpha_params.py index 3ca2521a..4716dd49 100644 --- a/src/llama_stack_client/types/eval_run_eval_alpha_params.py +++ 
b/src/llama_stack_client/types/eval_run_eval_alpha_params.py @@ -10,4 +10,4 @@ class EvalRunEvalAlphaParams(TypedDict, total=False): - task_config: Required[BenchmarkConfigParam] + benchmark_config: Required[BenchmarkConfigParam] diff --git a/src/llama_stack_client/types/eval_run_eval_params.py b/src/llama_stack_client/types/eval_run_eval_params.py index a5715f29..a5e46d42 100644 --- a/src/llama_stack_client/types/eval_run_eval_params.py +++ b/src/llama_stack_client/types/eval_run_eval_params.py @@ -10,4 +10,4 @@ class EvalRunEvalParams(TypedDict, total=False): - task_config: Required[BenchmarkConfigParam] + benchmark_config: Required[BenchmarkConfigParam] diff --git a/tests/api_resources/agents/test_turn.py b/tests/api_resources/agents/test_turn.py index e74502bd..311dfcd8 100644 --- a/tests/api_resources/agents/test_turn.py +++ b/tests/api_resources/agents/test_turn.py @@ -43,7 +43,6 @@ def test_method_create_with_all_params_overload_1(self, client: LlamaStackClient "context": "string", } ], - allow_turn_resume=True, documents=[ { "content": "string", @@ -152,7 +151,6 @@ def test_method_create_with_all_params_overload_2(self, client: LlamaStackClient } ], stream=True, - allow_turn_resume=True, documents=[ { "content": "string", @@ -564,7 +562,6 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn "context": "string", } ], - allow_turn_resume=True, documents=[ { "content": "string", @@ -673,7 +670,6 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn } ], stream=True, - allow_turn_resume=True, documents=[ { "content": "string", diff --git a/tests/api_resources/test_eval.py b/tests/api_resources/test_eval.py index cf120885..9735b4c4 100644 --- a/tests/api_resources/test_eval.py +++ b/tests/api_resources/test_eval.py @@ -24,9 +24,7 @@ class TestEval: def test_method_evaluate_rows(self, client: LlamaStackClient) -> None: eval = client.eval.evaluate_rows( benchmark_id="benchmark_id", - input_rows=[{"foo": True}], - scoring_functions=["string"], - task_config={ + benchmark_config={ "eval_candidate": { "model": "model", "sampling_params": {"strategy": {"type": "greedy"}}, @@ -39,6 +37,8 @@ def test_method_evaluate_rows(self, client: LlamaStackClient) -> None: } }, }, + input_rows=[{"foo": True}], + scoring_functions=["string"], ) assert_matches_type(EvaluateResponse, eval, path=["response"]) @@ -46,9 +46,7 @@ def test_method_evaluate_rows(self, client: LlamaStackClient) -> None: def test_method_evaluate_rows_with_all_params(self, client: LlamaStackClient) -> None: eval = client.eval.evaluate_rows( benchmark_id="benchmark_id", - input_rows=[{"foo": True}], - scoring_functions=["string"], - task_config={ + benchmark_config={ "eval_candidate": { "model": "model", "sampling_params": { @@ -73,6 +71,8 @@ def test_method_evaluate_rows_with_all_params(self, client: LlamaStackClient) -> }, "num_examples": 0, }, + input_rows=[{"foo": True}], + scoring_functions=["string"], ) assert_matches_type(EvaluateResponse, eval, path=["response"]) @@ -80,9 +80,7 @@ def test_method_evaluate_rows_with_all_params(self, client: LlamaStackClient) -> def test_raw_response_evaluate_rows(self, client: LlamaStackClient) -> None: response = client.eval.with_raw_response.evaluate_rows( benchmark_id="benchmark_id", - input_rows=[{"foo": True}], - scoring_functions=["string"], - task_config={ + benchmark_config={ "eval_candidate": { "model": "model", "sampling_params": {"strategy": {"type": "greedy"}}, @@ -95,6 +93,8 @@ def test_raw_response_evaluate_rows(self, 
client: LlamaStackClient) -> None: } }, }, + input_rows=[{"foo": True}], + scoring_functions=["string"], ) assert response.is_closed is True @@ -106,9 +106,7 @@ def test_raw_response_evaluate_rows(self, client: LlamaStackClient) -> None: def test_streaming_response_evaluate_rows(self, client: LlamaStackClient) -> None: with client.eval.with_streaming_response.evaluate_rows( benchmark_id="benchmark_id", - input_rows=[{"foo": True}], - scoring_functions=["string"], - task_config={ + benchmark_config={ "eval_candidate": { "model": "model", "sampling_params": {"strategy": {"type": "greedy"}}, @@ -121,6 +119,8 @@ def test_streaming_response_evaluate_rows(self, client: LlamaStackClient) -> Non } }, }, + input_rows=[{"foo": True}], + scoring_functions=["string"], ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -135,9 +135,7 @@ def test_path_params_evaluate_rows(self, client: LlamaStackClient) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"): client.eval.with_raw_response.evaluate_rows( benchmark_id="", - input_rows=[{"foo": True}], - scoring_functions=["string"], - task_config={ + benchmark_config={ "eval_candidate": { "model": "model", "sampling_params": {"strategy": {"type": "greedy"}}, @@ -150,15 +148,15 @@ def test_path_params_evaluate_rows(self, client: LlamaStackClient) -> None: } }, }, + input_rows=[{"foo": True}], + scoring_functions=["string"], ) @parametrize def test_method_evaluate_rows_alpha(self, client: LlamaStackClient) -> None: eval = client.eval.evaluate_rows_alpha( benchmark_id="benchmark_id", - input_rows=[{"foo": True}], - scoring_functions=["string"], - task_config={ + benchmark_config={ "eval_candidate": { "model": "model", "sampling_params": {"strategy": {"type": "greedy"}}, @@ -171,6 +169,8 @@ def test_method_evaluate_rows_alpha(self, client: LlamaStackClient) -> None: } }, }, + input_rows=[{"foo": True}], + scoring_functions=["string"], ) assert_matches_type(EvaluateResponse, eval, path=["response"]) @@ -178,9 +178,7 @@ def test_method_evaluate_rows_alpha(self, client: LlamaStackClient) -> None: def test_method_evaluate_rows_alpha_with_all_params(self, client: LlamaStackClient) -> None: eval = client.eval.evaluate_rows_alpha( benchmark_id="benchmark_id", - input_rows=[{"foo": True}], - scoring_functions=["string"], - task_config={ + benchmark_config={ "eval_candidate": { "model": "model", "sampling_params": { @@ -205,6 +203,8 @@ def test_method_evaluate_rows_alpha_with_all_params(self, client: LlamaStackClie }, "num_examples": 0, }, + input_rows=[{"foo": True}], + scoring_functions=["string"], ) assert_matches_type(EvaluateResponse, eval, path=["response"]) @@ -212,9 +212,7 @@ def test_method_evaluate_rows_alpha_with_all_params(self, client: LlamaStackClie def test_raw_response_evaluate_rows_alpha(self, client: LlamaStackClient) -> None: response = client.eval.with_raw_response.evaluate_rows_alpha( benchmark_id="benchmark_id", - input_rows=[{"foo": True}], - scoring_functions=["string"], - task_config={ + benchmark_config={ "eval_candidate": { "model": "model", "sampling_params": {"strategy": {"type": "greedy"}}, @@ -227,6 +225,8 @@ def test_raw_response_evaluate_rows_alpha(self, client: LlamaStackClient) -> Non } }, }, + input_rows=[{"foo": True}], + scoring_functions=["string"], ) assert response.is_closed is True @@ -238,9 +238,7 @@ def test_raw_response_evaluate_rows_alpha(self, client: LlamaStackClient) -> Non def 
test_streaming_response_evaluate_rows_alpha(self, client: LlamaStackClient) -> None: with client.eval.with_streaming_response.evaluate_rows_alpha( benchmark_id="benchmark_id", - input_rows=[{"foo": True}], - scoring_functions=["string"], - task_config={ + benchmark_config={ "eval_candidate": { "model": "model", "sampling_params": {"strategy": {"type": "greedy"}}, @@ -253,6 +251,8 @@ def test_streaming_response_evaluate_rows_alpha(self, client: LlamaStackClient) } }, }, + input_rows=[{"foo": True}], + scoring_functions=["string"], ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -267,9 +267,7 @@ def test_path_params_evaluate_rows_alpha(self, client: LlamaStackClient) -> None with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"): client.eval.with_raw_response.evaluate_rows_alpha( benchmark_id="", - input_rows=[{"foo": True}], - scoring_functions=["string"], - task_config={ + benchmark_config={ "eval_candidate": { "model": "model", "sampling_params": {"strategy": {"type": "greedy"}}, @@ -282,13 +280,15 @@ def test_path_params_evaluate_rows_alpha(self, client: LlamaStackClient) -> None } }, }, + input_rows=[{"foo": True}], + scoring_functions=["string"], ) @parametrize def test_method_run_eval(self, client: LlamaStackClient) -> None: eval = client.eval.run_eval( benchmark_id="benchmark_id", - task_config={ + benchmark_config={ "eval_candidate": { "model": "model", "sampling_params": {"strategy": {"type": "greedy"}}, @@ -308,7 +308,7 @@ def test_method_run_eval(self, client: LlamaStackClient) -> None: def test_method_run_eval_with_all_params(self, client: LlamaStackClient) -> None: eval = client.eval.run_eval( benchmark_id="benchmark_id", - task_config={ + benchmark_config={ "eval_candidate": { "model": "model", "sampling_params": { @@ -340,7 +340,7 @@ def test_method_run_eval_with_all_params(self, client: LlamaStackClient) -> None def test_raw_response_run_eval(self, client: LlamaStackClient) -> None: response = client.eval.with_raw_response.run_eval( benchmark_id="benchmark_id", - task_config={ + benchmark_config={ "eval_candidate": { "model": "model", "sampling_params": {"strategy": {"type": "greedy"}}, @@ -364,7 +364,7 @@ def test_raw_response_run_eval(self, client: LlamaStackClient) -> None: def test_streaming_response_run_eval(self, client: LlamaStackClient) -> None: with client.eval.with_streaming_response.run_eval( benchmark_id="benchmark_id", - task_config={ + benchmark_config={ "eval_candidate": { "model": "model", "sampling_params": {"strategy": {"type": "greedy"}}, @@ -391,7 +391,7 @@ def test_path_params_run_eval(self, client: LlamaStackClient) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"): client.eval.with_raw_response.run_eval( benchmark_id="", - task_config={ + benchmark_config={ "eval_candidate": { "model": "model", "sampling_params": {"strategy": {"type": "greedy"}}, @@ -410,7 +410,7 @@ def test_path_params_run_eval(self, client: LlamaStackClient) -> None: def test_method_run_eval_alpha(self, client: LlamaStackClient) -> None: eval = client.eval.run_eval_alpha( benchmark_id="benchmark_id", - task_config={ + benchmark_config={ "eval_candidate": { "model": "model", "sampling_params": {"strategy": {"type": "greedy"}}, @@ -430,7 +430,7 @@ def test_method_run_eval_alpha(self, client: LlamaStackClient) -> None: def test_method_run_eval_alpha_with_all_params(self, client: LlamaStackClient) -> None: 
eval = client.eval.run_eval_alpha( benchmark_id="benchmark_id", - task_config={ + benchmark_config={ "eval_candidate": { "model": "model", "sampling_params": { @@ -462,7 +462,7 @@ def test_method_run_eval_alpha_with_all_params(self, client: LlamaStackClient) - def test_raw_response_run_eval_alpha(self, client: LlamaStackClient) -> None: response = client.eval.with_raw_response.run_eval_alpha( benchmark_id="benchmark_id", - task_config={ + benchmark_config={ "eval_candidate": { "model": "model", "sampling_params": {"strategy": {"type": "greedy"}}, @@ -486,7 +486,7 @@ def test_raw_response_run_eval_alpha(self, client: LlamaStackClient) -> None: def test_streaming_response_run_eval_alpha(self, client: LlamaStackClient) -> None: with client.eval.with_streaming_response.run_eval_alpha( benchmark_id="benchmark_id", - task_config={ + benchmark_config={ "eval_candidate": { "model": "model", "sampling_params": {"strategy": {"type": "greedy"}}, @@ -513,7 +513,7 @@ def test_path_params_run_eval_alpha(self, client: LlamaStackClient) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"): client.eval.with_raw_response.run_eval_alpha( benchmark_id="", - task_config={ + benchmark_config={ "eval_candidate": { "model": "model", "sampling_params": {"strategy": {"type": "greedy"}}, @@ -536,9 +536,7 @@ class TestAsyncEval: async def test_method_evaluate_rows(self, async_client: AsyncLlamaStackClient) -> None: eval = await async_client.eval.evaluate_rows( benchmark_id="benchmark_id", - input_rows=[{"foo": True}], - scoring_functions=["string"], - task_config={ + benchmark_config={ "eval_candidate": { "model": "model", "sampling_params": {"strategy": {"type": "greedy"}}, @@ -551,6 +549,8 @@ async def test_method_evaluate_rows(self, async_client: AsyncLlamaStackClient) - } }, }, + input_rows=[{"foo": True}], + scoring_functions=["string"], ) assert_matches_type(EvaluateResponse, eval, path=["response"]) @@ -558,9 +558,7 @@ async def test_method_evaluate_rows(self, async_client: AsyncLlamaStackClient) - async def test_method_evaluate_rows_with_all_params(self, async_client: AsyncLlamaStackClient) -> None: eval = await async_client.eval.evaluate_rows( benchmark_id="benchmark_id", - input_rows=[{"foo": True}], - scoring_functions=["string"], - task_config={ + benchmark_config={ "eval_candidate": { "model": "model", "sampling_params": { @@ -585,6 +583,8 @@ async def test_method_evaluate_rows_with_all_params(self, async_client: AsyncLla }, "num_examples": 0, }, + input_rows=[{"foo": True}], + scoring_functions=["string"], ) assert_matches_type(EvaluateResponse, eval, path=["response"]) @@ -592,9 +592,7 @@ async def test_method_evaluate_rows_with_all_params(self, async_client: AsyncLla async def test_raw_response_evaluate_rows(self, async_client: AsyncLlamaStackClient) -> None: response = await async_client.eval.with_raw_response.evaluate_rows( benchmark_id="benchmark_id", - input_rows=[{"foo": True}], - scoring_functions=["string"], - task_config={ + benchmark_config={ "eval_candidate": { "model": "model", "sampling_params": {"strategy": {"type": "greedy"}}, @@ -607,6 +605,8 @@ async def test_raw_response_evaluate_rows(self, async_client: AsyncLlamaStackCli } }, }, + input_rows=[{"foo": True}], + scoring_functions=["string"], ) assert response.is_closed is True @@ -618,9 +618,7 @@ async def test_raw_response_evaluate_rows(self, async_client: AsyncLlamaStackCli async def test_streaming_response_evaluate_rows(self, async_client: AsyncLlamaStackClient) -> None: 
async with async_client.eval.with_streaming_response.evaluate_rows( benchmark_id="benchmark_id", - input_rows=[{"foo": True}], - scoring_functions=["string"], - task_config={ + benchmark_config={ "eval_candidate": { "model": "model", "sampling_params": {"strategy": {"type": "greedy"}}, @@ -633,6 +631,8 @@ async def test_streaming_response_evaluate_rows(self, async_client: AsyncLlamaSt } }, }, + input_rows=[{"foo": True}], + scoring_functions=["string"], ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -647,9 +647,7 @@ async def test_path_params_evaluate_rows(self, async_client: AsyncLlamaStackClie with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"): await async_client.eval.with_raw_response.evaluate_rows( benchmark_id="", - input_rows=[{"foo": True}], - scoring_functions=["string"], - task_config={ + benchmark_config={ "eval_candidate": { "model": "model", "sampling_params": {"strategy": {"type": "greedy"}}, @@ -662,15 +660,15 @@ async def test_path_params_evaluate_rows(self, async_client: AsyncLlamaStackClie } }, }, + input_rows=[{"foo": True}], + scoring_functions=["string"], ) @parametrize async def test_method_evaluate_rows_alpha(self, async_client: AsyncLlamaStackClient) -> None: eval = await async_client.eval.evaluate_rows_alpha( benchmark_id="benchmark_id", - input_rows=[{"foo": True}], - scoring_functions=["string"], - task_config={ + benchmark_config={ "eval_candidate": { "model": "model", "sampling_params": {"strategy": {"type": "greedy"}}, @@ -683,6 +681,8 @@ async def test_method_evaluate_rows_alpha(self, async_client: AsyncLlamaStackCli } }, }, + input_rows=[{"foo": True}], + scoring_functions=["string"], ) assert_matches_type(EvaluateResponse, eval, path=["response"]) @@ -690,9 +690,7 @@ async def test_method_evaluate_rows_alpha(self, async_client: AsyncLlamaStackCli async def test_method_evaluate_rows_alpha_with_all_params(self, async_client: AsyncLlamaStackClient) -> None: eval = await async_client.eval.evaluate_rows_alpha( benchmark_id="benchmark_id", - input_rows=[{"foo": True}], - scoring_functions=["string"], - task_config={ + benchmark_config={ "eval_candidate": { "model": "model", "sampling_params": { @@ -717,6 +715,8 @@ async def test_method_evaluate_rows_alpha_with_all_params(self, async_client: As }, "num_examples": 0, }, + input_rows=[{"foo": True}], + scoring_functions=["string"], ) assert_matches_type(EvaluateResponse, eval, path=["response"]) @@ -724,9 +724,7 @@ async def test_method_evaluate_rows_alpha_with_all_params(self, async_client: As async def test_raw_response_evaluate_rows_alpha(self, async_client: AsyncLlamaStackClient) -> None: response = await async_client.eval.with_raw_response.evaluate_rows_alpha( benchmark_id="benchmark_id", - input_rows=[{"foo": True}], - scoring_functions=["string"], - task_config={ + benchmark_config={ "eval_candidate": { "model": "model", "sampling_params": {"strategy": {"type": "greedy"}}, @@ -739,6 +737,8 @@ async def test_raw_response_evaluate_rows_alpha(self, async_client: AsyncLlamaSt } }, }, + input_rows=[{"foo": True}], + scoring_functions=["string"], ) assert response.is_closed is True @@ -750,9 +750,7 @@ async def test_raw_response_evaluate_rows_alpha(self, async_client: AsyncLlamaSt async def test_streaming_response_evaluate_rows_alpha(self, async_client: AsyncLlamaStackClient) -> None: async with async_client.eval.with_streaming_response.evaluate_rows_alpha( benchmark_id="benchmark_id", - 
input_rows=[{"foo": True}], - scoring_functions=["string"], - task_config={ + benchmark_config={ "eval_candidate": { "model": "model", "sampling_params": {"strategy": {"type": "greedy"}}, @@ -765,6 +763,8 @@ async def test_streaming_response_evaluate_rows_alpha(self, async_client: AsyncL } }, }, + input_rows=[{"foo": True}], + scoring_functions=["string"], ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -779,9 +779,7 @@ async def test_path_params_evaluate_rows_alpha(self, async_client: AsyncLlamaSta with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"): await async_client.eval.with_raw_response.evaluate_rows_alpha( benchmark_id="", - input_rows=[{"foo": True}], - scoring_functions=["string"], - task_config={ + benchmark_config={ "eval_candidate": { "model": "model", "sampling_params": {"strategy": {"type": "greedy"}}, @@ -794,13 +792,15 @@ async def test_path_params_evaluate_rows_alpha(self, async_client: AsyncLlamaSta } }, }, + input_rows=[{"foo": True}], + scoring_functions=["string"], ) @parametrize async def test_method_run_eval(self, async_client: AsyncLlamaStackClient) -> None: eval = await async_client.eval.run_eval( benchmark_id="benchmark_id", - task_config={ + benchmark_config={ "eval_candidate": { "model": "model", "sampling_params": {"strategy": {"type": "greedy"}}, @@ -820,7 +820,7 @@ async def test_method_run_eval(self, async_client: AsyncLlamaStackClient) -> Non async def test_method_run_eval_with_all_params(self, async_client: AsyncLlamaStackClient) -> None: eval = await async_client.eval.run_eval( benchmark_id="benchmark_id", - task_config={ + benchmark_config={ "eval_candidate": { "model": "model", "sampling_params": { @@ -852,7 +852,7 @@ async def test_method_run_eval_with_all_params(self, async_client: AsyncLlamaSta async def test_raw_response_run_eval(self, async_client: AsyncLlamaStackClient) -> None: response = await async_client.eval.with_raw_response.run_eval( benchmark_id="benchmark_id", - task_config={ + benchmark_config={ "eval_candidate": { "model": "model", "sampling_params": {"strategy": {"type": "greedy"}}, @@ -876,7 +876,7 @@ async def test_raw_response_run_eval(self, async_client: AsyncLlamaStackClient) async def test_streaming_response_run_eval(self, async_client: AsyncLlamaStackClient) -> None: async with async_client.eval.with_streaming_response.run_eval( benchmark_id="benchmark_id", - task_config={ + benchmark_config={ "eval_candidate": { "model": "model", "sampling_params": {"strategy": {"type": "greedy"}}, @@ -903,7 +903,7 @@ async def test_path_params_run_eval(self, async_client: AsyncLlamaStackClient) - with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"): await async_client.eval.with_raw_response.run_eval( benchmark_id="", - task_config={ + benchmark_config={ "eval_candidate": { "model": "model", "sampling_params": {"strategy": {"type": "greedy"}}, @@ -922,7 +922,7 @@ async def test_path_params_run_eval(self, async_client: AsyncLlamaStackClient) - async def test_method_run_eval_alpha(self, async_client: AsyncLlamaStackClient) -> None: eval = await async_client.eval.run_eval_alpha( benchmark_id="benchmark_id", - task_config={ + benchmark_config={ "eval_candidate": { "model": "model", "sampling_params": {"strategy": {"type": "greedy"}}, @@ -942,7 +942,7 @@ async def test_method_run_eval_alpha(self, async_client: AsyncLlamaStackClient) async def 
test_method_run_eval_alpha_with_all_params(self, async_client: AsyncLlamaStackClient) -> None: eval = await async_client.eval.run_eval_alpha( benchmark_id="benchmark_id", - task_config={ + benchmark_config={ "eval_candidate": { "model": "model", "sampling_params": { @@ -974,7 +974,7 @@ async def test_method_run_eval_alpha_with_all_params(self, async_client: AsyncLl async def test_raw_response_run_eval_alpha(self, async_client: AsyncLlamaStackClient) -> None: response = await async_client.eval.with_raw_response.run_eval_alpha( benchmark_id="benchmark_id", - task_config={ + benchmark_config={ "eval_candidate": { "model": "model", "sampling_params": {"strategy": {"type": "greedy"}}, @@ -998,7 +998,7 @@ async def test_raw_response_run_eval_alpha(self, async_client: AsyncLlamaStackCl async def test_streaming_response_run_eval_alpha(self, async_client: AsyncLlamaStackClient) -> None: async with async_client.eval.with_streaming_response.run_eval_alpha( benchmark_id="benchmark_id", - task_config={ + benchmark_config={ "eval_candidate": { "model": "model", "sampling_params": {"strategy": {"type": "greedy"}}, @@ -1025,7 +1025,7 @@ async def test_path_params_run_eval_alpha(self, async_client: AsyncLlamaStackCli with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"): await async_client.eval.with_raw_response.run_eval_alpha( benchmark_id="", - task_config={ + benchmark_config={ "eval_candidate": { "model": "model", "sampling_params": {"strategy": {"type": "greedy"}},
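
Migration note: across `eval.run_eval`, `eval.run_eval_alpha`, `eval.evaluate_rows`, and `eval.evaluate_rows_alpha`, the keyword argument `task_config` is renamed to `benchmark_config` (the request body field changes the same way). A minimal sketch of an updated call, assuming a benchmark and model are already registered; the benchmark id, model name, and server URL are placeholders, and the config body is abbreviated to the fields visible in this patch:

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # placeholder URL

# Before this change, the same dict was passed as `task_config=...`.
job = client.eval.run_eval(
    benchmark_id="meta-reference::mmlu",  # placeholder benchmark id
    benchmark_config={
        "type": "benchmark",
        "eval_candidate": {
            "type": "model",
            "model": "meta-llama/Llama-3.1-8B-Instruct",  # placeholder model id
            "sampling_params": {"strategy": {"type": "greedy"}},
        },
        # scoring_params and num_examples omitted for brevity; see the
        # eval_benchmark_config.json example and the test payloads above.
    },
)
print(job)
```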
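The deprecated `transport`, `proxies`, and `limits` (`connection_pool_limits`) client arguments are removed outright; the deleted warnings already pointed at `http_client` as the replacement. A rough sketch of that replacement pattern, assuming the public `LlamaStackClient` constructor forwards `http_client` to the base class shown above (pool sizes, retry count, and URL are illustrative):

```python
import httpx

from llama_stack_client import LlamaStackClient

# Previously: LlamaStackClient(..., proxies=..., transport=..., limits=...)
# Now: configure httpx directly and hand the ready-made client over.
http_client = httpx.Client(
    limits=httpx.Limits(max_connections=20, max_keepalive_connections=5),
    transport=httpx.HTTPTransport(retries=2),
)

client = LlamaStackClient(
    base_url="http://localhost:8321",  # placeholder
    http_client=http_client,
)
```

The async client follows the same shape with `httpx.AsyncClient` and `AsyncLlamaStackClient`.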
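`allow_turn_resume` is removed from `agents.turn.create` (and from the `Agent` helper's streaming call), so resuming a turn after a client tool call no longer has to be opted into; callers simply drop the keyword. A hedged sketch, reusing the `client` from the previous example; the agent and session ids are placeholders, and `session_id` is the usual path parameter, which this patch does not touch:

```python
agent_id = "agent-123"      # placeholder
session_id = "session-456"  # placeholder

# Before: the same call additionally passed allow_turn_resume=True.
response = client.agents.turn.create(
    session_id=session_id,
    agent_id=agent_id,
    messages=[{"role": "user", "content": "Hello"}],
    stream=True,
)
for chunk in response:
    print(chunk)
```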