diff --git a/.stats.yml b/.stats.yml
index aebb90c8cf..ebe07c1372 100644
--- a/.stats.yml
+++ b/.stats.yml
@@ -1,4 +1,4 @@
-configured_endpoints: 82
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-4bce8217a697c729ac98046d4caf2c9e826b54c427fb0ab4f98e549a2e0ce31c.yml
-openapi_spec_hash: 7996d2c34cc44fe2ce9ffe93c0ab774e
-config_hash: bcd2cacdcb9fae9938f273cd167f613c
+configured_endpoints: 97
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-472fe3036ea745365257fe870c0330917fb3153705c2826f49873cd631319b0a.yml
+openapi_spec_hash: ea86343b5e9858a74e85da8ab2c532f6
+config_hash: ef19d36c307306f14f2e1cd5c834a151
diff --git a/api.md b/api.md
index a13b89a9c3..a26ac98add 100644
--- a/api.md
+++ b/api.md
@@ -258,6 +258,26 @@ Methods:
- client.fine_tuning.jobs.checkpoints.list(fine_tuning_job_id, \*\*params) -> SyncCursorPage[FineTuningJobCheckpoint]
+## Checkpoints
+
+### Permissions
+
+Types:
+
+```python
+from openai.types.fine_tuning.checkpoints import (
+ PermissionCreateResponse,
+ PermissionRetrieveResponse,
+ PermissionDeleteResponse,
+)
+```
+
+Methods:
+
+- client.fine_tuning.checkpoints.permissions.create(fine_tuned_model_checkpoint, \*\*params) -> SyncPage[PermissionCreateResponse]
+- client.fine_tuning.checkpoints.permissions.retrieve(fine_tuned_model_checkpoint, \*\*params) -> PermissionRetrieveResponse
+- client.fine_tuning.checkpoints.permissions.delete(fine_tuned_model_checkpoint) -> PermissionDeleteResponse
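+
+A minimal usage sketch, assuming the create call accepts a `project_ids` list; the checkpoint name and project ID below are placeholders, not values from this SDK:
+
+```python
+from openai import OpenAI
+
+client = OpenAI()
+
+CHECKPOINT = "ft:gpt-4o-mini-2024-07-18:org::abc123:ckpt-step-100"  # placeholder
+
+# Grant one or more projects access to a fine-tuned model checkpoint.
+client.fine_tuning.checkpoints.permissions.create(
+    CHECKPOINT,
+    project_ids=["proj_abc123"],  # assumed parameter name
+)
+
+# Inspect the existing permissions, then remove them for this checkpoint.
+client.fine_tuning.checkpoints.permissions.retrieve(CHECKPOINT)
+client.fine_tuning.checkpoints.permissions.delete(CHECKPOINT)
+```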
+
# VectorStores
Types:
@@ -690,3 +710,68 @@ from openai.types.responses import ResponseItemList
Methods:
- client.responses.input_items.list(response_id, \*\*params) -> SyncCursorPage[ResponseItem]
+
+# Evals
+
+Types:
+
+```python
+from openai.types import (
+ EvalCustomDataSourceConfig,
+ EvalLabelModelGrader,
+ EvalStoredCompletionsDataSourceConfig,
+ EvalStringCheckGrader,
+ EvalTextSimilarityGrader,
+ EvalCreateResponse,
+ EvalRetrieveResponse,
+ EvalUpdateResponse,
+ EvalListResponse,
+ EvalDeleteResponse,
+)
+```
+
+Methods:
+
+- client.evals.create(\*\*params) -> EvalCreateResponse
+- client.evals.retrieve(eval_id) -> EvalRetrieveResponse
+- client.evals.update(eval_id, \*\*params) -> EvalUpdateResponse
+- client.evals.list(\*\*params) -> SyncCursorPage[EvalListResponse]
+- client.evals.delete(eval_id) -> EvalDeleteResponse
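+
+A hedged usage sketch; the dictionary payloads are illustrative assumptions based on the `EvalCustomDataSourceConfig` and `EvalStringCheckGrader` types listed above, not an authoritative request format:
+
+```python
+from openai import OpenAI
+
+client = OpenAI()
+
+# Create an eval from a data source config plus a list of graders.
+created = client.evals.create(
+    name="sentiment-check",
+    data_source_config={
+        "type": "custom",
+        "item_schema": {
+            "type": "object",
+            "properties": {"answer": {"type": "string"}},
+            "required": ["answer"],
+        },
+    },
+    testing_criteria=[
+        {
+            "type": "string_check",
+            "name": "exact match",
+            "input": "{{item.answer}}",
+            "reference": "expected",
+            "operation": "eq",
+        }
+    ],
+)
+
+# `list` returns a SyncCursorPage, so plain iteration auto-paginates.
+for ev in client.evals.list(limit=20, order="desc"):
+    print(ev.id)
+
+client.evals.update(created.id, name="sentiment-check-v2")
+client.evals.delete(created.id)
+```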
+
+## Runs
+
+Types:
+
+```python
+from openai.types.evals import (
+ CreateEvalCompletionsRunDataSource,
+ CreateEvalJSONLRunDataSource,
+ EvalAPIError,
+ RunCreateResponse,
+ RunRetrieveResponse,
+ RunListResponse,
+ RunDeleteResponse,
+ RunCancelResponse,
+)
+```
+
+Methods:
+
+- client.evals.runs.create(eval_id, \*\*params) -> RunCreateResponse
+- client.evals.runs.retrieve(run_id, \*, eval_id) -> RunRetrieveResponse
+- client.evals.runs.list(eval_id, \*\*params) -> SyncCursorPage[RunListResponse]
+- client.evals.runs.delete(run_id, \*, eval_id) -> RunDeleteResponse
+- client.evals.runs.cancel(run_id, \*, eval_id) -> RunCancelResponse
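+
+A hedged usage sketch; the `data_source` payload is an illustrative assumption based on `CreateEvalJSONLRunDataSource`, and all IDs are placeholders:
+
+```python
+from openai import OpenAI
+
+client = OpenAI()
+
+# Kick off grading for an existing eval against an uploaded JSONL file.
+run = client.evals.runs.create(
+    "eval_abc123",
+    name="first-run",
+    data_source={
+        "type": "jsonl",
+        "source": {"type": "file_id", "id": "file-abc123"},  # assumed source shape
+    },
+)
+
+# Runs are addressed by run_id together with the parent eval_id.
+client.evals.runs.retrieve(run.id, eval_id="eval_abc123")
+client.evals.runs.cancel(run.id, eval_id="eval_abc123")
+```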
+
+### OutputItems
+
+Types:
+
+```python
+from openai.types.evals.runs import OutputItemRetrieveResponse, OutputItemListResponse
+```
+
+Methods:
+
+- client.evals.runs.output_items.retrieve(output_item_id, \*, eval_id, run_id) -> OutputItemRetrieveResponse
+- client.evals.runs.output_items.list(run_id, \*, eval_id, \*\*params) -> SyncCursorPage[OutputItemListResponse]
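+
+A hedged usage sketch; the IDs are placeholders:
+
+```python
+from openai import OpenAI
+
+client = OpenAI()
+
+# Iterate over the graded output items of a run; the cursor page handles
+# the `after`/`limit` pagination parameters automatically.
+for item in client.evals.runs.output_items.list(
+    "evalrun_abc123",
+    eval_id="eval_abc123",
+    status="pass",
+):
+    print(item.id)
+```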
diff --git a/src/openai/__init__.py b/src/openai/__init__.py
index 1107973aed..4efb48e411 100644
--- a/src/openai/__init__.py
+++ b/src/openai/__init__.py
@@ -251,6 +251,7 @@ def _reset_client() -> None: # type: ignore[reportUnusedFunction]
beta as beta,
chat as chat,
audio as audio,
+ evals as evals,
files as files,
images as images,
models as models,
diff --git a/src/openai/_client.py b/src/openai/_client.py
index 18d96da9a3..3aca6cb124 100644
--- a/src/openai/_client.py
+++ b/src/openai/_client.py
@@ -36,6 +36,7 @@
from .resources.beta import beta
from .resources.chat import chat
from .resources.audio import audio
+from .resources.evals import evals
from .resources.uploads import uploads
from .resources.responses import responses
from .resources.fine_tuning import fine_tuning
@@ -59,6 +60,7 @@ class OpenAI(SyncAPIClient):
batches: batches.Batches
uploads: uploads.Uploads
responses: responses.Responses
+ evals: evals.Evals
with_raw_response: OpenAIWithRawResponse
with_streaming_response: OpenAIWithStreamedResponse
@@ -158,6 +160,7 @@ def __init__(
self.batches = batches.Batches(self)
self.uploads = uploads.Uploads(self)
self.responses = responses.Responses(self)
+ self.evals = evals.Evals(self)
self.with_raw_response = OpenAIWithRawResponse(self)
self.with_streaming_response = OpenAIWithStreamedResponse(self)
@@ -290,6 +293,7 @@ class AsyncOpenAI(AsyncAPIClient):
batches: batches.AsyncBatches
uploads: uploads.AsyncUploads
responses: responses.AsyncResponses
+ evals: evals.AsyncEvals
with_raw_response: AsyncOpenAIWithRawResponse
with_streaming_response: AsyncOpenAIWithStreamedResponse
@@ -389,6 +393,7 @@ def __init__(
self.batches = batches.AsyncBatches(self)
self.uploads = uploads.AsyncUploads(self)
self.responses = responses.AsyncResponses(self)
+ self.evals = evals.AsyncEvals(self)
self.with_raw_response = AsyncOpenAIWithRawResponse(self)
self.with_streaming_response = AsyncOpenAIWithStreamedResponse(self)
@@ -522,6 +527,7 @@ def __init__(self, client: OpenAI) -> None:
self.batches = batches.BatchesWithRawResponse(client.batches)
self.uploads = uploads.UploadsWithRawResponse(client.uploads)
self.responses = responses.ResponsesWithRawResponse(client.responses)
+ self.evals = evals.EvalsWithRawResponse(client.evals)
class AsyncOpenAIWithRawResponse:
@@ -540,6 +546,7 @@ def __init__(self, client: AsyncOpenAI) -> None:
self.batches = batches.AsyncBatchesWithRawResponse(client.batches)
self.uploads = uploads.AsyncUploadsWithRawResponse(client.uploads)
self.responses = responses.AsyncResponsesWithRawResponse(client.responses)
+ self.evals = evals.AsyncEvalsWithRawResponse(client.evals)
class OpenAIWithStreamedResponse:
@@ -558,6 +565,7 @@ def __init__(self, client: OpenAI) -> None:
self.batches = batches.BatchesWithStreamingResponse(client.batches)
self.uploads = uploads.UploadsWithStreamingResponse(client.uploads)
self.responses = responses.ResponsesWithStreamingResponse(client.responses)
+ self.evals = evals.EvalsWithStreamingResponse(client.evals)
class AsyncOpenAIWithStreamedResponse:
@@ -576,6 +584,7 @@ def __init__(self, client: AsyncOpenAI) -> None:
self.batches = batches.AsyncBatchesWithStreamingResponse(client.batches)
self.uploads = uploads.AsyncUploadsWithStreamingResponse(client.uploads)
self.responses = responses.AsyncResponsesWithStreamingResponse(client.responses)
+ self.evals = evals.AsyncEvalsWithStreamingResponse(client.evals)
Client = OpenAI
diff --git a/src/openai/_module_client.py b/src/openai/_module_client.py
index e7d2657860..cf12f7a31e 100644
--- a/src/openai/_module_client.py
+++ b/src/openai/_module_client.py
@@ -30,6 +30,12 @@ def __load__(self) -> resources.Audio:
return _load_client().audio
+class EvalsProxy(LazyProxy[resources.Evals]):
+ @override
+ def __load__(self) -> resources.Evals:
+ return _load_client().evals
+
+
class ImagesProxy(LazyProxy[resources.Images]):
@override
def __load__(self) -> resources.Images:
@@ -94,6 +100,7 @@ def __load__(self) -> resources.VectorStores:
beta: resources.Beta = BetaProxy().__as_proxied__()
files: resources.Files = FilesProxy().__as_proxied__()
audio: resources.Audio = AudioProxy().__as_proxied__()
+evals: resources.Evals = EvalsProxy().__as_proxied__()
images: resources.Images = ImagesProxy().__as_proxied__()
models: resources.Models = ModelsProxy().__as_proxied__()
batches: resources.Batches = BatchesProxy().__as_proxied__()
diff --git a/src/openai/resources/__init__.py b/src/openai/resources/__init__.py
index d3457cf319..ab9cd73e81 100644
--- a/src/openai/resources/__init__.py
+++ b/src/openai/resources/__init__.py
@@ -24,6 +24,14 @@
AudioWithStreamingResponse,
AsyncAudioWithStreamingResponse,
)
+from .evals import (
+ Evals,
+ AsyncEvals,
+ EvalsWithRawResponse,
+ AsyncEvalsWithRawResponse,
+ EvalsWithStreamingResponse,
+ AsyncEvalsWithStreamingResponse,
+)
from .files import (
Files,
AsyncFiles,
@@ -198,4 +206,10 @@
"AsyncResponsesWithRawResponse",
"ResponsesWithStreamingResponse",
"AsyncResponsesWithStreamingResponse",
+ "Evals",
+ "AsyncEvals",
+ "EvalsWithRawResponse",
+ "AsyncEvalsWithRawResponse",
+ "EvalsWithStreamingResponse",
+ "AsyncEvalsWithStreamingResponse",
]
diff --git a/src/openai/resources/evals/__init__.py b/src/openai/resources/evals/__init__.py
new file mode 100644
index 0000000000..84f707511d
--- /dev/null
+++ b/src/openai/resources/evals/__init__.py
@@ -0,0 +1,33 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .runs import (
+ Runs,
+ AsyncRuns,
+ RunsWithRawResponse,
+ AsyncRunsWithRawResponse,
+ RunsWithStreamingResponse,
+ AsyncRunsWithStreamingResponse,
+)
+from .evals import (
+ Evals,
+ AsyncEvals,
+ EvalsWithRawResponse,
+ AsyncEvalsWithRawResponse,
+ EvalsWithStreamingResponse,
+ AsyncEvalsWithStreamingResponse,
+)
+
+__all__ = [
+ "Runs",
+ "AsyncRuns",
+ "RunsWithRawResponse",
+ "AsyncRunsWithRawResponse",
+ "RunsWithStreamingResponse",
+ "AsyncRunsWithStreamingResponse",
+ "Evals",
+ "AsyncEvals",
+ "EvalsWithRawResponse",
+ "AsyncEvalsWithRawResponse",
+ "EvalsWithStreamingResponse",
+ "AsyncEvalsWithStreamingResponse",
+]
diff --git a/src/openai/resources/evals/evals.py b/src/openai/resources/evals/evals.py
new file mode 100644
index 0000000000..24a0350cfb
--- /dev/null
+++ b/src/openai/resources/evals/evals.py
@@ -0,0 +1,663 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Iterable, Optional
+from typing_extensions import Literal
+
+import httpx
+
+from ... import _legacy_response
+from ...types import eval_list_params, eval_create_params, eval_update_params
+from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ..._utils import (
+ maybe_transform,
+ async_maybe_transform,
+)
+from ..._compat import cached_property
+from .runs.runs import (
+ Runs,
+ AsyncRuns,
+ RunsWithRawResponse,
+ AsyncRunsWithRawResponse,
+ RunsWithStreamingResponse,
+ AsyncRunsWithStreamingResponse,
+)
+from ..._resource import SyncAPIResource, AsyncAPIResource
+from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ...pagination import SyncCursorPage, AsyncCursorPage
+from ..._base_client import AsyncPaginator, make_request_options
+from ...types.eval_list_response import EvalListResponse
+from ...types.eval_create_response import EvalCreateResponse
+from ...types.eval_delete_response import EvalDeleteResponse
+from ...types.eval_update_response import EvalUpdateResponse
+from ...types.eval_retrieve_response import EvalRetrieveResponse
+from ...types.shared_params.metadata import Metadata
+
+__all__ = ["Evals", "AsyncEvals"]
+
+
+class Evals(SyncAPIResource):
+ @cached_property
+ def runs(self) -> Runs:
+ return Runs(self._client)
+
+ @cached_property
+ def with_raw_response(self) -> EvalsWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+ """
+ return EvalsWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> EvalsWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+ """
+ return EvalsWithStreamingResponse(self)
+
+ def create(
+ self,
+ *,
+ data_source_config: eval_create_params.DataSourceConfig,
+ testing_criteria: Iterable[eval_create_params.TestingCriterion],
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ name: str | NotGiven = NOT_GIVEN,
+ share_with_openai: bool | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> EvalCreateResponse:
+ """
+ Create the structure of an evaluation that can be used to test a model's
+ performance. An evaluation is a set of testing criteria and a datasource. After
+ creating an evaluation, you can run it on different models and model parameters.
+ We support several types of graders and datasources. For more information, see
+ the [Evals guide](https://platform.openai.com/docs/guides/evals).
+
+ Args:
+ data_source_config: The configuration for the data source used for the evaluation runs.
+
+ testing_criteria: A list of graders for all eval runs in this group.
+
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+ for storing additional information about the object in a structured format, and
+ querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+
+ name: The name of the evaluation.
+
+ share_with_openai: Indicates whether the evaluation is shared with OpenAI.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return self._post(
+ "/evals",
+ body=maybe_transform(
+ {
+ "data_source_config": data_source_config,
+ "testing_criteria": testing_criteria,
+ "metadata": metadata,
+ "name": name,
+ "share_with_openai": share_with_openai,
+ },
+ eval_create_params.EvalCreateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=EvalCreateResponse,
+ )
+
+ def retrieve(
+ self,
+ eval_id: str,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> EvalRetrieveResponse:
+ """
+ Get an evaluation by ID.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not eval_id:
+ raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}")
+ return self._get(
+ f"/evals/{eval_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=EvalRetrieveResponse,
+ )
+
+ def update(
+ self,
+ eval_id: str,
+ *,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ name: str | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> EvalUpdateResponse:
+ """
+ Update certain properties of an evaluation.
+
+ Args:
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+ for storing additional information about the object in a structured format, and
+ querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+
+ name: Rename the evaluation.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not eval_id:
+ raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}")
+ return self._post(
+ f"/evals/{eval_id}",
+ body=maybe_transform(
+ {
+ "metadata": metadata,
+ "name": name,
+ },
+ eval_update_params.EvalUpdateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=EvalUpdateResponse,
+ )
+
+ def list(
+ self,
+ *,
+ after: str | NotGiven = NOT_GIVEN,
+ limit: int | NotGiven = NOT_GIVEN,
+ order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
+ order_by: Literal["created_at", "updated_at"] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> SyncCursorPage[EvalListResponse]:
+ """
+ List evaluations for a project.
+
+ Args:
+ after: Identifier for the last eval from the previous pagination request.
+
+ limit: Number of evals to retrieve.
+
+ order: Sort order for evals by timestamp. Use `asc` for ascending order or `desc` for
+ descending order.
+
+ order_by: Evals can be ordered by creation time or last updated time. Use `created_at` for
+ creation time or `updated_at` for last updated time.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return self._get_api_list(
+ "/evals",
+ page=SyncCursorPage[EvalListResponse],
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=maybe_transform(
+ {
+ "after": after,
+ "limit": limit,
+ "order": order,
+ "order_by": order_by,
+ },
+ eval_list_params.EvalListParams,
+ ),
+ ),
+ model=EvalListResponse,
+ )
+
+ def delete(
+ self,
+ eval_id: str,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> EvalDeleteResponse:
+ """
+ Delete an evaluation.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not eval_id:
+ raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}")
+ return self._delete(
+ f"/evals/{eval_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=EvalDeleteResponse,
+ )
+
+
+class AsyncEvals(AsyncAPIResource):
+ @cached_property
+ def runs(self) -> AsyncRuns:
+ return AsyncRuns(self._client)
+
+ @cached_property
+ def with_raw_response(self) -> AsyncEvalsWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+ """
+ return AsyncEvalsWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncEvalsWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+ """
+ return AsyncEvalsWithStreamingResponse(self)
+
+ async def create(
+ self,
+ *,
+ data_source_config: eval_create_params.DataSourceConfig,
+ testing_criteria: Iterable[eval_create_params.TestingCriterion],
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ name: str | NotGiven = NOT_GIVEN,
+ share_with_openai: bool | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> EvalCreateResponse:
+ """
+ Create the structure of an evaluation that can be used to test a model's
+ performance. An evaluation is a set of testing criteria and a datasource. After
+ creating an evaluation, you can run it on different models and model parameters.
+ We support several types of graders and datasources. For more information, see
+ the [Evals guide](https://platform.openai.com/docs/guides/evals).
+
+ Args:
+ data_source_config: The configuration for the data source used for the evaluation runs.
+
+ testing_criteria: A list of graders for all eval runs in this group.
+
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+ for storing additional information about the object in a structured format, and
+ querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+
+ name: The name of the evaluation.
+
+ share_with_openai: Indicates whether the evaluation is shared with OpenAI.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return await self._post(
+ "/evals",
+ body=await async_maybe_transform(
+ {
+ "data_source_config": data_source_config,
+ "testing_criteria": testing_criteria,
+ "metadata": metadata,
+ "name": name,
+ "share_with_openai": share_with_openai,
+ },
+ eval_create_params.EvalCreateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=EvalCreateResponse,
+ )
+
+ async def retrieve(
+ self,
+ eval_id: str,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> EvalRetrieveResponse:
+ """
+ Get an evaluation by ID.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not eval_id:
+ raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}")
+ return await self._get(
+ f"/evals/{eval_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=EvalRetrieveResponse,
+ )
+
+ async def update(
+ self,
+ eval_id: str,
+ *,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ name: str | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> EvalUpdateResponse:
+ """
+ Update certain properties of an evaluation.
+
+ Args:
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+ for storing additional information about the object in a structured format, and
+ querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+
+ name: Rename the evaluation.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not eval_id:
+ raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}")
+ return await self._post(
+ f"/evals/{eval_id}",
+ body=await async_maybe_transform(
+ {
+ "metadata": metadata,
+ "name": name,
+ },
+ eval_update_params.EvalUpdateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=EvalUpdateResponse,
+ )
+
+ def list(
+ self,
+ *,
+ after: str | NotGiven = NOT_GIVEN,
+ limit: int | NotGiven = NOT_GIVEN,
+ order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
+ order_by: Literal["created_at", "updated_at"] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AsyncPaginator[EvalListResponse, AsyncCursorPage[EvalListResponse]]:
+ """
+ List evaluations for a project.
+
+ Args:
+ after: Identifier for the last eval from the previous pagination request.
+
+ limit: Number of evals to retrieve.
+
+ order: Sort order for evals by timestamp. Use `asc` for ascending order or `desc` for
+ descending order.
+
+ order_by: Evals can be ordered by creation time or last updated time. Use `created_at` for
+ creation time or `updated_at` for last updated time.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return self._get_api_list(
+ "/evals",
+ page=AsyncCursorPage[EvalListResponse],
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=maybe_transform(
+ {
+ "after": after,
+ "limit": limit,
+ "order": order,
+ "order_by": order_by,
+ },
+ eval_list_params.EvalListParams,
+ ),
+ ),
+ model=EvalListResponse,
+ )
+
+ async def delete(
+ self,
+ eval_id: str,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> EvalDeleteResponse:
+ """
+ Delete an evaluation.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not eval_id:
+ raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}")
+ return await self._delete(
+ f"/evals/{eval_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=EvalDeleteResponse,
+ )
+
+
+class EvalsWithRawResponse:
+ def __init__(self, evals: Evals) -> None:
+ self._evals = evals
+
+ self.create = _legacy_response.to_raw_response_wrapper(
+ evals.create,
+ )
+ self.retrieve = _legacy_response.to_raw_response_wrapper(
+ evals.retrieve,
+ )
+ self.update = _legacy_response.to_raw_response_wrapper(
+ evals.update,
+ )
+ self.list = _legacy_response.to_raw_response_wrapper(
+ evals.list,
+ )
+ self.delete = _legacy_response.to_raw_response_wrapper(
+ evals.delete,
+ )
+
+ @cached_property
+ def runs(self) -> RunsWithRawResponse:
+ return RunsWithRawResponse(self._evals.runs)
+
+
+class AsyncEvalsWithRawResponse:
+ def __init__(self, evals: AsyncEvals) -> None:
+ self._evals = evals
+
+ self.create = _legacy_response.async_to_raw_response_wrapper(
+ evals.create,
+ )
+ self.retrieve = _legacy_response.async_to_raw_response_wrapper(
+ evals.retrieve,
+ )
+ self.update = _legacy_response.async_to_raw_response_wrapper(
+ evals.update,
+ )
+ self.list = _legacy_response.async_to_raw_response_wrapper(
+ evals.list,
+ )
+ self.delete = _legacy_response.async_to_raw_response_wrapper(
+ evals.delete,
+ )
+
+ @cached_property
+ def runs(self) -> AsyncRunsWithRawResponse:
+ return AsyncRunsWithRawResponse(self._evals.runs)
+
+
+class EvalsWithStreamingResponse:
+ def __init__(self, evals: Evals) -> None:
+ self._evals = evals
+
+ self.create = to_streamed_response_wrapper(
+ evals.create,
+ )
+ self.retrieve = to_streamed_response_wrapper(
+ evals.retrieve,
+ )
+ self.update = to_streamed_response_wrapper(
+ evals.update,
+ )
+ self.list = to_streamed_response_wrapper(
+ evals.list,
+ )
+ self.delete = to_streamed_response_wrapper(
+ evals.delete,
+ )
+
+ @cached_property
+ def runs(self) -> RunsWithStreamingResponse:
+ return RunsWithStreamingResponse(self._evals.runs)
+
+
+class AsyncEvalsWithStreamingResponse:
+ def __init__(self, evals: AsyncEvals) -> None:
+ self._evals = evals
+
+ self.create = async_to_streamed_response_wrapper(
+ evals.create,
+ )
+ self.retrieve = async_to_streamed_response_wrapper(
+ evals.retrieve,
+ )
+ self.update = async_to_streamed_response_wrapper(
+ evals.update,
+ )
+ self.list = async_to_streamed_response_wrapper(
+ evals.list,
+ )
+ self.delete = async_to_streamed_response_wrapper(
+ evals.delete,
+ )
+
+ @cached_property
+ def runs(self) -> AsyncRunsWithStreamingResponse:
+ return AsyncRunsWithStreamingResponse(self._evals.runs)
diff --git a/src/openai/resources/evals/runs/__init__.py b/src/openai/resources/evals/runs/__init__.py
new file mode 100644
index 0000000000..d189f16fb7
--- /dev/null
+++ b/src/openai/resources/evals/runs/__init__.py
@@ -0,0 +1,33 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .runs import (
+ Runs,
+ AsyncRuns,
+ RunsWithRawResponse,
+ AsyncRunsWithRawResponse,
+ RunsWithStreamingResponse,
+ AsyncRunsWithStreamingResponse,
+)
+from .output_items import (
+ OutputItems,
+ AsyncOutputItems,
+ OutputItemsWithRawResponse,
+ AsyncOutputItemsWithRawResponse,
+ OutputItemsWithStreamingResponse,
+ AsyncOutputItemsWithStreamingResponse,
+)
+
+__all__ = [
+ "OutputItems",
+ "AsyncOutputItems",
+ "OutputItemsWithRawResponse",
+ "AsyncOutputItemsWithRawResponse",
+ "OutputItemsWithStreamingResponse",
+ "AsyncOutputItemsWithStreamingResponse",
+ "Runs",
+ "AsyncRuns",
+ "RunsWithRawResponse",
+ "AsyncRunsWithRawResponse",
+ "RunsWithStreamingResponse",
+ "AsyncRunsWithStreamingResponse",
+]
diff --git a/src/openai/resources/evals/runs/output_items.py b/src/openai/resources/evals/runs/output_items.py
new file mode 100644
index 0000000000..8fd0fdea92
--- /dev/null
+++ b/src/openai/resources/evals/runs/output_items.py
@@ -0,0 +1,315 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal
+
+import httpx
+
+from .... import _legacy_response
+from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ...._utils import maybe_transform
+from ...._compat import cached_property
+from ...._resource import SyncAPIResource, AsyncAPIResource
+from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ....pagination import SyncCursorPage, AsyncCursorPage
+from ...._base_client import AsyncPaginator, make_request_options
+from ....types.evals.runs import output_item_list_params
+from ....types.evals.runs.output_item_list_response import OutputItemListResponse
+from ....types.evals.runs.output_item_retrieve_response import OutputItemRetrieveResponse
+
+__all__ = ["OutputItems", "AsyncOutputItems"]
+
+
+class OutputItems(SyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> OutputItemsWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+ """
+ return OutputItemsWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> OutputItemsWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+ """
+ return OutputItemsWithStreamingResponse(self)
+
+ def retrieve(
+ self,
+ output_item_id: str,
+ *,
+ eval_id: str,
+ run_id: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> OutputItemRetrieveResponse:
+ """
+ Get an evaluation run output item by ID.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not eval_id:
+ raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}")
+ if not run_id:
+ raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+ if not output_item_id:
+ raise ValueError(f"Expected a non-empty value for `output_item_id` but received {output_item_id!r}")
+ return self._get(
+ f"/evals/{eval_id}/runs/{run_id}/output_items/{output_item_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=OutputItemRetrieveResponse,
+ )
+
+ def list(
+ self,
+ run_id: str,
+ *,
+ eval_id: str,
+ after: str | NotGiven = NOT_GIVEN,
+ limit: int | NotGiven = NOT_GIVEN,
+ order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
+ status: Literal["fail", "pass"] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> SyncCursorPage[OutputItemListResponse]:
+ """
+ Get a list of output items for an evaluation run.
+
+ Args:
+ after: Identifier for the last output item from the previous pagination request.
+
+ limit: Number of output items to retrieve.
+
+ order: Sort order for output items by timestamp. Use `asc` for ascending order or
+ `desc` for descending order. Defaults to `asc`.
+
+ status: Filter output items by status. Use `fail` to filter by failed output items or
+ `pass` to filter by passed output items.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not eval_id:
+ raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}")
+ if not run_id:
+ raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+ return self._get_api_list(
+ f"/evals/{eval_id}/runs/{run_id}/output_items",
+ page=SyncCursorPage[OutputItemListResponse],
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=maybe_transform(
+ {
+ "after": after,
+ "limit": limit,
+ "order": order,
+ "status": status,
+ },
+ output_item_list_params.OutputItemListParams,
+ ),
+ ),
+ model=OutputItemListResponse,
+ )
+
+
+class AsyncOutputItems(AsyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> AsyncOutputItemsWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+ """
+ return AsyncOutputItemsWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncOutputItemsWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+ """
+ return AsyncOutputItemsWithStreamingResponse(self)
+
+ async def retrieve(
+ self,
+ output_item_id: str,
+ *,
+ eval_id: str,
+ run_id: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> OutputItemRetrieveResponse:
+ """
+ Get an evaluation run output item by ID.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not eval_id:
+ raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}")
+ if not run_id:
+ raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+ if not output_item_id:
+ raise ValueError(f"Expected a non-empty value for `output_item_id` but received {output_item_id!r}")
+ return await self._get(
+ f"/evals/{eval_id}/runs/{run_id}/output_items/{output_item_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=OutputItemRetrieveResponse,
+ )
+
+ def list(
+ self,
+ run_id: str,
+ *,
+ eval_id: str,
+ after: str | NotGiven = NOT_GIVEN,
+ limit: int | NotGiven = NOT_GIVEN,
+ order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
+ status: Literal["fail", "pass"] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AsyncPaginator[OutputItemListResponse, AsyncCursorPage[OutputItemListResponse]]:
+ """
+ Get a list of output items for an evaluation run.
+
+ Args:
+ after: Identifier for the last output item from the previous pagination request.
+
+ limit: Number of output items to retrieve.
+
+ order: Sort order for output items by timestamp. Use `asc` for ascending order or
+ `desc` for descending order. Defaults to `asc`.
+
+ status: Filter output items by status. Use `fail` to filter by failed output items or
+ `pass` to filter by passed output items.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not eval_id:
+ raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}")
+ if not run_id:
+ raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+ return self._get_api_list(
+ f"/evals/{eval_id}/runs/{run_id}/output_items",
+ page=AsyncCursorPage[OutputItemListResponse],
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=maybe_transform(
+ {
+ "after": after,
+ "limit": limit,
+ "order": order,
+ "status": status,
+ },
+ output_item_list_params.OutputItemListParams,
+ ),
+ ),
+ model=OutputItemListResponse,
+ )
+
+
+class OutputItemsWithRawResponse:
+ def __init__(self, output_items: OutputItems) -> None:
+ self._output_items = output_items
+
+ self.retrieve = _legacy_response.to_raw_response_wrapper(
+ output_items.retrieve,
+ )
+ self.list = _legacy_response.to_raw_response_wrapper(
+ output_items.list,
+ )
+
+
+class AsyncOutputItemsWithRawResponse:
+ def __init__(self, output_items: AsyncOutputItems) -> None:
+ self._output_items = output_items
+
+ self.retrieve = _legacy_response.async_to_raw_response_wrapper(
+ output_items.retrieve,
+ )
+ self.list = _legacy_response.async_to_raw_response_wrapper(
+ output_items.list,
+ )
+
+
+class OutputItemsWithStreamingResponse:
+ def __init__(self, output_items: OutputItems) -> None:
+ self._output_items = output_items
+
+ self.retrieve = to_streamed_response_wrapper(
+ output_items.retrieve,
+ )
+ self.list = to_streamed_response_wrapper(
+ output_items.list,
+ )
+
+
+class AsyncOutputItemsWithStreamingResponse:
+ def __init__(self, output_items: AsyncOutputItems) -> None:
+ self._output_items = output_items
+
+ self.retrieve = async_to_streamed_response_wrapper(
+ output_items.retrieve,
+ )
+ self.list = async_to_streamed_response_wrapper(
+ output_items.list,
+ )
diff --git a/src/openai/resources/evals/runs/runs.py b/src/openai/resources/evals/runs/runs.py
new file mode 100644
index 0000000000..6df0b6d121
--- /dev/null
+++ b/src/openai/resources/evals/runs/runs.py
@@ -0,0 +1,635 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Optional
+from typing_extensions import Literal
+
+import httpx
+
+from .... import _legacy_response
+from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ...._utils import (
+ maybe_transform,
+ async_maybe_transform,
+)
+from ...._compat import cached_property
+from ...._resource import SyncAPIResource, AsyncAPIResource
+from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from .output_items import (
+ OutputItems,
+ AsyncOutputItems,
+ OutputItemsWithRawResponse,
+ AsyncOutputItemsWithRawResponse,
+ OutputItemsWithStreamingResponse,
+ AsyncOutputItemsWithStreamingResponse,
+)
+from ....pagination import SyncCursorPage, AsyncCursorPage
+from ....types.evals import run_list_params, run_create_params
+from ...._base_client import AsyncPaginator, make_request_options
+from ....types.shared_params.metadata import Metadata
+from ....types.evals.run_list_response import RunListResponse
+from ....types.evals.run_cancel_response import RunCancelResponse
+from ....types.evals.run_create_response import RunCreateResponse
+from ....types.evals.run_delete_response import RunDeleteResponse
+from ....types.evals.run_retrieve_response import RunRetrieveResponse
+
+__all__ = ["Runs", "AsyncRuns"]
+
+
+class Runs(SyncAPIResource):
+ @cached_property
+ def output_items(self) -> OutputItems:
+ return OutputItems(self._client)
+
+ @cached_property
+ def with_raw_response(self) -> RunsWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+ """
+ return RunsWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> RunsWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+ """
+ return RunsWithStreamingResponse(self)
+
+ def create(
+ self,
+ eval_id: str,
+ *,
+ data_source: run_create_params.DataSource,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ name: str | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> RunCreateResponse:
+ """Create a new evaluation run.
+
+ This is the endpoint that will kick off grading.
+
+ Args:
+ data_source: Details about the run's data source.
+
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+ for storing additional information about the object in a structured format, and
+ querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+
+ name: The name of the run.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not eval_id:
+ raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}")
+ return self._post(
+ f"/evals/{eval_id}/runs",
+ body=maybe_transform(
+ {
+ "data_source": data_source,
+ "metadata": metadata,
+ "name": name,
+ },
+ run_create_params.RunCreateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=RunCreateResponse,
+ )
+
+ def retrieve(
+ self,
+ run_id: str,
+ *,
+ eval_id: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> RunRetrieveResponse:
+ """
+ Get an evaluation run by ID.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not eval_id:
+ raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}")
+ if not run_id:
+ raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+ return self._get(
+ f"/evals/{eval_id}/runs/{run_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=RunRetrieveResponse,
+ )
+
+ def list(
+ self,
+ eval_id: str,
+ *,
+ after: str | NotGiven = NOT_GIVEN,
+ limit: int | NotGiven = NOT_GIVEN,
+ order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
+ status: Literal["queued", "in_progress", "completed", "canceled", "failed"] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> SyncCursorPage[RunListResponse]:
+ """
+ Get a list of runs for an evaluation.
+
+ Args:
+ after: Identifier for the last run from the previous pagination request.
+
+ limit: Number of runs to retrieve.
+
+ order: Sort order for runs by timestamp. Use `asc` for ascending order or `desc` for
+ descending order. Defaults to `asc`.
+
+ status: Filter runs by status. Use `queued`, `in_progress`, `failed`, `completed`, or
+ `canceled`.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not eval_id:
+ raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}")
+ return self._get_api_list(
+ f"/evals/{eval_id}/runs",
+ page=SyncCursorPage[RunListResponse],
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=maybe_transform(
+ {
+ "after": after,
+ "limit": limit,
+ "order": order,
+ "status": status,
+ },
+ run_list_params.RunListParams,
+ ),
+ ),
+ model=RunListResponse,
+ )
+
+ def delete(
+ self,
+ run_id: str,
+ *,
+ eval_id: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> RunDeleteResponse:
+ """
+ Delete an eval run.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not eval_id:
+ raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}")
+ if not run_id:
+ raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+ return self._delete(
+ f"/evals/{eval_id}/runs/{run_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=RunDeleteResponse,
+ )
+
+ def cancel(
+ self,
+ run_id: str,
+ *,
+ eval_id: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> RunCancelResponse:
+ """
+ Cancel an ongoing evaluation run.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not eval_id:
+ raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}")
+ if not run_id:
+ raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+ return self._post(
+ f"/evals/{eval_id}/runs/{run_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=RunCancelResponse,
+ )
+
+
+class AsyncRuns(AsyncAPIResource):
+ @cached_property
+ def output_items(self) -> AsyncOutputItems:
+ return AsyncOutputItems(self._client)
+
+ @cached_property
+ def with_raw_response(self) -> AsyncRunsWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+ """
+ return AsyncRunsWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncRunsWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+ """
+ return AsyncRunsWithStreamingResponse(self)
+
+ async def create(
+ self,
+ eval_id: str,
+ *,
+ data_source: run_create_params.DataSource,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ name: str | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> RunCreateResponse:
+ """Create a new evaluation run.
+
+ This is the endpoint that will kick off grading.
+
+ Args:
+ data_source: Details about the run's data source.
+
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+ for storing additional information about the object in a structured format, and
+ querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+
+ name: The name of the run.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not eval_id:
+ raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}")
+ return await self._post(
+ f"/evals/{eval_id}/runs",
+ body=await async_maybe_transform(
+ {
+ "data_source": data_source,
+ "metadata": metadata,
+ "name": name,
+ },
+ run_create_params.RunCreateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=RunCreateResponse,
+ )
+
+ async def retrieve(
+ self,
+ run_id: str,
+ *,
+ eval_id: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> RunRetrieveResponse:
+ """
+ Get an evaluation run by ID.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not eval_id:
+ raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}")
+ if not run_id:
+ raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+ return await self._get(
+ f"/evals/{eval_id}/runs/{run_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=RunRetrieveResponse,
+ )
+
+ def list(
+ self,
+ eval_id: str,
+ *,
+ after: str | NotGiven = NOT_GIVEN,
+ limit: int | NotGiven = NOT_GIVEN,
+ order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
+ status: Literal["queued", "in_progress", "completed", "canceled", "failed"] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AsyncPaginator[RunListResponse, AsyncCursorPage[RunListResponse]]:
+ """
+ Get a list of runs for an evaluation.
+
+ Args:
+ after: Identifier for the last run from the previous pagination request.
+
+ limit: Number of runs to retrieve.
+
+ order: Sort order for runs by timestamp. Use `asc` for ascending order or `desc` for
+ descending order. Defaults to `asc`.
+
+ status: Filter runs by status. One of `queued`, `in_progress`, `failed`,
+ `completed`, or `canceled`.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not eval_id:
+ raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}")
+ return self._get_api_list(
+ f"/evals/{eval_id}/runs",
+ page=AsyncCursorPage[RunListResponse],
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=maybe_transform(
+ {
+ "after": after,
+ "limit": limit,
+ "order": order,
+ "status": status,
+ },
+ run_list_params.RunListParams,
+ ),
+ ),
+ model=RunListResponse,
+ )
+
+ async def delete(
+ self,
+ run_id: str,
+ *,
+ eval_id: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> RunDeleteResponse:
+ """
+ Delete an eval run.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not eval_id:
+ raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}")
+ if not run_id:
+ raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+ return await self._delete(
+ f"/evals/{eval_id}/runs/{run_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=RunDeleteResponse,
+ )
+
+ async def cancel(
+ self,
+ run_id: str,
+ *,
+ eval_id: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> RunCancelResponse:
+ """
+ Cancel an ongoing evaluation run.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not eval_id:
+ raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}")
+ if not run_id:
+ raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+ return await self._post(
+ f"/evals/{eval_id}/runs/{run_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=RunCancelResponse,
+ )
+
+
+class RunsWithRawResponse:
+ def __init__(self, runs: Runs) -> None:
+ self._runs = runs
+
+ self.create = _legacy_response.to_raw_response_wrapper(
+ runs.create,
+ )
+ self.retrieve = _legacy_response.to_raw_response_wrapper(
+ runs.retrieve,
+ )
+ self.list = _legacy_response.to_raw_response_wrapper(
+ runs.list,
+ )
+ self.delete = _legacy_response.to_raw_response_wrapper(
+ runs.delete,
+ )
+ self.cancel = _legacy_response.to_raw_response_wrapper(
+ runs.cancel,
+ )
+
+ @cached_property
+ def output_items(self) -> OutputItemsWithRawResponse:
+ return OutputItemsWithRawResponse(self._runs.output_items)
+
+
+class AsyncRunsWithRawResponse:
+ def __init__(self, runs: AsyncRuns) -> None:
+ self._runs = runs
+
+ self.create = _legacy_response.async_to_raw_response_wrapper(
+ runs.create,
+ )
+ self.retrieve = _legacy_response.async_to_raw_response_wrapper(
+ runs.retrieve,
+ )
+ self.list = _legacy_response.async_to_raw_response_wrapper(
+ runs.list,
+ )
+ self.delete = _legacy_response.async_to_raw_response_wrapper(
+ runs.delete,
+ )
+ self.cancel = _legacy_response.async_to_raw_response_wrapper(
+ runs.cancel,
+ )
+
+ @cached_property
+ def output_items(self) -> AsyncOutputItemsWithRawResponse:
+ return AsyncOutputItemsWithRawResponse(self._runs.output_items)
+
+
+class RunsWithStreamingResponse:
+ def __init__(self, runs: Runs) -> None:
+ self._runs = runs
+
+ self.create = to_streamed_response_wrapper(
+ runs.create,
+ )
+ self.retrieve = to_streamed_response_wrapper(
+ runs.retrieve,
+ )
+ self.list = to_streamed_response_wrapper(
+ runs.list,
+ )
+ self.delete = to_streamed_response_wrapper(
+ runs.delete,
+ )
+ self.cancel = to_streamed_response_wrapper(
+ runs.cancel,
+ )
+
+ @cached_property
+ def output_items(self) -> OutputItemsWithStreamingResponse:
+ return OutputItemsWithStreamingResponse(self._runs.output_items)
+
+
+class AsyncRunsWithStreamingResponse:
+ def __init__(self, runs: AsyncRuns) -> None:
+ self._runs = runs
+
+ self.create = async_to_streamed_response_wrapper(
+ runs.create,
+ )
+ self.retrieve = async_to_streamed_response_wrapper(
+ runs.retrieve,
+ )
+ self.list = async_to_streamed_response_wrapper(
+ runs.list,
+ )
+ self.delete = async_to_streamed_response_wrapper(
+ runs.delete,
+ )
+ self.cancel = async_to_streamed_response_wrapper(
+ runs.cancel,
+ )
+
+ @cached_property
+ def output_items(self) -> AsyncOutputItemsWithStreamingResponse:
+ return AsyncOutputItemsWithStreamingResponse(self._runs.output_items)
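
The following is a minimal, hypothetical usage sketch of the `Runs` resource added above (sync client shown). The eval and run IDs are placeholders, and the `data_source` payload accepted by `create` is defined by the run data-source types rather than shown here.

```python
# Hypothetical sketch of calling the new evals runs resource; IDs are placeholders.
from openai import OpenAI

client = OpenAI()
EVAL_ID = "eval_abc123"  # placeholder eval identifier

# List completed runs for the eval, newest first (cursor pagination is handled
# by the returned SyncCursorPage when iterating).
for run in client.evals.runs.list(EVAL_ID, order="desc", status="completed"):
    print(run.id)  # assumes the run object exposes an `id` field

# Retrieve a single run by ID, then request cancellation.
run = client.evals.runs.retrieve("evalrun_abc123", eval_id=EVAL_ID)
client.evals.runs.cancel(run.id, eval_id=EVAL_ID)
```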
diff --git a/src/openai/resources/fine_tuning/__init__.py b/src/openai/resources/fine_tuning/__init__.py
index 7765231fee..ed7db4f4e0 100644
--- a/src/openai/resources/fine_tuning/__init__.py
+++ b/src/openai/resources/fine_tuning/__init__.py
@@ -8,6 +8,14 @@
JobsWithStreamingResponse,
AsyncJobsWithStreamingResponse,
)
+from .checkpoints import (
+ Checkpoints,
+ AsyncCheckpoints,
+ CheckpointsWithRawResponse,
+ AsyncCheckpointsWithRawResponse,
+ CheckpointsWithStreamingResponse,
+ AsyncCheckpointsWithStreamingResponse,
+)
from .fine_tuning import (
FineTuning,
AsyncFineTuning,
@@ -24,6 +32,12 @@
"AsyncJobsWithRawResponse",
"JobsWithStreamingResponse",
"AsyncJobsWithStreamingResponse",
+ "Checkpoints",
+ "AsyncCheckpoints",
+ "CheckpointsWithRawResponse",
+ "AsyncCheckpointsWithRawResponse",
+ "CheckpointsWithStreamingResponse",
+ "AsyncCheckpointsWithStreamingResponse",
"FineTuning",
"AsyncFineTuning",
"FineTuningWithRawResponse",
diff --git a/src/openai/resources/fine_tuning/checkpoints/__init__.py b/src/openai/resources/fine_tuning/checkpoints/__init__.py
new file mode 100644
index 0000000000..fdc37940f9
--- /dev/null
+++ b/src/openai/resources/fine_tuning/checkpoints/__init__.py
@@ -0,0 +1,33 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .checkpoints import (
+ Checkpoints,
+ AsyncCheckpoints,
+ CheckpointsWithRawResponse,
+ AsyncCheckpointsWithRawResponse,
+ CheckpointsWithStreamingResponse,
+ AsyncCheckpointsWithStreamingResponse,
+)
+from .permissions import (
+ Permissions,
+ AsyncPermissions,
+ PermissionsWithRawResponse,
+ AsyncPermissionsWithRawResponse,
+ PermissionsWithStreamingResponse,
+ AsyncPermissionsWithStreamingResponse,
+)
+
+__all__ = [
+ "Permissions",
+ "AsyncPermissions",
+ "PermissionsWithRawResponse",
+ "AsyncPermissionsWithRawResponse",
+ "PermissionsWithStreamingResponse",
+ "AsyncPermissionsWithStreamingResponse",
+ "Checkpoints",
+ "AsyncCheckpoints",
+ "CheckpointsWithRawResponse",
+ "AsyncCheckpointsWithRawResponse",
+ "CheckpointsWithStreamingResponse",
+ "AsyncCheckpointsWithStreamingResponse",
+]
diff --git a/src/openai/resources/fine_tuning/checkpoints/checkpoints.py b/src/openai/resources/fine_tuning/checkpoints/checkpoints.py
new file mode 100644
index 0000000000..f59976a264
--- /dev/null
+++ b/src/openai/resources/fine_tuning/checkpoints/checkpoints.py
@@ -0,0 +1,102 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from ...._compat import cached_property
+from .permissions import (
+ Permissions,
+ AsyncPermissions,
+ PermissionsWithRawResponse,
+ AsyncPermissionsWithRawResponse,
+ PermissionsWithStreamingResponse,
+ AsyncPermissionsWithStreamingResponse,
+)
+from ...._resource import SyncAPIResource, AsyncAPIResource
+
+__all__ = ["Checkpoints", "AsyncCheckpoints"]
+
+
+class Checkpoints(SyncAPIResource):
+ @cached_property
+ def permissions(self) -> Permissions:
+ return Permissions(self._client)
+
+ @cached_property
+ def with_raw_response(self) -> CheckpointsWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+ """
+ return CheckpointsWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> CheckpointsWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+ """
+ return CheckpointsWithStreamingResponse(self)
+
+
+class AsyncCheckpoints(AsyncAPIResource):
+ @cached_property
+ def permissions(self) -> AsyncPermissions:
+ return AsyncPermissions(self._client)
+
+ @cached_property
+ def with_raw_response(self) -> AsyncCheckpointsWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+ """
+ return AsyncCheckpointsWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncCheckpointsWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+ """
+ return AsyncCheckpointsWithStreamingResponse(self)
+
+
+class CheckpointsWithRawResponse:
+ def __init__(self, checkpoints: Checkpoints) -> None:
+ self._checkpoints = checkpoints
+
+ @cached_property
+ def permissions(self) -> PermissionsWithRawResponse:
+ return PermissionsWithRawResponse(self._checkpoints.permissions)
+
+
+class AsyncCheckpointsWithRawResponse:
+ def __init__(self, checkpoints: AsyncCheckpoints) -> None:
+ self._checkpoints = checkpoints
+
+ @cached_property
+ def permissions(self) -> AsyncPermissionsWithRawResponse:
+ return AsyncPermissionsWithRawResponse(self._checkpoints.permissions)
+
+
+class CheckpointsWithStreamingResponse:
+ def __init__(self, checkpoints: Checkpoints) -> None:
+ self._checkpoints = checkpoints
+
+ @cached_property
+ def permissions(self) -> PermissionsWithStreamingResponse:
+ return PermissionsWithStreamingResponse(self._checkpoints.permissions)
+
+
+class AsyncCheckpointsWithStreamingResponse:
+ def __init__(self, checkpoints: AsyncCheckpoints) -> None:
+ self._checkpoints = checkpoints
+
+ @cached_property
+ def permissions(self) -> AsyncPermissionsWithStreamingResponse:
+ return AsyncPermissionsWithStreamingResponse(self._checkpoints.permissions)
diff --git a/src/openai/resources/fine_tuning/checkpoints/permissions.py b/src/openai/resources/fine_tuning/checkpoints/permissions.py
new file mode 100644
index 0000000000..beb7b099d3
--- /dev/null
+++ b/src/openai/resources/fine_tuning/checkpoints/permissions.py
@@ -0,0 +1,416 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import List
+from typing_extensions import Literal
+
+import httpx
+
+from .... import _legacy_response
+from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ...._utils import (
+ maybe_transform,
+ async_maybe_transform,
+)
+from ...._compat import cached_property
+from ...._resource import SyncAPIResource, AsyncAPIResource
+from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ....pagination import SyncPage, AsyncPage
+from ...._base_client import AsyncPaginator, make_request_options
+from ....types.fine_tuning.checkpoints import permission_create_params, permission_retrieve_params
+from ....types.fine_tuning.checkpoints.permission_create_response import PermissionCreateResponse
+from ....types.fine_tuning.checkpoints.permission_delete_response import PermissionDeleteResponse
+from ....types.fine_tuning.checkpoints.permission_retrieve_response import PermissionRetrieveResponse
+
+__all__ = ["Permissions", "AsyncPermissions"]
+
+
+class Permissions(SyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> PermissionsWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+ """
+ return PermissionsWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> PermissionsWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+ """
+ return PermissionsWithStreamingResponse(self)
+
+ def create(
+ self,
+ fine_tuned_model_checkpoint: str,
+ *,
+ project_ids: List[str],
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> SyncPage[PermissionCreateResponse]:
+ """
+ **NOTE:** Calling this endpoint requires an [admin API key](../admin-api-keys).
+
+ This enables organization owners to share fine-tuned models with other projects
+ in their organization.
+
+ Args:
+ project_ids: The project identifiers to grant access to.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not fine_tuned_model_checkpoint:
+ raise ValueError(
+ f"Expected a non-empty value for `fine_tuned_model_checkpoint` but received {fine_tuned_model_checkpoint!r}"
+ )
+ return self._get_api_list(
+ f"/fine_tuning/checkpoints/{fine_tuned_model_checkpoint}/permissions",
+ page=SyncPage[PermissionCreateResponse],
+ body=maybe_transform({"project_ids": project_ids}, permission_create_params.PermissionCreateParams),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ model=PermissionCreateResponse,
+ method="post",
+ )
+
+ def retrieve(
+ self,
+ fine_tuned_model_checkpoint: str,
+ *,
+ after: str | NotGiven = NOT_GIVEN,
+ limit: int | NotGiven = NOT_GIVEN,
+ order: Literal["ascending", "descending"] | NotGiven = NOT_GIVEN,
+ project_id: str | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> PermissionRetrieveResponse:
+ """
+ **NOTE:** This endpoint requires an [admin API key](../admin-api-keys).
+
+ Organization owners can use this endpoint to view all permissions for a
+ fine-tuned model checkpoint.
+
+ Args:
+ after: Identifier for the last permission ID from the previous pagination request.
+
+ limit: Number of permissions to retrieve.
+
+ order: The order in which to retrieve permissions.
+
+ project_id: The ID of the project to get permissions for.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not fine_tuned_model_checkpoint:
+ raise ValueError(
+ f"Expected a non-empty value for `fine_tuned_model_checkpoint` but received {fine_tuned_model_checkpoint!r}"
+ )
+ return self._get(
+ f"/fine_tuning/checkpoints/{fine_tuned_model_checkpoint}/permissions",
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=maybe_transform(
+ {
+ "after": after,
+ "limit": limit,
+ "order": order,
+ "project_id": project_id,
+ },
+ permission_retrieve_params.PermissionRetrieveParams,
+ ),
+ ),
+ cast_to=PermissionRetrieveResponse,
+ )
+
+ def delete(
+ self,
+ fine_tuned_model_checkpoint: str,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> PermissionDeleteResponse:
+ """
+ **NOTE:** This endpoint requires an [admin API key](../admin-api-keys).
+
+ Organization owners can use this endpoint to delete a permission for a
+ fine-tuned model checkpoint.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not fine_tuned_model_checkpoint:
+ raise ValueError(
+ f"Expected a non-empty value for `fine_tuned_model_checkpoint` but received {fine_tuned_model_checkpoint!r}"
+ )
+ return self._delete(
+ f"/fine_tuning/checkpoints/{fine_tuned_model_checkpoint}/permissions",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=PermissionDeleteResponse,
+ )
+
+
+class AsyncPermissions(AsyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> AsyncPermissionsWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+ """
+ return AsyncPermissionsWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncPermissionsWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+ """
+ return AsyncPermissionsWithStreamingResponse(self)
+
+ def create(
+ self,
+ fine_tuned_model_checkpoint: str,
+ *,
+ project_ids: List[str],
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AsyncPaginator[PermissionCreateResponse, AsyncPage[PermissionCreateResponse]]:
+ """
+ **NOTE:** Calling this endpoint requires an [admin API key](../admin-api-keys).
+
+ This enables organization owners to share fine-tuned models with other projects
+ in their organization.
+
+ Args:
+ project_ids: The project identifiers to grant access to.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not fine_tuned_model_checkpoint:
+ raise ValueError(
+ f"Expected a non-empty value for `fine_tuned_model_checkpoint` but received {fine_tuned_model_checkpoint!r}"
+ )
+ return self._get_api_list(
+ f"/fine_tuning/checkpoints/{fine_tuned_model_checkpoint}/permissions",
+ page=AsyncPage[PermissionCreateResponse],
+ body=maybe_transform({"project_ids": project_ids}, permission_create_params.PermissionCreateParams),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ model=PermissionCreateResponse,
+ method="post",
+ )
+
+ async def retrieve(
+ self,
+ fine_tuned_model_checkpoint: str,
+ *,
+ after: str | NotGiven = NOT_GIVEN,
+ limit: int | NotGiven = NOT_GIVEN,
+ order: Literal["ascending", "descending"] | NotGiven = NOT_GIVEN,
+ project_id: str | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> PermissionRetrieveResponse:
+ """
+ **NOTE:** This endpoint requires an [admin API key](../admin-api-keys).
+
+ Organization owners can use this endpoint to view all permissions for a
+ fine-tuned model checkpoint.
+
+ Args:
+ after: Identifier for the last permission ID from the previous pagination request.
+
+ limit: Number of permissions to retrieve.
+
+ order: The order in which to retrieve permissions.
+
+ project_id: The ID of the project to get permissions for.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not fine_tuned_model_checkpoint:
+ raise ValueError(
+ f"Expected a non-empty value for `fine_tuned_model_checkpoint` but received {fine_tuned_model_checkpoint!r}"
+ )
+ return await self._get(
+ f"/fine_tuning/checkpoints/{fine_tuned_model_checkpoint}/permissions",
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=await async_maybe_transform(
+ {
+ "after": after,
+ "limit": limit,
+ "order": order,
+ "project_id": project_id,
+ },
+ permission_retrieve_params.PermissionRetrieveParams,
+ ),
+ ),
+ cast_to=PermissionRetrieveResponse,
+ )
+
+ async def delete(
+ self,
+ fine_tuned_model_checkpoint: str,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> PermissionDeleteResponse:
+ """
+ **NOTE:** This endpoint requires an [admin API key](../admin-api-keys).
+
+ Organization owners can use this endpoint to delete a permission for a
+ fine-tuned model checkpoint.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not fine_tuned_model_checkpoint:
+ raise ValueError(
+ f"Expected a non-empty value for `fine_tuned_model_checkpoint` but received {fine_tuned_model_checkpoint!r}"
+ )
+ return await self._delete(
+ f"/fine_tuning/checkpoints/{fine_tuned_model_checkpoint}/permissions",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=PermissionDeleteResponse,
+ )
+
+
+class PermissionsWithRawResponse:
+ def __init__(self, permissions: Permissions) -> None:
+ self._permissions = permissions
+
+ self.create = _legacy_response.to_raw_response_wrapper(
+ permissions.create,
+ )
+ self.retrieve = _legacy_response.to_raw_response_wrapper(
+ permissions.retrieve,
+ )
+ self.delete = _legacy_response.to_raw_response_wrapper(
+ permissions.delete,
+ )
+
+
+class AsyncPermissionsWithRawResponse:
+ def __init__(self, permissions: AsyncPermissions) -> None:
+ self._permissions = permissions
+
+ self.create = _legacy_response.async_to_raw_response_wrapper(
+ permissions.create,
+ )
+ self.retrieve = _legacy_response.async_to_raw_response_wrapper(
+ permissions.retrieve,
+ )
+ self.delete = _legacy_response.async_to_raw_response_wrapper(
+ permissions.delete,
+ )
+
+
+class PermissionsWithStreamingResponse:
+ def __init__(self, permissions: Permissions) -> None:
+ self._permissions = permissions
+
+ self.create = to_streamed_response_wrapper(
+ permissions.create,
+ )
+ self.retrieve = to_streamed_response_wrapper(
+ permissions.retrieve,
+ )
+ self.delete = to_streamed_response_wrapper(
+ permissions.delete,
+ )
+
+
+class AsyncPermissionsWithStreamingResponse:
+ def __init__(self, permissions: AsyncPermissions) -> None:
+ self._permissions = permissions
+
+ self.create = async_to_streamed_response_wrapper(
+ permissions.create,
+ )
+ self.retrieve = async_to_streamed_response_wrapper(
+ permissions.retrieve,
+ )
+ self.delete = async_to_streamed_response_wrapper(
+ permissions.delete,
+ )
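
A hedged sketch of the checkpoint permissions resource defined above. It assumes the client is configured with an admin API key; the checkpoint and project IDs are placeholders.

```python
# Hypothetical sketch; requires an admin API key and uses placeholder IDs.
from openai import OpenAI

client = OpenAI()  # assumes OPENAI_API_KEY holds an admin key
CHECKPOINT = "ft:gpt-4o-mini-2024-07-18:org::ckpt-step-100"  # placeholder

# Grant two projects access to the checkpoint; the call returns a SyncPage
# of permission objects that can be iterated directly.
for permission in client.fine_tuning.checkpoints.permissions.create(
    CHECKPOINT,
    project_ids=["proj_abc", "proj_def"],
):
    print(permission)

# View current permissions for the checkpoint, then remove them.
permissions = client.fine_tuning.checkpoints.permissions.retrieve(CHECKPOINT, limit=10)
client.fine_tuning.checkpoints.permissions.delete(CHECKPOINT)
```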
diff --git a/src/openai/resources/fine_tuning/fine_tuning.py b/src/openai/resources/fine_tuning/fine_tuning.py
index eebde07d81..1388c8230c 100644
--- a/src/openai/resources/fine_tuning/fine_tuning.py
+++ b/src/openai/resources/fine_tuning/fine_tuning.py
@@ -12,6 +12,14 @@
AsyncJobsWithStreamingResponse,
)
from ..._resource import SyncAPIResource, AsyncAPIResource
+from .checkpoints.checkpoints import (
+ Checkpoints,
+ AsyncCheckpoints,
+ CheckpointsWithRawResponse,
+ AsyncCheckpointsWithRawResponse,
+ CheckpointsWithStreamingResponse,
+ AsyncCheckpointsWithStreamingResponse,
+)
__all__ = ["FineTuning", "AsyncFineTuning"]
@@ -21,6 +29,10 @@ class FineTuning(SyncAPIResource):
def jobs(self) -> Jobs:
return Jobs(self._client)
+ @cached_property
+ def checkpoints(self) -> Checkpoints:
+ return Checkpoints(self._client)
+
@cached_property
def with_raw_response(self) -> FineTuningWithRawResponse:
"""
@@ -46,6 +58,10 @@ class AsyncFineTuning(AsyncAPIResource):
def jobs(self) -> AsyncJobs:
return AsyncJobs(self._client)
+ @cached_property
+ def checkpoints(self) -> AsyncCheckpoints:
+ return AsyncCheckpoints(self._client)
+
@cached_property
def with_raw_response(self) -> AsyncFineTuningWithRawResponse:
"""
@@ -74,6 +90,10 @@ def __init__(self, fine_tuning: FineTuning) -> None:
def jobs(self) -> JobsWithRawResponse:
return JobsWithRawResponse(self._fine_tuning.jobs)
+ @cached_property
+ def checkpoints(self) -> CheckpointsWithRawResponse:
+ return CheckpointsWithRawResponse(self._fine_tuning.checkpoints)
+
class AsyncFineTuningWithRawResponse:
def __init__(self, fine_tuning: AsyncFineTuning) -> None:
@@ -83,6 +103,10 @@ def __init__(self, fine_tuning: AsyncFineTuning) -> None:
def jobs(self) -> AsyncJobsWithRawResponse:
return AsyncJobsWithRawResponse(self._fine_tuning.jobs)
+ @cached_property
+ def checkpoints(self) -> AsyncCheckpointsWithRawResponse:
+ return AsyncCheckpointsWithRawResponse(self._fine_tuning.checkpoints)
+
class FineTuningWithStreamingResponse:
def __init__(self, fine_tuning: FineTuning) -> None:
@@ -92,6 +116,10 @@ def __init__(self, fine_tuning: FineTuning) -> None:
def jobs(self) -> JobsWithStreamingResponse:
return JobsWithStreamingResponse(self._fine_tuning.jobs)
+ @cached_property
+ def checkpoints(self) -> CheckpointsWithStreamingResponse:
+ return CheckpointsWithStreamingResponse(self._fine_tuning.checkpoints)
+
class AsyncFineTuningWithStreamingResponse:
def __init__(self, fine_tuning: AsyncFineTuning) -> None:
@@ -100,3 +128,7 @@ def __init__(self, fine_tuning: AsyncFineTuning) -> None:
@cached_property
def jobs(self) -> AsyncJobsWithStreamingResponse:
return AsyncJobsWithStreamingResponse(self._fine_tuning.jobs)
+
+ @cached_property
+ def checkpoints(self) -> AsyncCheckpointsWithStreamingResponse:
+ return AsyncCheckpointsWithStreamingResponse(self._fine_tuning.checkpoints)
diff --git a/src/openai/types/__init__.py b/src/openai/types/__init__.py
index 11761534c9..57c91811b9 100644
--- a/src/openai/types/__init__.py
+++ b/src/openai/types/__init__.py
@@ -38,22 +38,32 @@
from .embedding_model import EmbeddingModel as EmbeddingModel
from .images_response import ImagesResponse as ImagesResponse
from .completion_usage import CompletionUsage as CompletionUsage
+from .eval_list_params import EvalListParams as EvalListParams
from .file_list_params import FileListParams as FileListParams
from .moderation_model import ModerationModel as ModerationModel
from .batch_list_params import BatchListParams as BatchListParams
from .completion_choice import CompletionChoice as CompletionChoice
from .image_edit_params import ImageEditParams as ImageEditParams
+from .eval_create_params import EvalCreateParams as EvalCreateParams
+from .eval_list_response import EvalListResponse as EvalListResponse
+from .eval_update_params import EvalUpdateParams as EvalUpdateParams
from .file_create_params import FileCreateParams as FileCreateParams
from .batch_create_params import BatchCreateParams as BatchCreateParams
from .batch_request_counts import BatchRequestCounts as BatchRequestCounts
+from .eval_create_response import EvalCreateResponse as EvalCreateResponse
+from .eval_delete_response import EvalDeleteResponse as EvalDeleteResponse
+from .eval_update_response import EvalUpdateResponse as EvalUpdateResponse
from .upload_create_params import UploadCreateParams as UploadCreateParams
from .vector_store_deleted import VectorStoreDeleted as VectorStoreDeleted
from .audio_response_format import AudioResponseFormat as AudioResponseFormat
from .image_generate_params import ImageGenerateParams as ImageGenerateParams
+from .eval_retrieve_response import EvalRetrieveResponse as EvalRetrieveResponse
from .file_chunking_strategy import FileChunkingStrategy as FileChunkingStrategy
from .upload_complete_params import UploadCompleteParams as UploadCompleteParams
from .embedding_create_params import EmbeddingCreateParams as EmbeddingCreateParams
+from .eval_label_model_grader import EvalLabelModelGrader as EvalLabelModelGrader
from .completion_create_params import CompletionCreateParams as CompletionCreateParams
+from .eval_string_check_grader import EvalStringCheckGrader as EvalStringCheckGrader
from .moderation_create_params import ModerationCreateParams as ModerationCreateParams
from .vector_store_list_params import VectorStoreListParams as VectorStoreListParams
from .create_embedding_response import CreateEmbeddingResponse as CreateEmbeddingResponse
@@ -61,18 +71,25 @@
from .vector_store_create_params import VectorStoreCreateParams as VectorStoreCreateParams
from .vector_store_search_params import VectorStoreSearchParams as VectorStoreSearchParams
from .vector_store_update_params import VectorStoreUpdateParams as VectorStoreUpdateParams
+from .eval_text_similarity_grader import EvalTextSimilarityGrader as EvalTextSimilarityGrader
from .moderation_text_input_param import ModerationTextInputParam as ModerationTextInputParam
from .file_chunking_strategy_param import FileChunkingStrategyParam as FileChunkingStrategyParam
from .vector_store_search_response import VectorStoreSearchResponse as VectorStoreSearchResponse
from .websocket_connection_options import WebsocketConnectionOptions as WebsocketConnectionOptions
from .image_create_variation_params import ImageCreateVariationParams as ImageCreateVariationParams
from .static_file_chunking_strategy import StaticFileChunkingStrategy as StaticFileChunkingStrategy
+from .eval_custom_data_source_config import EvalCustomDataSourceConfig as EvalCustomDataSourceConfig
+from .eval_string_check_grader_param import EvalStringCheckGraderParam as EvalStringCheckGraderParam
from .moderation_image_url_input_param import ModerationImageURLInputParam as ModerationImageURLInputParam
from .auto_file_chunking_strategy_param import AutoFileChunkingStrategyParam as AutoFileChunkingStrategyParam
+from .eval_text_similarity_grader_param import EvalTextSimilarityGraderParam as EvalTextSimilarityGraderParam
from .moderation_multi_modal_input_param import ModerationMultiModalInputParam as ModerationMultiModalInputParam
from .other_file_chunking_strategy_object import OtherFileChunkingStrategyObject as OtherFileChunkingStrategyObject
from .static_file_chunking_strategy_param import StaticFileChunkingStrategyParam as StaticFileChunkingStrategyParam
from .static_file_chunking_strategy_object import StaticFileChunkingStrategyObject as StaticFileChunkingStrategyObject
+from .eval_stored_completions_data_source_config import (
+ EvalStoredCompletionsDataSourceConfig as EvalStoredCompletionsDataSourceConfig,
+)
from .static_file_chunking_strategy_object_param import (
StaticFileChunkingStrategyObjectParam as StaticFileChunkingStrategyObjectParam,
)
diff --git a/src/openai/types/eval_create_params.py b/src/openai/types/eval_create_params.py
new file mode 100644
index 0000000000..8b28e51a6b
--- /dev/null
+++ b/src/openai/types/eval_create_params.py
@@ -0,0 +1,153 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Dict, List, Union, Iterable, Optional
+from typing_extensions import Literal, Required, TypeAlias, TypedDict
+
+from .shared_params.metadata import Metadata
+from .eval_string_check_grader_param import EvalStringCheckGraderParam
+from .eval_text_similarity_grader_param import EvalTextSimilarityGraderParam
+
+__all__ = [
+ "EvalCreateParams",
+ "DataSourceConfig",
+ "DataSourceConfigCustom",
+ "DataSourceConfigStoredCompletions",
+ "TestingCriterion",
+ "TestingCriterionLabelModel",
+ "TestingCriterionLabelModelInput",
+ "TestingCriterionLabelModelInputSimpleInputMessage",
+ "TestingCriterionLabelModelInputInputMessage",
+ "TestingCriterionLabelModelInputInputMessageContent",
+ "TestingCriterionLabelModelInputOutputMessage",
+ "TestingCriterionLabelModelInputOutputMessageContent",
+]
+
+
+class EvalCreateParams(TypedDict, total=False):
+ data_source_config: Required[DataSourceConfig]
+ """The configuration for the data source used for the evaluation runs."""
+
+ testing_criteria: Required[Iterable[TestingCriterion]]
+ """A list of graders for all eval runs in this group."""
+
+ metadata: Optional[Metadata]
+ """Set of 16 key-value pairs that can be attached to an object.
+
+ This can be useful for storing additional information about the object in a
+ structured format, and querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+ """
+
+ name: str
+ """The name of the evaluation."""
+
+ share_with_openai: bool
+ """Indicates whether the evaluation is shared with OpenAI."""
+
+
+class DataSourceConfigCustom(TypedDict, total=False):
+ item_schema: Required[Dict[str, object]]
+ """The json schema for the run data source items."""
+
+ type: Required[Literal["custom"]]
+ """The type of data source. Always `custom`."""
+
+ include_sample_schema: bool
+ """Whether to include the sample schema in the data source."""
+
+
+class DataSourceConfigStoredCompletions(TypedDict, total=False):
+ type: Required[Literal["stored_completions"]]
+ """The type of data source. Always `stored_completions`."""
+
+ metadata: Optional[Metadata]
+ """Set of 16 key-value pairs that can be attached to an object.
+
+ This can be useful for storing additional information about the object in a
+ structured format, and querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+ """
+
+
+DataSourceConfig: TypeAlias = Union[DataSourceConfigCustom, DataSourceConfigStoredCompletions]
+
+
+class TestingCriterionLabelModelInputSimpleInputMessage(TypedDict, total=False):
+ content: Required[str]
+ """The content of the message."""
+
+ role: Required[str]
+ """The role of the message (e.g. "system", "assistant", "user")."""
+
+
+class TestingCriterionLabelModelInputInputMessageContent(TypedDict, total=False):
+ text: Required[str]
+ """The text content."""
+
+ type: Required[Literal["input_text"]]
+ """The type of content, which is always `input_text`."""
+
+
+class TestingCriterionLabelModelInputInputMessage(TypedDict, total=False):
+ content: Required[TestingCriterionLabelModelInputInputMessageContent]
+
+ role: Required[Literal["user", "system", "developer"]]
+ """The role of the message. One of `user`, `system`, or `developer`."""
+
+ type: Required[Literal["message"]]
+ """The type of item, which is always `message`."""
+
+
+class TestingCriterionLabelModelInputOutputMessageContent(TypedDict, total=False):
+ text: Required[str]
+ """The text content."""
+
+ type: Required[Literal["output_text"]]
+ """The type of content, which is always `output_text`."""
+
+
+class TestingCriterionLabelModelInputOutputMessage(TypedDict, total=False):
+ content: Required[TestingCriterionLabelModelInputOutputMessageContent]
+
+ role: Required[Literal["assistant"]]
+ """The role of the message. Must be `assistant` for output."""
+
+ type: Required[Literal["message"]]
+ """The type of item, which is always `message`."""
+
+
+TestingCriterionLabelModelInput: TypeAlias = Union[
+ TestingCriterionLabelModelInputSimpleInputMessage,
+ TestingCriterionLabelModelInputInputMessage,
+ TestingCriterionLabelModelInputOutputMessage,
+]
+
+
+class TestingCriterionLabelModel(TypedDict, total=False):
+ input: Required[Iterable[TestingCriterionLabelModelInput]]
+
+ labels: Required[List[str]]
+ """The labels to classify to each item in the evaluation."""
+
+ model: Required[str]
+ """The model to use for the evaluation. Must support structured outputs."""
+
+ name: Required[str]
+ """The name of the grader."""
+
+ passing_labels: Required[List[str]]
+ """The labels that indicate a passing result. Must be a subset of labels."""
+
+ type: Required[Literal["label_model"]]
+ """The object type, which is always `label_model`."""
+
+
+TestingCriterion: TypeAlias = Union[
+ TestingCriterionLabelModel, EvalStringCheckGraderParam, EvalTextSimilarityGraderParam
+]
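
To make the `EvalCreateParams` shapes above concrete, here is an illustrative sketch of creating an eval with a custom data source and a single string-check grader. The item schema and the `{{...}}` template values are placeholders, not prescribed by this diff.

```python
# Illustrative EvalCreateParams usage; schema fields and template strings are
# placeholders that only need to match the TypedDict shapes defined above.
from openai import OpenAI

client = OpenAI()

evaluation = client.evals.create(
    name="ticket-categorizer",
    data_source_config={
        "type": "custom",
        "item_schema": {
            "type": "object",
            "properties": {"ticket": {"type": "string"}, "label": {"type": "string"}},
            "required": ["ticket", "label"],
        },
        "include_sample_schema": True,
    },
    testing_criteria=[
        {
            "type": "string_check",
            "name": "exact label match",
            "input": "{{sample.output_text}}",  # placeholder template
            "reference": "{{item.label}}",  # placeholder template
            "operation": "eq",
        }
    ],
)
print(evaluation.id)
```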
diff --git a/src/openai/types/eval_create_response.py b/src/openai/types/eval_create_response.py
new file mode 100644
index 0000000000..a1c2853a2a
--- /dev/null
+++ b/src/openai/types/eval_create_response.py
@@ -0,0 +1,56 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Union, Optional
+from typing_extensions import Literal, Annotated, TypeAlias
+
+from .._utils import PropertyInfo
+from .._models import BaseModel
+from .shared.metadata import Metadata
+from .eval_label_model_grader import EvalLabelModelGrader
+from .eval_string_check_grader import EvalStringCheckGrader
+from .eval_text_similarity_grader import EvalTextSimilarityGrader
+from .eval_custom_data_source_config import EvalCustomDataSourceConfig
+from .eval_stored_completions_data_source_config import EvalStoredCompletionsDataSourceConfig
+
+__all__ = ["EvalCreateResponse", "DataSourceConfig", "TestingCriterion"]
+
+DataSourceConfig: TypeAlias = Annotated[
+ Union[EvalCustomDataSourceConfig, EvalStoredCompletionsDataSourceConfig], PropertyInfo(discriminator="type")
+]
+
+TestingCriterion: TypeAlias = Annotated[
+ Union[EvalLabelModelGrader, EvalStringCheckGrader, EvalTextSimilarityGrader], PropertyInfo(discriminator="type")
+]
+
+
+class EvalCreateResponse(BaseModel):
+ id: str
+ """Unique identifier for the evaluation."""
+
+ created_at: int
+ """The Unix timestamp (in seconds) for when the eval was created."""
+
+ data_source_config: DataSourceConfig
+ """Configuration of data sources used in runs of the evaluation."""
+
+ metadata: Optional[Metadata] = None
+ """Set of 16 key-value pairs that can be attached to an object.
+
+ This can be useful for storing additional information about the object in a
+ structured format, and querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+ """
+
+ name: str
+ """The name of the evaluation."""
+
+ object: Literal["eval"]
+ """The object type."""
+
+ share_with_openai: bool
+ """Indicates whether the evaluation is shared with OpenAI."""
+
+ testing_criteria: List[TestingCriterion]
+ """A list of testing criteria."""
diff --git a/src/openai/types/eval_custom_data_source_config.py b/src/openai/types/eval_custom_data_source_config.py
new file mode 100644
index 0000000000..d99701cc71
--- /dev/null
+++ b/src/openai/types/eval_custom_data_source_config.py
@@ -0,0 +1,21 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Dict
+from typing_extensions import Literal
+
+from pydantic import Field as FieldInfo
+
+from .._models import BaseModel
+
+__all__ = ["EvalCustomDataSourceConfig"]
+
+
+class EvalCustomDataSourceConfig(BaseModel):
+ schema_: Dict[str, object] = FieldInfo(alias="schema")
+ """
+ The JSON schema for the run data source items. Learn how to build JSON schemas
+ [here](https://json-schema.org/).
+ """
+
+ type: Literal["custom"]
+ """The type of data source. Always `custom`."""
diff --git a/src/openai/types/eval_delete_response.py b/src/openai/types/eval_delete_response.py
new file mode 100644
index 0000000000..adb460ddbb
--- /dev/null
+++ b/src/openai/types/eval_delete_response.py
@@ -0,0 +1,14 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+
+from .._models import BaseModel
+
+__all__ = ["EvalDeleteResponse"]
+
+
+class EvalDeleteResponse(BaseModel):
+ deleted: bool
+
+ eval_id: str
+
+ object: str
diff --git a/src/openai/types/eval_label_model_grader.py b/src/openai/types/eval_label_model_grader.py
new file mode 100644
index 0000000000..826b116287
--- /dev/null
+++ b/src/openai/types/eval_label_model_grader.py
@@ -0,0 +1,74 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Union
+from typing_extensions import Literal, Annotated, TypeAlias
+
+from .._utils import PropertyInfo
+from .._models import BaseModel
+
+__all__ = [
+ "EvalLabelModelGrader",
+ "Input",
+ "InputInputMessage",
+ "InputInputMessageContent",
+ "InputAssistant",
+ "InputAssistantContent",
+]
+
+
+class InputInputMessageContent(BaseModel):
+ text: str
+ """The text content."""
+
+ type: Literal["input_text"]
+ """The type of content, which is always `input_text`."""
+
+
+class InputInputMessage(BaseModel):
+ content: InputInputMessageContent
+
+ role: Literal["user", "system", "developer"]
+ """The role of the message. One of `user`, `system`, or `developer`."""
+
+ type: Literal["message"]
+ """The type of item, which is always `message`."""
+
+
+class InputAssistantContent(BaseModel):
+ text: str
+ """The text content."""
+
+ type: Literal["output_text"]
+ """The type of content, which is always `output_text`."""
+
+
+class InputAssistant(BaseModel):
+ content: InputAssistantContent
+
+ role: Literal["assistant"]
+ """The role of the message. Must be `assistant` for output."""
+
+ type: Literal["message"]
+ """The type of item, which is always `message`."""
+
+
+Input: TypeAlias = Annotated[Union[InputInputMessage, InputAssistant], PropertyInfo(discriminator="role")]
+
+
+class EvalLabelModelGrader(BaseModel):
+ input: List[Input]
+
+ labels: List[str]
+ """The labels to assign to each item in the evaluation."""
+
+ model: str
+ """The model to use for the evaluation. Must support structured outputs."""
+
+ name: str
+ """The name of the grader."""
+
+ passing_labels: List[str]
+ """The labels that indicate a passing result. Must be a subset of labels."""
+
+ type: Literal["label_model"]
+ """The object type, which is always `label_model`."""
diff --git a/src/openai/types/eval_list_params.py b/src/openai/types/eval_list_params.py
new file mode 100644
index 0000000000..d9a12d0ddf
--- /dev/null
+++ b/src/openai/types/eval_list_params.py
@@ -0,0 +1,27 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, TypedDict
+
+__all__ = ["EvalListParams"]
+
+
+class EvalListParams(TypedDict, total=False):
+ after: str
+ """Identifier for the last eval from the previous pagination request."""
+
+ limit: int
+ """Number of evals to retrieve."""
+
+ order: Literal["asc", "desc"]
+ """Sort order for evals by timestamp.
+
+ Use `asc` for ascending order or `desc` for descending order.
+ """
+
+ order_by: Literal["created_at", "updated_at"]
+ """Evals can be ordered by creation time or last updated time.
+
+ Use `created_at` for creation time or `updated_at` for last updated time.
+ """
diff --git a/src/openai/types/eval_list_response.py b/src/openai/types/eval_list_response.py
new file mode 100644
index 0000000000..eb54569011
--- /dev/null
+++ b/src/openai/types/eval_list_response.py
@@ -0,0 +1,56 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Union, Optional
+from typing_extensions import Literal, Annotated, TypeAlias
+
+from .._utils import PropertyInfo
+from .._models import BaseModel
+from .shared.metadata import Metadata
+from .eval_label_model_grader import EvalLabelModelGrader
+from .eval_string_check_grader import EvalStringCheckGrader
+from .eval_text_similarity_grader import EvalTextSimilarityGrader
+from .eval_custom_data_source_config import EvalCustomDataSourceConfig
+from .eval_stored_completions_data_source_config import EvalStoredCompletionsDataSourceConfig
+
+__all__ = ["EvalListResponse", "DataSourceConfig", "TestingCriterion"]
+
+DataSourceConfig: TypeAlias = Annotated[
+ Union[EvalCustomDataSourceConfig, EvalStoredCompletionsDataSourceConfig], PropertyInfo(discriminator="type")
+]
+
+TestingCriterion: TypeAlias = Annotated[
+ Union[EvalLabelModelGrader, EvalStringCheckGrader, EvalTextSimilarityGrader], PropertyInfo(discriminator="type")
+]
+
+
+class EvalListResponse(BaseModel):
+ id: str
+ """Unique identifier for the evaluation."""
+
+ created_at: int
+ """The Unix timestamp (in seconds) for when the eval was created."""
+
+ data_source_config: DataSourceConfig
+ """Configuration of data sources used in runs of the evaluation."""
+
+ metadata: Optional[Metadata] = None
+ """Set of 16 key-value pairs that can be attached to an object.
+
+ This can be useful for storing additional information about the object in a
+ structured format, and querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+ """
+
+ name: str
+ """The name of the evaluation."""
+
+ object: Literal["eval"]
+ """The object type."""
+
+ share_with_openai: bool
+ """Indicates whether the evaluation is shared with OpenAI."""
+
+ testing_criteria: List[TestingCriterion]
+ """A list of testing criteria."""
diff --git a/src/openai/types/eval_retrieve_response.py b/src/openai/types/eval_retrieve_response.py
new file mode 100644
index 0000000000..8f3bfdf902
--- /dev/null
+++ b/src/openai/types/eval_retrieve_response.py
@@ -0,0 +1,56 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Union, Optional
+from typing_extensions import Literal, Annotated, TypeAlias
+
+from .._utils import PropertyInfo
+from .._models import BaseModel
+from .shared.metadata import Metadata
+from .eval_label_model_grader import EvalLabelModelGrader
+from .eval_string_check_grader import EvalStringCheckGrader
+from .eval_text_similarity_grader import EvalTextSimilarityGrader
+from .eval_custom_data_source_config import EvalCustomDataSourceConfig
+from .eval_stored_completions_data_source_config import EvalStoredCompletionsDataSourceConfig
+
+__all__ = ["EvalRetrieveResponse", "DataSourceConfig", "TestingCriterion"]
+
+DataSourceConfig: TypeAlias = Annotated[
+ Union[EvalCustomDataSourceConfig, EvalStoredCompletionsDataSourceConfig], PropertyInfo(discriminator="type")
+]
+
+TestingCriterion: TypeAlias = Annotated[
+ Union[EvalLabelModelGrader, EvalStringCheckGrader, EvalTextSimilarityGrader], PropertyInfo(discriminator="type")
+]
+
+
+class EvalRetrieveResponse(BaseModel):
+ id: str
+ """Unique identifier for the evaluation."""
+
+ created_at: int
+ """The Unix timestamp (in seconds) for when the eval was created."""
+
+ data_source_config: DataSourceConfig
+ """Configuration of data sources used in runs of the evaluation."""
+
+ metadata: Optional[Metadata] = None
+ """Set of 16 key-value pairs that can be attached to an object.
+
+ This can be useful for storing additional information about the object in a
+ structured format, and querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+ """
+
+ name: str
+ """The name of the evaluation."""
+
+ object: Literal["eval"]
+ """The object type."""
+
+ share_with_openai: bool
+ """Indicates whether the evaluation is shared with OpenAI."""
+
+ testing_criteria: List[TestingCriterion]
+ """A list of testing criteria."""
diff --git a/src/openai/types/eval_stored_completions_data_source_config.py b/src/openai/types/eval_stored_completions_data_source_config.py
new file mode 100644
index 0000000000..98f86a4719
--- /dev/null
+++ b/src/openai/types/eval_stored_completions_data_source_config.py
@@ -0,0 +1,32 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Dict, Optional
+from typing_extensions import Literal
+
+from pydantic import Field as FieldInfo
+
+from .._models import BaseModel
+from .shared.metadata import Metadata
+
+__all__ = ["EvalStoredCompletionsDataSourceConfig"]
+
+
+class EvalStoredCompletionsDataSourceConfig(BaseModel):
+ schema_: Dict[str, object] = FieldInfo(alias="schema")
+ """
+ The JSON schema for the run data source items. Learn how to build JSON schemas
+ [here](https://json-schema.org/).
+ """
+
+ type: Literal["stored_completions"]
+ """The type of data source. Always `stored_completions`."""
+
+ metadata: Optional[Metadata] = None
+ """Set of 16 key-value pairs that can be attached to an object.
+
+ This can be useful for storing additional information about the object in a
+ structured format, and querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+ """
diff --git a/src/openai/types/eval_string_check_grader.py b/src/openai/types/eval_string_check_grader.py
new file mode 100644
index 0000000000..4dfc8035f9
--- /dev/null
+++ b/src/openai/types/eval_string_check_grader.py
@@ -0,0 +1,24 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from .._models import BaseModel
+
+__all__ = ["EvalStringCheckGrader"]
+
+
+class EvalStringCheckGrader(BaseModel):
+ input: str
+ """The input text. This may include template strings."""
+
+ name: str
+ """The name of the grader."""
+
+ operation: Literal["eq", "ne", "like", "ilike"]
+ """The string check operation to perform. One of `eq`, `ne`, `like`, or `ilike`."""
+
+ reference: str
+ """The reference text. This may include template strings."""
+
+ type: Literal["string_check"]
+ """The object type, which is always `string_check`."""
diff --git a/src/openai/types/eval_string_check_grader_param.py b/src/openai/types/eval_string_check_grader_param.py
new file mode 100644
index 0000000000..3511329f8b
--- /dev/null
+++ b/src/openai/types/eval_string_check_grader_param.py
@@ -0,0 +1,24 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["EvalStringCheckGraderParam"]
+
+
+class EvalStringCheckGraderParam(TypedDict, total=False):
+ input: Required[str]
+ """The input text. This may include template strings."""
+
+ name: Required[str]
+ """The name of the grader."""
+
+ operation: Required[Literal["eq", "ne", "like", "ilike"]]
+ """The string check operation to perform. One of `eq`, `ne`, `like`, or `ilike`."""
+
+ reference: Required[str]
+ """The reference text. This may include template strings."""
+
+ type: Required[Literal["string_check"]]
+ """The object type, which is always `string_check`."""
diff --git a/src/openai/types/eval_text_similarity_grader.py b/src/openai/types/eval_text_similarity_grader.py
new file mode 100644
index 0000000000..7c6897a4a7
--- /dev/null
+++ b/src/openai/types/eval_text_similarity_grader.py
@@ -0,0 +1,44 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from .._models import BaseModel
+
+__all__ = ["EvalTextSimilarityGrader"]
+
+
+class EvalTextSimilarityGrader(BaseModel):
+ evaluation_metric: Literal[
+ "fuzzy_match",
+ "bleu",
+ "gleu",
+ "meteor",
+ "rouge_1",
+ "rouge_2",
+ "rouge_3",
+ "rouge_4",
+ "rouge_5",
+ "rouge_l",
+ "cosine",
+ ]
+ """The evaluation metric to use.
+
+ One of `cosine`, `fuzzy_match`, `bleu`, `gleu`, `meteor`, `rouge_1`, `rouge_2`,
+ `rouge_3`, `rouge_4`, `rouge_5`, or `rouge_l`.
+ """
+
+ input: str
+ """The text being graded."""
+
+ pass_threshold: float
+ """A float score where a value greater than or equal indicates a passing grade."""
+
+ reference: str
+ """The text being graded against."""
+
+ type: Literal["text_similarity"]
+ """The type of grader."""
+
+ name: Optional[str] = None
+ """The name of the grader."""
diff --git a/src/openai/types/eval_text_similarity_grader_param.py b/src/openai/types/eval_text_similarity_grader_param.py
new file mode 100644
index 0000000000..4bf5d586f3
--- /dev/null
+++ b/src/openai/types/eval_text_similarity_grader_param.py
@@ -0,0 +1,45 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["EvalTextSimilarityGraderParam"]
+
+
+class EvalTextSimilarityGraderParam(TypedDict, total=False):
+ evaluation_metric: Required[
+ Literal[
+ "fuzzy_match",
+ "bleu",
+ "gleu",
+ "meteor",
+ "rouge_1",
+ "rouge_2",
+ "rouge_3",
+ "rouge_4",
+ "rouge_5",
+ "rouge_l",
+ "cosine",
+ ]
+ ]
+ """The evaluation metric to use.
+
+ One of `cosine`, `fuzzy_match`, `bleu`, `gleu`, `meteor`, `rouge_1`, `rouge_2`,
+ `rouge_3`, `rouge_4`, `rouge_5`, or `rouge_l`.
+ """
+
+ input: Required[str]
+ """The text being graded."""
+
+ pass_threshold: Required[float]
+ """A float score where a value greater than or equal indicates a passing grade."""
+
+ reference: Required[str]
+ """The text being graded against."""
+
+ type: Required[Literal["text_similarity"]]
+ """The type of grader."""
+
+ name: str
+ """The name of the grader."""
diff --git a/src/openai/types/eval_update_params.py b/src/openai/types/eval_update_params.py
new file mode 100644
index 0000000000..042db29af5
--- /dev/null
+++ b/src/openai/types/eval_update_params.py
@@ -0,0 +1,25 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Optional
+from typing_extensions import TypedDict
+
+from .shared_params.metadata import Metadata
+
+__all__ = ["EvalUpdateParams"]
+
+
+class EvalUpdateParams(TypedDict, total=False):
+ metadata: Optional[Metadata]
+ """Set of 16 key-value pairs that can be attached to an object.
+
+ This can be useful for storing additional information about the object in a
+ structured format, and querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+ """
+
+ name: str
+ """Rename the evaluation."""
diff --git a/src/openai/types/eval_update_response.py b/src/openai/types/eval_update_response.py
new file mode 100644
index 0000000000..728a291736
--- /dev/null
+++ b/src/openai/types/eval_update_response.py
@@ -0,0 +1,56 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Union, Optional
+from typing_extensions import Literal, Annotated, TypeAlias
+
+from .._utils import PropertyInfo
+from .._models import BaseModel
+from .shared.metadata import Metadata
+from .eval_label_model_grader import EvalLabelModelGrader
+from .eval_string_check_grader import EvalStringCheckGrader
+from .eval_text_similarity_grader import EvalTextSimilarityGrader
+from .eval_custom_data_source_config import EvalCustomDataSourceConfig
+from .eval_stored_completions_data_source_config import EvalStoredCompletionsDataSourceConfig
+
+__all__ = ["EvalUpdateResponse", "DataSourceConfig", "TestingCriterion"]
+
+DataSourceConfig: TypeAlias = Annotated[
+ Union[EvalCustomDataSourceConfig, EvalStoredCompletionsDataSourceConfig], PropertyInfo(discriminator="type")
+]
+
+TestingCriterion: TypeAlias = Annotated[
+ Union[EvalLabelModelGrader, EvalStringCheckGrader, EvalTextSimilarityGrader], PropertyInfo(discriminator="type")
+]
+
+
+class EvalUpdateResponse(BaseModel):
+ id: str
+ """Unique identifier for the evaluation."""
+
+ created_at: int
+ """The Unix timestamp (in seconds) for when the eval was created."""
+
+ data_source_config: DataSourceConfig
+ """Configuration of data sources used in runs of the evaluation."""
+
+ metadata: Optional[Metadata] = None
+ """Set of 16 key-value pairs that can be attached to an object.
+
+ This can be useful for storing additional information about the object in a
+ structured format, and querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+ """
+
+ name: str
+ """The name of the evaluation."""
+
+ object: Literal["eval"]
+ """The object type."""
+
+ share_with_openai: bool
+ """Indicates whether the evaluation is shared with OpenAI."""
+
+ testing_criteria: List[TestingCriterion]
+ """A list of testing criteria."""
diff --git a/src/openai/types/evals/__init__.py b/src/openai/types/evals/__init__.py
new file mode 100644
index 0000000000..ebf84c6b8d
--- /dev/null
+++ b/src/openai/types/evals/__init__.py
@@ -0,0 +1,22 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from .eval_api_error import EvalAPIError as EvalAPIError
+from .run_list_params import RunListParams as RunListParams
+from .run_create_params import RunCreateParams as RunCreateParams
+from .run_list_response import RunListResponse as RunListResponse
+from .run_cancel_response import RunCancelResponse as RunCancelResponse
+from .run_create_response import RunCreateResponse as RunCreateResponse
+from .run_delete_response import RunDeleteResponse as RunDeleteResponse
+from .run_retrieve_response import RunRetrieveResponse as RunRetrieveResponse
+from .create_eval_jsonl_run_data_source import CreateEvalJSONLRunDataSource as CreateEvalJSONLRunDataSource
+from .create_eval_completions_run_data_source import (
+ CreateEvalCompletionsRunDataSource as CreateEvalCompletionsRunDataSource,
+)
+from .create_eval_jsonl_run_data_source_param import (
+ CreateEvalJSONLRunDataSourceParam as CreateEvalJSONLRunDataSourceParam,
+)
+from .create_eval_completions_run_data_source_param import (
+ CreateEvalCompletionsRunDataSourceParam as CreateEvalCompletionsRunDataSourceParam,
+)
diff --git a/src/openai/types/evals/create_eval_completions_run_data_source.py b/src/openai/types/evals/create_eval_completions_run_data_source.py
new file mode 100644
index 0000000000..07b88129e2
--- /dev/null
+++ b/src/openai/types/evals/create_eval_completions_run_data_source.py
@@ -0,0 +1,185 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Dict, List, Union, Optional
+from typing_extensions import Literal, Annotated, TypeAlias
+
+from ..._utils import PropertyInfo
+from ..._models import BaseModel
+from ..shared.metadata import Metadata
+
+__all__ = [
+ "CreateEvalCompletionsRunDataSource",
+ "InputMessages",
+ "InputMessagesTemplate",
+ "InputMessagesTemplateTemplate",
+ "InputMessagesTemplateTemplateChatMessage",
+ "InputMessagesTemplateTemplateInputMessage",
+ "InputMessagesTemplateTemplateInputMessageContent",
+ "InputMessagesTemplateTemplateOutputMessage",
+ "InputMessagesTemplateTemplateOutputMessageContent",
+ "InputMessagesItemReference",
+ "Source",
+ "SourceFileContent",
+ "SourceFileContentContent",
+ "SourceFileID",
+ "SourceStoredCompletions",
+ "SamplingParams",
+]
+
+
+class InputMessagesTemplateTemplateChatMessage(BaseModel):
+ content: str
+ """The content of the message."""
+
+ role: str
+ """The role of the message (e.g. "system", "assistant", "user")."""
+
+
+class InputMessagesTemplateTemplateInputMessageContent(BaseModel):
+ text: str
+ """The text content."""
+
+ type: Literal["input_text"]
+ """The type of content, which is always `input_text`."""
+
+
+class InputMessagesTemplateTemplateInputMessage(BaseModel):
+ content: InputMessagesTemplateTemplateInputMessageContent
+
+ role: Literal["user", "system", "developer"]
+ """The role of the message. One of `user`, `system`, or `developer`."""
+
+ type: Literal["message"]
+ """The type of item, which is always `message`."""
+
+
+class InputMessagesTemplateTemplateOutputMessageContent(BaseModel):
+ text: str
+ """The text content."""
+
+ type: Literal["output_text"]
+ """The type of content, which is always `output_text`."""
+
+
+class InputMessagesTemplateTemplateOutputMessage(BaseModel):
+ content: InputMessagesTemplateTemplateOutputMessageContent
+
+ role: Literal["assistant"]
+ """The role of the message. Must be `assistant` for output."""
+
+ type: Literal["message"]
+ """The type of item, which is always `message`."""
+
+
+InputMessagesTemplateTemplate: TypeAlias = Union[
+ InputMessagesTemplateTemplateChatMessage,
+ InputMessagesTemplateTemplateInputMessage,
+ InputMessagesTemplateTemplateOutputMessage,
+]
+
+
+class InputMessagesTemplate(BaseModel):
+ template: List[InputMessagesTemplateTemplate]
+ """A list of chat messages forming the prompt or context.
+
+    May include variable references to the "item" namespace, e.g. {{item.name}}.
+ """
+
+ type: Literal["template"]
+ """The type of input messages. Always `template`."""
+
+
+class InputMessagesItemReference(BaseModel):
+ item_reference: str
+ """A reference to a variable in the "item" namespace. Ie, "item.name" """
+
+ type: Literal["item_reference"]
+ """The type of input messages. Always `item_reference`."""
+
+
+InputMessages: TypeAlias = Annotated[
+ Union[InputMessagesTemplate, InputMessagesItemReference], PropertyInfo(discriminator="type")
+]
+
+
+class SourceFileContentContent(BaseModel):
+ item: Dict[str, object]
+
+ sample: Optional[Dict[str, object]] = None
+
+
+class SourceFileContent(BaseModel):
+ content: List[SourceFileContentContent]
+ """The content of the jsonl file."""
+
+ type: Literal["file_content"]
+ """The type of jsonl source. Always `file_content`."""
+
+
+class SourceFileID(BaseModel):
+ id: str
+ """The identifier of the file."""
+
+ type: Literal["file_id"]
+ """The type of jsonl source. Always `file_id`."""
+
+
+class SourceStoredCompletions(BaseModel):
+ created_after: Optional[int] = None
+ """An optional Unix timestamp to filter items created after this time."""
+
+ created_before: Optional[int] = None
+ """An optional Unix timestamp to filter items created before this time."""
+
+ limit: Optional[int] = None
+ """An optional maximum number of items to return."""
+
+ metadata: Optional[Metadata] = None
+ """Set of 16 key-value pairs that can be attached to an object.
+
+ This can be useful for storing additional information about the object in a
+ structured format, and querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+ """
+
+ model: Optional[str] = None
+ """An optional model to filter by (e.g., 'gpt-4o')."""
+
+ type: Literal["stored_completions"]
+ """The type of source. Always `stored_completions`."""
+
+
+Source: TypeAlias = Annotated[
+ Union[SourceFileContent, SourceFileID, SourceStoredCompletions], PropertyInfo(discriminator="type")
+]
+
+
+class SamplingParams(BaseModel):
+ max_completion_tokens: Optional[int] = None
+ """The maximum number of tokens in the generated output."""
+
+ seed: Optional[int] = None
+ """A seed value to initialize the randomness, during sampling."""
+
+ temperature: Optional[float] = None
+ """A higher temperature increases randomness in the outputs."""
+
+ top_p: Optional[float] = None
+ """An alternative to temperature for nucleus sampling; 1.0 includes all tokens."""
+
+
+class CreateEvalCompletionsRunDataSource(BaseModel):
+ input_messages: InputMessages
+
+ model: str
+ """The name of the model to use for generating completions (e.g. "o3-mini")."""
+
+ source: Source
+ """A StoredCompletionsRunDataSource configuration describing a set of filters"""
+
+ type: Literal["completions"]
+ """The type of run data source. Always `completions`."""
+
+ sampling_params: Optional[SamplingParams] = None
diff --git a/src/openai/types/evals/create_eval_completions_run_data_source_param.py b/src/openai/types/evals/create_eval_completions_run_data_source_param.py
new file mode 100644
index 0000000000..be4a6f1ec6
--- /dev/null
+++ b/src/openai/types/evals/create_eval_completions_run_data_source_param.py
@@ -0,0 +1,181 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Dict, Union, Iterable, Optional
+from typing_extensions import Literal, Required, TypeAlias, TypedDict
+
+from ..shared_params.metadata import Metadata
+
+__all__ = [
+ "CreateEvalCompletionsRunDataSourceParam",
+ "InputMessages",
+ "InputMessagesTemplate",
+ "InputMessagesTemplateTemplate",
+ "InputMessagesTemplateTemplateChatMessage",
+ "InputMessagesTemplateTemplateInputMessage",
+ "InputMessagesTemplateTemplateInputMessageContent",
+ "InputMessagesTemplateTemplateOutputMessage",
+ "InputMessagesTemplateTemplateOutputMessageContent",
+ "InputMessagesItemReference",
+ "Source",
+ "SourceFileContent",
+ "SourceFileContentContent",
+ "SourceFileID",
+ "SourceStoredCompletions",
+ "SamplingParams",
+]
+
+
+class InputMessagesTemplateTemplateChatMessage(TypedDict, total=False):
+ content: Required[str]
+ """The content of the message."""
+
+ role: Required[str]
+ """The role of the message (e.g. "system", "assistant", "user")."""
+
+
+class InputMessagesTemplateTemplateInputMessageContent(TypedDict, total=False):
+ text: Required[str]
+ """The text content."""
+
+ type: Required[Literal["input_text"]]
+ """The type of content, which is always `input_text`."""
+
+
+class InputMessagesTemplateTemplateInputMessage(TypedDict, total=False):
+ content: Required[InputMessagesTemplateTemplateInputMessageContent]
+
+ role: Required[Literal["user", "system", "developer"]]
+ """The role of the message. One of `user`, `system`, or `developer`."""
+
+ type: Required[Literal["message"]]
+ """The type of item, which is always `message`."""
+
+
+class InputMessagesTemplateTemplateOutputMessageContent(TypedDict, total=False):
+ text: Required[str]
+ """The text content."""
+
+ type: Required[Literal["output_text"]]
+ """The type of content, which is always `output_text`."""
+
+
+class InputMessagesTemplateTemplateOutputMessage(TypedDict, total=False):
+ content: Required[InputMessagesTemplateTemplateOutputMessageContent]
+
+ role: Required[Literal["assistant"]]
+ """The role of the message. Must be `assistant` for output."""
+
+ type: Required[Literal["message"]]
+ """The type of item, which is always `message`."""
+
+
+InputMessagesTemplateTemplate: TypeAlias = Union[
+ InputMessagesTemplateTemplateChatMessage,
+ InputMessagesTemplateTemplateInputMessage,
+ InputMessagesTemplateTemplateOutputMessage,
+]
+
+
+class InputMessagesTemplate(TypedDict, total=False):
+ template: Required[Iterable[InputMessagesTemplateTemplate]]
+ """A list of chat messages forming the prompt or context.
+
+    May include variable references to the "item" namespace, e.g. {{item.name}}.
+ """
+
+ type: Required[Literal["template"]]
+ """The type of input messages. Always `template`."""
+
+
+class InputMessagesItemReference(TypedDict, total=False):
+ item_reference: Required[str]
+ """A reference to a variable in the "item" namespace. Ie, "item.name" """
+
+ type: Required[Literal["item_reference"]]
+ """The type of input messages. Always `item_reference`."""
+
+
+InputMessages: TypeAlias = Union[InputMessagesTemplate, InputMessagesItemReference]
+
+
+class SourceFileContentContent(TypedDict, total=False):
+ item: Required[Dict[str, object]]
+
+ sample: Dict[str, object]
+
+
+class SourceFileContent(TypedDict, total=False):
+ content: Required[Iterable[SourceFileContentContent]]
+ """The content of the jsonl file."""
+
+ type: Required[Literal["file_content"]]
+ """The type of jsonl source. Always `file_content`."""
+
+
+class SourceFileID(TypedDict, total=False):
+ id: Required[str]
+ """The identifier of the file."""
+
+ type: Required[Literal["file_id"]]
+ """The type of jsonl source. Always `file_id`."""
+
+
+class SourceStoredCompletions(TypedDict, total=False):
+ created_after: Required[Optional[int]]
+ """An optional Unix timestamp to filter items created after this time."""
+
+ created_before: Required[Optional[int]]
+ """An optional Unix timestamp to filter items created before this time."""
+
+ limit: Required[Optional[int]]
+ """An optional maximum number of items to return."""
+
+ metadata: Required[Optional[Metadata]]
+ """Set of 16 key-value pairs that can be attached to an object.
+
+ This can be useful for storing additional information about the object in a
+ structured format, and querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+ """
+
+ model: Required[Optional[str]]
+ """An optional model to filter by (e.g., 'gpt-4o')."""
+
+ type: Required[Literal["stored_completions"]]
+ """The type of source. Always `stored_completions`."""
+
+
+Source: TypeAlias = Union[SourceFileContent, SourceFileID, SourceStoredCompletions]
+
+
+class SamplingParams(TypedDict, total=False):
+ max_completion_tokens: int
+ """The maximum number of tokens in the generated output."""
+
+ seed: int
+ """A seed value to initialize the randomness, during sampling."""
+
+ temperature: float
+ """A higher temperature increases randomness in the outputs."""
+
+ top_p: float
+ """An alternative to temperature for nucleus sampling; 1.0 includes all tokens."""
+
+
+class CreateEvalCompletionsRunDataSourceParam(TypedDict, total=False):
+ input_messages: Required[InputMessages]
+
+ model: Required[str]
+ """The name of the model to use for generating completions (e.g. "o3-mini")."""
+
+ source: Required[Source]
+ """A StoredCompletionsRunDataSource configuration describing a set of filters"""
+
+ type: Required[Literal["completions"]]
+ """The type of run data source. Always `completions`."""
+
+ sampling_params: SamplingParams
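The completions data source param can be assembled from nested dict literals. A sketch using a `template` prompt and a `file_id` source; the model name, file ID, and template text are placeholders, not values from this change.

```python
from openai.types.evals.create_eval_completions_run_data_source_param import (
    CreateEvalCompletionsRunDataSourceParam,
)

# Illustrative data source: render a templated prompt per item and sample from a model.
data_source: CreateEvalCompletionsRunDataSourceParam = {
    "type": "completions",
    "model": "gpt-4o-mini",  # placeholder model name
    "input_messages": {
        "type": "template",
        "template": [
            {"role": "system", "content": "You are a concise classifier."},
            {"role": "user", "content": "Classify the sentiment of: {{item.text}}"},
        ],
    },
    "source": {"type": "file_id", "id": "file-abc123"},  # placeholder file ID
    "sampling_params": {"temperature": 0.0, "max_completion_tokens": 256},
}
```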
diff --git a/src/openai/types/evals/create_eval_jsonl_run_data_source.py b/src/openai/types/evals/create_eval_jsonl_run_data_source.py
new file mode 100644
index 0000000000..d2be56243b
--- /dev/null
+++ b/src/openai/types/evals/create_eval_jsonl_run_data_source.py
@@ -0,0 +1,41 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Dict, List, Union, Optional
+from typing_extensions import Literal, Annotated, TypeAlias
+
+from ..._utils import PropertyInfo
+from ..._models import BaseModel
+
+__all__ = ["CreateEvalJSONLRunDataSource", "Source", "SourceFileContent", "SourceFileContentContent", "SourceFileID"]
+
+
+class SourceFileContentContent(BaseModel):
+ item: Dict[str, object]
+
+ sample: Optional[Dict[str, object]] = None
+
+
+class SourceFileContent(BaseModel):
+ content: List[SourceFileContentContent]
+ """The content of the jsonl file."""
+
+ type: Literal["file_content"]
+ """The type of jsonl source. Always `file_content`."""
+
+
+class SourceFileID(BaseModel):
+ id: str
+ """The identifier of the file."""
+
+ type: Literal["file_id"]
+ """The type of jsonl source. Always `file_id`."""
+
+
+Source: TypeAlias = Annotated[Union[SourceFileContent, SourceFileID], PropertyInfo(discriminator="type")]
+
+
+class CreateEvalJSONLRunDataSource(BaseModel):
+ source: Source
+
+ type: Literal["jsonl"]
+ """The type of data source. Always `jsonl`."""
diff --git a/src/openai/types/evals/create_eval_jsonl_run_data_source_param.py b/src/openai/types/evals/create_eval_jsonl_run_data_source_param.py
new file mode 100644
index 0000000000..b8ba48a666
--- /dev/null
+++ b/src/openai/types/evals/create_eval_jsonl_run_data_source_param.py
@@ -0,0 +1,46 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Dict, Union, Iterable
+from typing_extensions import Literal, Required, TypeAlias, TypedDict
+
+__all__ = [
+ "CreateEvalJSONLRunDataSourceParam",
+ "Source",
+ "SourceFileContent",
+ "SourceFileContentContent",
+ "SourceFileID",
+]
+
+
+class SourceFileContentContent(TypedDict, total=False):
+ item: Required[Dict[str, object]]
+
+ sample: Dict[str, object]
+
+
+class SourceFileContent(TypedDict, total=False):
+ content: Required[Iterable[SourceFileContentContent]]
+ """The content of the jsonl file."""
+
+ type: Required[Literal["file_content"]]
+ """The type of jsonl source. Always `file_content`."""
+
+
+class SourceFileID(TypedDict, total=False):
+ id: Required[str]
+ """The identifier of the file."""
+
+ type: Required[Literal["file_id"]]
+ """The type of jsonl source. Always `file_id`."""
+
+
+Source: TypeAlias = Union[SourceFileContent, SourceFileID]
+
+
+class CreateEvalJSONLRunDataSourceParam(TypedDict, total=False):
+ source: Required[Source]
+
+ type: Required[Literal["jsonl"]]
+ """The type of data source. Always `jsonl`."""
diff --git a/src/openai/types/evals/eval_api_error.py b/src/openai/types/evals/eval_api_error.py
new file mode 100644
index 0000000000..d67185e981
--- /dev/null
+++ b/src/openai/types/evals/eval_api_error.py
@@ -0,0 +1,14 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+
+from ..._models import BaseModel
+
+__all__ = ["EvalAPIError"]
+
+
+class EvalAPIError(BaseModel):
+ code: str
+ """The error code."""
+
+ message: str
+ """The error message."""
diff --git a/src/openai/types/evals/run_cancel_response.py b/src/openai/types/evals/run_cancel_response.py
new file mode 100644
index 0000000000..90e52241a6
--- /dev/null
+++ b/src/openai/types/evals/run_cancel_response.py
@@ -0,0 +1,115 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Union, Optional
+from typing_extensions import Literal, Annotated, TypeAlias
+
+from pydantic import Field as FieldInfo
+
+from ..._utils import PropertyInfo
+from ..._models import BaseModel
+from .eval_api_error import EvalAPIError
+from ..shared.metadata import Metadata
+from .create_eval_jsonl_run_data_source import CreateEvalJSONLRunDataSource
+from .create_eval_completions_run_data_source import CreateEvalCompletionsRunDataSource
+
+__all__ = ["RunCancelResponse", "DataSource", "PerModelUsage", "PerTestingCriteriaResult", "ResultCounts"]
+
+DataSource: TypeAlias = Annotated[
+ Union[CreateEvalJSONLRunDataSource, CreateEvalCompletionsRunDataSource], PropertyInfo(discriminator="type")
+]
+
+
+class PerModelUsage(BaseModel):
+ cached_tokens: int
+ """The number of tokens retrieved from cache."""
+
+ completion_tokens: int
+ """The number of completion tokens generated."""
+
+ invocation_count: int
+ """The number of invocations."""
+
+ run_model_name: str = FieldInfo(alias="model_name")
+ """The name of the model."""
+
+ prompt_tokens: int
+ """The number of prompt tokens used."""
+
+ total_tokens: int
+ """The total number of tokens used."""
+
+
+class PerTestingCriteriaResult(BaseModel):
+ failed: int
+ """Number of tests failed for this criteria."""
+
+ passed: int
+ """Number of tests passed for this criteria."""
+
+ testing_criteria: str
+ """A description of the testing criteria."""
+
+
+class ResultCounts(BaseModel):
+ errored: int
+ """Number of output items that resulted in an error."""
+
+ failed: int
+ """Number of output items that failed to pass the evaluation."""
+
+ passed: int
+ """Number of output items that passed the evaluation."""
+
+ total: int
+ """Total number of executed output items."""
+
+
+class RunCancelResponse(BaseModel):
+ id: str
+ """Unique identifier for the evaluation run."""
+
+ created_at: int
+ """Unix timestamp (in seconds) when the evaluation run was created."""
+
+ data_source: DataSource
+ """Information about the run's data source."""
+
+ error: EvalAPIError
+ """An object representing an error response from the Eval API."""
+
+ eval_id: str
+ """The identifier of the associated evaluation."""
+
+ metadata: Optional[Metadata] = None
+ """Set of 16 key-value pairs that can be attached to an object.
+
+ This can be useful for storing additional information about the object in a
+ structured format, and querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+ """
+
+ model: str
+ """The model that is evaluated, if applicable."""
+
+ name: str
+ """The name of the evaluation run."""
+
+ object: Literal["eval.run"]
+ """The type of the object. Always "eval.run"."""
+
+ per_model_usage: List[PerModelUsage]
+ """Usage statistics for each model during the evaluation run."""
+
+ per_testing_criteria_results: List[PerTestingCriteriaResult]
+ """Results per testing criteria applied during the evaluation run."""
+
+ report_url: str
+ """The URL to the rendered evaluation run report on the UI dashboard."""
+
+ result_counts: ResultCounts
+ """Counters summarizing the outcomes of the evaluation run."""
+
+ status: str
+ """The status of the evaluation run."""
diff --git a/src/openai/types/evals/run_create_params.py b/src/openai/types/evals/run_create_params.py
new file mode 100644
index 0000000000..acf7b1b126
--- /dev/null
+++ b/src/openai/types/evals/run_create_params.py
@@ -0,0 +1,33 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union, Optional
+from typing_extensions import Required, TypeAlias, TypedDict
+
+from ..shared_params.metadata import Metadata
+from .create_eval_jsonl_run_data_source_param import CreateEvalJSONLRunDataSourceParam
+from .create_eval_completions_run_data_source_param import CreateEvalCompletionsRunDataSourceParam
+
+__all__ = ["RunCreateParams", "DataSource"]
+
+
+class RunCreateParams(TypedDict, total=False):
+ data_source: Required[DataSource]
+ """Details about the run's data source."""
+
+ metadata: Optional[Metadata]
+ """Set of 16 key-value pairs that can be attached to an object.
+
+ This can be useful for storing additional information about the object in a
+ structured format, and querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+ """
+
+ name: str
+ """The name of the run."""
+
+
+DataSource: TypeAlias = Union[CreateEvalJSONLRunDataSourceParam, CreateEvalCompletionsRunDataSourceParam]
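Putting the pieces together, `client.evals.runs.create(eval_id, **params)` takes one of the data sources above. A sketch with placeholder IDs:

```python
from openai import OpenAI

client = OpenAI()

# "eval_123" and "file-abc123" are placeholders.
run = client.evals.runs.create(
    "eval_123",
    name="nightly-regression",
    data_source={
        "type": "jsonl",
        "source": {"type": "file_id", "id": "file-abc123"},
    },
)
print(run.id, run.status)
```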
diff --git a/src/openai/types/evals/run_create_response.py b/src/openai/types/evals/run_create_response.py
new file mode 100644
index 0000000000..14ca426427
--- /dev/null
+++ b/src/openai/types/evals/run_create_response.py
@@ -0,0 +1,115 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Union, Optional
+from typing_extensions import Literal, Annotated, TypeAlias
+
+from pydantic import Field as FieldInfo
+
+from ..._utils import PropertyInfo
+from ..._models import BaseModel
+from .eval_api_error import EvalAPIError
+from ..shared.metadata import Metadata
+from .create_eval_jsonl_run_data_source import CreateEvalJSONLRunDataSource
+from .create_eval_completions_run_data_source import CreateEvalCompletionsRunDataSource
+
+__all__ = ["RunCreateResponse", "DataSource", "PerModelUsage", "PerTestingCriteriaResult", "ResultCounts"]
+
+DataSource: TypeAlias = Annotated[
+ Union[CreateEvalJSONLRunDataSource, CreateEvalCompletionsRunDataSource], PropertyInfo(discriminator="type")
+]
+
+
+class PerModelUsage(BaseModel):
+ cached_tokens: int
+ """The number of tokens retrieved from cache."""
+
+ completion_tokens: int
+ """The number of completion tokens generated."""
+
+ invocation_count: int
+ """The number of invocations."""
+
+ run_model_name: str = FieldInfo(alias="model_name")
+ """The name of the model."""
+
+ prompt_tokens: int
+ """The number of prompt tokens used."""
+
+ total_tokens: int
+ """The total number of tokens used."""
+
+
+class PerTestingCriteriaResult(BaseModel):
+ failed: int
+ """Number of tests failed for this criteria."""
+
+ passed: int
+ """Number of tests passed for this criteria."""
+
+ testing_criteria: str
+ """A description of the testing criteria."""
+
+
+class ResultCounts(BaseModel):
+ errored: int
+ """Number of output items that resulted in an error."""
+
+ failed: int
+ """Number of output items that failed to pass the evaluation."""
+
+ passed: int
+ """Number of output items that passed the evaluation."""
+
+ total: int
+ """Total number of executed output items."""
+
+
+class RunCreateResponse(BaseModel):
+ id: str
+ """Unique identifier for the evaluation run."""
+
+ created_at: int
+ """Unix timestamp (in seconds) when the evaluation run was created."""
+
+ data_source: DataSource
+ """Information about the run's data source."""
+
+ error: EvalAPIError
+ """An object representing an error response from the Eval API."""
+
+ eval_id: str
+ """The identifier of the associated evaluation."""
+
+ metadata: Optional[Metadata] = None
+ """Set of 16 key-value pairs that can be attached to an object.
+
+ This can be useful for storing additional information about the object in a
+ structured format, and querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+ """
+
+ model: str
+ """The model that is evaluated, if applicable."""
+
+ name: str
+ """The name of the evaluation run."""
+
+ object: Literal["eval.run"]
+ """The type of the object. Always "eval.run"."""
+
+ per_model_usage: List[PerModelUsage]
+ """Usage statistics for each model during the evaluation run."""
+
+ per_testing_criteria_results: List[PerTestingCriteriaResult]
+ """Results per testing criteria applied during the evaluation run."""
+
+ report_url: str
+ """The URL to the rendered evaluation run report on the UI dashboard."""
+
+ result_counts: ResultCounts
+ """Counters summarizing the outcomes of the evaluation run."""
+
+ status: str
+ """The status of the evaluation run."""
diff --git a/src/openai/types/evals/run_delete_response.py b/src/openai/types/evals/run_delete_response.py
new file mode 100644
index 0000000000..d48d01f86c
--- /dev/null
+++ b/src/openai/types/evals/run_delete_response.py
@@ -0,0 +1,15 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+
+from ..._models import BaseModel
+
+__all__ = ["RunDeleteResponse"]
+
+
+class RunDeleteResponse(BaseModel):
+ deleted: Optional[bool] = None
+
+ object: Optional[str] = None
+
+ run_id: Optional[str] = None
diff --git a/src/openai/types/evals/run_list_params.py b/src/openai/types/evals/run_list_params.py
new file mode 100644
index 0000000000..6060eafb97
--- /dev/null
+++ b/src/openai/types/evals/run_list_params.py
@@ -0,0 +1,27 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, TypedDict
+
+__all__ = ["RunListParams"]
+
+
+class RunListParams(TypedDict, total=False):
+ after: str
+ """Identifier for the last run from the previous pagination request."""
+
+ limit: int
+ """Number of runs to retrieve."""
+
+ order: Literal["asc", "desc"]
+ """Sort order for runs by timestamp.
+
+ Use `asc` for ascending order or `desc` for descending order. Defaults to `asc`.
+ """
+
+ status: Literal["queued", "in_progress", "completed", "canceled", "failed"]
+ """Filter runs by status.
+
+ Use "queued" | "in_progress" | "failed" | "completed" | "canceled".
+ """
diff --git a/src/openai/types/evals/run_list_response.py b/src/openai/types/evals/run_list_response.py
new file mode 100644
index 0000000000..a1022f542f
--- /dev/null
+++ b/src/openai/types/evals/run_list_response.py
@@ -0,0 +1,115 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Union, Optional
+from typing_extensions import Literal, Annotated, TypeAlias
+
+from pydantic import Field as FieldInfo
+
+from ..._utils import PropertyInfo
+from ..._models import BaseModel
+from .eval_api_error import EvalAPIError
+from ..shared.metadata import Metadata
+from .create_eval_jsonl_run_data_source import CreateEvalJSONLRunDataSource
+from .create_eval_completions_run_data_source import CreateEvalCompletionsRunDataSource
+
+__all__ = ["RunListResponse", "DataSource", "PerModelUsage", "PerTestingCriteriaResult", "ResultCounts"]
+
+DataSource: TypeAlias = Annotated[
+ Union[CreateEvalJSONLRunDataSource, CreateEvalCompletionsRunDataSource], PropertyInfo(discriminator="type")
+]
+
+
+class PerModelUsage(BaseModel):
+ cached_tokens: int
+ """The number of tokens retrieved from cache."""
+
+ completion_tokens: int
+ """The number of completion tokens generated."""
+
+ invocation_count: int
+ """The number of invocations."""
+
+ run_model_name: str = FieldInfo(alias="model_name")
+ """The name of the model."""
+
+ prompt_tokens: int
+ """The number of prompt tokens used."""
+
+ total_tokens: int
+ """The total number of tokens used."""
+
+
+class PerTestingCriteriaResult(BaseModel):
+ failed: int
+ """Number of tests failed for this criteria."""
+
+ passed: int
+ """Number of tests passed for this criteria."""
+
+ testing_criteria: str
+ """A description of the testing criteria."""
+
+
+class ResultCounts(BaseModel):
+ errored: int
+ """Number of output items that resulted in an error."""
+
+ failed: int
+ """Number of output items that failed to pass the evaluation."""
+
+ passed: int
+ """Number of output items that passed the evaluation."""
+
+ total: int
+ """Total number of executed output items."""
+
+
+class RunListResponse(BaseModel):
+ id: str
+ """Unique identifier for the evaluation run."""
+
+ created_at: int
+ """Unix timestamp (in seconds) when the evaluation run was created."""
+
+ data_source: DataSource
+ """Information about the run's data source."""
+
+ error: EvalAPIError
+ """An object representing an error response from the Eval API."""
+
+ eval_id: str
+ """The identifier of the associated evaluation."""
+
+ metadata: Optional[Metadata] = None
+ """Set of 16 key-value pairs that can be attached to an object.
+
+ This can be useful for storing additional information about the object in a
+ structured format, and querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+ """
+
+ model: str
+ """The model that is evaluated, if applicable."""
+
+ name: str
+ """The name of the evaluation run."""
+
+ object: Literal["eval.run"]
+ """The type of the object. Always "eval.run"."""
+
+ per_model_usage: List[PerModelUsage]
+ """Usage statistics for each model during the evaluation run."""
+
+ per_testing_criteria_results: List[PerTestingCriteriaResult]
+ """Results per testing criteria applied during the evaluation run."""
+
+ report_url: str
+ """The URL to the rendered evaluation run report on the UI dashboard."""
+
+ result_counts: ResultCounts
+ """Counters summarizing the outcomes of the evaluation run."""
+
+ status: str
+ """The status of the evaluation run."""
diff --git a/src/openai/types/evals/run_retrieve_response.py b/src/openai/types/evals/run_retrieve_response.py
new file mode 100644
index 0000000000..461ed43dda
--- /dev/null
+++ b/src/openai/types/evals/run_retrieve_response.py
@@ -0,0 +1,115 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Union, Optional
+from typing_extensions import Literal, Annotated, TypeAlias
+
+from pydantic import Field as FieldInfo
+
+from ..._utils import PropertyInfo
+from ..._models import BaseModel
+from .eval_api_error import EvalAPIError
+from ..shared.metadata import Metadata
+from .create_eval_jsonl_run_data_source import CreateEvalJSONLRunDataSource
+from .create_eval_completions_run_data_source import CreateEvalCompletionsRunDataSource
+
+__all__ = ["RunRetrieveResponse", "DataSource", "PerModelUsage", "PerTestingCriteriaResult", "ResultCounts"]
+
+DataSource: TypeAlias = Annotated[
+ Union[CreateEvalJSONLRunDataSource, CreateEvalCompletionsRunDataSource], PropertyInfo(discriminator="type")
+]
+
+
+class PerModelUsage(BaseModel):
+ cached_tokens: int
+ """The number of tokens retrieved from cache."""
+
+ completion_tokens: int
+ """The number of completion tokens generated."""
+
+ invocation_count: int
+ """The number of invocations."""
+
+ run_model_name: str = FieldInfo(alias="model_name")
+ """The name of the model."""
+
+ prompt_tokens: int
+ """The number of prompt tokens used."""
+
+ total_tokens: int
+ """The total number of tokens used."""
+
+
+class PerTestingCriteriaResult(BaseModel):
+ failed: int
+ """Number of tests failed for this criteria."""
+
+ passed: int
+ """Number of tests passed for this criteria."""
+
+ testing_criteria: str
+ """A description of the testing criteria."""
+
+
+class ResultCounts(BaseModel):
+ errored: int
+ """Number of output items that resulted in an error."""
+
+ failed: int
+ """Number of output items that failed to pass the evaluation."""
+
+ passed: int
+ """Number of output items that passed the evaluation."""
+
+ total: int
+ """Total number of executed output items."""
+
+
+class RunRetrieveResponse(BaseModel):
+ id: str
+ """Unique identifier for the evaluation run."""
+
+ created_at: int
+ """Unix timestamp (in seconds) when the evaluation run was created."""
+
+ data_source: DataSource
+ """Information about the run's data source."""
+
+ error: EvalAPIError
+ """An object representing an error response from the Eval API."""
+
+ eval_id: str
+ """The identifier of the associated evaluation."""
+
+ metadata: Optional[Metadata] = None
+ """Set of 16 key-value pairs that can be attached to an object.
+
+ This can be useful for storing additional information about the object in a
+ structured format, and querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+ """
+
+ model: str
+ """The model that is evaluated, if applicable."""
+
+ name: str
+ """The name of the evaluation run."""
+
+ object: Literal["eval.run"]
+ """The type of the object. Always "eval.run"."""
+
+ per_model_usage: List[PerModelUsage]
+ """Usage statistics for each model during the evaluation run."""
+
+ per_testing_criteria_results: List[PerTestingCriteriaResult]
+ """Results per testing criteria applied during the evaluation run."""
+
+ report_url: str
+ """The URL to the rendered evaluation run report on the UI dashboard."""
+
+ result_counts: ResultCounts
+ """Counters summarizing the outcomes of the evaluation run."""
+
+ status: str
+ """The status of the evaluation run."""
diff --git a/src/openai/types/evals/runs/__init__.py b/src/openai/types/evals/runs/__init__.py
new file mode 100644
index 0000000000..b77cbb6acd
--- /dev/null
+++ b/src/openai/types/evals/runs/__init__.py
@@ -0,0 +1,7 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from .output_item_list_params import OutputItemListParams as OutputItemListParams
+from .output_item_list_response import OutputItemListResponse as OutputItemListResponse
+from .output_item_retrieve_response import OutputItemRetrieveResponse as OutputItemRetrieveResponse
diff --git a/src/openai/types/evals/runs/output_item_list_params.py b/src/openai/types/evals/runs/output_item_list_params.py
new file mode 100644
index 0000000000..073bfc69a7
--- /dev/null
+++ b/src/openai/types/evals/runs/output_item_list_params.py
@@ -0,0 +1,30 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["OutputItemListParams"]
+
+
+class OutputItemListParams(TypedDict, total=False):
+ eval_id: Required[str]
+
+ after: str
+ """Identifier for the last output item from the previous pagination request."""
+
+ limit: int
+ """Number of output items to retrieve."""
+
+ order: Literal["asc", "desc"]
+ """Sort order for output items by timestamp.
+
+ Use `asc` for ascending order or `desc` for descending order. Defaults to `asc`.
+ """
+
+ status: Literal["fail", "pass"]
+ """Filter output items by status.
+
+    Use `fail` to filter by failed output items or `pass` to filter by passed
+    output items.
+ """
diff --git a/src/openai/types/evals/runs/output_item_list_response.py b/src/openai/types/evals/runs/output_item_list_response.py
new file mode 100644
index 0000000000..72b1049f7b
--- /dev/null
+++ b/src/openai/types/evals/runs/output_item_list_response.py
@@ -0,0 +1,104 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+import builtins
+from typing import Dict, List, Optional
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+from ..eval_api_error import EvalAPIError
+
+__all__ = ["OutputItemListResponse", "Sample", "SampleInput", "SampleOutput", "SampleUsage"]
+
+
+class SampleInput(BaseModel):
+ content: str
+ """The content of the message."""
+
+ role: str
+ """The role of the message sender (e.g., system, user, developer)."""
+
+
+class SampleOutput(BaseModel):
+ content: Optional[str] = None
+ """The content of the message."""
+
+ role: Optional[str] = None
+ """The role of the message (e.g. "system", "assistant", "user")."""
+
+
+class SampleUsage(BaseModel):
+ cached_tokens: int
+ """The number of tokens retrieved from cache."""
+
+ completion_tokens: int
+ """The number of completion tokens generated."""
+
+ prompt_tokens: int
+ """The number of prompt tokens used."""
+
+ total_tokens: int
+ """The total number of tokens used."""
+
+
+class Sample(BaseModel):
+ error: EvalAPIError
+ """An object representing an error response from the Eval API."""
+
+ finish_reason: str
+ """The reason why the sample generation was finished."""
+
+ input: List[SampleInput]
+ """An array of input messages."""
+
+ max_completion_tokens: int
+ """The maximum number of tokens allowed for completion."""
+
+ model: str
+ """The model used for generating the sample."""
+
+ output: List[SampleOutput]
+ """An array of output messages."""
+
+ seed: int
+ """The seed used for generating the sample."""
+
+ temperature: float
+ """The sampling temperature used."""
+
+ top_p: float
+ """The top_p value used for sampling."""
+
+ usage: SampleUsage
+ """Token usage details for the sample."""
+
+
+class OutputItemListResponse(BaseModel):
+ id: str
+ """Unique identifier for the evaluation run output item."""
+
+ created_at: int
+ """Unix timestamp (in seconds) when the evaluation run was created."""
+
+ datasource_item: Dict[str, object]
+ """Details of the input data source item."""
+
+ datasource_item_id: int
+ """The identifier for the data source item."""
+
+ eval_id: str
+ """The identifier of the evaluation group."""
+
+ object: Literal["eval.run.output_item"]
+ """The type of the object. Always "eval.run.output_item"."""
+
+ results: List[Dict[str, builtins.object]]
+ """A list of results from the evaluation run."""
+
+ run_id: str
+ """The identifier of the evaluation run associated with this output item."""
+
+ sample: Sample
+ """A sample containing the input and output of the evaluation run."""
+
+ status: str
+ """The status of the evaluation run."""
diff --git a/src/openai/types/evals/runs/output_item_retrieve_response.py b/src/openai/types/evals/runs/output_item_retrieve_response.py
new file mode 100644
index 0000000000..63aab5565f
--- /dev/null
+++ b/src/openai/types/evals/runs/output_item_retrieve_response.py
@@ -0,0 +1,104 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+import builtins
+from typing import Dict, List, Optional
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+from ..eval_api_error import EvalAPIError
+
+__all__ = ["OutputItemRetrieveResponse", "Sample", "SampleInput", "SampleOutput", "SampleUsage"]
+
+
+class SampleInput(BaseModel):
+ content: str
+ """The content of the message."""
+
+ role: str
+ """The role of the message sender (e.g., system, user, developer)."""
+
+
+class SampleOutput(BaseModel):
+ content: Optional[str] = None
+ """The content of the message."""
+
+ role: Optional[str] = None
+ """The role of the message (e.g. "system", "assistant", "user")."""
+
+
+class SampleUsage(BaseModel):
+ cached_tokens: int
+ """The number of tokens retrieved from cache."""
+
+ completion_tokens: int
+ """The number of completion tokens generated."""
+
+ prompt_tokens: int
+ """The number of prompt tokens used."""
+
+ total_tokens: int
+ """The total number of tokens used."""
+
+
+class Sample(BaseModel):
+ error: EvalAPIError
+ """An object representing an error response from the Eval API."""
+
+ finish_reason: str
+ """The reason why the sample generation was finished."""
+
+ input: List[SampleInput]
+ """An array of input messages."""
+
+ max_completion_tokens: int
+ """The maximum number of tokens allowed for completion."""
+
+ model: str
+ """The model used for generating the sample."""
+
+ output: List[SampleOutput]
+ """An array of output messages."""
+
+ seed: int
+ """The seed used for generating the sample."""
+
+ temperature: float
+ """The sampling temperature used."""
+
+ top_p: float
+ """The top_p value used for sampling."""
+
+ usage: SampleUsage
+ """Token usage details for the sample."""
+
+
+class OutputItemRetrieveResponse(BaseModel):
+ id: str
+ """Unique identifier for the evaluation run output item."""
+
+ created_at: int
+ """Unix timestamp (in seconds) when the evaluation run was created."""
+
+ datasource_item: Dict[str, object]
+ """Details of the input data source item."""
+
+ datasource_item_id: int
+ """The identifier for the data source item."""
+
+ eval_id: str
+ """The identifier of the evaluation group."""
+
+ object: Literal["eval.run.output_item"]
+ """The type of the object. Always "eval.run.output_item"."""
+
+ results: List[Dict[str, builtins.object]]
+ """A list of results from the evaluation run."""
+
+ run_id: str
+ """The identifier of the evaluation run associated with this output item."""
+
+ sample: Sample
+ """A sample containing the input and output of the evaluation run."""
+
+ status: str
+ """The status of the evaluation run."""
diff --git a/src/openai/types/fine_tuning/checkpoints/__init__.py b/src/openai/types/fine_tuning/checkpoints/__init__.py
new file mode 100644
index 0000000000..2947b33145
--- /dev/null
+++ b/src/openai/types/fine_tuning/checkpoints/__init__.py
@@ -0,0 +1,9 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from .permission_create_params import PermissionCreateParams as PermissionCreateParams
+from .permission_create_response import PermissionCreateResponse as PermissionCreateResponse
+from .permission_delete_response import PermissionDeleteResponse as PermissionDeleteResponse
+from .permission_retrieve_params import PermissionRetrieveParams as PermissionRetrieveParams
+from .permission_retrieve_response import PermissionRetrieveResponse as PermissionRetrieveResponse
diff --git a/src/openai/types/fine_tuning/checkpoints/permission_create_params.py b/src/openai/types/fine_tuning/checkpoints/permission_create_params.py
new file mode 100644
index 0000000000..92f98f21b9
--- /dev/null
+++ b/src/openai/types/fine_tuning/checkpoints/permission_create_params.py
@@ -0,0 +1,13 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import List
+from typing_extensions import Required, TypedDict
+
+__all__ = ["PermissionCreateParams"]
+
+
+class PermissionCreateParams(TypedDict, total=False):
+ project_ids: Required[List[str]]
+ """The project identifiers to grant access to."""
diff --git a/src/openai/types/fine_tuning/checkpoints/permission_create_response.py b/src/openai/types/fine_tuning/checkpoints/permission_create_response.py
new file mode 100644
index 0000000000..9bc14c00cc
--- /dev/null
+++ b/src/openai/types/fine_tuning/checkpoints/permission_create_response.py
@@ -0,0 +1,21 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["PermissionCreateResponse"]
+
+
+class PermissionCreateResponse(BaseModel):
+ id: str
+ """The permission identifier, which can be referenced in the API endpoints."""
+
+ created_at: int
+ """The Unix timestamp (in seconds) for when the permission was created."""
+
+ object: Literal["checkpoint.permission"]
+ """The object type, which is always "checkpoint.permission"."""
+
+ project_id: str
+ """The project identifier that the permission is for."""
diff --git a/src/openai/types/fine_tuning/checkpoints/permission_delete_response.py b/src/openai/types/fine_tuning/checkpoints/permission_delete_response.py
new file mode 100644
index 0000000000..1a92d912fa
--- /dev/null
+++ b/src/openai/types/fine_tuning/checkpoints/permission_delete_response.py
@@ -0,0 +1,18 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["PermissionDeleteResponse"]
+
+
+class PermissionDeleteResponse(BaseModel):
+ id: str
+ """The ID of the fine-tuned model checkpoint permission that was deleted."""
+
+ deleted: bool
+ """Whether the fine-tuned model checkpoint permission was successfully deleted."""
+
+ object: Literal["checkpoint.permission"]
+ """The object type, which is always "checkpoint.permission"."""
diff --git a/src/openai/types/fine_tuning/checkpoints/permission_retrieve_params.py b/src/openai/types/fine_tuning/checkpoints/permission_retrieve_params.py
new file mode 100644
index 0000000000..6e66a867ca
--- /dev/null
+++ b/src/openai/types/fine_tuning/checkpoints/permission_retrieve_params.py
@@ -0,0 +1,21 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, TypedDict
+
+__all__ = ["PermissionRetrieveParams"]
+
+
+class PermissionRetrieveParams(TypedDict, total=False):
+ after: str
+ """Identifier for the last permission ID from the previous pagination request."""
+
+ limit: int
+ """Number of permissions to retrieve."""
+
+ order: Literal["ascending", "descending"]
+ """The order in which to retrieve permissions."""
+
+ project_id: str
+ """The ID of the project to get permissions for."""
diff --git a/src/openai/types/fine_tuning/checkpoints/permission_retrieve_response.py b/src/openai/types/fine_tuning/checkpoints/permission_retrieve_response.py
new file mode 100644
index 0000000000..14c73b55d0
--- /dev/null
+++ b/src/openai/types/fine_tuning/checkpoints/permission_retrieve_response.py
@@ -0,0 +1,34 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Optional
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["PermissionRetrieveResponse", "Data"]
+
+
+class Data(BaseModel):
+ id: str
+ """The permission identifier, which can be referenced in the API endpoints."""
+
+ created_at: int
+ """The Unix timestamp (in seconds) for when the permission was created."""
+
+ object: Literal["checkpoint.permission"]
+ """The object type, which is always "checkpoint.permission"."""
+
+ project_id: str
+ """The project identifier that the permission is for."""
+
+
+class PermissionRetrieveResponse(BaseModel):
+ data: List[Data]
+
+ has_more: bool
+
+ object: Literal["list"]
+
+ first_id: Optional[str] = None
+
+ last_id: Optional[str] = None
diff --git a/tests/api_resources/evals/__init__.py b/tests/api_resources/evals/__init__.py
new file mode 100644
index 0000000000..fd8019a9a1
--- /dev/null
+++ b/tests/api_resources/evals/__init__.py
@@ -0,0 +1 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
diff --git a/tests/api_resources/evals/runs/__init__.py b/tests/api_resources/evals/runs/__init__.py
new file mode 100644
index 0000000000..fd8019a9a1
--- /dev/null
+++ b/tests/api_resources/evals/runs/__init__.py
@@ -0,0 +1 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
diff --git a/tests/api_resources/evals/runs/test_output_items.py b/tests/api_resources/evals/runs/test_output_items.py
new file mode 100644
index 0000000000..f764f0336e
--- /dev/null
+++ b/tests/api_resources/evals/runs/test_output_items.py
@@ -0,0 +1,263 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import os
+from typing import Any, cast
+
+import pytest
+
+from openai import OpenAI, AsyncOpenAI
+from tests.utils import assert_matches_type
+from openai.pagination import SyncCursorPage, AsyncCursorPage
+from openai.types.evals.runs import OutputItemListResponse, OutputItemRetrieveResponse
+
+base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
+
+
+class TestOutputItems:
+ parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @parametrize
+ def test_method_retrieve(self, client: OpenAI) -> None:
+ output_item = client.evals.runs.output_items.retrieve(
+ output_item_id="output_item_id",
+ eval_id="eval_id",
+ run_id="run_id",
+ )
+ assert_matches_type(OutputItemRetrieveResponse, output_item, path=["response"])
+
+ @parametrize
+ def test_raw_response_retrieve(self, client: OpenAI) -> None:
+ response = client.evals.runs.output_items.with_raw_response.retrieve(
+ output_item_id="output_item_id",
+ eval_id="eval_id",
+ run_id="run_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ output_item = response.parse()
+ assert_matches_type(OutputItemRetrieveResponse, output_item, path=["response"])
+
+ @parametrize
+ def test_streaming_response_retrieve(self, client: OpenAI) -> None:
+ with client.evals.runs.output_items.with_streaming_response.retrieve(
+ output_item_id="output_item_id",
+ eval_id="eval_id",
+ run_id="run_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ output_item = response.parse()
+ assert_matches_type(OutputItemRetrieveResponse, output_item, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_retrieve(self, client: OpenAI) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"):
+ client.evals.runs.output_items.with_raw_response.retrieve(
+ output_item_id="output_item_id",
+ eval_id="",
+ run_id="run_id",
+ )
+
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"):
+ client.evals.runs.output_items.with_raw_response.retrieve(
+ output_item_id="output_item_id",
+ eval_id="eval_id",
+ run_id="",
+ )
+
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `output_item_id` but received ''"):
+ client.evals.runs.output_items.with_raw_response.retrieve(
+ output_item_id="",
+ eval_id="eval_id",
+ run_id="run_id",
+ )
+
+ @parametrize
+ def test_method_list(self, client: OpenAI) -> None:
+ output_item = client.evals.runs.output_items.list(
+ run_id="run_id",
+ eval_id="eval_id",
+ )
+ assert_matches_type(SyncCursorPage[OutputItemListResponse], output_item, path=["response"])
+
+ @parametrize
+ def test_method_list_with_all_params(self, client: OpenAI) -> None:
+ output_item = client.evals.runs.output_items.list(
+ run_id="run_id",
+ eval_id="eval_id",
+ after="after",
+ limit=0,
+ order="asc",
+ status="fail",
+ )
+ assert_matches_type(SyncCursorPage[OutputItemListResponse], output_item, path=["response"])
+
+ @parametrize
+ def test_raw_response_list(self, client: OpenAI) -> None:
+ response = client.evals.runs.output_items.with_raw_response.list(
+ run_id="run_id",
+ eval_id="eval_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ output_item = response.parse()
+ assert_matches_type(SyncCursorPage[OutputItemListResponse], output_item, path=["response"])
+
+ @parametrize
+ def test_streaming_response_list(self, client: OpenAI) -> None:
+ with client.evals.runs.output_items.with_streaming_response.list(
+ run_id="run_id",
+ eval_id="eval_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ output_item = response.parse()
+ assert_matches_type(SyncCursorPage[OutputItemListResponse], output_item, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_list(self, client: OpenAI) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"):
+ client.evals.runs.output_items.with_raw_response.list(
+ run_id="run_id",
+ eval_id="",
+ )
+
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"):
+ client.evals.runs.output_items.with_raw_response.list(
+ run_id="",
+ eval_id="eval_id",
+ )
+
+
+class TestAsyncOutputItems:
+ parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @parametrize
+ async def test_method_retrieve(self, async_client: AsyncOpenAI) -> None:
+ output_item = await async_client.evals.runs.output_items.retrieve(
+ output_item_id="output_item_id",
+ eval_id="eval_id",
+ run_id="run_id",
+ )
+ assert_matches_type(OutputItemRetrieveResponse, output_item, path=["response"])
+
+ @parametrize
+ async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None:
+ response = await async_client.evals.runs.output_items.with_raw_response.retrieve(
+ output_item_id="output_item_id",
+ eval_id="eval_id",
+ run_id="run_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ output_item = response.parse()
+ assert_matches_type(OutputItemRetrieveResponse, output_item, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> None:
+ async with async_client.evals.runs.output_items.with_streaming_response.retrieve(
+ output_item_id="output_item_id",
+ eval_id="eval_id",
+ run_id="run_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ output_item = await response.parse()
+ assert_matches_type(OutputItemRetrieveResponse, output_item, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_retrieve(self, async_client: AsyncOpenAI) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"):
+ await async_client.evals.runs.output_items.with_raw_response.retrieve(
+ output_item_id="output_item_id",
+ eval_id="",
+ run_id="run_id",
+ )
+
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"):
+ await async_client.evals.runs.output_items.with_raw_response.retrieve(
+ output_item_id="output_item_id",
+ eval_id="eval_id",
+ run_id="",
+ )
+
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `output_item_id` but received ''"):
+ await async_client.evals.runs.output_items.with_raw_response.retrieve(
+ output_item_id="",
+ eval_id="eval_id",
+ run_id="run_id",
+ )
+
+ @parametrize
+ async def test_method_list(self, async_client: AsyncOpenAI) -> None:
+ output_item = await async_client.evals.runs.output_items.list(
+ run_id="run_id",
+ eval_id="eval_id",
+ )
+ assert_matches_type(AsyncCursorPage[OutputItemListResponse], output_item, path=["response"])
+
+ @parametrize
+ async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> None:
+ output_item = await async_client.evals.runs.output_items.list(
+ run_id="run_id",
+ eval_id="eval_id",
+ after="after",
+ limit=0,
+ order="asc",
+ status="fail",
+ )
+ assert_matches_type(AsyncCursorPage[OutputItemListResponse], output_item, path=["response"])
+
+ @parametrize
+ async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None:
+ response = await async_client.evals.runs.output_items.with_raw_response.list(
+ run_id="run_id",
+ eval_id="eval_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ output_item = response.parse()
+ assert_matches_type(AsyncCursorPage[OutputItemListResponse], output_item, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None:
+ async with async_client.evals.runs.output_items.with_streaming_response.list(
+ run_id="run_id",
+ eval_id="eval_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ output_item = await response.parse()
+ assert_matches_type(AsyncCursorPage[OutputItemListResponse], output_item, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_list(self, async_client: AsyncOpenAI) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"):
+ await async_client.evals.runs.output_items.with_raw_response.list(
+ run_id="run_id",
+ eval_id="",
+ )
+
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"):
+ await async_client.evals.runs.output_items.with_raw_response.list(
+ run_id="",
+ eval_id="eval_id",
+ )
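+
+
+# Editor's note, not generated code: a minimal usage sketch for the endpoints exercised
+# above, assuming a reachable server at TEST_API_BASE_URL and a placeholder API key.
+# The SyncCursorPage result should be iterable directly, as with other cursor-paged resources.
+#
+#     client = OpenAI(base_url=base_url, api_key="placeholder-key")
+#     page = client.evals.runs.output_items.list(run_id="run_id", eval_id="eval_id", limit=10)
+#     for output_item in page:
+#         print(output_item)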
diff --git a/tests/api_resources/evals/test_runs.py b/tests/api_resources/evals/test_runs.py
new file mode 100644
index 0000000000..cefb1c82ff
--- /dev/null
+++ b/tests/api_resources/evals/test_runs.py
@@ -0,0 +1,589 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import os
+from typing import Any, cast
+
+import pytest
+
+from openai import OpenAI, AsyncOpenAI
+from tests.utils import assert_matches_type
+from openai.pagination import SyncCursorPage, AsyncCursorPage
+from openai.types.evals import (
+ RunListResponse,
+ RunCancelResponse,
+ RunCreateResponse,
+ RunDeleteResponse,
+ RunRetrieveResponse,
+)
+
+base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
+
+
+class TestRuns:
+ parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @parametrize
+ def test_method_create(self, client: OpenAI) -> None:
+ run = client.evals.runs.create(
+ eval_id="eval_id",
+ data_source={
+ "source": {
+ "content": [{"item": {"foo": "bar"}}],
+ "type": "file_content",
+ },
+ "type": "jsonl",
+ },
+ )
+ assert_matches_type(RunCreateResponse, run, path=["response"])
+
+ @parametrize
+ def test_method_create_with_all_params(self, client: OpenAI) -> None:
+ run = client.evals.runs.create(
+ eval_id="eval_id",
+ data_source={
+ "source": {
+ "content": [
+ {
+ "item": {"foo": "bar"},
+ "sample": {"foo": "bar"},
+ }
+ ],
+ "type": "file_content",
+ },
+ "type": "jsonl",
+ },
+ metadata={"foo": "string"},
+ name="name",
+ )
+ assert_matches_type(RunCreateResponse, run, path=["response"])
+
+ @parametrize
+ def test_raw_response_create(self, client: OpenAI) -> None:
+ response = client.evals.runs.with_raw_response.create(
+ eval_id="eval_id",
+ data_source={
+ "source": {
+ "content": [{"item": {"foo": "bar"}}],
+ "type": "file_content",
+ },
+ "type": "jsonl",
+ },
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ run = response.parse()
+ assert_matches_type(RunCreateResponse, run, path=["response"])
+
+ @parametrize
+ def test_streaming_response_create(self, client: OpenAI) -> None:
+ with client.evals.runs.with_streaming_response.create(
+ eval_id="eval_id",
+ data_source={
+ "source": {
+ "content": [{"item": {"foo": "bar"}}],
+ "type": "file_content",
+ },
+ "type": "jsonl",
+ },
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ run = response.parse()
+ assert_matches_type(RunCreateResponse, run, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_create(self, client: OpenAI) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"):
+ client.evals.runs.with_raw_response.create(
+ eval_id="",
+ data_source={
+ "source": {
+ "content": [{"item": {"foo": "bar"}}],
+ "type": "file_content",
+ },
+ "type": "jsonl",
+ },
+ )
+
+ @parametrize
+ def test_method_retrieve(self, client: OpenAI) -> None:
+ run = client.evals.runs.retrieve(
+ run_id="run_id",
+ eval_id="eval_id",
+ )
+ assert_matches_type(RunRetrieveResponse, run, path=["response"])
+
+ @parametrize
+ def test_raw_response_retrieve(self, client: OpenAI) -> None:
+ response = client.evals.runs.with_raw_response.retrieve(
+ run_id="run_id",
+ eval_id="eval_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ run = response.parse()
+ assert_matches_type(RunRetrieveResponse, run, path=["response"])
+
+ @parametrize
+ def test_streaming_response_retrieve(self, client: OpenAI) -> None:
+ with client.evals.runs.with_streaming_response.retrieve(
+ run_id="run_id",
+ eval_id="eval_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ run = response.parse()
+ assert_matches_type(RunRetrieveResponse, run, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_retrieve(self, client: OpenAI) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"):
+ client.evals.runs.with_raw_response.retrieve(
+ run_id="run_id",
+ eval_id="",
+ )
+
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"):
+ client.evals.runs.with_raw_response.retrieve(
+ run_id="",
+ eval_id="eval_id",
+ )
+
+ @parametrize
+ def test_method_list(self, client: OpenAI) -> None:
+ run = client.evals.runs.list(
+ eval_id="eval_id",
+ )
+ assert_matches_type(SyncCursorPage[RunListResponse], run, path=["response"])
+
+ @parametrize
+ def test_method_list_with_all_params(self, client: OpenAI) -> None:
+ run = client.evals.runs.list(
+ eval_id="eval_id",
+ after="after",
+ limit=0,
+ order="asc",
+ status="queued",
+ )
+ assert_matches_type(SyncCursorPage[RunListResponse], run, path=["response"])
+
+ @parametrize
+ def test_raw_response_list(self, client: OpenAI) -> None:
+ response = client.evals.runs.with_raw_response.list(
+ eval_id="eval_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ run = response.parse()
+ assert_matches_type(SyncCursorPage[RunListResponse], run, path=["response"])
+
+ @parametrize
+ def test_streaming_response_list(self, client: OpenAI) -> None:
+ with client.evals.runs.with_streaming_response.list(
+ eval_id="eval_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ run = response.parse()
+ assert_matches_type(SyncCursorPage[RunListResponse], run, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_list(self, client: OpenAI) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"):
+ client.evals.runs.with_raw_response.list(
+ eval_id="",
+ )
+
+ @parametrize
+ def test_method_delete(self, client: OpenAI) -> None:
+ run = client.evals.runs.delete(
+ run_id="run_id",
+ eval_id="eval_id",
+ )
+ assert_matches_type(RunDeleteResponse, run, path=["response"])
+
+ @parametrize
+ def test_raw_response_delete(self, client: OpenAI) -> None:
+ response = client.evals.runs.with_raw_response.delete(
+ run_id="run_id",
+ eval_id="eval_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ run = response.parse()
+ assert_matches_type(RunDeleteResponse, run, path=["response"])
+
+ @parametrize
+ def test_streaming_response_delete(self, client: OpenAI) -> None:
+ with client.evals.runs.with_streaming_response.delete(
+ run_id="run_id",
+ eval_id="eval_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ run = response.parse()
+ assert_matches_type(RunDeleteResponse, run, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_delete(self, client: OpenAI) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"):
+ client.evals.runs.with_raw_response.delete(
+ run_id="run_id",
+ eval_id="",
+ )
+
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"):
+ client.evals.runs.with_raw_response.delete(
+ run_id="",
+ eval_id="eval_id",
+ )
+
+ @parametrize
+ def test_method_cancel(self, client: OpenAI) -> None:
+ run = client.evals.runs.cancel(
+ run_id="run_id",
+ eval_id="eval_id",
+ )
+ assert_matches_type(RunCancelResponse, run, path=["response"])
+
+ @parametrize
+ def test_raw_response_cancel(self, client: OpenAI) -> None:
+ response = client.evals.runs.with_raw_response.cancel(
+ run_id="run_id",
+ eval_id="eval_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ run = response.parse()
+ assert_matches_type(RunCancelResponse, run, path=["response"])
+
+ @parametrize
+ def test_streaming_response_cancel(self, client: OpenAI) -> None:
+ with client.evals.runs.with_streaming_response.cancel(
+ run_id="run_id",
+ eval_id="eval_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ run = response.parse()
+ assert_matches_type(RunCancelResponse, run, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_cancel(self, client: OpenAI) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"):
+ client.evals.runs.with_raw_response.cancel(
+ run_id="run_id",
+ eval_id="",
+ )
+
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"):
+ client.evals.runs.with_raw_response.cancel(
+ run_id="",
+ eval_id="eval_id",
+ )
+
+
+class TestAsyncRuns:
+ parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @parametrize
+ async def test_method_create(self, async_client: AsyncOpenAI) -> None:
+ run = await async_client.evals.runs.create(
+ eval_id="eval_id",
+ data_source={
+ "source": {
+ "content": [{"item": {"foo": "bar"}}],
+ "type": "file_content",
+ },
+ "type": "jsonl",
+ },
+ )
+ assert_matches_type(RunCreateResponse, run, path=["response"])
+
+ @parametrize
+ async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None:
+ run = await async_client.evals.runs.create(
+ eval_id="eval_id",
+ data_source={
+ "source": {
+ "content": [
+ {
+ "item": {"foo": "bar"},
+ "sample": {"foo": "bar"},
+ }
+ ],
+ "type": "file_content",
+ },
+ "type": "jsonl",
+ },
+ metadata={"foo": "string"},
+ name="name",
+ )
+ assert_matches_type(RunCreateResponse, run, path=["response"])
+
+ @parametrize
+ async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None:
+ response = await async_client.evals.runs.with_raw_response.create(
+ eval_id="eval_id",
+ data_source={
+ "source": {
+ "content": [{"item": {"foo": "bar"}}],
+ "type": "file_content",
+ },
+ "type": "jsonl",
+ },
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ run = response.parse()
+ assert_matches_type(RunCreateResponse, run, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None:
+ async with async_client.evals.runs.with_streaming_response.create(
+ eval_id="eval_id",
+ data_source={
+ "source": {
+ "content": [{"item": {"foo": "bar"}}],
+ "type": "file_content",
+ },
+ "type": "jsonl",
+ },
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ run = await response.parse()
+ assert_matches_type(RunCreateResponse, run, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_create(self, async_client: AsyncOpenAI) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"):
+ await async_client.evals.runs.with_raw_response.create(
+ eval_id="",
+ data_source={
+ "source": {
+ "content": [{"item": {"foo": "bar"}}],
+ "type": "file_content",
+ },
+ "type": "jsonl",
+ },
+ )
+
+ @parametrize
+ async def test_method_retrieve(self, async_client: AsyncOpenAI) -> None:
+ run = await async_client.evals.runs.retrieve(
+ run_id="run_id",
+ eval_id="eval_id",
+ )
+ assert_matches_type(RunRetrieveResponse, run, path=["response"])
+
+ @parametrize
+ async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None:
+ response = await async_client.evals.runs.with_raw_response.retrieve(
+ run_id="run_id",
+ eval_id="eval_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ run = response.parse()
+ assert_matches_type(RunRetrieveResponse, run, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> None:
+ async with async_client.evals.runs.with_streaming_response.retrieve(
+ run_id="run_id",
+ eval_id="eval_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ run = await response.parse()
+ assert_matches_type(RunRetrieveResponse, run, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_retrieve(self, async_client: AsyncOpenAI) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"):
+ await async_client.evals.runs.with_raw_response.retrieve(
+ run_id="run_id",
+ eval_id="",
+ )
+
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"):
+ await async_client.evals.runs.with_raw_response.retrieve(
+ run_id="",
+ eval_id="eval_id",
+ )
+
+ @parametrize
+ async def test_method_list(self, async_client: AsyncOpenAI) -> None:
+ run = await async_client.evals.runs.list(
+ eval_id="eval_id",
+ )
+ assert_matches_type(AsyncCursorPage[RunListResponse], run, path=["response"])
+
+ @parametrize
+ async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> None:
+ run = await async_client.evals.runs.list(
+ eval_id="eval_id",
+ after="after",
+ limit=0,
+ order="asc",
+ status="queued",
+ )
+ assert_matches_type(AsyncCursorPage[RunListResponse], run, path=["response"])
+
+ @parametrize
+ async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None:
+ response = await async_client.evals.runs.with_raw_response.list(
+ eval_id="eval_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ run = response.parse()
+ assert_matches_type(AsyncCursorPage[RunListResponse], run, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None:
+ async with async_client.evals.runs.with_streaming_response.list(
+ eval_id="eval_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ run = await response.parse()
+ assert_matches_type(AsyncCursorPage[RunListResponse], run, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_list(self, async_client: AsyncOpenAI) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"):
+ await async_client.evals.runs.with_raw_response.list(
+ eval_id="",
+ )
+
+ @parametrize
+ async def test_method_delete(self, async_client: AsyncOpenAI) -> None:
+ run = await async_client.evals.runs.delete(
+ run_id="run_id",
+ eval_id="eval_id",
+ )
+ assert_matches_type(RunDeleteResponse, run, path=["response"])
+
+ @parametrize
+ async def test_raw_response_delete(self, async_client: AsyncOpenAI) -> None:
+ response = await async_client.evals.runs.with_raw_response.delete(
+ run_id="run_id",
+ eval_id="eval_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ run = response.parse()
+ assert_matches_type(RunDeleteResponse, run, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_delete(self, async_client: AsyncOpenAI) -> None:
+ async with async_client.evals.runs.with_streaming_response.delete(
+ run_id="run_id",
+ eval_id="eval_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ run = await response.parse()
+ assert_matches_type(RunDeleteResponse, run, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_delete(self, async_client: AsyncOpenAI) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"):
+ await async_client.evals.runs.with_raw_response.delete(
+ run_id="run_id",
+ eval_id="",
+ )
+
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"):
+ await async_client.evals.runs.with_raw_response.delete(
+ run_id="",
+ eval_id="eval_id",
+ )
+
+ @parametrize
+ async def test_method_cancel(self, async_client: AsyncOpenAI) -> None:
+ run = await async_client.evals.runs.cancel(
+ run_id="run_id",
+ eval_id="eval_id",
+ )
+ assert_matches_type(RunCancelResponse, run, path=["response"])
+
+ @parametrize
+ async def test_raw_response_cancel(self, async_client: AsyncOpenAI) -> None:
+ response = await async_client.evals.runs.with_raw_response.cancel(
+ run_id="run_id",
+ eval_id="eval_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ run = response.parse()
+ assert_matches_type(RunCancelResponse, run, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_cancel(self, async_client: AsyncOpenAI) -> None:
+ async with async_client.evals.runs.with_streaming_response.cancel(
+ run_id="run_id",
+ eval_id="eval_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ run = await response.parse()
+ assert_matches_type(RunCancelResponse, run, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_cancel(self, async_client: AsyncOpenAI) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"):
+ await async_client.evals.runs.with_raw_response.cancel(
+ run_id="run_id",
+ eval_id="",
+ )
+
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"):
+ await async_client.evals.runs.with_raw_response.cancel(
+ run_id="",
+ eval_id="eval_id",
+ )
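+
+
+# Editor's note, not generated code: the minimal `create` payload used in these tests
+# corresponds roughly to the following direct call (a sketch, assuming a reachable server
+# and an existing eval whose id replaces the "eval_id" placeholder):
+#
+#     run = client.evals.runs.create(
+#         eval_id="eval_id",
+#         data_source={
+#             "type": "jsonl",
+#             "source": {"type": "file_content", "content": [{"item": {"foo": "bar"}}]},
+#         },
+#     )
+#     print(run)  # expected to parse as RunCreateResponse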
diff --git a/tests/api_resources/fine_tuning/checkpoints/__init__.py b/tests/api_resources/fine_tuning/checkpoints/__init__.py
new file mode 100644
index 0000000000..fd8019a9a1
--- /dev/null
+++ b/tests/api_resources/fine_tuning/checkpoints/__init__.py
@@ -0,0 +1 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
diff --git a/tests/api_resources/fine_tuning/checkpoints/test_permissions.py b/tests/api_resources/fine_tuning/checkpoints/test_permissions.py
new file mode 100644
index 0000000000..d25c784c33
--- /dev/null
+++ b/tests/api_resources/fine_tuning/checkpoints/test_permissions.py
@@ -0,0 +1,297 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import os
+from typing import Any, cast
+
+import pytest
+
+from openai import OpenAI, AsyncOpenAI
+from tests.utils import assert_matches_type
+from openai.pagination import SyncPage, AsyncPage
+from openai.types.fine_tuning.checkpoints import (
+ PermissionCreateResponse,
+ PermissionDeleteResponse,
+ PermissionRetrieveResponse,
+)
+
+base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
+
+
+class TestPermissions:
+ parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @parametrize
+ def test_method_create(self, client: OpenAI) -> None:
+ permission = client.fine_tuning.checkpoints.permissions.create(
+ fine_tuned_model_checkpoint="ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd",
+ project_ids=["string"],
+ )
+ assert_matches_type(SyncPage[PermissionCreateResponse], permission, path=["response"])
+
+ @parametrize
+ def test_raw_response_create(self, client: OpenAI) -> None:
+ response = client.fine_tuning.checkpoints.permissions.with_raw_response.create(
+ fine_tuned_model_checkpoint="ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd",
+ project_ids=["string"],
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ permission = response.parse()
+ assert_matches_type(SyncPage[PermissionCreateResponse], permission, path=["response"])
+
+ @parametrize
+ def test_streaming_response_create(self, client: OpenAI) -> None:
+ with client.fine_tuning.checkpoints.permissions.with_streaming_response.create(
+ fine_tuned_model_checkpoint="ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd",
+ project_ids=["string"],
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ permission = response.parse()
+ assert_matches_type(SyncPage[PermissionCreateResponse], permission, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_create(self, client: OpenAI) -> None:
+ with pytest.raises(
+ ValueError, match=r"Expected a non-empty value for `fine_tuned_model_checkpoint` but received ''"
+ ):
+ client.fine_tuning.checkpoints.permissions.with_raw_response.create(
+ fine_tuned_model_checkpoint="",
+ project_ids=["string"],
+ )
+
+ @parametrize
+ def test_method_retrieve(self, client: OpenAI) -> None:
+ permission = client.fine_tuning.checkpoints.permissions.retrieve(
+ fine_tuned_model_checkpoint="ft-AF1WoRqd3aJAHsqc9NY7iL8F",
+ )
+ assert_matches_type(PermissionRetrieveResponse, permission, path=["response"])
+
+ @parametrize
+ def test_method_retrieve_with_all_params(self, client: OpenAI) -> None:
+ permission = client.fine_tuning.checkpoints.permissions.retrieve(
+ fine_tuned_model_checkpoint="ft-AF1WoRqd3aJAHsqc9NY7iL8F",
+ after="after",
+ limit=0,
+ order="ascending",
+ project_id="project_id",
+ )
+ assert_matches_type(PermissionRetrieveResponse, permission, path=["response"])
+
+ @parametrize
+ def test_raw_response_retrieve(self, client: OpenAI) -> None:
+ response = client.fine_tuning.checkpoints.permissions.with_raw_response.retrieve(
+ fine_tuned_model_checkpoint="ft-AF1WoRqd3aJAHsqc9NY7iL8F",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ permission = response.parse()
+ assert_matches_type(PermissionRetrieveResponse, permission, path=["response"])
+
+ @parametrize
+ def test_streaming_response_retrieve(self, client: OpenAI) -> None:
+ with client.fine_tuning.checkpoints.permissions.with_streaming_response.retrieve(
+ fine_tuned_model_checkpoint="ft-AF1WoRqd3aJAHsqc9NY7iL8F",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ permission = response.parse()
+ assert_matches_type(PermissionRetrieveResponse, permission, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_retrieve(self, client: OpenAI) -> None:
+ with pytest.raises(
+ ValueError, match=r"Expected a non-empty value for `fine_tuned_model_checkpoint` but received ''"
+ ):
+ client.fine_tuning.checkpoints.permissions.with_raw_response.retrieve(
+ fine_tuned_model_checkpoint="",
+ )
+
+ @parametrize
+ def test_method_delete(self, client: OpenAI) -> None:
+ permission = client.fine_tuning.checkpoints.permissions.delete(
+ "ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd",
+ )
+ assert_matches_type(PermissionDeleteResponse, permission, path=["response"])
+
+ @parametrize
+ def test_raw_response_delete(self, client: OpenAI) -> None:
+ response = client.fine_tuning.checkpoints.permissions.with_raw_response.delete(
+ "ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ permission = response.parse()
+ assert_matches_type(PermissionDeleteResponse, permission, path=["response"])
+
+ @parametrize
+ def test_streaming_response_delete(self, client: OpenAI) -> None:
+ with client.fine_tuning.checkpoints.permissions.with_streaming_response.delete(
+ "ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ permission = response.parse()
+ assert_matches_type(PermissionDeleteResponse, permission, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_delete(self, client: OpenAI) -> None:
+ with pytest.raises(
+ ValueError, match=r"Expected a non-empty value for `fine_tuned_model_checkpoint` but received ''"
+ ):
+ client.fine_tuning.checkpoints.permissions.with_raw_response.delete(
+ "",
+ )
+
+
+class TestAsyncPermissions:
+ parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @parametrize
+ async def test_method_create(self, async_client: AsyncOpenAI) -> None:
+ permission = await async_client.fine_tuning.checkpoints.permissions.create(
+ fine_tuned_model_checkpoint="ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd",
+ project_ids=["string"],
+ )
+ assert_matches_type(AsyncPage[PermissionCreateResponse], permission, path=["response"])
+
+ @parametrize
+ async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None:
+ response = await async_client.fine_tuning.checkpoints.permissions.with_raw_response.create(
+ fine_tuned_model_checkpoint="ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd",
+ project_ids=["string"],
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ permission = response.parse()
+ assert_matches_type(AsyncPage[PermissionCreateResponse], permission, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None:
+ async with async_client.fine_tuning.checkpoints.permissions.with_streaming_response.create(
+ fine_tuned_model_checkpoint="ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd",
+ project_ids=["string"],
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ permission = await response.parse()
+ assert_matches_type(AsyncPage[PermissionCreateResponse], permission, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_create(self, async_client: AsyncOpenAI) -> None:
+ with pytest.raises(
+ ValueError, match=r"Expected a non-empty value for `fine_tuned_model_checkpoint` but received ''"
+ ):
+ await async_client.fine_tuning.checkpoints.permissions.with_raw_response.create(
+ fine_tuned_model_checkpoint="",
+ project_ids=["string"],
+ )
+
+ @parametrize
+ async def test_method_retrieve(self, async_client: AsyncOpenAI) -> None:
+ permission = await async_client.fine_tuning.checkpoints.permissions.retrieve(
+ fine_tuned_model_checkpoint="ft-AF1WoRqd3aJAHsqc9NY7iL8F",
+ )
+ assert_matches_type(PermissionRetrieveResponse, permission, path=["response"])
+
+ @parametrize
+ async def test_method_retrieve_with_all_params(self, async_client: AsyncOpenAI) -> None:
+ permission = await async_client.fine_tuning.checkpoints.permissions.retrieve(
+ fine_tuned_model_checkpoint="ft-AF1WoRqd3aJAHsqc9NY7iL8F",
+ after="after",
+ limit=0,
+ order="ascending",
+ project_id="project_id",
+ )
+ assert_matches_type(PermissionRetrieveResponse, permission, path=["response"])
+
+ @parametrize
+ async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None:
+ response = await async_client.fine_tuning.checkpoints.permissions.with_raw_response.retrieve(
+ fine_tuned_model_checkpoint="ft-AF1WoRqd3aJAHsqc9NY7iL8F",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ permission = response.parse()
+ assert_matches_type(PermissionRetrieveResponse, permission, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> None:
+ async with async_client.fine_tuning.checkpoints.permissions.with_streaming_response.retrieve(
+ fine_tuned_model_checkpoint="ft-AF1WoRqd3aJAHsqc9NY7iL8F",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ permission = await response.parse()
+ assert_matches_type(PermissionRetrieveResponse, permission, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_retrieve(self, async_client: AsyncOpenAI) -> None:
+ with pytest.raises(
+ ValueError, match=r"Expected a non-empty value for `fine_tuned_model_checkpoint` but received ''"
+ ):
+ await async_client.fine_tuning.checkpoints.permissions.with_raw_response.retrieve(
+ fine_tuned_model_checkpoint="",
+ )
+
+ @parametrize
+ async def test_method_delete(self, async_client: AsyncOpenAI) -> None:
+ permission = await async_client.fine_tuning.checkpoints.permissions.delete(
+ "ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd",
+ )
+ assert_matches_type(PermissionDeleteResponse, permission, path=["response"])
+
+ @parametrize
+ async def test_raw_response_delete(self, async_client: AsyncOpenAI) -> None:
+ response = await async_client.fine_tuning.checkpoints.permissions.with_raw_response.delete(
+ "ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ permission = response.parse()
+ assert_matches_type(PermissionDeleteResponse, permission, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_delete(self, async_client: AsyncOpenAI) -> None:
+ async with async_client.fine_tuning.checkpoints.permissions.with_streaming_response.delete(
+ "ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ permission = await response.parse()
+ assert_matches_type(PermissionDeleteResponse, permission, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_delete(self, async_client: AsyncOpenAI) -> None:
+ with pytest.raises(
+ ValueError, match=r"Expected a non-empty value for `fine_tuned_model_checkpoint` but received ''"
+ ):
+ await async_client.fine_tuning.checkpoints.permissions.with_raw_response.delete(
+ "",
+ )
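+
+
+# Editor's note, not generated code: a hedged sketch of the same permissions flow outside
+# the test fixtures; the checkpoint and project identifiers below are placeholders.
+#
+#     checkpoint = "ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd"
+#     created = client.fine_tuning.checkpoints.permissions.create(
+#         fine_tuned_model_checkpoint=checkpoint,
+#         project_ids=["proj_123"],
+#     )  # expected to parse as SyncPage[PermissionCreateResponse]
+#     client.fine_tuning.checkpoints.permissions.delete(checkpoint)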
diff --git a/tests/api_resources/test_evals.py b/tests/api_resources/test_evals.py
new file mode 100644
index 0000000000..33ba92cda5
--- /dev/null
+++ b/tests/api_resources/test_evals.py
@@ -0,0 +1,1701 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import os
+from typing import Any, cast
+
+import pytest
+
+from openai import OpenAI, AsyncOpenAI
+from tests.utils import assert_matches_type
+from openai.types import (
+ EvalListResponse,
+ EvalCreateResponse,
+ EvalDeleteResponse,
+ EvalUpdateResponse,
+ EvalRetrieveResponse,
+)
+from openai.pagination import SyncCursorPage, AsyncCursorPage
+
+base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
+
+
+class TestEvals:
+ parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @parametrize
+ def test_method_create(self, client: OpenAI) -> None:
+ eval = client.evals.create(
+ data_source_config={
+ "item_schema": {
+ "0": "bar",
+ "1": "bar",
+ "2": "bar",
+ "3": "bar",
+ "4": "bar",
+ "5": "bar",
+ "6": "bar",
+ "7": "bar",
+ "8": "bar",
+ "9": "bar",
+ "10": "bar",
+ "11": "bar",
+ "12": "bar",
+ "13": "bar",
+ "14": "bar",
+ "15": "bar",
+ "16": "bar",
+ "17": "bar",
+ "18": "bar",
+ "19": "bar",
+ "20": "bar",
+ "21": "bar",
+ "22": "bar",
+ "23": "bar",
+ "24": "bar",
+ "25": "bar",
+ "26": "bar",
+ "27": "bar",
+ "28": "bar",
+ "29": "bar",
+ "30": "bar",
+ "31": "bar",
+ "32": "bar",
+ "33": "bar",
+ "34": "bar",
+ "35": "bar",
+ "36": "bar",
+ "37": "bar",
+ "38": "bar",
+ "39": "bar",
+ "40": "bar",
+ "41": "bar",
+ "42": "bar",
+ "43": "bar",
+ "44": "bar",
+ "45": "bar",
+ "46": "bar",
+ "47": "bar",
+ "48": "bar",
+ "49": "bar",
+ "50": "bar",
+ "51": "bar",
+ "52": "bar",
+ "53": "bar",
+ "54": "bar",
+ "55": "bar",
+ "56": "bar",
+ "57": "bar",
+ "58": "bar",
+ "59": "bar",
+ "60": "bar",
+ "61": "bar",
+ "62": "bar",
+ "63": "bar",
+ "64": "bar",
+ "65": "bar",
+ "66": "bar",
+ "67": "bar",
+ "68": "bar",
+ "69": "bar",
+ "70": "bar",
+ "71": "bar",
+ "72": "bar",
+ "73": "bar",
+ "74": "bar",
+ "75": "bar",
+ "76": "bar",
+ "77": "bar",
+ "78": "bar",
+ "79": "bar",
+ "80": "bar",
+ "81": "bar",
+ "82": "bar",
+ "83": "bar",
+ "84": "bar",
+ "85": "bar",
+ "86": "bar",
+ "87": "bar",
+ "88": "bar",
+ "89": "bar",
+ "90": "bar",
+ "91": "bar",
+ "92": "bar",
+ "93": "bar",
+ "94": "bar",
+ "95": "bar",
+ "96": "bar",
+ "97": "bar",
+ "98": "bar",
+ "99": "bar",
+ "100": "bar",
+ "101": "bar",
+ "102": "bar",
+ "103": "bar",
+ "104": "bar",
+ "105": "bar",
+ "106": "bar",
+ "107": "bar",
+ "108": "bar",
+ "109": "bar",
+ "110": "bar",
+ "111": "bar",
+ "112": "bar",
+ "113": "bar",
+ "114": "bar",
+ "115": "bar",
+ "116": "bar",
+ "117": "bar",
+ "118": "bar",
+ "119": "bar",
+ "120": "bar",
+ "121": "bar",
+ "122": "bar",
+ "123": "bar",
+ "124": "bar",
+ "125": "bar",
+ "126": "bar",
+ "127": "bar",
+ "128": "bar",
+ "129": "bar",
+ "130": "bar",
+ "131": "bar",
+ "132": "bar",
+ "133": "bar",
+ "134": "bar",
+ "135": "bar",
+ "136": "bar",
+ "137": "bar",
+ "138": "bar",
+ "139": "bar",
+ },
+ "type": "custom",
+ },
+ testing_criteria=[
+ {
+ "input": [
+ {
+ "content": "content",
+ "role": "role",
+ }
+ ],
+ "labels": ["string"],
+ "model": "model",
+ "name": "name",
+ "passing_labels": ["string"],
+ "type": "label_model",
+ }
+ ],
+ )
+ assert_matches_type(EvalCreateResponse, eval, path=["response"])
+
+ @parametrize
+ def test_method_create_with_all_params(self, client: OpenAI) -> None:
+ eval = client.evals.create(
+ data_source_config={
+ "item_schema": {
+ "0": "bar",
+ "1": "bar",
+ "2": "bar",
+ "3": "bar",
+ "4": "bar",
+ "5": "bar",
+ "6": "bar",
+ "7": "bar",
+ "8": "bar",
+ "9": "bar",
+ "10": "bar",
+ "11": "bar",
+ "12": "bar",
+ "13": "bar",
+ "14": "bar",
+ "15": "bar",
+ "16": "bar",
+ "17": "bar",
+ "18": "bar",
+ "19": "bar",
+ "20": "bar",
+ "21": "bar",
+ "22": "bar",
+ "23": "bar",
+ "24": "bar",
+ "25": "bar",
+ "26": "bar",
+ "27": "bar",
+ "28": "bar",
+ "29": "bar",
+ "30": "bar",
+ "31": "bar",
+ "32": "bar",
+ "33": "bar",
+ "34": "bar",
+ "35": "bar",
+ "36": "bar",
+ "37": "bar",
+ "38": "bar",
+ "39": "bar",
+ "40": "bar",
+ "41": "bar",
+ "42": "bar",
+ "43": "bar",
+ "44": "bar",
+ "45": "bar",
+ "46": "bar",
+ "47": "bar",
+ "48": "bar",
+ "49": "bar",
+ "50": "bar",
+ "51": "bar",
+ "52": "bar",
+ "53": "bar",
+ "54": "bar",
+ "55": "bar",
+ "56": "bar",
+ "57": "bar",
+ "58": "bar",
+ "59": "bar",
+ "60": "bar",
+ "61": "bar",
+ "62": "bar",
+ "63": "bar",
+ "64": "bar",
+ "65": "bar",
+ "66": "bar",
+ "67": "bar",
+ "68": "bar",
+ "69": "bar",
+ "70": "bar",
+ "71": "bar",
+ "72": "bar",
+ "73": "bar",
+ "74": "bar",
+ "75": "bar",
+ "76": "bar",
+ "77": "bar",
+ "78": "bar",
+ "79": "bar",
+ "80": "bar",
+ "81": "bar",
+ "82": "bar",
+ "83": "bar",
+ "84": "bar",
+ "85": "bar",
+ "86": "bar",
+ "87": "bar",
+ "88": "bar",
+ "89": "bar",
+ "90": "bar",
+ "91": "bar",
+ "92": "bar",
+ "93": "bar",
+ "94": "bar",
+ "95": "bar",
+ "96": "bar",
+ "97": "bar",
+ "98": "bar",
+ "99": "bar",
+ "100": "bar",
+ "101": "bar",
+ "102": "bar",
+ "103": "bar",
+ "104": "bar",
+ "105": "bar",
+ "106": "bar",
+ "107": "bar",
+ "108": "bar",
+ "109": "bar",
+ "110": "bar",
+ "111": "bar",
+ "112": "bar",
+ "113": "bar",
+ "114": "bar",
+ "115": "bar",
+ "116": "bar",
+ "117": "bar",
+ "118": "bar",
+ "119": "bar",
+ "120": "bar",
+ "121": "bar",
+ "122": "bar",
+ "123": "bar",
+ "124": "bar",
+ "125": "bar",
+ "126": "bar",
+ "127": "bar",
+ "128": "bar",
+ "129": "bar",
+ "130": "bar",
+ "131": "bar",
+ "132": "bar",
+ "133": "bar",
+ "134": "bar",
+ "135": "bar",
+ "136": "bar",
+ "137": "bar",
+ "138": "bar",
+ "139": "bar",
+ },
+ "type": "custom",
+ "include_sample_schema": True,
+ },
+ testing_criteria=[
+ {
+ "input": [
+ {
+ "content": "content",
+ "role": "role",
+ }
+ ],
+ "labels": ["string"],
+ "model": "model",
+ "name": "name",
+ "passing_labels": ["string"],
+ "type": "label_model",
+ }
+ ],
+ metadata={"foo": "string"},
+ name="name",
+ share_with_openai=True,
+ )
+ assert_matches_type(EvalCreateResponse, eval, path=["response"])
+
+ @parametrize
+ def test_raw_response_create(self, client: OpenAI) -> None:
+ response = client.evals.with_raw_response.create(
+ data_source_config={
+ "item_schema": {
+ "0": "bar",
+ "1": "bar",
+ "2": "bar",
+ "3": "bar",
+ "4": "bar",
+ "5": "bar",
+ "6": "bar",
+ "7": "bar",
+ "8": "bar",
+ "9": "bar",
+ "10": "bar",
+ "11": "bar",
+ "12": "bar",
+ "13": "bar",
+ "14": "bar",
+ "15": "bar",
+ "16": "bar",
+ "17": "bar",
+ "18": "bar",
+ "19": "bar",
+ "20": "bar",
+ "21": "bar",
+ "22": "bar",
+ "23": "bar",
+ "24": "bar",
+ "25": "bar",
+ "26": "bar",
+ "27": "bar",
+ "28": "bar",
+ "29": "bar",
+ "30": "bar",
+ "31": "bar",
+ "32": "bar",
+ "33": "bar",
+ "34": "bar",
+ "35": "bar",
+ "36": "bar",
+ "37": "bar",
+ "38": "bar",
+ "39": "bar",
+ "40": "bar",
+ "41": "bar",
+ "42": "bar",
+ "43": "bar",
+ "44": "bar",
+ "45": "bar",
+ "46": "bar",
+ "47": "bar",
+ "48": "bar",
+ "49": "bar",
+ "50": "bar",
+ "51": "bar",
+ "52": "bar",
+ "53": "bar",
+ "54": "bar",
+ "55": "bar",
+ "56": "bar",
+ "57": "bar",
+ "58": "bar",
+ "59": "bar",
+ "60": "bar",
+ "61": "bar",
+ "62": "bar",
+ "63": "bar",
+ "64": "bar",
+ "65": "bar",
+ "66": "bar",
+ "67": "bar",
+ "68": "bar",
+ "69": "bar",
+ "70": "bar",
+ "71": "bar",
+ "72": "bar",
+ "73": "bar",
+ "74": "bar",
+ "75": "bar",
+ "76": "bar",
+ "77": "bar",
+ "78": "bar",
+ "79": "bar",
+ "80": "bar",
+ "81": "bar",
+ "82": "bar",
+ "83": "bar",
+ "84": "bar",
+ "85": "bar",
+ "86": "bar",
+ "87": "bar",
+ "88": "bar",
+ "89": "bar",
+ "90": "bar",
+ "91": "bar",
+ "92": "bar",
+ "93": "bar",
+ "94": "bar",
+ "95": "bar",
+ "96": "bar",
+ "97": "bar",
+ "98": "bar",
+ "99": "bar",
+ "100": "bar",
+ "101": "bar",
+ "102": "bar",
+ "103": "bar",
+ "104": "bar",
+ "105": "bar",
+ "106": "bar",
+ "107": "bar",
+ "108": "bar",
+ "109": "bar",
+ "110": "bar",
+ "111": "bar",
+ "112": "bar",
+ "113": "bar",
+ "114": "bar",
+ "115": "bar",
+ "116": "bar",
+ "117": "bar",
+ "118": "bar",
+ "119": "bar",
+ "120": "bar",
+ "121": "bar",
+ "122": "bar",
+ "123": "bar",
+ "124": "bar",
+ "125": "bar",
+ "126": "bar",
+ "127": "bar",
+ "128": "bar",
+ "129": "bar",
+ "130": "bar",
+ "131": "bar",
+ "132": "bar",
+ "133": "bar",
+ "134": "bar",
+ "135": "bar",
+ "136": "bar",
+ "137": "bar",
+ "138": "bar",
+ "139": "bar",
+ },
+ "type": "custom",
+ },
+ testing_criteria=[
+ {
+ "input": [
+ {
+ "content": "content",
+ "role": "role",
+ }
+ ],
+ "labels": ["string"],
+ "model": "model",
+ "name": "name",
+ "passing_labels": ["string"],
+ "type": "label_model",
+ }
+ ],
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ eval = response.parse()
+ assert_matches_type(EvalCreateResponse, eval, path=["response"])
+
+ @parametrize
+ def test_streaming_response_create(self, client: OpenAI) -> None:
+ with client.evals.with_streaming_response.create(
+ data_source_config={
+ "item_schema": {
+ "0": "bar",
+ "1": "bar",
+ "2": "bar",
+ "3": "bar",
+ "4": "bar",
+ "5": "bar",
+ "6": "bar",
+ "7": "bar",
+ "8": "bar",
+ "9": "bar",
+ "10": "bar",
+ "11": "bar",
+ "12": "bar",
+ "13": "bar",
+ "14": "bar",
+ "15": "bar",
+ "16": "bar",
+ "17": "bar",
+ "18": "bar",
+ "19": "bar",
+ "20": "bar",
+ "21": "bar",
+ "22": "bar",
+ "23": "bar",
+ "24": "bar",
+ "25": "bar",
+ "26": "bar",
+ "27": "bar",
+ "28": "bar",
+ "29": "bar",
+ "30": "bar",
+ "31": "bar",
+ "32": "bar",
+ "33": "bar",
+ "34": "bar",
+ "35": "bar",
+ "36": "bar",
+ "37": "bar",
+ "38": "bar",
+ "39": "bar",
+ "40": "bar",
+ "41": "bar",
+ "42": "bar",
+ "43": "bar",
+ "44": "bar",
+ "45": "bar",
+ "46": "bar",
+ "47": "bar",
+ "48": "bar",
+ "49": "bar",
+ "50": "bar",
+ "51": "bar",
+ "52": "bar",
+ "53": "bar",
+ "54": "bar",
+ "55": "bar",
+ "56": "bar",
+ "57": "bar",
+ "58": "bar",
+ "59": "bar",
+ "60": "bar",
+ "61": "bar",
+ "62": "bar",
+ "63": "bar",
+ "64": "bar",
+ "65": "bar",
+ "66": "bar",
+ "67": "bar",
+ "68": "bar",
+ "69": "bar",
+ "70": "bar",
+ "71": "bar",
+ "72": "bar",
+ "73": "bar",
+ "74": "bar",
+ "75": "bar",
+ "76": "bar",
+ "77": "bar",
+ "78": "bar",
+ "79": "bar",
+ "80": "bar",
+ "81": "bar",
+ "82": "bar",
+ "83": "bar",
+ "84": "bar",
+ "85": "bar",
+ "86": "bar",
+ "87": "bar",
+ "88": "bar",
+ "89": "bar",
+ "90": "bar",
+ "91": "bar",
+ "92": "bar",
+ "93": "bar",
+ "94": "bar",
+ "95": "bar",
+ "96": "bar",
+ "97": "bar",
+ "98": "bar",
+ "99": "bar",
+ "100": "bar",
+ "101": "bar",
+ "102": "bar",
+ "103": "bar",
+ "104": "bar",
+ "105": "bar",
+ "106": "bar",
+ "107": "bar",
+ "108": "bar",
+ "109": "bar",
+ "110": "bar",
+ "111": "bar",
+ "112": "bar",
+ "113": "bar",
+ "114": "bar",
+ "115": "bar",
+ "116": "bar",
+ "117": "bar",
+ "118": "bar",
+ "119": "bar",
+ "120": "bar",
+ "121": "bar",
+ "122": "bar",
+ "123": "bar",
+ "124": "bar",
+ "125": "bar",
+ "126": "bar",
+ "127": "bar",
+ "128": "bar",
+ "129": "bar",
+ "130": "bar",
+ "131": "bar",
+ "132": "bar",
+ "133": "bar",
+ "134": "bar",
+ "135": "bar",
+ "136": "bar",
+ "137": "bar",
+ "138": "bar",
+ "139": "bar",
+ },
+ "type": "custom",
+ },
+ testing_criteria=[
+ {
+ "input": [
+ {
+ "content": "content",
+ "role": "role",
+ }
+ ],
+ "labels": ["string"],
+ "model": "model",
+ "name": "name",
+ "passing_labels": ["string"],
+ "type": "label_model",
+ }
+ ],
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ eval = response.parse()
+ assert_matches_type(EvalCreateResponse, eval, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_method_retrieve(self, client: OpenAI) -> None:
+ eval = client.evals.retrieve(
+ "eval_id",
+ )
+ assert_matches_type(EvalRetrieveResponse, eval, path=["response"])
+
+ @parametrize
+ def test_raw_response_retrieve(self, client: OpenAI) -> None:
+ response = client.evals.with_raw_response.retrieve(
+ "eval_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ eval = response.parse()
+ assert_matches_type(EvalRetrieveResponse, eval, path=["response"])
+
+ @parametrize
+ def test_streaming_response_retrieve(self, client: OpenAI) -> None:
+ with client.evals.with_streaming_response.retrieve(
+ "eval_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ eval = response.parse()
+ assert_matches_type(EvalRetrieveResponse, eval, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_retrieve(self, client: OpenAI) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"):
+ client.evals.with_raw_response.retrieve(
+ "",
+ )
+
+ @parametrize
+ def test_method_update(self, client: OpenAI) -> None:
+ eval = client.evals.update(
+ eval_id="eval_id",
+ )
+ assert_matches_type(EvalUpdateResponse, eval, path=["response"])
+
+ @parametrize
+ def test_method_update_with_all_params(self, client: OpenAI) -> None:
+ eval = client.evals.update(
+ eval_id="eval_id",
+ metadata={"foo": "string"},
+ name="name",
+ )
+ assert_matches_type(EvalUpdateResponse, eval, path=["response"])
+
+ @parametrize
+ def test_raw_response_update(self, client: OpenAI) -> None:
+ response = client.evals.with_raw_response.update(
+ eval_id="eval_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ eval = response.parse()
+ assert_matches_type(EvalUpdateResponse, eval, path=["response"])
+
+ @parametrize
+ def test_streaming_response_update(self, client: OpenAI) -> None:
+ with client.evals.with_streaming_response.update(
+ eval_id="eval_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ eval = response.parse()
+ assert_matches_type(EvalUpdateResponse, eval, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_update(self, client: OpenAI) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"):
+ client.evals.with_raw_response.update(
+ eval_id="",
+ )
+
+ @parametrize
+ def test_method_list(self, client: OpenAI) -> None:
+ eval = client.evals.list()
+ assert_matches_type(SyncCursorPage[EvalListResponse], eval, path=["response"])
+
+ @parametrize
+ def test_method_list_with_all_params(self, client: OpenAI) -> None:
+ eval = client.evals.list(
+ after="after",
+ limit=0,
+ order="asc",
+ order_by="created_at",
+ )
+ assert_matches_type(SyncCursorPage[EvalListResponse], eval, path=["response"])
+
+ @parametrize
+ def test_raw_response_list(self, client: OpenAI) -> None:
+ response = client.evals.with_raw_response.list()
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ eval = response.parse()
+ assert_matches_type(SyncCursorPage[EvalListResponse], eval, path=["response"])
+
+ @parametrize
+ def test_streaming_response_list(self, client: OpenAI) -> None:
+ with client.evals.with_streaming_response.list() as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ eval = response.parse()
+ assert_matches_type(SyncCursorPage[EvalListResponse], eval, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_method_delete(self, client: OpenAI) -> None:
+ eval = client.evals.delete(
+ "eval_id",
+ )
+ assert_matches_type(EvalDeleteResponse, eval, path=["response"])
+
+ @parametrize
+ def test_raw_response_delete(self, client: OpenAI) -> None:
+ response = client.evals.with_raw_response.delete(
+ "eval_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ eval = response.parse()
+ assert_matches_type(EvalDeleteResponse, eval, path=["response"])
+
+ @parametrize
+ def test_streaming_response_delete(self, client: OpenAI) -> None:
+ with client.evals.with_streaming_response.delete(
+ "eval_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ eval = response.parse()
+ assert_matches_type(EvalDeleteResponse, eval, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_delete(self, client: OpenAI) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"):
+ client.evals.with_raw_response.delete(
+ "",
+ )
+
+
+class TestAsyncEvals:
+ parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @parametrize
+ async def test_method_create(self, async_client: AsyncOpenAI) -> None:
+ eval = await async_client.evals.create(
+ data_source_config={
+ "item_schema": {
+ "0": "bar",
+ "1": "bar",
+ "2": "bar",
+ "3": "bar",
+ "4": "bar",
+ "5": "bar",
+ "6": "bar",
+ "7": "bar",
+ "8": "bar",
+ "9": "bar",
+ "10": "bar",
+ "11": "bar",
+ "12": "bar",
+ "13": "bar",
+ "14": "bar",
+ "15": "bar",
+ "16": "bar",
+ "17": "bar",
+ "18": "bar",
+ "19": "bar",
+ "20": "bar",
+ "21": "bar",
+ "22": "bar",
+ "23": "bar",
+ "24": "bar",
+ "25": "bar",
+ "26": "bar",
+ "27": "bar",
+ "28": "bar",
+ "29": "bar",
+ "30": "bar",
+ "31": "bar",
+ "32": "bar",
+ "33": "bar",
+ "34": "bar",
+ "35": "bar",
+ "36": "bar",
+ "37": "bar",
+ "38": "bar",
+ "39": "bar",
+ "40": "bar",
+ "41": "bar",
+ "42": "bar",
+ "43": "bar",
+ "44": "bar",
+ "45": "bar",
+ "46": "bar",
+ "47": "bar",
+ "48": "bar",
+ "49": "bar",
+ "50": "bar",
+ "51": "bar",
+ "52": "bar",
+ "53": "bar",
+ "54": "bar",
+ "55": "bar",
+ "56": "bar",
+ "57": "bar",
+ "58": "bar",
+ "59": "bar",
+ "60": "bar",
+ "61": "bar",
+ "62": "bar",
+ "63": "bar",
+ "64": "bar",
+ "65": "bar",
+ "66": "bar",
+ "67": "bar",
+ "68": "bar",
+ "69": "bar",
+ "70": "bar",
+ "71": "bar",
+ "72": "bar",
+ "73": "bar",
+ "74": "bar",
+ "75": "bar",
+ "76": "bar",
+ "77": "bar",
+ "78": "bar",
+ "79": "bar",
+ "80": "bar",
+ "81": "bar",
+ "82": "bar",
+ "83": "bar",
+ "84": "bar",
+ "85": "bar",
+ "86": "bar",
+ "87": "bar",
+ "88": "bar",
+ "89": "bar",
+ "90": "bar",
+ "91": "bar",
+ "92": "bar",
+ "93": "bar",
+ "94": "bar",
+ "95": "bar",
+ "96": "bar",
+ "97": "bar",
+ "98": "bar",
+ "99": "bar",
+ "100": "bar",
+ "101": "bar",
+ "102": "bar",
+ "103": "bar",
+ "104": "bar",
+ "105": "bar",
+ "106": "bar",
+ "107": "bar",
+ "108": "bar",
+ "109": "bar",
+ "110": "bar",
+ "111": "bar",
+ "112": "bar",
+ "113": "bar",
+ "114": "bar",
+ "115": "bar",
+ "116": "bar",
+ "117": "bar",
+ "118": "bar",
+ "119": "bar",
+ "120": "bar",
+ "121": "bar",
+ "122": "bar",
+ "123": "bar",
+ "124": "bar",
+ "125": "bar",
+ "126": "bar",
+ "127": "bar",
+ "128": "bar",
+ "129": "bar",
+ "130": "bar",
+ "131": "bar",
+ "132": "bar",
+ "133": "bar",
+ "134": "bar",
+ "135": "bar",
+ "136": "bar",
+ "137": "bar",
+ "138": "bar",
+ "139": "bar",
+ },
+ "type": "custom",
+ },
+ testing_criteria=[
+ {
+ "input": [
+ {
+ "content": "content",
+ "role": "role",
+ }
+ ],
+ "labels": ["string"],
+ "model": "model",
+ "name": "name",
+ "passing_labels": ["string"],
+ "type": "label_model",
+ }
+ ],
+ )
+ assert_matches_type(EvalCreateResponse, eval, path=["response"])
+
+ @parametrize
+ async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None:
+ eval = await async_client.evals.create(
+ data_source_config={
+ "item_schema": {
+ "0": "bar",
+ "1": "bar",
+ "2": "bar",
+ "3": "bar",
+ "4": "bar",
+ "5": "bar",
+ "6": "bar",
+ "7": "bar",
+ "8": "bar",
+ "9": "bar",
+ "10": "bar",
+ "11": "bar",
+ "12": "bar",
+ "13": "bar",
+ "14": "bar",
+ "15": "bar",
+ "16": "bar",
+ "17": "bar",
+ "18": "bar",
+ "19": "bar",
+ "20": "bar",
+ "21": "bar",
+ "22": "bar",
+ "23": "bar",
+ "24": "bar",
+ "25": "bar",
+ "26": "bar",
+ "27": "bar",
+ "28": "bar",
+ "29": "bar",
+ "30": "bar",
+ "31": "bar",
+ "32": "bar",
+ "33": "bar",
+ "34": "bar",
+ "35": "bar",
+ "36": "bar",
+ "37": "bar",
+ "38": "bar",
+ "39": "bar",
+ "40": "bar",
+ "41": "bar",
+ "42": "bar",
+ "43": "bar",
+ "44": "bar",
+ "45": "bar",
+ "46": "bar",
+ "47": "bar",
+ "48": "bar",
+ "49": "bar",
+ "50": "bar",
+ "51": "bar",
+ "52": "bar",
+ "53": "bar",
+ "54": "bar",
+ "55": "bar",
+ "56": "bar",
+ "57": "bar",
+ "58": "bar",
+ "59": "bar",
+ "60": "bar",
+ "61": "bar",
+ "62": "bar",
+ "63": "bar",
+ "64": "bar",
+ "65": "bar",
+ "66": "bar",
+ "67": "bar",
+ "68": "bar",
+ "69": "bar",
+ "70": "bar",
+ "71": "bar",
+ "72": "bar",
+ "73": "bar",
+ "74": "bar",
+ "75": "bar",
+ "76": "bar",
+ "77": "bar",
+ "78": "bar",
+ "79": "bar",
+ "80": "bar",
+ "81": "bar",
+ "82": "bar",
+ "83": "bar",
+ "84": "bar",
+ "85": "bar",
+ "86": "bar",
+ "87": "bar",
+ "88": "bar",
+ "89": "bar",
+ "90": "bar",
+ "91": "bar",
+ "92": "bar",
+ "93": "bar",
+ "94": "bar",
+ "95": "bar",
+ "96": "bar",
+ "97": "bar",
+ "98": "bar",
+ "99": "bar",
+ "100": "bar",
+ "101": "bar",
+ "102": "bar",
+ "103": "bar",
+ "104": "bar",
+ "105": "bar",
+ "106": "bar",
+ "107": "bar",
+ "108": "bar",
+ "109": "bar",
+ "110": "bar",
+ "111": "bar",
+ "112": "bar",
+ "113": "bar",
+ "114": "bar",
+ "115": "bar",
+ "116": "bar",
+ "117": "bar",
+ "118": "bar",
+ "119": "bar",
+ "120": "bar",
+ "121": "bar",
+ "122": "bar",
+ "123": "bar",
+ "124": "bar",
+ "125": "bar",
+ "126": "bar",
+ "127": "bar",
+ "128": "bar",
+ "129": "bar",
+ "130": "bar",
+ "131": "bar",
+ "132": "bar",
+ "133": "bar",
+ "134": "bar",
+ "135": "bar",
+ "136": "bar",
+ "137": "bar",
+ "138": "bar",
+ "139": "bar",
+ },
+ "type": "custom",
+ "include_sample_schema": True,
+ },
+ testing_criteria=[
+ {
+ "input": [
+ {
+ "content": "content",
+ "role": "role",
+ }
+ ],
+ "labels": ["string"],
+ "model": "model",
+ "name": "name",
+ "passing_labels": ["string"],
+ "type": "label_model",
+ }
+ ],
+ metadata={"foo": "string"},
+ name="name",
+ share_with_openai=True,
+ )
+ assert_matches_type(EvalCreateResponse, eval, path=["response"])
+
+ @parametrize
+ async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None:
+ response = await async_client.evals.with_raw_response.create(
+ data_source_config={
+                "item_schema": {str(i): "bar" for i in range(140)},
+ "type": "custom",
+ },
+ testing_criteria=[
+ {
+ "input": [
+ {
+ "content": "content",
+ "role": "role",
+ }
+ ],
+ "labels": ["string"],
+ "model": "model",
+ "name": "name",
+ "passing_labels": ["string"],
+ "type": "label_model",
+ }
+ ],
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ eval = response.parse()
+ assert_matches_type(EvalCreateResponse, eval, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None:
+ async with async_client.evals.with_streaming_response.create(
+ data_source_config={
+                "item_schema": {str(i): "bar" for i in range(140)},
+ "type": "custom",
+ },
+ testing_criteria=[
+ {
+ "input": [
+ {
+ "content": "content",
+ "role": "role",
+ }
+ ],
+ "labels": ["string"],
+ "model": "model",
+ "name": "name",
+ "passing_labels": ["string"],
+ "type": "label_model",
+ }
+ ],
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ eval = await response.parse()
+ assert_matches_type(EvalCreateResponse, eval, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_method_retrieve(self, async_client: AsyncOpenAI) -> None:
+ eval = await async_client.evals.retrieve(
+ "eval_id",
+ )
+ assert_matches_type(EvalRetrieveResponse, eval, path=["response"])
+
+ @parametrize
+ async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None:
+ response = await async_client.evals.with_raw_response.retrieve(
+ "eval_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ eval = response.parse()
+ assert_matches_type(EvalRetrieveResponse, eval, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> None:
+ async with async_client.evals.with_streaming_response.retrieve(
+ "eval_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ eval = await response.parse()
+ assert_matches_type(EvalRetrieveResponse, eval, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_retrieve(self, async_client: AsyncOpenAI) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"):
+ await async_client.evals.with_raw_response.retrieve(
+ "",
+ )
+
+ @parametrize
+ async def test_method_update(self, async_client: AsyncOpenAI) -> None:
+ eval = await async_client.evals.update(
+ eval_id="eval_id",
+ )
+ assert_matches_type(EvalUpdateResponse, eval, path=["response"])
+
+ @parametrize
+ async def test_method_update_with_all_params(self, async_client: AsyncOpenAI) -> None:
+ eval = await async_client.evals.update(
+ eval_id="eval_id",
+ metadata={"foo": "string"},
+ name="name",
+ )
+ assert_matches_type(EvalUpdateResponse, eval, path=["response"])
+
+ @parametrize
+ async def test_raw_response_update(self, async_client: AsyncOpenAI) -> None:
+ response = await async_client.evals.with_raw_response.update(
+ eval_id="eval_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ eval = response.parse()
+ assert_matches_type(EvalUpdateResponse, eval, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_update(self, async_client: AsyncOpenAI) -> None:
+ async with async_client.evals.with_streaming_response.update(
+ eval_id="eval_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ eval = await response.parse()
+ assert_matches_type(EvalUpdateResponse, eval, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_update(self, async_client: AsyncOpenAI) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"):
+ await async_client.evals.with_raw_response.update(
+ eval_id="",
+ )
+
+ @parametrize
+ async def test_method_list(self, async_client: AsyncOpenAI) -> None:
+ eval = await async_client.evals.list()
+ assert_matches_type(AsyncCursorPage[EvalListResponse], eval, path=["response"])
+
+ @parametrize
+ async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> None:
+ eval = await async_client.evals.list(
+ after="after",
+ limit=0,
+ order="asc",
+ order_by="created_at",
+ )
+ assert_matches_type(AsyncCursorPage[EvalListResponse], eval, path=["response"])
+
+ @parametrize
+ async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None:
+ response = await async_client.evals.with_raw_response.list()
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ eval = response.parse()
+ assert_matches_type(AsyncCursorPage[EvalListResponse], eval, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None:
+ async with async_client.evals.with_streaming_response.list() as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ eval = await response.parse()
+ assert_matches_type(AsyncCursorPage[EvalListResponse], eval, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_method_delete(self, async_client: AsyncOpenAI) -> None:
+ eval = await async_client.evals.delete(
+ "eval_id",
+ )
+ assert_matches_type(EvalDeleteResponse, eval, path=["response"])
+
+ @parametrize
+ async def test_raw_response_delete(self, async_client: AsyncOpenAI) -> None:
+ response = await async_client.evals.with_raw_response.delete(
+ "eval_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ eval = response.parse()
+ assert_matches_type(EvalDeleteResponse, eval, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_delete(self, async_client: AsyncOpenAI) -> None:
+ async with async_client.evals.with_streaming_response.delete(
+ "eval_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ eval = await response.parse()
+ assert_matches_type(EvalDeleteResponse, eval, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_delete(self, async_client: AsyncOpenAI) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"):
+ await async_client.evals.with_raw_response.delete(
+ "",
+ )