From b4b4e660b8bb7ae937787fcab9b40feaeba7f711 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Tue, 18 Jun 2024 19:53:21 +0000 Subject: [PATCH] feat(api): add service tier argument for chat completions (#1486) --- .stats.yml | 2 +- src/openai/_base_client.py | 8 ++- src/openai/resources/chat/completions.py | 70 +++++++++++++++++++ src/openai/types/chat/chat_completion.py | 7 ++ .../types/chat/chat_completion_chunk.py | 7 ++ .../types/chat/completion_create_params.py | 13 ++++ tests/api_resources/chat/test_completions.py | 4 ++ 7 files changed, 109 insertions(+), 2 deletions(-) diff --git a/.stats.yml b/.stats.yml index c5ada3b5df..aa7e8427b0 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,2 +1,2 @@ configured_endpoints: 64 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai-5cb1810135c35c5024698f3365626471a04796e26e393aefe1aa0ba3c0891919.yml +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai-8fe357c6b5a425d810d731e4102a052d8e38c5e2d66950e6de1025415160bf88.yml diff --git a/src/openai/_base_client.py b/src/openai/_base_client.py index 5d5d25fca9..1c9a1a03f2 100644 --- a/src/openai/_base_client.py +++ b/src/openai/_base_client.py @@ -457,7 +457,7 @@ def _build_request( raise RuntimeError(f"Unexpected JSON data type, {type(json_data)}, cannot merge with `extra_body`") headers = self._build_headers(options) - params = _merge_mappings(self._custom_query, options.params) + params = _merge_mappings(self.default_query, options.params) content_type = headers.get("Content-Type") # If the given Content-Type header is multipart/form-data then it @@ -593,6 +593,12 @@ def default_headers(self) -> dict[str, str | Omit]: **self._custom_headers, } + @property + def default_query(self) -> dict[str, object]: + return { + **self._custom_query, + } + def _validate_headers( self, headers: Headers, # noqa: ARG002 diff --git a/src/openai/resources/chat/completions.py b/src/openai/resources/chat/completions.py index ed8e9373b0..d50bce0757 100644 --- a/src/openai/resources/chat/completions.py +++ b/src/openai/resources/chat/completions.py @@ -59,6 +59,7 @@ def create( presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, seed: Optional[int] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN, stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN, stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, @@ -163,6 +164,16 @@ def create( should refer to the `system_fingerprint` response parameter to monitor changes in the backend. + service_tier: Specifies the latency tier to use for processing the request. This parameter is + relevant for customers subscribed to the scale tier service: + + - If set to 'auto', the system will utilize scale tier credits until they are + exhausted. + - If set to 'default', the request will be processed in the shared cluster. + + When this parameter is set, the response body will include the `service_tier` + utilized. + stop: Up to 4 sequences where the API will stop generating further tokens. stream: If set, partial message deltas will be sent, like in ChatGPT. Tokens will be @@ -236,6 +247,7 @@ def create( presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, seed: Optional[int] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN, stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN, stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, temperature: Optional[float] | NotGiven = NOT_GIVEN, @@ -346,6 +358,16 @@ def create( should refer to the `system_fingerprint` response parameter to monitor changes in the backend. + service_tier: Specifies the latency tier to use for processing the request. This parameter is + relevant for customers subscribed to the scale tier service: + + - If set to 'auto', the system will utilize scale tier credits until they are + exhausted. + - If set to 'default', the request will be processed in the shared cluster. + + When this parameter is set, the response body will include the `service_tier` + utilized. + stop: Up to 4 sequences where the API will stop generating further tokens. stream_options: Options for streaming response. Only set this when you set `stream: true`. @@ -412,6 +434,7 @@ def create( presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, seed: Optional[int] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN, stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN, stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, temperature: Optional[float] | NotGiven = NOT_GIVEN, @@ -522,6 +545,16 @@ def create( should refer to the `system_fingerprint` response parameter to monitor changes in the backend. + service_tier: Specifies the latency tier to use for processing the request. This parameter is + relevant for customers subscribed to the scale tier service: + + - If set to 'auto', the system will utilize scale tier credits until they are + exhausted. + - If set to 'default', the request will be processed in the shared cluster. + + When this parameter is set, the response body will include the `service_tier` + utilized. + stop: Up to 4 sequences where the API will stop generating further tokens. stream_options: Options for streaming response. Only set this when you set `stream: true`. @@ -587,6 +620,7 @@ def create( presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, seed: Optional[int] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN, stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN, stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, @@ -620,6 +654,7 @@ def create( "presence_penalty": presence_penalty, "response_format": response_format, "seed": seed, + "service_tier": service_tier, "stop": stop, "stream": stream, "stream_options": stream_options, @@ -667,6 +702,7 @@ async def create( presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, seed: Optional[int] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN, stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN, stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, @@ -771,6 +807,16 @@ async def create( should refer to the `system_fingerprint` response parameter to monitor changes in the backend. + service_tier: Specifies the latency tier to use for processing the request. This parameter is + relevant for customers subscribed to the scale tier service: + + - If set to 'auto', the system will utilize scale tier credits until they are + exhausted. + - If set to 'default', the request will be processed in the shared cluster. + + When this parameter is set, the response body will include the `service_tier` + utilized. + stop: Up to 4 sequences where the API will stop generating further tokens. stream: If set, partial message deltas will be sent, like in ChatGPT. Tokens will be @@ -844,6 +890,7 @@ async def create( presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, seed: Optional[int] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN, stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN, stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, temperature: Optional[float] | NotGiven = NOT_GIVEN, @@ -954,6 +1001,16 @@ async def create( should refer to the `system_fingerprint` response parameter to monitor changes in the backend. + service_tier: Specifies the latency tier to use for processing the request. This parameter is + relevant for customers subscribed to the scale tier service: + + - If set to 'auto', the system will utilize scale tier credits until they are + exhausted. + - If set to 'default', the request will be processed in the shared cluster. + + When this parameter is set, the response body will include the `service_tier` + utilized. + stop: Up to 4 sequences where the API will stop generating further tokens. stream_options: Options for streaming response. Only set this when you set `stream: true`. @@ -1020,6 +1077,7 @@ async def create( presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, seed: Optional[int] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN, stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN, stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, temperature: Optional[float] | NotGiven = NOT_GIVEN, @@ -1130,6 +1188,16 @@ async def create( should refer to the `system_fingerprint` response parameter to monitor changes in the backend. + service_tier: Specifies the latency tier to use for processing the request. This parameter is + relevant for customers subscribed to the scale tier service: + + - If set to 'auto', the system will utilize scale tier credits until they are + exhausted. + - If set to 'default', the request will be processed in the shared cluster. + + When this parameter is set, the response body will include the `service_tier` + utilized. + stop: Up to 4 sequences where the API will stop generating further tokens. stream_options: Options for streaming response. Only set this when you set `stream: true`. @@ -1195,6 +1263,7 @@ async def create( presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, seed: Optional[int] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN, stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN, stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, @@ -1228,6 +1297,7 @@ async def create( "presence_penalty": presence_penalty, "response_format": response_format, "seed": seed, + "service_tier": service_tier, "stop": stop, "stream": stream, "stream_options": stream_options, diff --git a/src/openai/types/chat/chat_completion.py b/src/openai/types/chat/chat_completion.py index 61a94a258e..5f4eaf3366 100644 --- a/src/openai/types/chat/chat_completion.py +++ b/src/openai/types/chat/chat_completion.py @@ -56,6 +56,13 @@ class ChatCompletion(BaseModel): object: Literal["chat.completion"] """The object type, which is always `chat.completion`.""" + service_tier: Optional[Literal["scale", "default"]] = None + """The service tier used for processing the request. + + This field is only included if the `service_tier` parameter is specified in the + request. + """ + system_fingerprint: Optional[str] = None """This fingerprint represents the backend configuration that the model runs with. diff --git a/src/openai/types/chat/chat_completion_chunk.py b/src/openai/types/chat/chat_completion_chunk.py index 084a5fcc07..65643c7e60 100644 --- a/src/openai/types/chat/chat_completion_chunk.py +++ b/src/openai/types/chat/chat_completion_chunk.py @@ -122,6 +122,13 @@ class ChatCompletionChunk(BaseModel): object: Literal["chat.completion.chunk"] """The object type, which is always `chat.completion.chunk`.""" + service_tier: Optional[Literal["scale", "default"]] = None + """The service tier used for processing the request. + + This field is only included if the `service_tier` parameter is specified in the + request. + """ + system_fingerprint: Optional[str] = None """ This fingerprint represents the backend configuration that the model runs with. diff --git a/src/openai/types/chat/completion_create_params.py b/src/openai/types/chat/completion_create_params.py index 7dd7067f66..21187f3741 100644 --- a/src/openai/types/chat/completion_create_params.py +++ b/src/openai/types/chat/completion_create_params.py @@ -146,6 +146,19 @@ class CompletionCreateParamsBase(TypedDict, total=False): in the backend. """ + service_tier: Optional[Literal["auto", "default"]] + """Specifies the latency tier to use for processing the request. + + This parameter is relevant for customers subscribed to the scale tier service: + + - If set to 'auto', the system will utilize scale tier credits until they are + exhausted. + - If set to 'default', the request will be processed in the shared cluster. + + When this parameter is set, the response body will include the `service_tier` + utilized. + """ + stop: Union[Optional[str], List[str]] """Up to 4 sequences where the API will stop generating further tokens.""" diff --git a/tests/api_resources/chat/test_completions.py b/tests/api_resources/chat/test_completions.py index 3099e16815..87df11d1ee 100644 --- a/tests/api_resources/chat/test_completions.py +++ b/tests/api_resources/chat/test_completions.py @@ -60,6 +60,7 @@ def test_method_create_with_all_params_overload_1(self, client: OpenAI) -> None: presence_penalty=-2, response_format={"type": "json_object"}, seed=-9223372036854776000, + service_tier="auto", stop="string", stream=False, stream_options={"include_usage": True}, @@ -176,6 +177,7 @@ def test_method_create_with_all_params_overload_2(self, client: OpenAI) -> None: presence_penalty=-2, response_format={"type": "json_object"}, seed=-9223372036854776000, + service_tier="auto", stop="string", stream_options={"include_usage": True}, temperature=1, @@ -294,6 +296,7 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn presence_penalty=-2, response_format={"type": "json_object"}, seed=-9223372036854776000, + service_tier="auto", stop="string", stream=False, stream_options={"include_usage": True}, @@ -410,6 +413,7 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn presence_penalty=-2, response_format={"type": "json_object"}, seed=-9223372036854776000, + service_tier="auto", stop="string", stream_options={"include_usage": True}, temperature=1,