feat(api): add service tier argument for chat completions #1486

Merged 1 commit on Jun 18, 2024
2 changes: 1 addition & 1 deletion .stats.yml
@@ -1,2 +1,2 @@
 configured_endpoints: 64
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai-5cb1810135c35c5024698f3365626471a04796e26e393aefe1aa0ba3c0891919.yml
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai-8fe357c6b5a425d810d731e4102a052d8e38c5e2d66950e6de1025415160bf88.yml
8 changes: 7 additions & 1 deletion src/openai/_base_client.py
@@ -457,7 +457,7 @@ def _build_request(
raise RuntimeError(f"Unexpected JSON data type, {type(json_data)}, cannot merge with `extra_body`")

headers = self._build_headers(options)
-params = _merge_mappings(self._custom_query, options.params)
+params = _merge_mappings(self.default_query, options.params)
content_type = headers.get("Content-Type")

# If the given Content-Type header is multipart/form-data then it
@@ -593,6 +593,12 @@ def default_headers(self) -> dict[str, str | Omit]:
**self._custom_headers,
}

@property
def default_query(self) -> dict[str, object]:
return {
**self._custom_query,
}

def _validate_headers(
self,
headers: Headers, # noqa: ARG002
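The new `default_query` property mirrors the existing `default_headers` hook: `_build_request` now reads query params through the overridable property instead of touching `_custom_query` directly, so a subclass can inject a param into every request. A minimal standalone sketch of the pattern (not the SDK's actual internals; `ApiVersionedClient` and the simplified `_merge_mappings` are illustrative):

from __future__ import annotations

from typing import Mapping


def _merge_mappings(base: Mapping[str, object], overrides: Mapping[str, object]) -> dict[str, object]:
    # Simplified stand-in for the SDK helper: later mappings win on key clashes.
    return {**base, **overrides}


class BaseClient:
    def __init__(self, custom_query: dict[str, object] | None = None) -> None:
        self._custom_query = custom_query or {}

    @property
    def default_query(self) -> dict[str, object]:
        return {**self._custom_query}

    def build_params(self, per_request: Mapping[str, object]) -> dict[str, object]:
        # Mirrors the _build_request change: read the property, not the attribute.
        return _merge_mappings(self.default_query, per_request)


class ApiVersionedClient(BaseClient):
    @property
    def default_query(self) -> dict[str, object]:
        # Subclasses can now attach a query param to every outgoing request.
        return {"api-version": "2024-06-01", **super().default_query}


client = ApiVersionedClient(custom_query={"trace": "1"})
print(client.build_params({"limit": 10}))
# {'api-version': '2024-06-01', 'trace': '1', 'limit': 10}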
70 changes: 70 additions & 0 deletions src/openai/resources/chat/completions.py
@@ -59,6 +59,7 @@ def create(
presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
seed: Optional[int] | NotGiven = NOT_GIVEN,
service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN,
stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
@@ -163,6 +164,16 @@ def create(
should refer to the `system_fingerprint` response parameter to monitor changes
in the backend.

service_tier: Specifies the latency tier to use for processing the request. This parameter is
relevant for customers subscribed to the scale tier service:

- If set to 'auto', the system will utilize scale tier credits until they are
exhausted.
- If set to 'default', the request will be processed in the shared cluster.

When this parameter is set, the response body will include the `service_tier`
utilized.

stop: Up to 4 sequences where the API will stop generating further tokens.

stream: If set, partial message deltas will be sent, like in ChatGPT. Tokens will be
@@ -236,6 +247,7 @@ def create(
presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
seed: Optional[int] | NotGiven = NOT_GIVEN,
service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN,
stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
@@ -346,6 +358,16 @@ def create(
should refer to the `system_fingerprint` response parameter to monitor changes
in the backend.

service_tier: Specifies the latency tier to use for processing the request. This parameter is
relevant for customers subscribed to the scale tier service:

- If set to 'auto', the system will utilize scale tier credits until they are
exhausted.
- If set to 'default', the request will be processed in the shared cluster.

When this parameter is set, the response body will include the `service_tier`
utilized.

stop: Up to 4 sequences where the API will stop generating further tokens.

stream_options: Options for streaming response. Only set this when you set `stream: true`.
@@ -412,6 +434,7 @@ def create(
presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
seed: Optional[int] | NotGiven = NOT_GIVEN,
service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN,
stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
@@ -522,6 +545,16 @@ def create(
should refer to the `system_fingerprint` response parameter to monitor changes
in the backend.

service_tier: Specifies the latency tier to use for processing the request. This parameter is
relevant for customers subscribed to the scale tier service:

- If set to 'auto', the system will utilize scale tier credits until they are
exhausted.
- If set to 'default', the request will be processed in the shared cluster.

When this parameter is set, the response body will include the `service_tier`
utilized.

stop: Up to 4 sequences where the API will stop generating further tokens.

stream_options: Options for streaming response. Only set this when you set `stream: true`.
@@ -587,6 +620,7 @@ def create(
presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
seed: Optional[int] | NotGiven = NOT_GIVEN,
service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN,
stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
@@ -620,6 +654,7 @@ def create(
"presence_penalty": presence_penalty,
"response_format": response_format,
"seed": seed,
"service_tier": service_tier,
"stop": stop,
"stream": stream,
"stream_options": stream_options,
@@ -667,6 +702,7 @@ async def create(
presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
seed: Optional[int] | NotGiven = NOT_GIVEN,
service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN,
stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
@@ -771,6 +807,16 @@ async def create(
should refer to the `system_fingerprint` response parameter to monitor changes
in the backend.

service_tier: Specifies the latency tier to use for processing the request. This parameter is
relevant for customers subscribed to the scale tier service:

- If set to 'auto', the system will utilize scale tier credits until they are
exhausted.
- If set to 'default', the request will be processed in the shared cluster.

When this parameter is set, the response body will include the `service_tier`
utilized.

stop: Up to 4 sequences where the API will stop generating further tokens.

stream: If set, partial message deltas will be sent, like in ChatGPT. Tokens will be
@@ -844,6 +890,7 @@ async def create(
presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
seed: Optional[int] | NotGiven = NOT_GIVEN,
service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN,
stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
@@ -954,6 +1001,16 @@ async def create(
should refer to the `system_fingerprint` response parameter to monitor changes
in the backend.

service_tier: Specifies the latency tier to use for processing the request. This parameter is
relevant for customers subscribed to the scale tier service:

- If set to 'auto', the system will utilize scale tier credits until they are
exhausted.
- If set to 'default', the request will be processed in the shared cluster.

When this parameter is set, the response body will include the `service_tier`
utilized.

stop: Up to 4 sequences where the API will stop generating further tokens.

stream_options: Options for streaming response. Only set this when you set `stream: true`.
@@ -1020,6 +1077,7 @@ async def create(
presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
seed: Optional[int] | NotGiven = NOT_GIVEN,
service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN,
stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
@@ -1130,6 +1188,16 @@ async def create(
should refer to the `system_fingerprint` response parameter to monitor changes
in the backend.

service_tier: Specifies the latency tier to use for processing the request. This parameter is
relevant for customers subscribed to the scale tier service:

- If set to 'auto', the system will utilize scale tier credits until they are
exhausted.
- If set to 'default', the request will be processed in the shared cluster.

When this parameter is set, the response body will include the `service_tier`
utilized.

stop: Up to 4 sequences where the API will stop generating further tokens.

stream_options: Options for streaming response. Only set this when you set `stream: true`.
@@ -1195,6 +1263,7 @@ async def create(
presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
seed: Optional[int] | NotGiven = NOT_GIVEN,
service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN,
stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
@@ -1228,6 +1297,7 @@ async def create(
"presence_penalty": presence_penalty,
"response_format": response_format,
"seed": seed,
"service_tier": service_tier,
"stop": stop,
"stream": stream,
"stream_options": stream_options,
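Taken together, the resource changes thread `service_tier` from the keyword argument into the request body across every sync and async overload. A hedged usage sketch (assumes a configured `OPENAI_API_KEY` and model access; the model name and prompt are illustrative, not from this PR):

from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment

completion = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Say hello."}],
    service_tier="auto",  # spend scale tier credits while they last, else shared cluster
)

# Because the request set `service_tier`, the response reports the tier used.
print(completion.service_tier)  # e.g. "scale" or "default"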
7 changes: 7 additions & 0 deletions src/openai/types/chat/chat_completion.py
@@ -56,6 +56,13 @@ class ChatCompletion(BaseModel):
object: Literal["chat.completion"]
"""The object type, which is always `chat.completion`."""

service_tier: Optional[Literal["scale", "default"]] = None
"""The service tier used for processing the request.
This field is only included if the `service_tier` parameter is specified in the
request.
"""

system_fingerprint: Optional[str] = None
"""This fingerprint represents the backend configuration that the model runs with.
7 changes: 7 additions & 0 deletions src/openai/types/chat/chat_completion_chunk.py
@@ -122,6 +122,13 @@ class ChatCompletionChunk(BaseModel):
object: Literal["chat.completion.chunk"]
"""The object type, which is always `chat.completion.chunk`."""

service_tier: Optional[Literal["scale", "default"]] = None
"""The service tier used for processing the request.

This field is only included if the `service_tier` parameter is specified in the
request.
"""

system_fingerprint: Optional[str] = None
"""
This fingerprint represents the backend configuration that the model runs with.
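The chunk model mirrors the response model, so streaming callers can observe the tier as well; a short sketch under the same assumptions as the example above:

stream = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Count to three."}],
    service_tier="auto",
    stream=True,
)
for chunk in stream:
    # None unless the request opted in via `service_tier`.
    if chunk.service_tier is not None:
        print("served by:", chunk.service_tier)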
13 changes: 13 additions & 0 deletions src/openai/types/chat/completion_create_params.py
@@ -146,6 +146,19 @@ class CompletionCreateParamsBase(TypedDict, total=False):
in the backend.
"""

service_tier: Optional[Literal["auto", "default"]]
"""Specifies the latency tier to use for processing the request.

This parameter is relevant for customers subscribed to the scale tier service:

- If set to 'auto', the system will utilize scale tier credits until they are
exhausted.
- If set to 'default', the request will be processed in the shared cluster.

When this parameter is set, the response body will include the `service_tier`
utilized.
"""

stop: Union[Optional[str], List[str]]
"""Up to 4 sequences where the API will stop generating further tokens."""

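Because the request shape is a TypedDict, a static type checker can reject an invalid tier before any request is sent. A small sketch, assuming the module's exported names match this SDK version (`CompletionCreateParamsNonStreaming` extends the base class shown above):

from openai.types.chat import completion_create_params

params: completion_create_params.CompletionCreateParamsNonStreaming = {
    "model": "gpt-4o",
    "messages": [{"role": "user", "content": "hi"}],
    "service_tier": "auto",  # OK: matches Literal["auto", "default"]
    # "service_tier": "scale",  # rejected: "scale" appears only in responses
}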
4 changes: 4 additions & 0 deletions tests/api_resources/chat/test_completions.py
@@ -60,6 +60,7 @@ def test_method_create_with_all_params_overload_1(self, client: OpenAI) -> None:
presence_penalty=-2,
response_format={"type": "json_object"},
seed=-9223372036854776000,
service_tier="auto",
stop="string",
stream=False,
stream_options={"include_usage": True},
@@ -176,6 +177,7 @@ def test_method_create_with_all_params_overload_2(self, client: OpenAI) -> None:
presence_penalty=-2,
response_format={"type": "json_object"},
seed=-9223372036854776000,
service_tier="auto",
stop="string",
stream_options={"include_usage": True},
temperature=1,
@@ -294,6 +296,7 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn
presence_penalty=-2,
response_format={"type": "json_object"},
seed=-9223372036854776000,
service_tier="auto",
stop="string",
stream=False,
stream_options={"include_usage": True},
@@ -410,6 +413,7 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn
presence_penalty=-2,
response_format={"type": "json_object"},
seed=-9223372036854776000,
service_tier="auto",
stop="string",
stream_options={"include_usage": True},
temperature=1,