diff --git a/.stats.yml b/.stats.yml index 903c159960..de3167f3a8 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,2 +1,2 @@ configured_endpoints: 68 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai-85a85e0c08de456441431c0ae4e9c078cc8f9748c29430b9a9058340db6389ee.yml +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai-501122aa32adaa2abb3d4487880ab9cdf2141addce2e6c3d1bd9bb6b44c318a8.yml diff --git a/src/openai/resources/beta/assistants.py b/src/openai/resources/beta/assistants.py index 1e57944eb3..5d8c6ec331 100644 --- a/src/openai/resources/beta/assistants.py +++ b/src/openai/resources/beta/assistants.py @@ -100,11 +100,11 @@ def create( and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -250,11 +250,11 @@ def update( and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). 
- Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -486,11 +486,11 @@ async def create( and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -636,11 +636,11 @@ async def update( and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. 
**Important:** when using JSON mode, you **must** also instruct the model to diff --git a/src/openai/resources/beta/threads/runs/runs.py b/src/openai/resources/beta/threads/runs/runs.py index a17e0016c7..ef0edf0e36 100644 --- a/src/openai/resources/beta/threads/runs/runs.py +++ b/src/openai/resources/beta/threads/runs/runs.py @@ -156,11 +156,11 @@ def create( and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -300,11 +300,11 @@ def create( and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -440,11 +440,11 @@ def create( and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. 
Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -1004,11 +1004,11 @@ async def create( and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -1148,11 +1148,11 @@ async def create( and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). 
- Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -1288,11 +1288,11 @@ async def create( and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to diff --git a/src/openai/resources/beta/threads/threads.py b/src/openai/resources/beta/threads/threads.py index 27777251ad..3b0e310e4f 100644 --- a/src/openai/resources/beta/threads/threads.py +++ b/src/openai/resources/beta/threads/threads.py @@ -326,11 +326,11 @@ def create_and_run( and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. 
**Important:** when using JSON mode, you **must** also instruct the model to @@ -460,11 +460,11 @@ def create_and_run( and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -590,11 +590,11 @@ def create_and_run( and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -980,11 +980,11 @@ async def create_and_run( and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. 
Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -1114,11 +1114,11 @@ async def create_and_run( and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -1244,11 +1244,11 @@ async def create_and_run( and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. 
**Important:** when using JSON mode, you **must** also instruct the model to diff --git a/src/openai/resources/chat/completions.py b/src/openai/resources/chat/completions.py index 29fd69947a..d25a8c4dfb 100644 --- a/src/openai/resources/chat/completions.py +++ b/src/openai/resources/chat/completions.py @@ -62,6 +62,7 @@ def create( functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, logprobs: Optional[bool] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, max_tokens: Optional[int] | NotGiven = NOT_GIVEN, n: Optional[int] | NotGiven = NOT_GIVEN, parallel_tool_calls: bool | NotGiven = NOT_GIVEN, @@ -130,13 +131,17 @@ def create( returns the log probabilities of each output token returned in the `content` of `message`. + max_completion_tokens: An upper bound for the number of tokens that can be generated for a completion, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat - completion. + completion. This value can be used to control + [costs](https://openai.com/api/pricing/) for text generated via API. - The total length of input tokens and generated tokens is limited by the model's - context length. - [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) - for counting tokens. + This value is now deprecated in favor of `max_completion_tokens`, and is not + compatible with + [o1 series models](https://platform.openai.com/docs/guides/reasoning). n: How many chat completion choices to generate for each input message. Note that you will be charged based on the number of generated tokens across all of the @@ -159,11 +164,11 @@ def create( all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`. 
Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -183,8 +188,11 @@ def create( service_tier: Specifies the latency tier to use for processing the request. This parameter is relevant for customers subscribed to the scale tier service: - - If set to 'auto', the system will utilize scale tier credits until they are - exhausted. + - If set to 'auto', and the Project is Scale tier enabled, the system will + utilize scale tier credits until they are exhausted. + - If set to 'auto', and the Project is not Scale tier enabled, the request will + be processed using the default service tier with a lower uptime SLA and no + latency guarantee. - If set to 'default', the request will be processed using the default service tier with a lower uptime SLA and no latency guarantee. - When not set, the default behavior is 'auto'. @@ -259,6 +267,7 @@ def create( functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, logprobs: Optional[bool] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, max_tokens: Optional[int] | NotGiven = NOT_GIVEN, n: Optional[int] | NotGiven = NOT_GIVEN, parallel_tool_calls: bool | NotGiven = NOT_GIVEN, @@ -333,13 +342,17 @@ def create( returns the log probabilities of each output token returned in the `content` of `message`. 
+ max_completion_tokens: An upper bound for the number of tokens that can be generated for a completion, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat - completion. + completion. This value can be used to control + [costs](https://openai.com/api/pricing/) for text generated via API. - The total length of input tokens and generated tokens is limited by the model's - context length. - [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) - for counting tokens. + This value is now deprecated in favor of `max_completion_tokens`, and is not + compatible with + [o1 series models](https://platform.openai.com/docs/guides/reasoning). n: How many chat completion choices to generate for each input message. Note that you will be charged based on the number of generated tokens across all of the @@ -362,11 +375,11 @@ def create( all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -386,8 +399,11 @@ def create( service_tier: Specifies the latency tier to use for processing the request. This parameter is relevant for customers subscribed to the scale tier service: - - If set to 'auto', the system will utilize scale tier credits until they are - exhausted. 
+ - If set to 'auto', and the Project is Scale tier enabled, the system will + utilize scale tier credits until they are exhausted. + - If set to 'auto', and the Project is not Scale tier enabled, the request will + be processed using the default service tier with a lower uptime SLA and no + latency guarantee. - If set to 'default', the request will be processed using the default service tier with a lower uptime SLA and no latency guarantee. - When not set, the default behavior is 'auto'. @@ -455,6 +471,7 @@ def create( functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, logprobs: Optional[bool] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, max_tokens: Optional[int] | NotGiven = NOT_GIVEN, n: Optional[int] | NotGiven = NOT_GIVEN, parallel_tool_calls: bool | NotGiven = NOT_GIVEN, @@ -529,13 +546,17 @@ def create( returns the log probabilities of each output token returned in the `content` of `message`. + max_completion_tokens: An upper bound for the number of tokens that can be generated for a completion, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat - completion. + completion. This value can be used to control + [costs](https://openai.com/api/pricing/) for text generated via API. - The total length of input tokens and generated tokens is limited by the model's - context length. - [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) - for counting tokens. + This value is now deprecated in favor of `max_completion_tokens`, and is not + compatible with + [o1 series models](https://platform.openai.com/docs/guides/reasoning). n: How many chat completion choices to generate for each input message. 
Note that you will be charged based on the number of generated tokens across all of the @@ -558,11 +579,11 @@ def create( all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -582,8 +603,11 @@ def create( service_tier: Specifies the latency tier to use for processing the request. This parameter is relevant for customers subscribed to the scale tier service: - - If set to 'auto', the system will utilize scale tier credits until they are - exhausted. + - If set to 'auto', and the Project is Scale tier enabled, the system will + utilize scale tier credits until they are exhausted. + - If set to 'auto', and the Project is not Scale tier enabled, the request will + be processed using the default service tier with a lower uptime SLA and no + latency guarantee. - If set to 'default', the request will be processed using the default service tier with a lower uptime SLA and no latency guarantee. - When not set, the default behavior is 'auto'. 
@@ -650,6 +674,7 @@ def create( functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, logprobs: Optional[bool] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, max_tokens: Optional[int] | NotGiven = NOT_GIVEN, n: Optional[int] | NotGiven = NOT_GIVEN, parallel_tool_calls: bool | NotGiven = NOT_GIVEN, @@ -684,6 +709,7 @@ def create( "functions": functions, "logit_bias": logit_bias, "logprobs": logprobs, + "max_completion_tokens": max_completion_tokens, "max_tokens": max_tokens, "n": n, "parallel_tool_calls": parallel_tool_calls, @@ -743,6 +769,7 @@ async def create( functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, logprobs: Optional[bool] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, max_tokens: Optional[int] | NotGiven = NOT_GIVEN, n: Optional[int] | NotGiven = NOT_GIVEN, parallel_tool_calls: bool | NotGiven = NOT_GIVEN, @@ -811,13 +838,17 @@ async def create( returns the log probabilities of each output token returned in the `content` of `message`. + max_completion_tokens: An upper bound for the number of tokens that can be generated for a completion, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat - completion. + completion. This value can be used to control + [costs](https://openai.com/api/pricing/) for text generated via API. - The total length of input tokens and generated tokens is limited by the model's - context length. - [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) - for counting tokens. 
+ This value is now deprecated in favor of `max_completion_tokens`, and is not + compatible with + [o1 series models](https://platform.openai.com/docs/guides/reasoning). n: How many chat completion choices to generate for each input message. Note that you will be charged based on the number of generated tokens across all of the @@ -840,11 +871,11 @@ async def create( all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -864,8 +895,11 @@ async def create( service_tier: Specifies the latency tier to use for processing the request. This parameter is relevant for customers subscribed to the scale tier service: - - If set to 'auto', the system will utilize scale tier credits until they are - exhausted. + - If set to 'auto', and the Project is Scale tier enabled, the system will + utilize scale tier credits until they are exhausted. + - If set to 'auto', and the Project is not Scale tier enabled, the request will + be processed using the default service tier with a lower uptime SLA and no + latency guarantee. - If set to 'default', the request will be processed using the default service tier with a lower uptime SLA and no latency guarantee. - When not set, the default behavior is 'auto'. 
@@ -940,6 +974,7 @@ async def create( functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, logprobs: Optional[bool] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, max_tokens: Optional[int] | NotGiven = NOT_GIVEN, n: Optional[int] | NotGiven = NOT_GIVEN, parallel_tool_calls: bool | NotGiven = NOT_GIVEN, @@ -1014,13 +1049,17 @@ async def create( returns the log probabilities of each output token returned in the `content` of `message`. + max_completion_tokens: An upper bound for the number of tokens that can be generated for a completion, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat - completion. + completion. This value can be used to control + [costs](https://openai.com/api/pricing/) for text generated via API. - The total length of input tokens and generated tokens is limited by the model's - context length. - [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) - for counting tokens. + This value is now deprecated in favor of `max_completion_tokens`, and is not + compatible with + [o1 series models](https://platform.openai.com/docs/guides/reasoning). n: How many chat completion choices to generate for each input message. Note that you will be charged based on the number of generated tokens across all of the @@ -1043,11 +1082,11 @@ async def create( all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. 
Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -1067,8 +1106,11 @@ async def create( service_tier: Specifies the latency tier to use for processing the request. This parameter is relevant for customers subscribed to the scale tier service: - - If set to 'auto', the system will utilize scale tier credits until they are - exhausted. + - If set to 'auto', and the Project is Scale tier enabled, the system will + utilize scale tier credits until they are exhausted. + - If set to 'auto', and the Project is not Scale tier enabled, the request will + be processed using the default service tier with a lower uptime SLA and no + latency guarantee. - If set to 'default', the request will be processed using the default service tier with a lower uptime SLA and no latency guarantee. - When not set, the default behavior is 'auto'. @@ -1136,6 +1178,7 @@ async def create( functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, logprobs: Optional[bool] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, max_tokens: Optional[int] | NotGiven = NOT_GIVEN, n: Optional[int] | NotGiven = NOT_GIVEN, parallel_tool_calls: bool | NotGiven = NOT_GIVEN, @@ -1210,13 +1253,17 @@ async def create( returns the log probabilities of each output token returned in the `content` of `message`. + max_completion_tokens: An upper bound for the number of tokens that can be generated for a completion, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). 
+ max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat - completion. + completion. This value can be used to control + [costs](https://openai.com/api/pricing/) for text generated via API. - The total length of input tokens and generated tokens is limited by the model's - context length. - [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) - for counting tokens. + This value is now deprecated in favor of `max_completion_tokens`, and is not + compatible with + [o1 series models](https://platform.openai.com/docs/guides/reasoning). n: How many chat completion choices to generate for each input message. Note that you will be charged based on the number of generated tokens across all of the @@ -1239,11 +1286,11 @@ async def create( all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -1263,8 +1310,11 @@ async def create( service_tier: Specifies the latency tier to use for processing the request. This parameter is relevant for customers subscribed to the scale tier service: - - If set to 'auto', the system will utilize scale tier credits until they are - exhausted. + - If set to 'auto', and the Project is Scale tier enabled, the system will + utilize scale tier credits until they are exhausted. 
+ - If set to 'auto', and the Project is not Scale tier enabled, the request will + be processed using the default service tier with a lower uptime SLA and no + latency guarantee. - If set to 'default', the request will be processed using the default service tier with a lower uptime SLA and no latency guarantee. - When not set, the default behavior is 'auto'. @@ -1331,6 +1381,7 @@ async def create( functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, logprobs: Optional[bool] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, max_tokens: Optional[int] | NotGiven = NOT_GIVEN, n: Optional[int] | NotGiven = NOT_GIVEN, parallel_tool_calls: bool | NotGiven = NOT_GIVEN, @@ -1365,6 +1416,7 @@ async def create( "functions": functions, "logit_bias": logit_bias, "logprobs": logprobs, + "max_completion_tokens": max_completion_tokens, "max_tokens": max_tokens, "n": n, "parallel_tool_calls": parallel_tool_calls, diff --git a/src/openai/resources/fine_tuning/jobs/jobs.py b/src/openai/resources/fine_tuning/jobs/jobs.py index 88ac6107a4..7eb0c5dbfc 100644 --- a/src/openai/resources/fine_tuning/jobs/jobs.py +++ b/src/openai/resources/fine_tuning/jobs/jobs.py @@ -111,7 +111,7 @@ def create( job parameters should produce the same results, but may differ in rare cases. If a seed is not specified, one will be generated for you. - suffix: A string of up to 18 characters that will be added to your fine-tuned model + suffix: A string of up to 64 characters that will be added to your fine-tuned model name. For example, a `suffix` of "custom-model-name" would produce a model name like @@ -402,7 +402,7 @@ async def create( job parameters should produce the same results, but may differ in rare cases. If a seed is not specified, one will be generated for you. 
- suffix: A string of up to 18 characters that will be added to your fine-tuned model + suffix: A string of up to 64 characters that will be added to your fine-tuned model name. For example, a `suffix` of "custom-model-name" would produce a model name like diff --git a/src/openai/types/beta/assistant.py b/src/openai/types/beta/assistant.py index c6a0a4cfcf..b4da08745d 100644 --- a/src/openai/types/beta/assistant.py +++ b/src/openai/types/beta/assistant.py @@ -90,11 +90,11 @@ class Assistant(BaseModel): and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to diff --git a/src/openai/types/beta/assistant_create_params.py b/src/openai/types/beta/assistant_create_params.py index c1360b5b66..eca4da0a2b 100644 --- a/src/openai/types/beta/assistant_create_params.py +++ b/src/openai/types/beta/assistant_create_params.py @@ -58,11 +58,11 @@ class AssistantCreateParams(TypedDict, total=False): and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). 
- Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to diff --git a/src/openai/types/beta/assistant_update_params.py b/src/openai/types/beta/assistant_update_params.py index ade565819f..5396233937 100644 --- a/src/openai/types/beta/assistant_update_params.py +++ b/src/openai/types/beta/assistant_update_params.py @@ -50,11 +50,11 @@ class AssistantUpdateParams(TypedDict, total=False): and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to diff --git a/src/openai/types/beta/file_search_tool.py b/src/openai/types/beta/file_search_tool.py index 4015b3da09..aee6593e89 100644 --- a/src/openai/types/beta/file_search_tool.py +++ b/src/openai/types/beta/file_search_tool.py @@ -9,16 +9,16 @@ class FileSearchRankingOptions(BaseModel): - ranker: Optional[Literal["auto", "default_2024_08_21"]] = None - """The ranker to use for the file search. + score_threshold: float + """The score threshold for the file search. - If not specified will use the `auto` ranker. + All values must be a floating point number between 0 and 1. """ - score_threshold: Optional[float] = None - """The score threshold for the file search. 
+ ranker: Optional[Literal["auto", "default_2024_08_21"]] = None + """The ranker to use for the file search. - All values must be a floating point number between 0 and 1. + If not specified will use the `auto` ranker. """ @@ -38,6 +38,9 @@ class FileSearch(BaseModel): ranking_options: Optional[FileSearchRankingOptions] = None """The ranking options for the file search. + If not specified, the file search tool will use the `auto` ranker and a + score_threshold of 0. + See the [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search/customizing-file-search-settings) for more information. diff --git a/src/openai/types/beta/file_search_tool_param.py b/src/openai/types/beta/file_search_tool_param.py index 97e651b0da..5ce91207ba 100644 --- a/src/openai/types/beta/file_search_tool_param.py +++ b/src/openai/types/beta/file_search_tool_param.py @@ -8,16 +8,16 @@ class FileSearchRankingOptions(TypedDict, total=False): - ranker: Literal["auto", "default_2024_08_21"] - """The ranker to use for the file search. + score_threshold: Required[float] + """The score threshold for the file search. - If not specified will use the `auto` ranker. + All values must be a floating point number between 0 and 1. """ - score_threshold: float - """The score threshold for the file search. + ranker: Literal["auto", "default_2024_08_21"] + """The ranker to use for the file search. - All values must be a floating point number between 0 and 1. + If not specified will use the `auto` ranker. """ @@ -37,6 +37,9 @@ class FileSearch(TypedDict, total=False): ranking_options: FileSearchRankingOptions """The ranking options for the file search. + If not specified, the file search tool will use the `auto` ranker and a + score_threshold of 0. + See the [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search/customizing-file-search-settings) for more information. 
diff --git a/src/openai/types/beta/thread_create_and_run_params.py b/src/openai/types/beta/thread_create_and_run_params.py index 370c2f9bce..20d525fa1a 100644 --- a/src/openai/types/beta/thread_create_and_run_params.py +++ b/src/openai/types/beta/thread_create_and_run_params.py @@ -98,11 +98,11 @@ class ThreadCreateAndRunParamsBase(TypedDict, total=False): and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to diff --git a/src/openai/types/beta/threads/run.py b/src/openai/types/beta/threads/run.py index 0579e229d8..5abc1de295 100644 --- a/src/openai/types/beta/threads/run.py +++ b/src/openai/types/beta/threads/run.py @@ -172,11 +172,11 @@ class Run(BaseModel): and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. 
**Important:** when using JSON mode, you **must** also instruct the model to diff --git a/src/openai/types/beta/threads/run_create_params.py b/src/openai/types/beta/threads/run_create_params.py index 7c5f571d58..824cb1a041 100644 --- a/src/openai/types/beta/threads/run_create_params.py +++ b/src/openai/types/beta/threads/run_create_params.py @@ -111,11 +111,11 @@ class RunCreateParamsBase(TypedDict, total=False): and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to diff --git a/src/openai/types/chat/completion_create_params.py b/src/openai/types/chat/completion_create_params.py index b86dab742b..4ed89b00f5 100644 --- a/src/openai/types/chat/completion_create_params.py +++ b/src/openai/types/chat/completion_create_params.py @@ -87,15 +87,22 @@ class CompletionCreateParamsBase(TypedDict, total=False): `content` of `message`. """ + max_completion_tokens: Optional[int] + """ + An upper bound for the number of tokens that can be generated for a completion, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + """ + max_tokens: Optional[int] """ The maximum number of [tokens](/tokenizer) that can be generated in the chat - completion. + completion. This value can be used to control + [costs](https://openai.com/api/pricing/) for text generated via API. 
- The total length of input tokens and generated tokens is limited by the model's - context length. - [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) - for counting tokens. + This value is now deprecated in favor of `max_completion_tokens`, and is not + compatible with + [o1 series models](https://platform.openai.com/docs/guides/reasoning). """ n: Optional[int] @@ -130,11 +137,11 @@ class CompletionCreateParamsBase(TypedDict, total=False): all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured - Outputs which guarantees the model will match your supplied JSON schema. Learn - more in the + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -160,8 +167,11 @@ class CompletionCreateParamsBase(TypedDict, total=False): This parameter is relevant for customers subscribed to the scale tier service: - - If set to 'auto', the system will utilize scale tier credits until they are - exhausted. + - If set to 'auto', and the Project is Scale tier enabled, the system will + utilize scale tier credits until they are exhausted. + - If set to 'auto', and the Project is not Scale tier enabled, the request will + be processed using the default service tier with a lower uptime SLA and no + latency guarantee. - If set to 'default', the request will be processed using the default service tier with a lower uptime SLA and no latency guarentee. - When not set, the default behavior is 'auto'.
diff --git a/src/openai/types/chat_model.py b/src/openai/types/chat_model.py index 2372d5e14e..f8438c75c8 100644 --- a/src/openai/types/chat_model.py +++ b/src/openai/types/chat_model.py @@ -5,9 +5,13 @@ __all__ = ["ChatModel"] ChatModel: TypeAlias = Literal[ + "o1-preview", + "o1-preview-2024-09-12", + "o1-mini", + "o1-mini-2024-09-12", "gpt-4o", - "gpt-4o-2024-05-13", "gpt-4o-2024-08-06", + "gpt-4o-2024-05-13", "chatgpt-4o-latest", "gpt-4o-mini", "gpt-4o-mini-2024-07-18", diff --git a/src/openai/types/completion_usage.py b/src/openai/types/completion_usage.py index 0d57b96595..a4b9116e35 100644 --- a/src/openai/types/completion_usage.py +++ b/src/openai/types/completion_usage.py @@ -1,10 +1,15 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - +from typing import Optional from .._models import BaseModel -__all__ = ["CompletionUsage"] +__all__ = ["CompletionUsage", "CompletionTokensDetails"] + + +class CompletionTokensDetails(BaseModel): + reasoning_tokens: Optional[int] = None + """Tokens generated by the model for reasoning.""" class CompletionUsage(BaseModel): @@ -16,3 +21,6 @@ class CompletionUsage(BaseModel): total_tokens: int """Total number of tokens used in the request (prompt + completion).""" + + completion_tokens_details: Optional[CompletionTokensDetails] = None + """Breakdown of tokens used in a completion.""" diff --git a/src/openai/types/fine_tuning/job_create_params.py b/src/openai/types/fine_tuning/job_create_params.py index e9be2ef1ca..8f5ea86274 100644 --- a/src/openai/types/fine_tuning/job_create_params.py +++ b/src/openai/types/fine_tuning/job_create_params.py @@ -50,7 +50,7 @@ class JobCreateParams(TypedDict, total=False): suffix: Optional[str] """ - A string of up to 18 characters that will be added to your fine-tuned model + A string of up to 64 characters that will be added to your fine-tuned model name. 
For example, a `suffix` of "custom-model-name" would produce a model name like diff --git a/tests/api_resources/chat/test_completions.py b/tests/api_resources/chat/test_completions.py index 01ce3f1b0d..df7bc799df 100644 --- a/tests/api_resources/chat/test_completions.py +++ b/tests/api_resources/chat/test_completions.py @@ -54,6 +54,7 @@ def test_method_create_with_all_params_overload_1(self, client: OpenAI) -> None: ], logit_bias={"foo": 0}, logprobs=True, + max_completion_tokens=0, max_tokens=0, n=1, parallel_tool_calls=True, @@ -174,6 +175,7 @@ def test_method_create_with_all_params_overload_2(self, client: OpenAI) -> None: ], logit_bias={"foo": 0}, logprobs=True, + max_completion_tokens=0, max_tokens=0, n=1, parallel_tool_calls=True, @@ -296,6 +298,7 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn ], logit_bias={"foo": 0}, logprobs=True, + max_completion_tokens=0, max_tokens=0, n=1, parallel_tool_calls=True, @@ -416,6 +419,7 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn ], logit_bias={"foo": 0}, logprobs=True, + max_completion_tokens=0, max_tokens=0, n=1, parallel_tool_calls=True,