Commit 6a60d70

feat(api): Realtime API token_limits, Hybrid searching ranking options
1 parent 379e97c commit 6a60d70

File tree

10 files changed: +173 -34 lines changed

.stats.yml

Lines changed: 3 additions & 3 deletions
@@ -1,4 +1,4 @@
 configured_endpoints: 135
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-f68f718cd45ac3f9336603601bccc38a718af44d0b26601031de3d0a71b7ce2f.yml
-openapi_spec_hash: 1560717860bba4105936647dde8f618d
-config_hash: 50ee3382a63c021a9f821a935950e926
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-3c5d1593d7c6f2b38a7d78d7906041465ee9d6e9022f0651e1da194654488108.yml
+openapi_spec_hash: 0a4d8ad2469823ce24a3fd94f23f1c2b
+config_hash: 032995825500a503a76da119f5354905

src/resources/images.ts

Lines changed: 4 additions & 1 deletion
@@ -545,7 +545,10 @@ export interface ImageEditParamsBase {
   background?: 'transparent' | 'opaque' | 'auto' | null;
 
   /**
-   * Control how much effort the model will exert to match the style and features, especially facial features, of input images. This parameter is only supported for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and `low`. Defaults to `low`.
+   * Control how much effort the model will exert to match the style and features,
+   * especially facial features, of input images. This parameter is only supported
+   * for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and
+   * `low`. Defaults to `low`.
    */
   input_fidelity?: 'high' | 'low' | null;

src/resources/realtime/calls.ts

Lines changed: 13 additions & 2 deletions
@@ -177,8 +177,19 @@ export interface CallAcceptParams {
   tracing?: RealtimeAPI.RealtimeTracingConfig | null;
 
   /**
-   * Controls how the realtime conversation is truncated prior to model inference.
-   * The default is `auto`.
+   * When the number of tokens in a conversation exceeds the model's input token
+   * limit, the conversation will be truncated, meaning messages (starting from the
+   * oldest) will not be included in the model's context. A 32k context model with
+   * 4,096 max output tokens can only include 28,224 tokens in the context before
+   * truncation occurs. Clients can configure truncation behavior to truncate with a
+   * lower max token limit, which is an effective way to control token usage and
+   * cost. Truncation will reduce the number of cached tokens on the next turn
+   * (busting the cache), since messages are dropped from the beginning of the
+   * context. However, clients can also configure truncation to retain messages up
+   * to a fraction of the maximum context size, which will reduce the need for
+   * future truncations and thus improve the cache rate. Truncation can be disabled
+   * entirely, which means the server will never truncate but would instead return
+   * an error if the conversation exceeds the model's input token limit.
    */
   truncation?: RealtimeAPI.RealtimeTruncation;
 }
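
For illustration (not part of this diff), a hedged sketch of accepting an incoming call with a retention-ratio truncation strategy; the call ID is invented and the exact set of required `CallAcceptParams` fields is assumed:

import OpenAI from 'openai';

const client = new OpenAI();

// Retain roughly 80% of the post-instruction context whenever truncation
// fires, instead of the default `auto` behavior.
await client.realtime.calls.accept('rtc_abc123', {
  type: 'realtime',
  model: 'gpt-realtime',
  truncation: { type: 'retention_ratio', retention_ratio: 0.8 },
});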

src/resources/realtime/client-secrets.ts

Lines changed: 13 additions & 2 deletions
@@ -144,8 +144,19 @@ export interface RealtimeSessionCreateResponse {
   tracing?: 'auto' | RealtimeSessionCreateResponse.TracingConfiguration | null;
 
   /**
-   * Controls how the realtime conversation is truncated prior to model inference.
-   * The default is `auto`.
+   * When the number of tokens in a conversation exceeds the model's input token
+   * limit, the conversation will be truncated, meaning messages (starting from the
+   * oldest) will not be included in the model's context. A 32k context model with
+   * 4,096 max output tokens can only include 28,224 tokens in the context before
+   * truncation occurs. Clients can configure truncation behavior to truncate with a
+   * lower max token limit, which is an effective way to control token usage and
+   * cost. Truncation will reduce the number of cached tokens on the next turn
+   * (busting the cache), since messages are dropped from the beginning of the
+   * context. However, clients can also configure truncation to retain messages up
+   * to a fraction of the maximum context size, which will reduce the need for
+   * future truncations and thus improve the cache rate. Truncation can be disabled
+   * entirely, which means the server will never truncate but would instead return
+   * an error if the conversation exceeds the model's input token limit.
    */
   truncation?: RealtimeAPI.RealtimeTruncation;
 }

src/resources/realtime/realtime.ts

Lines changed: 53 additions & 6 deletions
@@ -3050,8 +3050,19 @@ export interface RealtimeSessionCreateRequest {
   tracing?: RealtimeTracingConfig | null;
 
   /**
-   * Controls how the realtime conversation is truncated prior to model inference.
-   * The default is `auto`.
+   * When the number of tokens in a conversation exceeds the model's input token
+   * limit, the conversation will be truncated, meaning messages (starting from the
+   * oldest) will not be included in the model's context. A 32k context model with
+   * 4,096 max output tokens can only include 28,224 tokens in the context before
+   * truncation occurs. Clients can configure truncation behavior to truncate with a
+   * lower max token limit, which is an effective way to control token usage and
+   * cost. Truncation will reduce the number of cached tokens on the next turn
+   * (busting the cache), since messages are dropped from the beginning of the
+   * context. However, clients can also configure truncation to retain messages up
+   * to a fraction of the maximum context size, which will reduce the need for
+   * future truncations and thus improve the cache rate. Truncation can be disabled
+   * entirely, which means the server will never truncate but would instead return
+   * an error if the conversation exceeds the model's input token limit.
    */
   truncation?: RealtimeTruncation;
 }
@@ -3474,8 +3485,19 @@ export interface RealtimeTranscriptionSessionCreateRequest {
 }
 
 /**
- * Controls how the realtime conversation is truncated prior to model inference.
- * The default is `auto`.
+ * When the number of tokens in a conversation exceeds the model's input token
+ * limit, the conversation will be truncated, meaning messages (starting from the
+ * oldest) will not be included in the model's context. A 32k context model with
+ * 4,096 max output tokens can only include 28,224 tokens in the context before
+ * truncation occurs. Clients can configure truncation behavior to truncate with a
+ * lower max token limit, which is an effective way to control token usage and
+ * cost. Truncation will reduce the number of cached tokens on the next turn
+ * (busting the cache), since messages are dropped from the beginning of the
+ * context. However, clients can also configure truncation to retain messages up
+ * to a fraction of the maximum context size, which will reduce the need for
+ * future truncations and thus improve the cache rate. Truncation can be disabled
+ * entirely, which means the server will never truncate but would instead return
+ * an error if the conversation exceeds the model's input token limit.
  */
 export type RealtimeTruncation = 'auto' | 'disabled' | RealtimeTruncationRetentionRatio;
 
@@ -3486,15 +3508,40 @@ export type RealtimeTruncation = 'auto' | 'disabled' | RealtimeTruncationRetenti
  */
 export interface RealtimeTruncationRetentionRatio {
   /**
-   * Fraction of post-instruction conversation tokens to retain (0.0 - 1.0) when the
-   * conversation exceeds the input token limit.
+   * Fraction of post-instruction conversation tokens to retain (`0.0` - `1.0`) when
+   * the conversation exceeds the input token limit. Setting this to `0.8` means that
+   * messages will be dropped until 80% of the maximum allowed tokens are used. This
+   * helps reduce the frequency of truncations and improve cache rates.
    */
   retention_ratio: number;
 
   /**
    * Use retention ratio truncation.
    */
   type: 'retention_ratio';
+
+  /**
+   * Optional custom token limits for this truncation strategy. If not provided, the
+   * model's default token limits will be used.
+   */
+  token_limits?: RealtimeTruncationRetentionRatio.TokenLimits;
+}
+
+export namespace RealtimeTruncationRetentionRatio {
+  /**
+   * Optional custom token limits for this truncation strategy. If not provided, the
+   * model's default token limits will be used.
+   */
+  export interface TokenLimits {
+    /**
+     * Maximum tokens allowed in the conversation after instructions (which include
+     * tool definitions). For example, setting this to 5,000 would mean that truncation
+     * would occur when the conversation exceeds 5,000 tokens after instructions. This
+     * cannot be higher than the model's context window size minus the maximum output
+     * tokens.
+     */
+    post_instructions?: number;
+  }
 }
 
 /**
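
To make the retention-ratio arithmetic concrete, a sketch (not part of this diff) of a truncation config with invented numbers. With `post_instructions: 5_000` and `retention_ratio: 0.8`, truncation fires once the post-instruction conversation exceeds 5,000 tokens and drops the oldest messages until roughly 4,000 tokens (0.8 × 5,000) remain; the import path is assumed from this file's location:

import type { RealtimeTruncationRetentionRatio } from 'openai/resources/realtime/realtime';

// Cap the post-instruction conversation at 5,000 tokens and keep ~4,000 of
// them after each truncation, so the next few turns fit without re-truncating
// (and without busting the prompt cache on every turn).
const truncation: RealtimeTruncationRetentionRatio = {
  type: 'retention_ratio',
  retention_ratio: 0.8,
  token_limits: { post_instructions: 5_000 },
};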

src/resources/responses/responses.ts

Lines changed: 32 additions & 3 deletions
@@ -322,6 +322,12 @@ export namespace FileSearchTool {
    * Ranking options for search.
    */
   export interface RankingOptions {
+    /**
+     * Weights that control how reciprocal rank fusion balances semantic embedding
+     * matches versus sparse keyword matches when hybrid search is enabled.
+     */
+    hybrid_search?: RankingOptions.HybridSearch;
+
     /**
      * The ranker to use for the file search.
      */
@@ -334,6 +340,24 @@ export namespace FileSearchTool {
      */
     score_threshold?: number;
   }
+
+  export namespace RankingOptions {
+    /**
+     * Weights that control how reciprocal rank fusion balances semantic embedding
+     * matches versus sparse keyword matches when hybrid search is enabled.
+     */
+    export interface HybridSearch {
+      /**
+       * The weight of the embedding in the reciprocal rank fusion.
+       */
+      embedding_weight: number;
+
+      /**
+       * The weight of the text in the reciprocal rank fusion.
+       */
+      text_weight: number;
+    }
+  }
 }
 
 /**
@@ -3846,6 +3870,8 @@ export interface ResponseOutputText {
     | ResponseOutputText.FilePath
   >;
 
+  logprobs: Array<ResponseOutputText.Logprob>;
+
   /**
    * The text output from the model.
    */
@@ -3855,8 +3881,6 @@
    * The type of the output text. Always `output_text`.
    */
   type: 'output_text';
-
-  logprobs?: Array<ResponseOutputText.Logprob>;
 }
 
 export namespace ResponseOutputText {
@@ -5047,6 +5071,8 @@ export namespace Tool {
   /**
    * An optional list of uploaded files to make available to your code.
    */
   file_ids?: Array<string>;
+
+  memory_limit?: '1g' | '4g' | '16g' | '64g' | null;
 }
 
@@ -5066,7 +5092,10 @@ export namespace Tool {
   background?: 'transparent' | 'opaque' | 'auto';
 
   /**
-   * Control how much effort the model will exert to match the style and features, especially facial features, of input images. This parameter is only supported for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and `low`. Defaults to `low`.
+   * Control how much effort the model will exert to match the style and features,
+   * especially facial features, of input images. This parameter is only supported
+   * for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and
+   * `low`. Defaults to `low`.
    */
   input_fidelity?: 'high' | 'low' | null;
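
For illustration (not part of this diff), a sketch of supplying the new hybrid-search weights on a `file_search` tool; the model, query, vector store ID, and weights are invented. Weighting the embedding side 0.7 versus 0.3 biases the reciprocal-rank-fusion blend toward semantic matches over sparse keyword matches:

import OpenAI from 'openai';

const client = new OpenAI();

const response = await client.responses.create({
  model: 'gpt-4.1',
  input: 'What does the onboarding doc say about SSO?',
  tools: [
    {
      type: 'file_search',
      vector_store_ids: ['vs_abc123'],
      ranking_options: {
        // Favor semantic embedding matches over keyword matches in the fused ranking.
        hybrid_search: { embedding_weight: 0.7, text_weight: 0.3 },
      },
    },
  ],
});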

src/resources/shared.ts

Lines changed: 6 additions & 0 deletions
@@ -135,13 +135,19 @@ export interface CompoundFilter {
 export type CustomToolInputFormat = CustomToolInputFormat.Text | CustomToolInputFormat.Grammar;
 
 export namespace CustomToolInputFormat {
+  /**
+   * Unconstrained free-form text.
+   */
   export interface Text {
     /**
      * Unconstrained text format. Always `text`.
      */
     type: 'text';
   }
 
+  /**
+   * A grammar defined by the user.
+   */
   export interface Grammar {
     /**
      * The grammar definition.
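
For illustration (not part of this diff), a sketch of the two input formats; the `syntax` and `definition` fields on `Grammar` are assumed from the rest of this file, and the grammar itself is invented:

import type { CustomToolInputFormat } from 'openai/resources/shared';

// Free-form text input for a custom tool.
const freeform: CustomToolInputFormat = { type: 'text' };

// Constrain the tool's input to a tiny Lark grammar.
const constrained: CustomToolInputFormat = {
  type: 'grammar',
  syntax: 'lark',
  definition: 'start: "yes" | "no"',
};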

src/resources/vector-stores/file-batches.ts

Lines changed: 42 additions & 7 deletions
@@ -255,13 +255,6 @@ export namespace VectorStoreFileBatch {
 }
 
 export interface FileBatchCreateParams {
-  /**
-   * A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that
-   * the vector store should use. Useful for tools like `file_search` that can access
-   * files.
-   */
-  file_ids: Array<string>;
-
   /**
    * Set of 16 key-value pairs that can be attached to an object. This can be useful
    * for storing additional information about the object in a structured format, and
@@ -276,6 +269,48 @@ export interface FileBatchCreateParams {
    * strategy. Only applicable if `file_ids` is non-empty.
    */
   chunking_strategy?: VectorStoresAPI.FileChunkingStrategyParam;
+
+  /**
+   * A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that
+   * the vector store should use. Useful for tools like `file_search` that can access
+   * files. If `attributes` or `chunking_strategy` are provided, they will be applied
+   * to all files in the batch. Mutually exclusive with `files`.
+   */
+  file_ids?: Array<string>;
+
+  /**
+   * A list of objects that each include a `file_id` plus optional `attributes` or
+   * `chunking_strategy`. Use this when you need to override metadata for specific
+   * files. The global `attributes` or `chunking_strategy` will be ignored and must
+   * be specified for each file. Mutually exclusive with `file_ids`.
+   */
+  files?: Array<FileBatchCreateParams.File>;
+}
+
+export namespace FileBatchCreateParams {
+  export interface File {
+    /**
+     * A [File](https://platform.openai.com/docs/api-reference/files) ID that the
+     * vector store should use. Useful for tools like `file_search` that can access
+     * files.
+     */
+    file_id: string;
+
+    /**
+     * Set of 16 key-value pairs that can be attached to an object. This can be useful
+     * for storing additional information about the object in a structured format, and
+     * querying for objects via API or the dashboard. Keys are strings with a maximum
+     * length of 64 characters. Values are strings with a maximum length of 512
+     * characters, booleans, or numbers.
+     */
+    attributes?: { [key: string]: string | number | boolean } | null;
+
+    /**
+     * The chunking strategy used to chunk the file(s). If not set, will use the `auto`
+     * strategy. Only applicable if `file_ids` is non-empty.
+     */
+    chunking_strategy?: VectorStoresAPI.FileChunkingStrategyParam;
+  }
 }
 
 export interface FileBatchRetrieveParams {
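
For illustration (not part of this diff), a sketch of the new per-file form of the create params; the vector store ID, file IDs, attributes, and chunk sizes are invented:

import OpenAI from 'openai';

const client = new OpenAI();

// Each entry carries its own attributes or chunking strategy; this form is
// mutually exclusive with the top-level `file_ids` array.
await client.vectorStores.fileBatches.create('vs_abc123', {
  files: [
    { file_id: 'file-1', attributes: { team: 'docs' } },
    {
      file_id: 'file-2',
      chunking_strategy: {
        type: 'static',
        static: { max_chunk_size_tokens: 800, chunk_overlap_tokens: 400 },
      },
    },
  ],
});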

src/resources/videos.ts

Lines changed: 5 additions & 0 deletions
@@ -114,6 +114,11 @@ export interface Video {
    */
   progress: number;
 
+  /**
+   * The prompt that was used to generate the video.
+   */
+  prompt: string | null;
+
   /**
    * Identifier of the source video if this video is a remix.
    */
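
For illustration (not part of this diff), reading the new field from a retrieved video; the video ID is invented and the `retrieve` call is assumed from this resource's standard methods:

import OpenAI from 'openai';

const client = new OpenAI();

const video = await client.videos.retrieve('video_abc123');
console.log(video.prompt ?? '(no prompt recorded)'); // `prompt` may be null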

tests/api-resources/vector-stores/file-batches.test.ts

Lines changed: 2 additions & 10 deletions
@@ -8,8 +8,8 @@ const client = new OpenAI({
 });
 
 describe('resource fileBatches', () => {
-  test('create: only required params', async () => {
-    const responsePromise = client.vectorStores.fileBatches.create('vs_abc123', { file_ids: ['string'] });
+  test('create', async () => {
+    const responsePromise = client.vectorStores.fileBatches.create('vs_abc123', {});
     const rawResponse = await responsePromise.asResponse();
     expect(rawResponse).toBeInstanceOf(Response);
     const response = await responsePromise;
@@ -19,14 +19,6 @@ describe('resource fileBatches', () => {
     expect(dataAndResponse.response).toBe(rawResponse);
   });
 
-  test('create: required and optional params', async () => {
-    const response = await client.vectorStores.fileBatches.create('vs_abc123', {
-      file_ids: ['string'],
-      attributes: { foo: 'string' },
-      chunking_strategy: { type: 'auto' },
-    });
-  });
-
   test('retrieve: only required params', async () => {
     const responsePromise = client.vectorStores.fileBatches.retrieve('vsfb_abc123', {
       vector_store_id: 'vs_abc123',
