
Commit a136ac2

chore(openai-native): generalize Responses API options and extended prompt caching
1 parent ac5f3eb commit a136ac2

4 files changed (+29, −26 lines)


packages/types/src/model.ts

Lines changed: 4 additions & 0 deletions
```diff
@@ -74,6 +74,10 @@ export const modelInfoSchema = z.object({
 	contextWindow: z.number(),
 	supportsImages: z.boolean().optional(),
 	supportsPromptCache: z.boolean(),
+	// Optional default prompt cache retention policy for providers that support it.
+	// When set to "24h", extended prompt caching will be requested; when omitted
+	// or set to "in_memory", the default in‑memory cache is used.
+	promptCacheRetention: z.enum(["in_memory", "24h"]).optional(),
 	// Capability flag to indicate whether the model supports an output verbosity parameter
 	supportsVerbosity: z.boolean().optional(),
 	supportsReasoningBudget: z.boolean().optional(),
```
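For reference, a minimal sketch of how the extended schema behaves at parse time. The trimmed-down schema below repeats only the fields this commit touches; the sample model objects are hypothetical.

```ts
import { z } from "zod"

// Trimmed-down sketch of the fields this commit touches in modelInfoSchema.
const modelInfoSchema = z.object({
	contextWindow: z.number(),
	supportsPromptCache: z.boolean(),
	promptCacheRetention: z.enum(["in_memory", "24h"]).optional(),
})

// Opts into extended (24h) prompt cache retention.
modelInfoSchema.parse({
	contextWindow: 400000,
	supportsPromptCache: true,
	promptCacheRetention: "24h",
})

// Omitting the field is valid and leaves the default in-memory cache in effect.
modelInfoSchema.parse({ contextWindow: 128000, supportsPromptCache: true })

// Any other retention value fails validation with a ZodError:
// modelInfoSchema.parse({ contextWindow: 128000, supportsPromptCache: true, promptCacheRetention: "7d" })
```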

packages/types/src/providers/openai.ts

Lines changed: 3 additions & 0 deletions
```diff
@@ -11,6 +11,7 @@ export const openAiNativeModels = {
 	contextWindow: 400000,
 	supportsImages: true,
 	supportsPromptCache: true,
+	promptCacheRetention: "24h",
 	supportsReasoningEffort: ["none", "low", "medium", "high"],
 	reasoningEffort: "medium",
 	inputPrice: 1.25,
@@ -29,6 +30,7 @@
 	contextWindow: 400000,
 	supportsImages: true,
 	supportsPromptCache: true,
+	promptCacheRetention: "24h",
 	supportsReasoningEffort: ["low", "medium", "high"],
 	reasoningEffort: "medium",
 	inputPrice: 1.25,
@@ -43,6 +45,7 @@
 	contextWindow: 400000,
 	supportsImages: true,
 	supportsPromptCache: true,
+	promptCacheRetention: "24h",
 	supportsReasoningEffort: ["low", "medium", "high"],
 	reasoningEffort: "medium",
 	inputPrice: 0.25,
```
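Since the retention policy now lives on the model metadata, downstream code can discover opted-in models generically rather than matching model IDs. A hypothetical sketch (the import path is an assumption, not shown in this commit):

```ts
import { openAiNativeModels } from "@roo-code/types" // import path is an assumption

// List the model IDs that declare extended (24h) prompt cache retention.
const extendedRetentionModelIds = Object.entries(openAiNativeModels)
	.filter(([, info]) => info.supportsPromptCache && info.promptCacheRetention === "24h")
	.map(([id]) => id)

console.log(extendedRetentionModelIds)
```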

src/api/providers/openai-native.ts

Lines changed: 18 additions & 22 deletions
```diff
@@ -52,9 +52,10 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 	constructor(options: ApiHandlerOptions) {
 		super()
 		this.options = options
-		// Default to including reasoning.summary: "auto" for GPT‑5 unless explicitly disabled
-		if (this.options.enableGpt5ReasoningSummary === undefined) {
-			this.options.enableGpt5ReasoningSummary = true
+		// Default to including reasoning.summary: "auto" for models that support Responses API
+		// reasoning summaries unless explicitly disabled.
+		if (this.options.enableResponsesReasoningSummary === undefined) {
+			this.options.enableResponsesReasoningSummary = true
 		}
 		const apiKey = this.options.openAiNativeApiKey ?? "not-provided"
 		this.client = new OpenAI({ baseURL: this.options.openAiNativeBaseUrl, apiKey })
@@ -176,10 +177,10 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 		reasoningEffort: ReasoningEffortExtended | undefined,
 		metadata?: ApiHandlerCreateMessageMetadata,
 	): any {
-		// Build a request body
-		// Ensure we explicitly pass max_output_tokens for GPT‑5 based on Roo's reserved model response calculation
+		// Build a request body for the OpenAI Responses API.
+		// Ensure we explicitly pass max_output_tokens based on Roo's reserved model response calculation
 		// so requests do not default to very large limits (e.g., 120k).
-		interface Gpt5RequestBody {
+		interface ResponsesRequestBody {
 			model: string
 			input: Array<{ role: "user" | "assistant"; content: any[] } | { type: string; content: string }>
 			stream: boolean
@@ -202,7 +203,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 		// Decide whether to enable extended prompt cache retention for this request
 		const promptCacheRetention = this.getPromptCacheRetention(model)

-		const body: Gpt5RequestBody = {
+		const body: ResponsesRequestBody = {
 			model: model.id,
 			input: formattedInput,
 			stream: true,
@@ -218,7 +219,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 			? {
 					reasoning: {
 						...(reasoningEffort ? { effort: reasoningEffort } : {}),
-						...(this.options.enableGpt5ReasoningSummary ? { summary: "auto" as const } : {}),
+						...(this.options.enableResponsesReasoningSummary ? { summary: "auto" as const } : {}),
 					},
 				}
 			: {}),
@@ -271,7 +272,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 			}
 		} catch (sdkErr: any) {
 			// For errors, fallback to manual SSE via fetch
-			yield* this.makeGpt5ResponsesAPIRequest(requestBody, model, metadata, systemPrompt, messages)
+			yield* this.makeResponsesApiRequest(requestBody, model, metadata, systemPrompt, messages)
 		}
 	}

@@ -330,7 +331,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 		return formattedMessages
 	}

-	private async *makeGpt5ResponsesAPIRequest(
+	private async *makeResponsesApiRequest(
 		requestBody: any,
 		model: OpenAiNativeModel,
 		metadata?: ApiHandlerCreateMessageMetadata,
@@ -355,7 +356,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 		if (!response.ok) {
 			const errorText = await response.text()

-			let errorMessage = `GPT-5 API request failed (${response.status})`
+			let errorMessage = `OpenAI Responses API request failed (${response.status})`
 			let errorDetails = ""

 			// Try to parse error as JSON for better error messages
@@ -811,7 +812,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 			}
 		}

-		// Usage for done/completed is already handled by processGpt5Event in SDK path.
+		// Usage for done/completed is already handled by processEvent in the SDK path.
 		// For SSE path, usage often arrives separately; avoid double-emitting here.
 	}
 	// These are structural or status events, we can just log them at a lower level or ignore.
@@ -988,19 +989,14 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 	/**
 	 * Returns the appropriate prompt cache retention policy for the given model, if any.
 	 *
-	 * Extended prompt cache retention ("24h") is only available for GPT‑5.1 family models that
-	 * support prompt caching. For other models we omit the parameter so the default in‑memory
-	 * policy is used.
+	 * The policy is driven by ModelInfo.promptCacheRetention so that model-specific details
+	 * live in the shared types layer rather than this provider. When set to "24h" and the
+	 * model supports prompt caching, extended prompt cache retention is requested.
	 */
 	private getPromptCacheRetention(model: OpenAiNativeModel): "24h" | undefined {
 		if (!model.info.supportsPromptCache) return undefined

-		// Extended prompt cache retention is only supported for GPT‑5.1 models:
-		// - gpt-5.1
-		// - gpt-5.1-codex
-		// - gpt-5.1-codex-mini
-		// - gpt-5.1-chat-latest (future compatibility)
-		if (model.id.startsWith("gpt-5.1")) {
+		if (model.info.promptCacheRetention === "24h") {
 			return "24h"
 		}

@@ -1113,7 +1109,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 		if (reasoningEffort) {
 			requestBody.reasoning = {
 				effort: reasoningEffort,
-				...(this.options.enableGpt5ReasoningSummary ? { summary: "auto" as const } : {}),
+				...(this.options.enableResponsesReasoningSummary ? { summary: "auto" as const } : {}),
 			}
 		}
```
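The commit computes `promptCacheRetention` just before building the request body, but the hunks above do not show where the value is attached to the outgoing request. Below is a hypothetical sketch of that wiring, assuming the Responses API accepts a top-level `prompt_cache_retention` field; the field name is an assumption, not confirmed by this diff.

```ts
// Sketch only: loosely mirrors the ResponsesRequestBody shape from the diff.
interface ResponsesBodySketch {
	model: string
	input: unknown[]
	stream: boolean
	prompt_cache_retention?: "24h" // assumed field name
}

function buildBodySketch(model: string, input: unknown[], retention: "24h" | undefined): ResponsesBodySketch {
	return {
		model,
		input,
		stream: true,
		// Omitting the field keeps the provider's default in-memory cache policy.
		...(retention ? { prompt_cache_retention: retention } : {}),
	}
}

console.log(buildBodySketch("gpt-5.1", [], "24h"))
// => { model: "gpt-5.1", input: [], stream: true, prompt_cache_retention: "24h" }
```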

src/shared/api.ts

Lines changed: 4 additions & 4 deletions
```diff
@@ -13,11 +13,11 @@ import {
 // Extend ProviderSettings (minus apiProvider) with handler-specific toggles.
 export type ApiHandlerOptions = Omit<ProviderSettings, "apiProvider"> & {
 	/**
-	 * When true and using GPT‑5 Responses API, include reasoning.summary: "auto"
-	 * so the API returns reasoning summaries (we already parse and surface them).
-	 * Defaults to true; set to false to disable summaries.
+	 * When true and using OpenAI Responses API models that support reasoning summaries,
+	 * include reasoning.summary: "auto" so the API returns summaries (we already parse
+	 * and surface them). Defaults to true; set to false to disable summaries.
 	 */
-	enableGpt5ReasoningSummary?: boolean
+	enableResponsesReasoningSummary?: boolean
 	/**
 	 * Optional override for Ollama's num_ctx parameter.
 	 * When set, this value will be used in Ollama chat requests.
```
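A usage sketch for the renamed option; the handler and option names come from this commit, while the surrounding setup is illustrative only.

```ts
import { OpenAiNativeHandler } from "./openai-native" // import path assumed

// Leaving enableResponsesReasoningSummary undefined lets the constructor default
// it to true; set it to false to suppress reasoning summaries explicitly.
const handler = new OpenAiNativeHandler({
	openAiNativeApiKey: process.env.OPENAI_API_KEY ?? "not-provided",
	enableResponsesReasoningSummary: false,
} as any) // sketch only: real code would satisfy the full ApiHandlerOptions type
```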
