Draft
Changes from all commits
Commits
17 commits
84f58cd
feat(models): add per-model timeout disable to avoid global override …
hannesrudolph Oct 10, 2025
9ecd7bb
feat(openai-models): add gpt-5-pro-2025-10-06 with timeout disabled a…
hannesrudolph Oct 10, 2025
d73bdf3
revert: per-model disableTimeout implementation; remove flag from gpt…
hannesrudolph Oct 10, 2025
3949621
feat(openai-native): background mode + auto-resume and poll fallback
hannesrudolph Oct 12, 2025
f3b654d
chore: remove TEMP_OPENAI_BACKGROUND_TASK_DOCS.DM and ignore temp docs
hannesrudolph Oct 12, 2025
cb975a2
feat(openai-models): update maxTokens for gpt-5-pro-2025-10-06 from 2…
hannesrudolph Oct 16, 2025
c6b7681
feat(chat): enhance background status handling and UI updates for ter…
hannesrudolph Oct 16, 2025
e118846
fix: Address PR review feedback - fix stale resume IDs, update model …
hannesrudolph Oct 16, 2025
7cff4db
fix(webview): define chevron icon via codicon and add missing isExpan…
hannesrudolph Oct 24, 2025
d552cce
fix(openai): update reasoning effort default to high and improve mode…
hannesrudolph Oct 24, 2025
49f90b9
webview-ui: use standard API Request icons for background mode; keep …
hannesrudolph Oct 24, 2025
50242b4
fix(openai-native): add logging for background resume and polling; cl…
hannesrudolph Oct 24, 2025
9803d43
fix(types/openai): correct GPT-5 Pro description typos/grammar; perf(…
hannesrudolph Oct 25, 2025
f988dee
fix: use hyphen instead of em dash in GPT-5 Pro description, remove u…
hannesrudolph Dec 10, 2025
f138361
fix: fix type errors - add metadata to ClineMessage, fix getResponseI…
hannesrudolph Dec 10, 2025
8f5e2ed
fix: track currentRequestResponseId in SSE fallback path for backgrou…
hannesrudolph Dec 10, 2025
706138e
Handle terminal failures in background resume/poll
hannesrudolph Dec 10, 2025
7 changes: 5 additions & 2 deletions .gitignore
@@ -49,6 +49,9 @@ logs

# Qdrant
qdrant_storage/

# Architect plans
plans/
plans/

# ignore temp background docs
TEMP_OPENAI_BACKGROUND_TASK_DOCS.DM
TEMP_DOCS/
12 changes: 12 additions & 0 deletions packages/types/src/message.ts
@@ -279,6 +279,18 @@ export const clineMessageSchema = z.object({
isProtected: z.boolean().optional(),
apiProtocol: z.union([z.literal("openai"), z.literal("anthropic")]).optional(),
isAnswered: z.boolean().optional(),
/**
* Optional metadata for API request tracking.
* Used for background mode status display.
*/
metadata: z
.object({
background: z.boolean().optional(),
backgroundStatus: z
.enum(["queued", "in_progress", "reconnecting", "polling", "completed", "failed", "canceled"])
.optional(),
})
.optional(),
})

export type ClineMessage = z.infer<typeof clineMessageSchema>
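For context, here is a rough sketch of how UI code might consume the new optional `metadata` field when rendering an API-request row. Only the field names come from the schema above; the import path, label map, and helper function are illustrative assumptions, not code from this PR.

```ts
// Illustrative only: map the backgroundStatus enum to display labels.
// Field names come from the schema above; everything else is assumed.
import type { ClineMessage } from "@roo-code/types" // import path assumed

const statusLabels: Record<string, string> = {
	queued: "Queued",
	in_progress: "Running in background",
	reconnecting: "Reconnecting to stream",
	polling: "Polling for result",
	completed: "Completed",
	failed: "Failed",
	canceled: "Canceled",
}

function backgroundStatusLabel(message: ClineMessage): string | undefined {
	// Only messages flagged as background requests get a status label.
	if (!message.metadata?.background) return undefined
	return statusLabels[message.metadata.backgroundStatus ?? "queued"]
}
```

Because both new keys are optional, existing messages without `metadata` still parse unchanged.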
5 changes: 5 additions & 0 deletions packages/types/src/model.ts
@@ -86,6 +86,11 @@ export const modelInfoSchema = z.object({
// Capability flag to indicate whether the model supports temperature parameter
supportsTemperature: z.boolean().optional(),
defaultTemperature: z.number().optional(),
// When true, force-disable request timeouts for this model (providers will set timeout=0)
disableTimeout: z.boolean().optional(),
// When true, this model must be invoked using Responses background mode.
// Providers should auto-enable background:true, stream:true, and store:true.
backgroundMode: z.boolean().optional(),
requiredReasoningBudget: z.boolean().optional(),
supportsReasoningEffort: z
.union([z.boolean(), z.array(z.enum(["disable", "none", "minimal", "low", "medium", "high", "xhigh"]))])
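A minimal sketch of how a provider could honor the two new flags, matching the comments above (timeouts forced to 0, and background-mode models always sent with background/stream/store). The local `ModelFlags` type and helper names are made up for illustration; this is not the providers' actual code.

```ts
// Sketch under the assumptions stated above; not the actual provider implementation.
interface ModelFlags {
	disableTimeout?: boolean
	backgroundMode?: boolean
}

// disableTimeout forces timeout = 0 (no timeout), overriding any configured value.
function resolveTimeoutMs(info: ModelFlags, configuredTimeoutMs: number): number {
	return info.disableTimeout ? 0 : configuredTimeoutMs
}

// backgroundMode models are always sent with background + stream + store enabled.
function resolveResponsesFlags(info: ModelFlags, userOptIn: boolean) {
	const useBackground = info.backgroundMode === true || userOptIn
	return useBackground ? { background: true, stream: true, store: true } : { stream: true }
}
```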
9 changes: 9 additions & 0 deletions packages/types/src/provider-settings.ts
@@ -304,6 +304,15 @@ const openAiNativeSchema = apiModelIdProviderModelSchema.extend({
// OpenAI Responses API service tier for openai-native provider only.
// UI should only expose this when the selected model supports flex/priority.
openAiNativeServiceTier: serviceTierSchema.optional(),
// Enable OpenAI Responses background mode when using Responses API.
// Opt-in; defaults to false when omitted.
openAiNativeBackgroundMode: z.boolean().optional(),
// Background auto-resume/poll settings (no UI; plumbed via options)
openAiNativeBackgroundAutoResume: z.boolean().optional(),
openAiNativeBackgroundResumeMaxRetries: z.number().int().min(0).optional(),
openAiNativeBackgroundResumeBaseDelayMs: z.number().int().min(0).optional(),
openAiNativeBackgroundPollIntervalMs: z.number().int().min(0).optional(),
openAiNativeBackgroundPollMaxMinutes: z.number().int().min(1).optional(),
})

const mistralSchema = apiModelIdProviderModelSchema.extend({
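To make the knobs above concrete, here is a hedged sketch of how the resume settings could drive retry timing, with polling as the fallback. The default values, helper name, and exponential backoff schedule are assumptions for illustration only, not the provider's actual logic.

```ts
// Illustrative retry loop; option names match the schema above, defaults are assumed.
interface BackgroundRetryOptions {
	openAiNativeBackgroundAutoResume?: boolean
	openAiNativeBackgroundResumeMaxRetries?: number
	openAiNativeBackgroundResumeBaseDelayMs?: number
	openAiNativeBackgroundPollIntervalMs?: number
	openAiNativeBackgroundPollMaxMinutes?: number
}

async function resumeWithBackoff(
	opts: BackgroundRetryOptions,
	attemptResume: () => Promise<boolean>,
): Promise<boolean> {
	if (opts.openAiNativeBackgroundAutoResume === false) return false
	const maxRetries = opts.openAiNativeBackgroundResumeMaxRetries ?? 3
	const baseDelayMs = opts.openAiNativeBackgroundResumeBaseDelayMs ?? 1000
	for (let attempt = 0; attempt < maxRetries; attempt++) {
		if (await attemptResume()) return true
		// Exponential backoff between resume attempts: base, 2x, 4x, ...
		await new Promise((resolve) => setTimeout(resolve, baseDelayMs * 2 ** attempt))
	}
	// The caller would then fall back to polling every PollIntervalMs,
	// giving up after PollMaxMinutes.
	return false
}
```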
15 changes: 15 additions & 0 deletions packages/types/src/providers/openai.ts
@@ -47,6 +47,21 @@ export const openAiNativeModels = {
],
description: "GPT-5.1: The best model for coding and agentic tasks across domains",
},
"gpt-5-pro-2025-10-06": {
maxTokens: 128000,
contextWindow: 400000,
supportsImages: true,
supportsPromptCache: false,
supportsReasoningEffort: false, // Set to false so the UI does not display the reasoning-effort selector
reasoningEffort: "high", // The Pro model defaults to high reasoning effort, which must be specified explicitly
inputPrice: 15.0,
outputPrice: 120.0,
description:
"GPT-5 Pro: A slow, reasoning-focused model for complex problems. Uses background mode with resilient streaming - requests may take some time and will automatically reconnect if they time out.",
supportsVerbosity: true,
supportsTemperature: false,
backgroundMode: true,
},
"gpt-5.1-codex": {
maxTokens: 128000,
contextWindow: 400000,
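As a quick sanity check on the pricing fields in the gpt-5-pro-2025-10-06 entry, assuming the usual convention that inputPrice and outputPrice are USD per million tokens (an assumption, not stated in this diff):

```ts
// Worked example: cost of a request with 10k input and 2k output tokens.
const inputPricePerMillion = 15.0
const outputPricePerMillion = 120.0
const cost =
	(10_000 / 1_000_000) * inputPricePerMillion + (2_000 / 1_000_000) * outputPricePerMillion
console.log(cost.toFixed(2)) // "0.39" => about $0.15 for input plus $0.24 for output
```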
30 changes: 30 additions & 0 deletions src/api/providers/__tests__/lm-studio-timeout.spec.ts
@@ -88,4 +88,34 @@ describe("LmStudioHandler timeout configuration", () => {
}),
)
})

it("should force zero timeout when model info disables timeout", () => {
;(getApiRequestTimeout as any).mockReturnValue(600000)

const spy = vitest.spyOn(LmStudioHandler.prototype as any, "getModel").mockReturnValue({
id: "llama2",
info: {
maxTokens: -1,
contextWindow: 128000,
supportsPromptCache: false,
supportsImages: true,
disableTimeout: true,
},
})

const options: ApiHandlerOptions = {
apiModelId: "llama2",
lmStudioModelId: "llama2",
}

new LmStudioHandler(options)

expect(mockOpenAIConstructor).toHaveBeenCalledWith(
expect.objectContaining({
timeout: 0,
}),
)

spy.mockRestore()
})
})
32 changes: 32 additions & 0 deletions src/api/providers/__tests__/openai-native-usage.spec.ts
@@ -389,6 +389,38 @@ describe("OpenAiNativeHandler - normalizeUsage", () => {
})
})

it("should produce identical usage chunk when background mode is enabled", () => {
const usage = {
input_tokens: 120,
output_tokens: 60,
cache_creation_input_tokens: 10,
cache_read_input_tokens: 30,
}

const baselineHandler = new OpenAiNativeHandler({
openAiNativeApiKey: "test-key",
apiModelId: "gpt-5-pro-2025-10-06",
})
const backgroundHandler = new OpenAiNativeHandler({
openAiNativeApiKey: "test-key",
apiModelId: "gpt-5-pro-2025-10-06",
openAiNativeBackgroundMode: true,
})

const baselineUsage = (baselineHandler as any).normalizeUsage(usage, baselineHandler.getModel())
const backgroundUsage = (backgroundHandler as any).normalizeUsage(usage, backgroundHandler.getModel())

expect(baselineUsage).toMatchObject({
type: "usage",
inputTokens: 120,
outputTokens: 60,
cacheWriteTokens: 10,
cacheReadTokens: 30,
totalCost: expect.any(Number),
})
expect(backgroundUsage).toEqual(baselineUsage)
})

describe("cost calculation", () => {
it("should pass total input tokens to calculateApiCostOpenAI", () => {
const usage = {