diff --git a/.gitignore b/.gitignore index 54cf66cee7a..ed8e3978995 100644 --- a/.gitignore +++ b/.gitignore @@ -49,6 +49,9 @@ logs # Qdrant qdrant_storage/ - # Architect plans -plans/ \ No newline at end of file +plans/ + +# ignore temp background docs +TEMP_OPENAI_BACKGROUND_TASK_DOCS.DM +TEMP_DOCS/ diff --git a/packages/types/src/message.ts b/packages/types/src/message.ts index 82f58f29f28..39b7ed25e3b 100644 --- a/packages/types/src/message.ts +++ b/packages/types/src/message.ts @@ -279,6 +279,18 @@ export const clineMessageSchema = z.object({ isProtected: z.boolean().optional(), apiProtocol: z.union([z.literal("openai"), z.literal("anthropic")]).optional(), isAnswered: z.boolean().optional(), + /** + * Optional metadata for API request tracking. + * Used for background mode status display. + */ + metadata: z + .object({ + background: z.boolean().optional(), + backgroundStatus: z + .enum(["queued", "in_progress", "reconnecting", "polling", "completed", "failed", "canceled"]) + .optional(), + }) + .optional(), }) export type ClineMessage = z.infer diff --git a/packages/types/src/model.ts b/packages/types/src/model.ts index 6c7d0a4b4b6..7bb689d7884 100644 --- a/packages/types/src/model.ts +++ b/packages/types/src/model.ts @@ -86,6 +86,11 @@ export const modelInfoSchema = z.object({ // Capability flag to indicate whether the model supports temperature parameter supportsTemperature: z.boolean().optional(), defaultTemperature: z.number().optional(), + // When true, force-disable request timeouts for this model (providers will set timeout=0) + disableTimeout: z.boolean().optional(), + // When true, this model must be invoked using Responses background mode. + // Providers should auto-enable background:true, stream:true, and store:true. + backgroundMode: z.boolean().optional(), requiredReasoningBudget: z.boolean().optional(), supportsReasoningEffort: z .union([z.boolean(), z.array(z.enum(["disable", "none", "minimal", "low", "medium", "high", "xhigh"]))]) diff --git a/packages/types/src/provider-settings.ts b/packages/types/src/provider-settings.ts index d713a47d6b4..a577ad4d1ba 100644 --- a/packages/types/src/provider-settings.ts +++ b/packages/types/src/provider-settings.ts @@ -304,6 +304,15 @@ const openAiNativeSchema = apiModelIdProviderModelSchema.extend({ // OpenAI Responses API service tier for openai-native provider only. // UI should only expose this when the selected model supports flex/priority. openAiNativeServiceTier: serviceTierSchema.optional(), + // Enable OpenAI Responses background mode when using Responses API. + // Opt-in; defaults to false when omitted. 
+ openAiNativeBackgroundMode: z.boolean().optional(), + // Background auto-resume/poll settings (no UI; plumbed via options) + openAiNativeBackgroundAutoResume: z.boolean().optional(), + openAiNativeBackgroundResumeMaxRetries: z.number().int().min(0).optional(), + openAiNativeBackgroundResumeBaseDelayMs: z.number().int().min(0).optional(), + openAiNativeBackgroundPollIntervalMs: z.number().int().min(0).optional(), + openAiNativeBackgroundPollMaxMinutes: z.number().int().min(1).optional(), }) const mistralSchema = apiModelIdProviderModelSchema.extend({ diff --git a/packages/types/src/providers/openai.ts b/packages/types/src/providers/openai.ts index 722b57677cc..4fc245907ac 100644 --- a/packages/types/src/providers/openai.ts +++ b/packages/types/src/providers/openai.ts @@ -47,6 +47,21 @@ export const openAiNativeModels = { ], description: "GPT-5.1: The best model for coding and agentic tasks across domains", }, + "gpt-5-pro-2025-10-06": { + maxTokens: 128000, + contextWindow: 400000, + supportsImages: true, + supportsPromptCache: false, + supportsReasoningEffort: false, // This is set to false to prevent the ui from displaying the reasoning effort selector + reasoningEffort: "high", // Pro model uses high reasoning effort by default and must be specified + inputPrice: 15.0, + outputPrice: 120.0, + description: + "GPT-5 Pro: A slow, reasoning-focused model for complex problems. Uses background mode with resilient streaming - requests may take some time and will automatically reconnect if they time out.", + supportsVerbosity: true, + supportsTemperature: false, + backgroundMode: true, + }, "gpt-5.1-codex": { maxTokens: 128000, contextWindow: 400000, diff --git a/src/api/providers/__tests__/lm-studio-timeout.spec.ts b/src/api/providers/__tests__/lm-studio-timeout.spec.ts index 659fcaaf670..a7abdb46798 100644 --- a/src/api/providers/__tests__/lm-studio-timeout.spec.ts +++ b/src/api/providers/__tests__/lm-studio-timeout.spec.ts @@ -88,4 +88,34 @@ describe("LmStudioHandler timeout configuration", () => { }), ) }) + + it("should force zero timeout when model info disables timeout", () => { + ;(getApiRequestTimeout as any).mockReturnValue(600000) + + const spy = vitest.spyOn(LmStudioHandler.prototype as any, "getModel").mockReturnValue({ + id: "llama2", + info: { + maxTokens: -1, + contextWindow: 128000, + supportsPromptCache: false, + supportsImages: true, + disableTimeout: true, + }, + }) + + const options: ApiHandlerOptions = { + apiModelId: "llama2", + lmStudioModelId: "llama2", + } + + new LmStudioHandler(options) + + expect(mockOpenAIConstructor).toHaveBeenCalledWith( + expect.objectContaining({ + timeout: 0, + }), + ) + + spy.mockRestore() + }) }) diff --git a/src/api/providers/__tests__/openai-native-usage.spec.ts b/src/api/providers/__tests__/openai-native-usage.spec.ts index 48e1c26877b..4068a91bcd6 100644 --- a/src/api/providers/__tests__/openai-native-usage.spec.ts +++ b/src/api/providers/__tests__/openai-native-usage.spec.ts @@ -389,6 +389,38 @@ describe("OpenAiNativeHandler - normalizeUsage", () => { }) }) + it("should produce identical usage chunk when background mode is enabled", () => { + const usage = { + input_tokens: 120, + output_tokens: 60, + cache_creation_input_tokens: 10, + cache_read_input_tokens: 30, + } + + const baselineHandler = new OpenAiNativeHandler({ + openAiNativeApiKey: "test-key", + apiModelId: "gpt-5-pro-2025-10-06", + }) + const backgroundHandler = new OpenAiNativeHandler({ + openAiNativeApiKey: "test-key", + apiModelId: "gpt-5-pro-2025-10-06", + 
openAiNativeBackgroundMode: true, + }) + + const baselineUsage = (baselineHandler as any).normalizeUsage(usage, baselineHandler.getModel()) + const backgroundUsage = (backgroundHandler as any).normalizeUsage(usage, backgroundHandler.getModel()) + + expect(baselineUsage).toMatchObject({ + type: "usage", + inputTokens: 120, + outputTokens: 60, + cacheWriteTokens: 10, + cacheReadTokens: 30, + totalCost: expect.any(Number), + }) + expect(backgroundUsage).toEqual(baselineUsage) + }) + describe("cost calculation", () => { it("should pass total input tokens to calculateApiCostOpenAI", () => { const usage = { diff --git a/src/api/providers/__tests__/openai-native.spec.ts b/src/api/providers/__tests__/openai-native.spec.ts index 0482b8893b8..c0682d16449 100644 --- a/src/api/providers/__tests__/openai-native.spec.ts +++ b/src/api/providers/__tests__/openai-native.spec.ts @@ -3,6 +3,7 @@ import { Anthropic } from "@anthropic-ai/sdk" import { OpenAiNativeHandler } from "../openai-native" +import type { ApiHandlerCreateMessageMetadata } from "../../index" import { ApiHandlerOptions } from "../../../shared/api" // Mock OpenAI client - now everything uses Responses API @@ -1402,3 +1403,517 @@ describe("GPT-5 streaming event coverage (additional)", () => { }) }) }) + +describe("OpenAI Native background mode behavior", () => { + const systemPrompt = "System prompt" + const baseMessages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "hi" }] + const createMinimalIterable = () => ({ + async *[Symbol.asyncIterator]() { + yield { + type: "response.done", + response: { id: "resp_minimal", usage: { input_tokens: 1, output_tokens: 1 } }, + } + }, + }) + const createUsageIterable = () => ({ + async *[Symbol.asyncIterator]() { + yield { type: "response.text.delta", delta: "Hello" } + yield { + type: "response.done", + response: { + id: "resp_usage", + usage: { input_tokens: 120, output_tokens: 60 }, + }, + } + }, + }) + + beforeEach(() => { + mockResponsesCreate.mockClear() + }) + + afterEach(() => { + if ((global as any).fetch) { + delete (global as any).fetch + } + }) + + const metadataStoreFalse: ApiHandlerCreateMessageMetadata = { taskId: "background-test", store: false } + + it("auto-enables background mode for gpt-5-pro when no override is specified", async () => { + const handler = new OpenAiNativeHandler({ + apiModelId: "gpt-5-pro-2025-10-06", + openAiNativeApiKey: "test", + // openAiNativeBackgroundMode is undefined + }) + + mockResponsesCreate.mockResolvedValueOnce(createMinimalIterable()) + + const chunks: any[] = [] + for await (const chunk of handler.createMessage(systemPrompt, baseMessages, metadataStoreFalse)) { + chunks.push(chunk) + } + + expect(chunks).not.toHaveLength(0) + const requestBody = mockResponsesCreate.mock.calls[0][0] + expect(requestBody.background).toBe(true) + expect(requestBody.stream).toBe(true) + expect(requestBody.store).toBe(true) + }) + it("sends background:true, stream:true, and forces store:true for gpt-5-pro when background mode is enabled", async () => { + const handler = new OpenAiNativeHandler({ + apiModelId: "gpt-5-pro-2025-10-06", + openAiNativeApiKey: "test", + openAiNativeBackgroundMode: true, + }) + + mockResponsesCreate.mockResolvedValueOnce(createMinimalIterable()) + + const chunks: any[] = [] + for await (const chunk of handler.createMessage(systemPrompt, baseMessages, metadataStoreFalse)) { + chunks.push(chunk) + } + + expect(chunks).not.toHaveLength(0) + + const requestBody = mockResponsesCreate.mock.calls[0][0] + 
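+		// store:false in the caller's metadata must be overridden: background responses have to be stored so a dropped stream can be resumed or polled later.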
expect(requestBody.background).toBe(true) + expect(requestBody.stream).toBe(true) + expect(requestBody.store).toBe(true) + expect(requestBody.instructions).toBe(systemPrompt) + expect(requestBody.model).toBe("gpt-5-pro-2025-10-06") + expect(Array.isArray(requestBody.input)).toBe(true) + expect(requestBody.input.length).toBeGreaterThan(0) + + mockResponsesCreate.mockClear() + + const handlerWithOptionFalse = new OpenAiNativeHandler({ + apiModelId: "gpt-5-pro-2025-10-06", + openAiNativeApiKey: "test", + openAiNativeBackgroundMode: false, // metadata still enforces background mode + }) + + mockResponsesCreate.mockResolvedValueOnce(createMinimalIterable()) + + for await (const chunk of handlerWithOptionFalse.createMessage( + systemPrompt, + baseMessages, + metadataStoreFalse, + )) { + chunks.push(chunk) + } + + const requestBodyWithOptionFalse = mockResponsesCreate.mock.calls[0][0] + // Still enabled due to model.info.backgroundMode + expect(requestBodyWithOptionFalse.background).toBe(true) + expect(requestBodyWithOptionFalse.store).toBe(true) + expect(requestBodyWithOptionFalse.stream).toBe(true) + }) + + it("forces store:true and includes background:true when falling back to SSE", async () => { + const handler = new OpenAiNativeHandler({ + apiModelId: "gpt-5-pro-2025-10-06", + openAiNativeApiKey: "test", + openAiNativeBackgroundMode: true, + }) + + mockResponsesCreate.mockResolvedValueOnce({}) + + const encoder = new TextEncoder() + const sseStream = new ReadableStream({ + start(controller) { + controller.enqueue( + encoder.encode( + 'data: {"type":"response.done","response":{"id":"resp_1","usage":{"input_tokens":1,"output_tokens":1}}}\n\n', + ), + ) + controller.enqueue(encoder.encode("data: [DONE]\n\n")) + controller.close() + }, + }) + + const mockFetch = vitest.fn().mockResolvedValue( + new Response(sseStream, { + status: 200, + headers: { "Content-Type": "text/event-stream" }, + }), + ) + global.fetch = mockFetch as any + + const chunks: any[] = [] + for await (const chunk of handler.createMessage(systemPrompt, baseMessages, metadataStoreFalse)) { + chunks.push(chunk) + } + + expect(mockFetch).toHaveBeenCalledTimes(1) + const requestInit = mockFetch.mock.calls[0][1] as RequestInit + expect(requestInit?.body).toBeDefined() + + const parsedBody = JSON.parse(requestInit?.body as string) + expect(parsedBody.background).toBe(true) + expect(parsedBody.store).toBe(true) + expect(parsedBody.stream).toBe(true) + expect(parsedBody.model).toBe("gpt-5-pro-2025-10-06") + }) + + it("emits identical usage chunk when background mode is enabled", async () => { + const collectUsageChunk = async (options: ApiHandlerOptions) => { + mockResponsesCreate.mockResolvedValueOnce(createUsageIterable()) + const handler = new OpenAiNativeHandler(options) + const chunks: any[] = [] + for await (const chunk of handler.createMessage(systemPrompt, baseMessages)) { + chunks.push(chunk) + } + const usageChunk = chunks.find((chunk) => chunk.type === "usage") + mockResponsesCreate.mockClear() + return usageChunk + } + + const baselineUsage = await collectUsageChunk({ + apiModelId: "gpt-5-pro-2025-10-06", + openAiNativeApiKey: "test", + }) + + expect(baselineUsage).toBeDefined() + + const backgroundUsage = await collectUsageChunk({ + apiModelId: "gpt-5-pro-2025-10-06", + openAiNativeApiKey: "test", + openAiNativeBackgroundMode: true, + }) + + expect(backgroundUsage).toBeDefined() + expect(backgroundUsage).toEqual(baselineUsage) + }) + + it("emits background status chunks for Responses events (SDK path)", async () => { + 
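+		// SDK async-iterator path: response.queued / response.in_progress / response.done should map to background status chunks in order.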
const handler = new OpenAiNativeHandler({ + apiModelId: "gpt-5-pro-2025-10-06", + openAiNativeApiKey: "test", + openAiNativeBackgroundMode: true, + }) + + const createStatusIterable = () => ({ + async *[Symbol.asyncIterator]() { + yield { type: "response.queued", response: { id: "resp_bg" } } + yield { type: "response.in_progress" } + yield { type: "response.text.delta", delta: "Hello" } + yield { + type: "response.done", + response: { id: "resp_bg", usage: { input_tokens: 1, output_tokens: 1 } }, + } + }, + }) + mockResponsesCreate.mockResolvedValueOnce(createStatusIterable()) + + const chunks: any[] = [] + for await (const chunk of handler.createMessage(systemPrompt, baseMessages)) { + chunks.push(chunk) + } + + const statusChunks = chunks.filter((c) => c.type === "status") + expect(statusChunks).toEqual([ + { type: "status", mode: "background", status: "queued", responseId: "resp_bg" }, + { type: "status", mode: "background", status: "in_progress" }, + { type: "status", mode: "background", status: "completed", responseId: "resp_bg" }, + ]) + }) + + it("emits background status chunks for Responses events (SSE fallback)", async () => { + // Force fallback by making SDK return non-iterable + mockResponsesCreate.mockResolvedValueOnce({}) + + const encoder = new TextEncoder() + const sseStream = new ReadableStream({ + start(controller) { + controller.enqueue(encoder.encode('data: {"type":"response.queued","response":{"id":"resp_bg2"}}\n\n')) + controller.enqueue(encoder.encode('data: {"type":"response.in_progress"}\n\n')) + controller.enqueue(encoder.encode('data: {"type":"response.text.delta","delta":"Hi"}\n\n')) + controller.enqueue( + encoder.encode( + 'data: {"type":"response.done","response":{"id":"resp_bg2","usage":{"input_tokens":1,"output_tokens":1}}}\n\n', + ), + ) + controller.enqueue(encoder.encode("data: [DONE]\n\n")) + controller.close() + }, + }) + + const mockFetch = vitest.fn().mockResolvedValue( + new Response(sseStream, { + status: 200, + headers: { "Content-Type": "text/event-stream" }, + }), + ) + global.fetch = mockFetch as any + + const handler = new OpenAiNativeHandler({ + apiModelId: "gpt-5-pro-2025-10-06", + openAiNativeApiKey: "test", + openAiNativeBackgroundMode: true, + }) + + const chunks: any[] = [] + for await (const chunk of handler.createMessage(systemPrompt, baseMessages)) { + chunks.push(chunk) + } + + const statusChunks = chunks.filter((c) => c.type === "status") + expect(statusChunks).toEqual([ + { type: "status", mode: "background", status: "queued", responseId: "resp_bg2" }, + { type: "status", mode: "background", status: "in_progress" }, + { type: "status", mode: "background", status: "completed", responseId: "resp_bg2" }, + ]) + + // Clean up fetch + delete (global as any).fetch + }) +}) + +describe("OpenAI Native streaming metadata tracking", () => { + beforeEach(() => { + mockResponsesCreate.mockClear() + }) + + it("tracks sequence_number from streaming events and exposes via getLastSequenceNumber", async () => { + const handler = new OpenAiNativeHandler({ + apiModelId: "gpt-5-pro-2025-10-06", + openAiNativeApiKey: "test", + }) + + const createSequenceIterable = () => ({ + async *[Symbol.asyncIterator]() { + yield { type: "response.text.delta", delta: "A", sequence_number: 1 } + yield { type: "response.reasoning.delta", delta: "B", sequence_number: 2 } + yield { + type: "response.done", + sequence_number: 3, + response: { id: "resp_123", usage: { input_tokens: 1, output_tokens: 2 } }, + } + }, + }) + + 
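+		// Each event carries sequence_number; the handler should track the latest one as its resume cursor (exposed via getLastSequenceNumber).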
mockResponsesCreate.mockResolvedValueOnce(createSequenceIterable()) + + const chunks: any[] = [] + for await (const chunk of handler.createMessage("System", [{ role: "user", content: "hi" }])) { + chunks.push(chunk) + } + + expect(chunks).toContainEqual({ type: "text", text: "A" }) + expect(chunks).toContainEqual({ type: "reasoning", text: "B" }) + expect(handler.getLastSequenceNumber()).toBe(3) + expect(handler.getResponseId()).toBe("resp_123") + }) +}) + +// Added plumbing test for openAiNativeBackgroundMode +describe("OpenAI Native background mode setting (plumbing)", () => { + it("should surface openAiNativeBackgroundMode in handler options when provided", () => { + const handler = new OpenAiNativeHandler({ + apiModelId: "gpt-4.1", + openAiNativeApiKey: "test-api-key", + openAiNativeBackgroundMode: true, + } as ApiHandlerOptions) + + // Access protected options via runtime cast to verify pass-through + expect((handler as any).options.openAiNativeBackgroundMode).toBe(true) + }) +}) + +describe("OpenAI Native background auto-resume and polling", () => { + const systemPrompt = "System prompt" + const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "hello" }] + + beforeEach(() => { + mockResponsesCreate.mockClear() + if ((global as any).fetch) { + delete (global as any).fetch + } + }) + + it("resumes background stream on drop and emits no duplicate deltas", async () => { + const handler = new OpenAiNativeHandler({ + apiModelId: "gpt-5-pro-2025-10-06", + openAiNativeApiKey: "test", + openAiNativeBackgroundMode: true, + }) + + const dropIterable = { + async *[Symbol.asyncIterator]() { + yield { type: "response.queued", response: { id: "resp_resume" }, sequence_number: 0 } + yield { type: "response.in_progress", sequence_number: 1 } + yield { type: "response.text.delta", delta: "Hello", sequence_number: 2 } + throw new Error("network drop") + }, + } + mockResponsesCreate.mockResolvedValueOnce(dropIterable as any) + + const encoder = new TextEncoder() + const sseStream = new ReadableStream({ + start(controller) { + controller.enqueue( + encoder.encode( + 'data: {"type":"response.output_item.added","item":{"type":"text","text":"SHOULD_SKIP"},"sequence_number":2}\n\n', + ), + ) + controller.enqueue( + encoder.encode( + 'data: {"type":"response.output_item.added","item":{"type":"text","text":" world"},"sequence_number":3}\n\n', + ), + ) + controller.enqueue( + encoder.encode( + 'data: {"type":"response.done","response":{"id":"resp_resume","usage":{"input_tokens":10,"output_tokens":5}},"sequence_number":4}\n\n', + ), + ) + controller.enqueue(encoder.encode("data: [DONE]\n\n")) + controller.close() + }, + }) + ;(global as any).fetch = vitest + .fn() + .mockResolvedValue( + new Response(sseStream, { status: 200, headers: { "Content-Type": "text/event-stream" } }), + ) + + const stream = handler.createMessage(systemPrompt, messages) + const chunks: any[] = [] + for await (const c of stream) { + chunks.push(c) + } + + const statusChunks = chunks.filter((c) => c.type === "status") + const statusNames = statusChunks.map((s: any) => s.status) + const reconnectIdx = statusNames.indexOf("reconnecting") + const inProgIdx = statusNames.findIndex((s, i) => s === "in_progress" && i > reconnectIdx) + expect(reconnectIdx).toBeGreaterThanOrEqual(0) + expect(inProgIdx).toBeGreaterThan(reconnectIdx) + + const fullText = chunks + .filter((c) => c.type === "text") + .map((c: any) => c.text) + .join("") + expect(fullText).toBe("Hello world") + expect(fullText).not.toContain("SHOULD_SKIP") + + 
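+		// Usage must be reported exactly once even though the stream dropped and was continued over the SSE resume path.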
const usageChunks = chunks.filter((c) => c.type === "usage") + expect(usageChunks).toHaveLength(1) + }) + + it("falls back to polling after failed resume and yields final output/usage", async () => { + const handler = new OpenAiNativeHandler({ + apiModelId: "gpt-5-pro-2025-10-06", + openAiNativeApiKey: "test", + openAiNativeBackgroundMode: true, + openAiNativeBackgroundResumeMaxRetries: 1, + openAiNativeBackgroundResumeBaseDelayMs: 0, + openAiNativeBackgroundPollIntervalMs: 1, + openAiNativeBackgroundPollMaxMinutes: 1, + } as ApiHandlerOptions) + + const dropIterable = { + async *[Symbol.asyncIterator]() { + yield { type: "response.queued", response: { id: "resp_poll" }, sequence_number: 0 } + yield { type: "response.in_progress", sequence_number: 1 } + throw new Error("network drop") + }, + } + mockResponsesCreate.mockResolvedValueOnce(dropIterable as any) + + let pollStep = 0 + ;(global as any).fetch = vitest.fn().mockImplementation((url: string) => { + if (url.includes("?stream=true")) { + return Promise.resolve({ + ok: false, + status: 500, + text: async () => "resume failed", + } as any) + } + // polling path + const payloads = [ + { response: { id: "resp_poll", status: "queued" } }, + { response: { id: "resp_poll", status: "in_progress" } }, + { + response: { + id: "resp_poll", + status: "completed", + output: [{ type: "message", content: [{ type: "output_text", text: "Polled result" }] }], + usage: { input_tokens: 7, output_tokens: 3 }, + }, + }, + ] + const payload = payloads[Math.min(pollStep++, payloads.length - 1)] + return Promise.resolve( + new Response(JSON.stringify(payload), { status: 200, headers: { "Content-Type": "application/json" } }), + ) + }) + + const stream = handler.createMessage(systemPrompt, messages) + const chunks: any[] = [] + for await (const c of stream) { + chunks.push(c) + } + + const statusNames = chunks.filter((c) => c.type === "status").map((s: any) => s.status) + const idxReconnect = statusNames.indexOf("reconnecting") + const idxPolling = statusNames.indexOf("polling") + const idxQueued = statusNames.indexOf("queued") + const idxInProgress = statusNames.indexOf("in_progress") + const idxCompleted = statusNames.indexOf("completed") + expect(idxReconnect).toBeGreaterThanOrEqual(0) + expect(idxPolling).toBeGreaterThan(idxReconnect) + + const idxQueuedAfterPolling = statusNames.findIndex((s, i) => s === "queued" && i > idxPolling) + const idxInProgressAfterQueued = statusNames.findIndex( + (s, i) => s === "in_progress" && i > idxQueuedAfterPolling, + ) + const idxCompletedAfterInProgress = statusNames.findIndex( + (s, i) => s === "completed" && i > idxInProgressAfterQueued, + ) + + expect(idxQueuedAfterPolling).toBeGreaterThan(idxPolling) + expect(idxInProgressAfterQueued).toBeGreaterThan(idxQueuedAfterPolling) + expect(idxCompletedAfterInProgress).toBeGreaterThan(idxInProgressAfterQueued) + + const finalText = chunks + .filter((c) => c.type === "text") + .map((c: any) => c.text) + .join("") + expect(finalText).toBe("Polled result") + + const usageChunks = chunks.filter((c) => c.type === "usage") + expect(usageChunks).toHaveLength(1) + expect(usageChunks[0]).toMatchObject({ type: "usage", inputTokens: 7, outputTokens: 3 }) + }) + + it("does not attempt resume when not in background mode", async () => { + const handler = new OpenAiNativeHandler({ + apiModelId: "gpt-4.1", + openAiNativeApiKey: "test", + openAiNativeBackgroundMode: false, + }) + + const dropIterable = { + async *[Symbol.asyncIterator]() { + yield { type: "response.text.delta", delta: "Hi", 
sequence_number: 1 } + throw new Error("drop") + }, + } + mockResponsesCreate.mockResolvedValueOnce(dropIterable as any) + ;(global as any).fetch = vitest.fn().mockRejectedValue(new Error("SSE fallback failed")) + + const stream = handler.createMessage(systemPrompt, messages) + + const chunks: any[] = [] + await expect(async () => { + for await (const c of stream) { + chunks.push(c) + } + }).rejects.toThrow() + + const statuses = chunks.filter((c) => c.type === "status").map((s: any) => s.status) + expect(statuses).not.toContain("reconnecting") + expect(statuses).not.toContain("polling") + }) +}) diff --git a/src/api/providers/__tests__/openai-timeout.spec.ts b/src/api/providers/__tests__/openai-timeout.spec.ts index 2a09fd94ffa..2ef7df2adaf 100644 --- a/src/api/providers/__tests__/openai-timeout.spec.ts +++ b/src/api/providers/__tests__/openai-timeout.spec.ts @@ -141,4 +141,28 @@ describe("OpenAiHandler timeout configuration", () => { }), ) }) + + it("should force zero timeout when model info disables timeout", () => { + ;(getApiRequestTimeout as any).mockReturnValue(600000) + + const options: ApiHandlerOptions = { + apiModelId: "gpt-4", + openAiModelId: "gpt-4", + openAiCustomModelInfo: { + maxTokens: -1, + contextWindow: 128000, + supportsPromptCache: false, + supportsImages: true, + disableTimeout: true, + } as any, + } + + new OpenAiHandler(options) + + expect(mockOpenAIConstructor).toHaveBeenCalledWith( + expect.objectContaining({ + timeout: 0, // Forced no timeout via model info + }), + ) + }) }) diff --git a/src/api/providers/lm-studio.ts b/src/api/providers/lm-studio.ts index 6c58a96ae1f..1d048bf6791 100644 --- a/src/api/providers/lm-studio.ts +++ b/src/api/providers/lm-studio.ts @@ -32,7 +32,7 @@ export class LmStudioHandler extends BaseProvider implements SingleCompletionHan this.client = new OpenAI({ baseURL: (this.options.lmStudioBaseUrl || "http://localhost:1234") + "/v1", apiKey: apiKey, - timeout: getApiRequestTimeout(), + timeout: this.getModel().info?.disableTimeout === true ? 0 : getApiRequestTimeout(), }) } diff --git a/src/api/providers/openai-native.ts b/src/api/providers/openai-native.ts index b5fb417ee3a..e6bcc5d021a 100644 --- a/src/api/providers/openai-native.ts +++ b/src/api/providers/openai-native.ts @@ -25,6 +25,22 @@ import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from ". 
export type OpenAiNativeModel = ReturnType +// GPT-5 specific types + +// Constants for model identification +const GPT5_MODEL_PREFIX = "gpt-5" + +// Marker for terminal background-mode failures so we don't attempt resume/poll fallbacks +function createTerminalBackgroundError(message: string): Error { + const err = new Error(message) + ;(err as any).isTerminalBackgroundError = true + err.name = "TerminalBackgroundError" + return err +} +function isTerminalBackgroundError(err: any): boolean { + return !!(err && (err as any).isTerminalBackgroundError) +} + export class OpenAiNativeHandler extends BaseProvider implements SingleCompletionHandler { protected options: ApiHandlerOptions private client: OpenAI @@ -36,6 +52,15 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio private lastResponseId: string | undefined // Abort controller for cancelling ongoing requests private abortController?: AbortController + // Sequence number for background mode stream resumption + private lastSequenceNumber: number | undefined + // Track whether current request is in background mode for status chunk annotation + private currentRequestIsBackground?: boolean + // Cutoff sequence for filtering stale events during resume + private resumeCutoffSequence?: number + // Per-request tracking to prevent stale resume attempts + private currentRequestResponseId?: string + private currentRequestSequenceNumber?: number // Event types handled by the shared event processor to avoid duplication private readonly coreHandledEventTypes = new Set([ @@ -241,6 +266,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio }> tool_choice?: any parallel_tool_calls?: boolean + background?: boolean } // Validate requested tier against model support; if not supported, omit. @@ -312,6 +338,13 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio body.text = { verbosity: (verbosity || "medium") as VerbosityLevel } } + // Enable background mode when either explicitly opted in or required by model metadata + if (this.options.openAiNativeBackgroundMode === true || model.info.backgroundMode === true) { + body.background = true + body.stream = true + body.store = true + } + return body } @@ -325,6 +358,18 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio // Create AbortController for cancellation this.abortController = new AbortController() + // Annotate if this request uses background mode (used for status chunks) + this.currentRequestIsBackground = !!requestBody?.background + // Reset per-request tracking to prevent stale values from previous requests + this.currentRequestResponseId = undefined + this.currentRequestSequenceNumber = undefined + + const canAttemptResume = () => + this.currentRequestIsBackground && + (this.options.openAiNativeBackgroundAutoResume ?? 
true) && + !!this.currentRequestResponseId && + typeof this.currentRequestSequenceNumber === "number" + try { // Use the official SDK const stream = (await (this.client as any).responses.create(requestBody, { @@ -337,21 +382,64 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio ) } - for await (const event of stream) { - // Check if request was aborted - if (this.abortController.signal.aborted) { - break - } + try { + for await (const event of stream) { + // Check if request was aborted + if (this.abortController?.signal.aborted) { + break + } - for await (const outChunk of this.processEvent(event, model)) { - yield outChunk + for await (const outChunk of this.processEvent(event, model)) { + yield outChunk + } } + } catch (iterErr) { + // If terminal failure, propagate and do not attempt resume/poll + if (isTerminalBackgroundError(iterErr)) { + throw iterErr + } + // Stream dropped mid-flight; attempt resume for background requests + if (canAttemptResume()) { + for await (const chunk of this.attemptResumeOrPoll( + this.currentRequestResponseId!, + this.currentRequestSequenceNumber!, + model, + )) { + yield chunk + } + return + } + throw iterErr } } catch (sdkErr: any) { + // Propagate terminal background failures without fallback + if (isTerminalBackgroundError(sdkErr)) { + throw sdkErr + } // For errors, fallback to manual SSE via fetch - yield* this.makeResponsesApiRequest(requestBody, model, metadata, systemPrompt, messages) + try { + yield* this.makeResponsesApiRequest(requestBody, model, metadata, systemPrompt, messages) + } catch (fallbackErr) { + // If SSE fallback fails mid-stream and we can resume, try that + if (isTerminalBackgroundError(fallbackErr)) { + throw fallbackErr + } + if (canAttemptResume()) { + for await (const chunk of this.attemptResumeOrPoll( + this.currentRequestResponseId!, + this.currentRequestSequenceNumber!, + model, + )) { + yield chunk + } + return + } + throw fallbackErr + } } finally { this.abortController = undefined + // Always clear background flag at end of request lifecycle + this.currentRequestIsBackground = undefined } } @@ -590,6 +678,22 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio try { const parsed = JSON.parse(data) + // Skip stale events when resuming a dropped background stream + if ( + typeof parsed?.sequence_number === "number" && + this.resumeCutoffSequence !== undefined && + parsed.sequence_number <= this.resumeCutoffSequence + ) { + continue + } + + // Record sequence number for cursor tracking + if (typeof parsed?.sequence_number === "number") { + this.lastSequenceNumber = parsed.sequence_number + // Also track for per-request resume capability + this.currentRequestSequenceNumber = parsed.sequence_number + } + // Capture resolved service tier if present if (parsed.response?.service_tier) { this.lastServiceTier = parsed.response.service_tier as ServiceTier @@ -601,6 +705,8 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio // Capture top-level response id if (parsed.response?.id) { this.lastResponseId = parsed.response.id as string + // Also track for per-request resume capability + this.currentRequestResponseId = parsed.response.id as string } // Delegate standard event types to the shared processor to avoid duplication @@ -868,9 +974,20 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio else if (parsed.type === "response.error" || parsed.type === "error") { // Error event from the API if (parsed.error 
|| parsed.message) { - throw new Error( - `Responses API error: ${parsed.error?.message || parsed.message || "Unknown error"}`, - ) + const errMsg = `Responses API error: ${parsed.error?.message || parsed.message || "Unknown error"}` + // For background mode, treat as terminal to avoid futile resume attempts + if (this.currentRequestIsBackground) { + // Surface a failed status for UI lifecycle before terminating + yield { + type: "status", + mode: "background", + status: "failed", + ...(parsed.response?.id ? { responseId: parsed.response.id } : {}), + } + throw createTerminalBackgroundError(errMsg) + } + // Non-background: propagate as a standard error + throw new Error(errMsg) } } // Handle incomplete event @@ -879,17 +996,34 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio } // Handle queued event else if (parsed.type === "response.queued") { - // Response is queued + yield { + type: "status", + mode: this.currentRequestIsBackground ? "background" : undefined, + status: "queued", + ...(parsed.response?.id ? { responseId: parsed.response.id } : {}), + } } // Handle in_progress event else if (parsed.type === "response.in_progress") { - // Response is being processed + yield { + type: "status", + mode: this.currentRequestIsBackground ? "background" : undefined, + status: "in_progress", + ...(parsed.response?.id ? { responseId: parsed.response.id } : {}), + } } // Handle failed event else if (parsed.type === "response.failed") { + // Emit failed status for UI lifecycle + yield { + type: "status", + mode: this.currentRequestIsBackground ? "background" : undefined, + status: "failed", + ...(parsed.response?.id ? { responseId: parsed.response.id } : {}), + } // Response failed if (parsed.error || parsed.message) { - throw new Error( + throw createTerminalBackgroundError( `Response failed: ${parsed.error?.message || parsed.message || "Unknown failure"}`, ) } @@ -907,6 +1041,16 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio this.lastResponseOutput = parsed.response.output } + // Emit completed status for UI lifecycle + yield { + type: "status", + mode: this.currentRequestIsBackground ? "background" : undefined, + status: "completed", + ...(parsed.response?.id ? { responseId: parsed.response.id } : {}), + } + // Clear background marker on completion + this.currentRequestIsBackground = undefined + // Check if the done event contains the complete output (as a fallback) if ( !hasContent && @@ -1014,6 +1158,10 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio // This can happen in certain edge cases and shouldn't break the flow } catch (error) { if (error instanceof Error) { + // Preserve terminal background errors so callers can avoid resume attempts + if ((error as any).isTerminalBackgroundError) { + throw error + } throw new Error(`Error processing response stream: ${error.message}`) } throw new Error("Unexpected error processing response stream") @@ -1022,6 +1170,264 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio } } + /** + * Attempt to resume a dropped background stream; if resume fails, fall back to polling. + */ + private async *attemptResumeOrPoll(responseId: string, lastSeq: number, model: OpenAiNativeModel): ApiStream { + // Emit reconnecting status + yield { + type: "status", + mode: "background", + status: "reconnecting", + responseId, + } + + const apiKey = this.options.openAiNativeApiKey ?? 
"not-provided" + const baseUrl = this.options.openAiNativeBaseUrl || "https://api.openai.com" + const resumeMaxRetries = this.options.openAiNativeBackgroundResumeMaxRetries ?? 3 + const resumeBaseDelayMs = this.options.openAiNativeBackgroundResumeBaseDelayMs ?? 1000 + + // Try streaming resume with exponential backoff + for (let attempt = 0; attempt < resumeMaxRetries; attempt++) { + try { + const resumeUrl = `${baseUrl}/v1/responses/${responseId}?stream=true&starting_after=${lastSeq}` + const res = await fetch(resumeUrl, { + method: "GET", + headers: { + Authorization: `Bearer ${apiKey}`, + Accept: "text/event-stream", + }, + signal: this.abortController?.signal, + }) + + if (!res.ok) { + const status = res.status + if (status === 401 || status === 403 || status === 404) { + yield { + type: "status", + mode: "background", + status: "failed", + responseId, + } + + const terminalErr = createTerminalBackgroundError(`Resume request failed (${status})`) + ;(terminalErr as any).status = status + throw terminalErr + } + + throw new Error(`Resume request failed (${status})`) + } + if (!res.body) { + throw new Error("Resume request failed (no body)") + } + + this.resumeCutoffSequence = lastSeq + + // Handshake accepted: immediately switch UI from reconnecting -> in_progress + yield { + type: "status", + mode: "background", + status: "in_progress", + responseId, + } + + try { + for await (const chunk of this.handleStreamResponse(res.body, model)) { + // Avoid double-emitting in_progress if the inner handler surfaces it + if (chunk.type === "status" && (chunk as any).status === "in_progress") { + continue + } + yield chunk + } + // Successful resume + this.resumeCutoffSequence = undefined + return + } catch (e) { + // Resume stream failed mid-flight; reset and throw to retry + this.resumeCutoffSequence = undefined + throw e + } + } catch (err: any) { + // If terminal error, don't keep retrying resume; fall back to polling immediately + const delay = resumeBaseDelayMs * Math.pow(2, attempt) + const msg = err instanceof Error ? err.message : String(err) + + if (isTerminalBackgroundError(err)) { + console.error(`[OpenAiNative][resume] terminal background error on attempt ${attempt + 1}: ${msg}`) + break + } + + // Otherwise retry with backoff (transient failure) + console.warn(`[OpenAiNative][resume] attempt ${attempt + 1} failed; retrying in ${delay}ms: ${msg}`) + if (delay > 0) { + await new Promise((r) => setTimeout(r, delay)) + } + } + } + + // Resume failed - begin polling fallback + yield { + type: "status", + mode: "background", + status: "polling", + responseId, + } + + const pollIntervalMs = this.options.openAiNativeBackgroundPollIntervalMs ?? 2000 + const pollMaxMinutes = this.options.openAiNativeBackgroundPollMaxMinutes ?? 
20 + const deadline = Date.now() + pollMaxMinutes * 60_000 + + let lastEmittedStatus: "queued" | "in_progress" | "completed" | "failed" | "canceled" | undefined = undefined + + while (Date.now() <= deadline) { + try { + const pollRes = await fetch(`${baseUrl}/v1/responses/${responseId}`, { + method: "GET", + headers: { + Authorization: `Bearer ${apiKey}`, + }, + signal: this.abortController?.signal, + }) + + if (!pollRes.ok) { + const status = pollRes.status + if (status === 401 || status === 403 || status === 404) { + yield { + type: "status", + mode: "background", + status: "failed", + responseId, + } + const terminalErr = createTerminalBackgroundError(`Polling failed with status ${status}`) + ;(terminalErr as any).status = status + throw terminalErr + } + + // transient; wait and retry + await new Promise((r) => setTimeout(r, pollIntervalMs)) + continue + } + + let raw: any + try { + raw = await pollRes.json() + } catch { + await new Promise((r) => setTimeout(r, pollIntervalMs)) + continue + } + + const resp = raw?.response ?? raw + const status: string | undefined = resp?.status + const respId: string | undefined = resp?.id ?? responseId + + // Capture resolved service tier if present + if (resp?.service_tier) { + this.lastServiceTier = resp.service_tier as ServiceTier + } + + // Emit status transitions + if ( + status && + (status === "queued" || + status === "in_progress" || + status === "completed" || + status === "failed" || + status === "canceled") + ) { + if (status !== lastEmittedStatus) { + yield { + type: "status", + mode: "background", + status: status as any, + ...(respId ? { responseId: respId } : {}), + } + lastEmittedStatus = status as any + } + } + + if (status === "completed") { + // Synthesize final output + const output = resp?.output ?? raw?.output + if (Array.isArray(output)) { + for (const outputItem of output) { + if (outputItem.type === "text" && Array.isArray(outputItem.content)) { + for (const content of outputItem.content) { + if (content?.type === "text" && typeof content.text === "string") { + yield { type: "text", text: content.text } + } + } + } else if (outputItem.type === "message" && Array.isArray(outputItem.content)) { + for (const content of outputItem.content) { + if ( + (content?.type === "output_text" || content?.type === "text") && + typeof content.text === "string" + ) { + yield { type: "text", text: content.text } + } + } + } else if (outputItem.type === "reasoning" && Array.isArray(outputItem.summary)) { + for (const summary of outputItem.summary) { + if (summary?.type === "summary_text" && typeof summary.text === "string") { + yield { type: "reasoning", text: summary.text } + } + } + } + } + } + + // Synthesize usage + const usage = resp?.usage ?? raw?.usage + const usageData = this.normalizeUsage(usage, model) + if (usageData) { + yield usageData + } + + return + } + + if (status === "failed" || status === "canceled") { + const detail: string | undefined = resp?.error?.message ?? raw?.error?.message + const msg = detail ? `Response ${status}: ${detail}` : `Response ${status}: ${respId || responseId}` + throw createTerminalBackgroundError(msg) + } + } catch (err: any) { + // If we've already emitted a terminal status, propagate to consumer to stop polling. + if (lastEmittedStatus === "failed" || lastEmittedStatus === "canceled") { + throw err + } + + // Classify polling errors and log appropriately + const statusCode = err?.status ?? err?.response?.status + const msg = err instanceof Error ? 
err.message : String(err) + + // Permanent errors: stop polling + if (statusCode === 401 || statusCode === 403 || statusCode === 404) { + console.error(`[OpenAiNative][poll] permanent error (status ${statusCode}); stopping: ${msg}`) + throw createTerminalBackgroundError(`Polling failed with status ${statusCode}: ${msg}`) + } + + // Rate limit: transient, will retry + if (statusCode === 429) { + console.warn(`[OpenAiNative][poll] rate limited; will retry: ${msg}`) + } else { + // Other transient/network errors + console.warn( + `[OpenAiNative][poll] transient error; will retry${statusCode ? ` (status ${statusCode})` : ""}: ${msg}`, + ) + } + } + + // Stop polling immediately on terminal background statuses + if (lastEmittedStatus === "failed" || lastEmittedStatus === "canceled") { + throw new Error(`Background polling terminated with status=${lastEmittedStatus} for ${responseId}`) + } + + await new Promise((r) => setTimeout(r, pollIntervalMs)) + } + + throw new Error(`Background response polling timed out for ${responseId}`) + } + /** * Shared processor for Responses API events. */ @@ -1037,6 +1443,43 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio // Capture top-level response id if (event?.response?.id) { this.lastResponseId = event.response.id as string + // Also track for per-request resume capability + this.currentRequestResponseId = event.response.id as string + } + // Record sequence number for cursor tracking + if (typeof event?.sequence_number === "number") { + this.lastSequenceNumber = event.sequence_number + // Also track for per-request resume capability + this.currentRequestSequenceNumber = event.sequence_number + } + + // Map lifecycle events to status chunks + const statusMap: Record = { + "response.queued": "queued", + "response.in_progress": "in_progress", + "response.completed": "completed", + "response.done": "completed", + "response.failed": "failed", + "response.canceled": "canceled", + } + const mappedStatus = statusMap[event?.type as string] + if (mappedStatus) { + yield { + type: "status", + mode: this.currentRequestIsBackground ? "background" : undefined, + status: mappedStatus, + ...(event?.response?.id ? { responseId: event.response.id } : {}), + } + // Clear background flag for terminal statuses + if (mappedStatus === "completed" || mappedStatus === "failed" || mappedStatus === "canceled") { + this.currentRequestIsBackground = undefined + } + // Throw terminal error to integrate with standard failure path (surfaced in UI) + if (mappedStatus === "failed" || mappedStatus === "canceled") { + const msg = (event as any)?.error?.message || (event as any)?.message || `Response ${mappedStatus}` + throw createTerminalBackgroundError(msg) + } + // Do not return; allow further handling (e.g., usage on done/completed) } // Handle known streaming text deltas @@ -1252,6 +1695,23 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio return this.lastResponseId } + /** + * Gets the last sequence number observed from streaming events. + * @returns The sequence number, or undefined if not available yet + */ + getLastSequenceNumber(): number | undefined { + return this.lastSequenceNumber + } + + /** + * Sets the last response ID for conversation continuity. + * Typically only used in tests or special flows. 
+ * @param responseId The response ID to store + */ + setResponseId(responseId: string): void { + this.lastResponseId = responseId + } + async completePrompt(prompt: string): Promise { // Create AbortController for cancellation this.abortController = new AbortController() diff --git a/src/api/providers/openai.ts b/src/api/providers/openai.ts index 2a2065edd6e..586a86458c0 100644 --- a/src/api/providers/openai.ts +++ b/src/api/providers/openai.ts @@ -49,7 +49,13 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl ...(this.options.openAiHeaders || {}), } - const timeout = getApiRequestTimeout() + let timeout = getApiRequestTimeout() + try { + const modelInfo = this.getModel().info + if (modelInfo?.disableTimeout === true) { + timeout = 0 + } + } catch {} if (isAzureAiInference) { // Azure AI Inference Service (e.g., for DeepSeek) uses a different path structure diff --git a/src/api/transform/stream.ts b/src/api/transform/stream.ts index a4a0fe4a9a7..a3011617996 100644 --- a/src/api/transform/stream.ts +++ b/src/api/transform/stream.ts @@ -10,6 +10,7 @@ export type ApiStreamChunk = | ApiStreamToolCallDeltaChunk | ApiStreamToolCallEndChunk | ApiStreamToolCallPartialChunk + | ApiStreamStatusChunk | ApiStreamError export interface ApiStreamError { @@ -85,3 +86,10 @@ export interface GroundingSource { url: string snippet?: string } + +export interface ApiStreamStatusChunk { + type: "status" + mode?: "background" + status: "queued" | "in_progress" | "completed" | "failed" | "canceled" | "reconnecting" | "polling" + responseId?: string +} diff --git a/src/core/task/Task.ts b/src/core/task/Task.ts index 8ed9ab56405..311884fe56e 100644 --- a/src/core/task/Task.ts +++ b/src/core/task/Task.ts @@ -2351,7 +2351,6 @@ export class Task extends EventEmitter implements TaskLike { // lastMessage.ts = Date.now() DO NOT update ts since it is used as a key for virtuoso list lastMessage.partial = false // instead of streaming partialMessage events, we do a save and post like normal to persist to disk - console.log("updating partial message", lastMessage) } // Update `api_req_started` to have cancelled and cost, so that @@ -2437,6 +2436,7 @@ export class Task extends EventEmitter implements TaskLike { if (!chunk) { // Sometimes chunk is undefined, no idea that can cause // it, but this workaround seems to fix it. + item = await iterator.next() continue } @@ -2612,6 +2612,25 @@ export class Task extends EventEmitter implements TaskLike { presentAssistantMessage(this) break } + + case "status": { + try { + const apiReqMsg = this.clineMessages[lastApiReqIndex] + if (apiReqMsg && apiReqMsg.type === "say" && apiReqMsg.say === "api_req_started") { + ;(apiReqMsg as any).metadata = (apiReqMsg as any).metadata || {} + if (chunk.mode === "background") { + ;(apiReqMsg as any).metadata.background = true + } + ;(apiReqMsg as any).metadata.backgroundStatus = chunk.status + if (chunk.responseId) { + ;(apiReqMsg as any).metadata.responseId = chunk.responseId + } + // Update the specific message; avoid full-state refresh on every status chunk to reduce re-renders + await this.updateClineMessage(apiReqMsg) + } + } catch {} + break + } case "text": { assistantMessage += chunk.text @@ -2686,6 +2705,10 @@ export class Task extends EventEmitter implements TaskLike { "\n\n[Response interrupted by a tool use result. Only one tool may be used at a time and should be placed at the end of the message.]" break } + // Prefetch the next item after processing the current chunk. 
+ // This ensures terminal status chunks (e.g., failed/canceled/completed) + // are not skipped when the provider throws on the following next(). + item = await iterator.next() } // Finalize any remaining streaming tool calls that weren't explicitly ended @@ -3168,8 +3191,31 @@ export class Task extends EventEmitter implements TaskLike { continue } else { // If there's no assistant_responses, that means we got no text - // or tool_use content blocks from API which we should assume is - // an error. + // or tool_use content blocks from API which we should assume is an error. + // Prefer any streaming failure details captured on the last api_req_started message. + let errorText = + "Unexpected API Response: The language model did not provide any assistant messages. This may indicate an issue with the API or the model's output." + try { + const lastApiReqStartedIdx = findLastIndex( + this.clineMessages, + (m) => m.type === "say" && m.say === "api_req_started", + ) + if (lastApiReqStartedIdx !== -1) { + const info = JSON.parse( + this.clineMessages[lastApiReqStartedIdx].text || "{}", + ) as ClineApiReqInfo + if ( + typeof info?.streamingFailedMessage === "string" && + info.streamingFailedMessage.trim().length > 0 + ) { + errorText = info.streamingFailedMessage + } + } + } catch { + // ignore parse issues and keep default message + } + + await this.say("error", errorText) // IMPORTANT: For native tool protocol, we already added the user message to // apiConversationHistory at line 1876. Since the assistant failed to respond, diff --git a/src/shared/api.ts b/src/shared/api.ts index 4f4c8a4ae9a..b014d0dae9f 100644 --- a/src/shared/api.ts +++ b/src/shared/api.ts @@ -24,6 +24,20 @@ export type ApiHandlerOptions = Omit & { * When undefined, Ollama will use the model's default num_ctx from the Modelfile. */ ollamaNumCtx?: number + /** + * Opt-in for OpenAI Responses background mode when using apiProvider=openai-native. + * Defaults to false when omitted. + */ + openAiNativeBackgroundMode?: boolean + /** + * Auto-resume/poll configuration for OpenAI Responses background mode. + * These are plumbed-only (no UI). Defaults are resolved in the handler. 
+ */ + openAiNativeBackgroundAutoResume?: boolean + openAiNativeBackgroundResumeMaxRetries?: number + openAiNativeBackgroundResumeBaseDelayMs?: number + openAiNativeBackgroundPollIntervalMs?: number + openAiNativeBackgroundPollMaxMinutes?: number } // RouterName diff --git a/webview-ui/src/components/chat/ChatRow.tsx b/webview-ui/src/components/chat/ChatRow.tsx index 48cd46350d5..1171c2ad310 100644 --- a/webview-ui/src/components/chat/ChatRow.tsx +++ b/webview-ui/src/components/chat/ChatRow.tsx @@ -15,6 +15,7 @@ import { useExtensionState } from "@src/context/ExtensionStateContext" import { findMatchingResourceOrTemplate } from "@src/utils/mcp" import { vscode } from "@src/utils/vscode" import { formatPathTooltip } from "@src/utils/formatPathTooltip" +import { labelForBackgroundStatus } from "@src/utils/backgroundStatus" import { ToolUseBlock, ToolUseBlockHeader } from "../common/ToolUseBlock" import UpdateTodoListToolBlock from "./UpdateTodoListToolBlock" @@ -313,6 +314,21 @@ export const ChatRowContent = ({ /> ) + // Background mode UI label/icon handling + const meta: any = message.metadata + const isBackground = meta?.background === true + const bgStatus = meta?.backgroundStatus as + | "queued" + | "in_progress" + | "reconnecting" + | "polling" + | "completed" + | "failed" + | "canceled" + | undefined + const bgDone = + isBackground && (bgStatus === "completed" || bgStatus === "failed" || bgStatus === "canceled") + const label = isBackground ? labelForBackgroundStatus(bgStatus) : undefined return [ apiReqCancelReason !== null && apiReqCancelReason !== undefined ? ( apiReqCancelReason === "user_cancelled" ? ( @@ -320,6 +336,12 @@ export const ChatRowContent = ({ ) : ( getIconSpan("error", errorColor) ) + ) : bgDone ? ( + bgStatus === "completed" ? ( + getIconSpan("arrow-swap", normalColor) + ) : ( + getIconSpan("error", bgStatus === "canceled" ? cancelledColor : errorColor) + ) ) : cost !== null && cost !== undefined ? ( getIconSpan("arrow-swap", normalColor) ) : apiRequestFailedMessage ? ( @@ -328,7 +350,9 @@ export const ChatRowContent = ({ ), apiReqCancelReason !== null && apiReqCancelReason !== undefined ? ( - apiReqCancelReason === "user_cancelled" ? ( + isBackground && label ? ( + {label} + ) : apiReqCancelReason === "user_cancelled" ? ( {t("chat:apiRequest.cancelled")} @@ -337,6 +361,8 @@ export const ChatRowContent = ({ {t("chat:apiRequest.streamingFailed")} ) + ) : label ? ( + {label} ) : cost !== null && cost !== undefined ? ( {t("chat:apiRequest.title")} ) : apiRequestFailedMessage ? ( @@ -1066,8 +1092,14 @@ export const ChatRowContent = ({ ) case "api_req_started": // Determine if the API request is in progress + const bgMeta: any = message.metadata + const bgStatus = bgMeta?.background === true ? 
bgMeta?.backgroundStatus : undefined + const bgDone = bgStatus === "completed" || bgStatus === "failed" || bgStatus === "canceled" const isApiRequestInProgress = - apiReqCancelReason === undefined && apiRequestFailedMessage === undefined && cost === undefined + apiReqCancelReason === undefined && + apiRequestFailedMessage === undefined && + cost === undefined && + !bgDone return ( <> diff --git a/webview-ui/src/components/chat/ChatView.tsx b/webview-ui/src/components/chat/ChatView.tsx index 6ee163fe41b..fd61af6078c 100644 --- a/webview-ui/src/components/chat/ChatView.tsx +++ b/webview-ui/src/components/chat/ChatView.tsx @@ -505,27 +505,49 @@ const ChatViewComponent: React.ForwardRefRenderFunction message.say === "api_req_started", + ) + + // Extract background terminal state and cancel reason/cost if present + let bgDone = false + let cancelReason: string | null | undefined = undefined + let cost: any = undefined + + if (lastApiReqStarted && lastApiReqStarted.say === "api_req_started") { + const meta: any = (lastApiReqStarted as any).metadata + const bgStatus = meta?.background === true ? meta?.backgroundStatus : undefined + bgDone = bgStatus === "completed" || bgStatus === "failed" || bgStatus === "canceled" + + try { + if (lastApiReqStarted.text !== null && lastApiReqStarted.text !== undefined) { + const info = JSON.parse(lastApiReqStarted.text) + cost = info?.cost + cancelReason = info?.cancelReason + } + } catch { + // ignore malformed json + } + } + + // If background reached a terminal state or the provider recorded a cancel reason, + // treat UI as not streaming regardless of partial flags or missing cost. + if (bgDone || cancelReason != null) { + return false + } + // Partial assistant content means streaming unless overridden by the terminal checks above. + const isLastMessagePartial = modifiedMessages.at(-1)?.partial === true if (isLastMessagePartial) { return true - } else { - const lastApiReqStarted = findLast( - modifiedMessages, - (message: ClineMessage) => message.say === "api_req_started", - ) - - if ( - lastApiReqStarted && - lastApiReqStarted.text !== null && - lastApiReqStarted.text !== undefined && - lastApiReqStarted.say === "api_req_started" - ) { - const cost = JSON.parse(lastApiReqStarted.text).cost + } - if (cost === undefined) { - return true // API request has not finished yet. - } + // Otherwise, if the API request hasn't finished (no cost yet), consider it streaming. 
+ if (lastApiReqStarted && lastApiReqStarted.say === "api_req_started") { + if (cost === undefined) { + return true } } diff --git a/webview-ui/src/utils/__tests__/backgroundStatus.spec.ts b/webview-ui/src/utils/__tests__/backgroundStatus.spec.ts new file mode 100644 index 00000000000..aac4c73b3e9 --- /dev/null +++ b/webview-ui/src/utils/__tests__/backgroundStatus.spec.ts @@ -0,0 +1,35 @@ +import { labelForBackgroundStatus } from "@src/utils/backgroundStatus" + +describe("labelForBackgroundStatus()", () => { + it("maps queued", () => { + expect(labelForBackgroundStatus("queued")).toBe("API Request: background mode (queued)…") + }) + + it("maps in_progress", () => { + expect(labelForBackgroundStatus("in_progress")).toBe("API Request: background mode (in progress)…") + }) + + it("maps reconnecting", () => { + expect(labelForBackgroundStatus("reconnecting")).toBe("API Request: background mode (reconnecting…)") + }) + + it("maps polling", () => { + expect(labelForBackgroundStatus("polling")).toBe("API Request: background mode (polling…)") + }) + + it("maps completed", () => { + expect(labelForBackgroundStatus("completed")).toBe("API Request: background mode (completed)") + }) + + it("maps failed", () => { + expect(labelForBackgroundStatus("failed")).toBe("API Request: background mode (failed)") + }) + + it("maps canceled", () => { + expect(labelForBackgroundStatus("canceled")).toBe("API Request: background mode (canceled)") + }) + + it("maps undefined to generic label", () => { + expect(labelForBackgroundStatus(undefined)).toBe("API Request: background mode") + }) +}) diff --git a/webview-ui/src/utils/backgroundStatus.ts b/webview-ui/src/utils/backgroundStatus.ts new file mode 100644 index 00000000000..ad56c2d6e2a --- /dev/null +++ b/webview-ui/src/utils/backgroundStatus.ts @@ -0,0 +1,29 @@ +export type BackgroundStatus = + | "queued" + | "in_progress" + | "completed" + | "failed" + | "canceled" + | "reconnecting" + | "polling" + +export function labelForBackgroundStatus(s?: BackgroundStatus): string { + switch (s) { + case "queued": + return "API Request: background mode (queued)…" + case "in_progress": + return "API Request: background mode (in progress)…" + case "reconnecting": + return "API Request: background mode (reconnecting…)" + case "polling": + return "API Request: background mode (polling…)" + case "completed": + return "API Request: background mode (completed)" + case "failed": + return "API Request: background mode (failed)" + case "canceled": + return "API Request: background mode (canceled)" + default: + return "API Request: background mode" + } +}
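
Usage sketch (not part of the patch): a minimal consumer of the background-mode plumbing introduced above, assuming the exports shown in this diff (OpenAiNativeHandler, the new "status" chunk on ApiStream, and labelForBackgroundStatus). The import paths and the runBackgroundRequest helper are illustrative only.

import { OpenAiNativeHandler } from "./src/api/providers/openai-native"
import { labelForBackgroundStatus } from "./webview-ui/src/utils/backgroundStatus"

// Drives a gpt-5-pro request end to end and logs background status transitions.
async function runBackgroundRequest(apiKey: string): Promise<string> {
	const handler = new OpenAiNativeHandler({
		apiModelId: "gpt-5-pro-2025-10-06", // model info sets backgroundMode: true, so background/stream/store are forced
		openAiNativeApiKey: apiKey,
		// Plumbing-only knobs (no UI); defaults are resolved inside the handler when omitted.
		openAiNativeBackgroundAutoResume: true,
		openAiNativeBackgroundPollIntervalMs: 2000,
	})

	let text = ""
	for await (const chunk of handler.createMessage("You are a helpful assistant.", [
		{ role: "user", content: "Summarize the plan." },
	])) {
		if (chunk.type === "status" && chunk.mode === "background") {
			// queued -> in_progress -> (reconnecting / polling after a drop) -> completed | failed | canceled
			console.log(labelForBackgroundStatus(chunk.status))
		} else if (chunk.type === "text") {
			text += chunk.text
		}
	}
	return text
}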