Draft
Changes from all commits
Commits
17 commits
84f58cd
feat(models): add per-model timeout disable to avoid global override …
hannesrudolph Oct 10, 2025
9ecd7bb
feat(openai-models): add gpt-5-pro-2025-10-06 with timeout disabled a…
hannesrudolph Oct 10, 2025
d73bdf3
revert: per-model disableTimeout implementation; remove flag from gpt…
hannesrudolph Oct 10, 2025
3949621
feat(openai-native): background mode + auto-resume and poll fallback
hannesrudolph Oct 12, 2025
f3b654d
chore: remove TEMP_OPENAI_BACKGROUND_TASK_DOCS.DM and ignore temp docs
hannesrudolph Oct 12, 2025
cb975a2
feat(openai-models): update maxTokens for gpt-5-pro-2025-10-06 from 2…
hannesrudolph Oct 16, 2025
c6b7681
feat(chat): enhance background status handling and UI updates for ter…
hannesrudolph Oct 16, 2025
e118846
fix: Address PR review feedback - fix stale resume IDs, update model …
hannesrudolph Oct 16, 2025
7cff4db
fix(webview): define chevron icon via codicon and add missing isExpan…
hannesrudolph Oct 24, 2025
d552cce
fix(openai): update reasoning effort default to high and improve mode…
hannesrudolph Oct 24, 2025
49f90b9
webview-ui: use standard API Request icons for background mode; keep …
hannesrudolph Oct 24, 2025
50242b4
fix(openai-native): add logging for background resume and polling; cl…
hannesrudolph Oct 24, 2025
9803d43
fix(types/openai): correct GPT-5 Pro description typos/grammar; perf(…
hannesrudolph Oct 25, 2025
f988dee
fix: use hyphen instead of em dash in GPT-5 Pro description, remove u…
hannesrudolph Dec 10, 2025
f138361
fix: fix type errors - add metadata to ClineMessage, fix getResponseI…
hannesrudolph Dec 10, 2025
8f5e2ed
fix: track currentRequestResponseId in SSE fallback path for backgrou…
hannesrudolph Dec 10, 2025
706138e
Handle terminal failures in background resume/poll
hannesrudolph Dec 10, 2025
7 changes: 5 additions & 2 deletions .gitignore
@@ -49,6 +49,9 @@ logs

# Qdrant
qdrant_storage/

# Architect plans
plans/
plans/

# ignore temp background docs
TEMP_OPENAI_BACKGROUND_TASK_DOCS.DM
TEMP_DOCS/
12 changes: 12 additions & 0 deletions packages/types/src/message.ts
@@ -279,6 +279,18 @@ export const clineMessageSchema = z.object({
isProtected: z.boolean().optional(),
apiProtocol: z.union([z.literal("openai"), z.literal("anthropic")]).optional(),
isAnswered: z.boolean().optional(),
/**
* Optional metadata for API request tracking.
* Used for background mode status display.
*/
metadata: z
.object({
background: z.boolean().optional(),
backgroundStatus: z
.enum(["queued", "in_progress", "reconnecting", "polling", "completed", "failed", "canceled"])
.optional(),
})
.optional(),
})

export type ClineMessage = z.infer<typeof clineMessageSchema>
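For context, here is a rough sketch of how UI code might consume the new optional `metadata` field when rendering an API-request row. Only the field names come from the schema above; the import path, label map, and helper function are illustrative assumptions, not code from this PR.

```ts
// Illustrative only: map the backgroundStatus enum to display labels.
// Field names come from the schema above; everything else is assumed.
import type { ClineMessage } from "@roo-code/types" // import path assumed

const statusLabels: Record<string, string> = {
	queued: "Queued",
	in_progress: "Running in background",
	reconnecting: "Reconnecting to stream",
	polling: "Polling for result",
	completed: "Completed",
	failed: "Failed",
	canceled: "Canceled",
}

function backgroundStatusLabel(message: ClineMessage): string | undefined {
	// Only messages flagged as background requests get a status label.
	if (!message.metadata?.background) return undefined
	return statusLabels[message.metadata.backgroundStatus ?? "queued"]
}
```

Because both new keys are optional, existing messages without `metadata` still parse unchanged.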
5 changes: 5 additions & 0 deletions packages/types/src/model.ts
@@ -86,6 +86,11 @@ export const modelInfoSchema = z.object({
// Capability flag to indicate whether the model supports temperature parameter
supportsTemperature: z.boolean().optional(),
defaultTemperature: z.number().optional(),
// When true, force-disable request timeouts for this model (providers will set timeout=0)
disableTimeout: z.boolean().optional(),
// When true, this model must be invoked using Responses background mode.
// Providers should auto-enable background:true, stream:true, and store:true.
backgroundMode: z.boolean().optional(),
requiredReasoningBudget: z.boolean().optional(),
supportsReasoningEffort: z
.union([z.boolean(), z.array(z.enum(["disable", "none", "minimal", "low", "medium", "high", "xhigh"]))])
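A minimal sketch of how a provider could honor the two new flags, matching the comments above (timeouts forced to 0, and background-mode models always sent with background/stream/store). The local `ModelFlags` type and helper names are made up for illustration; this is not the providers' actual code.

```ts
// Sketch under the assumptions stated above; not the actual provider implementation.
interface ModelFlags {
	disableTimeout?: boolean
	backgroundMode?: boolean
}

// disableTimeout forces timeout = 0 (no timeout), overriding any configured value.
function resolveTimeoutMs(info: ModelFlags, configuredTimeoutMs: number): number {
	return info.disableTimeout ? 0 : configuredTimeoutMs
}

// backgroundMode models are always sent with background + stream + store enabled.
function resolveResponsesFlags(info: ModelFlags, userOptIn: boolean) {
	const useBackground = info.backgroundMode === true || userOptIn
	return useBackground ? { background: true, stream: true, store: true } : { stream: true }
}
```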
9 changes: 9 additions & 0 deletions packages/types/src/provider-settings.ts
@@ -304,6 +304,15 @@ const openAiNativeSchema = apiModelIdProviderModelSchema.extend({
// OpenAI Responses API service tier for openai-native provider only.
// UI should only expose this when the selected model supports flex/priority.
openAiNativeServiceTier: serviceTierSchema.optional(),
// Enable OpenAI Responses background mode when using Responses API.
// Opt-in; defaults to false when omitted.
openAiNativeBackgroundMode: z.boolean().optional(),
// Background auto-resume/poll settings (no UI; plumbed via options)
openAiNativeBackgroundAutoResume: z.boolean().optional(),
openAiNativeBackgroundResumeMaxRetries: z.number().int().min(0).optional(),
openAiNativeBackgroundResumeBaseDelayMs: z.number().int().min(0).optional(),
openAiNativeBackgroundPollIntervalMs: z.number().int().min(0).optional(),
openAiNativeBackgroundPollMaxMinutes: z.number().int().min(1).optional(),
})

const mistralSchema = apiModelIdProviderModelSchema.extend({
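To make the knobs above concrete, here is a hedged sketch of how the resume settings could drive retry timing, with polling as the fallback. The default values, helper name, and exponential backoff schedule are assumptions for illustration only, not the provider's actual logic.

```ts
// Illustrative retry loop; option names match the schema above, defaults are assumed.
interface BackgroundRetryOptions {
	openAiNativeBackgroundAutoResume?: boolean
	openAiNativeBackgroundResumeMaxRetries?: number
	openAiNativeBackgroundResumeBaseDelayMs?: number
	openAiNativeBackgroundPollIntervalMs?: number
	openAiNativeBackgroundPollMaxMinutes?: number
}

async function resumeWithBackoff(
	opts: BackgroundRetryOptions,
	attemptResume: () => Promise<boolean>,
): Promise<boolean> {
	if (opts.openAiNativeBackgroundAutoResume === false) return false
	const maxRetries = opts.openAiNativeBackgroundResumeMaxRetries ?? 3
	const baseDelayMs = opts.openAiNativeBackgroundResumeBaseDelayMs ?? 1000
	for (let attempt = 0; attempt < maxRetries; attempt++) {
		if (await attemptResume()) return true
		// Exponential backoff between resume attempts: base, 2x, 4x, ...
		await new Promise((resolve) => setTimeout(resolve, baseDelayMs * 2 ** attempt))
	}
	// The caller would then fall back to polling every PollIntervalMs,
	// giving up after PollMaxMinutes.
	return false
}
```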
15 changes: 15 additions & 0 deletions packages/types/src/providers/openai.ts
@@ -47,6 +47,21 @@ export const openAiNativeModels = {
],
description: "GPT-5.1: The best model for coding and agentic tasks across domains",
},
"gpt-5-pro-2025-10-06": {
maxTokens: 128000,
contextWindow: 400000,
supportsImages: true,
supportsPromptCache: false,
supportsReasoningEffort: false, // Set to false so the UI does not display the reasoning-effort selector
reasoningEffort: "high", // The Pro model defaults to high reasoning effort, which must be specified explicitly
inputPrice: 15.0,
outputPrice: 120.0,
description:
"GPT-5 Pro: A slow, reasoning-focused model for complex problems. Uses background mode with resilient streaming - requests may take some time and will automatically reconnect if they time out.",
supportsVerbosity: true,
supportsTemperature: false,
backgroundMode: true,
},
"gpt-5.1-codex": {
maxTokens: 128000,
contextWindow: 400000,
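As a quick sanity check on the pricing fields in the gpt-5-pro-2025-10-06 entry, assuming the usual convention that inputPrice and outputPrice are USD per million tokens (an assumption, not stated in this diff):

```ts
// Worked example: cost of a request with 10k input and 2k output tokens.
const inputPricePerMillion = 15.0
const outputPricePerMillion = 120.0
const cost =
	(10_000 / 1_000_000) * inputPricePerMillion + (2_000 / 1_000_000) * outputPricePerMillion
console.log(cost.toFixed(2)) // "0.39" => about $0.15 for input plus $0.24 for output
```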
30 changes: 30 additions & 0 deletions src/api/providers/__tests__/lm-studio-timeout.spec.ts
@@ -88,4 +88,34 @@ describe("LmStudioHandler timeout configuration", () => {
}),
)
})

it("should force zero timeout when model info disables timeout", () => {
;(getApiRequestTimeout as any).mockReturnValue(600000)

const spy = vitest.spyOn(LmStudioHandler.prototype as any, "getModel").mockReturnValue({
id: "llama2",
info: {
maxTokens: -1,
contextWindow: 128000,
supportsPromptCache: false,
supportsImages: true,
disableTimeout: true,
},
})

const options: ApiHandlerOptions = {
apiModelId: "llama2",
lmStudioModelId: "llama2",
}

new LmStudioHandler(options)

expect(mockOpenAIConstructor).toHaveBeenCalledWith(
expect.objectContaining({
timeout: 0,
}),
)

spy.mockRestore()
})
})
32 changes: 32 additions & 0 deletions src/api/providers/__tests__/openai-native-usage.spec.ts
@@ -389,6 +389,38 @@ describe("OpenAiNativeHandler - normalizeUsage", () => {
})
})

it("should produce identical usage chunk when background mode is enabled", () => {
const usage = {
input_tokens: 120,
output_tokens: 60,
cache_creation_input_tokens: 10,
cache_read_input_tokens: 30,
}

const baselineHandler = new OpenAiNativeHandler({
openAiNativeApiKey: "test-key",
apiModelId: "gpt-5-pro-2025-10-06",
})
const backgroundHandler = new OpenAiNativeHandler({
openAiNativeApiKey: "test-key",
apiModelId: "gpt-5-pro-2025-10-06",
openAiNativeBackgroundMode: true,
})

const baselineUsage = (baselineHandler as any).normalizeUsage(usage, baselineHandler.getModel())
const backgroundUsage = (backgroundHandler as any).normalizeUsage(usage, backgroundHandler.getModel())

expect(baselineUsage).toMatchObject({
type: "usage",
inputTokens: 120,
outputTokens: 60,
cacheWriteTokens: 10,
cacheReadTokens: 30,
totalCost: expect.any(Number),
})
expect(backgroundUsage).toEqual(baselineUsage)
})

describe("cost calculation", () => {
it("should pass total input tokens to calculateApiCostOpenAI", () => {
const usage = {