diff --git a/tests/ipcMain/openai-web-search.test.ts b/tests/ipcMain/openai-web-search.test.ts
new file mode 100644
index 000000000..5124b420d
--- /dev/null
+++ b/tests/ipcMain/openai-web-search.test.ts
@@ -0,0 +1,90 @@
+import {
+  setupWorkspace,
+  shouldRunIntegrationTests,
+  validateApiKeys,
+  type TestEnvironment,
+} from "./setup";
+import { sendMessageWithModel, createEventCollector, assertStreamSuccess } from "./helpers";
+
+// Skip all tests if TEST_INTEGRATION is not set (presumably what shouldRunIntegrationTests checks)
+const describeIntegration = shouldRunIntegrationTests() ? describe : describe.skip;
+
+// Validate API keys at module load, before any test runs; a skipped suite needs no key
+if (shouldRunIntegrationTests()) {
+  validateApiKeys(["OPENAI_API_KEY"]);
+}
+
+describeIntegration("OpenAI web_search integration tests", () => {
+  // Live API calls are flaky: in CI, retry a failing test up to 3 times and log each failure
+  if (process.env.CI && typeof jest !== "undefined" && jest.retryTimes) {
+    jest.retryTimes(3, { logErrorsBeforeRetry: true });
+  }
+
+  test.concurrent(
+    "should handle reasoning + web_search without itemId errors",
+    async () => {
+      // Setup test environment with OpenAI
+      const { env, workspaceId, cleanup } = await setupWorkspace("openai");
+      try {
+        // This prompt reliably triggers the reasoning + web_search bug:
+        // 1. Gold price search always triggers web_search (pricing data)
+        // 2. Mathematical computation requires reasoning
+        // 3. High reasoning effort ensures reasoning is present
+        // This combination exposed the itemId bug on main branch
+        const result = await sendMessageWithModel(
+          env.mockIpcRenderer,
+          workspaceId,
+          "Find the current gold price per ounce via web search. " +
+            "Then compute round(price^2) and determine how many Collatz steps it takes to reach 1.",
+          "openai",
+          "gpt-5-codex",
+          {
+            thinkingLevel: "high", // Ensure reasoning is used
+          }
+        );
+
+        // Verify the IPC call succeeded
+        expect(result.success).toBe(true);
+
+        // Collect and verify stream events
+        const collector = createEventCollector(env.sentEvents, workspaceId);
+
+        // Wait for stream to complete; toBeTruthy (unlike toBeDefined) also fails on null
+        const streamEnd = await collector.waitForEvent("stream-end", 120000);
+        expect(streamEnd).toBeTruthy();
+
+        // Verify no errors occurred - this is the KEY test
+        // Before the fix, this would fail with:
+        // "Item 'ws_...' of type 'web_search_call' was provided without its required 'reasoning' item"
+        assertStreamSuccess(collector);
+
+        // Collect all events and verify both reasoning and web_search occurred
+        collector.collect();
+        const events = collector.getEvents();
+
+        // Verify we got reasoning (this is what triggers the bug)
+        const hasReasoning = events.some((e) => "type" in e && e.type === "reasoning-delta");
+
+        // Verify web_search was called
+        const hasWebSearchCall = events.some(
+          (e) =>
+            "type" in e &&
+            e.type === "tool-call-start" &&
+            "toolName" in e &&
+            e.toolName === "web_search"
+        );
+
+        // Both should be present for this test to be valid
+        expect(hasReasoning).toBe(true);
+        expect(hasWebSearchCall).toBe(true);
+
+        // Verify we received text deltas (the assistant's final answer)
+        const deltas = collector.getDeltas();
+        expect(deltas.length).toBeGreaterThan(0);
+      } finally {
+        await cleanup();
+      }
+    },
+    150000 // 150 second timeout - must outlast the 120 s stream-end wait plus setup/cleanup
+  );
+});