diff --git a/src/core/task/Task.ts b/src/core/task/Task.ts index 8ed9ab56405..35f9011a86d 100644 --- a/src/core/task/Task.ts +++ b/src/core/task/Task.ts @@ -129,6 +129,7 @@ import { getMessagesSinceLastSummary, summarizeConversation, getEffectiveApiHist import { MessageQueueService } from "../message-queue/MessageQueueService" import { AutoApprovalHandler, checkAutoApproval } from "../auto-approval" import { MessageManager } from "../message-manager" +import { validateAndFixToolResultIds } from "./validateToolResultIds" const MAX_EXPONENTIAL_BACKOFF_SECONDS = 600 // 10 minutes const DEFAULT_USAGE_COLLECTION_TIMEOUT_MS = 5000 // 5 seconds @@ -811,7 +812,9 @@ export class Task extends EventEmitter implements TaskLike { this.apiConversationHistory.push(messageWithTs) } else { - const messageWithTs = { ...message, ts: Date.now() } + // For user messages, validate and fix tool_result IDs against the previous assistant message + const validatedMessage = validateAndFixToolResultIds(message, this.apiConversationHistory) + const messageWithTs = { ...validatedMessage, ts: Date.now() } this.apiConversationHistory.push(messageWithTs) } @@ -850,7 +853,9 @@ export class Task extends EventEmitter implements TaskLike { content: this.userMessageContent, } - const userMessageWithTs = { ...userMessage, ts: Date.now() } + // Validate and fix tool_result IDs against the previous assistant message + const validatedMessage = validateAndFixToolResultIds(userMessage, this.apiConversationHistory) + const userMessageWithTs = { ...validatedMessage, ts: Date.now() } this.apiConversationHistory.push(userMessageWithTs as ApiMessage) await this.saveApiConversationHistory() diff --git a/src/core/task/__tests__/validateToolResultIds.spec.ts b/src/core/task/__tests__/validateToolResultIds.spec.ts new file mode 100644 index 00000000000..67aaf238952 --- /dev/null +++ b/src/core/task/__tests__/validateToolResultIds.spec.ts @@ -0,0 +1,519 @@ +import { Anthropic } from "@anthropic-ai/sdk" +import { TelemetryService } from "@roo-code/telemetry" +import { validateAndFixToolResultIds, ToolResultIdMismatchError } from "../validateToolResultIds" + +// Mock TelemetryService +vi.mock("@roo-code/telemetry", () => ({ + TelemetryService: { + hasInstance: vi.fn(() => true), + instance: { + captureException: vi.fn(), + }, + }, +})) + +describe("validateAndFixToolResultIds", () => { + beforeEach(() => { + vi.clearAllMocks() + }) + + describe("when there is no previous assistant message", () => { + it("should return the user message unchanged", () => { + const userMessage: Anthropic.MessageParam = { + role: "user", + content: [ + { + type: "tool_result", + tool_use_id: "tool-123", + content: "Result", + }, + ], + } + + const result = validateAndFixToolResultIds(userMessage, []) + + expect(result).toEqual(userMessage) + }) + }) + + describe("when tool_result IDs match tool_use IDs", () => { + it("should return the user message unchanged for single tool", () => { + const assistantMessage: Anthropic.MessageParam = { + role: "assistant", + content: [ + { + type: "tool_use", + id: "tool-123", + name: "read_file", + input: { path: "test.txt" }, + }, + ], + } + + const userMessage: Anthropic.MessageParam = { + role: "user", + content: [ + { + type: "tool_result", + tool_use_id: "tool-123", + content: "File content", + }, + ], + } + + const result = validateAndFixToolResultIds(userMessage, [assistantMessage]) + + expect(result).toEqual(userMessage) + }) + + it("should return the user message unchanged for multiple tools", () => { + const assistantMessage: Anthropic.MessageParam = { + role: "assistant", + content: [ + { + type: "tool_use", + id: "tool-1", + name: "read_file", + input: { path: "a.txt" }, + }, + { + type: "tool_use", + id: "tool-2", + name: "read_file", + input: { path: "b.txt" }, + }, + ], + } + + const userMessage: Anthropic.MessageParam = { + role: "user", + content: [ + { + type: "tool_result", + tool_use_id: "tool-1", + content: "Content A", + }, + { + type: "tool_result", + tool_use_id: "tool-2", + content: "Content B", + }, + ], + } + + const result = validateAndFixToolResultIds(userMessage, [assistantMessage]) + + expect(result).toEqual(userMessage) + }) + }) + + describe("when tool_result IDs do not match tool_use IDs", () => { + it("should fix single mismatched tool_use_id by position", () => { + const assistantMessage: Anthropic.MessageParam = { + role: "assistant", + content: [ + { + type: "tool_use", + id: "correct-id-123", + name: "read_file", + input: { path: "test.txt" }, + }, + ], + } + + const userMessage: Anthropic.MessageParam = { + role: "user", + content: [ + { + type: "tool_result", + tool_use_id: "wrong-id-456", + content: "File content", + }, + ], + } + + const result = validateAndFixToolResultIds(userMessage, [assistantMessage]) + + expect(Array.isArray(result.content)).toBe(true) + const resultContent = result.content as Anthropic.ToolResultBlockParam[] + expect(resultContent[0].tool_use_id).toBe("correct-id-123") + expect(resultContent[0].content).toBe("File content") + }) + + it("should fix multiple mismatched tool_use_ids by position", () => { + const assistantMessage: Anthropic.MessageParam = { + role: "assistant", + content: [ + { + type: "tool_use", + id: "correct-1", + name: "read_file", + input: { path: "a.txt" }, + }, + { + type: "tool_use", + id: "correct-2", + name: "read_file", + input: { path: "b.txt" }, + }, + ], + } + + const userMessage: Anthropic.MessageParam = { + role: "user", + content: [ + { + type: "tool_result", + tool_use_id: "wrong-1", + content: "Content A", + }, + { + type: "tool_result", + tool_use_id: "wrong-2", + content: "Content B", + }, + ], + } + + const result = validateAndFixToolResultIds(userMessage, [assistantMessage]) + + expect(Array.isArray(result.content)).toBe(true) + const resultContent = result.content as Anthropic.ToolResultBlockParam[] + expect(resultContent[0].tool_use_id).toBe("correct-1") + expect(resultContent[1].tool_use_id).toBe("correct-2") + }) + + it("should partially fix when some IDs match and some don't", () => { + const assistantMessage: Anthropic.MessageParam = { + role: "assistant", + content: [ + { + type: "tool_use", + id: "id-1", + name: "read_file", + input: { path: "a.txt" }, + }, + { + type: "tool_use", + id: "id-2", + name: "read_file", + input: { path: "b.txt" }, + }, + ], + } + + const userMessage: Anthropic.MessageParam = { + role: "user", + content: [ + { + type: "tool_result", + tool_use_id: "id-1", // Correct + content: "Content A", + }, + { + type: "tool_result", + tool_use_id: "wrong-id", // Wrong + content: "Content B", + }, + ], + } + + const result = validateAndFixToolResultIds(userMessage, [assistantMessage]) + + expect(Array.isArray(result.content)).toBe(true) + const resultContent = result.content as Anthropic.ToolResultBlockParam[] + expect(resultContent[0].tool_use_id).toBe("id-1") + expect(resultContent[1].tool_use_id).toBe("id-2") + }) + }) + + describe("when user message has non-tool_result content", () => { + it("should preserve text blocks alongside tool_result blocks", () => { + const assistantMessage: Anthropic.MessageParam = { + role: "assistant", + content: [ + { + type: "tool_use", + id: "tool-123", + name: "read_file", + input: { path: "test.txt" }, + }, + ], + } + + const userMessage: Anthropic.MessageParam = { + role: "user", + content: [ + { + type: "tool_result", + tool_use_id: "wrong-id", + content: "File content", + }, + { + type: "text", + text: "Additional context", + }, + ], + } + + const result = validateAndFixToolResultIds(userMessage, [assistantMessage]) + + expect(Array.isArray(result.content)).toBe(true) + const resultContent = result.content as Array + expect(resultContent[0].type).toBe("tool_result") + expect((resultContent[0] as Anthropic.ToolResultBlockParam).tool_use_id).toBe("tool-123") + expect(resultContent[1].type).toBe("text") + expect((resultContent[1] as Anthropic.TextBlockParam).text).toBe("Additional context") + }) + }) + + describe("when assistant message has non-tool_use content", () => { + it("should only consider tool_use blocks for matching", () => { + const assistantMessage: Anthropic.MessageParam = { + role: "assistant", + content: [ + { + type: "text", + text: "Let me read that file for you.", + }, + { + type: "tool_use", + id: "tool-123", + name: "read_file", + input: { path: "test.txt" }, + }, + ], + } + + const userMessage: Anthropic.MessageParam = { + role: "user", + content: [ + { + type: "tool_result", + tool_use_id: "wrong-id", + content: "File content", + }, + ], + } + + const result = validateAndFixToolResultIds(userMessage, [assistantMessage]) + + expect(Array.isArray(result.content)).toBe(true) + const resultContent = result.content as Anthropic.ToolResultBlockParam[] + expect(resultContent[0].tool_use_id).toBe("tool-123") + }) + }) + + describe("when user message content is a string", () => { + it("should return the message unchanged", () => { + const assistantMessage: Anthropic.MessageParam = { + role: "assistant", + content: [ + { + type: "tool_use", + id: "tool-123", + name: "read_file", + input: { path: "test.txt" }, + }, + ], + } + + const userMessage: Anthropic.MessageParam = { + role: "user", + content: "Just a plain text message", + } + + const result = validateAndFixToolResultIds(userMessage, [assistantMessage]) + + expect(result).toEqual(userMessage) + }) + }) + + describe("when assistant message content is a string", () => { + it("should return the user message unchanged", () => { + const assistantMessage: Anthropic.MessageParam = { + role: "assistant", + content: "Just some text, no tool use", + } + + const userMessage: Anthropic.MessageParam = { + role: "user", + content: [ + { + type: "tool_result", + tool_use_id: "tool-123", + content: "Result", + }, + ], + } + + const result = validateAndFixToolResultIds(userMessage, [assistantMessage]) + + expect(result).toEqual(userMessage) + }) + }) + + describe("when there are more tool_results than tool_uses", () => { + it("should leave extra tool_results unchanged", () => { + const assistantMessage: Anthropic.MessageParam = { + role: "assistant", + content: [ + { + type: "tool_use", + id: "tool-1", + name: "read_file", + input: { path: "test.txt" }, + }, + ], + } + + const userMessage: Anthropic.MessageParam = { + role: "user", + content: [ + { + type: "tool_result", + tool_use_id: "wrong-1", + content: "Content 1", + }, + { + type: "tool_result", + tool_use_id: "extra-id", + content: "Content 2", + }, + ], + } + + const result = validateAndFixToolResultIds(userMessage, [assistantMessage]) + + expect(Array.isArray(result.content)).toBe(true) + const resultContent = result.content as Anthropic.ToolResultBlockParam[] + expect(resultContent[0].tool_use_id).toBe("tool-1") + // Extra tool_result should remain unchanged + expect(resultContent[1].tool_use_id).toBe("extra-id") + }) + }) + + describe("when there are more tool_uses than tool_results", () => { + it("should fix the available tool_results", () => { + const assistantMessage: Anthropic.MessageParam = { + role: "assistant", + content: [ + { + type: "tool_use", + id: "tool-1", + name: "read_file", + input: { path: "a.txt" }, + }, + { + type: "tool_use", + id: "tool-2", + name: "read_file", + input: { path: "b.txt" }, + }, + ], + } + + const userMessage: Anthropic.MessageParam = { + role: "user", + content: [ + { + type: "tool_result", + tool_use_id: "wrong-1", + content: "Content 1", + }, + ], + } + + const result = validateAndFixToolResultIds(userMessage, [assistantMessage]) + + expect(Array.isArray(result.content)).toBe(true) + const resultContent = result.content as Anthropic.ToolResultBlockParam[] + expect(resultContent.length).toBe(1) + expect(resultContent[0].tool_use_id).toBe("tool-1") + }) + }) + + describe("telemetry", () => { + it("should call captureException when there is a mismatch", () => { + const assistantMessage: Anthropic.MessageParam = { + role: "assistant", + content: [ + { + type: "tool_use", + id: "correct-id", + name: "read_file", + input: { path: "test.txt" }, + }, + ], + } + + const userMessage: Anthropic.MessageParam = { + role: "user", + content: [ + { + type: "tool_result", + tool_use_id: "wrong-id", + content: "Content", + }, + ], + } + + validateAndFixToolResultIds(userMessage, [assistantMessage]) + + expect(TelemetryService.instance.captureException).toHaveBeenCalledTimes(1) + expect(TelemetryService.instance.captureException).toHaveBeenCalledWith( + expect.any(ToolResultIdMismatchError), + expect.objectContaining({ + toolResultIds: ["wrong-id"], + toolUseIds: ["correct-id"], + toolResultCount: 1, + toolUseCount: 1, + }), + ) + }) + + it("should not call captureException when IDs match", () => { + const assistantMessage: Anthropic.MessageParam = { + role: "assistant", + content: [ + { + type: "tool_use", + id: "tool-123", + name: "read_file", + input: { path: "test.txt" }, + }, + ], + } + + const userMessage: Anthropic.MessageParam = { + role: "user", + content: [ + { + type: "tool_result", + tool_use_id: "tool-123", + content: "Content", + }, + ], + } + + validateAndFixToolResultIds(userMessage, [assistantMessage]) + + expect(TelemetryService.instance.captureException).not.toHaveBeenCalled() + }) + }) + + describe("ToolResultIdMismatchError", () => { + it("should create error with correct properties", () => { + const error = new ToolResultIdMismatchError( + "Mismatch detected", + ["result-1", "result-2"], + ["use-1", "use-2"], + ) + + expect(error.name).toBe("ToolResultIdMismatchError") + expect(error.message).toBe("Mismatch detected") + expect(error.toolResultIds).toEqual(["result-1", "result-2"]) + expect(error.toolUseIds).toEqual(["use-1", "use-2"]) + }) + }) +}) diff --git a/src/core/task/validateToolResultIds.ts b/src/core/task/validateToolResultIds.ts new file mode 100644 index 00000000000..159caca79d8 --- /dev/null +++ b/src/core/task/validateToolResultIds.ts @@ -0,0 +1,141 @@ +import { Anthropic } from "@anthropic-ai/sdk" +import { TelemetryService } from "@roo-code/telemetry" +import { findLastIndex } from "../../shared/array" + +/** + * Custom error class for tool result ID mismatches. + * Used for structured error tracking via PostHog. + */ +export class ToolResultIdMismatchError extends Error { + constructor( + message: string, + public readonly toolResultIds: string[], + public readonly toolUseIds: string[], + ) { + super(message) + this.name = "ToolResultIdMismatchError" + } +} + +/** + * Validates and fixes tool_result IDs in a user message against the previous assistant message. + * + * This is a centralized validation that catches all tool_use/tool_result ID mismatches + * before messages are added to the API conversation history. It handles scenarios like: + * - Race conditions during streaming + * - Message editing scenarios + * - Resume/delegation scenarios + * + * @param userMessage - The user message being added to history + * @param apiConversationHistory - The conversation history to find the previous assistant message from + * @returns The validated user message with corrected tool_use_ids + */ +export function validateAndFixToolResultIds( + userMessage: Anthropic.MessageParam, + apiConversationHistory: Anthropic.MessageParam[], +): Anthropic.MessageParam { + // Only process user messages with array content + if (userMessage.role !== "user" || !Array.isArray(userMessage.content)) { + return userMessage + } + + // Find tool_result blocks in the user message + const toolResults = userMessage.content.filter( + (block): block is Anthropic.ToolResultBlockParam => block.type === "tool_result", + ) + + // No tool results to validate + if (toolResults.length === 0) { + return userMessage + } + + // Find the previous assistant message from conversation history + const prevAssistantIdx = findLastIndex(apiConversationHistory, (msg) => msg.role === "assistant") + if (prevAssistantIdx === -1) { + return userMessage + } + + const previousAssistantMessage = apiConversationHistory[prevAssistantIdx] + + // Get tool_use blocks from the assistant message + const assistantContent = previousAssistantMessage.content + if (!Array.isArray(assistantContent)) { + return userMessage + } + + const toolUseBlocks = assistantContent.filter((block): block is Anthropic.ToolUseBlock => block.type === "tool_use") + + // No tool_use blocks to match against + if (toolUseBlocks.length === 0) { + return userMessage + } + + // Build a set of valid tool_use IDs + const validToolUseIds = new Set(toolUseBlocks.map((block) => block.id)) + + // Check if any tool_result has an invalid ID + const hasInvalidIds = toolResults.some((result) => !validToolUseIds.has(result.tool_use_id)) + + if (!hasInvalidIds) { + // All IDs are valid, no changes needed + return userMessage + } + + // We have mismatches - need to fix them + const toolResultIdList = toolResults.map((r) => r.tool_use_id) + const toolUseIdList = toolUseBlocks.map((b) => b.id) + + // Report the mismatch to PostHog error tracking + if (TelemetryService.hasInstance()) { + TelemetryService.instance.captureException( + new ToolResultIdMismatchError( + `Detected tool_result ID mismatch. tool_result IDs: [${toolResultIdList.join(", ")}], tool_use IDs: [${toolUseIdList.join(", ")}]`, + toolResultIdList, + toolUseIdList, + ), + { + toolResultIds: toolResultIdList, + toolUseIds: toolUseIdList, + toolResultCount: toolResults.length, + toolUseCount: toolUseBlocks.length, + }, + ) + } + + // Create a mapping of tool_result IDs to corrected IDs + // Strategy: Match by position (first tool_result -> first tool_use, etc.) + // This handles most cases where the mismatch is due to ID confusion + const correctedContent = userMessage.content.map((block) => { + if (block.type !== "tool_result") { + return block + } + + // If the ID is already valid, keep it + if (validToolUseIds.has(block.tool_use_id)) { + return block + } + + // Find which tool_result index this block is by comparing references. + // This correctly handles duplicate tool_use_ids - we find the actual block's + // position among all tool_results, not the first block with a matching ID. + const toolResultIndex = toolResults.indexOf(block as Anthropic.ToolResultBlockParam) + + // Try to match by position - only fix if there's a corresponding tool_use + if (toolResultIndex !== -1 && toolResultIndex < toolUseBlocks.length) { + const correctId = toolUseBlocks[toolResultIndex].id + return { + ...block, + tool_use_id: correctId, + } + } + + // No corresponding tool_use for this tool_result - leave it unchanged + // This can happen when there are more tool_results than tool_uses + return block + }) + + return { + ...userMessage, + content: correctedContent, + } +}