From a6bd30df00e68526e49fd188aeff1736e32959a9 Mon Sep 17 00:00:00 2001
From: Hannes Rudolph
Date: Tue, 23 Dec 2025 00:39:26 -0700
Subject: [PATCH] feat: enable mergeToolResultText for Roo Code Cloud provider

Add mergeToolResultText option to convertToOpenAiMessages() that merges
text content after tool_result blocks into the last tool message instead
of creating separate user messages. Enable this for the Roo Code Cloud
provider.

This improves compatibility with reasoning/thinking models (like
DeepSeek-reasoner, GLM-4.7, etc.) where a user message after tool results
causes the model to drop all previous reasoning_content.

- Add mergeToolResultText option to ConvertToOpenAiMessagesOptions interface
- Implement merging logic in openai-format.ts with proper edge case handling
- Enable mergeToolResultText: true in roo.ts provider
- Add 6 unit tests covering the new behavior
---
 src/api/providers/roo.ts                      |   8 +-
 .../transform/__tests__/openai-format.spec.ts | 177 ++++++++++++++++++
 src/api/transform/openai-format.ts            |  52 +++--
 3 files changed, 225 insertions(+), 12 deletions(-)

diff --git a/src/api/providers/roo.ts b/src/api/providers/roo.ts
index b4a626832b4..ebc174cf46a 100644
--- a/src/api/providers/roo.ts
+++ b/src/api/providers/roo.ts
@@ -100,7 +100,13 @@ export class RooHandler extends BaseOpenAiCompatibleProvider {
 			model,
 			max_tokens,
 			temperature,
-			messages: [{ role: "system", content: systemPrompt }, ...convertToOpenAiMessages(messages)],
+			// Enable mergeToolResultText to merge environment_details and other text content
+			// after tool_results into the last tool message. This prevents reasoning/thinking
+			// models from dropping reasoning_content when they see a user message after tool results.
+			messages: [
+				{ role: "system", content: systemPrompt },
+				...convertToOpenAiMessages(messages, { mergeToolResultText: true }),
+			],
 			stream: true,
 			stream_options: { include_usage: true },
 			...(reasoning && { reasoning }),
diff --git a/src/api/transform/__tests__/openai-format.spec.ts b/src/api/transform/__tests__/openai-format.spec.ts
index da9329fa326..29fd712c84f 100644
--- a/src/api/transform/__tests__/openai-format.spec.ts
+++ b/src/api/transform/__tests__/openai-format.spec.ts
@@ -224,4 +224,181 @@ describe("convertToOpenAiMessages", () => {
 		const assistantMessage = openAiMessages[0] as OpenAI.Chat.ChatCompletionAssistantMessageParam
 		expect(assistantMessage.tool_calls![0].id).toBe("custom_toolu_123")
 	})
+
+	describe("mergeToolResultText option", () => {
+		it("should merge text content into last tool message when mergeToolResultText is true", () => {
+			const anthropicMessages: Anthropic.Messages.MessageParam[] = [
+				{
+					role: "user",
+					content: [
+						{
+							type: "tool_result",
+							tool_use_id: "tool-123",
+							content: "Tool result content",
+						},
+						{
+							type: "text",
+							text: "<environment_details>\nSome context\n</environment_details>",
+						},
+					],
+				},
+			]
+
+			const openAiMessages = convertToOpenAiMessages(anthropicMessages, { mergeToolResultText: true })
+
+			// Should produce only one tool message with merged content
+			expect(openAiMessages).toHaveLength(1)
+			const toolMessage = openAiMessages[0] as OpenAI.Chat.ChatCompletionToolMessageParam
+			expect(toolMessage.role).toBe("tool")
+			expect(toolMessage.tool_call_id).toBe("tool-123")
+			expect(toolMessage.content).toBe(
+				"Tool result content\n\n<environment_details>\nSome context\n</environment_details>",
+			)
+		})
+
+		it("should merge text into last tool message when multiple tool results exist", () => {
+			const anthropicMessages: Anthropic.Messages.MessageParam[] = [
+				{
+					role: "user",
+					content: [
+						{
+							type: "tool_result",
tool_use_id: "call_1", + content: "First result", + }, + { + type: "tool_result", + tool_use_id: "call_2", + content: "Second result", + }, + { + type: "text", + text: "Context", + }, + ], + }, + ] + + const openAiMessages = convertToOpenAiMessages(anthropicMessages, { mergeToolResultText: true }) + + // Should produce two tool messages, with text merged into the last one + expect(openAiMessages).toHaveLength(2) + expect((openAiMessages[0] as OpenAI.Chat.ChatCompletionToolMessageParam).content).toBe("First result") + expect((openAiMessages[1] as OpenAI.Chat.ChatCompletionToolMessageParam).content).toBe( + "Second result\n\nContext", + ) + }) + + it("should NOT merge text when images are present (fall back to user message)", () => { + const anthropicMessages: Anthropic.Messages.MessageParam[] = [ + { + role: "user", + content: [ + { + type: "tool_result", + tool_use_id: "tool-123", + content: "Tool result content", + }, + { + type: "image", + source: { + type: "base64", + media_type: "image/png", + data: "base64data", + }, + }, + ], + }, + ] + + const openAiMessages = convertToOpenAiMessages(anthropicMessages, { mergeToolResultText: true }) + + // Should produce a tool message AND a user message (because image is present) + expect(openAiMessages).toHaveLength(2) + expect((openAiMessages[0] as OpenAI.Chat.ChatCompletionToolMessageParam).role).toBe("tool") + expect(openAiMessages[1].role).toBe("user") + }) + + it("should create separate user message when mergeToolResultText is false", () => { + const anthropicMessages: Anthropic.Messages.MessageParam[] = [ + { + role: "user", + content: [ + { + type: "tool_result", + tool_use_id: "tool-123", + content: "Tool result content", + }, + { + type: "text", + text: "\nSome context\n", + }, + ], + }, + ] + + const openAiMessages = convertToOpenAiMessages(anthropicMessages, { mergeToolResultText: false }) + + // Should produce a tool message AND a separate user message (default behavior) + expect(openAiMessages).toHaveLength(2) + expect((openAiMessages[0] as OpenAI.Chat.ChatCompletionToolMessageParam).role).toBe("tool") + expect((openAiMessages[0] as OpenAI.Chat.ChatCompletionToolMessageParam).content).toBe( + "Tool result content", + ) + expect(openAiMessages[1].role).toBe("user") + }) + + it("should work with normalizeToolCallId when mergeToolResultText is true", () => { + const anthropicMessages: Anthropic.Messages.MessageParam[] = [ + { + role: "user", + content: [ + { + type: "tool_result", + tool_use_id: "call_5019f900a247472bacde0b82", + content: "Tool result content", + }, + { + type: "text", + text: "Context", + }, + ], + }, + ] + + const openAiMessages = convertToOpenAiMessages(anthropicMessages, { + mergeToolResultText: true, + normalizeToolCallId: normalizeMistralToolCallId, + }) + + // Should merge AND normalize the ID + expect(openAiMessages).toHaveLength(1) + const toolMessage = openAiMessages[0] as OpenAI.Chat.ChatCompletionToolMessageParam + expect(toolMessage.role).toBe("tool") + expect(toolMessage.tool_call_id).toBe(normalizeMistralToolCallId("call_5019f900a247472bacde0b82")) + expect(toolMessage.content).toBe( + "Tool result content\n\nContext", + ) + }) + + it("should handle user messages with only text content (no tool results)", () => { + const anthropicMessages: Anthropic.Messages.MessageParam[] = [ + { + role: "user", + content: [ + { + type: "text", + text: "Hello, how are you?", + }, + ], + }, + ] + + const openAiMessages = convertToOpenAiMessages(anthropicMessages, { mergeToolResultText: true }) + + // Should produce a 
+			expect(openAiMessages).toHaveLength(1)
+			expect(openAiMessages[0].role).toBe("user")
+		})
+	})
 })
diff --git a/src/api/transform/openai-format.ts b/src/api/transform/openai-format.ts
index 7ca4ddb993c..e481864034b 100644
--- a/src/api/transform/openai-format.ts
+++ b/src/api/transform/openai-format.ts
@@ -11,6 +11,14 @@ export interface ConvertToOpenAiMessagesOptions {
 	 * This allows callers to declare provider-specific ID format requirements.
 	 */
 	normalizeToolCallId?: (id: string) => string
+	/**
+	 * If true, merge text content after tool_results into the last tool message
+	 * instead of creating a separate user message. This is critical for providers
+	 * with reasoning/thinking models (like DeepSeek-reasoner, GLM-4.7, etc.) where
+	 * a user message after tool results causes the model to drop all previous
+	 * reasoning_content. Default is false for backward compatibility.
+	 */
+	mergeToolResultText?: boolean
 }
 
 export function convertToOpenAiMessages(
@@ -95,18 +103,40 @@ export function convertToOpenAiMessages(
 
 			// Process non-tool messages
 			if (nonToolMessages.length > 0) {
-				openAiMessages.push({
-					role: "user",
-					content: nonToolMessages.map((part) => {
-						if (part.type === "image") {
-							return {
-								type: "image_url",
-								image_url: { url: `data:${part.source.media_type};base64,${part.source.data}` },
+				// Check if we should merge text into the last tool message
+				// This is critical for reasoning/thinking models where a user message
+				// after tool results causes the model to drop all previous reasoning_content
+				const hasOnlyTextContent = nonToolMessages.every((part) => part.type === "text")
+				const hasToolMessages = toolMessages.length > 0
+				const shouldMergeIntoToolMessage =
+					options?.mergeToolResultText && hasToolMessages && hasOnlyTextContent
+
+				if (shouldMergeIntoToolMessage) {
+					// Merge text content into the last tool message
+					const lastToolMessage = openAiMessages[
+						openAiMessages.length - 1
+					] as OpenAI.Chat.ChatCompletionToolMessageParam
+					if (lastToolMessage?.role === "tool") {
+						const additionalText = nonToolMessages
+							.map((part) => (part as Anthropic.TextBlockParam).text)
+							.join("\n")
+						lastToolMessage.content = `${lastToolMessage.content}\n\n${additionalText}`
+					}
+				} else {
+					// Standard behavior: add user message with text/image content
+					openAiMessages.push({
+						role: "user",
+						content: nonToolMessages.map((part) => {
+							if (part.type === "image") {
+								return {
+									type: "image_url",
+									image_url: { url: `data:${part.source.media_type};base64,${part.source.data}` },
+								}
 							}
-						}
-						return { type: "text", text: part.text }
-					}),
-				})
+							return { type: "text", text: part.text }
+						}),
+					})
+				}
 			}
 		} else if (anthropicMessage.role === "assistant") {
 			const { nonToolMessages, toolMessages } = anthropicMessage.content.reduce<{