diff --git a/src/api/providers/deepseek.ts b/src/api/providers/deepseek.ts index 01e747e11b1..4e5aef23a53 100644 --- a/src/api/providers/deepseek.ts +++ b/src/api/providers/deepseek.ts @@ -54,7 +54,13 @@ export class DeepSeekHandler extends OpenAiHandler { // Convert messages to R1 format (merges consecutive same-role messages) // This is required for DeepSeek which does not support successive messages with the same role - const convertedMessages = convertToR1Format([{ role: "user", content: systemPrompt }, ...messages]) + // For thinking models (deepseek-reasoner), enable mergeToolResultText to preserve reasoning_content + // during tool call sequences. Without this, environment_details text after tool_results would + // create user messages that cause DeepSeek to drop all previous reasoning_content. + // See: https://api-docs.deepseek.com/guides/thinking_mode + const convertedMessages = convertToR1Format([{ role: "user", content: systemPrompt }, ...messages], { + mergeToolResultText: isThinkingModel, + }) const requestOptions: DeepSeekChatCompletionParams = { model: modelId, diff --git a/src/api/transform/__tests__/r1-format.spec.ts b/src/api/transform/__tests__/r1-format.spec.ts index edfe9dc5d14..3d875e9392f 100644 --- a/src/api/transform/__tests__/r1-format.spec.ts +++ b/src/api/transform/__tests__/r1-format.spec.ts @@ -394,5 +394,226 @@ describe("convertToR1Format", () => { content: "Follow up response", }) }) + + describe("mergeToolResultText option for DeepSeek interleaved thinking", () => { + it("should merge text content into last tool message when mergeToolResultText is true", () => { + const input: Anthropic.Messages.MessageParam[] = [ + { + role: "user", + content: [ + { + type: "tool_result", + tool_use_id: "call_123", + content: "Tool result content", + }, + { + type: "text", + text: "\nSome context\n", + }, + ], + }, + ] + + const result = convertToR1Format(input, { mergeToolResultText: true }) + + // Should produce only one tool message with merged content + expect(result).toHaveLength(1) + expect(result[0]).toEqual({ + role: "tool", + tool_call_id: "call_123", + content: "Tool result content\n\n\nSome context\n", + }) + }) + + it("should NOT merge text when mergeToolResultText is false (default behavior)", () => { + const input: Anthropic.Messages.MessageParam[] = [ + { + role: "user", + content: [ + { + type: "tool_result", + tool_use_id: "call_123", + content: "Tool result content", + }, + { + type: "text", + text: "Please continue", + }, + ], + }, + ] + + // Without option (default behavior) + const result = convertToR1Format(input) + + // Should produce two messages: tool message + user message + expect(result).toHaveLength(2) + expect(result[0]).toEqual({ + role: "tool", + tool_call_id: "call_123", + content: "Tool result content", + }) + expect(result[1]).toEqual({ + role: "user", + content: "Please continue", + }) + }) + + it("should merge text into last tool message when multiple tool results exist", () => { + const input: Anthropic.Messages.MessageParam[] = [ + { + role: "user", + content: [ + { + type: "tool_result", + tool_use_id: "call_1", + content: "First result", + }, + { + type: "tool_result", + tool_use_id: "call_2", + content: "Second result", + }, + { + type: "text", + text: "Context", + }, + ], + }, + ] + + const result = convertToR1Format(input, { mergeToolResultText: true }) + + // Should produce two tool messages, with text merged into the last one + expect(result).toHaveLength(2) + expect(result[0]).toEqual({ + role: "tool", + tool_call_id: "call_1", + content: "First result", + }) + expect(result[1]).toEqual({ + role: "tool", + tool_call_id: "call_2", + content: "Second result\n\nContext", + }) + }) + + it("should NOT merge when there are images (images need user message)", () => { + const input: Anthropic.Messages.MessageParam[] = [ + { + role: "user", + content: [ + { + type: "tool_result", + tool_use_id: "call_123", + content: "Tool result", + }, + { + type: "text", + text: "Check this image", + }, + { + type: "image", + source: { + type: "base64", + media_type: "image/jpeg", + data: "imagedata", + }, + }, + ], + }, + ] + + const result = convertToR1Format(input, { mergeToolResultText: true }) + + // Should produce tool message + user message with image + expect(result).toHaveLength(2) + expect(result[0]).toEqual({ + role: "tool", + tool_call_id: "call_123", + content: "Tool result", + }) + expect(result[1]).toMatchObject({ + role: "user", + content: expect.arrayContaining([ + { type: "text", text: "Check this image" }, + { type: "image_url", image_url: expect.any(Object) }, + ]), + }) + }) + + it("should NOT merge when there are no tool results (text-only should remain user message)", () => { + const input: Anthropic.Messages.MessageParam[] = [ + { + role: "user", + content: [ + { + type: "text", + text: "Just a regular message", + }, + ], + }, + ] + + const result = convertToR1Format(input, { mergeToolResultText: true }) + + // Should produce user message as normal + expect(result).toHaveLength(1) + expect(result[0]).toEqual({ + role: "user", + content: "Just a regular message", + }) + }) + + it("should preserve reasoning_content on assistant messages in same conversation", () => { + const input = [ + { role: "user" as const, content: "Start" }, + { + role: "assistant" as const, + content: [ + { + type: "tool_use" as const, + id: "call_123", + name: "test_tool", + input: {}, + }, + ], + reasoning_content: "Let me think about this...", + }, + { + role: "user" as const, + content: [ + { + type: "tool_result" as const, + tool_use_id: "call_123", + content: "Result", + }, + { + type: "text" as const, + text: "Context", + }, + ], + }, + ] + + const result = convertToR1Format(input as Anthropic.Messages.MessageParam[], { + mergeToolResultText: true, + }) + + // Should have: user, assistant (with reasoning + tool_calls), tool + expect(result).toHaveLength(3) + expect(result[0]).toEqual({ role: "user", content: "Start" }) + expect((result[1] as any).reasoning_content).toBe("Let me think about this...") + expect((result[1] as any).tool_calls).toBeDefined() + // Tool message should have merged content + expect(result[2]).toEqual({ + role: "tool", + tool_call_id: "call_123", + content: "Result\n\nContext", + }) + // Most importantly: NO user message after tool message + expect(result.filter((m) => m.role === "user")).toHaveLength(1) + }) + }) }) }) diff --git a/src/api/transform/r1-format.ts b/src/api/transform/r1-format.ts index d4a7bef1ae7..8231e24f76f 100644 --- a/src/api/transform/r1-format.ts +++ b/src/api/transform/r1-format.ts @@ -26,11 +26,20 @@ export type DeepSeekAssistantMessage = AssistantMessage & { * - Preserves reasoning_content on assistant messages for tool call continuations * - Tool result messages are converted to OpenAI tool messages * - reasoning_content from previous assistant messages is preserved until a new user turn + * - Text content after tool_results (like environment_details) is merged into the last tool message + * to avoid creating user messages that would cause reasoning_content to be dropped * * @param messages Array of Anthropic messages + * @param options Optional configuration for message conversion + * @param options.mergeToolResultText If true, merge text content after tool_results into the last + * tool message instead of creating a separate user message. + * This is critical for DeepSeek's interleaved thinking mode. * @returns Array of OpenAI messages where consecutive messages with the same role are combined */ -export function convertToR1Format(messages: AnthropicMessage[]): Message[] { +export function convertToR1Format( + messages: AnthropicMessage[], + options?: { mergeToolResultText?: boolean }, +): Message[] { const result: Message[] = [] for (const message of messages) { @@ -87,37 +96,54 @@ export function convertToR1Format(messages: AnthropicMessage[]): Message[] { result.push(toolMessage) } - // Then add user message with text/image content if any + // Handle text/image content after tool results if (textParts.length > 0 || imageParts.length > 0) { - let content: UserMessage["content"] - if (imageParts.length > 0) { - const parts: (ContentPartText | ContentPartImage)[] = [] - if (textParts.length > 0) { - parts.push({ type: "text", text: textParts.join("\n") }) + // For DeepSeek interleaved thinking: when mergeToolResultText is enabled and we have + // tool results followed by text, merge the text into the last tool message to avoid + // creating a user message that would cause reasoning_content to be dropped. + // This is critical because DeepSeek drops all reasoning_content when it sees a user message. + const shouldMergeIntoToolMessage = + options?.mergeToolResultText && toolResults.length > 0 && imageParts.length === 0 + + if (shouldMergeIntoToolMessage) { + // Merge text content into the last tool message + const lastToolMessage = result[result.length - 1] as ToolMessage + if (lastToolMessage?.role === "tool") { + const additionalText = textParts.join("\n") + lastToolMessage.content = `${lastToolMessage.content}\n\n${additionalText}` } - parts.push(...imageParts) - content = parts } else { - content = textParts.join("\n") - } + // Standard behavior: add user message with text/image content + let content: UserMessage["content"] + if (imageParts.length > 0) { + const parts: (ContentPartText | ContentPartImage)[] = [] + if (textParts.length > 0) { + parts.push({ type: "text", text: textParts.join("\n") }) + } + parts.push(...imageParts) + content = parts + } else { + content = textParts.join("\n") + } - // Check if we can merge with the last message - const lastMessage = result[result.length - 1] - if (lastMessage?.role === "user") { - // Merge with existing user message - if (typeof lastMessage.content === "string" && typeof content === "string") { - lastMessage.content += `\n${content}` + // Check if we can merge with the last message + const lastMessage = result[result.length - 1] + if (lastMessage?.role === "user") { + // Merge with existing user message + if (typeof lastMessage.content === "string" && typeof content === "string") { + lastMessage.content += `\n${content}` + } else { + const lastContent = Array.isArray(lastMessage.content) + ? lastMessage.content + : [{ type: "text" as const, text: lastMessage.content || "" }] + const newContent = Array.isArray(content) + ? content + : [{ type: "text" as const, text: content }] + lastMessage.content = [...lastContent, ...newContent] as UserMessage["content"] + } } else { - const lastContent = Array.isArray(lastMessage.content) - ? lastMessage.content - : [{ type: "text" as const, text: lastMessage.content || "" }] - const newContent = Array.isArray(content) - ? content - : [{ type: "text" as const, text: content }] - lastMessage.content = [...lastContent, ...newContent] as UserMessage["content"] + result.push({ role: "user", content }) } - } else { - result.push({ role: "user", content }) } } } else {