diff --git a/packages/types/src/providers/zai.ts b/packages/types/src/providers/zai.ts index 1dfface9f6c..93cf9bb23bc 100644 --- a/packages/types/src/providers/zai.ts +++ b/packages/types/src/providers/zai.ts @@ -12,7 +12,7 @@ export type InternationalZAiModelId = keyof typeof internationalZAiModels export const internationalZAiDefaultModelId: InternationalZAiModelId = "glm-4.6" export const internationalZAiModels = { "glm-4.5": { - maxTokens: 98_304, + maxTokens: 16_384, contextWindow: 131_072, supportsImages: false, supportsPromptCache: true, @@ -26,7 +26,7 @@ export const internationalZAiModels = { "GLM-4.5 is Zhipu's latest featured model. Its comprehensive capabilities in reasoning, coding, and agent reach the state-of-the-art (SOTA) level among open-source models, with a context length of up to 128k.", }, "glm-4.5-air": { - maxTokens: 98_304, + maxTokens: 16_384, contextWindow: 131_072, supportsImages: false, supportsPromptCache: true, @@ -40,7 +40,7 @@ export const internationalZAiModels = { "GLM-4.5-Air is the lightweight version of GLM-4.5. 
It balances performance and cost-effectiveness, and can flexibly switch to hybrid thinking models.", }, "glm-4.5-x": { - maxTokens: 98_304, + maxTokens: 16_384, contextWindow: 131_072, supportsImages: false, supportsPromptCache: true, @@ -54,7 +54,7 @@ export const internationalZAiModels = { "GLM-4.5-X is a high-performance variant optimized for strong reasoning with ultra-fast responses.", }, "glm-4.5-airx": { - maxTokens: 98_304, + maxTokens: 16_384, contextWindow: 131_072, supportsImages: false, supportsPromptCache: true, @@ -67,7 +67,7 @@ export const internationalZAiModels = { description: "GLM-4.5-AirX is a lightweight, ultra-fast variant delivering strong performance with lower cost.", }, "glm-4.5-flash": { - maxTokens: 98_304, + maxTokens: 16_384, contextWindow: 131_072, supportsImages: false, supportsPromptCache: true, @@ -94,7 +94,7 @@ export const internationalZAiModels = { "GLM-4.5V is Z.AI's multimodal visual reasoning model (image/video/text/file input), optimized for GUI tasks, grounding, and document/video understanding.", }, "glm-4.6": { - maxTokens: 98_304, + maxTokens: 16_384, contextWindow: 200_000, supportsImages: false, supportsPromptCache: true, @@ -107,8 +107,25 @@ export const internationalZAiModels = { description: "GLM-4.6 is Zhipu's newest model with an extended context window of up to 200k tokens, providing enhanced capabilities for processing longer documents and conversations.", }, + "glm-4.7": { + maxTokens: 16_384, + contextWindow: 200_000, + supportsImages: false, + supportsPromptCache: true, + supportsNativeTools: true, + defaultToolProtocol: "native", + supportsReasoningEffort: ["disable", "medium"], + reasoningEffort: "medium", + preserveReasoning: true, + inputPrice: 0.6, + outputPrice: 2.2, + cacheWritesPrice: 0, + cacheReadsPrice: 0.11, + description: + "GLM-4.7 is Zhipu's latest model with built-in thinking capabilities enabled by default. 
It provides enhanced reasoning for complex tasks while maintaining fast response times.", + }, "glm-4-32b-0414-128k": { - maxTokens: 98_304, + maxTokens: 16_384, contextWindow: 131_072, supportsImages: false, supportsPromptCache: false, @@ -126,7 +143,7 @@ export type MainlandZAiModelId = keyof typeof mainlandZAiModels export const mainlandZAiDefaultModelId: MainlandZAiModelId = "glm-4.6" export const mainlandZAiModels = { "glm-4.5": { - maxTokens: 98_304, + maxTokens: 16_384, contextWindow: 131_072, supportsImages: false, supportsPromptCache: true, @@ -140,7 +157,7 @@ export const mainlandZAiModels = { "GLM-4.5 is Zhipu's latest featured model. Its comprehensive capabilities in reasoning, coding, and agent reach the state-of-the-art (SOTA) level among open-source models, with a context length of up to 128k.", }, "glm-4.5-air": { - maxTokens: 98_304, + maxTokens: 16_384, contextWindow: 131_072, supportsImages: false, supportsPromptCache: true, @@ -154,7 +171,7 @@ export const mainlandZAiModels = { "GLM-4.5-Air is the lightweight version of GLM-4.5. 
It balances performance and cost-effectiveness, and can flexibly switch to hybrid thinking models.", }, "glm-4.5-x": { - maxTokens: 98_304, + maxTokens: 16_384, contextWindow: 131_072, supportsImages: false, supportsPromptCache: true, @@ -168,7 +185,7 @@ export const mainlandZAiModels = { "GLM-4.5-X is a high-performance variant optimized for strong reasoning with ultra-fast responses.", }, "glm-4.5-airx": { - maxTokens: 98_304, + maxTokens: 16_384, contextWindow: 131_072, supportsImages: false, supportsPromptCache: true, @@ -181,7 +198,7 @@ export const mainlandZAiModels = { description: "GLM-4.5-AirX is a lightweight, ultra-fast variant delivering strong performance with lower cost.", }, "glm-4.5-flash": { - maxTokens: 98_304, + maxTokens: 16_384, contextWindow: 131_072, supportsImages: false, supportsPromptCache: true, @@ -208,7 +225,7 @@ export const mainlandZAiModels = { "GLM-4.5V is Z.AI's multimodal visual reasoning model (image/video/text/file input), optimized for GUI tasks, grounding, and document/video understanding.", }, "glm-4.6": { - maxTokens: 98_304, + maxTokens: 16_384, contextWindow: 204_800, supportsImages: false, supportsPromptCache: true, @@ -221,6 +238,23 @@ export const mainlandZAiModels = { description: "GLM-4.6 is Zhipu's newest model with an extended context window of up to 200k tokens, providing enhanced capabilities for processing longer documents and conversations.", }, + "glm-4.7": { + maxTokens: 16_384, + contextWindow: 204_800, + supportsImages: false, + supportsPromptCache: true, + supportsNativeTools: true, + defaultToolProtocol: "native", + supportsReasoningEffort: ["disable", "medium"], + reasoningEffort: "medium", + preserveReasoning: true, + inputPrice: 0.29, + outputPrice: 1.14, + cacheWritesPrice: 0, + cacheReadsPrice: 0.057, + description: + "GLM-4.7 is Zhipu's latest model with built-in thinking capabilities enabled by default. 
It provides enhanced reasoning for complex tasks while maintaining fast response times.", + }, } as const satisfies Record export const ZAI_DEFAULT_TEMPERATURE = 0.6 diff --git a/src/api/providers/__tests__/zai.spec.ts b/src/api/providers/__tests__/zai.spec.ts index 707abee09f6..34323b108d3 100644 --- a/src/api/providers/__tests__/zai.spec.ts +++ b/src/api/providers/__tests__/zai.spec.ts @@ -82,6 +82,22 @@ describe("ZAiHandler", () => { expect(model.info.contextWindow).toBe(200_000) }) + it("should return GLM-4.7 international model with thinking support", () => { + const testModelId: InternationalZAiModelId = "glm-4.7" + const handlerWithModel = new ZAiHandler({ + apiModelId: testModelId, + zaiApiKey: "test-zai-api-key", + zaiApiLine: "international_coding", + }) + const model = handlerWithModel.getModel() + expect(model.id).toBe(testModelId) + expect(model.info).toEqual(internationalZAiModels[testModelId]) + expect(model.info.contextWindow).toBe(200_000) + expect(model.info.supportsReasoningEffort).toEqual(["disable", "medium"]) + expect(model.info.reasoningEffort).toBe("medium") + expect(model.info.preserveReasoning).toBe(true) + }) + it("should return GLM-4.5v international model with vision support", () => { const testModelId: InternationalZAiModelId = "glm-4.5v" const handlerWithModel = new ZAiHandler({ @@ -161,6 +177,22 @@ describe("ZAiHandler", () => { expect(model.info.maxTokens).toBe(16_384) expect(model.info.contextWindow).toBe(131_072) }) + + it("should return GLM-4.7 China model with thinking support", () => { + const testModelId: MainlandZAiModelId = "glm-4.7" + const handlerWithModel = new ZAiHandler({ + apiModelId: testModelId, + zaiApiKey: "test-zai-api-key", + zaiApiLine: "china_coding", + }) + const model = handlerWithModel.getModel() + expect(model.id).toBe(testModelId) + expect(model.info).toEqual(mainlandZAiModels[testModelId]) + expect(model.info.contextWindow).toBe(204_800) + expect(model.info.supportsReasoningEffort).toEqual(["disable", 
"medium"]) + expect(model.info.reasoningEffort).toBe("medium") + expect(model.info.preserveReasoning).toBe(true) + }) }) describe("International API", () => { @@ -371,4 +403,123 @@ describe("ZAiHandler", () => { ) }) }) + + describe("GLM-4.7 Thinking Mode", () => { + it("should enable thinking by default for GLM-4.7 (default reasoningEffort is medium)", async () => { + const handlerWithModel = new ZAiHandler({ + apiModelId: "glm-4.7", + zaiApiKey: "test-zai-api-key", + zaiApiLine: "international_coding", + // No reasoningEffort setting - should use model default (medium) + }) + + mockCreate.mockImplementationOnce(() => { + return { + [Symbol.asyncIterator]: () => ({ + async next() { + return { done: true } + }, + }), + } + }) + + const messageGenerator = handlerWithModel.createMessage("system prompt", []) + await messageGenerator.next() + + // For GLM-4.7 with default reasoning (medium), thinking should be enabled + expect(mockCreate).toHaveBeenCalledWith( + expect.objectContaining({ + model: "glm-4.7", + thinking: { type: "enabled" }, + }), + ) + }) + + it("should disable thinking for GLM-4.7 when reasoningEffort is set to disable", async () => { + const handlerWithModel = new ZAiHandler({ + apiModelId: "glm-4.7", + zaiApiKey: "test-zai-api-key", + zaiApiLine: "international_coding", + enableReasoningEffort: true, + reasoningEffort: "disable", + }) + + mockCreate.mockImplementationOnce(() => { + return { + [Symbol.asyncIterator]: () => ({ + async next() { + return { done: true } + }, + }), + } + }) + + const messageGenerator = handlerWithModel.createMessage("system prompt", []) + await messageGenerator.next() + + // For GLM-4.7 with reasoning disabled, thinking should be disabled + expect(mockCreate).toHaveBeenCalledWith( + expect.objectContaining({ + model: "glm-4.7", + thinking: { type: "disabled" }, + }), + ) + }) + + it("should enable thinking for GLM-4.7 when reasoningEffort is set to medium", async () => { + const handlerWithModel = new ZAiHandler({ + 
apiModelId: "glm-4.7", + zaiApiKey: "test-zai-api-key", + zaiApiLine: "international_coding", + enableReasoningEffort: true, + reasoningEffort: "medium", + }) + + mockCreate.mockImplementationOnce(() => { + return { + [Symbol.asyncIterator]: () => ({ + async next() { + return { done: true } + }, + }), + } + }) + + const messageGenerator = handlerWithModel.createMessage("system prompt", []) + await messageGenerator.next() + + // For GLM-4.7 with reasoning set to medium, thinking should be enabled + expect(mockCreate).toHaveBeenCalledWith( + expect.objectContaining({ + model: "glm-4.7", + thinking: { type: "enabled" }, + }), + ) + }) + + it("should NOT add thinking parameter for non-thinking models like GLM-4.6", async () => { + const handlerWithModel = new ZAiHandler({ + apiModelId: "glm-4.6", + zaiApiKey: "test-zai-api-key", + zaiApiLine: "international_coding", + }) + + mockCreate.mockImplementationOnce(() => { + return { + [Symbol.asyncIterator]: () => ({ + async next() { + return { done: true } + }, + }), + } + }) + + const messageGenerator = handlerWithModel.createMessage("system prompt", []) + await messageGenerator.next() + + // For GLM-4.6 (no thinking support), thinking parameter should not be present + const callArgs = mockCreate.mock.calls[0][0] + expect(callArgs.thinking).toBeUndefined() + }) + }) }) diff --git a/src/api/providers/zai.ts b/src/api/providers/zai.ts index 25074f5f0b9..fb715de185b 100644 --- a/src/api/providers/zai.ts +++ b/src/api/providers/zai.ts @@ -1,3 +1,6 @@ +import { Anthropic } from "@anthropic-ai/sdk" +import OpenAI from "openai" + import { internationalZAiModels, mainlandZAiModels, @@ -8,10 +11,17 @@ import { zaiApiLineConfigs, } from "@roo-code/types" -import type { ApiHandlerOptions } from "../../shared/api" +import { type ApiHandlerOptions, getModelMaxOutputTokens, shouldUseReasoningEffort } from "../../shared/api" +import { convertToZAiFormat } from "../transform/zai-format" +import type { ApiHandlerCreateMessageMetadata } 
from "../index" import { BaseOpenAiCompatibleProvider } from "./base-openai-compatible-provider" +// Custom interface for Z.ai params to support thinking mode +type ZAiChatCompletionParams = OpenAI.Chat.ChatCompletionCreateParamsStreaming & { + thinking?: { type: "enabled" | "disabled" } +} + export class ZAiHandler extends BaseOpenAiCompatibleProvider { constructor(options: ApiHandlerOptions) { const isChina = zaiApiLineConfigs[options.zaiApiLine ?? "international_coding"].isChina @@ -28,4 +38,76 @@ export class ZAiHandler extends BaseOpenAiCompatibleProvider { defaultTemperature: ZAI_DEFAULT_TEMPERATURE, }) } + + /** + * Override createStream to handle GLM-4.7's thinking mode. + * GLM-4.7 has thinking enabled by default in the API, so we need to + * explicitly send { type: "disabled" } when the user turns off reasoning. + */ + protected override createStream( + systemPrompt: string, + messages: Anthropic.Messages.MessageParam[], + metadata?: ApiHandlerCreateMessageMetadata, + requestOptions?: OpenAI.RequestOptions, + ) { + const { id: modelId, info } = this.getModel() + + // Check if this is a GLM-4.7 model with thinking support + const isThinkingModel = modelId === "glm-4.7" && Array.isArray(info.supportsReasoningEffort) + + if (isThinkingModel) { + // For GLM-4.7, thinking is ON by default in the API. + // We need to explicitly disable it when reasoning is off. 
+ const useReasoning = shouldUseReasoningEffort({ model: info, settings: this.options }) + + // Create the stream with our custom thinking parameter + return this.createStreamWithThinking(systemPrompt, messages, metadata, useReasoning) + } + + // For non-thinking models, use the default behavior + return super.createStream(systemPrompt, messages, metadata, requestOptions) + } + + /** + * Creates a stream with explicit thinking control for GLM-4.7 + */ + private createStreamWithThinking( + systemPrompt: string, + messages: Anthropic.Messages.MessageParam[], + metadata?: ApiHandlerCreateMessageMetadata, + useReasoning?: boolean, + ) { + const { id: model, info } = this.getModel() + + const max_tokens = + getModelMaxOutputTokens({ + modelId: model, + model: info, + settings: this.options, + format: "openai", + }) ?? undefined + + const temperature = this.options.modelTemperature ?? this.defaultTemperature + + // Use Z.ai format to preserve reasoning_content and convert post-tool text to system messages + const convertedMessages = convertToZAiFormat(messages, { convertToolResultTextToSystem: true }) + + const params: ZAiChatCompletionParams = { + model, + max_tokens, + temperature, + messages: [{ role: "system", content: systemPrompt }, ...convertedMessages], + stream: true, + stream_options: { include_usage: true }, + // For GLM-4.7: thinking is ON by default, so we explicitly disable when needed + thinking: useReasoning ? { type: "enabled" } : { type: "disabled" }, + ...(metadata?.tools && { tools: this.convertToolsForOpenAI(metadata.tools) }), + ...(metadata?.tool_choice && { tool_choice: metadata.tool_choice }), + ...(metadata?.toolProtocol === "native" && { + parallel_tool_calls: metadata.parallelToolCalls ?? 
false, + }), + } + + return this.client.chat.completions.create(params) + } } diff --git a/src/api/transform/zai-format.ts b/src/api/transform/zai-format.ts new file mode 100644 index 00000000000..b29a221b9ef --- /dev/null +++ b/src/api/transform/zai-format.ts @@ -0,0 +1,241 @@ +import { Anthropic } from "@anthropic-ai/sdk" +import OpenAI from "openai" + +type ContentPartText = OpenAI.Chat.ChatCompletionContentPartText +type ContentPartImage = OpenAI.Chat.ChatCompletionContentPartImage +type UserMessage = OpenAI.Chat.ChatCompletionUserMessageParam +type AssistantMessage = OpenAI.Chat.ChatCompletionAssistantMessageParam +type SystemMessage = OpenAI.Chat.ChatCompletionSystemMessageParam +type ToolMessage = OpenAI.Chat.ChatCompletionToolMessageParam +type Message = OpenAI.Chat.ChatCompletionMessageParam +type AnthropicMessage = Anthropic.Messages.MessageParam + +/** + * Extended assistant message type to support Z.ai's interleaved thinking. + * Z.ai's API returns reasoning_content alongside content and tool_calls, + * and requires it to be passed back in subsequent requests for preserved thinking. + */ +export type ZAiAssistantMessage = AssistantMessage & { + reasoning_content?: string +} + +/** + * Converts Anthropic messages to OpenAI format optimized for Z.ai's GLM-4.7 thinking mode. + * + * Key differences from standard OpenAI format: + * - Preserves reasoning_content on assistant messages for interleaved thinking + * - Text content after tool_results (like environment_details) is converted to system messages + * instead of user messages, preventing reasoning_content from being dropped + * + * @param messages Array of Anthropic messages + * @param options Optional configuration for message conversion + * @param options.convertToolResultTextToSystem If true, convert text content after tool_results + * to system messages instead of user messages. + * This preserves reasoning_content continuity. 
/**
 * Converts Anthropic messages to the OpenAI chat format used for Z.ai's GLM-4.7 thinking mode.
 *
 * Differences from a plain Anthropic -> OpenAI conversion:
 * - `reasoning_content` is carried on assistant messages. It is taken from a top-level
 *   `reasoning_content` property when present, otherwise from `reasoning` content blocks
 *   (all such blocks are joined with a newline, in order).
 * - `tool_result` blocks become `tool` messages, emitted before any text/image content of
 *   the same user message.
 * - With `convertToolResultTextToSystem`, text that accompanies tool results (and no images)
 *   is emitted as a `system` message instead of a `user` message.
 * - Consecutive user messages are merged, and consecutive assistant messages are merged as
 *   long as neither side carries tool calls. Merged assistant text is joined with a newline
 *   only when both sides actually have text, so no leading newline is produced when the
 *   earlier message had `null`/empty content.
 *
 * The input array and its messages are not modified.
 *
 * @param messages Array of Anthropic messages
 * @param options Optional configuration for message conversion
 * @param options.convertToolResultTextToSystem If true, text content that follows tool_results
 *                                              is emitted as a system message.
 * @returns Array of OpenAI-format messages
 */
export function convertToZAiFormat(
	messages: AnthropicMessage[],
	options?: { convertToolResultTextToSystem?: boolean },
): Message[] {
	const result: Message[] = []
	const textAfterToolResultsAsSystem = Boolean(options?.convertToolResultTextToSystem)

	for (const message of messages) {
		if (message.role === "user") {
			appendUserContent(result, message.content, textAfterToolResultsAsSystem)
		} else if (message.role === "assistant") {
			// reasoning_content is not part of the Anthropic message type; read it through a widened view.
			const { reasoning_content: topLevelReasoning } = message as AnthropicMessage & {
				reasoning_content?: string
			}
			appendAssistantContent(result, message.content, topLevelReasoning)
		}
	}

	return result
}

/** Flattens a tool_result payload into the plain string an OpenAI `tool` message carries. */
function flattenToolResultContent(content: Anthropic.Messages.ToolResultBlockParam["content"]): string {
	if (typeof content === "string") {
		return content
	}
	if (!Array.isArray(content)) {
		return ""
	}
	return content
		.map((block) => {
			if (block.type === "text") return block.text
			if (block.type === "image") return "(image)"
			return ""
		})
		.join("\n")
}

/** Returns the text of a non-empty `reasoning` content block, or undefined for any other value. */
function reasoningTextOf(part: unknown): string | undefined {
	if (typeof part !== "object" || part === null) {
		return undefined
	}
	const { type, text } = part as { type?: unknown; text?: unknown }
	return type === "reasoning" && typeof text === "string" && text.length > 0 ? text : undefined
}

/** Appends text to an assistant message, inserting a newline only when the message already has text. */
function appendAssistantText(target: AssistantMessage, text: string | null): void {
	if (!text) {
		return
	}
	const existing = typeof target.content === "string" ? target.content : ""
	target.content = existing.length > 0 ? `${existing}\n${text}` : text
}

/** Converts one user message's content and appends the resulting tool/system/user messages to `result`. */
function appendUserContent(
	result: Message[],
	content: AnthropicMessage["content"],
	textAfterToolResultsAsSystem: boolean,
): void {
	if (!Array.isArray(content)) {
		// Plain string content: merge into a directly preceding user message when there is one.
		const previous = result[result.length - 1]
		if (previous?.role !== "user") {
			result.push({ role: "user", content })
			return
		}
		if (typeof previous.content === "string") {
			previous.content += `\n${content}`
		} else {
			const parts = previous.content as (ContentPartText | ContentPartImage)[]
			parts.push({ type: "text", text: content })
		}
		return
	}

	const textParts: string[] = []
	const imageParts: ContentPartImage[] = []
	let toolResultCount = 0

	for (const part of content) {
		if (part.type === "text") {
			textParts.push(part.text)
		} else if (part.type === "image") {
			imageParts.push({
				type: "image_url",
				image_url: { url: `data:${part.source.media_type};base64,${part.source.data}` },
			})
		} else if (part.type === "tool_result") {
			// Tool messages go out immediately; text/images are only emitted after this loop,
			// so tool results always precede them (they must follow the assistant tool call).
			const toolMessage: ToolMessage = {
				role: "tool",
				tool_call_id: part.tool_use_id,
				content: flattenToolResultContent(part.content),
			}
			result.push(toolMessage)
			toolResultCount++
		}
	}

	if (textParts.length === 0 && imageParts.length === 0) {
		return
	}

	const joinedText = textParts.join("\n")

	// Text that trails tool results (e.g. environment_details) becomes a system message when
	// requested. Images cannot be carried by a system message, so they keep the user path.
	if (textAfterToolResultsAsSystem && toolResultCount > 0 && imageParts.length === 0) {
		const systemMessage: SystemMessage = { role: "system", content: joinedText }
		result.push(systemMessage)
		return
	}

	let userContent: UserMessage["content"]
	if (imageParts.length > 0) {
		const parts: (ContentPartText | ContentPartImage)[] = []
		if (textParts.length > 0) {
			parts.push({ type: "text", text: joinedText })
		}
		parts.push(...imageParts)
		userContent = parts
	} else {
		userContent = joinedText
	}

	const previous = result[result.length - 1]
	if (previous?.role !== "user") {
		result.push({ role: "user", content: userContent })
		return
	}

	if (typeof previous.content === "string" && typeof userContent === "string") {
		previous.content += `\n${userContent}`
		return
	}

	// At least one side is a parts array: normalise both sides to parts and concatenate.
	const previousParts = Array.isArray(previous.content)
		? previous.content
		: [{ type: "text" as const, text: previous.content || "" }]
	const incomingParts = Array.isArray(userContent) ? userContent : [{ type: "text" as const, text: userContent }]
	previous.content = [...previousParts, ...incomingParts] as UserMessage["content"]
}

/** Converts one assistant message's content and appends it to, or merges it into the tail of, `result`. */
function appendAssistantContent(
	result: Message[],
	content: AnthropicMessage["content"],
	topLevelReasoning: string | undefined,
): void {
	// A preceding assistant message is a merge target only when it carries no tool calls.
	const previous = result[result.length - 1]
	const mergeTarget: ZAiAssistantMessage | undefined =
		previous?.role === "assistant" && !previous.tool_calls ? previous : undefined

	if (!Array.isArray(content)) {
		if (mergeTarget) {
			appendAssistantText(mergeTarget, content)
			// The newest reasoning wins when messages are merged.
			if (topLevelReasoning) {
				mergeTarget.reasoning_content = topLevelReasoning
			}
			return
		}
		const assistantMessage: ZAiAssistantMessage = {
			role: "assistant",
			content,
			...(topLevelReasoning && { reasoning_content: topLevelReasoning }),
		}
		result.push(assistantMessage)
		return
	}

	const textParts: string[] = []
	const reasoningParts: string[] = []
	const toolCalls: OpenAI.Chat.ChatCompletionMessageToolCall[] = []

	for (const part of content) {
		if (part.type === "text") {
			textParts.push(part.text)
		} else if (part.type === "tool_use") {
			toolCalls.push({
				id: part.id,
				type: "function",
				function: {
					name: part.name,
					arguments: JSON.stringify(part.input),
				},
			})
		} else {
			// Reasoning may be stored as a non-standard "reasoning" content block.
			const reasoningText = reasoningTextOf(part)
			if (reasoningText !== undefined) {
				reasoningParts.push(reasoningText)
			}
		}
	}

	const text = textParts.length > 0 ? textParts.join("\n") : null
	// Top-level reasoning_content takes precedence over reasoning content blocks.
	const reasoning = topLevelReasoning || (reasoningParts.length > 0 ? reasoningParts.join("\n") : undefined)

	if (mergeTarget && toolCalls.length === 0) {
		appendAssistantText(mergeTarget, text)
		if (reasoning) {
			mergeTarget.reasoning_content = reasoning
		}
		return
	}

	const assistantMessage: ZAiAssistantMessage = {
		role: "assistant",
		content: text,
		...(toolCalls.length > 0 && { tool_calls: toolCalls }),
		// Preserve reasoning_content for Z.ai interleaved thinking
		...(reasoning && { reasoning_content: reasoning }),
	}
	result.push(assistantMessage)
}