Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 47 additions & 13 deletions packages/types/src/providers/zai.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ export type InternationalZAiModelId = keyof typeof internationalZAiModels
export const internationalZAiDefaultModelId: InternationalZAiModelId = "glm-4.6"
export const internationalZAiModels = {
"glm-4.5": {
maxTokens: 98_304,
maxTokens: 16_384,
contextWindow: 131_072,
supportsImages: false,
supportsPromptCache: true,
Expand All @@ -26,7 +26,7 @@ export const internationalZAiModels = {
"GLM-4.5 is Zhipu's latest featured model. Its comprehensive capabilities in reasoning, coding, and agent reach the state-of-the-art (SOTA) level among open-source models, with a context length of up to 128k.",
},
"glm-4.5-air": {
maxTokens: 98_304,
maxTokens: 16_384,
contextWindow: 131_072,
supportsImages: false,
supportsPromptCache: true,
Expand All @@ -40,7 +40,7 @@ export const internationalZAiModels = {
"GLM-4.5-Air is the lightweight version of GLM-4.5. It balances performance and cost-effectiveness, and can flexibly switch to hybrid thinking models.",
},
"glm-4.5-x": {
maxTokens: 98_304,
maxTokens: 16_384,
contextWindow: 131_072,
supportsImages: false,
supportsPromptCache: true,
Expand All @@ -54,7 +54,7 @@ export const internationalZAiModels = {
"GLM-4.5-X is a high-performance variant optimized for strong reasoning with ultra-fast responses.",
},
"glm-4.5-airx": {
maxTokens: 98_304,
maxTokens: 16_384,
contextWindow: 131_072,
supportsImages: false,
supportsPromptCache: true,
Expand All @@ -67,7 +67,7 @@ export const internationalZAiModels = {
description: "GLM-4.5-AirX is a lightweight, ultra-fast variant delivering strong performance with lower cost.",
},
"glm-4.5-flash": {
maxTokens: 98_304,
maxTokens: 16_384,
contextWindow: 131_072,
supportsImages: false,
supportsPromptCache: true,
Expand All @@ -94,7 +94,7 @@ export const internationalZAiModels = {
"GLM-4.5V is Z.AI's multimodal visual reasoning model (image/video/text/file input), optimized for GUI tasks, grounding, and document/video understanding.",
},
"glm-4.6": {
maxTokens: 98_304,
maxTokens: 16_384,
contextWindow: 200_000,
supportsImages: false,
supportsPromptCache: true,
Expand All @@ -107,8 +107,25 @@ export const internationalZAiModels = {
description:
"GLM-4.6 is Zhipu's newest model with an extended context window of up to 200k tokens, providing enhanced capabilities for processing longer documents and conversations.",
},
// GLM-4.7 entry for the international model table. Thinking is on by default
// and can be switched off by selecting the "disable" reasoning effort.
"glm-4.7": {
maxTokens: 16_384,
contextWindow: 200_000,
supportsImages: false,
supportsPromptCache: true,
supportsNativeTools: true,
defaultToolProtocol: "native",
// Only two effort levels are offered for this model: off ("disable") or "medium".
supportsReasoningEffort: ["disable", "medium"],
// Default effort when the user has not chosen one; the handler spec expects
// `thinking: { type: "enabled" }` to be sent in that case.
reasoningEffort: "medium",
preserveReasoning: true,
// NOTE(review): prices are presumably USD per million tokens — confirm against ModelInfo.
inputPrice: 0.6,
outputPrice: 2.2,
cacheWritesPrice: 0,
cacheReadsPrice: 0.11,
description:
"GLM-4.7 is Zhipu's latest model with built-in thinking capabilities enabled by default. It provides enhanced reasoning for complex tasks while maintaining fast response times.",
},
"glm-4-32b-0414-128k": {
maxTokens: 98_304,
maxTokens: 16_384,
contextWindow: 131_072,
supportsImages: false,
supportsPromptCache: false,
Expand All @@ -126,7 +143,7 @@ export type MainlandZAiModelId = keyof typeof mainlandZAiModels
export const mainlandZAiDefaultModelId: MainlandZAiModelId = "glm-4.6"
export const mainlandZAiModels = {
"glm-4.5": {
maxTokens: 98_304,
maxTokens: 16_384,
contextWindow: 131_072,
supportsImages: false,
supportsPromptCache: true,
Expand All @@ -140,7 +157,7 @@ export const mainlandZAiModels = {
"GLM-4.5 is Zhipu's latest featured model. Its comprehensive capabilities in reasoning, coding, and agent reach the state-of-the-art (SOTA) level among open-source models, with a context length of up to 128k.",
},
"glm-4.5-air": {
maxTokens: 98_304,
maxTokens: 16_384,
contextWindow: 131_072,
supportsImages: false,
supportsPromptCache: true,
Expand All @@ -154,7 +171,7 @@ export const mainlandZAiModels = {
"GLM-4.5-Air is the lightweight version of GLM-4.5. It balances performance and cost-effectiveness, and can flexibly switch to hybrid thinking models.",
},
"glm-4.5-x": {
maxTokens: 98_304,
maxTokens: 16_384,
contextWindow: 131_072,
supportsImages: false,
supportsPromptCache: true,
Expand All @@ -168,7 +185,7 @@ export const mainlandZAiModels = {
"GLM-4.5-X is a high-performance variant optimized for strong reasoning with ultra-fast responses.",
},
"glm-4.5-airx": {
maxTokens: 98_304,
maxTokens: 16_384,
contextWindow: 131_072,
supportsImages: false,
supportsPromptCache: true,
Expand All @@ -181,7 +198,7 @@ export const mainlandZAiModels = {
description: "GLM-4.5-AirX is a lightweight, ultra-fast variant delivering strong performance with lower cost.",
},
"glm-4.5-flash": {
maxTokens: 98_304,
maxTokens: 16_384,
contextWindow: 131_072,
supportsImages: false,
supportsPromptCache: true,
Expand All @@ -208,7 +225,7 @@ export const mainlandZAiModels = {
"GLM-4.5V is Z.AI's multimodal visual reasoning model (image/video/text/file input), optimized for GUI tasks, grounding, and document/video understanding.",
},
"glm-4.6": {
maxTokens: 98_304,
maxTokens: 16_384,
contextWindow: 204_800,
supportsImages: false,
supportsPromptCache: true,
Expand All @@ -221,6 +238,23 @@ export const mainlandZAiModels = {
description:
"GLM-4.6 is Zhipu's newest model with an extended context window of up to 200k tokens, providing enhanced capabilities for processing longer documents and conversations.",
},
// GLM-4.7 entry for the mainland (China) model table. Thinking is on by default
// and can be switched off by selecting the "disable" reasoning effort.
"glm-4.7": {
maxTokens: 16_384,
// Mainland table lists 204_800 here, whereas the international table lists 200_000.
contextWindow: 204_800,
supportsImages: false,
supportsPromptCache: true,
supportsNativeTools: true,
defaultToolProtocol: "native",
// Only two effort levels are offered for this model: off ("disable") or "medium".
supportsReasoningEffort: ["disable", "medium"],
// Default effort when the user has not chosen one.
reasoningEffort: "medium",
preserveReasoning: true,
// NOTE(review): prices differ from the international table; unit/currency is not
// visible here — confirm against ModelInfo and the provider's price list.
inputPrice: 0.29,
outputPrice: 1.14,
cacheWritesPrice: 0,
cacheReadsPrice: 0.057,
description:
"GLM-4.7 is Zhipu's latest model with built-in thinking capabilities enabled by default. It provides enhanced reasoning for complex tasks while maintaining fast response times.",
},
} as const satisfies Record<string, ModelInfo>

/**
 * Default temperature value exported for the Z.AI provider.
 * NOTE(review): presumably used when no temperature is configured — confirm at the call site.
 */
export const ZAI_DEFAULT_TEMPERATURE = 0.6
Expand Down
151 changes: 151 additions & 0 deletions src/api/providers/__tests__/zai.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,22 @@ describe("ZAiHandler", () => {
expect(model.info.contextWindow).toBe(200_000)
})

it("should return GLM-4.7 international model with thinking support", () => {
	// Resolve GLM-4.7 through the international coding line and compare the
	// result with the static international model table.
	const glm47: InternationalZAiModelId = "glm-4.7"
	const { id, info } = new ZAiHandler({
		apiModelId: glm47,
		zaiApiKey: "test-zai-api-key",
		zaiApiLine: "international_coding",
	}).getModel()

	expect(id).toBe(glm47)
	expect(info).toEqual(internationalZAiModels[glm47])
	expect(info.contextWindow).toBe(200_000)

	// Thinking-related metadata must be surfaced unchanged.
	expect(info.supportsReasoningEffort).toEqual(["disable", "medium"])
	expect(info.reasoningEffort).toBe("medium")
	expect(info.preserveReasoning).toBe(true)
})

it("should return GLM-4.5v international model with vision support", () => {
const testModelId: InternationalZAiModelId = "glm-4.5v"
const handlerWithModel = new ZAiHandler({
Expand Down Expand Up @@ -161,6 +177,22 @@ describe("ZAiHandler", () => {
expect(model.info.maxTokens).toBe(16_384)
expect(model.info.contextWindow).toBe(131_072)
})

it("should return GLM-4.7 China model with thinking support", () => {
	// Resolve GLM-4.7 through the China coding line and compare the result
	// with the static mainland model table.
	const glm47: MainlandZAiModelId = "glm-4.7"
	const { id, info } = new ZAiHandler({
		apiModelId: glm47,
		zaiApiKey: "test-zai-api-key",
		zaiApiLine: "china_coding",
	}).getModel()

	expect(id).toBe(glm47)
	expect(info).toEqual(mainlandZAiModels[glm47])
	expect(info.contextWindow).toBe(204_800)

	// Thinking-related metadata must be surfaced unchanged.
	expect(info.supportsReasoningEffort).toEqual(["disable", "medium"])
	expect(info.reasoningEffort).toBe("medium")
	expect(info.preserveReasoning).toBe(true)
})
})

describe("International API", () => {
Expand Down Expand Up @@ -371,4 +403,123 @@ describe("ZAiHandler", () => {
)
})
})

describe("GLM-4.7 Thinking Mode", () => {
	/**
	 * Shared arrange/act step for every case in this suite.
	 *
	 * Builds a handler from `options`, queues a single mocked completion whose
	 * stream reports `done` on the first read, then starts `createMessage` and
	 * advances the generator once so the request reaches `mockCreate`.
	 *
	 * @param options Handler options, exactly as they would be passed to `new ZAiHandler(...)`.
	 */
	const startMessage = async (options: ConstructorParameters<typeof ZAiHandler>[0]): Promise<void> => {
		const handlerWithModel = new ZAiHandler(options)

		// An already-finished async iterator: nothing is streamed back, the test
		// only cares about the arguments the request was created with.
		mockCreate.mockImplementationOnce(() => {
			return {
				[Symbol.asyncIterator]: () => ({
					async next() {
						return { done: true }
					},
				}),
			}
		})

		const messageGenerator = handlerWithModel.createMessage("system prompt", [])
		await messageGenerator.next()
	}

	it("should enable thinking by default for GLM-4.7 (default reasoningEffort is medium)", async () => {
		await startMessage({
			apiModelId: "glm-4.7",
			zaiApiKey: "test-zai-api-key",
			zaiApiLine: "international_coding",
			// No reasoningEffort setting - should use model default (medium)
		})

		// For GLM-4.7 with default reasoning (medium), thinking should be enabled
		expect(mockCreate).toHaveBeenCalledWith(
			expect.objectContaining({
				model: "glm-4.7",
				thinking: { type: "enabled" },
			}),
		)
	})

	it("should disable thinking for GLM-4.7 when reasoningEffort is set to disable", async () => {
		await startMessage({
			apiModelId: "glm-4.7",
			zaiApiKey: "test-zai-api-key",
			zaiApiLine: "international_coding",
			enableReasoningEffort: true,
			reasoningEffort: "disable",
		})

		// For GLM-4.7 with reasoning disabled, thinking should be disabled
		expect(mockCreate).toHaveBeenCalledWith(
			expect.objectContaining({
				model: "glm-4.7",
				thinking: { type: "disabled" },
			}),
		)
	})

	it("should enable thinking for GLM-4.7 when reasoningEffort is set to medium", async () => {
		await startMessage({
			apiModelId: "glm-4.7",
			zaiApiKey: "test-zai-api-key",
			zaiApiLine: "international_coding",
			enableReasoningEffort: true,
			reasoningEffort: "medium",
		})

		// For GLM-4.7 with reasoning set to medium, thinking should be enabled
		expect(mockCreate).toHaveBeenCalledWith(
			expect.objectContaining({
				model: "glm-4.7",
				thinking: { type: "enabled" },
			}),
		)
	})

	it("should NOT add thinking parameter for non-thinking models like GLM-4.6", async () => {
		await startMessage({
			apiModelId: "glm-4.6",
			zaiApiKey: "test-zai-api-key",
			zaiApiLine: "international_coding",
		})

		// For GLM-4.6 (no thinking support), thinking parameter should not be present.
		// NOTE(review): reading calls[0] assumes mockCreate's call history is reset
		// before each test — confirm against the suite's setup hooks.
		const callArgs = mockCreate.mock.calls[0][0]
		expect(callArgs.thinking).toBeUndefined()
	})
})
})
Loading
Loading