From f55e0b3258a709f4127f0bf7165a86a2ba3deab1 Mon Sep 17 00:00:00 2001 From: Roo Code Date: Wed, 1 Oct 2025 02:08:59 +0000 Subject: [PATCH 1/2] feat: add GLM-4.6-FP8 and LongCat-Flash-Thinking-FP8 models to Chutes AI provider --- packages/types/src/providers/chutes.ts | 11 ++++++ src/api/providers/__tests__/chutes.spec.ts | 46 ++++++++++++++++++++++ 2 files changed, 57 insertions(+) diff --git a/packages/types/src/providers/chutes.ts b/packages/types/src/providers/chutes.ts index 20fe15017f9..f0dbc4ba058 100644 --- a/packages/types/src/providers/chutes.ts +++ b/packages/types/src/providers/chutes.ts @@ -35,6 +35,7 @@ export type ChutesModelId = | "zai-org/GLM-4.5-turbo" | "zai-org/GLM-4.6-FP8" | "zai-org/GLM-4.6-turbo" + | "meituan-longcat/LongCat-Flash-Thinking-FP8" | "moonshotai/Kimi-K2-Instruct-75k" | "moonshotai/Kimi-K2-Instruct-0905" | "Qwen/Qwen3-235B-A22B-Thinking-2507" @@ -339,6 +340,16 @@ export const chutesModels = { outputPrice: 3.25, description: "GLM-4.6-turbo model with 200K-token context window, optimized for fast inference.", }, + "meituan-longcat/LongCat-Flash-Thinking-FP8": { + maxTokens: 32768, + contextWindow: 128000, + supportsImages: false, + supportsPromptCache: false, + inputPrice: 0, + outputPrice: 0, + description: + "LongCat Flash Thinking FP8 model with 128K context window, optimized for complex reasoning and coding tasks.", + }, "Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8": { maxTokens: 32768, contextWindow: 262144, diff --git a/src/api/providers/__tests__/chutes.spec.ts b/src/api/providers/__tests__/chutes.spec.ts index 5c555033386..d4812cbc07a 100644 --- a/src/api/providers/__tests__/chutes.spec.ts +++ b/src/api/providers/__tests__/chutes.spec.ts @@ -275,6 +275,52 @@ describe("ChutesHandler", () => { ) }) + it("should return zai-org/GLM-4.6-FP8 model with correct configuration", () => { + const testModelId: ChutesModelId = "zai-org/GLM-4.6-FP8" + const handlerWithModel = new ChutesHandler({ + apiModelId: testModelId, + chutesApiKey: "test-chutes-api-key", + }) + const model = handlerWithModel.getModel() + expect(model.id).toBe(testModelId) + expect(model.info).toEqual( + expect.objectContaining({ + maxTokens: 32768, + contextWindow: 200000, + supportsImages: false, + supportsPromptCache: false, + inputPrice: 0, + outputPrice: 0, + description: + "GLM-4.6-FP8 model with 200K token context window, state-of-the-art performance with fast inference.", + temperature: 0.5, // Default temperature for non-DeepSeek models + }), + ) + }) + + it("should return meituan-longcat/LongCat-Flash-Thinking-FP8 model with correct configuration", () => { + const testModelId: ChutesModelId = "meituan-longcat/LongCat-Flash-Thinking-FP8" + const handlerWithModel = new ChutesHandler({ + apiModelId: testModelId, + chutesApiKey: "test-chutes-api-key", + }) + const model = handlerWithModel.getModel() + expect(model.id).toBe(testModelId) + expect(model.info).toEqual( + expect.objectContaining({ + maxTokens: 32768, + contextWindow: 128000, + supportsImages: false, + supportsPromptCache: false, + inputPrice: 0, + outputPrice: 0, + description: + "LongCat Flash Thinking FP8 model with 128K context window, optimized for complex reasoning and coding tasks.", + temperature: 0.5, // Default temperature for non-DeepSeek models + }), + ) + }) + it("should return Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8 model with correct configuration", () => { const testModelId: ChutesModelId = "Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8" const handlerWithModel = new ChutesHandler({ From c404987012b235dfb4f356de2559a5c6a021223a Mon Sep 17 00:00:00 2001 From: daniel-lxs Date: Mon, 27 Oct 2025 12:41:07 -0500 Subject: [PATCH 2/2] fix: update GLM-4.6-FP8 test expectations and add GLM-4.6-turbo test - Updated GLM-4.6-FP8 test to match resolved merge configuration (contextWindow: 202752, detailed description) - Added missing test for GLM-4.6-turbo model with correct configuration - All 25 tests now pass --- src/api/providers/__tests__/chutes.spec.ts | 26 ++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/src/api/providers/__tests__/chutes.spec.ts b/src/api/providers/__tests__/chutes.spec.ts index d4812cbc07a..c7fa0dd750f 100644 --- a/src/api/providers/__tests__/chutes.spec.ts +++ b/src/api/providers/__tests__/chutes.spec.ts @@ -286,13 +286,35 @@ describe("ChutesHandler", () => { expect(model.info).toEqual( expect.objectContaining({ maxTokens: 32768, - contextWindow: 200000, + contextWindow: 202752, supportsImages: false, supportsPromptCache: false, inputPrice: 0, outputPrice: 0, description: - "GLM-4.6-FP8 model with 200K token context window, state-of-the-art performance with fast inference.", + "GLM-4.6 introduces major upgrades over GLM-4.5, including a longer 200K-token context window for complex tasks, stronger coding performance in benchmarks and real-world tools (such as Claude Code, Cline, Roo Code, and Kilo Code), improved reasoning with tool use during inference, more capable and efficient agent integration, and refined writing that better matches human style, readability, and natural role-play scenarios.", + temperature: 0.5, // Default temperature for non-DeepSeek models + }), + ) + }) + + it("should return zai-org/GLM-4.6-turbo model with correct configuration", () => { + const testModelId: ChutesModelId = "zai-org/GLM-4.6-turbo" + const handlerWithModel = new ChutesHandler({ + apiModelId: testModelId, + chutesApiKey: "test-chutes-api-key", + }) + const model = handlerWithModel.getModel() + expect(model.id).toBe(testModelId) + expect(model.info).toEqual( + expect.objectContaining({ + maxTokens: 202752, + contextWindow: 202752, + supportsImages: false, + supportsPromptCache: false, + inputPrice: 1.15, + outputPrice: 3.25, + description: "GLM-4.6-turbo model with 200K-token context window, optimized for fast inference.", temperature: 0.5, // Default temperature for non-DeepSeek models }), )