diff --git a/packages/types/src/providers/chutes.ts b/packages/types/src/providers/chutes.ts index 20fe15017f9..f0dbc4ba058 100644 --- a/packages/types/src/providers/chutes.ts +++ b/packages/types/src/providers/chutes.ts @@ -35,6 +35,7 @@ export type ChutesModelId = | "zai-org/GLM-4.5-turbo" | "zai-org/GLM-4.6-FP8" | "zai-org/GLM-4.6-turbo" + | "meituan-longcat/LongCat-Flash-Thinking-FP8" | "moonshotai/Kimi-K2-Instruct-75k" | "moonshotai/Kimi-K2-Instruct-0905" | "Qwen/Qwen3-235B-A22B-Thinking-2507" @@ -339,6 +340,16 @@ export const chutesModels = { outputPrice: 3.25, description: "GLM-4.6-turbo model with 200K-token context window, optimized for fast inference.", }, + "meituan-longcat/LongCat-Flash-Thinking-FP8": { + maxTokens: 32768, + contextWindow: 128000, + supportsImages: false, + supportsPromptCache: false, + inputPrice: 0, + outputPrice: 0, + description: + "LongCat Flash Thinking FP8 model with 128K context window, optimized for complex reasoning and coding tasks.", + }, "Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8": { maxTokens: 32768, contextWindow: 262144, diff --git a/src/api/providers/__tests__/chutes.spec.ts b/src/api/providers/__tests__/chutes.spec.ts index 5c555033386..c7fa0dd750f 100644 --- a/src/api/providers/__tests__/chutes.spec.ts +++ b/src/api/providers/__tests__/chutes.spec.ts @@ -275,6 +275,74 @@ describe("ChutesHandler", () => { ) }) + it("should return zai-org/GLM-4.6-FP8 model with correct configuration", () => { + const testModelId: ChutesModelId = "zai-org/GLM-4.6-FP8" + const handlerWithModel = new ChutesHandler({ + apiModelId: testModelId, + chutesApiKey: "test-chutes-api-key", + }) + const model = handlerWithModel.getModel() + expect(model.id).toBe(testModelId) + expect(model.info).toEqual( + expect.objectContaining({ + maxTokens: 32768, + contextWindow: 202752, + supportsImages: false, + supportsPromptCache: false, + inputPrice: 0, + outputPrice: 0, + description: + "GLM-4.6 introduces major upgrades over GLM-4.5, including a longer 200K-token context window for complex tasks, stronger coding performance in benchmarks and real-world tools (such as Claude Code, Cline, Roo Code, and Kilo Code), improved reasoning with tool use during inference, more capable and efficient agent integration, and refined writing that better matches human style, readability, and natural role-play scenarios.", + temperature: 0.5, // Default temperature for non-DeepSeek models + }), + ) + }) + + it("should return zai-org/GLM-4.6-turbo model with correct configuration", () => { + const testModelId: ChutesModelId = "zai-org/GLM-4.6-turbo" + const handlerWithModel = new ChutesHandler({ + apiModelId: testModelId, + chutesApiKey: "test-chutes-api-key", + }) + const model = handlerWithModel.getModel() + expect(model.id).toBe(testModelId) + expect(model.info).toEqual( + expect.objectContaining({ + maxTokens: 202752, + contextWindow: 202752, + supportsImages: false, + supportsPromptCache: false, + inputPrice: 1.15, + outputPrice: 3.25, + description: "GLM-4.6-turbo model with 200K-token context window, optimized for fast inference.", + temperature: 0.5, // Default temperature for non-DeepSeek models + }), + ) + }) + + it("should return meituan-longcat/LongCat-Flash-Thinking-FP8 model with correct configuration", () => { + const testModelId: ChutesModelId = "meituan-longcat/LongCat-Flash-Thinking-FP8" + const handlerWithModel = new ChutesHandler({ + apiModelId: testModelId, + chutesApiKey: "test-chutes-api-key", + }) + const model = handlerWithModel.getModel() + expect(model.id).toBe(testModelId) + expect(model.info).toEqual( + expect.objectContaining({ + maxTokens: 32768, + contextWindow: 128000, + supportsImages: false, + supportsPromptCache: false, + inputPrice: 0, + outputPrice: 0, + description: + "LongCat Flash Thinking FP8 model with 128K context window, optimized for complex reasoning and coding tasks.", + temperature: 0.5, // Default temperature for non-DeepSeek models + }), + ) + }) + it("should return Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8 model with correct configuration", () => { const testModelId: ChutesModelId = "Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8" const handlerWithModel = new ChutesHandler({