From 5c6b848afb2c3fc5bdd0e8e5b8baa2c315ac8279 Mon Sep 17 00:00:00 2001 From: Seb Duerr Date: Mon, 29 Dec 2025 16:28:58 -0800 Subject: [PATCH 1/2] fix: update Cerebras maxTokens to 16384 --- merge_conflict_resolution.md | 42 ++++++++++++++++++++++++ packages/types/src/providers/cerebras.ts | 10 +++--- 2 files changed, 47 insertions(+), 5 deletions(-) create mode 100644 merge_conflict_resolution.md diff --git a/merge_conflict_resolution.md b/merge_conflict_resolution.md new file mode 100644 index 00000000000..c9a043e25a3 --- /dev/null +++ b/merge_conflict_resolution.md @@ -0,0 +1,42 @@ +# Merge Conflict Resolution Notes + +### Changes Made: + +#### 1. Conservative Token Limits: + +- Introduced `CEREBRAS_DEFAULT_MAX_TOKENS` with a value of `8_192` to avoid premature rate limiting. +- Updated logic to use this conservative default instead of the model maximum. + +#### 2. Integration Tracking: + +- Added `X-Cerebras-3rd-Party-Integration: roocode` header to all Cerebras API requests. + +#### 3. Model Cleanup: + +- Removed outdated models: + - `qwen-3-coder-480b-free` + - `qwen-3-coder-480b` + - `qwen-3-235b-a22b-thinking-2507` +- Updated `src/api/providers/cerebras.ts` to remove deprecated model mapping logic. + +### Files Affected: + +1. **`packages/types/src/providers/cerebras.ts`** + + - Removed outdated models. + +2. **`src/api/providers/cerebras.ts`** + - Added `CEREBRAS_DEFAULT_MAX_TOKENS` and `CEREBRAS_INTEGRATION_HEADER` constants. + - Updated `getModel` logic to validate model IDs. + - Updated API request logic to include the new header and use conservative token limits. + +### Testing Notes: + +- Verified functionality with `zai-glm-4.6` and `gpt-oss-120b` models. +- Confirmed that the new headers are included in API requests. +- Ensured that the application builds and passes all linting/type checks. + +### Next Steps: + +- Copy these changes into the appropriate files to resolve conflicts. +- Re-test the application to ensure everything works as expected. diff --git a/packages/types/src/providers/cerebras.ts b/packages/types/src/providers/cerebras.ts index 54b314b6db9..8e0c2f9413c 100644 --- a/packages/types/src/providers/cerebras.ts +++ b/packages/types/src/providers/cerebras.ts @@ -7,7 +7,7 @@ export const cerebrasDefaultModelId: CerebrasModelId = "gpt-oss-120b" export const cerebrasModels = { "zai-glm-4.6": { - maxTokens: 8192, // Conservative default to avoid premature rate limiting (Cerebras reserves quota upfront) + maxTokens: 16384, // Conservative default to avoid premature rate limiting (Cerebras reserves quota upfront) contextWindow: 131072, supportsImages: false, supportsPromptCache: false, @@ -18,7 +18,7 @@ export const cerebrasModels = { description: "Highly intelligent general purpose model with up to 1,000 tokens/s", }, "qwen-3-235b-a22b-instruct-2507": { - maxTokens: 8192, // Conservative default to avoid premature rate limiting + maxTokens: 16384, // Conservative default to avoid premature rate limiting contextWindow: 64000, supportsImages: false, supportsPromptCache: false, @@ -29,7 +29,7 @@ export const cerebrasModels = { description: "Intelligent model with ~1400 tokens/s", }, "llama-3.3-70b": { - maxTokens: 8192, // Conservative default to avoid premature rate limiting + maxTokens: 16384, // Conservative default to avoid premature rate limiting contextWindow: 64000, supportsImages: false, supportsPromptCache: false, @@ -40,7 +40,7 @@ export const cerebrasModels = { description: "Powerful model with ~2600 tokens/s", }, "qwen-3-32b": { - maxTokens: 8192, // Conservative default to avoid premature rate limiting + maxTokens: 16384, // Conservative default to avoid premature rate limiting contextWindow: 64000, supportsImages: false, supportsPromptCache: false, @@ -51,7 +51,7 @@ export const cerebrasModels = { description: "SOTA coding performance with ~2500 tokens/s", }, "gpt-oss-120b": { - maxTokens: 8192, // Conservative default to avoid premature rate limiting + maxTokens: 16384, // Conservative default to avoid premature rate limiting contextWindow: 64000, supportsImages: false, supportsPromptCache: false, From 8fd79cf1ad2e539bc61a4ad6ac9c915b23cfe4d0 Mon Sep 17 00:00:00 2001 From: Seb Duerr Date: Mon, 29 Dec 2025 17:11:27 -0800 Subject: [PATCH 2/2] chore: remove accidental merge_conflict_resolution.md --- merge_conflict_resolution.md | 42 ------------------------------------ 1 file changed, 42 deletions(-) delete mode 100644 merge_conflict_resolution.md diff --git a/merge_conflict_resolution.md b/merge_conflict_resolution.md deleted file mode 100644 index c9a043e25a3..00000000000 --- a/merge_conflict_resolution.md +++ /dev/null @@ -1,42 +0,0 @@ -# Merge Conflict Resolution Notes - -### Changes Made: - -#### 1. Conservative Token Limits: - -- Introduced `CEREBRAS_DEFAULT_MAX_TOKENS` with a value of `8_192` to avoid premature rate limiting. -- Updated logic to use this conservative default instead of the model maximum. - -#### 2. Integration Tracking: - -- Added `X-Cerebras-3rd-Party-Integration: roocode` header to all Cerebras API requests. - -#### 3. Model Cleanup: - -- Removed outdated models: - - `qwen-3-coder-480b-free` - - `qwen-3-coder-480b` - - `qwen-3-235b-a22b-thinking-2507` -- Updated `src/api/providers/cerebras.ts` to remove deprecated model mapping logic. - -### Files Affected: - -1. **`packages/types/src/providers/cerebras.ts`** - - - Removed outdated models. - -2. **`src/api/providers/cerebras.ts`** - - Added `CEREBRAS_DEFAULT_MAX_TOKENS` and `CEREBRAS_INTEGRATION_HEADER` constants. - - Updated `getModel` logic to validate model IDs. - - Updated API request logic to include the new header and use conservative token limits. - -### Testing Notes: - -- Verified functionality with `zai-glm-4.6` and `gpt-oss-120b` models. -- Confirmed that the new headers are included in API requests. -- Ensured that the application builds and passes all linting/type checks. - -### Next Steps: - -- Copy these changes into the appropriate files to resolve conflicts. -- Re-test the application to ensure everything works as expected.