diff --git a/packages/types/src/providers/cerebras.ts b/packages/types/src/providers/cerebras.ts index 2868252fb49..be705744111 100644 --- a/packages/types/src/providers/cerebras.ts +++ b/packages/types/src/providers/cerebras.ts @@ -11,6 +11,7 @@ export const cerebrasModels = { contextWindow: 131072, supportsImages: false, supportsPromptCache: false, + supportsNativeTools: true, inputPrice: 0, outputPrice: 0, description: "Highly intelligent general purpose model with up to 1,000 tokens/s", @@ -20,6 +21,7 @@ export const cerebrasModels = { contextWindow: 64000, supportsImages: false, supportsPromptCache: false, + supportsNativeTools: true, inputPrice: 0, outputPrice: 0, description: "Intelligent model with ~1400 tokens/s", @@ -29,6 +31,7 @@ export const cerebrasModels = { contextWindow: 64000, supportsImages: false, supportsPromptCache: false, + supportsNativeTools: true, inputPrice: 0, outputPrice: 0, description: "Powerful model with ~2600 tokens/s", @@ -38,6 +41,7 @@ export const cerebrasModels = { contextWindow: 64000, supportsImages: false, supportsPromptCache: false, + supportsNativeTools: true, inputPrice: 0, outputPrice: 0, description: "SOTA coding performance with ~2500 tokens/s", @@ -47,6 +51,7 @@ export const cerebrasModels = { contextWindow: 64000, supportsImages: false, supportsPromptCache: false, + supportsNativeTools: true, inputPrice: 0, outputPrice: 0, description: diff --git a/src/api/providers/base-provider.ts b/src/api/providers/base-provider.ts index a0611a7b3fc..84c8cf6fe97 100644 --- a/src/api/providers/base-provider.ts +++ b/src/api/providers/base-provider.ts @@ -20,7 +20,8 @@ export abstract class BaseProvider implements ApiHandler { /** * Converts an array of tools to be compatible with OpenAI's strict mode. - * Filters for function tools and applies schema conversion to their parameters. + * Filters for function tools, applies schema conversion to their parameters, + * and ensures all tools have consistent strict: true values. 
*/ protected convertToolsForOpenAI(tools: any[] | undefined): any[] | undefined { if (!tools) { @@ -33,6 +34,7 @@ export abstract class BaseProvider implements ApiHandler { ...tool, function: { ...tool.function, + strict: true, parameters: this.convertToolSchemaForOpenAI(tool.function.parameters), }, } diff --git a/src/api/providers/cerebras.ts b/src/api/providers/cerebras.ts index b9ea00d6f97..398e32f4901 100644 --- a/src/api/providers/cerebras.ts +++ b/src/api/providers/cerebras.ts @@ -16,68 +16,6 @@ import { t } from "../../i18n" const CEREBRAS_BASE_URL = "https://api.cerebras.ai/v1" const CEREBRAS_DEFAULT_TEMPERATURE = 0 -/** - * Removes thinking tokens from text to prevent model confusion when processing conversation history. - * This is crucial because models can get confused by their own thinking tokens in input. - */ -function stripThinkingTokens(text: string): string { - // Remove <think>...</think> blocks entirely, including nested ones - return text.replace(/<think>[\s\S]*?<\/think>/g, "").trim() -} - -/** - * Flattens OpenAI message content to simple strings that Cerebras can handle. - * Cerebras doesn't support complex content arrays like OpenAI does. - */ -function flattenMessageContent(content: any): string { - if (typeof content === "string") { - return content - } - - if (Array.isArray(content)) { - return content - .map((part) => { - if (typeof part === "string") { - return part - } - if (part.type === "text") { - return part.text || "" - } - if (part.type === "image_url") { - return "[Image]" // Placeholder for images since Cerebras doesn't support images - } - return "" - }) - .filter(Boolean) - .join("\n") - } - - // Fallback for any other content types - return String(content || "") -} - -/** - * Converts OpenAI messages to Cerebras-compatible format with simple string content. - * Also strips thinking tokens from assistant messages to prevent model confusion. 
- */ -function convertToCerebrasMessages(openaiMessages: any[]): Array<{ role: string; content: string }> { - return openaiMessages - .map((msg) => { - let content = flattenMessageContent(msg.content) - - // Strip thinking tokens from assistant messages to prevent confusion - if (msg.role === "assistant") { - content = stripThinkingTokens(content) - } - - return { - role: msg.role, - content, - } - }) - .filter((msg) => msg.content.trim() !== "") // Remove empty messages -} - export class CerebrasHandler extends BaseProvider implements SingleCompletionHandler { private apiKey: string private providerModels: typeof cerebrasModels @@ -106,26 +44,70 @@ export class CerebrasHandler extends BaseProvider implements SingleCompletionHan } } + /** + * Override convertToolSchemaForOpenAI to remove unsupported schema fields for Cerebras. + * Cerebras doesn't support minItems/maxItems in array schemas with strict mode. + */ + protected override convertToolSchemaForOpenAI(schema: any): any { + const converted = super.convertToolSchemaForOpenAI(schema) + return this.stripUnsupportedSchemaFields(converted) + } + + /** + * Recursively strips unsupported schema fields for Cerebras. + * Cerebras strict mode doesn't support minItems, maxItems on arrays. 
+ */ + private stripUnsupportedSchemaFields(schema: any): any { + if (!schema || typeof schema !== "object") { + return schema + } + + const result = { ...schema } + + // Remove unsupported array constraints + if (result.type === "array" || (Array.isArray(result.type) && result.type.includes("array"))) { + delete result.minItems + delete result.maxItems + } + + // Recursively process properties + if (result.properties) { + const newProps = { ...result.properties } + for (const key of Object.keys(newProps)) { + newProps[key] = this.stripUnsupportedSchemaFields(newProps[key]) + } + result.properties = newProps + } + + // Recursively process array items + if (result.items) { + result.items = this.stripUnsupportedSchemaFields(result.items) + } + + return result + } + async *createMessage( systemPrompt: string, messages: Anthropic.Messages.MessageParam[], metadata?: ApiHandlerCreateMessageMetadata, ): ApiStream { - const { - id: model, - info: { maxTokens: max_tokens }, - } = this.getModel() + const { id: model, info: modelInfo } = this.getModel() + const max_tokens = modelInfo.maxTokens + const supportsNativeTools = modelInfo.supportsNativeTools ?? false const temperature = this.options.modelTemperature ?? 
CEREBRAS_DEFAULT_TEMPERATURE - // Convert Anthropic messages to OpenAI format, then flatten for Cerebras - // This will automatically strip thinking tokens from assistant messages + // Check if we should use native tool calling + const useNativeTools = + supportsNativeTools && metadata?.tools && metadata.tools.length > 0 && metadata?.toolProtocol !== "xml" + + // Convert Anthropic messages to OpenAI format (Cerebras is OpenAI-compatible) const openaiMessages = convertToOpenAiMessages(messages) - const cerebrasMessages = convertToCerebrasMessages(openaiMessages) // Prepare request body following Cerebras API specification exactly - const requestBody = { + const requestBody: Record<string, any> = { model, - messages: [{ role: "system", content: systemPrompt }, ...cerebrasMessages], + messages: [{ role: "system", content: systemPrompt }, ...openaiMessages], stream: true, // Use max_completion_tokens (Cerebras-specific parameter) ...(max_tokens && max_tokens > 0 && max_tokens <= 32768 ? { max_completion_tokens: max_tokens } : {}), @@ -135,6 +117,10 @@ export class CerebrasHandler extends BaseProvider implements SingleCompletionHan temperature: Math.max(0, Math.min(1.5, temperature)), } : {}), + // Native tool calling support + ...(useNativeTools && { tools: this.convertToolsForOpenAI(metadata.tools) }), + ...(useNativeTools && metadata.tool_choice && { tool_choice: metadata.tool_choice }), + ...(useNativeTools && { parallel_tool_calls: metadata?.parallelToolCalls ?? false }), } try { @@ -216,9 +202,11 @@ export class CerebrasHandler extends BaseProvider implements SingleCompletionHan const parsed = JSON.parse(jsonStr) + const delta = parsed.choices?.[0]?.delta + // Handle text content - parse for thinking tokens - if (parsed.choices?.[0]?.delta?.content) { - const content = parsed.choices[0].delta.content + if (delta?.content) { + const content = delta.content // Use XmlMatcher to parse <think>...</think> 
tags for (const chunk of matcher.update(content)) { @@ -226,6 +214,19 @@ export class CerebrasHandler extends BaseProvider implements SingleCompletionHan } } + // Handle tool calls in stream - emit partial chunks for NativeToolCallParser + if (delta?.tool_calls) { + for (const toolCall of delta.tool_calls) { + yield { + type: "tool_call_partial", + index: toolCall.index, + id: toolCall.id, + name: toolCall.function?.name, + arguments: toolCall.function?.arguments, + } + } + } + // Handle usage information if available if (parsed.usage) { inputTokens = parsed.usage.prompt_tokens || 0 @@ -248,7 +249,11 @@ export class CerebrasHandler extends BaseProvider implements SingleCompletionHan // Provide token usage estimate if not available from API if (inputTokens === 0 || outputTokens === 0) { - const inputText = systemPrompt + cerebrasMessages.map((m) => m.content).join("") + const inputText = + systemPrompt + + openaiMessages + .map((m: any) => (typeof m.content === "string" ? m.content : JSON.stringify(m.content))) + .join("") inputTokens = inputTokens || Math.ceil(inputText.length / 4) // Rough estimate: 4 chars per token outputTokens = outputTokens || Math.ceil((max_tokens || 1000) / 10) // Rough estimate }