diff --git a/packages/types/src/providers/gemini.ts b/packages/types/src/providers/gemini.ts index 45bbaa855bf..e7a73e6d0e4 100644 --- a/packages/types/src/providers/gemini.ts +++ b/packages/types/src/providers/gemini.ts @@ -10,6 +10,7 @@ export const geminiModels = { maxTokens: 65_536, contextWindow: 1_048_576, supportsImages: true, + supportsNativeTools: true, supportsPromptCache: true, supportsReasoningEffort: ["low", "high"], reasoningEffort: "low", @@ -35,6 +36,7 @@ export const geminiModels = { maxTokens: 64_000, contextWindow: 1_048_576, supportsImages: true, + supportsNativeTools: true, supportsPromptCache: true, inputPrice: 2.5, // This is the pricing for prompts above 200k tokens. outputPrice: 15, @@ -62,6 +64,7 @@ export const geminiModels = { maxTokens: 65_535, contextWindow: 1_048_576, supportsImages: true, + supportsNativeTools: true, supportsPromptCache: true, inputPrice: 2.5, // This is the pricing for prompts above 200k tokens. outputPrice: 15, @@ -88,6 +91,7 @@ export const geminiModels = { maxTokens: 65_535, contextWindow: 1_048_576, supportsImages: true, + supportsNativeTools: true, supportsPromptCache: true, inputPrice: 2.5, // This is the pricing for prompts above 200k tokens. outputPrice: 15, @@ -112,6 +116,7 @@ export const geminiModels = { maxTokens: 65_535, contextWindow: 1_048_576, supportsImages: true, + supportsNativeTools: true, supportsPromptCache: true, inputPrice: 2.5, // This is the pricing for prompts above 200k tokens. outputPrice: 15, @@ -140,6 +145,7 @@ export const geminiModels = { maxTokens: 65_536, contextWindow: 1_048_576, supportsImages: true, + supportsNativeTools: true, supportsPromptCache: true, inputPrice: 0.3, outputPrice: 2.5, @@ -152,6 +158,7 @@ export const geminiModels = { maxTokens: 65_536, contextWindow: 1_048_576, supportsImages: true, + supportsNativeTools: true, supportsPromptCache: true, inputPrice: 0.3, outputPrice: 2.5, @@ -164,6 +171,7 @@ export const geminiModels = { maxTokens: 64_000, contextWindow: 1_048_576, supportsImages: true, + supportsNativeTools: true, supportsPromptCache: true, inputPrice: 0.3, outputPrice: 2.5, @@ -178,6 +186,7 @@ export const geminiModels = { maxTokens: 65_536, contextWindow: 1_048_576, supportsImages: true, + supportsNativeTools: true, supportsPromptCache: true, inputPrice: 0.1, outputPrice: 0.4, @@ -190,6 +199,7 @@ export const geminiModels = { maxTokens: 65_536, contextWindow: 1_048_576, supportsImages: true, + supportsNativeTools: true, supportsPromptCache: true, inputPrice: 0.1, outputPrice: 0.4, diff --git a/src/api/providers/gemini.ts b/src/api/providers/gemini.ts index f54a3edb5d7..89c816e8157 100644 --- a/src/api/providers/gemini.ts +++ b/src/api/providers/gemini.ts @@ -5,6 +5,8 @@ import { type GenerateContentParameters, type GenerateContentConfig, type GroundingMetadata, + FunctionCallingConfigMode, + Content, } from "@google/genai" import type { JWTInput } from "google-auth-library" @@ -101,17 +103,46 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl return true }) - const contents = geminiMessages.map((message) => - convertAnthropicMessageToGemini(message, { includeThoughtSignatures }), - ) + // Build a map of tool IDs to names from previous messages + // This is needed because Anthropic's tool_result blocks only contain the ID, + // but Gemini requires the name in functionResponse + const toolIdToName = new Map() + for (const message of messages) { + if (Array.isArray(message.content)) { + for (const block of message.content) { + if (block.type === "tool_use") { + toolIdToName.set(block.id, block.name) + } + } + } + } + + const contents = geminiMessages + .map((message) => convertAnthropicMessageToGemini(message, { includeThoughtSignatures, toolIdToName })) + .flat() const tools: GenerateContentConfig["tools"] = [] - if (this.options.enableUrlContext) { - tools.push({ urlContext: {} }) - } - if (this.options.enableGrounding) { - tools.push({ googleSearch: {} }) + // Google built-in tools (Grounding, URL Context) are currently mutually exclusive + // with function declarations in the Gemini API. If native function calling is + // used (Agent tools), we must prioritize it and skip built-in tools to avoid + // "Tool use with function calling is unsupported" (HTTP 400) errors. + if (metadata?.tools && metadata.tools.length > 0) { + tools.push({ + functionDeclarations: metadata.tools.map((tool) => ({ + name: (tool as any).function.name, + description: (tool as any).function.description, + parametersJsonSchema: (tool as any).function.parameters, + })), + }) + } else { + if (this.options.enableUrlContext) { + tools.push({ urlContext: {} }) + } + + if (this.options.enableGrounding) { + tools.push({ googleSearch: {} }) + } } // Determine temperature respecting model capabilities and defaults: @@ -133,6 +164,34 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl ...(tools.length > 0 ? { tools } : {}), } + if (metadata?.tool_choice) { + const choice = metadata.tool_choice + let mode: FunctionCallingConfigMode + let allowedFunctionNames: string[] | undefined + + if (choice === "auto") { + mode = FunctionCallingConfigMode.AUTO + } else if (choice === "none") { + mode = FunctionCallingConfigMode.NONE + } else if (choice === "required") { + // "required" means the model must call at least one tool; Gemini uses ANY for this. + mode = FunctionCallingConfigMode.ANY + } else if (typeof choice === "object" && "function" in choice && choice.type === "function") { + mode = FunctionCallingConfigMode.ANY + allowedFunctionNames = [choice.function.name] + } else { + // Fall back to AUTO for unknown values to avoid unintentionally broadening tool access. + mode = FunctionCallingConfigMode.AUTO + } + + config.toolConfig = { + functionCallingConfig: { + mode, + ...(allowedFunctionNames ? { allowedFunctionNames } : {}), + }, + } + } + const params: GenerateContentParameters = { model, contents, config } try { const result = await this.client.models.generateContentStream(params) @@ -141,6 +200,8 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl let pendingGroundingMetadata: GroundingMetadata | undefined let finalResponse: { responseId?: string } | undefined + let toolCallCounter = 0 + for await (const chunk of result) { // Track the final structured response (per SDK pattern: candidate.finishReason) if (chunk.candidates && chunk.candidates[0]?.finishReason) { @@ -159,6 +220,7 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl thought?: boolean text?: string thoughtSignature?: string + functionCall?: { name: string; args: Record } }>) { // Capture thought signatures so they can be persisted into API history. const thoughtSignature = part.thoughtSignature @@ -173,6 +235,14 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl if (part.text) { yield { type: "reasoning", text: part.text } } + } else if (part.functionCall) { + const callId = `${part.functionCall.name}-${toolCallCounter++}` + yield { + type: "tool_call", + id: callId, + name: part.functionCall.name, + arguments: JSON.stringify(part.functionCall.args), + } } else { // This is regular content if (part.text) { @@ -350,12 +420,7 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl const countTokensRequest = { model, // Token counting does not need encrypted continuation; always drop thoughtSignature. - contents: [ - { - role: "user", - parts: convertAnthropicContentToGemini(content, { includeThoughtSignatures: false }), - }, - ], + contents: convertAnthropicContentToGemini(content, { includeThoughtSignatures: false }), } const response = await this.client.models.countTokens(countTokensRequest) diff --git a/src/api/transform/__tests__/gemini-format.spec.ts b/src/api/transform/__tests__/gemini-format.spec.ts index 92e0def2a4e..e30b01bd736 100644 --- a/src/api/transform/__tests__/gemini-format.spec.ts +++ b/src/api/transform/__tests__/gemini-format.spec.ts @@ -13,10 +13,12 @@ describe("convertAnthropicMessageToGemini", () => { const result = convertAnthropicMessageToGemini(anthropicMessage) - expect(result).toEqual({ - role: "user", - parts: [{ text: "Hello, world!" }], - }) + expect(result).toEqual([ + { + role: "user", + parts: [{ text: "Hello, world!" }], + }, + ]) }) it("should convert assistant role to model role", () => { @@ -27,10 +29,12 @@ describe("convertAnthropicMessageToGemini", () => { const result = convertAnthropicMessageToGemini(anthropicMessage) - expect(result).toEqual({ - role: "model", - parts: [{ text: "I'm an assistant" }], - }) + expect(result).toEqual([ + { + role: "model", + parts: [{ text: "I'm an assistant" }], + }, + ]) }) it("should convert a message with text blocks", () => { @@ -44,10 +48,12 @@ describe("convertAnthropicMessageToGemini", () => { const result = convertAnthropicMessageToGemini(anthropicMessage) - expect(result).toEqual({ - role: "user", - parts: [{ text: "First paragraph" }, { text: "Second paragraph" }], - }) + expect(result).toEqual([ + { + role: "user", + parts: [{ text: "First paragraph" }, { text: "Second paragraph" }], + }, + ]) }) it("should convert a message with an image", () => { @@ -68,18 +74,20 @@ describe("convertAnthropicMessageToGemini", () => { const result = convertAnthropicMessageToGemini(anthropicMessage) - expect(result).toEqual({ - role: "user", - parts: [ - { text: "Check out this image:" }, - { - inlineData: { - data: "base64encodeddata", - mimeType: "image/jpeg", + expect(result).toEqual([ + { + role: "user", + parts: [ + { text: "Check out this image:" }, + { + inlineData: { + data: "base64encodeddata", + mimeType: "image/jpeg", + }, }, - }, - ], - }) + ], + }, + ]) }) it("should throw an error for unsupported image source type", () => { @@ -115,22 +123,27 @@ describe("convertAnthropicMessageToGemini", () => { const result = convertAnthropicMessageToGemini(anthropicMessage) - expect(result).toEqual({ - role: "model", - parts: [ - { text: "Let me calculate that for you." }, - { - functionCall: { - name: "calculator", - args: { operation: "add", numbers: [2, 3] }, + expect(result).toEqual([ + { + role: "model", + parts: [ + { text: "Let me calculate that for you." }, + { + functionCall: { + name: "calculator", + args: { operation: "add", numbers: [2, 3] }, + }, + thoughtSignature: "skip_thought_signature_validator", }, - thoughtSignature: "skip_thought_signature_validator", - }, - ], - }) + ], + }, + ]) }) it("should convert a message with tool result as string", () => { + const toolIdToName = new Map() + toolIdToName.set("calculator-123", "calculator") + const anthropicMessage: Anthropic.Messages.MessageParam = { role: "user", content: [ @@ -143,23 +156,25 @@ describe("convertAnthropicMessageToGemini", () => { ], } - const result = convertAnthropicMessageToGemini(anthropicMessage) + const result = convertAnthropicMessageToGemini(anthropicMessage, { toolIdToName }) - expect(result).toEqual({ - role: "user", - parts: [ - { text: "Here's the result:" }, - { - functionResponse: { - name: "calculator", - response: { + expect(result).toEqual([ + { + role: "user", + parts: [ + { text: "Here's the result:" }, + { + functionResponse: { name: "calculator", - content: "The result is 5", + response: { + name: "calculator", + content: "The result is 5", + }, }, }, - }, - ], - }) + ], + }, + ]) }) it("should handle empty tool result content", () => { @@ -177,13 +192,13 @@ describe("convertAnthropicMessageToGemini", () => { const result = convertAnthropicMessageToGemini(anthropicMessage) // Should skip the empty tool result - expect(result).toEqual({ - role: "user", - parts: [], - }) + expect(result).toEqual([]) }) it("should convert a message with tool result as array with text only", () => { + const toolIdToName = new Map() + toolIdToName.set("search-123", "search") + const anthropicMessage: Anthropic.Messages.MessageParam = { role: "user", content: [ @@ -198,25 +213,30 @@ describe("convertAnthropicMessageToGemini", () => { ], } - const result = convertAnthropicMessageToGemini(anthropicMessage) + const result = convertAnthropicMessageToGemini(anthropicMessage, { toolIdToName }) - expect(result).toEqual({ - role: "user", - parts: [ - { - functionResponse: { - name: "search", - response: { + expect(result).toEqual([ + { + role: "user", + parts: [ + { + functionResponse: { name: "search", - content: "First result\n\nSecond result", + response: { + name: "search", + content: "First result\n\nSecond result", + }, }, }, - }, - ], - }) + ], + }, + ]) }) it("should convert a message with tool result as array with text and images", () => { + const toolIdToName = new Map() + toolIdToName.set("search-123", "search") + const anthropicMessage: Anthropic.Messages.MessageParam = { role: "user", content: [ @@ -246,37 +266,42 @@ describe("convertAnthropicMessageToGemini", () => { ], } - const result = convertAnthropicMessageToGemini(anthropicMessage) + const result = convertAnthropicMessageToGemini(anthropicMessage, { toolIdToName }) - expect(result).toEqual({ - role: "user", - parts: [ - { - functionResponse: { - name: "search", - response: { + expect(result).toEqual([ + { + role: "user", + parts: [ + { + functionResponse: { name: "search", - content: "Search results:\n\n(See next part for image)", + response: { + name: "search", + content: "Search results:\n\n(See next part for image)", + }, }, }, - }, - { - inlineData: { - data: "image1data", - mimeType: "image/png", + { + inlineData: { + data: "image1data", + mimeType: "image/png", + }, }, - }, - { - inlineData: { - data: "image2data", - mimeType: "image/jpeg", + { + inlineData: { + data: "image2data", + mimeType: "image/jpeg", + }, }, - }, - ], - }) + ], + }, + ]) }) it("should convert a message with tool result containing only images", () => { + const toolIdToName = new Map() + toolIdToName.set("imagesearch-123", "imagesearch") + const anthropicMessage: Anthropic.Messages.MessageParam = { role: "user", content: [ @@ -297,28 +322,102 @@ describe("convertAnthropicMessageToGemini", () => { ], } - const result = convertAnthropicMessageToGemini(anthropicMessage) + const result = convertAnthropicMessageToGemini(anthropicMessage, { toolIdToName }) - expect(result).toEqual({ + expect(result).toEqual([ + { + role: "user", + parts: [ + { + functionResponse: { + name: "imagesearch", + response: { + name: "imagesearch", + content: "\n\n(See next part for image)", + }, + }, + }, + { + inlineData: { + data: "onlyimagedata", + mimeType: "image/png", + }, + }, + ], + }, + ]) + }) + + it("should handle tool names with hyphens using toolIdToName map", () => { + const toolIdToName = new Map() + toolIdToName.set("search-files-123", "search-files") + + const anthropicMessage: Anthropic.Messages.MessageParam = { role: "user", - parts: [ + content: [ { - functionResponse: { - name: "imagesearch", - response: { - name: "imagesearch", - content: "\n\n(See next part for image)", + type: "tool_result", + tool_use_id: "search-files-123", + content: "found files", + }, + ], + } + + const result = convertAnthropicMessageToGemini(anthropicMessage, { toolIdToName }) + + expect(result).toEqual([ + { + role: "user", + parts: [ + { + functionResponse: { + name: "search-files", + response: { + name: "search-files", + content: "found files", + }, }, }, + ], + }, + ]) + }) + + it("should throw error when toolIdToName map is not provided", () => { + const anthropicMessage: Anthropic.Messages.MessageParam = { + role: "user", + content: [ + { + type: "tool_result", + tool_use_id: "calculator-123", + content: "result is 5", }, + ], + } + + expect(() => convertAnthropicMessageToGemini(anthropicMessage)).toThrow( + 'Unable to find tool name for tool_use_id "calculator-123"', + ) + }) + + it("should throw error when tool_use_id is not in the map", () => { + const toolIdToName = new Map() + toolIdToName.set("other-tool-456", "other-tool") + + const anthropicMessage: Anthropic.Messages.MessageParam = { + role: "user", + content: [ { - inlineData: { - data: "onlyimagedata", - mimeType: "image/png", - }, + type: "tool_result", + tool_use_id: "calculator-123", + content: "result is 5", }, ], - }) + } + + expect(() => convertAnthropicMessageToGemini(anthropicMessage, { toolIdToName })).toThrow( + 'Unable to find tool name for tool_use_id "calculator-123"', + ) }) it("should throw an error for unsupported content block type", () => { diff --git a/src/api/transform/gemini-format.ts b/src/api/transform/gemini-format.ts index 1d3532241b6..ffb8b8f789c 100644 --- a/src/api/transform/gemini-format.ts +++ b/src/api/transform/gemini-format.ts @@ -15,9 +15,10 @@ function isThoughtSignatureContentBlock(block: ExtendedContentBlockParam): block export function convertAnthropicContentToGemini( content: ExtendedAnthropicContent, - options?: { includeThoughtSignatures?: boolean }, + options?: { includeThoughtSignatures?: boolean; toolIdToName?: Map }, ): Part[] { const includeThoughtSignatures = options?.includeThoughtSignatures ?? true + const toolIdToName = options?.toolIdToName // First pass: find thoughtSignature if it exists in the content blocks let activeThoughtSignature: string | undefined @@ -78,8 +79,17 @@ export function convertAnthropicContentToGemini( return [] } - // Extract tool name from tool_use_id (e.g., "calculator-123" -> "calculator") - const toolName = block.tool_use_id.split("-")[0] + // Get tool name from the map (built from tool_use blocks in message history). + // The map must contain the tool name - if it doesn't, this indicates a bug + // where the conversation history is incomplete or tool_use blocks are missing. + const toolName = toolIdToName?.get(block.tool_use_id) + if (!toolName) { + throw new Error( + `Unable to find tool name for tool_use_id "${block.tool_use_id}". ` + + `This indicates the conversation history is missing the corresponding tool_use block. ` + + `Available tool IDs: ${Array.from(toolIdToName?.keys() ?? []).join(", ") || "none"}`, + ) + } if (typeof block.content === "string") { return { @@ -122,14 +132,18 @@ export function convertAnthropicContentToGemini( export function convertAnthropicMessageToGemini( message: Anthropic.Messages.MessageParam, - options?: { includeThoughtSignatures?: boolean }, -): Content { - return { - role: message.role === "assistant" ? "model" : "user", - parts: convertAnthropicContentToGemini(message.content, { - ...options, - includeThoughtSignatures: - message.role === "assistant" ? (options?.includeThoughtSignatures ?? true) : false, - }), + options?: { includeThoughtSignatures?: boolean; toolIdToName?: Map }, +): Content[] { + const parts = convertAnthropicContentToGemini(message.content, options) + + if (parts.length === 0) { + return [] } + + return [ + { + role: message.role === "assistant" ? "model" : "user", + parts, + }, + ] }