From b974489426979d91b99e4aea7d1922dc800bf1d3 Mon Sep 17 00:00:00 2001 From: Ryan Swift Date: Sun, 4 Jan 2026 13:45:22 -0500 Subject: [PATCH] feat: display Google Gemini cached token stats Extract cached token counts from Google's response metadata so they're visible in OpenCode's usage display. Gemini 2.5+ models use implicit caching (automatic, server-side). However, OpenCode wasn't reading the cached token counts, because Google reports them in a provider-specific metadata location (usageMetadata.cachedContentTokenCount) rather than the standard location. This enables users to see their Gemini cache hits in the session context usage display, and cost calculations will correctly account for cached tokens. Verified working: tested with gemini-3-flash-preview, observed cache.read values of 16K, 49K, and 107K tokens in a multi-turn conversation. Future opportunity: For guaranteed cache hits, explicit caching could be implemented using GoogleAICacheManager + providerOptions.google.cachedContent. See: https://ai.google.dev/gemini-api/docs/caching --- packages/opencode/src/session/index.ts | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/packages/opencode/src/session/index.ts b/packages/opencode/src/session/index.ts index 0776590d6a9..338c814ef0b 100644 --- a/packages/opencode/src/session/index.ts +++ b/packages/opencode/src/session/index.ts @@ -311,7 +311,7 @@ export namespace Session { for (const child of await children(sessionID)) { await remove(child.id) } - await unshare(sessionID).catch(() => {}) + await unshare(sessionID).catch(() => { }) for (const msg of await Storage.list(["message", sessionID])) { for (const part of await Storage.list(["part", msg.at(-1)!])) { await Storage.remove(part) } } @@ -397,7 +397,9 @@ export namespace Session { metadata: z.custom().optional(), }), (input) => { - const cachedInputTokens = input.usage.cachedInputTokens ?? 
0 + // Google returns cached token counts in usageMetadata.cachedContentTokenCount rather than the standard location + const cachedInputTokens = + input.usage.cachedInputTokens ?? (input.metadata?.["google"] as any)?.usageMetadata?.cachedContentTokenCount ?? 0 const excludesCachedTokens = !!(input.metadata?.["anthropic"] || input.metadata?.["bedrock"]) const adjustedInputTokens = excludesCachedTokens ? (input.usage.inputTokens ?? 0) @@ -426,6 +428,7 @@ export namespace Session { input.model.cost?.experimentalOver200K && tokens.input + tokens.cache.read > 200_000 ? input.model.cost.experimentalOver200K : input.model.cost + return { cost: safe( new Decimal(0)