From b974489426979d91b99e4aea7d1922dc800bf1d3 Mon Sep 17 00:00:00 2001 From: Ryan Swift Date: Sun, 4 Jan 2026 13:45:22 -0500 Subject: [PATCH] feat: display Google Gemini cached token stats Extract cached token counts from Google's response metadata so they're visible in OpenCode's usage display. Gemini 2.5+ models use implicit caching (automatic, server-side). However, OpenCode wasn't reading the cached token counts, because Google reports them in a provider-specific metadata location (usageMetadata.cachedContentTokenCount) rather than the standard location. This enables users to see their Gemini cache hits in the session context usage display, and cost calculations will correctly account for cached tokens. Verified working: tested with gemini-3-flash-preview, observed cache.read values of 16K, 49K, and 107K tokens in a multi-turn conversation. Future opportunity: For guaranteed cache hits, explicit caching could be implemented using GoogleAICacheManager + providerOptions.google.cachedContent. See: https://ai.google.dev/gemini-api/docs/caching --- packages/opencode/src/session/index.ts | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/packages/opencode/src/session/index.ts b/packages/opencode/src/session/index.ts index 0776590d6a9..338c814ef0b 100644 --- a/packages/opencode/src/session/index.ts +++ b/packages/opencode/src/session/index.ts @@ -311,7 +311,7 @@ export namespace Session { for (const child of await children(sessionID)) { await remove(child.id) } - await unshare(sessionID).catch(() => {}) + await unshare(sessionID).catch(() => { }) for (const msg of await Storage.list(["message", sessionID])) { for (const part of await Storage.list(["part", msg.at(-1)!])) { await Storage.remove(part) } } @@ -397,7 +397,9 @@ export namespace Session { metadata: z.custom().optional(), }), (input) => { - const cachedInputTokens = input.usage.cachedInputTokens ?? 
0 + // Google returns cached token counts in usageMetadata.cachedContentTokenCount rather than the standard location + const cachedInputTokens = + input.usage.cachedInputTokens ?? (input.metadata?.["google"] as any)?.usageMetadata?.cachedContentTokenCount ?? 0 const excludesCachedTokens = !!(input.metadata?.["anthropic"] || input.metadata?.["bedrock"]) const adjustedInputTokens = excludesCachedTokens ? (input.usage.inputTokens ?? 0) @@ -426,6 +428,7 @@ export namespace Session { input.model.cost?.experimentalOver200K && tokens.input + tokens.cache.read > 200_000 ? input.model.cost.experimentalOver200K : input.model.cost + return { cost: safe( new Decimal(0)