ding113 · ding113 · Jan 28, 2026 · Jan 28, 2026 · gemini-code-assist · Jan 28, 2026
diff --git a/src/app/v1/_lib/proxy/response-handler.ts b/src/app/v1/_lib/proxy/response-handler.ts
@@ -34,6 +34,9 @@ export type UsageMetrics = {
   cache_creation_1h_input_tokens?: number;
   cache_ttl?: "5m" | "1h" | "mixed";
   cache_read_input_tokens?: number;
+  // Image-modality token counts (extracted from candidatesTokensDetails/promptTokensDetails)
+  input_image_tokens?: number;
+  output_image_tokens?: number;
 };
 
 /**
@@ -1288,6 +1291,71 @@ function extractUsageMetrics(value: unknown): UsageMetrics | null {
     hasAny = true;
   }
 
+  // Gemini modality-specific token details (IMAGE/TEXT)
+  // candidatesTokensDetails: output tokens broken down by modality
+  const candidatesDetails = usage.candidatesTokensDetails as
+    | Array<{ modality?: string; tokenCount?: number }>
+    | undefined;
+  if (Array.isArray(candidatesDetails) && candidatesDetails.length > 0) {
+    let imageTokens = 0;
+    let textTokens = 0;
+    let hasValidToken = false;
+    for (const detail of candidatesDetails) {
+      if (typeof detail.tokenCount === "number" && detail.tokenCount > 0) {
+        hasValidToken = true;
+        const modalityUpper = detail.modality?.toUpperCase();
+        if (modalityUpper === "IMAGE") {
+          imageTokens += detail.tokenCount;
+        } else { // any non-IMAGE (or missing) modality is counted as text
+          textTokens += detail.tokenCount;
+        }
+      }
+    }
+    if (imageTokens > 0) {
+      result.output_image_tokens = imageTokens;
+      hasAny = true;
+    }
+    if (hasValidToken) {
+      // Compute the unclassified TEXT tokens: candidatesTokenCount minus the sum of the details.
+      // These may be internal overhead of image generation; they are billed at the TEXT price.
+      const detailsSum = imageTokens + textTokens;
+      const candidatesTotal =
+        typeof usage.candidatesTokenCount === "number" ? usage.candidatesTokenCount : 0;
+      const unaccountedTokens = Math.max(candidatesTotal - detailsSum, 0);
+      result.output_tokens = textTokens + unaccountedTokens; // NOTE(review): overwrites any output_tokens set earlier in this function (outside this hunk) — confirm nothing is lost
+      hasAny = true;
+    }
+  }
+
+  // promptTokensDetails: input tokens broken down by modality
+  const promptDetails = usage.promptTokensDetails as
+    | Array<{ modality?: string; tokenCount?: number }>
+    | undefined;
+  if (Array.isArray(promptDetails) && promptDetails.length > 0) {
+    let imageTokens = 0;
+    let textTokens = 0;
+    let hasValidToken = false;
+    for (const detail of promptDetails) {
+      if (typeof detail.tokenCount === "number" && detail.tokenCount > 0) {
+        hasValidToken = true;
+        const modalityUpper = detail.modality?.toUpperCase();
+        if (modalityUpper === "IMAGE") {
+          imageTokens += detail.tokenCount;
+        } else { // any non-IMAGE (or missing) modality is counted as text
+          textTokens += detail.tokenCount;
+        }
+      }
+    }
+    if (imageTokens > 0) {
+      result.input_image_tokens = imageTokens;
+      hasAny = true;
+    }
+    if (hasValidToken) {
+      result.input_tokens = textTokens; // NOTE(review): overwrites any input_tokens set earlier in this function (outside this hunk) — confirm that is intended
+      hasAny = true;
+    }
+  }
+
   if (typeof usage.output_tokens === "number") {
     result.output_tokens = usage.output_tokens;
     hasAny = true;

diff --git a/src/lib/utils/cost-calculation.ts b/src/lib/utils/cost-calculation.ts
@@ -14,6 +14,9 @@ type UsageMetrics = {
   cache_creation_1h_input_tokens?: number;
   cache_ttl?: "5m" | "1h" | "mixed";
   cache_read_input_tokens?: number;
+  // Image-modality token counts (extracted from candidatesTokensDetails/promptTokensDetails)
+  input_image_tokens?: number;
+  output_image_tokens?: number;
 };
 
 function multiplyCost(quantity: number | undefined, unitCost: number | undefined): Decimal {
@@ -285,6 +288,21 @@ export function calculateRequestCost(
     segments.push(multiplyCost(usage.cache_read_input_tokens, cacheReadCost));
   }
 
+  // Image token cost (Gemini image generation models)
+  // Output image tokens: prefer output_cost_per_image_token, otherwise fall back to output_cost_per_token
+  if (usage.output_image_tokens != null && usage.output_image_tokens > 0) {
+    const imageCostPerToken =
+      priceData.output_cost_per_image_token ?? priceData.output_cost_per_token;
+    segments.push(multiplyCost(usage.output_image_tokens, imageCostPerToken));
+  }
+
+  // Input image tokens: prefer input_cost_per_image_token, otherwise fall back to input_cost_per_token
+  if (usage.input_image_tokens != null && usage.input_image_tokens > 0) {
+    const imageCostPerToken =
+      priceData.input_cost_per_image_token ?? priceData.input_cost_per_token;
+    segments.push(multiplyCost(usage.input_image_tokens, imageCostPerToken));
+  }
+
   const total = segments.reduce((acc, segment) => acc.plus(segment), new Decimal(0));
 
   // 应用倍率

diff --git a/src/types/model-price.ts b/src/types/model-price.ts
@@ -20,6 +20,12 @@ export interface ModelPriceData {
 
   // 图片生成价格
   output_cost_per_image?: number;
+  // Image token price (billed per token; used for image output of models such as Gemini)
+  output_cost_per_image_token?: number;
+  // Image input price (billed per image)
+  input_cost_per_image?: number;
+  // Image input token price (billed per token)
+  input_cost_per_image_token?: number;
 
   // 搜索上下文价格
   search_context_cost_per_query?: {

diff --git a/tests/unit/lib/cost-calculation-image-tokens.test.ts b/tests/unit/lib/cost-calculation-image-tokens.test.ts
@@ -0,0 +1,152 @@
+import { describe, expect, test } from "vitest";
+import { calculateRequestCost } from "@/lib/utils/cost-calculation";
+
+describe("calculateRequestCost: image token pricing (Gemini image generation)", () => {
+  test("output_image_tokens 应使用 output_cost_per_image_token 计费", () => {
+    const cost = calculateRequestCost(
+      { output_image_tokens: 2000 },
+      {
+        output_cost_per_token: 0.000012,
+        output_cost_per_image_token: 0.00012,
+      }
+    );
+
+    // 2000 * 0.00012 = 0.24
+    expect(cost.toString()).toBe("0.24");
+  });
+
+  test("output_image_tokens 未配置 image 价格时应回退到 output_cost_per_token", () => {
+    const cost = calculateRequestCost(
+      { output_image_tokens: 2000 },
+      {
+        output_cost_per_token: 0.000012,
+      }
+    );
+
+    // 2000 * 0.000012 = 0.024
+    expect(cost.toString()).toBe("0.024");
+  });
+
+  test("input_image_tokens 应使用 input_cost_per_image_token 计费", () => {
+    const cost = calculateRequestCost(
+      { input_image_tokens: 560 },
+      {
+        input_cost_per_token: 0.000002,
+        input_cost_per_image_token: 0.00000196,
+      }
+    );
+
+    // 560 * 0.00000196 = 0.0010976
+    expect(cost.toNumber()).toBeCloseTo(0.0010976, 6);
+  });
+
+  test("input_image_tokens 未配置 image 价格时应回退到 input_cost_per_token", () => {
+    const cost = calculateRequestCost(
+      { input_image_tokens: 560 },
+      {
+        input_cost_per_token: 0.000002,
+      }
+    );
+
+    // 560 * 0.000002 = 0.00112
+    expect(cost.toString()).toBe("0.00112");
+  });
+
+  test("混合响应：text + image tokens 应分别计费", () => {
+    const cost = calculateRequestCost(
+      {
+        input_tokens: 326,
+        output_tokens: 340,
+        output_image_tokens: 2000,
+      },
+      {
+        input_cost_per_token: 0.000002,
+        output_cost_per_token: 0.000012,
+        output_cost_per_image_token: 0.00012,
+      }
+    );
+
+    // input: 326 * 0.000002 = 0.000652
+    // output text: 340 * 0.000012 = 0.00408
+    // output image: 2000 * 0.00012 = 0.24
+    // total: 0.000652 + 0.00408 + 0.24 = 0.244732
+    expect(cost.toNumber()).toBeCloseTo(0.244732, 6);
+  });
+
+  test("完整 Gemini image 响应计费示例", () => {
+    const cost = calculateRequestCost(
+      {
+        input_tokens: 326,
+        output_tokens: 340,
+        output_image_tokens: 2000,
+      },
+      {
+        input_cost_per_token: 0.000002,
+        output_cost_per_token: 0.000012,
+        output_cost_per_image_token: 0.00012,
+      }
+    );
+
+    // Check against Google's official pricing. NOTE(review): same inputs and expectation as the previous test.
+    // input: 326 * $0.000002 = $0.000652
+    // output text: 340 * $0.000012 = $0.00408
+    // output image: 2000 * $0.00012 = $0.24 (4K image = 2000 tokens)
+    // total: $0.244732
+    expect(cost.toNumber()).toBeCloseTo(0.244732, 6);
+  });
+
+  test("倍率应同时作用于 image token 费用", () => {
+    const cost = calculateRequestCost(
+      { output_image_tokens: 2000 },
+      {
+        output_cost_per_image_token: 0.00012,
+      },
+      2
+    );
+
+    // 2000 * 0.00012 * 2 = 0.48
+    expect(cost.toString()).toBe("0.48");
+  });
+
+  test("output_image_tokens 为 0 时不应产生费用", () => {
+    const cost = calculateRequestCost(
+      { output_image_tokens: 0 },
+      {
+        output_cost_per_image_token: 0.00012,
+      }
+    );
+
+    expect(cost.toString()).toBe("0");
+  });
+
+  test("output_image_tokens 为 undefined 时不应产生费用", () => {
+    const cost = calculateRequestCost(
+      { output_tokens: 1000 },
+      {
+        output_cost_per_token: 0.000012,
+        output_cost_per_image_token: 0.00012,
+      }
+    );
+
+    // Only output_tokens is billed: 1000 * 0.000012 = 0.012
+    expect(cost.toString()).toBe("0.012");
+  });
+
+  test("同时有 input_image_tokens 和 output_image_tokens", () => {
+    const cost = calculateRequestCost(
+      {
+        input_image_tokens: 560,
+        output_image_tokens: 2000,
+      },
+      {
+        input_cost_per_image_token: 0.00000196,
+        output_cost_per_image_token: 0.00012,
+      }
+    );
+
+    // input: 560 * 0.00000196 = 0.0010976
+    // output: 2000 * 0.00012 = 0.24
+    // total: 0.2410976
+    expect(cost.toNumber()).toBeCloseTo(0.2410976, 6);
+  });
+});