From 6341e83e3b9abc4bdad6a46057677b62d0672d4a Mon Sep 17 00:00:00 2001 From: sususu98 Date: Wed, 28 Jan 2026 10:44:45 +0800 Subject: [PATCH] =?UTF-8?q?fix(billing):=20=E4=BF=AE=E5=A4=8D=20Gemini=20?= =?UTF-8?q?=E5=9B=BE=E7=89=87=E7=94=9F=E6=88=90=E6=A8=A1=E5=9E=8B=E7=9A=84?= =?UTF-8?q?=20IMAGE=20modality=20token=20=E8=AE=A1=E8=B4=B9=E9=97=AE?= =?UTF-8?q?=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 问题背景: - gemini-3-pro-image-preview 等图片生成模型返回的 usage 中包含 candidatesTokensDetails - 该字段按 modality 细分 token (IMAGE/TEXT) - IMAGE modality token 价格为 $0.00012/token,是普通 TEXT token 的 10 倍 - 原系统未解析此字段,导致 IMAGE token 按 TEXT 价格计费,计费偏低约 7.6 倍 类型扩展 (src/types/model-price.ts): - 新增 output_cost_per_image_token: 输出图片 token 单价 (按 token 计费) - 新增 input_cost_per_image_token: 输入图片 token 单价 (按 token 计费) - 新增 input_cost_per_image: 输入图片固定价格 (按张计费,$0.0011/张) - 保留 output_cost_per_image: 输出图片固定价格 (按张计费) Usage 提取逻辑 (src/app/v1/_lib/proxy/response-handler.ts): - 解析 candidatesTokensDetails 提取 output_image_tokens 和 output_tokens (TEXT) - 解析 promptTokensDetails 提取 input_image_tokens 和 input_tokens (TEXT) - 使用 toUpperCase() 进行大小写不敏感匹配 (IMAGE/image/Image) - 添加 hasValidToken 守卫,仅在解析到有效 token 时覆盖原始值 - 修复 promptTokensDetails 解析不完整导致 input IMAGE tokens 被重复计费的问题 - 计算 candidatesTokenCount 与 details 总和的差值作为未分类 TEXT tokens (这些是图片生成的内部开销,按 TEXT 价格计费) 计费逻辑 (src/lib/utils/cost-calculation.ts): - output_image_tokens 优先使用 output_cost_per_image_token 计费 - input_image_tokens 优先使用 input_cost_per_image_token 计费 - 若未配置 image token 价格,回退到普通 token 价格 (向后兼容) - 倍率 (multiplier) 同时作用于 image token 费用 测试覆盖: - 新增 cost-calculation-image-tokens.test.ts (10 个测试) - 扩展 extract-usage-metrics.test.ts (9 个 Gemini image 测试) - 覆盖场景: 纯 IMAGE、IMAGE+TEXT 混合、无效数据、大小写变体、向后兼容、 混合输入输出、candidatesTokenCount 差值计算 计费示例 (完整图片生成请求): - promptTokenCount=326, candidatesTokenCount=2340, thoughtsTokenCount=337 - candidatesTokensDetails: IMAGE=2000 (差值 340 为未分类 TEXT) - 输入 TEXT: 326 × $0.000002 = 
$0.000652 - 输出 TEXT: (340+337) × $0.000012 = $0.008124 - 输出 IMAGE: 2000 × $0.00012 = $0.240000 - 总计: $0.248776 (修复前 $0.244696,少收 $0.00408) Fixes #663 --- src/app/v1/_lib/proxy/response-handler.ts | 68 ++++++++ src/lib/utils/cost-calculation.ts | 18 ++ src/types/model-price.ts | 6 + .../lib/cost-calculation-image-tokens.test.ts | 152 ++++++++++++++++ .../unit/proxy/extract-usage-metrics.test.ts | 164 ++++++++++++++++++ 5 files changed, 408 insertions(+) create mode 100644 tests/unit/lib/cost-calculation-image-tokens.test.ts diff --git a/src/app/v1/_lib/proxy/response-handler.ts b/src/app/v1/_lib/proxy/response-handler.ts index 33b4fe437..0cfe29be6 100644 --- a/src/app/v1/_lib/proxy/response-handler.ts +++ b/src/app/v1/_lib/proxy/response-handler.ts @@ -34,6 +34,9 @@ export type UsageMetrics = { cache_creation_1h_input_tokens?: number; cache_ttl?: "5m" | "1h" | "mixed"; cache_read_input_tokens?: number; + // 图片 modality tokens(从 candidatesTokensDetails/promptTokensDetails 提取) + input_image_tokens?: number; + output_image_tokens?: number; }; /** @@ -1288,6 +1291,71 @@ function extractUsageMetrics(value: unknown): UsageMetrics | null { hasAny = true; } + // Gemini modality-specific token details (IMAGE/TEXT) + // candidatesTokensDetails: 输出 token 按 modality 分类 + const candidatesDetails = usage.candidatesTokensDetails as + | Array<{ modality?: string; tokenCount?: number }> + | undefined; + if (Array.isArray(candidatesDetails) && candidatesDetails.length > 0) { + let imageTokens = 0; + let textTokens = 0; + let hasValidToken = false; + for (const detail of candidatesDetails) { + if (typeof detail.tokenCount === "number" && detail.tokenCount > 0) { + hasValidToken = true; + const modalityUpper = detail.modality?.toUpperCase(); + if (modalityUpper === "IMAGE") { + imageTokens += detail.tokenCount; + } else { + textTokens += detail.tokenCount; + } + } + } + if (imageTokens > 0) { + result.output_image_tokens = imageTokens; + hasAny = true; + } + if (hasValidToken) { + // 
计算未分类的 TEXT tokens: candidatesTokenCount - details总和 + // 这些可能是图片生成的内部开销,按 TEXT 价格计费 + const detailsSum = imageTokens + textTokens; + const candidatesTotal = + typeof usage.candidatesTokenCount === "number" ? usage.candidatesTokenCount : 0; + const unaccountedTokens = Math.max(candidatesTotal - detailsSum, 0); + result.output_tokens = textTokens + unaccountedTokens; + hasAny = true; + } + } + + // promptTokensDetails: 输入 token 按 modality 分类 + const promptDetails = usage.promptTokensDetails as + | Array<{ modality?: string; tokenCount?: number }> + | undefined; + if (Array.isArray(promptDetails) && promptDetails.length > 0) { + let imageTokens = 0; + let textTokens = 0; + let hasValidToken = false; + for (const detail of promptDetails) { + if (typeof detail.tokenCount === "number" && detail.tokenCount > 0) { + hasValidToken = true; + const modalityUpper = detail.modality?.toUpperCase(); + if (modalityUpper === "IMAGE") { + imageTokens += detail.tokenCount; + } else { + textTokens += detail.tokenCount; + } + } + } + if (imageTokens > 0) { + result.input_image_tokens = imageTokens; + hasAny = true; + } + if (hasValidToken) { + result.input_tokens = textTokens; + hasAny = true; + } + } + if (typeof usage.output_tokens === "number") { result.output_tokens = usage.output_tokens; hasAny = true; diff --git a/src/lib/utils/cost-calculation.ts b/src/lib/utils/cost-calculation.ts index a38b702de..1212a1f99 100644 --- a/src/lib/utils/cost-calculation.ts +++ b/src/lib/utils/cost-calculation.ts @@ -14,6 +14,9 @@ type UsageMetrics = { cache_creation_1h_input_tokens?: number; cache_ttl?: "5m" | "1h" | "mixed"; cache_read_input_tokens?: number; + // 图片 modality tokens(从 candidatesTokensDetails/promptTokensDetails 提取) + input_image_tokens?: number; + output_image_tokens?: number; }; function multiplyCost(quantity: number | undefined, unitCost: number | undefined): Decimal { @@ -285,6 +288,21 @@ export function calculateRequestCost( 
segments.push(multiplyCost(usage.cache_read_input_tokens, cacheReadCost)); } + // 图片 token 费用(Gemini image generation models) + // 输出图片 token:优先使用 output_cost_per_image_token,否则回退到 output_cost_per_token + if (usage.output_image_tokens != null && usage.output_image_tokens > 0) { + const imageCostPerToken = + priceData.output_cost_per_image_token ?? priceData.output_cost_per_token; + segments.push(multiplyCost(usage.output_image_tokens, imageCostPerToken)); + } + + // 输入图片 token:优先使用 input_cost_per_image_token,否则回退到 input_cost_per_token + if (usage.input_image_tokens != null && usage.input_image_tokens > 0) { + const imageCostPerToken = + priceData.input_cost_per_image_token ?? priceData.input_cost_per_token; + segments.push(multiplyCost(usage.input_image_tokens, imageCostPerToken)); + } + const total = segments.reduce((acc, segment) => acc.plus(segment), new Decimal(0)); // 应用倍率 diff --git a/src/types/model-price.ts b/src/types/model-price.ts index ab2b62797..295d00f77 100644 --- a/src/types/model-price.ts +++ b/src/types/model-price.ts @@ -20,6 +20,12 @@ export interface ModelPriceData { // 图片生成价格 output_cost_per_image?: number; + // 图片 token 价格(按 token 计费,用于 Gemini 等模型的图片输出) + output_cost_per_image_token?: number; + // 图片输入价格(按张计费) + input_cost_per_image?: number; + // 图片输入 token 价格(按 token 计费) + input_cost_per_image_token?: number; // 搜索上下文价格 search_context_cost_per_query?: { diff --git a/tests/unit/lib/cost-calculation-image-tokens.test.ts b/tests/unit/lib/cost-calculation-image-tokens.test.ts new file mode 100644 index 000000000..e23e6d0ee --- /dev/null +++ b/tests/unit/lib/cost-calculation-image-tokens.test.ts @@ -0,0 +1,152 @@ +import { describe, expect, test } from "vitest"; +import { calculateRequestCost } from "@/lib/utils/cost-calculation"; + +describe("calculateRequestCost: image token pricing (Gemini image generation)", () => { + test("output_image_tokens 应使用 output_cost_per_image_token 计费", () => { + const cost = calculateRequestCost( + { 
output_image_tokens: 2000 }, + { + output_cost_per_token: 0.000012, + output_cost_per_image_token: 0.00012, + } + ); + + // 2000 * 0.00012 = 0.24 + expect(cost.toString()).toBe("0.24"); + }); + + test("output_image_tokens 未配置 image 价格时应回退到 output_cost_per_token", () => { + const cost = calculateRequestCost( + { output_image_tokens: 2000 }, + { + output_cost_per_token: 0.000012, + } + ); + + // 2000 * 0.000012 = 0.024 + expect(cost.toString()).toBe("0.024"); + }); + + test("input_image_tokens 应使用 input_cost_per_image_token 计费", () => { + const cost = calculateRequestCost( + { input_image_tokens: 560 }, + { + input_cost_per_token: 0.000002, + input_cost_per_image_token: 0.00000196, + } + ); + + // 560 * 0.00000196 = 0.0010976 + expect(cost.toNumber()).toBeCloseTo(0.0010976, 6); + }); + + test("input_image_tokens 未配置 image 价格时应回退到 input_cost_per_token", () => { + const cost = calculateRequestCost( + { input_image_tokens: 560 }, + { + input_cost_per_token: 0.000002, + } + ); + + // 560 * 0.000002 = 0.00112 + expect(cost.toString()).toBe("0.00112"); + }); + + test("混合响应:text + image tokens 应分别计费", () => { + const cost = calculateRequestCost( + { + input_tokens: 326, + output_tokens: 340, + output_image_tokens: 2000, + }, + { + input_cost_per_token: 0.000002, + output_cost_per_token: 0.000012, + output_cost_per_image_token: 0.00012, + } + ); + + // input: 326 * 0.000002 = 0.000652 + // output text: 340 * 0.000012 = 0.00408 + // output image: 2000 * 0.00012 = 0.24 + // total: 0.000652 + 0.00408 + 0.24 = 0.244732 + expect(cost.toNumber()).toBeCloseTo(0.244732, 6); + }); + + test("完整 Gemini image 响应计费示例", () => { + const cost = calculateRequestCost( + { + input_tokens: 326, + output_tokens: 340, + output_image_tokens: 2000, + }, + { + input_cost_per_token: 0.000002, + output_cost_per_token: 0.000012, + output_cost_per_image_token: 0.00012, + } + ); + + // Google 官方价格验证 + // input: 326 * $0.000002 = $0.000652 + // output text: 340 * $0.000012 = $0.00408 + // output image: 
2000 * $0.00012 = $0.24 (4K image = 2000 tokens) + // total: $0.244732 + expect(cost.toNumber()).toBeCloseTo(0.244732, 6); + }); + + test("倍率应同时作用于 image token 费用", () => { + const cost = calculateRequestCost( + { output_image_tokens: 2000 }, + { + output_cost_per_image_token: 0.00012, + }, + 2 + ); + + // 2000 * 0.00012 * 2 = 0.48 + expect(cost.toString()).toBe("0.48"); + }); + + test("output_image_tokens 为 0 时不应产生费用", () => { + const cost = calculateRequestCost( + { output_image_tokens: 0 }, + { + output_cost_per_image_token: 0.00012, + } + ); + + expect(cost.toString()).toBe("0"); + }); + + test("output_image_tokens 为 undefined 时不应产生费用", () => { + const cost = calculateRequestCost( + { output_tokens: 1000 }, + { + output_cost_per_token: 0.000012, + output_cost_per_image_token: 0.00012, + } + ); + + // 只计算 output_tokens: 1000 * 0.000012 = 0.012 + expect(cost.toString()).toBe("0.012"); + }); + + test("同时有 input_image_tokens 和 output_image_tokens", () => { + const cost = calculateRequestCost( + { + input_image_tokens: 560, + output_image_tokens: 2000, + }, + { + input_cost_per_image_token: 0.00000196, + output_cost_per_image_token: 0.00012, + } + ); + + // input: 560 * 0.00000196 = 0.0010976 + // output: 2000 * 0.00012 = 0.24 + // total: 0.2410976 + expect(cost.toNumber()).toBeCloseTo(0.2410976, 6); + }); +}); diff --git a/tests/unit/proxy/extract-usage-metrics.test.ts b/tests/unit/proxy/extract-usage-metrics.test.ts index 8b845cb09..1318a432d 100644 --- a/tests/unit/proxy/extract-usage-metrics.test.ts +++ b/tests/unit/proxy/extract-usage-metrics.test.ts @@ -341,6 +341,170 @@ describe("extractUsageMetrics", () => { // output_tokens = candidatesTokenCount + thoughtsTokenCount expect(result.usageMetrics?.output_tokens).toBe(600); }); + + it("应从 candidatesTokensDetails 提取 IMAGE modality tokens", () => { + const response = JSON.stringify({ + usageMetadata: { + promptTokenCount: 326, + candidatesTokenCount: 2340, + candidatesTokensDetails: [ + { modality: "IMAGE", 
tokenCount: 2000 }, + { modality: "TEXT", tokenCount: 340 }, + ], + }, + }); + + const result = parseUsageFromResponseText(response, "gemini"); + + expect(result.usageMetrics?.output_image_tokens).toBe(2000); + expect(result.usageMetrics?.output_tokens).toBe(340); + }); + + it("应从 promptTokensDetails 提取 IMAGE modality tokens", () => { + const response = JSON.stringify({ + usageMetadata: { + promptTokenCount: 886, + candidatesTokenCount: 500, + promptTokensDetails: [ + { modality: "TEXT", tokenCount: 326 }, + { modality: "IMAGE", tokenCount: 560 }, + ], + }, + }); + + const result = parseUsageFromResponseText(response, "gemini"); + + expect(result.usageMetrics?.input_image_tokens).toBe(560); + expect(result.usageMetrics?.input_tokens).toBe(326); + }); + + it("应正确解析混合输入输出的完整 usage", () => { + const response = JSON.stringify({ + usageMetadata: { + promptTokenCount: 357, + candidatesTokenCount: 2100, + totalTokenCount: 2580, + promptTokensDetails: [ + { modality: "TEXT", tokenCount: 99 }, + { modality: "IMAGE", tokenCount: 258 }, + ], + candidatesTokensDetails: [{ modality: "IMAGE", tokenCount: 2000 }], + thoughtsTokenCount: 123, + }, + }); + + const result = parseUsageFromResponseText(response, "gemini"); + + expect(result.usageMetrics?.input_tokens).toBe(99); + expect(result.usageMetrics?.input_image_tokens).toBe(258); + // output_tokens = (candidatesTokenCount - IMAGE详情) + thoughtsTokenCount + // = (2100 - 2000) + 123 = 223 + expect(result.usageMetrics?.output_tokens).toBe(223); + expect(result.usageMetrics?.output_image_tokens).toBe(2000); + }); + + it("应处理只有 IMAGE modality 的 candidatesTokensDetails", () => { + const response = JSON.stringify({ + usageMetadata: { + promptTokenCount: 100, + candidatesTokenCount: 2000, + candidatesTokensDetails: [{ modality: "IMAGE", tokenCount: 2000 }], + }, + }); + + const result = parseUsageFromResponseText(response, "gemini"); + + expect(result.usageMetrics?.output_image_tokens).toBe(2000); + // candidatesTokenCount = 2000, IMAGE 
= 2000, 未分类 = 0 + expect(result.usageMetrics?.output_tokens).toBe(0); + }); + + it("应计算 candidatesTokenCount 与 details 的差值作为未分类 TEXT", () => { + const response = JSON.stringify({ + usageMetadata: { + promptTokenCount: 326, + candidatesTokenCount: 2340, + candidatesTokensDetails: [{ modality: "IMAGE", tokenCount: 2000 }], + thoughtsTokenCount: 337, + }, + }); + + const result = parseUsageFromResponseText(response, "gemini"); + + // 未分类 = 2340 - 2000 = 340 + // output_tokens = 340 + 337 (thoughts) = 677 + expect(result.usageMetrics?.output_tokens).toBe(677); + expect(result.usageMetrics?.output_image_tokens).toBe(2000); + }); + + it("应处理缺失 candidatesTokensDetails 的情况(向后兼容)", () => { + const response = JSON.stringify({ + usageMetadata: { + promptTokenCount: 1000, + candidatesTokenCount: 500, + }, + }); + + const result = parseUsageFromResponseText(response, "gemini"); + + expect(result.usageMetrics?.output_tokens).toBe(500); + expect(result.usageMetrics?.output_image_tokens).toBeUndefined(); + expect(result.usageMetrics?.input_image_tokens).toBeUndefined(); + }); + + it("应处理空的 candidatesTokensDetails 数组", () => { + const response = JSON.stringify({ + usageMetadata: { + promptTokenCount: 1000, + candidatesTokenCount: 500, + candidatesTokensDetails: [], + }, + }); + + const result = parseUsageFromResponseText(response, "gemini"); + + expect(result.usageMetrics?.output_tokens).toBe(500); + expect(result.usageMetrics?.output_image_tokens).toBeUndefined(); + }); + + it("应处理 candidatesTokensDetails 中无效 tokenCount 的情况", () => { + const response = JSON.stringify({ + usageMetadata: { + promptTokenCount: 1000, + candidatesTokenCount: 500, + candidatesTokensDetails: [ + { modality: "TEXT" }, + { modality: "IMAGE", tokenCount: null }, + { modality: "TEXT", tokenCount: -1 }, + ], + }, + }); + + const result = parseUsageFromResponseText(response, "gemini"); + + // 无效数据不应覆盖原始 candidatesTokenCount + expect(result.usageMetrics?.output_tokens).toBe(500); + 
expect(result.usageMetrics?.output_image_tokens).toBeUndefined(); + }); + + it("应处理 modality 大小写变体", () => { + const response = JSON.stringify({ + usageMetadata: { + promptTokenCount: 100, + candidatesTokenCount: 2340, + candidatesTokensDetails: [ + { modality: "image", tokenCount: 2000 }, + { modality: "Image", tokenCount: 100 }, + { modality: "TEXT", tokenCount: 240 }, + ], + }, + }); + + const result = parseUsageFromResponseText(response, "gemini"); + + expect(result.usageMetrics?.output_image_tokens).toBe(2100); + expect(result.usageMetrics?.output_tokens).toBe(240); + }); }); describe("OpenAI Response API 格式", () => {