10 changes: 10 additions & 0 deletions .env.example
@@ -128,6 +128,16 @@ FETCH_HEADERS_TIMEOUT=600000
FETCH_BODY_TIMEOUT=600000
MAX_RETRY_ATTEMPTS_DEFAULT=2 # Max attempts per provider (including the initial call), range 1-10; leave empty to use the default of 2

# Langfuse Observability (optional, auto-enabled when keys are set)
# What it does: enterprise-grade LLM observability that automatically traces the full lifecycle of every proxied request
# - Automatically enabled once PUBLIC_KEY and SECRET_KEY are configured
# - Supports both Langfuse Cloud and self-hosted instances
LANGFUSE_PUBLIC_KEY= # Langfuse project public key (pk-lf-...)
LANGFUSE_SECRET_KEY= # Langfuse project secret key (sk-lf-...)
LANGFUSE_BASE_URL=https://cloud.langfuse.com # Langfuse server URL (self-hosted or cloud)
LANGFUSE_SAMPLE_RATE=1.0 # Trace sampling rate (0.0-1.0, default: 1.0 = 100%)
LANGFUSE_DEBUG=false # Enable Langfuse debug logging
Comment on lines +131 to +139
⚠️ Potential issue | 🟡 Minor

Missing documentation for the LANGFUSE_MAX_IO_SIZE setting.

The env schema defines LANGFUSE_MAX_IO_SIZE (default 100,000, range 1-10,000,000), but .env.example does not include it. Consider adding it so operators know about this tunable parameter.

Suggested addition
 LANGFUSE_SAMPLE_RATE=1.0                    # Trace sampling rate (0.0-1.0, default: 1.0 = 100%)
 LANGFUSE_DEBUG=false                        # Enable Langfuse debug logging
+LANGFUSE_MAX_IO_SIZE=100000                 # Max I/O size per trace (chars, default: 100000, max: 10000000)
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
# Langfuse Observability (optional, auto-enabled when keys are set)
# What it does: enterprise-grade LLM observability that automatically traces the full lifecycle of every proxied request
# - Automatically enabled once PUBLIC_KEY and SECRET_KEY are configured
# - Supports both Langfuse Cloud and self-hosted instances
LANGFUSE_PUBLIC_KEY= # Langfuse project public key (pk-lf-...)
LANGFUSE_SECRET_KEY= # Langfuse project secret key (sk-lf-...)
LANGFUSE_BASE_URL=https://cloud.langfuse.com # Langfuse server URL (self-hosted or cloud)
LANGFUSE_SAMPLE_RATE=1.0 # Trace sampling rate (0.0-1.0, default: 1.0 = 100%)
LANGFUSE_DEBUG=false # Enable Langfuse debug logging
LANGFUSE_MAX_IO_SIZE=100000 # Max I/O size per trace (chars, default: 100000, max: 10000000)
🧰 Tools
🪛 dotenv-linter (4.0.0)

[warning] 135-135: [SpaceCharacter] The line has spaces around equal sign
[warning] 135-135: [ValueWithoutQuotes] This value needs to be surrounded in quotes
[warning] 136-136: [SpaceCharacter] The line has spaces around equal sign
[warning] 136-136: [ValueWithoutQuotes] This value needs to be surrounded in quotes
[warning] 137-137: [UnorderedKey] The LANGFUSE_BASE_URL key should go before the LANGFUSE_PUBLIC_KEY key
[warning] 137-137: [ValueWithoutQuotes] This value needs to be surrounded in quotes
[warning] 138-138: [UnorderedKey] The LANGFUSE_SAMPLE_RATE key should go before the LANGFUSE_SECRET_KEY key
[warning] 138-138: [ValueWithoutQuotes] This value needs to be surrounded in quotes
[warning] 139-139: [UnorderedKey] The LANGFUSE_DEBUG key should go before the LANGFUSE_PUBLIC_KEY key
[warning] 139-139: [ValueWithoutQuotes] This value needs to be surrounded in quotes

🤖 Prompt for AI Agents
In @.env.example around lines 131-139, add the missing LANGFUSE_MAX_IO_SIZE
environment variable documentation to .env.example: describe the variable name
LANGFUSE_MAX_IO_SIZE, its default value (100000), allowed range (1-10000000),
and its purpose (limits Langfuse I/O payload size) along with an example value
and note that it is optional; place it near the other LANGFUSE_* entries
(alongside LANGFUSE_SAMPLE_RATE and LANGFUSE_DEBUG) so operators can discover
and tune the setting.


@github-actions github-actions bot Feb 15, 2026

[MEDIUM] [COMMENT-INCOMPLETE] Missing LANGFUSE_MAX_IO_SIZE in .env.example

Location: .env.example:140

Why this is a problem: Tracing truncation uses LANGFUSE_MAX_IO_SIZE (src/lib/config/env.schema.ts:137, src/lib/langfuse/trace-proxy-request.ts:46), but operators will not discover/tune it via .env.example.

Suggested fix (add to the Langfuse block):

LANGFUSE_MAX_IO_SIZE=100000                 # Max chars of traced input/output payload (default: 100000)

# Smart probing configuration
# What it does: when the circuit breaker is OPEN, periodically probe the provider for faster recovery
# - ENABLE_SMART_PROBING: whether smart probing is enabled (default: false)
4 changes: 4 additions & 0 deletions package.json
@@ -44,7 +44,11 @@
"@hono/zod-openapi": "^1",
"@hookform/resolvers": "^5",
"@iarna/toml": "^2.2.5",
"@langfuse/client": "^4.6.1",
"@langfuse/otel": "^4.6.1",
"@langfuse/tracing": "^4.6.1",
"@lobehub/icons": "^2",
"@opentelemetry/sdk-node": "^0.212.0",
Comment on lines +47 to +51

⚠️ Potential issue | 🔴 Critical

🧩 Analysis chain

🌐 Web query:

@opentelemetry/sdk-node npm registry version 0.212.0

💡 Result:

@opentelemetry/sdk-node@0.212.0 does not appear to be published on the npm registry. The latest published npm version shown by multiple package indexes is 0.211.0. [1][2][3]

There is an upstream GitHub release tag experimental/v0.212.0 in the open-telemetry/opentelemetry-js repo, but that doesn’t necessarily mean the @opentelemetry/sdk-node npm package was published at 0.212.0. [4]

What to do

  • Install the latest available: npm i @opentelemetry/sdk-node@0.211.0 [1][2][3]
  • Or verify from your environment: npm view @opentelemetry/sdk-node versions --json | jq 'map(select(.=="0.212.0"))' (should return nothing if it’s unpublished)

Sources: [1][2][3][4]


🌐 Web query:

@langfuse/client @langfuse/otel @langfuse/tracing npm registry version 4.6.1

💡 Result:

I couldn’t find any evidence that version 4.6.1 exists on the npm registry for @langfuse/client, @langfuse/tracing, or @langfuse/otel based on publicly indexable sources.

What I could verify:

  • npm package pages (as indexed) show 4.0.0 for @langfuse/client, @langfuse/tracing, and @langfuse/otel. [1][2][3]
  • A registry mirror (Tessl) shows at least 4.2.0 for @langfuse/otel and @langfuse/tracing. [4][5]
  • A recent community post shows usage of ^4.5.1 for @langfuse/otel and @langfuse/tracing. [6]

To confirm from your environment (authoritative), run:

npm view `@langfuse/client` version
npm view `@langfuse/tracing` version
npm view `@langfuse/otel` version

npm view `@langfuse/client`@4.6.1 version
npm view `@langfuse/tracing`@4.6.1 version
npm view `@langfuse/otel`@4.6.1 version

Sources: [1] [2] [3] [4] [5] [6]


Fix the dependency version numbers: the specified versions do not exist on the npm registry.

  • @opentelemetry/sdk-node@0.212.0 has not been published to the npm registry; the latest available version is 0.211.0. Update it to ^0.211.0.
  • Version 4.6.1 of @langfuse/client, @langfuse/otel, and @langfuse/tracing could not be verified against public sources. Confirmed versions are 4.0.0, 4.2.0, and 4.5.1. Confirm which versions are actually available and update accordingly (consider verifying whether 4.5.1 meets the requirements).
🤖 Prompt for AI Agents
In `@package.json` around lines 47 - 51, fix the unavailable dependency versions: change
"@opentelemetry/sdk-node" from "0.212.0" to "^0.211.0", and update the three Langfuse packages
"@langfuse/client", "@langfuse/otel", and "@langfuse/tracing" from "4.6.1" to a version that exists on
npm (for example "^4.5.1"), or confirm and use the published version you actually need; then run npm/yarn install to verify there are no version resolution errors and update the lockfile
(package-lock.json or yarn.lock).

"@radix-ui/react-alert-dialog": "^1",
"@radix-ui/react-avatar": "^1",
"@radix-ui/react-checkbox": "^1",
5 changes: 4 additions & 1 deletion src/app/v1/_lib/proxy-handler.ts
@@ -78,9 +78,12 @@ export async function handleProxyRequest(c: Context): Promise<Response> {
});
}

session.recordForwardStart();
const response = await ProxyForwarder.send(session);
const handled = await ProxyResponseHandler.dispatch(session, response);
return await attachSessionIdToErrorResponse(session.sessionId, handled);
const finalResponse = await attachSessionIdToErrorResponse(session.sessionId, handled);

return finalResponse;
} catch (error) {
logger.error("Proxy handler error:", error);
if (session) {
2 changes: 2 additions & 0 deletions src/app/v1/_lib/proxy/forwarder.ts
@@ -1648,6 +1648,7 @@ export class ProxyForwarder {

const bodyString = JSON.stringify(bodyToSerialize);
requestBody = bodyString;
session.forwardedRequestBody = bodyString;
}

// Detect streaming requests: Gemini supports two mechanisms
@@ -1974,6 +1975,7 @@ export class ProxyForwarder {

const bodyString = JSON.stringify(messageToSend);
requestBody = bodyString;
session.forwardedRequestBody = bodyString;

try {
const parsed = JSON.parse(bodyString);
179 changes: 170 additions & 9 deletions src/app/v1/_lib/proxy/response-handler.ts
@@ -8,7 +8,8 @@ import { RateLimitService } from "@/lib/rate-limit";
import type { LeaseWindowType } from "@/lib/rate-limit/lease";
import { SessionManager } from "@/lib/session-manager";
import { SessionTracker } from "@/lib/session-tracker";
import { calculateRequestCost } from "@/lib/utils/cost-calculation";
import type { CostBreakdown } from "@/lib/utils/cost-calculation";
import { calculateRequestCost, calculateRequestCostBreakdown } from "@/lib/utils/cost-calculation";
import { hasValidPriceData } from "@/lib/utils/price-data";
import { isSSEText, parseSSEData } from "@/lib/utils/sse";
import { detectUpstreamErrorFromSseOrJsonText } from "@/lib/utils/upstream-error-detection";
@@ -39,6 +40,49 @@ export type UsageMetrics = {
output_image_tokens?: number;
};

/**
* Fire Langfuse trace asynchronously. Non-blocking, error-tolerant.
*/
function emitLangfuseTrace(
session: ProxySession,
data: {
responseHeaders: Headers;
responseText: string;
usageMetrics: UsageMetrics | null;
costUsd: string | undefined;
costBreakdown?: CostBreakdown;
statusCode: number;
durationMs: number;
isStreaming: boolean;
sseEventCount?: number;
errorMessage?: string;
}
): void {
if (!process.env.LANGFUSE_PUBLIC_KEY || !process.env.LANGFUSE_SECRET_KEY) return;

Redundant enabled check

emitLangfuseTrace checks process.env.LANGFUSE_PUBLIC_KEY and LANGFUSE_SECRET_KEY here, and then traceProxyRequest immediately checks isLangfuseEnabled() which performs the exact same check. This is not a bug, but the outer check could use isLangfuseEnabled() for consistency, or be removed entirely since traceProxyRequest already guards against it.

Suggested change
if (!process.env.LANGFUSE_PUBLIC_KEY || !process.env.LANGFUSE_SECRET_KEY) return;
if (!process.env.LANGFUSE_PUBLIC_KEY || !process.env.LANGFUSE_SECRET_KEY) return;

Note: If this suggestion doesn't match your team's coding style, reply to this and let me know. I'll remember it for next time!

Prompt To Fix With AI
This is a comment left during a code review.
Path: src/app/v1/_lib/proxy/response-handler.ts
Line: 61:61

How can I resolve this? If you propose a fix, please make it concise.


void import("@/lib/langfuse/trace-proxy-request")
.then(({ traceProxyRequest }) => {
void traceProxyRequest({
session,
responseHeaders: data.responseHeaders,
durationMs: data.durationMs,
statusCode: data.statusCode,
isStreaming: data.isStreaming,
responseText: data.responseText,
usageMetrics: data.usageMetrics,
costUsd: data.costUsd,
costBreakdown: data.costBreakdown,
sseEventCount: data.sseEventCount,
errorMessage: data.errorMessage,
});
})
.catch((err) => {
logger.warn("[ResponseHandler] Langfuse trace failed", {
error: err instanceof Error ? err.message : String(err),
});
});
}
Comment on lines +46 to +84

🛠️ Refactor suggestion | 🟠 Major

emitLangfuseTrace checks process.env directly, which is inconsistent with isLangfuseEnabled().

Line 61 reads process.env.LANGFUSE_PUBLIC_KEY / process.env.LANGFUSE_SECRET_KEY directly, while trace-proxy-request.ts internally uses isLangfuseEnabled() (from @/lib/langfuse/index). If the two checks ever diverge (for example, if isLangfuseEnabled later adds a sampling-rate check), this would cause unnecessary dynamic imports or skipped traces. Consider standardizing on isLangfuseEnabled().

Suggested: standardize on isLangfuseEnabled()
+import { isLangfuseEnabled } from "@/lib/langfuse/index";
+
 function emitLangfuseTrace(
   session: ProxySession,
   data: { ... }
 ): void {
-  if (!process.env.LANGFUSE_PUBLIC_KEY || !process.env.LANGFUSE_SECRET_KEY) return;
+  if (!isLangfuseEnabled()) return;
 
   void import("@/lib/langfuse/trace-proxy-request")
🤖 Prompt for AI Agents
In `@src/app/v1/_lib/proxy/response-handler.ts` around lines 46 - 84, The
emitLangfuseTrace function currently checks process.env directly which diverges
from trace-proxy-request's isLangfuseEnabled; replace the direct env check with
a call to isLangfuseEnabled() (imported from "@/lib/langfuse" or
"@/lib/langfuse/index") so both use the same enablement logic, i.e., call
isLangfuseEnabled() at the top of emitLangfuseTrace and only perform the dynamic
import/traceProxyRequest when it returns true, preserving the existing error
logging behavior for the import/trace call.
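
For reference, a minimal sketch of what such an isLangfuseEnabled() helper could look like, assuming it gates purely on the two key variables (the real implementation in @/lib/langfuse may also consider sampling or a parsed config schema):

// Hypothetical sketch of the enablement helper in @/lib/langfuse (assumption, not from this PR).
export function isLangfuseEnabled(): boolean {
  // Enabled only when both project keys are present, mirroring the check in emitLangfuseTrace.
  return Boolean(process.env.LANGFUSE_PUBLIC_KEY && process.env.LANGFUSE_SECRET_KEY);
}

emitLangfuseTrace would then start with `if (!isLangfuseEnabled()) return;`, keeping the cheap early-exit while sharing one source of truth with traceProxyRequest.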


/**
* Strip transport-related headers from the Response headers
*
@@ -520,6 +564,18 @@ export class ProxyResponseHandler {
duration,
errorMessageForFinalize
);

emitLangfuseTrace(session, {
responseHeaders: response.headers,
responseText,
usageMetrics: parseUsageFromResponseText(responseText, provider.providerType)
.usageMetrics,
costUsd: undefined,
statusCode,
durationMs: duration,
isStreaming: false,
errorMessage: errorMessageForFinalize,
});
} catch (error) {
if (!isClientAbortError(error as Error)) {
logger.error(
@@ -687,10 +743,11 @@ export class ProxyResponseHandler {
await trackCostToRedis(session, usageMetrics);
}

// Update session usage in Redis (for real-time monitoring)
if (session.sessionId && usageMetrics) {
// Calculate cost (reuses the same logic)
let costUsdStr: string | undefined;
// Calculate cost for session tracking (with multiplier) and Langfuse (raw)
let costUsdStr: string | undefined;
let rawCostUsdStr: string | undefined;
let costBreakdown: CostBreakdown | undefined;
if (usageMetrics) {
try {
if (session.request.model) {
const priceData = await session.getCachedPriceDataByBillingSource();
Expand All @@ -704,14 +761,41 @@ export class ProxyResponseHandler {
if (cost.gt(0)) {
costUsdStr = cost.toString();
}
// Raw cost without multiplier for Langfuse
if (provider.costMultiplier !== 1) {
const rawCost = calculateRequestCost(
usageMetrics,
priceData,
1.0,
session.getContext1mApplied()
);
if (rawCost.gt(0)) {
rawCostUsdStr = rawCost.toString();
}
} else {
rawCostUsdStr = costUsdStr;
}
Comment on lines +764 to +777

medium

This logic for calculating the raw cost for Langfuse (without the provider's cost multiplier) appears to be duplicated from the handleStream method (lines 1700-1713). To improve maintainability, consider extracting this into a private helper method within the ProxyResponseHandler class.
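
As a rough illustration of that refactor, a file-local helper in response-handler.ts could compute the multiplied cost, the raw cost, and the breakdown in one place. This is only a sketch assembled from the calls already visible in this diff; the helper name computeCostsForTrace is hypothetical:

// Hypothetical file-local helper for response-handler.ts (not part of this PR).
// It folds the two duplicated blocks into one place, using only functions the
// file already imports: calculateRequestCost and calculateRequestCostBreakdown.
type PriceData = Parameters<typeof calculateRequestCost>[1];

function computeCostsForTrace(
  usage: UsageMetrics,
  priceData: PriceData,
  costMultiplier: number,
  context1mApplied: Parameters<typeof calculateRequestCost>[3]
): { costUsd?: string; rawCostUsd?: string; breakdown?: CostBreakdown } {
  const out: { costUsd?: string; rawCostUsd?: string; breakdown?: CostBreakdown } = {};

  // Cost with the provider multiplier (used for session tracking / billing).
  const cost = calculateRequestCost(usage, priceData, costMultiplier, context1mApplied);
  if (cost.gt(0)) out.costUsd = cost.toString();

  // Raw cost without the multiplier (reported to Langfuse).
  if (costMultiplier !== 1) {
    const rawCost = calculateRequestCost(usage, priceData, 1.0, context1mApplied);
    if (rawCost.gt(0)) out.rawCostUsd = rawCost.toString();
  } else {
    out.rawCostUsd = out.costUsd;
  }

  // Breakdown is best-effort and non-critical, mirroring the existing try/catch.
  try {
    out.breakdown = calculateRequestCostBreakdown(usage, priceData, context1mApplied);
  } catch {
    /* non-critical */
  }

  return out;
}

Both handleNonStream and handleStream could then call it once inside their existing try blocks and destructure costUsd, rawCostUsd, and breakdown.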

// Cost breakdown for Langfuse (raw, no multiplier)
try {
costBreakdown = calculateRequestCostBreakdown(
usageMetrics,
priceData,
session.getContext1mApplied()
);
} catch {
/* non-critical */
}
}
}
} catch (error) {
logger.error("[ResponseHandler] Failed to calculate session cost, skipping", {
error: error instanceof Error ? error.message : String(error),
});
}
}

// Update session usage in Redis (for real-time monitoring)
if (session.sessionId && usageMetrics) {
void SessionManager.updateSessionUsage(session.sessionId, {
inputTokens: usageMetrics.input_tokens,
outputTokens: usageMetrics.output_tokens,
@@ -782,6 +866,17 @@
providerName: provider.name,
statusCode,
});

emitLangfuseTrace(session, {
responseHeaders: response.headers,
responseText,
usageMetrics,
costUsd: rawCostUsdStr,
costBreakdown,
statusCode,
durationMs: Date.now() - session.startTime,
isStreaming: false,
});
} catch (error) {
// 检测 AbortError 的来源:响应超时 vs 客户端中断
const err = error as Error;
@@ -1220,6 +1315,18 @@
finalized.errorMessage ?? undefined,
finalized.providerIdForPersistence ?? undefined
);

emitLangfuseTrace(session, {
responseHeaders: response.headers,
responseText: allContent,
usageMetrics: parseUsageFromResponseText(allContent, provider.providerType)
.usageMetrics,
costUsd: undefined,
statusCode: finalized.effectiveStatusCode,
durationMs: duration,
isStreaming: true,
errorMessage: finalized.errorMessage ?? undefined,
});
} catch (error) {
const err = error instanceof Error ? error : new Error(String(error));
const clientAborted = session.clientAbortSignal?.aborted ?? false;
@@ -1588,11 +1695,13 @@
// Track spend in Redis (for rate limiting)
await trackCostToRedis(session, usageForCost);

// Update session usage in Redis (for real-time monitoring)
if (session.sessionId) {
let costUsdStr: string | undefined;
// Calculate cost for session tracking (with multiplier) and Langfuse (raw)
let costUsdStr: string | undefined;
let rawCostUsdStr: string | undefined;
let costBreakdown: CostBreakdown | undefined;
if (usageForCost) {
try {
if (usageForCost && session.request.model) {
if (session.request.model) {
const priceData = await session.getCachedPriceDataByBillingSource();
if (priceData) {
const cost = calculateRequestCost(
Expand All @@ -1604,14 +1713,41 @@ export class ProxyResponseHandler {
if (cost.gt(0)) {
costUsdStr = cost.toString();
}
// Raw cost without multiplier for Langfuse
if (provider.costMultiplier !== 1) {
const rawCost = calculateRequestCost(
usageForCost,
priceData,
1.0,
session.getContext1mApplied()
);
if (rawCost.gt(0)) {
rawCostUsdStr = rawCost.toString();
}
} else {
rawCostUsdStr = costUsdStr;
}
Comment on lines +1716 to +1729

medium

This is a duplication of the raw cost calculation logic found in handleNonStream (lines 760-773). As suggested in the other comment, refactoring this into a shared helper method would reduce code duplication.

// Cost breakdown for Langfuse (raw, no multiplier)
try {
costBreakdown = calculateRequestCostBreakdown(
usageForCost,
priceData,
session.getContext1mApplied()
);
} catch {
/* non-critical */
}
}
}
} catch (error) {
logger.error("[ResponseHandler] Failed to calculate session cost (stream), skipping", {
error: error instanceof Error ? error.message : String(error),
});
}
}

// Update session usage in Redis (for real-time monitoring)
if (session.sessionId) {
const payload: SessionUsageUpdate = {
status: effectiveStatusCode >= 200 && effectiveStatusCode < 300 ? "completed" : "error",
statusCode: effectiveStatusCode,
@@ -1650,6 +1786,19 @@
providerId: providerIdForPersistence ?? session.provider?.id, // 更新最终供应商ID(重试切换后)
context1mApplied: session.getContext1mApplied(),
});

emitLangfuseTrace(session, {
responseHeaders: response.headers,
responseText: allContent,
usageMetrics: usageForCost,
costUsd: rawCostUsdStr,
costBreakdown,
statusCode: effectiveStatusCode,
durationMs: duration,
isStreaming: true,
sseEventCount: chunks.length,
errorMessage: streamErrorMessage ?? undefined,
});
};

try {
@@ -2919,6 +3068,18 @@ async function persistRequestFailure(options: {
});
}
}

// Emit Langfuse trace for error/abort paths
emitLangfuseTrace(session, {
responseHeaders: new Headers(),
responseText: "",
usageMetrics: null,
costUsd: undefined,
statusCode,
durationMs: duration,
isStreaming: phase === "stream",
errorMessage,
});
}

/**
16 changes: 16 additions & 0 deletions src/app/v1/_lib/proxy/session.ts
@@ -67,6 +67,12 @@ export class ProxySession {
// Time To First Byte (ms). Streaming: first chunk. Non-stream: equals durationMs.
ttfbMs: number | null = null;

// Timestamp when guard pipeline finished and forwarding started (epoch ms).
forwardStartTime: number | null = null;

// Actual serialized request body sent to upstream (after all preprocessing).
forwardedRequestBody: string | null = null;

// Session ID (used for session stickiness and concurrent rate limiting)
sessionId: string | null;

@@ -313,6 +319,16 @@ export class ProxySession {
return value;
}

/**
* Record the timestamp when guard pipeline finished and upstream forwarding begins.
* Called once; subsequent calls are no-ops.
*/
recordForwardStart(): void {
if (this.forwardStartTime === null) {
this.forwardStartTime = Date.now();
}
}

/**
* Set the session ID
*/
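
This page shows the new forwardStartTime / forwardedRequestBody fields but not their consumer; as an assumed illustration (not confirmed by this PR), a trace emitter could use forwardStartTime to separate guard-pipeline time from upstream time:

// Illustrative only: splits total duration using the new ProxySession fields.
// The import path is assumed from the diff's file location.
import type { ProxySession } from "@/app/v1/_lib/proxy/session";

function splitDurations(session: ProxySession, endTime: number = Date.now()) {
  const totalMs = endTime - session.startTime;
  // Time spent in guards/auth/routing before the upstream call started.
  const preForwardMs =
    session.forwardStartTime !== null ? session.forwardStartTime - session.startTime : null;
  // Time spent waiting on the upstream provider.
  const upstreamMs =
    session.forwardStartTime !== null ? endTime - session.forwardStartTime : null;
  return { totalMs, preForwardMs, upstreamMs };
}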
19 changes: 19 additions & 0 deletions src/instrumentation.ts
@@ -140,6 +140,15 @@ function warmupApiKeyVacuumFilter(): void {
export async function register() {
// Only run on the server side
if (process.env.NEXT_RUNTIME === "nodejs") {
// Initialize Langfuse observability (no-op if env vars not set)
try {
const { initLangfuse } = await import("@/lib/langfuse");
await initLangfuse();
} catch (error) {
logger.warn("[Instrumentation] Langfuse initialization failed (non-critical)", {
error: error instanceof Error ? error.message : String(error),
});
}
// Skip initialization in CI environment (no DB connection needed)
if (process.env.CI === "true") {
logger.warn(
@@ -216,6 +225,16 @@ export async function register() {
});
}

// Flush Langfuse pending spans
try {
const { shutdownLangfuse } = await import("@/lib/langfuse");
await shutdownLangfuse();
} catch (error) {
logger.warn("[Instrumentation] Failed to shutdown Langfuse", {
error: error instanceof Error ? error.message : String(error),
});
}

// Best-effort flush of pending async batched message_request updates to the database (avoid losing trailing logs on shutdown)
try {
const { stopMessageRequestWriteBuffer } = await import(
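
For orientation, one plausible shape for the @/lib/langfuse module that register() imports here. Only the function names initLangfuse and shutdownLangfuse come from this diff; the internals below are assumptions about the Langfuse v4 OpenTelemetry integration and may not match the actual module:

// Hypothetical sketch of @/lib/langfuse (assumed, not shown in this PR).
// Assumptions: @langfuse/otel exports LangfuseSpanProcessor (which reads the
// LANGFUSE_* env vars), and @opentelemetry/sdk-node's NodeSDK accepts spanProcessors.
import { LangfuseSpanProcessor } from "@langfuse/otel";
import { NodeSDK } from "@opentelemetry/sdk-node";

let sdk: NodeSDK | null = null;

export async function initLangfuse(): Promise<void> {
  // Same enablement check as the isLangfuseEnabled() sketch earlier on this page.
  if (!process.env.LANGFUSE_PUBLIC_KEY || !process.env.LANGFUSE_SECRET_KEY || sdk) return;
  sdk = new NodeSDK({ spanProcessors: [new LangfuseSpanProcessor()] });
  sdk.start();
}

export async function shutdownLangfuse(): Promise<void> {
  if (!sdk) return;
  await sdk.shutdown(); // flushes pending spans before the process exits
  sdk = null;
}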