From 9f099529f6ce96816a7233477d0d95a6b82f4c37 Mon Sep 17 00:00:00 2001 From: Arvin Xu Date: Fri, 7 Mar 2025 02:46:39 +0800 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B=20fix:=20fix=20litellm=20streaming?= =?UTF-8?q?=20usage=20and=20refactor=20the=20usage=20chunk=20(#6734)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix litellm usage * update implement * refactor to fix agent test * update * fix tests * fix tests * fix tests * fix tests * Update ui * Update ui * fix tests * refactor token calc * refactor token calc * add cached display * update i18n --- locales/ar/chat.json | 6 +- locales/ar/models.json | 15 +- locales/bg-BG/chat.json | 6 +- locales/bg-BG/models.json | 15 +- locales/de-DE/chat.json | 6 +- locales/de-DE/models.json | 15 +- locales/en-US/chat.json | 6 +- locales/en-US/models.json | 15 +- locales/es-ES/chat.json | 6 +- locales/es-ES/models.json | 15 +- locales/fa-IR/chat.json | 6 +- locales/fa-IR/models.json | 15 +- locales/fr-FR/chat.json | 6 +- locales/fr-FR/models.json | 15 +- locales/it-IT/chat.json | 6 +- locales/it-IT/models.json | 15 +- locales/ja-JP/chat.json | 6 +- locales/ja-JP/models.json | 15 +- locales/ko-KR/chat.json | 6 +- locales/ko-KR/models.json | 15 +- locales/nl-NL/chat.json | 6 +- locales/nl-NL/models.json | 15 +- locales/pl-PL/chat.json | 6 +- locales/pl-PL/models.json | 15 +- locales/pt-BR/chat.json | 6 +- locales/pt-BR/models.json | 15 +- locales/ru-RU/chat.json | 6 +- locales/ru-RU/models.json | 15 +- locales/tr-TR/chat.json | 6 +- locales/tr-TR/models.json | 15 +- locales/vi-VN/chat.json | 6 +- locales/vi-VN/models.json | 15 +- locales/zh-CN/chat.json | 6 +- locales/zh-CN/models.json | 15 +- locales/zh-TW/chat.json | 6 +- locales/zh-TW/models.json | 15 +- src/config/aiModels/perplexity.ts | 56 +-- .../Extras/Usage/UsageDetail/ModelCard.tsx | 36 +- .../Extras/Usage/UsageDetail/index.tsx | 112 ++++-- .../Extras/Usage/UsageDetail/tokens.test.ts | 253 +++++++++++++ .../Extras/Usage/UsageDetail/tokens.ts | 111 +++--- src/libs/agent-runtime/baichuan/index.test.ts | 59 ++- src/libs/agent-runtime/groq/index.test.ts | 320 ++--------------- src/libs/agent-runtime/mistral/index.test.ts | 339 ++---------------- .../agent-runtime/perplexity/index.test.ts | 22 +- src/libs/agent-runtime/providerTestUtils.ts | 58 +++ .../agent-runtime/togetherai/index.test.ts | 302 +--------------- .../openaiCompatibleFactory/index.test.ts | 3 + .../utils/openaiCompatibleFactory/index.ts | 7 +- .../utils/streams/anthropic.test.ts | 94 ++++- .../agent-runtime/utils/streams/anthropic.ts | 33 +- .../utils/streams/openai.test.ts | 272 +++++++++----- .../agent-runtime/utils/streams/openai.ts | 25 +- .../utils/usageConverter.test.ts | 249 +++++++++++++ .../agent-runtime/utils/usageConverter.ts | 50 +++ src/libs/agent-runtime/zeroone/index.test.ts | 301 +--------------- src/locales/default/chat.ts | 4 + src/types/message/base.ts | 18 +- src/utils/filter.test.ts | 122 ------- src/utils/filter.ts | 29 -- 60 files changed, 1508 insertions(+), 1745 deletions(-) create mode 100644 src/features/Conversation/Extras/Usage/UsageDetail/tokens.test.ts create mode 100644 src/libs/agent-runtime/utils/usageConverter.test.ts create mode 100644 src/libs/agent-runtime/utils/usageConverter.ts delete mode 100644 src/utils/filter.test.ts delete mode 100644 src/utils/filter.ts diff --git a/locales/ar/chat.json b/locales/ar/chat.json index be1d46e0643a1..cae88d4660be3 100644 --- a/locales/ar/chat.json +++ b/locales/ar/chat.json @@ -89,16 +89,20 @@ "inputCharts": 
"${{amount}}/M حرف", "inputMinutes": "${{amount}}/دقيقة", "inputTokens": "مدخلات {{amount}}/نقطة · ${{amount}}/M", - "outputTokens": "مخرجات {{amount}}/نقطة · ${{amount}}/M" + "outputTokens": "مخرجات {{amount}}/نقطة · ${{amount}}/M", + "writeCacheInputTokens": "تخزين إدخال الكتابة {{amount}}/نقطة · ${{amount}}/ميغابايت" } }, "tokenDetails": { + "average": "متوسط السعر", "input": "مدخلات", "inputAudio": "مدخلات صوتية", "inputCached": "مدخلات مخزنة", + "inputCitation": "اقتباس الإدخال", "inputText": "مدخلات نصية", "inputTitle": "تفاصيل المدخلات", "inputUncached": "مدخلات غير مخزنة", + "inputWriteCached": "تخزين إدخال الكتابة", "output": "مخرجات", "outputAudio": "مخرجات صوتية", "outputText": "مخرجات نصية", diff --git a/locales/ar/models.json b/locales/ar/models.json index bec40e1a4bcda..8d47719f23e23 100644 --- a/locales/ar/models.json +++ b/locales/ar/models.json @@ -1121,15 +1121,6 @@ "llama-3.1-8b-instant": { "description": "Llama 3.1 8B هو نموذج عالي الأداء، يوفر قدرة سريعة على توليد النصوص، مما يجعله مثاليًا لمجموعة من التطبيقات التي تتطلب كفاءة كبيرة وتكلفة فعالة." }, - "llama-3.1-sonar-huge-128k-online": { - "description": "نموذج Llama 3.1 Sonar Huge Online، يتمتع بـ 405B من المعلمات، يدعم طول سياق حوالي 127,000 علامة، مصمم لتطبيقات دردشة معقدة عبر الإنترنت." - }, - "llama-3.1-sonar-large-128k-online": { - "description": "نموذج Llama 3.1 Sonar Large Online، يتمتع بـ 70B من المعلمات، يدعم طول سياق حوالي 127,000 علامة، مناسب لمهام دردشة عالية السعة ومتنوعة." - }, - "llama-3.1-sonar-small-128k-online": { - "description": "نموذج Llama 3.1 Sonar Small Online، يتمتع بـ 8B من المعلمات، يدعم طول سياق حوالي 127,000 علامة، مصمم للدردشة عبر الإنترنت، قادر على معالجة تفاعلات نصية متنوعة بكفاءة." - }, "llama-3.2-11b-vision-instruct": { "description": "قدرة استدلال الصور التي تبرز في الصور عالية الدقة، مناسبة لتطبيقات الفهم البصري." }, @@ -1643,6 +1634,9 @@ "qwq-32b-preview": { "description": "نموذج QwQ هو نموذج بحث تجريبي تم تطويره بواسطة فريق Qwen، يركز على تعزيز قدرات الاستدلال للذكاء الاصطناعي." }, + "r1-1776": { + "description": "R1-1776 هو إصدار من نموذج DeepSeek R1، تم تدريبه لاحقًا لتقديم معلومات حقائق غير خاضعة للرقابة وغير متحيزة." + }, "solar-mini": { "description": "Solar Mini هو نموذج LLM مدمج، يتفوق على GPT-3.5، ويتميز بقدرات متعددة اللغات قوية، ويدعم الإنجليزية والكورية، ويقدم حلولًا فعالة وصغيرة الحجم." }, @@ -1655,6 +1649,9 @@ "sonar": { "description": "منتج بحث خفيف الوزن يعتمد على سياق البحث، أسرع وأرخص من Sonar Pro." }, + "sonar-deep-research": { + "description": "تقوم Deep Research بإجراء أبحاث شاملة على مستوى الخبراء وتجميعها في تقارير يمكن الوصول إليها وقابلة للتنفيذ." + }, "sonar-pro": { "description": "منتج بحث متقدم يدعم سياق البحث، مع دعم للاستعلامات المتقدمة والمتابعة." 
}, diff --git a/locales/bg-BG/chat.json b/locales/bg-BG/chat.json index 85404b2aba521..a0f9d07105424 100644 --- a/locales/bg-BG/chat.json +++ b/locales/bg-BG/chat.json @@ -89,16 +89,20 @@ "inputCharts": "${{amount}}/M символи", "inputMinutes": "${{amount}}/минута", "inputTokens": "Входящи {{amount}}/кредити · ${{amount}}/M", - "outputTokens": "Изходящи {{amount}}/кредити · ${{amount}}/M" + "outputTokens": "Изходящи {{amount}}/кредити · ${{amount}}/M", + "writeCacheInputTokens": "Кеширане на входящи данни {{amount}}/точки · ${{amount}}/M" } }, "tokenDetails": { + "average": "Средна цена", "input": "Вход", "inputAudio": "Аудио вход", "inputCached": "Кеширан вход", + "inputCitation": "Цитиране на входящи данни", "inputText": "Текстов вход", "inputTitle": "Детайли за входа", "inputUncached": "Некеширан вход", + "inputWriteCached": "Входящи кеширани данни", "output": "Изход", "outputAudio": "Аудио изход", "outputText": "Текстов изход", diff --git a/locales/bg-BG/models.json b/locales/bg-BG/models.json index ca156b64361fc..27b7015ddaf1b 100644 --- a/locales/bg-BG/models.json +++ b/locales/bg-BG/models.json @@ -1121,15 +1121,6 @@ "llama-3.1-8b-instant": { "description": "Llama 3.1 8B е модел с висока производителност, предлагащ бързи способности за генериране на текст, особено подходящ за приложения, изискващи мащабна ефективност и икономичност." }, - "llama-3.1-sonar-huge-128k-online": { - "description": "Llama 3.1 Sonar Huge Online модел, с 405B параметри, поддържащ контекстова дължина от около 127,000 маркера, проектиран за сложни онлайн чат приложения." - }, - "llama-3.1-sonar-large-128k-online": { - "description": "Llama 3.1 Sonar Large Online модел, с 70B параметри, поддържащ контекстова дължина от около 127,000 маркера, подходящ за задачи с висока капацитет и разнообразие в чата." - }, - "llama-3.1-sonar-small-128k-online": { - "description": "Llama 3.1 Sonar Small Online модел, с 8B параметри, поддържащ контекстова дължина от около 127,000 маркера, проектиран за онлайн чат, способен да обработва ефективно различни текстови взаимодействия." - }, "llama-3.2-11b-vision-instruct": { "description": "Изключителни способности за визуално разсъждение върху изображения с висока разделителна способност, подходящи за приложения за визуално разбиране." }, @@ -1643,6 +1634,9 @@ "qwq-32b-preview": { "description": "QwQ моделът е експериментален изследователски модел, разработен от екипа на Qwen, който се фокусира върху подобряване на AI разсъдъчните способности." }, + "r1-1776": { + "description": "R1-1776 е версия на модела DeepSeek R1, след обучението, която предоставя непроверена и безпристрастна фактическа информация." + }, "solar-mini": { "description": "Solar Mini е компактен LLM, който превъзхожда GPT-3.5, с мощни многоезични способности, поддържа английски и корейски, предоставяйки ефективно и компактно решение." }, @@ -1655,6 +1649,9 @@ "sonar": { "description": "Лек продукт за търсене, базиран на контекст на търсене, по-бърз и по-евтин от Sonar Pro." }, + "sonar-deep-research": { + "description": "Deep Research извършва задълбочени експертни изследвания и ги обобщава в достъпни и приложими доклади." + }, "sonar-pro": { "description": "Разширен продукт за търсене, който поддържа контекст на търсене, напреднали запитвания и проследяване." 
}, diff --git a/locales/de-DE/chat.json b/locales/de-DE/chat.json index d3cd9d8fd86b5..f2f3308067aa8 100644 --- a/locales/de-DE/chat.json +++ b/locales/de-DE/chat.json @@ -89,16 +89,20 @@ "inputCharts": "${{amount}}/M Zeichen", "inputMinutes": "${{amount}}/Minute", "inputTokens": "Eingabe {{amount}}/Punkte · ${{amount}}/M", - "outputTokens": "Ausgabe {{amount}}/Punkte · ${{amount}}/M" + "outputTokens": "Ausgabe {{amount}}/Punkte · ${{amount}}/M", + "writeCacheInputTokens": "Cache-Eingabe schreiben {{amount}}/Punkte · ${{amount}}/M" } }, "tokenDetails": { + "average": "Durchschnittspreis", "input": "Eingabe", "inputAudio": "Audioeingabe", "inputCached": "Eingabe zwischengespeichert", + "inputCitation": "Eingabe zitieren", "inputText": "Text-Eingabe", "inputTitle": "Eingabedetails", "inputUncached": "Eingabe nicht zwischengespeichert", + "inputWriteCached": "Eingabe Cache schreiben", "output": "Ausgabe", "outputAudio": "Audioausgabe", "outputText": "Text-Ausgabe", diff --git a/locales/de-DE/models.json b/locales/de-DE/models.json index bca2c3ab2ed5d..7b26cf24877c2 100644 --- a/locales/de-DE/models.json +++ b/locales/de-DE/models.json @@ -1121,15 +1121,6 @@ "llama-3.1-8b-instant": { "description": "Llama 3.1 8B ist ein leistungsstarkes Modell, das schnelle Textgenerierungsfähigkeiten bietet und sich hervorragend für Anwendungen eignet, die große Effizienz und Kosteneffektivität erfordern." }, - "llama-3.1-sonar-huge-128k-online": { - "description": "Das Llama 3.1 Sonar Huge Online-Modell hat 405B Parameter und unterstützt eine Kontextlänge von etwa 127.000 Markierungen, es wurde für komplexe Online-Chat-Anwendungen entwickelt." - }, - "llama-3.1-sonar-large-128k-online": { - "description": "Das Llama 3.1 Sonar Large Online-Modell hat 70B Parameter und unterstützt eine Kontextlänge von etwa 127.000 Markierungen, es eignet sich für hochvolumige und vielfältige Chat-Aufgaben." - }, - "llama-3.1-sonar-small-128k-online": { - "description": "Das Llama 3.1 Sonar Small Online-Modell hat 8B Parameter und unterstützt eine Kontextlänge von etwa 127.000 Markierungen, es wurde speziell für Online-Chat entwickelt und kann verschiedene Textinteraktionen effizient verarbeiten." - }, "llama-3.2-11b-vision-instruct": { "description": "Überlegene Bildverarbeitungsfähigkeiten auf hochauflösenden Bildern, geeignet für visuelle Verständnisanwendungen." }, @@ -1643,6 +1634,9 @@ "qwq-32b-preview": { "description": "Das QwQ-Modell ist ein experimentelles Forschungsmodell, das vom Qwen-Team entwickelt wurde und sich auf die Verbesserung der KI-Inferenzfähigkeiten konzentriert." }, + "r1-1776": { + "description": "R1-1776 ist eine Version des DeepSeek R1 Modells, die nachtrainiert wurde, um unverfälschte, unvoreingenommene Fakteninformationen bereitzustellen." + }, "solar-mini": { "description": "Solar Mini ist ein kompaktes LLM, das besser abschneidet als GPT-3.5 und über starke Mehrsprachigkeitsfähigkeiten verfügt. Es unterstützt Englisch und Koreanisch und bietet eine effiziente und kompakte Lösung." }, @@ -1655,6 +1649,9 @@ "sonar": { "description": "Ein leichtgewichtiges Suchprodukt, das auf kontextbezogener Suche basiert und schneller und günstiger ist als Sonar Pro." }, + "sonar-deep-research": { + "description": "Deep Research führt umfassende Expertenforschung durch und fasst diese in zugänglichen, umsetzbaren Berichten zusammen." + }, "sonar-pro": { "description": "Ein fortschrittliches Suchprodukt, das kontextbezogene Suche unterstützt und erweiterte Abfragen sowie Nachverfolgung ermöglicht." 
}, diff --git a/locales/en-US/chat.json b/locales/en-US/chat.json index 4fa12af5160ee..2a3d0a7e78ebd 100644 --- a/locales/en-US/chat.json +++ b/locales/en-US/chat.json @@ -89,16 +89,20 @@ "inputCharts": "${{amount}}/M characters", "inputMinutes": "${{amount}}/minute", "inputTokens": "Input {{amount}}/credits · ${{amount}}/M", - "outputTokens": "Output {{amount}}/credits · ${{amount}}/M" + "outputTokens": "Output {{amount}}/credits · ${{amount}}/M", + "writeCacheInputTokens": "Cache input write {{amount}}/points · ${{amount}}/M" } }, "tokenDetails": { + "average": "Average unit price", "input": "Input", "inputAudio": "Audio Input", "inputCached": "Cached Input", + "inputCitation": "Input citation", "inputText": "Text Input", "inputTitle": "Input Details", "inputUncached": "Uncached Input", + "inputWriteCached": "Input cache write", "output": "Output", "outputAudio": "Audio Output", "outputText": "Text Output", diff --git a/locales/en-US/models.json b/locales/en-US/models.json index d5ef8d0399b90..e4ec3ba3a4927 100644 --- a/locales/en-US/models.json +++ b/locales/en-US/models.json @@ -1121,15 +1121,6 @@ "llama-3.1-8b-instant": { "description": "Llama 3.1 8B is a high-performance model that offers rapid text generation capabilities, making it ideal for applications requiring large-scale efficiency and cost-effectiveness." }, - "llama-3.1-sonar-huge-128k-online": { - "description": "Llama 3.1 Sonar Huge Online model, featuring 405B parameters, supports a context length of approximately 127,000 tokens, designed for complex online chat applications." - }, - "llama-3.1-sonar-large-128k-online": { - "description": "Llama 3.1 Sonar Large Online model, featuring 70B parameters, supports a context length of approximately 127,000 tokens, suitable for high-capacity and diverse chat tasks." - }, - "llama-3.1-sonar-small-128k-online": { - "description": "Llama 3.1 Sonar Small Online model, featuring 8B parameters, supports a context length of approximately 127,000 tokens, designed for online chat, efficiently handling various text interactions." - }, "llama-3.2-11b-vision-instruct": { "description": "Excellent image reasoning capabilities on high-resolution images, suitable for visual understanding applications." }, @@ -1643,6 +1634,9 @@ "qwq-32b-preview": { "description": "The QwQ model is an experimental research model developed by the Qwen team, focusing on enhancing AI reasoning capabilities." }, + "r1-1776": { + "description": "R1-1776 is a version of the DeepSeek R1 model, fine-tuned to provide unfiltered, unbiased factual information." + }, "solar-mini": { "description": "Solar Mini is a compact LLM that outperforms GPT-3.5, featuring strong multilingual capabilities and supporting English and Korean, providing an efficient and compact solution." }, @@ -1655,6 +1649,9 @@ "sonar": { "description": "A lightweight search product based on contextual search, faster and cheaper than Sonar Pro." }, + "sonar-deep-research": { + "description": "Deep Research conducts comprehensive expert-level studies and synthesizes them into accessible, actionable reports." + }, "sonar-pro": { "description": "An advanced search product that supports contextual search, advanced queries, and follow-ups." 
}, diff --git a/locales/es-ES/chat.json b/locales/es-ES/chat.json index 86c7242127bfd..d5b2927b31d5d 100644 --- a/locales/es-ES/chat.json +++ b/locales/es-ES/chat.json @@ -89,16 +89,20 @@ "inputCharts": "${{amount}}/M caracteres", "inputMinutes": "${{amount}}/minuto", "inputTokens": "Entradas {{amount}}/créditos · ${{amount}}/M", - "outputTokens": "Salidas {{amount}}/créditos · ${{amount}}/M" + "outputTokens": "Salidas {{amount}}/créditos · ${{amount}}/M", + "writeCacheInputTokens": "Escritura en caché de entrada {{amount}}/puntos · ${{amount}}/M" } }, "tokenDetails": { + "average": "Precio promedio", "input": "Entrada", "inputAudio": "Entrada de audio", "inputCached": "Entrada en caché", + "inputCitation": "Citación de entrada", "inputText": "Entrada de texto", "inputTitle": "Detalles de entrada", "inputUncached": "Entrada no en caché", + "inputWriteCached": "Escritura en caché de entrada", "output": "Salida", "outputAudio": "Salida de audio", "outputText": "Salida de texto", diff --git a/locales/es-ES/models.json b/locales/es-ES/models.json index 836b516de59f6..829396c77252f 100644 --- a/locales/es-ES/models.json +++ b/locales/es-ES/models.json @@ -1121,15 +1121,6 @@ "llama-3.1-8b-instant": { "description": "Llama 3.1 8B es un modelo de alto rendimiento que ofrece una rápida capacidad de generación de texto, ideal para aplicaciones que requieren eficiencia a gran escala y rentabilidad." }, - "llama-3.1-sonar-huge-128k-online": { - "description": "El modelo Llama 3.1 Sonar Huge Online, con 405B de parámetros, soporta una longitud de contexto de aproximadamente 127,000 tokens, diseñado para aplicaciones de chat en línea complejas." - }, - "llama-3.1-sonar-large-128k-online": { - "description": "El modelo Llama 3.1 Sonar Large Online, con 70B de parámetros, soporta una longitud de contexto de aproximadamente 127,000 tokens, adecuado para tareas de chat de alta capacidad y diversidad." - }, - "llama-3.1-sonar-small-128k-online": { - "description": "El modelo Llama 3.1 Sonar Small Online, con 8B de parámetros, soporta una longitud de contexto de aproximadamente 127,000 tokens, diseñado para chat en línea, capaz de manejar eficientemente diversas interacciones textuales." - }, "llama-3.2-11b-vision-instruct": { "description": "Capacidad excepcional de razonamiento visual en imágenes de alta resolución, adecuada para aplicaciones de comprensión visual." }, @@ -1643,6 +1634,9 @@ "qwq-32b-preview": { "description": "El modelo QwQ es un modelo de investigación experimental desarrollado por el equipo de Qwen, enfocado en mejorar la capacidad de razonamiento de la IA." }, + "r1-1776": { + "description": "R1-1776 es una versión del modelo DeepSeek R1, que ha sido entrenada posteriormente para proporcionar información factual sin censura y sin sesgos." + }, "solar-mini": { "description": "Solar Mini es un LLM compacto que supera a GPT-3.5, con potentes capacidades multilingües, soportando inglés y coreano, ofreciendo soluciones eficientes y compactas." }, @@ -1655,6 +1649,9 @@ "sonar": { "description": "Producto de búsqueda ligero basado en contexto de búsqueda, más rápido y económico que Sonar Pro." }, + "sonar-deep-research": { + "description": "Deep Research realiza una investigación exhaustiva a nivel de expertos y la compila en informes accesibles y prácticos." + }, "sonar-pro": { "description": "Producto de búsqueda avanzada que soporta contexto de búsqueda, consultas avanzadas y seguimiento." 
}, diff --git a/locales/fa-IR/chat.json b/locales/fa-IR/chat.json index b574e4c917282..b26322b797a7b 100644 --- a/locales/fa-IR/chat.json +++ b/locales/fa-IR/chat.json @@ -89,16 +89,20 @@ "inputCharts": "${{amount}}/M کاراکتر", "inputMinutes": "${{amount}}/دقیقه", "inputTokens": "ورودی {{amount}}/اعتبار · ${{amount}}/M", - "outputTokens": "خروجی {{amount}}/اعتبار · ${{amount}}/M" + "outputTokens": "خروجی {{amount}}/اعتبار · ${{amount}}/M", + "writeCacheInputTokens": "ذخیره ورودی نوشتن {{amount}}/امتیاز · ${{amount}}/M" } }, "tokenDetails": { + "average": "میانگین قیمت", "input": "ورودی", "inputAudio": "ورودی صوتی", "inputCached": "ورودی کش شده", + "inputCitation": "ارجاع ورودی", "inputText": "ورودی متنی", "inputTitle": "جزئیات ورودی", "inputUncached": "ورودی غیر کش شده", + "inputWriteCached": "ذخیره ورودی نوشتن", "output": "خروجی", "outputAudio": "خروجی صوتی", "outputText": "خروجی متنی", diff --git a/locales/fa-IR/models.json b/locales/fa-IR/models.json index a7a2e7d65a761..2440fbaff9b70 100644 --- a/locales/fa-IR/models.json +++ b/locales/fa-IR/models.json @@ -1121,15 +1121,6 @@ "llama-3.1-8b-instant": { "description": "Llama 3.1 8B یک مدل با کارایی بالا است که توانایی تولید سریع متن را فراهم می‌کند و برای کاربردهایی که به بهره‌وری و صرفه‌جویی در هزینه در مقیاس بزرگ نیاز دارند، بسیار مناسب است." }, - "llama-3.1-sonar-huge-128k-online": { - "description": "مدل Llama 3.1 Sonar Huge Online با 405 میلیارد پارامتر، پشتیبانی از طول زمینه حدود 127,000 نشانه، طراحی شده برای برنامه‌های چت آنلاین پیچیده." - }, - "llama-3.1-sonar-large-128k-online": { - "description": "مدل Llama 3.1 Sonar Large Online با 70 میلیارد پارامتر، پشتیبانی از طول زمینه حدود 127,000 نشانه، مناسب برای وظایف چت با حجم بالا و متنوع." - }, - "llama-3.1-sonar-small-128k-online": { - "description": "مدل Llama 3.1 Sonar Small Online با 8 میلیارد پارامتر، پشتیبانی از طول زمینه‌ای حدود 127,000 نشانه، به‌طور ویژه برای چت آنلاین طراحی شده و می‌تواند به‌طور کارآمد انواع تعاملات متنی را پردازش کند." - }, "llama-3.2-11b-vision-instruct": { "description": "توانایی استدلال تصویری عالی در تصاویر با وضوح بالا، مناسب برای برنامه‌های درک بصری." }, @@ -1643,6 +1634,9 @@ "qwq-32b-preview": { "description": "مدل QwQ یک مدل تحقیقاتی تجربی است که توسط تیم Qwen توسعه یافته و بر تقویت توانایی استدلال AI تمرکز دارد." }, + "r1-1776": { + "description": "R1-1776 نسخه‌ای از مدل DeepSeek R1 است که پس از آموزش مجدد، اطلاعات واقعی بدون سانسور و بدون تعصب را ارائه می‌دهد." + }, "solar-mini": { "description": "Solar Mini یک LLM فشرده است که عملکردی بهتر از GPT-3.5 دارد و دارای توانایی‌های چند زبانه قوی است و از انگلیسی و کره‌ای پشتیبانی می‌کند و راه‌حل‌های کارآمد و کوچکی را ارائه می‌دهد." }, @@ -1655,6 +1649,9 @@ "sonar": { "description": "محصول جستجوی سبک بر اساس زمینه جستجو که سریع‌تر و ارزان‌تر از Sonar Pro است." }, + "sonar-deep-research": { + "description": "تحقیق عمیق، تحقیقاتی جامع و تخصصی را انجام می‌دهد و آن را به گزارش‌های قابل دسترسی و قابل استفاده تبدیل می‌کند." + }, "sonar-pro": { "description": "محصول جستجوی پیشرفته که از جستجوی زمینه پشتیبانی می‌کند و قابلیت‌های پیشرفته‌ای برای پرسش و پیگیری دارد." 
}, diff --git a/locales/fr-FR/chat.json b/locales/fr-FR/chat.json index ec3bceade3319..1c38c4516d568 100644 --- a/locales/fr-FR/chat.json +++ b/locales/fr-FR/chat.json @@ -89,16 +89,20 @@ "inputCharts": "${{amount}}/M caractères", "inputMinutes": "${{amount}}/minute", "inputTokens": "Entrée {{amount}}/crédit · ${{amount}}/M", - "outputTokens": "Sortie {{amount}}/crédit · ${{amount}}/M" + "outputTokens": "Sortie {{amount}}/crédit · ${{amount}}/M", + "writeCacheInputTokens": "Écriture de cache d'entrée {{amount}}/points · ${{amount}}/M" } }, "tokenDetails": { + "average": "Prix moyen", "input": "Entrée", "inputAudio": "Entrée audio", "inputCached": "Entrée mise en cache", + "inputCitation": "Citation d'entrée", "inputText": "Entrée texte", "inputTitle": "Détails de l'entrée", "inputUncached": "Entrée non mise en cache", + "inputWriteCached": "Écriture de cache d'entrée", "output": "Sortie", "outputAudio": "Sortie audio", "outputText": "Sortie texte", diff --git a/locales/fr-FR/models.json b/locales/fr-FR/models.json index 07ddb4d6f1669..a9863c054c627 100644 --- a/locales/fr-FR/models.json +++ b/locales/fr-FR/models.json @@ -1121,15 +1121,6 @@ "llama-3.1-8b-instant": { "description": "Llama 3.1 8B est un modèle à haute performance, offrant une capacité de génération de texte rapide, particulièrement adapté aux scénarios d'application nécessitant une efficacité à grande échelle et un rapport coût-efficacité." }, - "llama-3.1-sonar-huge-128k-online": { - "description": "Le modèle Llama 3.1 Sonar Huge Online, avec 405B de paramètres, prend en charge une longueur de contexte d'environ 127 000 jetons, conçu pour des applications de chat en ligne complexes." - }, - "llama-3.1-sonar-large-128k-online": { - "description": "Le modèle Llama 3.1 Sonar Large Online, avec 70B de paramètres, prend en charge une longueur de contexte d'environ 127 000 jetons, adapté aux tâches de chat à haute capacité et diversifiées." - }, - "llama-3.1-sonar-small-128k-online": { - "description": "Le modèle Llama 3.1 Sonar Small Online, avec 8B de paramètres, prend en charge une longueur de contexte d'environ 127 000 jetons, conçu pour le chat en ligne, capable de traiter efficacement diverses interactions textuelles." - }, "llama-3.2-11b-vision-instruct": { "description": "Capacités d'inférence d'image exceptionnelles sur des images haute résolution, adaptées aux applications de compréhension visuelle." }, @@ -1643,6 +1634,9 @@ "qwq-32b-preview": { "description": "Le modèle QwQ est un modèle de recherche expérimental développé par l'équipe Qwen, axé sur l'amélioration des capacités de raisonnement de l'IA." }, + "r1-1776": { + "description": "R1-1776 est une version du modèle DeepSeek R1, après un entraînement supplémentaire, fournissant des informations factuelles non filtrées et impartiales." + }, "solar-mini": { "description": "Solar Mini est un LLM compact, offrant des performances supérieures à celles de GPT-3.5, avec de puissantes capacités multilingues, prenant en charge l'anglais et le coréen, et fournissant une solution efficace et compacte." }, @@ -1655,6 +1649,9 @@ "sonar": { "description": "Produit de recherche léger basé sur le contexte de recherche, plus rapide et moins cher que Sonar Pro." }, + "sonar-deep-research": { + "description": "Deep Research effectue des recherches approfondies de niveau expert et les synthétise en rapports accessibles et exploitables." + }, "sonar-pro": { "description": "Produit de recherche avancé prenant en charge le contexte de recherche, avec des requêtes avancées et un suivi." 
}, diff --git a/locales/it-IT/chat.json b/locales/it-IT/chat.json index c63718407347d..105623eef6e28 100644 --- a/locales/it-IT/chat.json +++ b/locales/it-IT/chat.json @@ -89,16 +89,20 @@ "inputCharts": "${{amount}}/M caratteri", "inputMinutes": "${{amount}}/minuto", "inputTokens": "Input {{amount}}/crediti · ${{amount}}/M", - "outputTokens": "Output {{amount}}/crediti · ${{amount}}/M" + "outputTokens": "Output {{amount}}/crediti · ${{amount}}/M", + "writeCacheInputTokens": "Scrittura cache input {{amount}}/crediti · ${{amount}}/M" } }, "tokenDetails": { + "average": "Prezzo medio", "input": "Input", "inputAudio": "Input audio", "inputCached": "Input memorizzato", + "inputCitation": "Citazione input", "inputText": "Input testo", "inputTitle": "Dettagli input", "inputUncached": "Input non memorizzato", + "inputWriteCached": "Scrittura cache input", "output": "Output", "outputAudio": "Output audio", "outputText": "Output testo", diff --git a/locales/it-IT/models.json b/locales/it-IT/models.json index 883addf242d00..3edbce9e7dffd 100644 --- a/locales/it-IT/models.json +++ b/locales/it-IT/models.json @@ -1121,15 +1121,6 @@ "llama-3.1-8b-instant": { "description": "Llama 3.1 8B è un modello ad alte prestazioni, offre capacità di generazione di testo rapida, particolarmente adatto per scenari applicativi che richiedono efficienza su larga scala e costi contenuti." }, - "llama-3.1-sonar-huge-128k-online": { - "description": "Il modello Llama 3.1 Sonar Huge Online, con 405B parametri, supporta una lunghezza di contesto di circa 127.000 token, progettato per applicazioni di chat online complesse." - }, - "llama-3.1-sonar-large-128k-online": { - "description": "Il modello Llama 3.1 Sonar Large Online, con 70B parametri, supporta una lunghezza di contesto di circa 127.000 token, adatto per compiti di chat ad alta capacità e diversificati." - }, - "llama-3.1-sonar-small-128k-online": { - "description": "Il modello Llama 3.1 Sonar Small Online, con 8B parametri, supporta una lunghezza di contesto di circa 127.000 token, progettato per chat online, in grado di gestire interazioni testuali in modo efficiente." - }, "llama-3.2-11b-vision-instruct": { "description": "Eccellenti capacità di ragionamento visivo su immagini ad alta risoluzione, adatte ad applicazioni di comprensione visiva." }, @@ -1643,6 +1634,9 @@ "qwq-32b-preview": { "description": "Il modello QwQ è un modello di ricerca sperimentale sviluppato dal team Qwen, focalizzato sul potenziamento delle capacità di ragionamento dell'IA." }, + "r1-1776": { + "description": "R1-1776 è una versione del modello DeepSeek R1, addestrata successivamente per fornire informazioni fattuali non verificate e prive di pregiudizi." + }, "solar-mini": { "description": "Solar Mini è un LLM compatto, con prestazioni superiori a GPT-3.5, dotato di potenti capacità multilingue, supporta inglese e coreano, offrendo soluzioni efficienti e compatte." }, @@ -1655,6 +1649,9 @@ "sonar": { "description": "Prodotto di ricerca leggero basato sul contesto di ricerca, più veloce e più economico rispetto a Sonar Pro." }, + "sonar-deep-research": { + "description": "Deep Research conduce ricerche complete a livello esperto e le sintetizza in rapporti accessibili e utilizzabili." + }, "sonar-pro": { "description": "Prodotto di ricerca avanzata che supporta il contesto di ricerca, query avanzate e follow-up." 
}, diff --git a/locales/ja-JP/chat.json b/locales/ja-JP/chat.json index 348e6407a5626..754ef0f9ba281 100644 --- a/locales/ja-JP/chat.json +++ b/locales/ja-JP/chat.json @@ -89,16 +89,20 @@ "inputCharts": "${{amount}}/M 文字", "inputMinutes": "${{amount}}/分", "inputTokens": "入力 {{amount}}/クレジット · ${{amount}}/M", - "outputTokens": "出力 {{amount}}/クレジット · ${{amount}}/M" + "outputTokens": "出力 {{amount}}/クレジット · ${{amount}}/M", + "writeCacheInputTokens": "キャッシュ入力の書き込み {{amount}}/ポイント · ${{amount}}/M" } }, "tokenDetails": { + "average": "平均単価", "input": "入力", "inputAudio": "音声入力", "inputCached": "キャッシュ入力", + "inputCitation": "引用入力", "inputText": "テキスト入力", "inputTitle": "入力の詳細", "inputUncached": "未キャッシュ入力", + "inputWriteCached": "入力キャッシュ書き込み", "output": "出力", "outputAudio": "音声出力", "outputText": "テキスト出力", diff --git a/locales/ja-JP/models.json b/locales/ja-JP/models.json index 74dbd19c5cdf8..eb53efa9ebec1 100644 --- a/locales/ja-JP/models.json +++ b/locales/ja-JP/models.json @@ -1121,15 +1121,6 @@ "llama-3.1-8b-instant": { "description": "Llama 3.1 8Bは、高効率モデルであり、迅速なテキスト生成能力を提供し、大規模な効率とコスト効果が求められるアプリケーションシナリオに非常に適しています。" }, - "llama-3.1-sonar-huge-128k-online": { - "description": "Llama 3.1 Sonar Huge Onlineモデルは、405Bパラメータを持ち、約127,000トークンのコンテキスト長をサポートし、複雑なオンラインチャットアプリケーション用に設計されています。" - }, - "llama-3.1-sonar-large-128k-online": { - "description": "Llama 3.1 Sonar Large Onlineモデルは、70Bパラメータを持ち、約127,000トークンのコンテキスト長をサポートし、高容量で多様なチャットタスクに適しています。" - }, - "llama-3.1-sonar-small-128k-online": { - "description": "Llama 3.1 Sonar Small Onlineモデルは、8Bパラメータを持ち、約127,000トークンのコンテキスト長をサポートし、オンラインチャット用に設計されており、さまざまなテキストインタラクションを効率的に処理できます。" - }, "llama-3.2-11b-vision-instruct": { "description": "高解像度画像で優れた画像推論能力を発揮し、視覚理解アプリケーションに適しています。" }, @@ -1643,6 +1634,9 @@ "qwq-32b-preview": { "description": "QwQモデルはQwenチームによって開発された実験的な研究モデルで、AIの推論能力を強化することに焦点を当てています。" }, + "r1-1776": { + "description": "R1-1776は、DeepSeek R1モデルの一つのバージョンで、後処理を経て、検閲されていない偏りのない事実情報を提供します。" + }, "solar-mini": { "description": "Solar MiniはコンパクトなLLMで、GPT-3.5を上回る性能を持ち、強力な多言語能力を備え、英語と韓国語をサポートし、高効率でコンパクトなソリューションを提供します。" }, @@ -1655,6 +1649,9 @@ "sonar": { "description": "検索コンテキストに基づく軽量検索製品で、Sonar Proよりも速く、安価です。" }, + "sonar-deep-research": { + "description": "Deep Researchは、専門家による包括的な研究を行い、それをアクセス可能で実行可能なレポートにまとめます。" + }, "sonar-pro": { "description": "検索コンテキストをサポートする高度な検索製品で、高度なクエリとフォローアップをサポートします。" }, diff --git a/locales/ko-KR/chat.json b/locales/ko-KR/chat.json index 11c1594a8b005..a106891b884a2 100644 --- a/locales/ko-KR/chat.json +++ b/locales/ko-KR/chat.json @@ -89,16 +89,20 @@ "inputCharts": "${{amount}}/M 문자", "inputMinutes": "${{amount}}/분", "inputTokens": "입력 {{amount}}/포인트 · ${{amount}}/M", - "outputTokens": "출력 {{amount}}/포인트 · ${{amount}}/M" + "outputTokens": "출력 {{amount}}/포인트 · ${{amount}}/M", + "writeCacheInputTokens": "캐시 입력 쓰기 {{amount}}/포인트 · ${{amount}}/M" } }, "tokenDetails": { + "average": "평균 단가", "input": "입력", "inputAudio": "오디오 입력", "inputCached": "입력 캐시", + "inputCitation": "입력 인용", "inputText": "텍스트 입력", "inputTitle": "입력 세부사항", "inputUncached": "입력 비캐시", + "inputWriteCached": "입력 캐시 쓰기", "output": "출력", "outputAudio": "오디오 출력", "outputText": "텍스트 출력", diff --git a/locales/ko-KR/models.json b/locales/ko-KR/models.json index 7a737b2f1c87d..f57cf8a32a6a9 100644 --- a/locales/ko-KR/models.json +++ b/locales/ko-KR/models.json @@ -1121,15 +1121,6 @@ "llama-3.1-8b-instant": { "description": "Llama 3.1 8B는 효율적인 모델로, 빠른 텍스트 생성 능력을 제공하며, 대규모 효율성과 비용 효과성이 필요한 응용 프로그램에 매우 적합합니다." 
}, - "llama-3.1-sonar-huge-128k-online": { - "description": "Llama 3.1 Sonar Huge Online 모델은 405B 매개변수를 갖추고 있으며, 약 127,000개의 토큰의 컨텍스트 길이를 지원하여 복잡한 온라인 채팅 애플리케이션을 위해 설계되었습니다." - }, - "llama-3.1-sonar-large-128k-online": { - "description": "Llama 3.1 Sonar Large Online 모델은 70B 매개변수를 갖추고 있으며, 약 127,000개의 토큰의 컨텍스트 길이를 지원하여 대용량 및 다양한 채팅 작업에 적합합니다." - }, - "llama-3.1-sonar-small-128k-online": { - "description": "Llama 3.1 Sonar Small Online 모델은 8B 매개변수를 갖추고 있으며, 약 127,000개의 토큰의 컨텍스트 길이를 지원하여 온라인 채팅을 위해 설계되었습니다." - }, "llama-3.2-11b-vision-instruct": { "description": "고해상도 이미지에서 탁월한 이미지 추론 능력을 발휘하며, 시각 이해 응용 프로그램에 적합합니다." }, @@ -1643,6 +1634,9 @@ "qwq-32b-preview": { "description": "QwQ 모델은 Qwen 팀이 개발한 실험적 연구 모델로, AI 추론 능력을 향상시키는 데 중점을 두고 있습니다." }, + "r1-1776": { + "description": "R1-1776은 DeepSeek R1 모델의 한 버전으로, 후속 훈련을 거쳐 검토되지 않은 편향 없는 사실 정보를 제공합니다." + }, "solar-mini": { "description": "Solar Mini는 컴팩트한 LLM으로, GPT-3.5보다 성능이 우수하며, 강력한 다국어 능력을 갖추고 있어 영어와 한국어를 지원하며, 효율적이고 소형의 솔루션을 제공합니다." }, @@ -1655,6 +1649,9 @@ "sonar": { "description": "검색 맥락 기반의 경량 검색 제품으로, Sonar Pro보다 더 빠르고 저렴합니다." }, + "sonar-deep-research": { + "description": "Deep Research는 포괄적인 전문가 수준의 연구를 수행하고 이를 접근 가능하고 실행 가능한 보고서로 통합합니다." + }, "sonar-pro": { "description": "고급 쿼리 및 후속 작업을 지원하는 검색 맥락 기반의 고급 검색 제품입니다." }, diff --git a/locales/nl-NL/chat.json b/locales/nl-NL/chat.json index 9cc0af0569345..6fbe0679ea854 100644 --- a/locales/nl-NL/chat.json +++ b/locales/nl-NL/chat.json @@ -89,16 +89,20 @@ "inputCharts": "${{amount}}/M tekens", "inputMinutes": "${{amount}}/minuut", "inputTokens": "Invoer {{amount}}/credits · ${{amount}}/M", - "outputTokens": "Uitvoer {{amount}}/credits · ${{amount}}/M" + "outputTokens": "Uitvoer {{amount}}/credits · ${{amount}}/M", + "writeCacheInputTokens": "Cache-invoer schrijven {{amount}}/punten · ${{amount}}/M" } }, "tokenDetails": { + "average": "Gemiddelde prijs", "input": "Invoer", "inputAudio": "Audio-invoer", "inputCached": "Gecacheerde invoer", + "inputCitation": "Invoer citeren", "inputText": "Tekstinvoer", "inputTitle": "Invoerdetails", "inputUncached": "Ongecacheerde invoer", + "inputWriteCached": "Invoer cache schrijven", "output": "Uitvoer", "outputAudio": "Audio-uitvoer", "outputText": "Tekstuitvoer", diff --git a/locales/nl-NL/models.json b/locales/nl-NL/models.json index fc9453cfdbaee..edc0c3f6c8bf4 100644 --- a/locales/nl-NL/models.json +++ b/locales/nl-NL/models.json @@ -1121,15 +1121,6 @@ "llama-3.1-8b-instant": { "description": "Llama 3.1 8B is een hoogpresterend model dat snelle tekstgeneratiecapaciteiten biedt, zeer geschikt voor toepassingen die grootschalige efficiëntie en kosteneffectiviteit vereisen." }, - "llama-3.1-sonar-huge-128k-online": { - "description": "Llama 3.1 Sonar Huge Online model, met 405B parameters, ondersteunt een contextlengte van ongeveer 127.000 tokens, ontworpen voor complexe online chattoepassingen." - }, - "llama-3.1-sonar-large-128k-online": { - "description": "Llama 3.1 Sonar Large Online model, met 70B parameters, ondersteunt een contextlengte van ongeveer 127.000 tokens, geschikt voor hoge capaciteit en diverse chattaken." - }, - "llama-3.1-sonar-small-128k-online": { - "description": "Llama 3.1 Sonar Small Online model, met 8B parameters, ondersteunt een contextlengte van ongeveer 127.000 tokens, speciaal ontworpen voor online chat en kan efficiënt verschillende tekstinteracties verwerken." 
- }, "llama-3.2-11b-vision-instruct": { "description": "Uitstekende beeldredeneringscapaciteiten op hoge resolutie-afbeeldingen, geschikt voor visuele begrijptoepassingen." }, @@ -1643,6 +1634,9 @@ "qwq-32b-preview": { "description": "Het QwQ-model is een experimenteel onderzoeksmodel ontwikkeld door het Qwen-team, gericht op het verbeteren van de AI-redeneringscapaciteiten." }, + "r1-1776": { + "description": "R1-1776 is een versie van het DeepSeek R1-model, dat is bijgetraind om ongecensureerde, onpartijdige feitelijke informatie te bieden." + }, "solar-mini": { "description": "Solar Mini is een compacte LLM die beter presteert dan GPT-3.5, met sterke meertalige capaciteiten, ondersteunt Engels en Koreaans, en biedt een efficiënte en compacte oplossing." }, @@ -1655,6 +1649,9 @@ "sonar": { "description": "Een lichtgewicht zoekproduct op basis van contextuele zoekopdrachten, sneller en goedkoper dan Sonar Pro." }, + "sonar-deep-research": { + "description": "Deep Research voert uitgebreide expertstudies uit en bundelt deze in toegankelijke, bruikbare rapporten." + }, "sonar-pro": { "description": "Een geavanceerd zoekproduct dat contextuele zoekopdrachten ondersteunt, met geavanceerde query's en vervolgacties." }, diff --git a/locales/pl-PL/chat.json b/locales/pl-PL/chat.json index 23d32d26aa0e5..f1459161b7f39 100644 --- a/locales/pl-PL/chat.json +++ b/locales/pl-PL/chat.json @@ -89,16 +89,20 @@ "inputCharts": "${{amount}}/M znaków", "inputMinutes": "${{amount}}/minutę", "inputTokens": "Wejście {{amount}}/punktów · ${{amount}}/M", - "outputTokens": "Wyjście {{amount}}/punktów · ${{amount}}/M" + "outputTokens": "Wyjście {{amount}}/punktów · ${{amount}}/M", + "writeCacheInputTokens": "Zapisz wejście w pamięci podręcznej {{amount}}/punktów · ${{amount}}/M" } }, "tokenDetails": { + "average": "Średnia cena", "input": "Wejście", "inputAudio": "Wejście audio", "inputCached": "Zbuforowane wejście", + "inputCitation": "Cytowanie wejścia", "inputText": "Wejście tekstowe", "inputTitle": "Szczegóły wejścia", "inputUncached": "Wejście niezbuforowane", + "inputWriteCached": "Zapisz wejście w pamięci podręcznej", "output": "Wyjście", "outputAudio": "Wyjście audio", "outputText": "Wyjście tekstowe", diff --git a/locales/pl-PL/models.json b/locales/pl-PL/models.json index d26ea80d0b901..8bd35aef20187 100644 --- a/locales/pl-PL/models.json +++ b/locales/pl-PL/models.json @@ -1121,15 +1121,6 @@ "llama-3.1-8b-instant": { "description": "Llama 3.1 8B to model o wysokiej wydajności, oferujący szybkie możliwości generowania tekstu, idealny do zastosowań wymagających dużej efektywności i opłacalności." }, - "llama-3.1-sonar-huge-128k-online": { - "description": "Model Llama 3.1 Sonar Huge Online, z 405B parametrami, obsługujący kontekst o długości około 127,000 tokenów, zaprojektowany do złożonych aplikacji czatu online." - }, - "llama-3.1-sonar-large-128k-online": { - "description": "Model Llama 3.1 Sonar Large Online, z 70B parametrami, obsługujący kontekst o długości około 127,000 tokenów, idealny do zadań czatu o dużej pojemności i różnorodności." - }, - "llama-3.1-sonar-small-128k-online": { - "description": "Model Llama 3.1 Sonar Small Online, z 8B parametrami, obsługujący kontekst o długości około 127,000 tokenów, zaprojektowany do czatów online, efektywnie przetwarzający różne interakcje tekstowe." - }, "llama-3.2-11b-vision-instruct": { "description": "Wyjątkowe zdolności wnioskowania wizualnego na obrazach o wysokiej rozdzielczości, idealne do zastosowań związanych ze zrozumieniem wizualnym." 
}, @@ -1643,6 +1634,9 @@ "qwq-32b-preview": { "description": "Model QwQ to eksperymentalny model badawczy opracowany przez zespół Qwen, skoncentrowany na zwiększeniu zdolności wnioskowania AI." }, + "r1-1776": { + "description": "R1-1776 to wersja modelu DeepSeek R1, która została poddana dalszemu treningowi, aby dostarczać nieocenzurowane, bezstronne informacje faktograficzne." + }, "solar-mini": { "description": "Solar Mini to kompaktowy LLM, który przewyższa GPT-3.5, posiadając potężne zdolności wielojęzyczne, wspierając angielski i koreański, oferując efektywne i zgrabne rozwiązania." }, @@ -1655,6 +1649,9 @@ "sonar": { "description": "Lekki produkt wyszukiwania oparty na kontekście, szybszy i tańszy niż Sonar Pro." }, + "sonar-deep-research": { + "description": "Deep Research przeprowadza kompleksowe badania na poziomie eksperckim i łączy je w dostępne, praktyczne raporty." + }, "sonar-pro": { "description": "Zaawansowany produkt wyszukiwania wspierający kontekst wyszukiwania, oferujący zaawansowane zapytania i śledzenie." }, diff --git a/locales/pt-BR/chat.json b/locales/pt-BR/chat.json index 56279a83b2ab3..dc887694c374f 100644 --- a/locales/pt-BR/chat.json +++ b/locales/pt-BR/chat.json @@ -89,16 +89,20 @@ "inputCharts": "${{amount}}/M caracteres", "inputMinutes": "${{amount}}/minuto", "inputTokens": "Entrada {{amount}}/créditos · ${{amount}}/M", - "outputTokens": "Saída {{amount}}/créditos · ${{amount}}/M" + "outputTokens": "Saída {{amount}}/créditos · ${{amount}}/M", + "writeCacheInputTokens": "Cache de entrada de escrita {{amount}}/pontos · ${{amount}}/M" } }, "tokenDetails": { + "average": "Preço médio", "input": "Entrada", "inputAudio": "Entrada de áudio", "inputCached": "Entrada em cache", + "inputCitation": "Citação de entrada", "inputText": "Entrada de texto", "inputTitle": "Detalhes da entrada", "inputUncached": "Entrada não cacheada", + "inputWriteCached": "Entrada de cache de escrita", "output": "Saída", "outputAudio": "Saída de áudio", "outputText": "Saída de texto", diff --git a/locales/pt-BR/models.json b/locales/pt-BR/models.json index b7e31a217560c..4101af8dc4a2e 100644 --- a/locales/pt-BR/models.json +++ b/locales/pt-BR/models.json @@ -1121,15 +1121,6 @@ "llama-3.1-8b-instant": { "description": "Llama 3.1 8B é um modelo de alto desempenho, oferecendo capacidade de geração de texto rápida, ideal para cenários de aplicação que exigem eficiência em larga escala e custo-benefício." }, - "llama-3.1-sonar-huge-128k-online": { - "description": "O modelo Llama 3.1 Sonar Huge Online possui 405B de parâmetros, suportando um comprimento de contexto de aproximadamente 127.000 tokens, projetado para aplicações de chat online complexas." - }, - "llama-3.1-sonar-large-128k-online": { - "description": "O modelo Llama 3.1 Sonar Large Online possui 70B de parâmetros, suportando um comprimento de contexto de aproximadamente 127.000 tokens, adequado para tarefas de chat de alta capacidade e diversidade." - }, - "llama-3.1-sonar-small-128k-online": { - "description": "O modelo Llama 3.1 Sonar Small Online possui 8B de parâmetros, suportando um comprimento de contexto de aproximadamente 127.000 tokens, projetado para chats online, capaz de processar eficientemente diversas interações textuais." - }, "llama-3.2-11b-vision-instruct": { "description": "Capacidade excepcional de raciocínio visual em imagens de alta resolução, adequada para aplicações de compreensão visual." 
}, @@ -1643,6 +1634,9 @@ "qwq-32b-preview": { "description": "O modelo QwQ é um modelo de pesquisa experimental desenvolvido pela equipe Qwen, focado em aprimorar a capacidade de raciocínio da IA." }, + "r1-1776": { + "description": "R1-1776 é uma versão do modelo DeepSeek R1, treinada posteriormente para fornecer informações factuais não filtradas e imparciais." + }, "solar-mini": { "description": "Solar Mini é um LLM compacto, com desempenho superior ao GPT-3.5, possuindo forte capacidade multilíngue, suportando inglês e coreano, oferecendo uma solução eficiente e compacta." }, @@ -1655,6 +1649,9 @@ "sonar": { "description": "Produto de busca leve baseado em contexto de busca, mais rápido e mais barato que o Sonar Pro." }, + "sonar-deep-research": { + "description": "A Pesquisa Profunda realiza uma pesquisa abrangente de nível especialista e a sintetiza em relatórios acessíveis e acionáveis." + }, "sonar-pro": { "description": "Produto de busca avançada que suporta contexto de busca, consultas avançadas e acompanhamento." }, diff --git a/locales/ru-RU/chat.json b/locales/ru-RU/chat.json index be3399e4d44ae..3f552be07b5ea 100644 --- a/locales/ru-RU/chat.json +++ b/locales/ru-RU/chat.json @@ -89,16 +89,20 @@ "inputCharts": "${{amount}}/M символов", "inputMinutes": "${{amount}}/минуту", "inputTokens": "Входные {{amount}}/кредиты · ${{amount}}/M", - "outputTokens": "Выходные {{amount}}/кредиты · ${{amount}}/M" + "outputTokens": "Выходные {{amount}}/кредиты · ${{amount}}/M", + "writeCacheInputTokens": "Кэширование ввода записи {{amount}}/баллов · ${{amount}}/М" } }, "tokenDetails": { + "average": "Средняя цена", "input": "Вход", "inputAudio": "Аудиовход", "inputCached": "Кэшированный вход", + "inputCitation": "Цитирование ввода", "inputText": "Текстовый вход", "inputTitle": "Детали входа", "inputUncached": "Некэшированный вход", + "inputWriteCached": "Запись кэшированного ввода", "output": "Выход", "outputAudio": "Аудиовыход", "outputText": "Текстовый выход", diff --git a/locales/ru-RU/models.json b/locales/ru-RU/models.json index 497bf44612055..0028502dfdee8 100644 --- a/locales/ru-RU/models.json +++ b/locales/ru-RU/models.json @@ -1121,15 +1121,6 @@ "llama-3.1-8b-instant": { "description": "Llama 3.1 8B — это высокоэффективная модель, обеспечивающая быструю генерацию текста, идеально подходящая для приложений, требующих масштабной эффективности и экономичности." }, - "llama-3.1-sonar-huge-128k-online": { - "description": "Модель Llama 3.1 Sonar Huge Online, обладающая 405B параметрами, поддерживает контекст длиной около 127,000 токенов, предназначена для сложных онлайн-чат-приложений." - }, - "llama-3.1-sonar-large-128k-online": { - "description": "Модель Llama 3.1 Sonar Large Online, обладающая 70B параметрами, поддерживает контекст длиной около 127,000 токенов, подходит для задач с высокой нагрузкой и разнообразными чатами." - }, - "llama-3.1-sonar-small-128k-online": { - "description": "Модель Llama 3.1 Sonar Small Online, обладающая 8B параметрами, поддерживает контекст длиной около 127,000 токенов, специально разработана для онлайн-чатов и эффективно обрабатывает различные текстовые взаимодействия." - }, "llama-3.2-11b-vision-instruct": { "description": "Отличные способности к визуальному пониманию изображений на высоком разрешении, предназначенные для приложений визуального понимания." 
}, @@ -1643,6 +1634,9 @@ "qwq-32b-preview": { "description": "Модель QwQ — это экспериментальная исследовательская модель, разработанная командой Qwen, сосредоточенная на улучшении возможностей вывода ИИ." }, + "r1-1776": { + "description": "R1-1776 — это версия модели DeepSeek R1, прошедшая дообучение, которая предоставляет непроверенную, беспристрастную фактическую информацию." + }, "solar-mini": { "description": "Solar Mini — это компактная LLM, которая превосходит GPT-3.5, обладает мощными многоязычными возможностями, поддерживает английский и корейский языки, предлагая эффективное и компактное решение." }, @@ -1655,6 +1649,9 @@ "sonar": { "description": "Легковесный продукт поиска на основе контекста, быстрее и дешевле, чем Sonar Pro." }, + "sonar-deep-research": { + "description": "Глубокое исследование проводит всесторонние экспертные исследования и сводит их в доступные и практичные отчеты." + }, "sonar-pro": { "description": "Расширенный продукт поиска, поддерживающий контекст поиска, сложные запросы и последующие действия." }, diff --git a/locales/tr-TR/chat.json b/locales/tr-TR/chat.json index fb6d3dc62935b..a8760eb196102 100644 --- a/locales/tr-TR/chat.json +++ b/locales/tr-TR/chat.json @@ -89,16 +89,20 @@ "inputCharts": "${{amount}}/M karakter", "inputMinutes": "${{amount}}/dakika", "inputTokens": "Giriş {{amount}}/kredi · ${{amount}}/M", - "outputTokens": "Çıkış {{amount}}/kredi · ${{amount}}/M" + "outputTokens": "Çıkış {{amount}}/kredi · ${{amount}}/M", + "writeCacheInputTokens": "Giriş yazma önbelleği {{amount}}/puan · ${{amount}}/M" } }, "tokenDetails": { + "average": "Ortalama birim fiyat", "input": "Giriş", "inputAudio": "Ses girişi", "inputCached": "Önceden yüklenmiş giriş", + "inputCitation": "Giriş alıntısı", "inputText": "Metin girişi", "inputTitle": "Giriş detayları", "inputUncached": "Önceden yüklenmemiş giriş", + "inputWriteCached": "Giriş önbelleği yazma", "output": "Çıkış", "outputAudio": "Ses çıkışı", "outputText": "Metin çıkışı", diff --git a/locales/tr-TR/models.json b/locales/tr-TR/models.json index 584c55d50e9e4..7aac7ecb97460 100644 --- a/locales/tr-TR/models.json +++ b/locales/tr-TR/models.json @@ -1121,15 +1121,6 @@ "llama-3.1-8b-instant": { "description": "Llama 3.1 8B, hızlı metin üretim yeteneği sunan yüksek performanslı bir modeldir ve büyük ölçekli verimlilik ve maliyet etkinliği gerektiren uygulama senaryoları için son derece uygundur." }, - "llama-3.1-sonar-huge-128k-online": { - "description": "Llama 3.1 Sonar Huge Online modeli, 405B parametreye sahiptir ve yaklaşık 127,000 belirteçlik bağlam uzunluğunu destekler, karmaşık çevrimiçi sohbet uygulamaları için tasarlanmıştır." - }, - "llama-3.1-sonar-large-128k-online": { - "description": "Llama 3.1 Sonar Large Online modeli, 70B parametreye sahiptir ve yaklaşık 127,000 belirteçlik bağlam uzunluğunu destekler, yüksek kapasiteli ve çeşitli sohbet görevleri için uygundur." - }, - "llama-3.1-sonar-small-128k-online": { - "description": "Llama 3.1 Sonar Small Online modeli, 8B parametreye sahiptir ve yaklaşık 127,000 belirteçlik bağlam uzunluğunu destekler, çevrimiçi sohbet için tasarlanmıştır ve çeşitli metin etkileşimlerini etkili bir şekilde işler." - }, "llama-3.2-11b-vision-instruct": { "description": "Yüksek çözünürlüklü görüntülerde mükemmel görüntü akıl yürütme yeteneği, görsel anlama uygulamaları için uygundur." 
}, @@ -1643,6 +1634,9 @@ "qwq-32b-preview": { "description": "QwQ modeli, Qwen ekibi tarafından geliştirilen deneysel bir araştırma modelidir ve AI akıl yürütme yeteneklerini artırmaya odaklanmaktadır." }, + "r1-1776": { + "description": "R1-1776, DeepSeek R1 modelinin bir versiyonudur ve son eğitimle, sansürsüz, tarafsız gerçek bilgileri sunar." + }, "solar-mini": { "description": "Solar Mini, GPT-3.5'ten daha iyi performansa sahip kompakt bir LLM'dir, güçlü çok dilli yeteneklere sahiptir, İngilizce ve Korece'yi destekler ve etkili, kompakt çözümler sunar." }, @@ -1655,6 +1649,9 @@ "sonar": { "description": "Arama bağlamına dayalı hafif bir arama ürünüdür, Sonar Pro'dan daha hızlı ve daha ucuzdur." }, + "sonar-deep-research": { + "description": "Deep Research, kapsamlı uzman düzeyinde araştırmalar yapar ve bunları erişilebilir, uygulanabilir raporlar haline getirir." + }, "sonar-pro": { "description": "Gelişmiş sorgular ve takip desteği sunan, arama bağlamını destekleyen bir üst düzey arama ürünüdür." }, diff --git a/locales/vi-VN/chat.json b/locales/vi-VN/chat.json index 828a6b33cb428..5efaad1321182 100644 --- a/locales/vi-VN/chat.json +++ b/locales/vi-VN/chat.json @@ -89,16 +89,20 @@ "inputCharts": "${{amount}}/M ký tự", "inputMinutes": "${{amount}}/phút", "inputTokens": "Nhập {{amount}}/điểm · ${{amount}}/M", - "outputTokens": "Xuất {{amount}}/điểm · ${{amount}}/M" + "outputTokens": "Xuất {{amount}}/điểm · ${{amount}}/M", + "writeCacheInputTokens": "Ghi vào bộ nhớ đệm đầu vào {{amount}}/điểm · ${{amount}}/M" } }, "tokenDetails": { + "average": "Giá trung bình", "input": "Nhập", "inputAudio": "Âm thanh nhập", "inputCached": "Nhập cached", + "inputCitation": "Trích dẫn đầu vào", "inputText": "Văn bản nhập", "inputTitle": "Chi tiết nhập", "inputUncached": "Nhập chưa cached", + "inputWriteCached": "Ghi vào bộ nhớ đệm đầu vào", "output": "Xuất", "outputAudio": "Âm thanh xuất", "outputText": "Văn bản xuất", diff --git a/locales/vi-VN/models.json b/locales/vi-VN/models.json index 9ea8d183ba4b9..835adbc8c9342 100644 --- a/locales/vi-VN/models.json +++ b/locales/vi-VN/models.json @@ -1121,15 +1121,6 @@ "llama-3.1-8b-instant": { "description": "Llama 3.1 8B là một mô hình hiệu suất cao, cung cấp khả năng sinh văn bản nhanh chóng, rất phù hợp cho các tình huống ứng dụng cần hiệu quả quy mô lớn và tiết kiệm chi phí." }, - "llama-3.1-sonar-huge-128k-online": { - "description": "Mô hình Llama 3.1 Sonar Huge Online, có 405B tham số, hỗ trợ độ dài ngữ cảnh khoảng 127,000 mã, được thiết kế cho các ứng dụng trò chuyện trực tuyến phức tạp." - }, - "llama-3.1-sonar-large-128k-online": { - "description": "Mô hình Llama 3.1 Sonar Large Online, có 70B tham số, hỗ trợ độ dài ngữ cảnh khoảng 127,000 mã, phù hợp cho các nhiệm vụ trò chuyện có dung lượng lớn và đa dạng." - }, - "llama-3.1-sonar-small-128k-online": { - "description": "Mô hình Llama 3.1 Sonar Small Online, có 8B tham số, hỗ trợ độ dài ngữ cảnh khoảng 127,000 mã, được thiết kế cho trò chuyện trực tuyến, có khả năng xử lý hiệu quả các tương tác văn bản khác nhau." - }, "llama-3.2-11b-vision-instruct": { "description": "Khả năng suy luận hình ảnh xuất sắc trên hình ảnh độ phân giải cao, phù hợp cho các ứng dụng hiểu biết hình ảnh." }, @@ -1643,6 +1634,9 @@ "qwq-32b-preview": { "description": "Mô hình QwQ là một mô hình nghiên cứu thử nghiệm được phát triển bởi đội ngũ Qwen, tập trung vào việc nâng cao khả năng suy luận của AI." 
}, + "r1-1776": { + "description": "R1-1776 là một phiên bản của mô hình DeepSeek R1, đã được huấn luyện lại, cung cấp thông tin sự thật chưa được kiểm duyệt và không thiên lệch." + }, "solar-mini": { "description": "Solar Mini là một LLM dạng nhỏ gọn, hiệu suất vượt trội hơn GPT-3.5, có khả năng đa ngôn ngữ mạnh mẽ, hỗ trợ tiếng Anh và tiếng Hàn, cung cấp giải pháp hiệu quả và nhỏ gọn." }, @@ -1655,6 +1649,9 @@ "sonar": { "description": "Sản phẩm tìm kiếm nhẹ dựa trên ngữ cảnh tìm kiếm, nhanh hơn và rẻ hơn so với Sonar Pro." }, + "sonar-deep-research": { + "description": "Nghiên cứu sâu tiến hành nghiên cứu chuyên gia toàn diện và tổng hợp thành các báo cáo có thể truy cập và có thể hành động." + }, "sonar-pro": { "description": "Sản phẩm tìm kiếm nâng cao hỗ trợ ngữ cảnh tìm kiếm, cho phép truy vấn và theo dõi nâng cao." }, diff --git a/locales/zh-CN/chat.json b/locales/zh-CN/chat.json index 8b77abc9a3b2c..9241fc525de39 100644 --- a/locales/zh-CN/chat.json +++ b/locales/zh-CN/chat.json @@ -89,16 +89,20 @@ "inputCharts": "${{amount}}/M 字符", "inputMinutes": "${{amount}}/分钟", "inputTokens": "输入 {{amount}}/积分 · ${{amount}}/M", - "outputTokens": "输出 {{amount}}/积分 · ${{amount}}/M" + "outputTokens": "输出 {{amount}}/积分 · ${{amount}}/M", + "writeCacheInputTokens": "缓存输入写入 {{amount}}/积分 · ${{amount}}/M" } }, "tokenDetails": { + "average": "平均单价", "input": "输入", "inputAudio": "音频输入", "inputCached": "输入缓存", + "inputCitation": "引用输入", "inputText": "文本输入", "inputTitle": "输入明细", "inputUncached": "输入未缓存", + "inputWriteCached": "输入缓存写入", "output": "输出", "outputAudio": "音频输出", "outputText": "文本输出", diff --git a/locales/zh-CN/models.json b/locales/zh-CN/models.json index a5d14b447de4c..3c60bb47702ff 100644 --- a/locales/zh-CN/models.json +++ b/locales/zh-CN/models.json @@ -1121,15 +1121,6 @@ "llama-3.1-8b-instant": { "description": "Llama 3.1 8B 是一款高效能模型,提供了快速的文本生成能力,非常适合需要大规模效率和成本效益的应用场景。" }, - "llama-3.1-sonar-huge-128k-online": { - "description": "Llama 3.1 Sonar Huge Online 模型,具备405B参数,支持约127,000个标记的上下文长度,设计用于复杂的在线聊天应用。" - }, - "llama-3.1-sonar-large-128k-online": { - "description": "Llama 3.1 Sonar Large Online 模型,具备70B参数,支持约127,000个标记的上下文长度,适用于高容量和多样化聊天任务。" - }, - "llama-3.1-sonar-small-128k-online": { - "description": "Llama 3.1 Sonar Small Online 模型,具备8B参数,支持约127,000个标记的上下文长度,专为在线聊天设计,能高效处理各种文本交互。" - }, "llama-3.2-11b-vision-instruct": { "description": "在高分辨率图像上表现出色的图像推理能力,适用于视觉理解应用。" }, @@ -1643,6 +1634,9 @@ "qwq-32b-preview": { "description": "QwQ模型是由 Qwen 团队开发的实验性研究模型,专注于增强 AI 推理能力。" }, + "r1-1776": { + "description": "R1-1776 是 DeepSeek R1 模型的一个版本,经过后训练,可提供未经审查、无偏见的事实信息。" + }, "solar-mini": { "description": "Solar Mini 是一种紧凑型 LLM,性能优于 GPT-3.5,具备强大的多语言能力,支持英语和韩语,提供高效小巧的解决方案。" }, @@ -1655,6 +1649,9 @@ "sonar": { "description": "基于搜索上下文的轻量级搜索产品,比 Sonar Pro 更快、更便宜。" }, + "sonar-deep-research": { + "description": "Deep Research 进行全面的专家级研究,并将其综合成可访问、可作的报告。" + }, "sonar-pro": { "description": "支持搜索上下文的高级搜索产品,支持高级查询和跟进。" }, diff --git a/locales/zh-TW/chat.json b/locales/zh-TW/chat.json index 368701ace8d7b..72fca94daec85 100644 --- a/locales/zh-TW/chat.json +++ b/locales/zh-TW/chat.json @@ -89,16 +89,20 @@ "inputCharts": "${{amount}}/M 字元", "inputMinutes": "${{amount}}/分鐘", "inputTokens": "輸入 {{amount}}/積分 · ${{amount}}/M", - "outputTokens": "輸出 {{amount}}/積分 · ${{amount}}/M" + "outputTokens": "輸出 {{amount}}/積分 · ${{amount}}/M", + "writeCacheInputTokens": "快取輸入寫入 {{amount}}/積分 · ${{amount}}/M" } }, "tokenDetails": { + "average": "平均單價", "input": "輸入", "inputAudio": "音頻輸入", "inputCached": "輸入快取", + 
"inputCitation": "引用輸入", "inputText": "文本輸入", "inputTitle": "輸入明細", "inputUncached": "輸入未快取", + "inputWriteCached": "輸入快取寫入", "output": "輸出", "outputAudio": "音頻輸出", "outputText": "文本輸出", diff --git a/locales/zh-TW/models.json b/locales/zh-TW/models.json index 05f4f5e704bc7..d1ffd24162587 100644 --- a/locales/zh-TW/models.json +++ b/locales/zh-TW/models.json @@ -1121,15 +1121,6 @@ "llama-3.1-8b-instant": { "description": "Llama 3.1 8B 是一款高效能模型,提供了快速的文本生成能力,非常適合需要大規模效率和成本效益的應用場景。" }, - "llama-3.1-sonar-huge-128k-online": { - "description": "Llama 3.1 Sonar Huge Online 模型,具備 405B 參數,支持約 127,000 個標記的上下文長度,設計用於複雜的在線聊天應用。" - }, - "llama-3.1-sonar-large-128k-online": { - "description": "Llama 3.1 Sonar Large Online 模型,具備 70B 參數,支持約 127,000 個標記的上下文長度,適用於高容量和多樣化聊天任務。" - }, - "llama-3.1-sonar-small-128k-online": { - "description": "Llama 3.1 Sonar Small Online 模型,具備 8B 參數,支持約 127,000 個標記的上下文長度,專為在線聊天設計,能高效處理各種文本交互。" - }, "llama-3.2-11b-vision-instruct": { "description": "在高解析度圖像上表現優異的圖像推理能力,適用於視覺理解應用。" }, @@ -1643,6 +1634,9 @@ "qwq-32b-preview": { "description": "QwQ模型是由 Qwen 團隊開發的實驗性研究模型,專注於增強 AI 推理能力。" }, + "r1-1776": { + "description": "R1-1776 是 DeepSeek R1 模型的一個版本,經過後訓練,可提供未經審查、無偏見的事實資訊。" + }, "solar-mini": { "description": "Solar Mini 是一種緊湊型 LLM,性能優於 GPT-3.5,具備強大的多語言能力,支持英語和韓語,提供高效小巧的解決方案。" }, @@ -1655,6 +1649,9 @@ "sonar": { "description": "基於搜索上下文的輕量級搜索產品,比 Sonar Pro 更快、更便宜。" }, + "sonar-deep-research": { + "description": "Deep Research 進行全面的專家級研究,並將其綜合成可訪問、可行的報告。" + }, "sonar-pro": { "description": "支持搜索上下文的高級搜索產品,支持高級查詢和跟進。" }, diff --git a/src/config/aiModels/perplexity.ts b/src/config/aiModels/perplexity.ts index 8e3e381213166..c6f53f060b739 100644 --- a/src/config/aiModels/perplexity.ts +++ b/src/config/aiModels/perplexity.ts @@ -1,6 +1,25 @@ import { AIChatModelCard } from '@/types/aiModel'; const perplexityChatModels: AIChatModelCard[] = [ + { + abilities: { + reasoning: true, + search: true, + }, + contextWindowTokens: 127_072, + description: + 'Deep Research 进行全面的专家级研究,并将其综合成可访问、可作的报告。', + displayName: 'Sonar Deep Research', + enabled: true, + id: 'sonar-deep-research', + maxOutput: 8192, + pricing: { input: 2, output: 8 }, + releasedAt: '2025-02-14', + settings: { + searchImpl: 'internal', + }, + type: 'chat', + }, { abilities: { reasoning: true, @@ -12,6 +31,8 @@ const perplexityChatModels: AIChatModelCard[] = [ enabled: true, id: 'sonar-reasoning-pro', maxOutput: 8192, + pricing: { input: 2, output: 8 }, + releasedAt: '2025-01-21', settings: { searchImpl: 'internal', }, @@ -28,6 +49,8 @@ const perplexityChatModels: AIChatModelCard[] = [ enabled: true, id: 'sonar-reasoning', maxOutput: 8192, + pricing: { input: 1, output: 5 }, + releasedAt: '2025-01-21', settings: { searchImpl: 'internal', }, @@ -42,6 +65,8 @@ const perplexityChatModels: AIChatModelCard[] = [ displayName: 'Sonar Pro', enabled: true, id: 'sonar-pro', + pricing: { input: 3, output: 15 }, + releasedAt: '2025-01-21', settings: { searchImpl: 'internal', }, @@ -56,34 +81,25 @@ const perplexityChatModels: AIChatModelCard[] = [ displayName: 'Sonar', enabled: true, id: 'sonar', + pricing: { input: 1, output: 1 }, + releasedAt: '2025-01-21', settings: { searchImpl: 'internal', }, - type: 'chat', - }, - // The following will be deprecated on 02-22 - { - contextWindowTokens: 127_072, - description: - 'Llama 3.1 Sonar Small Online 模型,具备8B参数,支持约127,000个标记的上下文长度,专为在线聊天设计,能高效处理各种文本交互。', - displayName: 'Llama 3.1 Sonar Small Online', - id: 'llama-3.1-sonar-small-128k-online', - type: 'chat', - }, - { - contextWindowTokens: 
127_072, - description: - 'Llama 3.1 Sonar Large Online 模型,具备70B参数,支持约127,000个标记的上下文长度,适用于高容量和多样化聊天任务。', - displayName: 'Llama 3.1 Sonar Large Online', - id: 'llama-3.1-sonar-large-128k-online', + type: 'chat', }, { + abilities: { + reasoning: true, + }, contextWindowTokens: 127_072, description: - 'Llama 3.1 Sonar Huge Online 模型,具备405B参数,支持约127,000个标记的上下文长度,设计用于复杂的在线聊天应用。', - displayName: 'Llama 3.1 Sonar Huge Online', - id: 'llama-3.1-sonar-huge-128k-online', + 'R1-1776 是 DeepSeek R1 模型的一个版本,经过后训练,可提供未经审查、无偏见的事实信息。', + displayName: 'R1 1776', + id: 'r1-1776', + pricing: { input: 2, output: 8 }, + releasedAt: '2025-02-18', type: 'chat', }, ]; diff --git a/src/features/Conversation/Extras/Usage/UsageDetail/ModelCard.tsx b/src/features/Conversation/Extras/Usage/UsageDetail/ModelCard.tsx index 0988d1ce28847..ebc6f60fc98eb 100644 --- a/src/features/Conversation/Extras/Usage/UsageDetail/ModelCard.tsx +++ b/src/features/Conversation/Extras/Usage/UsageDetail/ModelCard.tsx @@ -2,7 +2,7 @@ import { ModelIcon } from '@lobehub/icons'; import { Icon, Tooltip } from '@lobehub/ui'; import { Segmented } from 'antd'; import { createStyles } from 'antd-style'; -import { ArrowDownToDot, ArrowUpFromDot, CircleFadingArrowUp } from 'lucide-react'; +import { ArrowDownToDot, ArrowUpFromDot, BookUp2Icon, CircleFadingArrowUp } from 'lucide-react'; import { memo } from 'react'; import { useTranslation } from 'react-i18next'; import { Flexbox } from 'react-layout-kit'; @@ -45,12 +45,16 @@ const ModelCard = memo(({ pricing, id, provider, displayName }) pricing?.cachedInput, pricing?.currency as ModelPriceCurrency, ); + const writeCacheInputPrice = formatPriceByCurrency( + pricing?.writeCacheInput, + pricing?.currency as ModelPriceCurrency, + ); const outputPrice = formatPriceByCurrency( pricing?.output, pricing?.currency as ModelPriceCurrency, ); return ( - <> + (({ pricing, id, provider, displayName }) )} - {isShowCredit && ( + {isShowCredit ? (
{t('messages.modelCard.creditPricing')}: + {pricing?.cachedInput && ( + + + + {cachedInputPrice} + + + )} {inputPrice} - {pricing?.cachedInput && ( + {pricing?.writeCacheInput && ( - - {cachedInputPrice} + + {writeCacheInputPrice} )} @@ -122,8 +138,10 @@ const ModelCard = memo(({ pricing, id, provider, displayName }) + ) : ( +
)} - + ); }); diff --git a/src/features/Conversation/Extras/Usage/UsageDetail/index.tsx b/src/features/Conversation/Extras/Usage/UsageDetail/index.tsx index bcc8d998d292f..579973c20aba2 100644 --- a/src/features/Conversation/Extras/Usage/UsageDetail/index.tsx +++ b/src/features/Conversation/Extras/Usage/UsageDetail/index.tsx @@ -37,6 +37,12 @@ const TokenDetail = memo(({ usage, model, provider }) => { title: t('messages.tokenDetails.inputAudio'), value: isShowCredit ? detailTokens.inputAudio.credit : detailTokens.inputAudio.token, }, + !!detailTokens.inputCitation && { + color: theme.orange, + id: 'inputText', + title: t('messages.tokenDetails.inputCitation'), + value: isShowCredit ? detailTokens.inputCitation.credit : detailTokens.inputCitation.token, + }, !!detailTokens.inputText && { color: theme.green, id: 'inputText', @@ -46,11 +52,13 @@ const TokenDetail = memo(({ usage, model, provider }) => { ].filter(Boolean) as TokenProgressItem[]; const outputDetails = [ - !!detailTokens.reasoning && { + !!detailTokens.outputReasoning && { color: theme.pink, id: 'reasoning', title: t('messages.tokenDetails.reasoning'), - value: isShowCredit ? detailTokens.reasoning.credit : detailTokens.reasoning.token, + value: isShowCredit + ? detailTokens.outputReasoning.credit + : detailTokens.outputReasoning.token, }, !!detailTokens.outputAudio && { color: theme.cyan9, @@ -67,18 +75,26 @@ const TokenDetail = memo(({ usage, model, provider }) => { ].filter(Boolean) as TokenProgressItem[]; const totalDetail = [ - !!detailTokens.uncachedInput && { + !!detailTokens.inputCacheMiss && { color: theme.colorFill, id: 'uncachedInput', title: t('messages.tokenDetails.inputUncached'), - value: isShowCredit ? detailTokens.uncachedInput.credit : detailTokens.uncachedInput.token, + value: isShowCredit ? detailTokens.inputCacheMiss.credit : detailTokens.inputCacheMiss.token, }, - !!detailTokens.cachedInput && { + !!detailTokens.inputCached && { color: theme.orange, - id: 'cachedInput', + id: 'inputCached', title: t('messages.tokenDetails.inputCached'), - value: isShowCredit ? detailTokens.cachedInput.credit : detailTokens.cachedInput.token, + value: isShowCredit ? detailTokens.inputCached.credit : detailTokens.inputCached.token, + }, + !!detailTokens.inputCachedWrite && { + color: theme.yellow, + id: 'cachedWriteInput', + title: t('messages.tokenDetails.inputWriteCached'), + value: isShowCredit + ? detailTokens.inputCachedWrite.credit + : detailTokens.inputCachedWrite.token, }, !!detailTokens.totalOutput && { color: theme.colorSuccess, @@ -91,43 +107,69 @@ const TokenDetail = memo(({ usage, model, provider }) => { const displayTotal = isShowCredit && !!detailTokens.totalTokens ? formatNumber(detailTokens.totalTokens.credit) - : formatNumber(usage.totalTokens); + : formatNumber(detailTokens.totalTokens!.token); + const averagePricing = formatNumber( + detailTokens.totalTokens!.credit / detailTokens.totalTokens!.token, + 2, + ); return ( + {modelCard && } - {inputDetails.length > 1 && ( - <> - -
- {t('messages.tokenDetails.inputTitle')} -
+ + + {inputDetails.length > 1 && ( + + +
+ {t('messages.tokenDetails.inputTitle')} +
+
+
- - - )} - {outputDetails.length > 1 && ( - <> - -
- {t('messages.tokenDetails.outputTitle')} + )} + {outputDetails.length > 1 && ( + <> + +
+ {t('messages.tokenDetails.outputTitle')} +
+
+ + + )} + + + + +
+ {t('messages.tokenDetails.total')}
+
{displayTotal}
- - - )} - - - - - -
- {t('messages.tokenDetails.total')} -
-
{displayTotal}
+ {isShowCredit && ( + +
+ {t('messages.tokenDetails.average')} +
+
{averagePricing}
+
+ )}
diff --git a/src/features/Conversation/Extras/Usage/UsageDetail/tokens.test.ts b/src/features/Conversation/Extras/Usage/UsageDetail/tokens.test.ts new file mode 100644 index 0000000000000..bd88e2b02f7be --- /dev/null +++ b/src/features/Conversation/Extras/Usage/UsageDetail/tokens.test.ts @@ -0,0 +1,253 @@ +import { describe, expect, it } from 'vitest'; + +import { LobeDefaultAiModelListItem } from '@/types/aiModel'; +import { ModelTokensUsage } from '@/types/message'; + +import { getDetailsToken } from './tokens'; + +describe('getDetailsToken', () => { + // 基本测试数据 + const mockModelCard: LobeDefaultAiModelListItem = { + pricing: { + input: 0.01, + output: 0.02, + cachedInput: 0.005, + audioInput: 0.03, + audioOutput: 0.04, + }, + } as LobeDefaultAiModelListItem; + + it('should return empty object when usage is empty', () => { + const usage: ModelTokensUsage = {}; + const result = getDetailsToken(usage); + + expect(result).toEqual({ + cachedInput: undefined, + inputAudio: undefined, + inputCitation: undefined, + inputText: undefined, + outputAudio: undefined, + outputText: undefined, + reasoning: undefined, + totalOutput: undefined, + totalTokens: undefined, + uncachedInput: undefined, + }); + }); + + it('should handle inputTextTokens correctly', () => { + const usage: ModelTokensUsage = { + inputTextTokens: 100, + }; + + const result = getDetailsToken(usage, mockModelCard); + + expect(result.inputText).toEqual({ + credit: 1, // 100 * 0.01 = 1 + token: 100, + }); + }); + + it('should handle legacy inputTokens property', () => { + const usage = { + inputTokens: 100, + } as any; + + const result = getDetailsToken(usage, mockModelCard); + + expect(result.inputText).toEqual({ + credit: 1, // 100 * 0.01 = 1 + token: 100, + }); + }); + + it('should handle cachedTokens correctly', () => { + const usage = { + totalInputTokens: 200, + cachedTokens: 50, + } as ModelTokensUsage; + + const result = getDetailsToken(usage, mockModelCard); + + expect(result.inputCached).toEqual({ + credit: 0, // 50 * 0.005 = 0.25, rounded to 0 + token: 50, + }); + + expect(result.inputCacheMiss).toEqual({ + credit: 2, // (200 - 50) * 0.01 = 1.5, rounded to 2 + token: 150, + }); + }); + + it('should handle outputTokens correctly', () => { + const usage = { outputTokens: 150 } as ModelTokensUsage; + + const result = getDetailsToken(usage, mockModelCard); + + expect(result.outputText).toEqual({ + credit: 3, // 150 * 0.02 = 3 + token: 150, + }); + + expect(result.totalOutput).toEqual({ + credit: 3, + token: 150, + }); + }); + + it('should handle reasoningTokens correctly', () => { + const usage = { + outputTokens: 200, + reasoningTokens: 50, + } as ModelTokensUsage; + + const result = getDetailsToken(usage, mockModelCard); + + expect(result.outputReasoning).toEqual({ + credit: 1, // 50 * 0.02 = 1 + token: 50, + }); + + expect(result.outputText).toEqual({ + credit: 3, // (200 - 50) * 0.02 = 3 + token: 150, + }); + }); + + it('should handle audio tokens correctly', () => { + const usage = { + inputAudioTokens: 100, + outputAudioTokens: 50, + outputTokens: 150, + } as ModelTokensUsage; + + const result = getDetailsToken(usage, mockModelCard); + + expect(result.inputAudio).toEqual({ + credit: 3, // 100 * 0.03 = 3 + token: 100, + }); + + expect(result.outputAudio).toEqual({ + credit: 2, // 50 * 0.04 = 2 + id: 'outputAudio', + token: 50, + }); + + expect(result.outputText).toEqual({ + credit: 2, // (150 - 50) * 0.02 = 2 + token: 100, + }); + }); + + it('should handle inputCitationTokens correctly', () => { + const usage: 
ModelTokensUsage = { + inputCitationTokens: 75, + }; + + const result = getDetailsToken(usage, mockModelCard); + + expect(result.inputCitation).toEqual({ + credit: 1, // 75 * 0.01 = 0.75, rounded to 1 + token: 75, + }); + }); + + it('should handle totalTokens correctly', () => { + const usage = { + totalTokens: 500, + totalInputTokens: 200, + inputCachedTokens: 50, + outputTokens: 300, + } as ModelTokensUsage; + + const result = getDetailsToken(usage, mockModelCard); + + // uncachedInput: (200 - 50) * 0.01 = 1.5 -> 2 + // cachedInput: 50 * 0.005 = 0.25 -> 0 + // totalOutput: 300 * 0.02 = 6 + // totalCredit = 2 + 0 + 6 = 8 + + expect(result.totalTokens).toEqual({ + credit: 8, + token: 500, + }); + }); + + it('should handle missing pricing information', () => { + const usage = { inputTextTokens: 100, outputTokens: 200 } as ModelTokensUsage; + + const result = getDetailsToken(usage); + + expect(result.inputText).toEqual({ + credit: '-', + token: 100, + }); + + expect(result.outputText).toEqual({ + credit: '-', + token: 200, + }); + }); + + it('should handle complex scenario with all token types', () => { + const usage: ModelTokensUsage = { + totalTokens: 1000, + totalInputTokens: 400, + inputTextTokens: 300, + inputAudioTokens: 50, + inputCitationTokens: 50, + inputCachedTokens: 100, + totalOutputTokens: 600, + outputAudioTokens: 100, + outputReasoningTokens: 200, + }; + + const result = getDetailsToken(usage, mockModelCard); + + expect(result).toMatchObject({ + inputCached: { + credit: 1, // 100 * 0.005 = 0.5, rounded to 1 + token: 100, + }, + inputCacheMiss: { + credit: 3, // (400 - 100) * 0.01 = 3 + token: 300, + }, + inputText: { + credit: 3, // 300 * 0.01 = 3 + token: 300, + }, + inputAudio: { + credit: 2, // 50 * 0.03 = 1.5, rounded to 2 + token: 50, + }, + inputCitation: { + credit: 1, // 50 * 0.01 = 0.5, rounded to 1 + token: 50, + }, + outputAudio: { + credit: 4, // 100 * 0.04 = 4 + id: 'outputAudio', + token: 100, + }, + outputReasoning: { + credit: 4, // 200 * 0.02 = 4 + token: 200, + }, + outputText: { + credit: 6, // (600 - 200 - 100) * 0.02 = 6 + token: 300, + }, + totalOutput: { + credit: 12, // 600 * 0.02 = 12 + token: 600, + }, + totalTokens: { + credit: 16, // 3 + 1 + 12 = 16 + token: 1000, + }, + }); + }); +}); diff --git a/src/features/Conversation/Extras/Usage/UsageDetail/tokens.ts b/src/features/Conversation/Extras/Usage/UsageDetail/tokens.ts index 649c3fd1cd533..45d9be4c4ceae 100644 --- a/src/features/Conversation/Extras/Usage/UsageDetail/tokens.ts +++ b/src/features/Conversation/Extras/Usage/UsageDetail/tokens.ts @@ -11,43 +11,75 @@ export const getDetailsToken = ( usage: ModelTokensUsage, modelCard?: LobeDefaultAiModelListItem, ) => { - const uncachedInputCredit = ( - !!usage.inputTokens - ? calcCredit(usage.inputTokens - (usage.cachedTokens || 0), modelCard?.pricing?.input) - : 0 + const inputTextTokens = usage.inputTextTokens || (usage as any).inputTokens || 0; + const totalInputTokens = usage.totalInputTokens || (usage as any).inputTokens || 0; + + const totalOutputTokens = usage.totalOutputTokens || (usage as any).outputTokens || 0; + + const outputReasoningTokens = usage.outputReasoningTokens || (usage as any).reasoningTokens || 0; + + const outputTextTokens = usage.outputTextTokens + ? 
usage.outputTextTokens
+    : totalOutputTokens - outputReasoningTokens - (usage.outputAudioTokens || 0);
+
+  const inputWriteCacheTokens = usage.inputWriteCacheTokens || 0;
+  const inputCacheTokens = usage.inputCachedTokens || (usage as any).cachedTokens || 0;
+
+  const inputCacheMissTokens = usage?.inputCacheMissTokens
+    ? usage?.inputCacheMissTokens
+    : totalInputTokens - (inputCacheTokens || 0);
+
+  const inputCacheMissCredit = (
+    !!inputCacheMissTokens ? calcCredit(inputCacheMissTokens, modelCard?.pricing?.input) : 0
   ) as number;
 
-  const cachedInputCredit = (
-    !!usage.cachedTokens ? calcCredit(usage.cachedTokens, modelCard?.pricing?.cachedInput) : 0
+  const inputCachedCredit = (
+    !!inputCacheTokens ? calcCredit(inputCacheTokens, modelCard?.pricing?.cachedInput) : 0
   ) as number;
 
-  const totalOutput = (
-    !!usage.outputTokens ? calcCredit(usage.outputTokens, modelCard?.pricing?.output) : 0
+  const inputWriteCachedCredit = !!inputWriteCacheTokens
+    ? (calcCredit(inputWriteCacheTokens, modelCard?.pricing?.writeCacheInput) as number)
+    : 0;
+
+  const totalOutputCredit = (
+    !!totalOutputTokens ? calcCredit(totalOutputTokens, modelCard?.pricing?.output) : 0
   ) as number;
+  const totalInputCredit = (
+    !!totalInputTokens ? calcCredit(totalInputTokens, modelCard?.pricing?.input) : 0
+  ) as number;
+
+  const totalCredit =
+    inputCacheMissCredit + inputCachedCredit + inputWriteCachedCredit + totalOutputCredit;
 
-  const totalTokens = uncachedInputCredit + cachedInputCredit + totalOutput;
   return {
-    cachedInput: !!usage.cachedTokens
-      ? {
-          credit: cachedInputCredit,
-          token: usage.cachedTokens,
-        }
-      : undefined,
     inputAudio: !!usage.inputAudioTokens
       ? {
          credit: calcCredit(usage.inputAudioTokens, modelCard?.pricing?.audioInput),
          token: usage.inputAudioTokens,
        }
      : undefined,
-    inputText: !!usage.inputTokens
+    inputCacheMiss: !!inputCacheMissTokens
+      ? { credit: inputCacheMissCredit, token: inputCacheMissTokens }
+      : undefined,
+    inputCached: !!inputCacheTokens
+      ? { credit: inputCachedCredit, token: inputCacheTokens }
+      : undefined,
+    inputCachedWrite: !!inputWriteCacheTokens
+      ? { credit: inputWriteCachedCredit, token: inputWriteCacheTokens }
+      : undefined,
+    inputCitation: !!usage.inputCitationTokens
+      ? {
+          credit: calcCredit(usage.inputCitationTokens, modelCard?.pricing?.input),
+          token: usage.inputCitationTokens,
+        }
+      : undefined,
+    inputText: !!inputTextTokens
       ? {
-          credit: calcCredit(
-            usage.inputTokens - (usage.inputAudioTokens || 0),
-            modelCard?.pricing?.input,
-          ),
-          token: usage.inputTokens - (usage.inputAudioTokens || 0),
+          credit: calcCredit(inputTextTokens, modelCard?.pricing?.input),
+          token: inputTextTokens,
         }
       : undefined,
+
     outputAudio: !!usage.outputAudioTokens
       ? {
          credit: calcCredit(usage.outputAudioTokens, modelCard?.pricing?.audioOutput),
@@ -55,40 +87,27 @@ export const getDetailsToken = (
          token: usage.outputAudioTokens,
        }
      : undefined,
-
-    outputText: !!usage.outputTokens
+    outputReasoning: !!outputReasoningTokens
       ? {
-          credit: calcCredit(
-            usage.outputTokens - (usage.reasoningTokens || 0) - (usage.outputAudioTokens || 0),
-            modelCard?.pricing?.output,
-          ),
-          token: usage.outputTokens - (usage.reasoningTokens || 0) - (usage.outputAudioTokens || 0),
+          credit: calcCredit(outputReasoningTokens, modelCard?.pricing?.output),
+          token: outputReasoningTokens,
        }
      : undefined,
-    reasoning: !!usage.reasoningTokens
+    outputText: !!outputTextTokens
      ? 
{ - credit: calcCredit(usage.reasoningTokens, modelCard?.pricing?.output), - token: usage.reasoningTokens, + credit: calcCredit(outputTextTokens, modelCard?.pricing?.output), + token: outputTextTokens, } : undefined, - totalOutput: !!usage.outputTokens - ? { - credit: totalOutput, - token: usage.outputTokens, - } + totalInput: !!totalInputTokens + ? { credit: totalInputCredit, token: totalInputTokens } : undefined, - totalTokens: !!usage.totalTokens - ? { - credit: totalTokens, - token: usage.totalTokens, - } + totalOutput: !!totalOutputTokens + ? { credit: totalOutputCredit, token: totalOutputTokens } : undefined, - uncachedInput: !!usage.inputTokens - ? { - credit: uncachedInputCredit, - token: usage.inputTokens - (usage.cachedTokens || 0), - } + totalTokens: !!usage.totalTokens + ? { credit: totalCredit, token: usage.totalTokens } : undefined, }; }; diff --git a/src/libs/agent-runtime/baichuan/index.test.ts b/src/libs/agent-runtime/baichuan/index.test.ts index edaa5d80bc284..9356333dbea64 100644 --- a/src/libs/agent-runtime/baichuan/index.test.ts +++ b/src/libs/agent-runtime/baichuan/index.test.ts @@ -1,5 +1,7 @@ // @vitest-environment node -import { ModelProvider } from '@/libs/agent-runtime'; +import { Mock } from 'vitest'; + +import { LobeOpenAICompatibleRuntime, ModelProvider } from '@/libs/agent-runtime'; import { testProvider } from '../providerTestUtils'; import { LobeBaichuanAI } from './index'; @@ -10,4 +12,59 @@ testProvider({ defaultBaseURL: 'https://api.baichuan-ai.com/v1', chatDebugEnv: 'DEBUG_BAICHUAN_CHAT_COMPLETION', chatModel: 'hunyuan-lite', + test: { + skipAPICall: true, + }, +}); + +let instance: LobeOpenAICompatibleRuntime; + +beforeEach(() => { + instance = new LobeBaichuanAI({ apiKey: 'test' }); + + // 使用 vi.spyOn 来模拟 chat.completions.create 方法 + vi.spyOn(instance['client'].chat.completions, 'create').mockResolvedValue( + new ReadableStream() as any, + ); +}); + +afterEach(() => { + vi.clearAllMocks(); +}); + +describe('specific LobeBaichuanAI tests', () => { + it(`should call API with corresponding options`, async () => { + // Arrange + const mockStream = new ReadableStream(); + const mockResponse = Promise.resolve(mockStream); + + (instance['client'].chat.completions.create as Mock).mockResolvedValue(mockResponse); + + // Act + const result = await instance.chat({ + max_tokens: 1024, + messages: [{ content: 'Hello', role: 'user' }], + model: 'open-mistral-7b', + temperature: 0.7, + stream: true, + top_p: 1, + }); + + // Assert + expect(instance['client'].chat.completions.create).toHaveBeenCalledWith( + { + max_tokens: 1024, + messages: [{ content: 'Hello', role: 'user' }], + model: 'open-mistral-7b', + stream: true, + stream_options: { + include_usage: true, + }, + temperature: 0.35, + top_p: 1, + }, + { headers: { Accept: '*/*' } }, + ); + expect(result).toBeInstanceOf(Response); + }); }); diff --git a/src/libs/agent-runtime/groq/index.test.ts b/src/libs/agent-runtime/groq/index.test.ts index d91aba1b12909..b4c07dfe42234 100644 --- a/src/libs/agent-runtime/groq/index.test.ts +++ b/src/libs/agent-runtime/groq/index.test.ts @@ -1,17 +1,18 @@ // @vitest-environment node -import OpenAI from 'openai'; -import { Mock, afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; import { LobeOpenAICompatibleRuntime } from '@/libs/agent-runtime'; +import { testProvider } from '@/libs/agent-runtime/providerTestUtils'; -import * as debugStreamModule from '../utils/debugStream'; import { 
LobeGroq } from './index'; -const provider = 'groq'; -const defaultBaseURL = 'https://api.groq.com/openai/v1'; - -const bizErrorType = 'ProviderBizError'; -const invalidErrorType = 'InvalidProviderAPIKey'; +testProvider({ + provider: 'groq', + defaultBaseURL: 'https://api.groq.com/openai/v1', + chatModel: 'mistralai/mistral-7b-instruct:free', + Runtime: LobeGroq, + chatDebugEnv: 'DEBUG_GROQ_CHAT_COMPLETION', +}); // Mock the console.error to avoid polluting test output vi.spyOn(console, 'error').mockImplementation(() => {}); @@ -31,295 +32,46 @@ afterEach(() => { vi.clearAllMocks(); }); -describe('LobeGroqAI', () => { - describe('init', () => { - it('should correctly initialize with an API key', async () => { - const instance = new LobeGroq({ apiKey: 'test_api_key' }); - expect(instance).toBeInstanceOf(LobeGroq); - expect(instance.baseURL).toEqual(defaultBaseURL); - }); - }); - - describe('chat', () => { - it('should call chat with corresponding options', async () => { - // Arrange - const mockStream = new ReadableStream(); - const mockResponse = Promise.resolve(mockStream); - - (instance['client'].chat.completions.create as Mock).mockResolvedValue(mockResponse); - - // Act - const result = await instance.chat({ - max_tokens: 1024, - messages: [{ content: 'Hello', role: 'user' }], - model: 'mistralai/mistral-7b-instruct:free', - temperature: 0.7, - top_p: 1, - }); - - // Assert - expect(instance['client'].chat.completions.create).toHaveBeenCalledWith( - { - max_tokens: 1024, - stream: true, - messages: [{ content: 'Hello', role: 'user' }], - model: 'mistralai/mistral-7b-instruct:free', - temperature: 0.7, - top_p: 1, - }, - { headers: { Accept: '*/*' } }, - ); - expect(result).toBeInstanceOf(Response); - }); - - describe('handlePayload option', () => { - it('should set stream to false when payload contains tools', async () => { - const mockCreateMethod = vi - .spyOn(instance['client'].chat.completions, 'create') - .mockResolvedValue({ - id: 'chatcmpl-8xDx5AETP8mESQN7UB30GxTN2H1SO', - object: 'chat.completion', - created: 1709125675, - model: 'mistralai/mistral-7b-instruct:free', - system_fingerprint: 'fp_86156a94a0', - choices: [ - { - index: 0, - message: { role: 'assistant', content: 'hello', refusal: null }, - logprobs: null, - finish_reason: 'stop', - }, - ], - }); - - await instance.chat({ - messages: [{ content: 'Hello', role: 'user' }], +describe('LobeGroqAI Temperature Tests', () => { + describe('handlePayload option', () => { + it('should set stream to false when payload contains tools', async () => { + const mockCreateMethod = vi + .spyOn(instance['client'].chat.completions, 'create') + .mockResolvedValue({ + id: 'chatcmpl-8xDx5AETP8mESQN7UB30GxTN2H1SO', + object: 'chat.completion', + created: 1709125675, model: 'mistralai/mistral-7b-instruct:free', - temperature: 0, - tools: [ + system_fingerprint: 'fp_86156a94a0', + choices: [ { - type: 'function', - function: { name: 'tool1', description: '', parameters: {} }, + index: 0, + message: { role: 'assistant', content: 'hello', refusal: null }, + logprobs: null, + finish_reason: 'stop', }, ], }); - expect(mockCreateMethod).toHaveBeenCalledWith( - expect.objectContaining({ stream: false }), - expect.anything(), - ); - }); - }); - - describe('Error', () => { - it('should return OpenRouterBizError with an openai error response when OpenAI.APIError is thrown', async () => { - // Arrange - const apiError = new OpenAI.APIError( - 400, + await instance.chat({ + messages: [{ content: 'Hello', role: 'user' }], + model: 
'mistralai/mistral-7b-instruct:free', + temperature: 0, + tools: [ { - status: 400, - error: { - message: 'Bad Request', - }, + type: 'function', + function: { name: 'tool1', description: '', parameters: {} }, }, - 'Error message', - {}, - ); - - vi.spyOn(instance['client'].chat.completions, 'create').mockRejectedValue(apiError); - - // Act - try { - await instance.chat({ - messages: [{ content: 'Hello', role: 'user' }], - model: 'mistralai/mistral-7b-instruct:free', - temperature: 0, - }); - } catch (e) { - expect(e).toEqual({ - endpoint: defaultBaseURL, - error: { - error: { message: 'Bad Request' }, - status: 400, - }, - errorType: bizErrorType, - provider, - }); - } - }); - - it('should throw AgentRuntimeError with InvalidOpenRouterAPIKey if no apiKey is provided', async () => { - try { - new LobeGroq({}); - } catch (e) { - expect(e).toEqual({ errorType: invalidErrorType }); - } + ], }); - it('should return OpenRouterBizError with the cause when OpenAI.APIError is thrown with cause', async () => { - // Arrange - const errorInfo = { - stack: 'abc', - cause: { - message: 'api is undefined', - }, - }; - const apiError = new OpenAI.APIError(400, errorInfo, 'module error', {}); - - vi.spyOn(instance['client'].chat.completions, 'create').mockRejectedValue(apiError); - - // Act - try { - await instance.chat({ - messages: [{ content: 'Hello', role: 'user' }], - model: 'mistralai/mistral-7b-instruct:free', - temperature: 0, - }); - } catch (e) { - expect(e).toEqual({ - endpoint: defaultBaseURL, - error: { - cause: { message: 'api is undefined' }, - stack: 'abc', - }, - errorType: bizErrorType, - provider, - }); - } - }); - - it('should return OpenRouterBizError with an cause response with desensitize Url', async () => { - // Arrange - const errorInfo = { - stack: 'abc', - cause: { message: 'api is undefined' }, - }; - const apiError = new OpenAI.APIError(400, errorInfo, 'module error', {}); - - instance = new LobeGroq({ - apiKey: 'test', - - baseURL: 'https://api.abc.com/v1', - }); - - vi.spyOn(instance['client'].chat.completions, 'create').mockRejectedValue(apiError); - - // Act - try { - await instance.chat({ - messages: [{ content: 'Hello', role: 'user' }], - model: 'mistralai/mistral-7b-instruct:free', - temperature: 0, - }); - } catch (e) { - expect(e).toEqual({ - endpoint: 'https://api.***.com/v1', - error: { - cause: { message: 'api is undefined' }, - stack: 'abc', - }, - errorType: bizErrorType, - provider, - }); - } - }); - - it('should throw an InvalidOpenRouterAPIKey error type on 401 status code', async () => { - // Mock the API call to simulate a 401 error - const error = new Error('Unauthorized') as any; - error.status = 401; - vi.mocked(instance['client'].chat.completions.create).mockRejectedValue(error); - - try { - await instance.chat({ - messages: [{ content: 'Hello', role: 'user' }], - model: 'mistralai/mistral-7b-instruct:free', - temperature: 0, - }); - } catch (e) { - // Expect the chat method to throw an error with InvalidMoonshotAPIKey - expect(e).toEqual({ - endpoint: defaultBaseURL, - error: new Error('Unauthorized'), - errorType: invalidErrorType, - provider, - }); - } - }); - - it('should return AgentRuntimeError for non-OpenAI errors', async () => { - // Arrange - const genericError = new Error('Generic Error'); - - vi.spyOn(instance['client'].chat.completions, 'create').mockRejectedValue(genericError); - - // Act - try { - await instance.chat({ - messages: [{ content: 'Hello', role: 'user' }], - model: 'mistralai/mistral-7b-instruct:free', - temperature: 0, - }); - 
} catch (e) { - expect(e).toEqual({ - endpoint: defaultBaseURL, - errorType: 'AgentRuntimeError', - provider, - error: { - name: genericError.name, - cause: genericError.cause, - message: genericError.message, - stack: genericError.stack, - }, - }); - } - }); - }); - - describe('DEBUG', () => { - it('should call debugStream and return StreamingTextResponse when DEBUG_OPENROUTER_CHAT_COMPLETION is 1', async () => { - // Arrange - const mockProdStream = new ReadableStream() as any; // 模拟的 prod 流 - const mockDebugStream = new ReadableStream({ - start(controller) { - controller.enqueue('Debug stream content'); - controller.close(); - }, - }) as any; - mockDebugStream.toReadableStream = () => mockDebugStream; // 添加 toReadableStream 方法 - - // 模拟 chat.completions.create 返回值,包括模拟的 tee 方法 - (instance['client'].chat.completions.create as Mock).mockResolvedValue({ - tee: () => [mockProdStream, { toReadableStream: () => mockDebugStream }], - }); - - // 保存原始环境变量值 - const originalDebugValue = process.env.DEBUG_GROQ_CHAT_COMPLETION; - - // 模拟环境变量 - process.env.DEBUG_GROQ_CHAT_COMPLETION = '1'; - vi.spyOn(debugStreamModule, 'debugStream').mockImplementation(() => Promise.resolve()); - - // 执行测试 - // 运行你的测试函数,确保它会在条件满足时调用 debugStream - // 假设的测试函数调用,你可能需要根据实际情况调整 - await instance.chat({ - messages: [{ content: 'Hello', role: 'user' }], - model: 'mistralai/mistral-7b-instruct:free', - temperature: 0, - }); - - // 验证 debugStream 被调用 - expect(debugStreamModule.debugStream).toHaveBeenCalled(); - - // 恢复原始环境变量值 - process.env.DEBUG_GROQ_CHAT_COMPLETION = originalDebugValue; - }); + expect(mockCreateMethod).toHaveBeenCalledWith( + expect.objectContaining({ stream: false }), + expect.anything(), + ); }); }); -}); -describe('LobeGroqAI Temperature Tests', () => { it('should set temperature to 0.7', async () => { await instance.chat({ messages: [{ content: 'Hello', role: 'user' }], diff --git a/src/libs/agent-runtime/mistral/index.test.ts b/src/libs/agent-runtime/mistral/index.test.ts index b4d2f2e9a9958..80e385308dc8a 100644 --- a/src/libs/agent-runtime/mistral/index.test.ts +++ b/src/libs/agent-runtime/mistral/index.test.ts @@ -1,19 +1,22 @@ // @vitest-environment node -import OpenAI from 'openai'; -import { Mock, afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import { Mock, afterEach, beforeEach, expect, it, vi } from 'vitest'; -import { ChatStreamCallbacks, LobeOpenAICompatibleRuntime } from '@/libs/agent-runtime'; +import { LobeOpenAICompatibleRuntime } from '@/libs/agent-runtime'; +import { testProvider } from '@/libs/agent-runtime/providerTestUtils'; -import * as debugStreamModule from '../utils/debugStream'; import { LobeMistralAI } from './index'; -const provider = 'mistral'; -const defaultBaseURL = 'https://api.mistral.ai/v1'; -const bizErrorType = 'ProviderBizError'; -const invalidErrorType = 'InvalidProviderAPIKey'; +testProvider({ + provider: 'mistral', + defaultBaseURL: 'https://api.mistral.ai/v1', + chatModel: 'open-mistral-7b', + Runtime: LobeMistralAI, + chatDebugEnv: 'DEBUG_MISTRAL_CHAT_COMPLETION', -// Mock the console.error to avoid polluting test output -vi.spyOn(console, 'error').mockImplementation(() => {}); + test: { + skipAPICall: true, + }, +}); let instance: LobeOpenAICompatibleRuntime; @@ -30,302 +33,38 @@ afterEach(() => { vi.clearAllMocks(); }); -describe('LobeMistralAI', () => { - describe('init', () => { - it('should correctly initialize with an API key', async () => { - const instance = new LobeMistralAI({ apiKey: 'test_api_key' }); - 
expect(instance).toBeInstanceOf(LobeMistralAI); - expect(instance.baseURL).toEqual(defaultBaseURL); +describe('specific LobeMistralAI tests', () => { + it(`should call API with corresponding options`, async () => { + // Arrange + const mockStream = new ReadableStream(); + const mockResponse = Promise.resolve(mockStream); + + (instance['client'].chat.completions.create as Mock).mockResolvedValue(mockResponse); + + // Act + const result = await instance.chat({ + max_tokens: 1024, + messages: [{ content: 'Hello', role: 'user' }], + model: 'open-mistral-7b', + temperature: 0.7, + top_p: 1, }); - }); - - describe('chat', () => { - it('should return a StreamingTextResponse on successful API call', async () => { - // Arrange - const mockStream = new ReadableStream(); - const mockResponse = Promise.resolve(mockStream); - - (instance['client'].chat.completions.create as Mock).mockResolvedValue(mockResponse); - - // Act - const result = await instance.chat({ - messages: [{ content: 'Hello', role: 'user' }], - model: 'open-mistral-7b', - temperature: 0, - }); - // Assert - expect(result).toBeInstanceOf(Response); - }); - - it('should call Mistral API with supported options in streaming mode', async () => { - // Arrange - const mockStream = new ReadableStream(); - const mockResponse = Promise.resolve(mockStream); - - (instance['client'].chat.completions.create as Mock).mockResolvedValue(mockResponse); - - // Act - const result = await instance.chat({ + // Assert + expect(instance['client'].chat.completions.create).toHaveBeenCalledWith( + { max_tokens: 1024, messages: [{ content: 'Hello', role: 'user' }], model: 'open-mistral-7b', - temperature: 0.7, - top_p: 1, - }); - - // Assert - expect(instance['client'].chat.completions.create).toHaveBeenCalledWith( - { - max_tokens: 1024, - messages: [{ content: 'Hello', role: 'user' }], - model: 'open-mistral-7b', - stream: true, - temperature: 0.35, - top_p: 1, + stream: true, + stream_options: { + include_usage: true, }, - { headers: { Accept: '*/*' } }, - ); - expect(result).toBeInstanceOf(Response); - }); - - it('should call Mistral API without unsupported options', async () => { - // Arrange - const mockStream = new ReadableStream(); - const mockResponse = Promise.resolve(mockStream); - - (instance['client'].chat.completions.create as Mock).mockResolvedValue(mockResponse); - - // Act - const result = await instance.chat({ - frequency_penalty: 0.5, // unsupported option - max_tokens: 1024, - messages: [{ content: 'Hello', role: 'user' }], - model: 'open-mistral-7b', - presence_penalty: 0.5, // unsupported option - temperature: 0.7, + temperature: 0.35, top_p: 1, - }); - - // Assert - expect(instance['client'].chat.completions.create).toHaveBeenCalledWith( - { - max_tokens: 1024, - messages: [{ content: 'Hello', role: 'user' }], - model: 'open-mistral-7b', - stream: true, - temperature: 0.35, - top_p: 1, - }, - { headers: { Accept: '*/*' } }, - ); - expect(result).toBeInstanceOf(Response); - }); - - describe('Error', () => { - it('should return MistralBizError with an openai error response when OpenAI.APIError is thrown', async () => { - // Arrange - const apiError = new OpenAI.APIError( - 400, - { - status: 400, - error: { - message: 'Bad Request', - }, - }, - 'Error message', - {}, - ); - - vi.spyOn(instance['client'].chat.completions, 'create').mockRejectedValue(apiError); - - // Act - try { - await instance.chat({ - messages: [{ content: 'Hello', role: 'user' }], - model: 'open-mistral-7b', - temperature: 0, - }); - } catch (e) { - expect(e).toEqual({ - 
endpoint: defaultBaseURL, - error: { - error: { message: 'Bad Request' }, - status: 400, - }, - errorType: bizErrorType, - provider, - }); - } - }); - - it('should throw AgentRuntimeError with InvalidMistralAPIKey if no apiKey is provided', async () => { - try { - new LobeMistralAI({}); - } catch (e) { - expect(e).toEqual({ errorType: invalidErrorType }); - } - }); - - it('should return MistralBizError with the cause when OpenAI.APIError is thrown with cause', async () => { - // Arrange - const errorInfo = { - stack: 'abc', - cause: { - message: 'api is undefined', - }, - }; - const apiError = new OpenAI.APIError(400, errorInfo, 'module error', {}); - - vi.spyOn(instance['client'].chat.completions, 'create').mockRejectedValue(apiError); - - // Act - try { - await instance.chat({ - messages: [{ content: 'Hello', role: 'user' }], - model: 'open-mistral-7b', - temperature: 0, - }); - } catch (e) { - expect(e).toEqual({ - endpoint: defaultBaseURL, - error: { - cause: { message: 'api is undefined' }, - stack: 'abc', - }, - errorType: bizErrorType, - provider, - }); - } - }); - - it('should return MistralBizError with an cause response with desensitize Url', async () => { - // Arrange - const errorInfo = { - stack: 'abc', - cause: { message: 'api is undefined' }, - }; - const apiError = new OpenAI.APIError(400, errorInfo, 'module error', {}); - - instance = new LobeMistralAI({ - apiKey: 'test', - - baseURL: 'https://api.abc.com/v1', - }); - - vi.spyOn(instance['client'].chat.completions, 'create').mockRejectedValue(apiError); - - // Act - try { - await instance.chat({ - messages: [{ content: 'Hello', role: 'user' }], - model: 'open-mistral-7b', - temperature: 0, - }); - } catch (e) { - expect(e).toEqual({ - endpoint: 'https://api.***.com/v1', - error: { - cause: { message: 'api is undefined' }, - stack: 'abc', - }, - errorType: bizErrorType, - provider, - }); - } - }); - - it('should throw an InvalidMistralAPIKey error type on 401 status code', async () => { - // Mock the API call to simulate a 401 error - const error = new Error('Unauthorized') as any; - error.status = 401; - vi.mocked(instance['client'].chat.completions.create).mockRejectedValue(error); - - try { - await instance.chat({ - messages: [{ content: 'Hello', role: 'user' }], - model: 'gpt-3.5-turbo', - temperature: 0, - }); - } catch (e) { - // Expect the chat method to throw an error with InvalidMoonshotAPIKey - expect(e).toEqual({ - endpoint: defaultBaseURL, - error: new Error('Unauthorized'), - errorType: invalidErrorType, - provider, - }); - } - }); - - it('should return AgentRuntimeError for non-OpenAI errors', async () => { - // Arrange - const genericError = new Error('Generic Error'); - - vi.spyOn(instance['client'].chat.completions, 'create').mockRejectedValue(genericError); - - // Act - try { - await instance.chat({ - messages: [{ content: 'Hello', role: 'user' }], - model: 'open-mistral-7b', - temperature: 0, - }); - } catch (e) { - expect(e).toEqual({ - endpoint: defaultBaseURL, - errorType: 'AgentRuntimeError', - provider, - error: { - name: genericError.name, - cause: genericError.cause, - message: genericError.message, - stack: genericError.stack, - }, - }); - } - }); - }); - - describe('DEBUG', () => { - it('should call debugStream and return StreamingTextResponse when DEBUG_MISTRAL_CHAT_COMPLETION is 1', async () => { - // Arrange - const mockProdStream = new ReadableStream() as any; // 模拟的 prod 流 - const mockDebugStream = new ReadableStream({ - start(controller) { - controller.enqueue('Debug stream content'); - 
controller.close(); - }, - }) as any; - mockDebugStream.toReadableStream = () => mockDebugStream; // 添加 toReadableStream 方法 - - // 模拟 chat.completions.create 返回值,包括模拟的 tee 方法 - (instance['client'].chat.completions.create as Mock).mockResolvedValue({ - tee: () => [mockProdStream, { toReadableStream: () => mockDebugStream }], - }); - - // 保存原始环境变量值 - const originalDebugValue = process.env.DEBUG_MISTRAL_CHAT_COMPLETION; - - // 模拟环境变量 - process.env.DEBUG_MISTRAL_CHAT_COMPLETION = '1'; - vi.spyOn(debugStreamModule, 'debugStream').mockImplementation(() => Promise.resolve()); - - // 执行测试 - // 运行你的测试函数,确保它会在条件满足时调用 debugStream - // 假设的测试函数调用,你可能需要根据实际情况调整 - await instance.chat({ - messages: [{ content: 'Hello', role: 'user' }], - model: 'open-mistral-7b', - temperature: 0, - }); - - // 验证 debugStream 被调用 - expect(debugStreamModule.debugStream).toHaveBeenCalled(); - - // 恢复原始环境变量值 - process.env.DEBUG_MISTRAL_CHAT_COMPLETION = originalDebugValue; - }); - }); + }, + { headers: { Accept: '*/*' } }, + ); + expect(result).toBeInstanceOf(Response); }); }); diff --git a/src/libs/agent-runtime/perplexity/index.test.ts b/src/libs/agent-runtime/perplexity/index.test.ts index eb996d02ba9cc..814ddd549bd68 100644 --- a/src/libs/agent-runtime/perplexity/index.test.ts +++ b/src/libs/agent-runtime/perplexity/index.test.ts @@ -163,13 +163,13 @@ describe('LobePerplexityAI', () => { }, { id: '506d64fb-e7f2-4d94-b80f-158369e9446d', - model: 'sonar-pro', - created: 1739896615, + model: 'sonar-reasoning-pro', + created: 1741250924, usage: { - prompt_tokens: 4, - completion_tokens: 14, - total_tokens: 18, - citation_tokens: 2217, + prompt_tokens: 2, + completion_tokens: 685, + total_tokens: 687, + citation_tokens: 3058, num_search_queries: 1, }, citations: [ @@ -185,11 +185,13 @@ describe('LobePerplexityAI', () => { choices: [ { index: 0, - finish_reason: null, + finish_reason: 'stop', message: { role: 'assistant', - content: '杭州今天和未来几天的天气预报如', + content: + '\n好的,我现在要处理用户的我需要确定这个查询的类型。用户显然是在询问当前的天气情况和预报,因此属于天气预报类型。接下来我要查看提供的搜索结果,看看这些来源是否能提供准确的信息。\n\n第一个来源是weather.com.cn的、西北风5~6级等。接着查看第二个结果[2]是中央气象台的详细分时数据,比如7月18日和21日的温度、降水、风速等信息。[3]来自中国气象局的气象预报显示有阴天和多云交替的情况,(如星期三03/05阴温暖但空气质量差。[6][7]则是杭州市气象台的最新天气预报发布情况:后半夜转多云明天白天继续多云的天气。\n\n现在要将这些信息整合起来形成连贯的回答。需要注意是否有矛盾的地方以及按照可信度部或东北部常见四至五级阵风;昼夜温差较大比如最高温可达20多摄氏度最低至10℃左右这样需要提醒注意衣物调整防寒保暖同时也指出空气质量在某些时段可能不佳特别是根据[5],AccuWeather提示空气质响出行健康的重点要素如空气指标并且保证引用每个相关数据都注明正确的出处编号避免遗漏重要细节同时保持回答简洁明了使用户一目了然.\n\n\n杭州近期以阴到多云天气为主,夜间偶有小雨[1 未来三日预报\n- **今天傍晚至夜间**:局部小雨渐止转阴到多云\\[6\\] [7]\n- **明日(周六)** \n - 白天多云为主 \n - 温度区间16℃~22℃,西北风5~6级 \\[2\\] [3]\n- **后天(周日)**\n\\] [3]\n\n### *注意事项*\n1. **昼夜温差大**:早晚低温多在10°C以下需加外套防风保温;\n2. 
**空气污染警告** AccuWeather指出当地PM指数超标易引发达呼吸道不适建议尽量减少户外长时间活动时r/china/zjejiang/hangzhou" target="_blank">墨迹实况雷达图获取临近降水动态.', }, + delta: { role: 'assistant', content: '' }, }, ], }, @@ -238,8 +240,8 @@ describe('LobePerplexityAI', () => { 'event: text', 'data: "天和未来几天的"\n', 'id: 506d64fb-e7f2-4d94-b80f-158369e9446d', - 'event: data', - 'data: {"id":"506d64fb-e7f2-4d94-b80f-158369e9446d","index":0}\n', + 'event: usage', + 'data: {"inputCitationTokens":3058,"inputTextTokens":2,"outputTextTokens":685,"totalInputTokens":3060,"totalOutputTokens":685,"totalTokens":3745}\n', ].map((line) => `${line}\n`), ); diff --git a/src/libs/agent-runtime/providerTestUtils.ts b/src/libs/agent-runtime/providerTestUtils.ts index c0c4f6101b5ae..4956b48bf40f2 100644 --- a/src/libs/agent-runtime/providerTestUtils.ts +++ b/src/libs/agent-runtime/providerTestUtils.ts @@ -13,6 +13,9 @@ interface TesstProviderParams { defaultBaseURL: string; invalidErrorType?: string; provider: string; + test?: { + skipAPICall?: boolean; + }; } export const testProvider = ({ @@ -23,6 +26,7 @@ export const testProvider = ({ Runtime, chatDebugEnv, chatModel, + test = {}, }: TesstProviderParams) => { // Mock the console.error to avoid polluting test output vi.spyOn(console, 'error').mockImplementation(() => {}); @@ -52,6 +56,60 @@ export const testProvider = ({ }); describe('chat', () => { + it('should return a StreamingTextResponse on successful API call', async () => { + // Arrange + const mockStream = new ReadableStream(); + const mockResponse = Promise.resolve(mockStream); + + (instance['client'].chat.completions.create as Mock).mockResolvedValue(mockResponse); + + // Act + const result = await instance.chat({ + messages: [{ content: 'Hello', role: 'user' }], + model: chatModel, + temperature: 0, + }); + + // Assert + expect(result).toBeInstanceOf(Response); + }); + + if (!test?.skipAPICall) { + it(`should call ${provider} API with corresponding options`, async () => { + // Arrange + const mockStream = new ReadableStream(); + const mockResponse = Promise.resolve(mockStream); + + (instance['client'].chat.completions.create as Mock).mockResolvedValue(mockResponse); + + // Act + const result = await instance.chat({ + max_tokens: 1024, + messages: [{ content: 'Hello', role: 'user' }], + model: chatModel, + temperature: 0.7, + top_p: 1, + }); + + // Assert + expect(instance['client'].chat.completions.create).toHaveBeenCalledWith( + { + max_tokens: 1024, + messages: [{ content: 'Hello', role: 'user' }], + model: chatModel, + stream: true, + stream_options: { + include_usage: true, + }, + temperature: 0.7, + top_p: 1, + }, + { headers: { Accept: '*/*' } }, + ); + expect(result).toBeInstanceOf(Response); + }); + } + describe('Error', () => { it('should return OpenAIBizError with an openai error response when OpenAI.APIError is thrown', async () => { // Arrange diff --git a/src/libs/agent-runtime/togetherai/index.test.ts b/src/libs/agent-runtime/togetherai/index.test.ts index 0ff9085c4c5d6..9aa8bf8097b8a 100644 --- a/src/libs/agent-runtime/togetherai/index.test.ts +++ b/src/libs/agent-runtime/togetherai/index.test.ts @@ -1,300 +1,12 @@ // @vitest-environment node -import OpenAI from 'openai'; -import { Mock, afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import { testProvider } from '@/libs/agent-runtime/providerTestUtils'; -import { LobeOpenAICompatibleRuntime } from '@/libs/agent-runtime'; - -import * as debugStreamModule from '../utils/debugStream'; -import models from './fixtures/models.json'; import { LobeTogetherAI } from 
'./index'; -const provider = 'togetherai'; -const defaultBaseURL = 'https://api.together.xyz/v1'; - -const bizErrorType = 'ProviderBizError'; -const invalidErrorType = 'InvalidProviderAPIKey'; - -// Mock the console.error to avoid polluting test output -vi.spyOn(console, 'error').mockImplementation(() => {}); - -let instance: LobeOpenAICompatibleRuntime; - -beforeEach(() => { - instance = new LobeTogetherAI({ apiKey: 'test' }); - - // 使用 vi.spyOn 来模拟 chat.completions.create 方法 - vi.spyOn(instance['client'].chat.completions, 'create').mockResolvedValue( - new ReadableStream() as any, - ); -}); - -afterEach(() => { - vi.clearAllMocks(); -}); - -describe('LobeTogetherAI', () => { - describe('init', () => { - it('should correctly initialize with an API key', async () => { - const instance = new LobeTogetherAI({ apiKey: 'test_api_key' }); - expect(instance).toBeInstanceOf(LobeTogetherAI); - expect(instance.baseURL).toEqual(defaultBaseURL); - }); - }); - - describe('chat', () => { - it('should return a StreamingTextResponse on successful API call', async () => { - // Arrange - const mockStream = new ReadableStream(); - const mockResponse = Promise.resolve(mockStream); - - (instance['client'].chat.completions.create as Mock).mockResolvedValue(mockResponse); - - // Act - const result = await instance.chat({ - messages: [{ content: 'Hello', role: 'user' }], - model: 'mistralai/mistral-7b-instruct:free', - temperature: 0, - }); - - // Assert - expect(result).toBeInstanceOf(Response); - }); - - it('should call TogetherAI API with corresponding options', async () => { - // Arrange - const mockStream = new ReadableStream(); - const mockResponse = Promise.resolve(mockStream); - - (instance['client'].chat.completions.create as Mock).mockResolvedValue(mockResponse); - - // Act - const result = await instance.chat({ - max_tokens: 1024, - messages: [{ content: 'Hello', role: 'user' }], - model: 'mistralai/mistral-7b-instruct:free', - temperature: 0.7, - top_p: 1, - }); - - // Assert - expect(instance['client'].chat.completions.create).toHaveBeenCalledWith( - { - max_tokens: 1024, - messages: [{ content: 'Hello', role: 'user' }], - model: 'mistralai/mistral-7b-instruct:free', - temperature: 0.7, - stream: true, - top_p: 1, - }, - { headers: { Accept: '*/*' } }, - ); - expect(result).toBeInstanceOf(Response); - }); - - describe('Error', () => { - it('should return TogetherAIBizError with an openai error response when OpenAI.APIError is thrown', async () => { - // Arrange - const apiError = new OpenAI.APIError( - 400, - { - status: 400, - error: { - message: 'Bad Request', - }, - }, - 'Error message', - {}, - ); - - vi.spyOn(instance['client'].chat.completions, 'create').mockRejectedValue(apiError); - - // Act - try { - await instance.chat({ - messages: [{ content: 'Hello', role: 'user' }], - model: 'mistralai/mistral-7b-instruct:free', - temperature: 0, - }); - } catch (e) { - expect(e).toEqual({ - endpoint: defaultBaseURL, - error: { - error: { message: 'Bad Request' }, - status: 400, - }, - errorType: bizErrorType, - provider, - }); - } - }); - - it('should throw AgentRuntimeError with InvalidTogetherAIAPIKey if no apiKey is provided', async () => { - try { - new LobeTogetherAI({}); - } catch (e) { - expect(e).toEqual({ errorType: invalidErrorType }); - } - }); - - it('should return TogetherAIBizError with the cause when OpenAI.APIError is thrown with cause', async () => { - // Arrange - const errorInfo = { - stack: 'abc', - cause: { - message: 'api is undefined', - }, - }; - const apiError = new 
OpenAI.APIError(400, errorInfo, 'module error', {}); - - vi.spyOn(instance['client'].chat.completions, 'create').mockRejectedValue(apiError); - - // Act - try { - await instance.chat({ - messages: [{ content: 'Hello', role: 'user' }], - model: 'mistralai/mistral-7b-instruct:free', - temperature: 0, - }); - } catch (e) { - expect(e).toEqual({ - endpoint: defaultBaseURL, - error: { - cause: { message: 'api is undefined' }, - stack: 'abc', - }, - errorType: bizErrorType, - provider, - }); - } - }); - - it('should return TogetherAIBizError with an cause response with desensitize Url', async () => { - // Arrange - const errorInfo = { - stack: 'abc', - cause: { message: 'api is undefined' }, - }; - const apiError = new OpenAI.APIError(400, errorInfo, 'module error', {}); - - instance = new LobeTogetherAI({ - apiKey: 'test', - - baseURL: 'https://api.abc.com/v1', - }); - - vi.spyOn(instance['client'].chat.completions, 'create').mockRejectedValue(apiError); - - // Act - try { - await instance.chat({ - messages: [{ content: 'Hello', role: 'user' }], - model: 'mistralai/mistral-7b-instruct:free', - temperature: 0, - }); - } catch (e) { - expect(e).toEqual({ - endpoint: 'https://api.***.com/v1', - error: { - cause: { message: 'api is undefined' }, - stack: 'abc', - }, - errorType: bizErrorType, - provider, - }); - } - }); - - it('should throw an InvalidTogetherAIAPIKey error type on 401 status code', async () => { - // Mock the API call to simulate a 401 error - const error = new Error('Unauthorized') as any; - error.status = 401; - vi.mocked(instance['client'].chat.completions.create).mockRejectedValue(error); - - try { - await instance.chat({ - messages: [{ content: 'Hello', role: 'user' }], - model: 'mistralai/mistral-7b-instruct:free', - temperature: 0, - }); - } catch (e) { - // Expect the chat method to throw an error with InvalidTogetherAIAPIKey - expect(e).toEqual({ - endpoint: defaultBaseURL, - error: new Error('Unauthorized'), - errorType: invalidErrorType, - provider, - }); - } - }); - - it('should return AgentRuntimeError for non-OpenAI errors', async () => { - // Arrange - const genericError = new Error('Generic Error'); - - vi.spyOn(instance['client'].chat.completions, 'create').mockRejectedValue(genericError); - - // Act - try { - await instance.chat({ - messages: [{ content: 'Hello', role: 'user' }], - model: 'mistralai/mistral-7b-instruct:free', - temperature: 0, - }); - } catch (e) { - expect(e).toEqual({ - endpoint: defaultBaseURL, - errorType: 'AgentRuntimeError', - provider, - error: { - name: genericError.name, - cause: genericError.cause, - message: genericError.message, - stack: genericError.stack, - }, - }); - } - }); - }); - - describe('DEBUG', () => { - it('should call debugStream and return StreamingTextResponse when DEBUG_TOGETHERAI_CHAT_COMPLETION is 1', async () => { - // Arrange - const mockProdStream = new ReadableStream() as any; // 模拟的 prod 流 - const mockDebugStream = new ReadableStream({ - start(controller) { - controller.enqueue('Debug stream content'); - controller.close(); - }, - }) as any; - mockDebugStream.toReadableStream = () => mockDebugStream; // 添加 toReadableStream 方法 - - // 模拟 chat.completions.create 返回值,包括模拟的 tee 方法 - (instance['client'].chat.completions.create as Mock).mockResolvedValue({ - tee: () => [mockProdStream, { toReadableStream: () => mockDebugStream }], - }); - - // 保存原始环境变量值 - const originalDebugValue = process.env.DEBUG_TOGETHERAI_CHAT_COMPLETION; - - // 模拟环境变量 - process.env.DEBUG_TOGETHERAI_CHAT_COMPLETION = '1'; - 
vi.spyOn(debugStreamModule, 'debugStream').mockImplementation(() => Promise.resolve()); - - // 执行测试 - // 运行你的测试函数,确保它会在条件满足时调用 debugStream - // 假设的测试函数调用,你可能需要根据实际情况调整 - await instance.chat({ - messages: [{ content: 'Hello', role: 'user' }], - model: 'mistralai/mistral-7b-instruct:free', - temperature: 0, - }); - - // 验证 debugStream 被调用 - expect(debugStreamModule.debugStream).toHaveBeenCalled(); - - // 恢复原始环境变量值 - process.env.DEBUG_TOGETHERAI_CHAT_COMPLETION = originalDebugValue; - }); - }); - }); +testProvider({ + provider: 'togetherai', + defaultBaseURL: 'https://api.together.xyz/v1', + chatModel: 'mistralai/mistral-7b-instruct:free', + Runtime: LobeTogetherAI, + chatDebugEnv: 'DEBUG_TOGETHERAI_CHAT_COMPLETION', }); diff --git a/src/libs/agent-runtime/utils/openaiCompatibleFactory/index.test.ts b/src/libs/agent-runtime/utils/openaiCompatibleFactory/index.test.ts index bcedb624c6601..5d85c08da3ff1 100644 --- a/src/libs/agent-runtime/utils/openaiCompatibleFactory/index.test.ts +++ b/src/libs/agent-runtime/utils/openaiCompatibleFactory/index.test.ts @@ -106,6 +106,9 @@ describe('LobeOpenAICompatibleFactory', () => { model: 'mistralai/mistral-7b-instruct:free', temperature: 0.7, stream: true, + stream_options: { + include_usage: true, + }, top_p: 1, }, { headers: { Accept: '*/*' } }, diff --git a/src/libs/agent-runtime/utils/openaiCompatibleFactory/index.ts b/src/libs/agent-runtime/utils/openaiCompatibleFactory/index.ts index 3f472b872ac17..c1c1d4073a681 100644 --- a/src/libs/agent-runtime/utils/openaiCompatibleFactory/index.ts +++ b/src/libs/agent-runtime/utils/openaiCompatibleFactory/index.ts @@ -101,8 +101,9 @@ interface OpenAICompatibleFactoryOptions = any> { export function transformResponseToStream(data: OpenAI.ChatCompletion) { return new ReadableStream({ start(controller) { + const choices = data.choices || []; const chunk: OpenAI.ChatCompletionChunk = { - choices: data.choices.map((choice: OpenAI.ChatCompletion.Choice) => ({ + choices: choices.map((choice: OpenAI.ChatCompletion.Choice) => ({ delta: { content: choice.message.content, role: choice.message.role, @@ -128,7 +129,7 @@ export function transformResponseToStream(data: OpenAI.ChatCompletion) { controller.enqueue(chunk); controller.enqueue({ - choices: data.choices.map((choice: OpenAI.ChatCompletion.Choice) => ({ + choices: choices.map((choice: OpenAI.ChatCompletion.Choice) => ({ delta: { content: null, role: choice.message.role, @@ -219,7 +220,9 @@ export const LobeOpenAICompatibleFactory = = any> ...postPayload, messages, ...(chatCompletion?.noUserId ? {} : { user: options?.user }), + stream_options: postPayload.stream ? 
{ include_usage: true } : undefined, }; + if (debug?.chatCompletion?.()) { console.log('[requestPayload]:', JSON.stringify(finalPayload, null, 2)); } diff --git a/src/libs/agent-runtime/utils/streams/anthropic.test.ts b/src/libs/agent-runtime/utils/streams/anthropic.test.ts index 6b86b196e867c..4e38eab7a5a13 100644 --- a/src/libs/agent-runtime/utils/streams/anthropic.test.ts +++ b/src/libs/agent-runtime/utils/streams/anthropic.test.ts @@ -225,7 +225,7 @@ describe('AnthropicStream', () => { 'id: msg_017aTuY86wNxth5TE544yqJq', 'event: usage', - 'data: {"inputTokens":457,"outputTokens":84,"totalTokens":541}\n', + 'data: {"inputCacheMissTokens":457,"totalInputTokens":457,"totalOutputTokens":84,"totalTokens":541}\n', ].map((item) => `${item}\n`), ); @@ -381,8 +381,7 @@ describe('AnthropicStream', () => { 'id: msg_0175ryA67RbGrnRrGBXFQEYK', 'event: usage', - 'data: {"inputTokens":485,"outputTokens":154,"totalTokens":639}\n', - + 'data: {"inputCacheMissTokens":485,"totalInputTokens":485,"totalOutputTokens":154,"totalTokens":639}\n', 'id: msg_0175ryA67RbGrnRrGBXFQEYK', 'event: stop', 'data: "message_stop"\n', @@ -392,6 +391,91 @@ describe('AnthropicStream', () => { expect(onToolCallMock).toHaveBeenCalledTimes(6); }); + it('should handle prompts context caching', async () => { + const streams = [ + { + type: 'message_start', + message: { + id: 'msg_01Vxc4yQTEjkDSba3N3BMbH8', + type: 'message', + role: 'assistant', + model: 'claude-3-7-sonnet-20250219', + content: [], + stop_reason: null, + stop_sequence: null, + usage: { + input_tokens: 6, + cache_creation_input_tokens: 457, + cache_read_input_tokens: 17918, + output_tokens: 2, + }, + }, + }, + { type: 'content_block_start', index: 0, content_block: { type: 'text', text: '' } }, + { type: 'content_block_delta', index: 0, delta: { type: 'text_delta', text: '\n\n根' } }, + { + type: 'content_block_delta', + index: 0, + delta: { type: 'text_delta', text: '/\n[^20]: https://s' }, + }, + { type: 'content_block_stop', index: 0 }, + { + type: 'message_delta', + delta: { stop_reason: 'end_turn', stop_sequence: null }, + usage: { output_tokens: 3222 }, + }, + { type: 'message_stop' }, + ]; + + const mockReadableStream = new ReadableStream({ + start(controller) { + streams.forEach((chunk) => { + controller.enqueue(chunk); + }); + controller.close(); + }, + }); + + const protocolStream = AnthropicStream(mockReadableStream); + + const decoder = new TextDecoder(); + const chunks = []; + + // @ts-ignore + for await (const chunk of protocolStream) { + chunks.push(decoder.decode(chunk, { stream: true })); + } + + expect(chunks).toEqual( + [ + 'id: msg_01Vxc4yQTEjkDSba3N3BMbH8', + 'event: data', + 'data: {"id":"msg_01Vxc4yQTEjkDSba3N3BMbH8","type":"message","role":"assistant","model":"claude-3-7-sonnet-20250219","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":6,"cache_creation_input_tokens":457,"cache_read_input_tokens":17918,"output_tokens":2}}\n', + 'id: msg_01Vxc4yQTEjkDSba3N3BMbH8', + 'event: data', + 'data: ""\n', + 'id: msg_01Vxc4yQTEjkDSba3N3BMbH8', + 'event: text', + 'data: "\\n\\n根"\n', + 'id: msg_01Vxc4yQTEjkDSba3N3BMbH8', + 'event: text', + 'data: "/\\n[^20]: https://s"\n', + 'id: msg_01Vxc4yQTEjkDSba3N3BMbH8', + 'event: data', + 'data: {"type":"content_block_stop","index":0}\n', + 'id: msg_01Vxc4yQTEjkDSba3N3BMbH8', + 'event: stop', + 'data: "end_turn"\n', + 'id: msg_01Vxc4yQTEjkDSba3N3BMbH8', + 'event: usage', + 'data: 
{"inputCacheMissTokens":6,"inputCachedTokens":17918,"inputWriteCacheTokens":457,"totalInputTokens":18381,"totalOutputTokens":3224,"totalTokens":21605}\n', + + 'id: msg_01Vxc4yQTEjkDSba3N3BMbH8', + 'event: stop', + 'data: "message_stop"\n', + ].map((item) => `${item}\n`), + ); + }); describe('thinking', () => { it('should handle normal thinking ', async () => { const streams = [ @@ -515,7 +599,7 @@ describe('AnthropicStream', () => { 'data: "end_turn"\n', 'id: msg_01MNsLe7n1uVLtu6W8rCFujD', 'event: usage', - 'data: {"inputTokens":46,"outputTokens":365,"totalTokens":411}\n', + 'data: {"inputCacheMissTokens":46,"totalInputTokens":46,"totalOutputTokens":365,"totalTokens":411}\n', 'id: msg_01MNsLe7n1uVLtu6W8rCFujD', 'event: stop', 'data: "message_stop"\n', @@ -675,7 +759,7 @@ describe('AnthropicStream', () => { 'data: "end_turn"\n', 'id: msg_019q32esPvu3TftzZnL6JPys', 'event: usage', - 'data: {"inputTokens":92,"outputTokens":263,"totalTokens":355}\n', + 'data: {"inputCacheMissTokens":92,"totalInputTokens":92,"totalOutputTokens":263,"totalTokens":355}\n', 'id: msg_019q32esPvu3TftzZnL6JPys', 'event: stop', 'data: "message_stop"\n', diff --git a/src/libs/agent-runtime/utils/streams/anthropic.ts b/src/libs/agent-runtime/utils/streams/anthropic.ts index 67d1f60a20b8a..4e3af5ad9d66f 100644 --- a/src/libs/agent-runtime/utils/streams/anthropic.ts +++ b/src/libs/agent-runtime/utils/streams/anthropic.ts @@ -22,9 +22,24 @@ export const transformAnthropicStream = ( switch (chunk.type) { case 'message_start': { context.id = chunk.message.id; + let totalInputTokens = chunk.message.usage?.input_tokens; + + if ( + chunk.message.usage?.cache_creation_input_tokens || + chunk.message.usage?.cache_read_input_tokens + ) { + totalInputTokens = + chunk.message.usage?.input_tokens + + (chunk.message.usage.cache_creation_input_tokens || 0) + + (chunk.message.usage.cache_read_input_tokens || 0); + } + context.usage = { - inputTokens: chunk.message.usage?.input_tokens, - outputTokens: chunk.message.usage?.output_tokens, + inputCacheMissTokens: chunk.message.usage?.input_tokens, + inputCachedTokens: chunk.message.usage?.cache_read_input_tokens || undefined, + inputWriteCacheTokens: chunk.message.usage?.cache_creation_input_tokens || undefined, + totalInputTokens, + totalOutputTokens: chunk.message.usage?.output_tokens, }; return { data: chunk.message, id: chunk.message.id, type: 'data' }; @@ -140,18 +155,20 @@ export const transformAnthropicStream = ( } case 'message_delta': { - const outputTokens = chunk.usage?.output_tokens + (context.usage?.outputTokens || 0); - const inputTokens = context.usage?.inputTokens || 0; - const totalTokens = inputTokens + outputTokens; + const totalOutputTokens = + chunk.usage?.output_tokens + (context.usage?.totalOutputTokens || 0); + const totalInputTokens = context.usage?.totalInputTokens || 0; + const totalTokens = totalInputTokens + totalOutputTokens; if (totalTokens > 0) { return [ { data: chunk.delta.stop_reason, id: context.id, type: 'stop' }, { data: { - inputTokens: inputTokens, - outputTokens: outputTokens, - totalTokens: inputTokens + outputTokens, + ...context.usage, + totalInputTokens, + totalOutputTokens, + totalTokens, } as ModelTokensUsage, id: context.id, type: 'usage', diff --git a/src/libs/agent-runtime/utils/streams/openai.test.ts b/src/libs/agent-runtime/utils/streams/openai.test.ts index ee1792f27c839..6a7be86e917f8 100644 --- a/src/libs/agent-runtime/utils/streams/openai.test.ts +++ b/src/libs/agent-runtime/utils/streams/openai.test.ts @@ -348,94 +348,198 @@ 
describe('OpenAIStream', () => { ]); }); - it('should streaming token usage', async () => { - const data = [ - { - id: 'chatcmpl-B7CcnaeK3jqWBMOhxg7SSKFwlk7dC', - object: 'chat.completion.chunk', - created: 1741056525, - model: 'gpt-4o-mini-2024-07-18', - choices: [{ index: 0, delta: { role: 'assistant', content: '' } }], - service_tier: 'default', - system_fingerprint: 'fp_06737a9306', - }, - { - id: 'chatcmpl-B7CcnaeK3jqWBMOhxg7SSKFwlk7dC', - object: 'chat.completion.chunk', - created: 1741056525, - model: 'gpt-4o-mini-2024-07-18', - choices: [{ index: 0, delta: { content: '你好!' } }], - service_tier: 'default', - system_fingerprint: 'fp_06737a9306', - }, - { - id: 'chatcmpl-B7CcnaeK3jqWBMOhxg7SSKFwlk7dC', - object: 'chat.completion.chunk', - created: 1741056525, - model: 'gpt-4o-mini-2024-07-18', - choices: [{ index: 0, delta: {}, finish_reason: 'stop' }], - service_tier: 'default', - system_fingerprint: 'fp_06737a9306', - }, - { - id: 'chatcmpl-B7CcnaeK3jqWBMOhxg7SSKFwlk7dC', - object: 'chat.completion.chunk', - created: 1741056525, - model: 'gpt-4o-mini-2024-07-18', - choices: [], - service_tier: 'default', - system_fingerprint: 'fp_06737a9306', - usage: { - prompt_tokens: 1646, - completion_tokens: 11, - total_tokens: 1657, - prompt_tokens_details: { audio_tokens: 0, cached_tokens: 0 }, - completion_tokens_details: { - accepted_prediction_tokens: 0, - audio_tokens: 0, - reasoning_tokens: 0, - rejected_prediction_tokens: 0, + describe('token usage', () => { + it('should streaming token usage', async () => { + const data = [ + { + id: 'chatcmpl-B7CcnaeK3jqWBMOhxg7SSKFwlk7dC', + object: 'chat.completion.chunk', + created: 1741056525, + model: 'gpt-4o-mini-2024-07-18', + choices: [{ index: 0, delta: { role: 'assistant', content: '' } }], + service_tier: 'default', + system_fingerprint: 'fp_06737a9306', + }, + { + id: 'chatcmpl-B7CcnaeK3jqWBMOhxg7SSKFwlk7dC', + object: 'chat.completion.chunk', + created: 1741056525, + model: 'gpt-4o-mini-2024-07-18', + choices: [{ index: 0, delta: { content: '你好!' 
} }], + service_tier: 'default', + system_fingerprint: 'fp_06737a9306', + }, + { + id: 'chatcmpl-B7CcnaeK3jqWBMOhxg7SSKFwlk7dC', + object: 'chat.completion.chunk', + created: 1741056525, + model: 'gpt-4o-mini-2024-07-18', + choices: [{ index: 0, delta: {}, finish_reason: 'stop' }], + service_tier: 'default', + system_fingerprint: 'fp_06737a9306', + }, + { + id: 'chatcmpl-B7CcnaeK3jqWBMOhxg7SSKFwlk7dC', + object: 'chat.completion.chunk', + created: 1741056525, + model: 'gpt-4o-mini-2024-07-18', + choices: [], + service_tier: 'default', + system_fingerprint: 'fp_06737a9306', + usage: { + prompt_tokens: 1646, + completion_tokens: 11, + total_tokens: 1657, + prompt_tokens_details: { audio_tokens: 0, cached_tokens: 0 }, + completion_tokens_details: { + accepted_prediction_tokens: 0, + audio_tokens: 0, + reasoning_tokens: 0, + rejected_prediction_tokens: 0, + }, }, }, - }, - ]; + ]; - const mockOpenAIStream = new ReadableStream({ - start(controller) { - data.forEach((chunk) => { - controller.enqueue(chunk); - }); + const mockOpenAIStream = new ReadableStream({ + start(controller) { + data.forEach((chunk) => { + controller.enqueue(chunk); + }); - controller.close(); - }, + controller.close(); + }, + }); + + const protocolStream = OpenAIStream(mockOpenAIStream); + + const decoder = new TextDecoder(); + const chunks = []; + + // @ts-ignore + for await (const chunk of protocolStream) { + chunks.push(decoder.decode(chunk, { stream: true })); + } + + expect(chunks).toEqual( + [ + 'id: chatcmpl-B7CcnaeK3jqWBMOhxg7SSKFwlk7dC', + 'event: text', + `data: ""\n`, + 'id: chatcmpl-B7CcnaeK3jqWBMOhxg7SSKFwlk7dC', + 'event: text', + `data: "你好!"\n`, + 'id: chatcmpl-B7CcnaeK3jqWBMOhxg7SSKFwlk7dC', + 'event: stop', + `data: "stop"\n`, + 'id: chatcmpl-B7CcnaeK3jqWBMOhxg7SSKFwlk7dC', + 'event: usage', + `data: {"inputCacheMissTokens":1646,"inputTextTokens":1646,"outputTextTokens":11,"totalInputTokens":1646,"totalOutputTokens":11,"totalTokens":1657}\n`, + ].map((i) => `${i}\n`), + ); }); - const protocolStream = OpenAIStream(mockOpenAIStream); + it('should streaming litellm token usage', async () => { + const data = [ + { + id: 'chatcmpl-c1f6a6a6-fcf8-463a-96bf-cf634d3e98a5', + created: 1741188058, + model: 'gpt-4o-mini', + object: 'chat.completion.chunk', + system_fingerprint: 'fp_06737a9306', + choices: [{ index: 0, delta: { content: ' #' } }], + stream_options: { include_usage: true }, + }, + { + id: 'chatcmpl-c1f6a6a6-fcf8-463a-96bf-cf634d3e98a5', + created: 1741188068, + model: 'gpt-4o-mini', + object: 'chat.completion.chunk', + system_fingerprint: 'fp_06737a9306', + choices: [{ index: 0, delta: { content: '.' 
} }], + stream_options: { include_usage: true }, + }, + { + id: 'chatcmpl-c1f6a6a6-fcf8-463a-96bf-cf634d3e98a5', + created: 1741188068, + model: 'gpt-4o-mini', + object: 'chat.completion.chunk', + system_fingerprint: 'fp_06737a9306', + choices: [{ finish_reason: 'stop', index: 0, delta: {} }], + stream_options: { include_usage: true }, + }, + { + id: 'chatcmpl-c1f6a6a6-fcf8-463a-96bf-cf634d3e98a5', + created: 1741188068, + model: 'gpt-4o-mini', + object: 'chat.completion.chunk', + system_fingerprint: 'fp_06737a9306', + choices: [{ index: 0, delta: {} }], + stream_options: { include_usage: true }, + }, + { + id: 'chatcmpl-c1f6a6a6-fcf8-463a-96bf-cf634d3e98a5', + created: 1741188068, + model: 'gpt-4o-mini', + object: 'chat.completion.chunk', + system_fingerprint: 'fp_06737a9306', + choices: [{ index: 0, delta: {} }], + stream_options: { include_usage: true }, + usage: { + completion_tokens: 1720, + prompt_tokens: 1797, + total_tokens: 3517, + completion_tokens_details: { + accepted_prediction_tokens: 0, + audio_tokens: 0, + reasoning_tokens: 0, + rejected_prediction_tokens: 0, + }, + prompt_tokens_details: { audio_tokens: 0, cached_tokens: 0 }, + }, + }, + ]; - const decoder = new TextDecoder(); - const chunks = []; + const mockOpenAIStream = new ReadableStream({ + start(controller) { + data.forEach((chunk) => { + controller.enqueue(chunk); + }); - // @ts-ignore - for await (const chunk of protocolStream) { - chunks.push(decoder.decode(chunk, { stream: true })); - } + controller.close(); + }, + }); - expect(chunks).toEqual( - [ - 'id: chatcmpl-B7CcnaeK3jqWBMOhxg7SSKFwlk7dC', - 'event: text', - `data: ""\n`, - 'id: chatcmpl-B7CcnaeK3jqWBMOhxg7SSKFwlk7dC', - 'event: text', - `data: "你好!"\n`, - 'id: chatcmpl-B7CcnaeK3jqWBMOhxg7SSKFwlk7dC', - 'event: stop', - `data: "stop"\n`, - 'id: chatcmpl-B7CcnaeK3jqWBMOhxg7SSKFwlk7dC', - 'event: usage', - `data: {"acceptedPredictionTokens":0,"cachedTokens":0,"inputAudioTokens":0,"inputTokens":1646,"outputAudioTokens":0,"outputTokens":11,"reasoningTokens":0,"rejectedPredictionTokens":0,"totalTokens":1657}\n`, - ].map((i) => `${i}\n`), - ); + const protocolStream = OpenAIStream(mockOpenAIStream); + + const decoder = new TextDecoder(); + const chunks = []; + + // @ts-ignore + for await (const chunk of protocolStream) { + chunks.push(decoder.decode(chunk, { stream: true })); + } + + expect(chunks).toEqual( + [ + 'id: chatcmpl-c1f6a6a6-fcf8-463a-96bf-cf634d3e98a5', + 'event: text', + `data: " #"\n`, + 'id: chatcmpl-c1f6a6a6-fcf8-463a-96bf-cf634d3e98a5', + 'event: text', + `data: "."\n`, + 'id: chatcmpl-c1f6a6a6-fcf8-463a-96bf-cf634d3e98a5', + 'event: stop', + `data: "stop"\n`, + 'id: chatcmpl-c1f6a6a6-fcf8-463a-96bf-cf634d3e98a5', + 'event: data', + `data: {"delta":{},"id":"chatcmpl-c1f6a6a6-fcf8-463a-96bf-cf634d3e98a5","index":0}\n`, + 'id: chatcmpl-c1f6a6a6-fcf8-463a-96bf-cf634d3e98a5', + 'event: usage', + `data: {"inputCacheMissTokens":1797,"inputTextTokens":1797,"outputTextTokens":1720,"totalInputTokens":1797,"totalOutputTokens":1720,"totalTokens":3517}\n`, + ].map((i) => `${i}\n`), + ); + }); }); describe('Tools Calling', () => { @@ -840,7 +944,7 @@ describe('OpenAIStream', () => { `data: "帮助。"\n`, 'id: 1', 'event: usage', - `data: {"cachedTokens":0,"inputCacheMissTokens":6,"inputTokens":6,"outputTokens":104,"reasoningTokens":70,"totalTokens":110}\n`, + `data: {"inputCacheMissTokens":6,"inputTextTokens":6,"outputReasoningTokens":70,"outputTextTokens":34,"totalInputTokens":6,"totalOutputTokens":104,"totalTokens":110}\n`, ].map((i) => `${i}\n`), ); }); @@ 
-1059,7 +1163,7 @@ describe('OpenAIStream', () => { `data: "帮助。"\n`, 'id: 1', 'event: usage', - `data: {"cachedTokens":0,"inputCacheMissTokens":6,"inputTokens":6,"outputTokens":104,"reasoningTokens":70,"totalTokens":110}\n`, + `data: {"inputCacheMissTokens":6,"inputTextTokens":6,"outputReasoningTokens":70,"outputTextTokens":34,"totalInputTokens":6,"totalOutputTokens":104,"totalTokens":110}\n`, ].map((i) => `${i}\n`), ); }); @@ -1260,7 +1364,7 @@ describe('OpenAIStream', () => { `data: "帮助。"\n`, 'id: 1', 'event: usage', - `data: {"cachedTokens":0,"inputCacheMissTokens":6,"inputTokens":6,"outputTokens":104,"reasoningTokens":70,"totalTokens":110}\n`, + `data: {"inputCacheMissTokens":6,"inputTextTokens":6,"outputReasoningTokens":70,"outputTextTokens":34,"totalInputTokens":6,"totalOutputTokens":104,"totalTokens":110}\n`, ].map((i) => `${i}\n`), ); }); @@ -1461,7 +1565,7 @@ describe('OpenAIStream', () => { `data: "帮助。"\n`, 'id: 1', 'event: usage', - `data: {"cachedTokens":0,"inputCacheMissTokens":6,"inputTokens":6,"outputTokens":104,"reasoningTokens":70,"totalTokens":110}\n`, + `data: {"inputCacheMissTokens":6,"inputTextTokens":6,"outputReasoningTokens":70,"outputTextTokens":34,"totalInputTokens":6,"totalOutputTokens":104,"totalTokens":110}\n`, ].map((i) => `${i}\n`), ); }); @@ -1662,7 +1766,7 @@ describe('OpenAIStream', () => { `data: "帮助。"\n`, 'id: 1', 'event: usage', - `data: {"cachedTokens":0,"inputCacheMissTokens":6,"inputTokens":6,"outputTokens":104,"reasoningTokens":70,"totalTokens":110}\n`, + `data: {"inputCacheMissTokens":6,"inputTextTokens":6,"outputReasoningTokens":70,"outputTextTokens":34,"totalInputTokens":6,"totalOutputTokens":104,"totalTokens":110}\n`, ].map((i) => `${i}\n`), ); }); diff --git a/src/libs/agent-runtime/utils/streams/openai.ts b/src/libs/agent-runtime/utils/streams/openai.ts index ecc463b9493ed..0b100bc369869 100644 --- a/src/libs/agent-runtime/utils/streams/openai.ts +++ b/src/libs/agent-runtime/utils/streams/openai.ts @@ -1,10 +1,11 @@ import OpenAI from 'openai'; import type { Stream } from 'openai/streaming'; -import { ChatMessageError, CitationItem, ModelTokensUsage } from '@/types/message'; +import { ChatMessageError, CitationItem } from '@/types/message'; import { AgentRuntimeErrorType, ILobeAgentRuntimeErrorType } from '../../error'; import { ChatStreamCallbacks } from '../../types'; +import { convertUsage } from '../usageConverter'; import { FIRST_CHUNK_ERROR_KEY, StreamContext, @@ -18,22 +19,6 @@ import { generateToolCallId, } from './protocol'; -const convertUsage = (usage: OpenAI.Completions.CompletionUsage): ModelTokensUsage => { - return { - acceptedPredictionTokens: usage.completion_tokens_details?.accepted_prediction_tokens, - cachedTokens: - (usage as any).prompt_cache_hit_tokens || usage.prompt_tokens_details?.cached_tokens, - inputAudioTokens: usage.prompt_tokens_details?.audio_tokens, - inputCacheMissTokens: (usage as any).prompt_cache_miss_tokens, - inputTokens: usage.prompt_tokens, - outputAudioTokens: usage.completion_tokens_details?.audio_tokens, - outputTokens: usage.completion_tokens, - reasoningTokens: usage.completion_tokens_details?.reasoning_tokens, - rejectedPredictionTokens: usage.completion_tokens_details?.rejected_prediction_tokens, - totalTokens: usage.total_tokens, - }; -}; - export const transformOpenAIStream = ( chunk: OpenAI.ChatCompletionChunk, streamContext: StreamContext, @@ -193,6 +178,12 @@ export const transformOpenAIStream = ( return { data: item.delta, id: chunk.id, type: 'data' }; } + // litellm 的返回结果中,存在 delta 为空,但是有 
usage 的情况 + if (chunk.usage) { + const usage = chunk.usage; + return { data: convertUsage(usage), id: chunk.id, type: 'usage' }; + } + // 其余情况下,返回 delta 和 index return { data: { delta: item.delta, id: chunk.id, index: item.index }, diff --git a/src/libs/agent-runtime/utils/usageConverter.test.ts b/src/libs/agent-runtime/utils/usageConverter.test.ts new file mode 100644 index 0000000000000..5e55d06b19a4f --- /dev/null +++ b/src/libs/agent-runtime/utils/usageConverter.test.ts @@ -0,0 +1,249 @@ +import OpenAI from 'openai'; +import { describe, expect, it } from 'vitest'; + +import { convertUsage } from './usageConverter'; + +describe('convertUsage', () => { + it('should convert basic OpenAI usage data correctly', () => { + // Arrange + const openaiUsage: OpenAI.Completions.CompletionUsage = { + prompt_tokens: 100, + completion_tokens: 50, + total_tokens: 150, + }; + + // Act + const result = convertUsage(openaiUsage); + + // Assert + expect(result).toEqual({ + inputTextTokens: 100, + totalInputTokens: 100, + totalOutputTokens: 50, + outputTextTokens: 50, + totalTokens: 150, + }); + }); + + it('should handle PPLX citation tokens correctly', () => { + // Arrange + const pplxUsage = { + prompt_tokens: 80, + citation_tokens: 20, + completion_tokens: 50, + total_tokens: 150, + } as OpenAI.Completions.CompletionUsage; + + // Act + const result = convertUsage(pplxUsage); + + // Assert + expect(result).toEqual({ + inputTextTokens: 80, + inputCitationTokens: 20, + totalInputTokens: 100, + totalOutputTokens: 50, + outputTextTokens: 50, + totalTokens: 170, // 150 + 20 (citation tokens) + }); + }); + + it('should handle cached tokens correctly', () => { + // Arrange + const usageWithCache = { + prompt_tokens: 100, + prompt_cache_hit_tokens: 30, + prompt_cache_miss_tokens: 70, + completion_tokens: 50, + total_tokens: 150, + } as OpenAI.Completions.CompletionUsage; + + // Act + const result = convertUsage(usageWithCache); + + // Assert + expect(result).toEqual({ + inputTextTokens: 100, + inputCachedTokens: 30, + inputCacheMissTokens: 70, + totalInputTokens: 100, + totalOutputTokens: 50, + outputTextTokens: 50, + totalTokens: 150, + }); + }); + + it('should handle cached tokens using prompt_tokens_details', () => { + // Arrange + const usageWithTokenDetails = { + prompt_tokens: 100, + prompt_tokens_details: { + cached_tokens: 30, + }, + completion_tokens: 50, + total_tokens: 150, + } as OpenAI.Completions.CompletionUsage; + + // Act + const result = convertUsage(usageWithTokenDetails); + + // Assert + expect(result).toEqual({ + inputTextTokens: 100, + inputCachedTokens: 30, + inputCacheMissTokens: 70, // 100 - 30 + totalInputTokens: 100, + totalOutputTokens: 50, + outputTextTokens: 50, + totalTokens: 150, + }); + }); + + it('should handle audio tokens in input correctly', () => { + // Arrange + const usageWithAudioInput = { + prompt_tokens: 100, + prompt_tokens_details: { + audio_tokens: 20, + }, + completion_tokens: 50, + total_tokens: 150, + } as OpenAI.Completions.CompletionUsage; + + // Act + const result = convertUsage(usageWithAudioInput); + + // Assert + expect(result).toEqual({ + inputTextTokens: 100, + inputAudioTokens: 20, + totalInputTokens: 100, + totalOutputTokens: 50, + outputTextTokens: 50, + totalTokens: 150, + }); + }); + + it('should handle detailed output tokens correctly', () => { + // Arrange + const usageWithOutputDetails = { + prompt_tokens: 100, + completion_tokens: 100, + completion_tokens_details: { + reasoning_tokens: 30, + audio_tokens: 20, + }, + total_tokens: 200, + } as 
OpenAI.Completions.CompletionUsage; + + // Act + const result = convertUsage(usageWithOutputDetails); + + // Assert + expect(result).toEqual({ + inputTextTokens: 100, + totalInputTokens: 100, + totalOutputTokens: 100, + outputReasoningTokens: 30, + outputAudioTokens: 20, + outputTextTokens: 50, // 100 - 30 - 20 + totalTokens: 200, + }); + }); + + it('should handle prediction tokens correctly', () => { + // Arrange + const usageWithPredictions = { + prompt_tokens: 100, + completion_tokens: 80, + completion_tokens_details: { + accepted_prediction_tokens: 30, + rejected_prediction_tokens: 10, + }, + total_tokens: 180, + } as OpenAI.Completions.CompletionUsage; + + // Act + const result = convertUsage(usageWithPredictions); + + // Assert + expect(result).toEqual({ + inputTextTokens: 100, + totalInputTokens: 100, + totalOutputTokens: 80, + outputTextTokens: 80, + acceptedPredictionTokens: 30, + rejectedPredictionTokens: 10, + totalTokens: 180, + }); + }); + + it('should handle complex usage with all fields correctly', () => { + // Arrange + const complexUsage = { + prompt_tokens: 150, + prompt_tokens_details: { + audio_tokens: 50, + cached_tokens: 40, + }, + citation_tokens: 30, + completion_tokens: 120, + completion_tokens_details: { + reasoning_tokens: 40, + audio_tokens: 30, + accepted_prediction_tokens: 20, + rejected_prediction_tokens: 5, + }, + total_tokens: 300, + } as OpenAI.Completions.CompletionUsage; + + // Act + const result = convertUsage(complexUsage); + + // Assert + expect(result).toEqual({ + inputTextTokens: 150, + inputAudioTokens: 50, + inputCachedTokens: 40, + inputCacheMissTokens: 140, // 180 - 40 (totalInputTokens - cachedTokens) + inputCitationTokens: 30, + totalInputTokens: 180, // 150 + 30 + outputTextTokens: 50, // 120 - 40 - 30 + outputReasoningTokens: 40, + outputAudioTokens: 30, + totalOutputTokens: 120, + acceptedPredictionTokens: 20, + rejectedPredictionTokens: 5, + totalTokens: 330, // 300 + 30 (citation_tokens) + }); + }); + + it('should omit zero or undefined values in the final output', () => { + // Arrange + const usageWithZeros = { + prompt_tokens: 100, + completion_tokens: 50, + total_tokens: 150, + completion_tokens_details: { + reasoning_tokens: 0, + audio_tokens: undefined, + }, + } as OpenAI.Completions.CompletionUsage; + + // Act + const result = convertUsage(usageWithZeros); + + // Assert + expect(result).toEqual({ + inputTextTokens: 100, + totalInputTokens: 100, + totalOutputTokens: 50, + outputTextTokens: 50, + totalTokens: 150, + }); + + // These should not be present in the result + expect(result).not.toHaveProperty('outputReasoningTokens'); + expect(result).not.toHaveProperty('outputAudioTokens'); + }); +}); diff --git a/src/libs/agent-runtime/utils/usageConverter.ts b/src/libs/agent-runtime/utils/usageConverter.ts new file mode 100644 index 0000000000000..8badc9bdfbee1 --- /dev/null +++ b/src/libs/agent-runtime/utils/usageConverter.ts @@ -0,0 +1,50 @@ +import OpenAI from 'openai'; + +import { ModelTokensUsage } from '@/types/message'; + +export const convertUsage = (usage: OpenAI.Completions.CompletionUsage): ModelTokensUsage => { + // 目前只有 pplx 才有 citation_tokens + const inputTextTokens = usage.prompt_tokens || 0; + const inputCitationTokens = (usage as any).citation_tokens || 0; + const totalInputTokens = inputCitationTokens + inputTextTokens; + + const cachedTokens = + (usage as any).prompt_cache_hit_tokens || usage.prompt_tokens_details?.cached_tokens; + + const inputCacheMissTokens = + (usage as any).prompt_cache_miss_tokens || 
totalInputTokens - cachedTokens; + + const totalOutputTokens = usage.completion_tokens; + const outputReasoning = usage.completion_tokens_details?.reasoning_tokens || 0; + const outputAudioTokens = usage.completion_tokens_details?.audio_tokens || 0; + const outputTextTokens = totalOutputTokens - outputReasoning - outputAudioTokens; + + const totalTokens = inputCitationTokens + usage.total_tokens; + + const data = { + acceptedPredictionTokens: usage.completion_tokens_details?.accepted_prediction_tokens, + inputAudioTokens: usage.prompt_tokens_details?.audio_tokens, + inputCacheMissTokens: inputCacheMissTokens, + inputCachedTokens: cachedTokens, + inputCitationTokens: inputCitationTokens, + inputTextTokens: inputTextTokens, + outputAudioTokens: outputAudioTokens, + outputReasoningTokens: outputReasoning, + outputTextTokens: outputTextTokens, + rejectedPredictionTokens: usage.completion_tokens_details?.rejected_prediction_tokens, + totalInputTokens, + totalOutputTokens: totalOutputTokens, + totalTokens, + } satisfies ModelTokensUsage; + + const finalData = {}; + + Object.entries(data).forEach(([key, value]) => { + if (!!value) { + // @ts-ignore + finalData[key] = value; + } + }); + + return finalData; +}; diff --git a/src/libs/agent-runtime/zeroone/index.test.ts b/src/libs/agent-runtime/zeroone/index.test.ts index e3b5ab090dc64..505a7ee33a15e 100644 --- a/src/libs/agent-runtime/zeroone/index.test.ts +++ b/src/libs/agent-runtime/zeroone/index.test.ts @@ -1,299 +1,12 @@ // @vitest-environment node -import OpenAI from 'openai'; -import { Mock, afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import { testProvider } from '@/libs/agent-runtime/providerTestUtils'; -import { ChatStreamCallbacks, LobeOpenAICompatibleRuntime } from '@/libs/agent-runtime'; - -import * as debugStreamModule from '../utils/debugStream'; import { LobeZeroOneAI } from './index'; -const provider = 'zeroone'; -const defaultBaseURL = 'https://api.lingyiwanwu.com/v1'; - -const bizErrorType = 'ProviderBizError'; -const invalidErrorType = 'InvalidProviderAPIKey'; - -// Mock the console.error to avoid polluting test output -vi.spyOn(console, 'error').mockImplementation(() => {}); - -let instance: LobeOpenAICompatibleRuntime; - -beforeEach(() => { - instance = new LobeZeroOneAI({ apiKey: 'test' }); - - // 使用 vi.spyOn 来模拟 chat.completions.create 方法 - vi.spyOn(instance['client'].chat.completions, 'create').mockResolvedValue( - new ReadableStream() as any, - ); -}); - -afterEach(() => { - vi.clearAllMocks(); -}); - -describe('LobeZeroOneAI', () => { - describe('init', () => { - it('should correctly initialize with an API key', async () => { - const instance = new LobeZeroOneAI({ apiKey: 'test_api_key' }); - expect(instance).toBeInstanceOf(LobeZeroOneAI); - expect(instance.baseURL).toEqual(defaultBaseURL); - }); - }); - - describe('chat', () => { - it('should return a StreamingTextResponse on successful API call', async () => { - // Arrange - const mockStream = new ReadableStream(); - const mockResponse = Promise.resolve(mockStream); - - (instance['client'].chat.completions.create as Mock).mockResolvedValue(mockResponse); - - // Act - const result = await instance.chat({ - messages: [{ content: 'Hello', role: 'user' }], - model: 'yi-34b-chat-0205', - temperature: 0, - }); - - // Assert - expect(result).toBeInstanceOf(Response); - }); - - it('should call ZeroOne API with corresponding options', async () => { - // Arrange - const mockStream = new ReadableStream(); - const mockResponse = Promise.resolve(mockStream); - - 
(instance['client'].chat.completions.create as Mock).mockResolvedValue(mockResponse); - - // Act - const result = await instance.chat({ - max_tokens: 1024, - messages: [{ content: 'Hello', role: 'user' }], - model: 'yi-34b-chat-0205', - temperature: 0.7, - top_p: 1, - }); - - // Assert - expect(instance['client'].chat.completions.create).toHaveBeenCalledWith( - { - max_tokens: 1024, - messages: [{ content: 'Hello', role: 'user' }], - model: 'yi-34b-chat-0205', - temperature: 0.7, - stream: true, - top_p: 1, - }, - { headers: { Accept: '*/*' } }, - ); - expect(result).toBeInstanceOf(Response); - }); - - describe('Error', () => { - it('should return ZeroOneBizError with an openai error response when OpenAI.APIError is thrown', async () => { - // Arrange - const apiError = new OpenAI.APIError( - 400, - { - status: 400, - error: { - message: 'Bad Request', - }, - }, - 'Error message', - {}, - ); - - vi.spyOn(instance['client'].chat.completions, 'create').mockRejectedValue(apiError); - - // Act - try { - await instance.chat({ - messages: [{ content: 'Hello', role: 'user' }], - model: 'yi-34b-chat-0205', - temperature: 0, - }); - } catch (e) { - expect(e).toEqual({ - endpoint: defaultBaseURL, - error: { - error: { message: 'Bad Request' }, - status: 400, - }, - errorType: bizErrorType, - provider, - }); - } - }); - - it('should throw AgentRuntimeError with InvalidZeroOneAPIKey if no apiKey is provided', async () => { - try { - new LobeZeroOneAI({}); - } catch (e) { - expect(e).toEqual({ errorType: invalidErrorType }); - } - }); - - it('should return ZeroOneBizError with the cause when OpenAI.APIError is thrown with cause', async () => { - // Arrange - const errorInfo = { - stack: 'abc', - cause: { - message: 'api is undefined', - }, - }; - const apiError = new OpenAI.APIError(400, errorInfo, 'module error', {}); - - vi.spyOn(instance['client'].chat.completions, 'create').mockRejectedValue(apiError); - - // Act - try { - await instance.chat({ - messages: [{ content: 'Hello', role: 'user' }], - model: 'yi-34b-chat-0205', - temperature: 0, - }); - } catch (e) { - expect(e).toEqual({ - endpoint: defaultBaseURL, - error: { - cause: { message: 'api is undefined' }, - stack: 'abc', - }, - errorType: bizErrorType, - provider, - }); - } - }); - - it('should return ZeroOneBizError with an cause response with desensitize Url', async () => { - // Arrange - const errorInfo = { - stack: 'abc', - cause: { message: 'api is undefined' }, - }; - const apiError = new OpenAI.APIError(400, errorInfo, 'module error', {}); - - instance = new LobeZeroOneAI({ - apiKey: 'test', - - baseURL: 'https://api.abc.com/v1', - }); - - vi.spyOn(instance['client'].chat.completions, 'create').mockRejectedValue(apiError); - - // Act - try { - await instance.chat({ - messages: [{ content: 'Hello', role: 'user' }], - model: 'yi-34b-chat-0205', - temperature: 0, - }); - } catch (e) { - expect(e).toEqual({ - endpoint: 'https://api.***.com/v1', - error: { - cause: { message: 'api is undefined' }, - stack: 'abc', - }, - errorType: bizErrorType, - provider, - }); - } - }); - - it('should throw an InvalidZeroOneAPIKey error type on 401 status code', async () => { - // Mock the API call to simulate a 401 error - const error = new Error('Unauthorized') as any; - error.status = 401; - vi.mocked(instance['client'].chat.completions.create).mockRejectedValue(error); - - try { - await instance.chat({ - messages: [{ content: 'Hello', role: 'user' }], - model: 'yi-34b-chat-0205', - temperature: 0, - }); - } catch (e) { - // Expect the chat method to 
throw an error with InvalidMoonshotAPIKey - expect(e).toEqual({ - endpoint: defaultBaseURL, - error: new Error('Unauthorized'), - errorType: invalidErrorType, - provider, - }); - } - }); - - it('should return AgentRuntimeError for non-OpenAI errors', async () => { - // Arrange - const genericError = new Error('Generic Error'); - - vi.spyOn(instance['client'].chat.completions, 'create').mockRejectedValue(genericError); - - // Act - try { - await instance.chat({ - messages: [{ content: 'Hello', role: 'user' }], - model: 'yi-34b-chat-0205', - temperature: 0, - }); - } catch (e) { - expect(e).toEqual({ - endpoint: defaultBaseURL, - errorType: 'AgentRuntimeError', - provider, - error: { - name: genericError.name, - cause: genericError.cause, - message: genericError.message, - stack: genericError.stack, - }, - }); - } - }); - }); - - describe('DEBUG', () => { - it('should call debugStream and return StreamingTextResponse when DEBUG_ZEROONE_CHAT_COMPLETION is 1', async () => { - // Arrange - const mockProdStream = new ReadableStream() as any; // 模拟的 prod 流 - const mockDebugStream = new ReadableStream({ - start(controller) { - controller.enqueue('Debug stream content'); - controller.close(); - }, - }) as any; - mockDebugStream.toReadableStream = () => mockDebugStream; // 添加 toReadableStream 方法 - - // 模拟 chat.completions.create 返回值,包括模拟的 tee 方法 - (instance['client'].chat.completions.create as Mock).mockResolvedValue({ - tee: () => [mockProdStream, { toReadableStream: () => mockDebugStream }], - }); - - // 保存原始环境变量值 - const originalDebugValue = process.env.DEBUG_ZEROONE_CHAT_COMPLETION; - - // 模拟环境变量 - process.env.DEBUG_ZEROONE_CHAT_COMPLETION = '1'; - vi.spyOn(debugStreamModule, 'debugStream').mockImplementation(() => Promise.resolve()); - - // 执行测试 - // 运行你的测试函数,确保它会在条件满足时调用 debugStream - // 假设的测试函数调用,你可能需要根据实际情况调整 - await instance.chat({ - messages: [{ content: 'Hello', role: 'user' }], - model: 'yi-34b-chat-0205', - temperature: 0, - }); - - // 验证 debugStream 被调用 - expect(debugStreamModule.debugStream).toHaveBeenCalled(); - - // 恢复原始环境变量值 - process.env.DEBUG_ZEROONE_CHAT_COMPLETION = originalDebugValue; - }); - }); - }); +testProvider({ + Runtime: LobeZeroOneAI, + provider: 'zeroone', + defaultBaseURL: 'https://api.lingyiwanwu.com/v1', + chatDebugEnv: 'DEBUG_ZEROONE_CHAT_COMPLETION', + chatModel: 'yi-34b-chat-0205', }); diff --git a/src/locales/default/chat.ts b/src/locales/default/chat.ts index 1bf57472406be..92c979151d12f 100644 --- a/src/locales/default/chat.ts +++ b/src/locales/default/chat.ts @@ -93,15 +93,19 @@ export default { inputMinutes: '${{amount}}/分钟', inputTokens: '输入 {{amount}}/积分 · ${{amount}}/M', outputTokens: '输出 {{amount}}/积分 · ${{amount}}/M', + writeCacheInputTokens: '缓存输入写入 {{amount}}/积分 · ${{amount}}/M', }, }, tokenDetails: { + average: '平均单价', input: '输入', inputAudio: '音频输入', inputCached: '输入缓存', + inputCitation: '引用输入', inputText: '文本输入', inputTitle: '输入明细', inputUncached: '输入未缓存', + inputWriteCached: '输入缓存写入', output: '输出', outputAudio: '音频输出', outputText: '文本输出', diff --git a/src/types/message/base.ts b/src/types/message/base.ts index b312bda3ae267..fde4b2166e2e5 100644 --- a/src/types/message/base.ts +++ b/src/types/message/base.ts @@ -15,14 +15,24 @@ export interface ModelReasoning { export interface ModelTokensUsage { acceptedPredictionTokens?: number; - cachedTokens?: number; inputAudioTokens?: number; inputCacheMissTokens?: number; - inputTokens?: number; + inputCachedTokens?: number; + /** + * currently only pplx has citation_tokens + */ + inputCitationTokens?: 
number; + /** + * user prompt input + */ + inputTextTokens?: number; + inputWriteCacheTokens?: number; outputAudioTokens?: number; - outputTokens?: number; - reasoningTokens?: number; + outputReasoningTokens?: number; + outputTextTokens?: number; rejectedPredictionTokens?: number; + totalInputTokens?: number; + totalOutputTokens?: number; totalTokens?: number; } diff --git a/src/utils/filter.test.ts b/src/utils/filter.test.ts deleted file mode 100644 index 47db6c2bfbcd1..0000000000000 --- a/src/utils/filter.test.ts +++ /dev/null @@ -1,122 +0,0 @@ -import { test } from 'vitest'; - -test('placeholder', () => {}); -// describe('filterWithKeywords', () => { -// const data: Record = { -// 1: { -// id: '1', -// meta: { -// title: 'hello world', -// description: 'test case', -// tag: ['a', 'b'], -// }, -// }, -// 2: { -// id: '2', -// meta: { -// title: 'goodbye', -// description: 'hello world', -// tag: ['c', 'd'], -// }, -// }, -// }; -// -// it('should return an empty object if map is empty', () => { -// const result = filterWithKeywords({}, 'hello'); -// expect(result).toEqual({}); -// }); -// -// it('should return the original map if keywords is empty', () => { -// const result = filterWithKeywords(data, ''); -// expect(result).toEqual(data); -// }); -// -// it('should return a filtered map if keywords is not empty', () => { -// const result = filterWithKeywords(data, 'world'); -// expect(result).toEqual({ -// 1: { -// id: '1', -// meta: { -// title: 'hello world', -// description: 'test case', -// tag: ['a', 'b'], -// }, -// }, -// 2: { -// id: '2', -// meta: { -// title: 'goodbye', -// description: 'hello world', -// tag: ['c', 'd'], -// }, -// }, -// }); -// }); -// -// it('should only consider title, description and tag properties if extraSearchStr is not provided', () => { -// const result = filterWithKeywords(data, 'test'); -// expect(result).toEqual({ -// 1: { -// id: '1', -// meta: { -// title: 'hello world', -// description: 'test case', -// tag: ['a', 'b'], -// }, -// }, -// }); -// }); -// -// it('should consider extraSearchStr in addition to title, description and tag properties if provided', () => { -// const extraSearchStr = (item: BaseDataModel) => { -// return item.meta.avatar || ''; -// }; -// const data: Record = { -// a: { -// id: 'a', -// meta: { -// title: 'hello world', -// description: 'test case', -// tag: ['a', 'b'], -// avatar: 'xxx', -// }, -// }, -// b: { -// id: 'b', -// meta: { -// title: 'goodbye', -// description: 'hello world', -// tag: ['c', 'd'], -// avatar: 'yyy', -// }, -// }, -// }; -// -// const result = filterWithKeywords(data, 'yyy', extraSearchStr); -// expect(result).toEqual({ -// b: { -// id: 'b', -// meta: { -// title: 'goodbye', -// description: 'hello world', -// tag: ['c', 'd'], -// avatar: 'yyy', -// }, -// }, -// }); -// }); -// -// it('should ensure that each filtered object has at least one property that includes the keyword or extraSearchStr', () => { -// const result = filterWithKeywords(data, 't'); -// expect(result).toEqual({ -// 1: { -// id: '1', -// meta: { -// title: 'hello world', -// description: 'test case', -// tag: ['a', 'b'], -// }, -// }, -// }); -// }); -// }); diff --git a/src/utils/filter.ts b/src/utils/filter.ts deleted file mode 100644 index 9af2c527d9f90..0000000000000 --- a/src/utils/filter.ts +++ /dev/null @@ -1,29 +0,0 @@ -import { BaseDataModel } from '@/types/meta'; - -export const filterWithKeywords = ( - map: Record, - keywords: string, - extraSearchStr?: (item: T) => string | string[], -) => { - if (!keywords) 
return map; - - return Object.fromEntries( - Object.entries(map).filter(([, item]) => { - const meta = item.meta; - - const keyList = [meta.title, meta.description, meta.tags?.join('')].filter( - Boolean, - ) as string[]; - - const defaultSearchKey = keyList.join(''); - - let extraSearchKey: string = ''; - if (extraSearchStr) { - const searchStr = extraSearchStr(item); - extraSearchKey = Array.isArray(searchStr) ? searchStr.join('') : searchStr; - } - - return `${defaultSearchKey}${extraSearchKey}`.toLowerCase().includes(keywords.toLowerCase()); - }), - ); -};
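
The new `usageConverter` normalizes every OpenAI-compatible usage payload into the `ModelTokensUsage` shape before it reaches the UI. Below is a minimal sketch of calling it, assuming the repo's `@/` path alias; the input mirrors the DeepSeek-style `prompt_cache_*` fields covered by `usageConverter.test.ts`, and the commented output restates that test's expectation rather than any new behavior.

```ts
import OpenAI from 'openai';

import { convertUsage } from '@/libs/agent-runtime/utils/usageConverter';

// Raw usage block as an OpenAI-compatible provider may report it, including
// the non-standard prompt_cache_* fields that convertUsage handles.
const raw = {
  prompt_tokens: 100,
  prompt_cache_hit_tokens: 30,
  prompt_cache_miss_tokens: 70,
  completion_tokens: 50,
  total_tokens: 150,
} as OpenAI.Completions.CompletionUsage;

// Normalized shape consumed by the usage UI; zero or undefined fields are dropped.
console.log(convertUsage(raw));
// => {
//   inputTextTokens: 100,
//   inputCachedTokens: 30,
//   inputCacheMissTokens: 70,
//   totalInputTokens: 100,
//   totalOutputTokens: 50,
//   outputTextTokens: 50,
//   totalTokens: 150,
// }
```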
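
On the request side, the OpenAI-compatible factory only attaches `stream_options` when the call is actually streamed, which is what makes LiteLLM emit its trailing usage-only chunk. A rough sketch of the resulting request body, with placeholder model and messages (not taken from the PR):

```ts
import OpenAI from 'openai';

// Placeholder inputs; the factory derives these from the caller's payload.
const stream = true;

const body: OpenAI.ChatCompletionCreateParams = {
  messages: [{ content: 'Hello', role: 'user' }],
  model: 'gpt-4o-mini',
  stream,
  // Ask OpenAI-compatible gateways (including LiteLLM) for a final usage-only
  // chunk; left undefined for non-streaming requests.
  stream_options: stream ? { include_usage: true } : undefined,
};
```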
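
For Anthropic, cache reads and writes are now folded into the input total instead of being dropped. With the figures from the prompt-caching test above, the arithmetic works out as follows; this is only an illustration of the formula, not additional runtime code:

```ts
// Figures taken from the message_start / message_delta chunks in the test above.
const inputCacheMissTokens = 6; // usage.input_tokens
const inputWriteCacheTokens = 457; // cache_creation_input_tokens
const inputCachedTokens = 17_918; // cache_read_input_tokens
const totalOutputTokens = 2 + 3222; // message_start + message_delta output_tokens

const totalInputTokens = inputCacheMissTokens + inputWriteCacheTokens + inputCachedTokens; // 18381
const totalTokens = totalInputTokens + totalOutputTokens; // 21605

console.log({ totalInputTokens, totalOutputTokens, totalTokens });
```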
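
Downstream code now reads the split totals rather than the old `inputTokens`/`outputTokens` pair. A hypothetical helper, not part of this PR, showing how the renamed fields compose; it assumes the `@/types/message` export touched above:

```ts
import type { ModelTokensUsage } from '@/types/message';

// Hypothetical helper: share of the prompt that was served from the provider cache.
const cachedInputRatio = (usage: ModelTokensUsage): number => {
  const total = usage.totalInputTokens ?? 0;
  if (total === 0) return 0;
  return (usage.inputCachedTokens ?? 0) / total;
};

// With the Anthropic example above: 17918 / 18381 ≈ 0.97
```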