diff --git a/src/Infrastructure/BotSharp.Abstraction/Conversations/Models/TokenStatsModel.cs b/src/Infrastructure/BotSharp.Abstraction/Conversations/Models/TokenStatsModel.cs index ca55e385e..4e032b667 100644 --- a/src/Infrastructure/BotSharp.Abstraction/Conversations/Models/TokenStatsModel.cs +++ b/src/Infrastructure/BotSharp.Abstraction/Conversations/Models/TokenStatsModel.cs @@ -5,8 +5,14 @@ public class TokenStatsModel public string Provider { get; set; } public string Model { get; set; } public string Prompt { get; set; } - public int PromptCount { get; set; } - public int CachedPromptCount { get; set; } - public int CompletionCount { get; set; } + public int TextInputTokens { get; set; } + public int CachedTextInputTokens { get; set; } + public int AudioInputTokens { get; set; } + public int CachedAudioInputTokens { get; set; } + public int TextOutputTokens { get; set; } + public int AudioOutputTokens { get; set; } public AgentLlmConfig LlmConfig { get; set; } + + public int TotalInputTokens => TextInputTokens + CachedTextInputTokens + AudioInputTokens + CachedAudioInputTokens; + public int TotalOutputTokens => TextOutputTokens + AudioOutputTokens; } diff --git a/src/Infrastructure/BotSharp.Abstraction/MLTasks/Settings/LlmModelSetting.cs b/src/Infrastructure/BotSharp.Abstraction/MLTasks/Settings/LlmModelSetting.cs index b10f88f30..e74a7c370 100644 --- a/src/Infrastructure/BotSharp.Abstraction/MLTasks/Settings/LlmModelSetting.cs +++ b/src/Infrastructure/BotSharp.Abstraction/MLTasks/Settings/LlmModelSetting.cs @@ -42,22 +42,12 @@ public class LlmModelSetting /// public bool ImageGeneration { get; set; } - /// - /// Prompt cost per 1K token - /// - public float PromptCost { get; set; } - - /// - /// Completion cost per 1K token - /// - public float CompletionCost { get; set; } - /// /// Embedding dimension /// public int Dimension { get; set; } - public LlmCost AdditionalCost { get; set; } = new(); + public LlmCost Cost { get; set; } = new(); public override 
string ToString() { @@ -65,12 +55,20 @@ public override string ToString() } } +/// +/// Cost per 1K tokens +/// public class LlmCost { - public float CachedPromptCost { get; set; } = 0f; - public float AudioPromptCost { get; set; } = 0f; - public float ReasoningCompletionCost { get; } = 0f; - public float AudioCompletionCost { get; } = 0f; + // Input + public float TextInputCost { get; set; } = 0f; + public float CachedTextInputCost { get; set; } = 0f; + public float AudioInputCost { get; set; } = 0f; + public float CachedAudioInputCost { get; set; } = 0f; + + // Output + public float TextOutputCost { get; set; } = 0f; + public float AudioOutputCost { get; set; } = 0f; } public enum LlmModelType diff --git a/src/Infrastructure/BotSharp.Core/Conversations/Services/TokenStatistics.cs b/src/Infrastructure/BotSharp.Core/Conversations/Services/TokenStatistics.cs index 5a0614ed3..945f8186e 100644 --- a/src/Infrastructure/BotSharp.Core/Conversations/Services/TokenStatistics.cs +++ b/src/Infrastructure/BotSharp.Core/Conversations/Services/TokenStatistics.cs @@ -35,28 +35,33 @@ public TokenStatistics(IServiceProvider services, ILogger logge public void AddToken(TokenStatsModel stats, RoleDialogModel message) { _model = stats.Model; - _promptTokenCount += stats.PromptCount; - _completionTokenCount += stats.CompletionCount; + _promptTokenCount += stats.TotalInputTokens; + _completionTokenCount += stats.TotalOutputTokens; var settingsService = _services.GetRequiredService(); var settings = settingsService.GetSetting(stats.Provider, _model); - var deltaPromptCost = (stats.PromptCount - stats.CachedPromptCount) / 1000f * settings.PromptCost; - var deltaCachedPromptCost = stats.CachedPromptCount / 1000f * (settings.AdditionalCost?.CachedPromptCost ?? 0f); - var deltaCompletionCost = stats.CompletionCount / 1000f * settings.CompletionCost; + var deltaTextInputCost = stats.TextInputTokens / 1000f * (settings.Cost?.TextInputCost ?? 
0f); + var deltaCachedTextInputCost = stats.CachedTextInputTokens / 1000f * (settings.Cost?.CachedTextInputCost ?? 0f); + var deltaAudioInputCost = stats.AudioInputTokens / 1000f * (settings.Cost?.AudioInputCost ?? 0f); + var deltaCachedAudioInputCost = stats.CachedAudioInputTokens / 1000f * (settings.Cost?.CachedAudioInputCost ?? 0f); - var deltaTotal = deltaPromptCost + deltaCachedPromptCost + deltaCompletionCost; + var deltaTextOutputCost = stats.TextOutputTokens / 1000f * (settings.Cost?.TextOutputCost ?? 0f); + var deltaAudioOutputCost = stats.AudioOutputTokens / 1000f * (settings.Cost?.AudioOutputCost ?? 0f); + + var deltaPromptCost = deltaTextInputCost + deltaCachedTextInputCost + deltaAudioInputCost + deltaCachedAudioInputCost; + var deltaCompletionCost = deltaTextOutputCost + deltaAudioOutputCost; + + var deltaTotal = deltaPromptCost + deltaCompletionCost; _promptCost += deltaPromptCost; _completionCost += deltaCompletionCost; // Accumulated Token var stat = _services.GetRequiredService(); var inputCount = int.Parse(stat.GetState("prompt_total", "0")); - stat.SetState("prompt_total", stats.PromptCount + inputCount, isNeedVersion: false, source: StateSource.Application); + stat.SetState("prompt_total", stats.TotalInputTokens + inputCount, isNeedVersion: false, source: StateSource.Application); var outputCount = int.Parse(stat.GetState("completion_total", "0")); - stat.SetState("completion_total", stats.CompletionCount + outputCount, isNeedVersion: false, source: StateSource.Application); - var cachedCount = int.Parse(stat.GetState("cached_prompt_total", "0")); - stat.SetState("cached_prompt_total", stats.CachedPromptCount + cachedCount, isNeedVersion: false, source: StateSource.Application); + stat.SetState("completion_total", stats.TotalOutputTokens + outputCount, isNeedVersion: false, source: StateSource.Application); // Total cost var total_cost = float.Parse(stat.GetState("llm_total_cost", "0")); @@ -76,8 +81,8 @@ public void AddToken(TokenStatsModel 
stats, RoleDialogModel message) RecordTime = DateTime.UtcNow, IntervalType = StatsInterval.Day, Data = [ - new StatsKeyValuePair("prompt_token_count_total", stats.PromptCount), - new StatsKeyValuePair("completion_token_count_total", stats.CompletionCount), + new StatsKeyValuePair("prompt_token_count_total", stats.TotalInputTokens), + new StatsKeyValuePair("completion_token_count_total", stats.TotalOutputTokens), new StatsKeyValuePair("prompt_cost_total", deltaPromptCost), new StatsKeyValuePair("completion_cost_total", deltaCompletionCost) ] diff --git a/src/Infrastructure/BotSharp.OpenAPI/Controllers/InstructModeController.cs b/src/Infrastructure/BotSharp.OpenAPI/Controllers/InstructModeController.cs index abf3c6e37..ca3ec45a8 100644 --- a/src/Infrastructure/BotSharp.OpenAPI/Controllers/InstructModeController.cs +++ b/src/Infrastructure/BotSharp.OpenAPI/Controllers/InstructModeController.cs @@ -551,8 +551,8 @@ public async Task SpeechToText(IFormFile file, try { - var auditData = FileUtility.BuildFileDataFromFile(file); - var content = await fileInstruct.SpeechToText(new InstructFileModel { FileData = auditData }, text, new InstructOptions + var audioData = FileUtility.BuildFileDataFromFile(file); + var content = await fileInstruct.SpeechToText(new InstructFileModel { FileData = audioData }, text, new InstructOptions { Provider = provider, Model = model, diff --git a/src/Plugins/BotSharp.Plugin.AnthropicAI/Providers/ChatCompletionProvider.cs b/src/Plugins/BotSharp.Plugin.AnthropicAI/Providers/ChatCompletionProvider.cs index 5c1015ef5..6c6c2da20 100644 --- a/src/Plugins/BotSharp.Plugin.AnthropicAI/Providers/ChatCompletionProvider.cs +++ b/src/Plugins/BotSharp.Plugin.AnthropicAI/Providers/ChatCompletionProvider.cs @@ -81,8 +81,8 @@ public async Task GetChatCompletions(Agent agent, List GetChatCompletions(Agent agent, List? 
response = null; ChatCompletion value = default; RoleDialogModel responseMessage; try { - var response = chatClient.CompleteChat(messages, options); + response = chatClient.CompleteChat(messages, options); value = response.Value; var reason = value.FinishReason; @@ -101,6 +103,9 @@ public async Task GetChatCompletions(Agent agent, List GetChatCompletions(Agent agent, List GetChatCompletionsAsync(Agent agent, RenderedInstruction = string.Join("\r\n", renderedInstructions) }; + var tokenUsage = response?.Value?.Usage; + var inputTokenDetails = response?.Value?.Usage?.InputTokenDetails; + // After chat completion hook foreach (var hook in hooks) { @@ -154,8 +163,9 @@ public async Task GetChatCompletionsAsync(Agent agent, Prompt = prompt, Provider = Provider, Model = _model, - PromptCount = response.Value?.Usage?.InputTokenCount ?? 0, - CompletionCount = response.Value?.Usage?.OutputTokenCount ?? 0 + TextInputTokens = (tokenUsage?.InputTokenCount ?? 0) - (inputTokenDetails?.CachedTokenCount ?? 0), + CachedTextInputTokens = inputTokenDetails?.CachedTokenCount ?? 0, + TextOutputTokens = tokenUsage?.OutputTokenCount ?? 0 }); } diff --git a/src/Plugins/BotSharp.Plugin.AzureOpenAI/Providers/Text/TextCompletionProvider.cs b/src/Plugins/BotSharp.Plugin.AzureOpenAI/Providers/Text/TextCompletionProvider.cs index 2b6f3dfb1..f3bb7d1fa 100644 --- a/src/Plugins/BotSharp.Plugin.AzureOpenAI/Providers/Text/TextCompletionProvider.cs +++ b/src/Plugins/BotSharp.Plugin.AzureOpenAI/Providers/Text/TextCompletionProvider.cs @@ -78,14 +78,15 @@ public async Task GetCompletion(string text, string agentId, string mess CurrentAgentId = agentId, MessageId = messageId }; + Task.WaitAll(contentHooks.Select(hook => hook.AfterGenerated(responseMessage, new TokenStatsModel { Prompt = text, Provider = Provider, Model = _model, - PromptCount = response.Usage?.PromptTokens ?? default, - CompletionCount = response.Usage?.CompletionTokens ?? default + TextInputTokens = response?.Usage?.PromptTokens ?? 
0, + TextOutputTokens = response?.Usage?.CompletionTokens ?? 0 })).ToArray()); return completion.Trim(); diff --git a/src/Plugins/BotSharp.Plugin.DeepSeekAI/Providers/Chat/ChatCompletionProvider.cs b/src/Plugins/BotSharp.Plugin.DeepSeekAI/Providers/Chat/ChatCompletionProvider.cs index 4c21dd035..955d0ad22 100644 --- a/src/Plugins/BotSharp.Plugin.DeepSeekAI/Providers/Chat/ChatCompletionProvider.cs +++ b/src/Plugins/BotSharp.Plugin.DeepSeekAI/Providers/Chat/ChatCompletionProvider.cs @@ -73,6 +73,9 @@ public async Task GetChatCompletions(Agent agent, List GetChatCompletions(Agent agent, List GetChatCompletionsAsync(Agent agent, List GetChatCompletionsAsync(Agent agent, List GetCompletion(string text, string agentId, string mess MessageId = messageId }; + var tokenUsage = response?.Value?.Usage; + var inputTokenDetails = response?.Value?.Usage?.InputTokenDetails; + foreach (var hook in contentHooks) { await hook.AfterGenerated(responseMessage, new TokenStatsModel @@ -68,8 +71,9 @@ public async Task GetCompletion(string text, string agentId, string mess Prompt = text, Provider = Provider, Model = _model, - PromptCount = response?.Value?.Usage?.InputTokenCount ?? default, - CompletionCount = response?.Value?.Usage?.OutputTokenCount ?? default + TextInputTokens = (tokenUsage?.InputTokenCount ?? 0) - (inputTokenDetails?.CachedTokenCount ?? 0), + CachedTextInputTokens = inputTokenDetails?.CachedTokenCount ?? 0, + TextOutputTokens = tokenUsage?.OutputTokenCount ?? 
0 }); } diff --git a/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Chat/GeminiChatCompletionProvider.cs b/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Chat/GeminiChatCompletionProvider.cs index d296d0aaf..4b3a72a8a 100644 --- a/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Chat/GeminiChatCompletionProvider.cs +++ b/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Chat/GeminiChatCompletionProvider.cs @@ -81,9 +81,8 @@ public async Task GetChatCompletions(Agent agent, List GetChatCompletionsAsync(Agent agent, List GetCompletion(string text, string agentId, string mess Prompt = text, Provider = Provider, Model = _model, - PromptCount = response.UsageMetadata?.PromptTokenCount ?? 0, - CachedPromptCount = response.UsageMetadata?.CachedContentTokenCount ?? 0, - CompletionCount = response.UsageMetadata?.CandidatesTokenCount ?? 0 + TextInputTokens = response?.UsageMetadata?.PromptTokenCount ?? 0, + TextOutputTokens = response?.UsageMetadata?.CandidatesTokenCount ?? 0 }); } diff --git a/src/Plugins/BotSharp.Plugin.LangChain/Providers/TextCompletionProvider.cs b/src/Plugins/BotSharp.Plugin.LangChain/Providers/TextCompletionProvider.cs index 70f0cbdcd..364849e62 100644 --- a/src/Plugins/BotSharp.Plugin.LangChain/Providers/TextCompletionProvider.cs +++ b/src/Plugins/BotSharp.Plugin.LangChain/Providers/TextCompletionProvider.cs @@ -46,14 +46,14 @@ public async Task GetCompletion(string text, string agentId, string mess }; Task.WaitAll(contentHooks.Select(hook => - hook.AfterGenerated(responseMessage, new TokenStatsModel - { - Prompt = text, - Provider = Provider, - Model = _model, - PromptCount = response.Usage.TotalTokens, - CompletionCount = response.Usage.OutputTokens - })).ToArray()); + hook.AfterGenerated(responseMessage, new TokenStatsModel + { + Prompt = text, + Provider = Provider, + Model = _model, + TextInputTokens = response.Usage.InputTokens, + TextOutputTokens = response.Usage.OutputTokens + })).ToArray()); return response.LastMessageContent; } diff --git 
a/src/Plugins/BotSharp.Plugin.MetaGLM/Providers/ChatCompletionProvider.cs b/src/Plugins/BotSharp.Plugin.MetaGLM/Providers/ChatCompletionProvider.cs index b5b19671f..c1d4bef1f 100644 --- a/src/Plugins/BotSharp.Plugin.MetaGLM/Providers/ChatCompletionProvider.cs +++ b/src/Plugins/BotSharp.Plugin.MetaGLM/Providers/ChatCompletionProvider.cs @@ -78,8 +78,8 @@ public async Task GetChatCompletions(Agent agent, List GetChatCompletions(Agent agent, List GetChatCompletions(Agent agent, List GetChatCompletionsAsync(Agent agent, RenderedInstruction = string.Join("\r\n", renderedInstructions) }; + var tokenUsage = response?.Value?.Usage; + var inputTokenDetails = response?.Value?.Usage?.InputTokenDetails; + // After chat completion hook foreach (var hook in hooks) { @@ -128,8 +134,9 @@ public async Task GetChatCompletionsAsync(Agent agent, Prompt = prompt, Provider = Provider, Model = _model, - PromptCount = response.Value?.Usage?.InputTokenCount ?? 0, - CompletionCount = response.Value?.Usage?.OutputTokenCount ?? 0 + TextInputTokens = (tokenUsage?.InputTokenCount ?? 0) - (inputTokenDetails?.CachedTokenCount ?? 0), + CachedTextInputTokens = inputTokenDetails?.CachedTokenCount ?? 0, + TextOutputTokens = tokenUsage?.OutputTokenCount ?? 
0 }); } diff --git a/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Realtime/RealTimeCompletionProvider.cs b/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Realtime/RealTimeCompletionProvider.cs index 924ac7f1e..245eeafb2 100644 --- a/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Realtime/RealTimeCompletionProvider.cs +++ b/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Realtime/RealTimeCompletionProvider.cs @@ -178,11 +178,6 @@ private async Task ReceiveMessage(RealtimeHubConnection conn, _logger.LogDebug($"{response.Type}: {receivedText}"); onModelAudioDeltaReceived(audio.Delta, audio.ItemId); } - else - { - _logger.LogDebug($"{response.Type}: {receivedText}"); - onModelAudioDeltaReceived(audio.Delta, audio.ItemId); - } } else if (response.Type == "response.audio.done") { @@ -571,6 +566,9 @@ public async Task> OnResponsedDone(RealtimeHubConnection c var contentHooks = _services.GetServices().ToList(); + var inputTokenDetails = data.Usage?.InputTokenDetails; + var outputTokenDetails = data.Usage?.OutputTokenDetails; + foreach (var output in data.Outputs) { if (output.Type == "function_call") @@ -591,13 +589,18 @@ public async Task> OnResponsedDone(RealtimeHubConnection c await hook.AfterGenerated(new RoleDialogModel(AgentRole.Assistant, $"{output.Name}\r\n{output.Arguments}") { CurrentAgentId = conn.CurrentAgentId - }, new TokenStatsModel + }, + new TokenStatsModel { Provider = Provider, Model = _model, Prompt = $"{output.Name}\r\n{output.Arguments}", - CompletionCount = data.Usage.OutputTokens, - PromptCount = data.Usage.InputTokens + TextInputTokens = (inputTokenDetails?.TextTokens ?? 0) - (inputTokenDetails?.CachedTokenDetails?.TextTokens ?? 0), + CachedTextInputTokens = inputTokenDetails?.CachedTokenDetails?.TextTokens ?? 0, + AudioInputTokens = (inputTokenDetails?.AudioTokens ?? 0) - (inputTokenDetails?.CachedTokenDetails?.AudioTokens ?? 0), + CachedAudioInputTokens = inputTokenDetails?.CachedTokenDetails?.AudioTokens ?? 
0, + TextOutputTokens = outputTokenDetails?.TextTokens ?? 0, + AudioOutputTokens = outputTokenDetails?.AudioTokens ?? 0 }); } } @@ -618,13 +621,18 @@ await hook.AfterGenerated(new RoleDialogModel(AgentRole.Assistant, $"{output.Nam await hook.AfterGenerated(new RoleDialogModel(AgentRole.Assistant, content.Transcript) { CurrentAgentId = conn.CurrentAgentId - }, new TokenStatsModel + }, + new TokenStatsModel { Provider = Provider, Model = _model, Prompt = content.Transcript, - CompletionCount = data.Usage.OutputTokens, - PromptCount = data.Usage.InputTokens + TextInputTokens = (inputTokenDetails?.TextTokens ?? 0) - (inputTokenDetails?.CachedTokenDetails?.TextTokens ?? 0), + CachedTextInputTokens = inputTokenDetails?.CachedTokenDetails?.TextTokens ?? 0, + AudioInputTokens = (inputTokenDetails?.AudioTokens ?? 0) - (inputTokenDetails?.CachedTokenDetails?.AudioTokens ?? 0), + CachedAudioInputTokens = inputTokenDetails?.CachedTokenDetails?.AudioTokens ?? 0, + TextOutputTokens = outputTokenDetails?.TextTokens ?? 0, + AudioOutputTokens = outputTokenDetails?.AudioTokens ?? 0 }); } } diff --git a/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Text/TextCompletionProvider.cs b/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Text/TextCompletionProvider.cs index a80eb9356..bf0252ea4 100644 --- a/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Text/TextCompletionProvider.cs +++ b/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Text/TextCompletionProvider.cs @@ -73,8 +73,8 @@ public async Task GetCompletion(string text, string agentId, string mess Prompt = text, Provider = Provider, Model = _model, - PromptCount = response.Usage?.PromptTokens ?? default, - CompletionCount = response.Usage?.CompletionTokens ?? default + TextInputTokens = response.Usage?.PromptTokens ?? 0, + TextOutputTokens = response.Usage?.CompletionTokens ?? 
0 })).ToArray()); return completion.Trim(); diff --git a/src/Plugins/BotSharp.Plugin.SparkDesk/Providers/ChatCompletionProvider.cs b/src/Plugins/BotSharp.Plugin.SparkDesk/Providers/ChatCompletionProvider.cs index d5e64e7d4..ba0aa220f 100644 --- a/src/Plugins/BotSharp.Plugin.SparkDesk/Providers/ChatCompletionProvider.cs +++ b/src/Plugins/BotSharp.Plugin.SparkDesk/Providers/ChatCompletionProvider.cs @@ -69,8 +69,8 @@ public async Task GetChatCompletions(Agent agent, List GetChatCompletionsAsync(Agent agent, List