Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,14 @@ public class TokenStatsModel
public string Provider { get; set; }
public string Model { get; set; }
public string Prompt { get; set; }
public int PromptCount { get; set; }
public int CachedPromptCount { get; set; }
public int CompletionCount { get; set; }
public int TextInputTokens { get; set; }
public int CachedTextInputTokens { get; set; }
public int AudioInputTokens { get; set; }
public int CachedAudioInputTokens { get; set; }
public int TextOutputTokens { get; set; }
public int AudioOutputTokens { get; set; }
public AgentLlmConfig LlmConfig { get; set; }

public int TotalInputTokens => TextInputTokens + CachedTextInputTokens + AudioInputTokens + CachedAudioInputTokens;
public int TotalOutputTokens => TextOutputTokens + AudioOutputTokens;
}
Original file line number Diff line number Diff line change
Expand Up @@ -42,35 +42,33 @@ public class LlmModelSetting
/// </summary>
public bool ImageGeneration { get; set; }

/// <summary>
/// Prompt cost per 1K tokens
/// </summary>
public float PromptCost { get; set; }

/// <summary>
/// Completion cost per 1K tokens
/// </summary>
public float CompletionCost { get; set; }

/// <summary>
/// Embedding dimension
/// </summary>
public int Dimension { get; set; }

public LlmCost AdditionalCost { get; set; } = new();
public LlmCost Cost { get; set; } = new();

public override string ToString()
{
return $"[{Type}] {Name} {Endpoint}";
}
}

/// <summary>
/// Cost per 1K tokens, broken down by direction (input/output) and by
/// modality (text/audio). Every rate is initialized to 0f.
/// </summary>
/// <remarks>
/// NOTE(review): this listing looks like a diff view without +/- markers, so the
/// first four properties may be the pre-change members that the six below them
/// replace. Confirm against the actual file before relying on them.
/// </remarks>
public class LlmCost
{
// NOTE(review): presumably the older prompt/completion naming; verify whether these still exist.
public float CachedPromptCost { get; set; } = 0f;
public float AudioPromptCost { get; set; } = 0f;
// Getter-only: no setter is declared, so these keep the 0f initializer unless a
// constructor (none is visible here) assigns them.
public float ReasoningCompletionCost { get; } = 0f;
public float AudioCompletionCost { get; } = 0f;
// Input rates: text, cached text, audio, cached audio.
public float TextInputCost { get; set; } = 0f;
public float CachedTextInputCost { get; set; } = 0f;
public float AudioInputCost { get; set; } = 0f;
public float CachedAudioInputCost { get; set; } = 0f;

// Output rates: text and audio.
public float TextOutputCost { get; set; } = 0f;
public float AudioOutputCost { get; set; } = 0f;
}

public enum LlmModelType
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,28 +35,33 @@ public TokenStatistics(IServiceProvider services, ILogger<TokenStatistics> logge
public void AddToken(TokenStatsModel stats, RoleDialogModel message)
{
_model = stats.Model;
_promptTokenCount += stats.PromptCount;
_completionTokenCount += stats.CompletionCount;
_promptTokenCount += stats.TotalInputTokens;
_completionTokenCount += stats.TotalOutputTokens;

var settingsService = _services.GetRequiredService<ILlmProviderService>();
var settings = settingsService.GetSetting(stats.Provider, _model);

var deltaPromptCost = (stats.PromptCount - stats.CachedPromptCount) / 1000f * settings.PromptCost;
var deltaCachedPromptCost = stats.CachedPromptCount / 1000f * (settings.AdditionalCost?.CachedPromptCost ?? 0f);
var deltaCompletionCost = stats.CompletionCount / 1000f * settings.CompletionCost;
var deltaTextInputCost = stats.TextInputTokens / 1000f * (settings.Cost?.TextInputCost ?? 0f);
var deltaCachedTextInputCost = stats.CachedTextInputTokens / 1000f * (settings.Cost?.CachedTextInputCost ?? 0f);
var deltaAudioInputCost = stats.AudioInputTokens / 1000f * (settings.Cost?.AudioInputCost ?? 0f);
var deltaCachedAudioInputCost = stats.CachedAudioInputTokens / 1000f * (settings.Cost?.CachedAudioInputCost ?? 0f);

var deltaTotal = deltaPromptCost + deltaCachedPromptCost + deltaCompletionCost;
var deltaTextOutputCost = stats.TextOutputTokens / 1000f * (settings.Cost?.TextOutputCost ?? 0f);
var deltaAudioOutputCost = stats.AudioOutputTokens / 1000f * (settings.Cost?.AudioOutputCost ?? 0f);

var deltaPromptCost = deltaTextInputCost + deltaCachedTextInputCost + deltaAudioInputCost + deltaCachedAudioInputCost;
var deltaCompletionCost = deltaTextOutputCost + deltaAudioOutputCost;

var deltaTotal = deltaPromptCost + deltaCompletionCost;
_promptCost += deltaPromptCost;
_completionCost += deltaCompletionCost;

// Accumulated Token
var stat = _services.GetRequiredService<IConversationStateService>();
var inputCount = int.Parse(stat.GetState("prompt_total", "0"));
stat.SetState("prompt_total", stats.PromptCount + inputCount, isNeedVersion: false, source: StateSource.Application);
stat.SetState("prompt_total", stats.TotalInputTokens + inputCount, isNeedVersion: false, source: StateSource.Application);
var outputCount = int.Parse(stat.GetState("completion_total", "0"));
stat.SetState("completion_total", stats.CompletionCount + outputCount, isNeedVersion: false, source: StateSource.Application);
var cachedCount = int.Parse(stat.GetState("cached_prompt_total", "0"));
stat.SetState("cached_prompt_total", stats.CachedPromptCount + cachedCount, isNeedVersion: false, source: StateSource.Application);
stat.SetState("completion_total", stats.TotalOutputTokens + outputCount, isNeedVersion: false, source: StateSource.Application);

// Total cost
var total_cost = float.Parse(stat.GetState("llm_total_cost", "0"));
Expand All @@ -76,8 +81,8 @@ public void AddToken(TokenStatsModel stats, RoleDialogModel message)
RecordTime = DateTime.UtcNow,
IntervalType = StatsInterval.Day,
Data = [
new StatsKeyValuePair("prompt_token_count_total", stats.PromptCount),
new StatsKeyValuePair("completion_token_count_total", stats.CompletionCount),
new StatsKeyValuePair("prompt_token_count_total", stats.TotalInputTokens),
new StatsKeyValuePair("completion_token_count_total", stats.TotalOutputTokens),
new StatsKeyValuePair("prompt_cost_total", deltaPromptCost),
new StatsKeyValuePair("completion_cost_total", deltaCompletionCost)
]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -551,8 +551,8 @@ public async Task<SpeechToTextViewModel> SpeechToText(IFormFile file,

try
{
var auditData = FileUtility.BuildFileDataFromFile(file);
var content = await fileInstruct.SpeechToText(new InstructFileModel { FileData = auditData }, text, new InstructOptions
var audioData = FileUtility.BuildFileDataFromFile(file);
var content = await fileInstruct.SpeechToText(new InstructFileModel { FileData = audioData }, text, new InstructOptions
{
Provider = provider,
Model = model,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,8 @@ public async Task<RoleDialogModel> GetChatCompletions(Agent agent, List<RoleDial
Prompt = prompt,
Provider = Provider,
Model = _model,
PromptCount = response.Usage?.InputTokens ?? 0,
CompletionCount = response.Usage?.OutputTokens ?? 0
TextInputTokens = response.Usage?.InputTokens ?? 0,
TextOutputTokens = response.Usage?.OutputTokens ?? 0
});
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
using Azure;
using BotSharp.Abstraction.Files.Utilities;
using OpenAI.Chat;
using System.ClientModel;
Expand Down Expand Up @@ -40,12 +41,13 @@ public async Task<RoleDialogModel> GetChatCompletions(Agent agent, List<RoleDial
var chatClient = client.GetChatClient(_model);
var (prompt, messages, options) = PrepareOptions(agent, conversations);

ClientResult<ChatCompletion>? response = null;
ChatCompletion value = default;
RoleDialogModel responseMessage;

try
{
var response = chatClient.CompleteChat(messages, options);
response = chatClient.CompleteChat(messages, options);
value = response.Value;

var reason = value.FinishReason;
Expand Down Expand Up @@ -101,6 +103,9 @@ public async Task<RoleDialogModel> GetChatCompletions(Agent agent, List<RoleDial
};
}

var tokenUsage = response?.Value?.Usage;
var inputTokenDetails = response?.Value?.Usage?.InputTokenDetails;

// After chat completion hook
foreach (var hook in contentHooks)
{
Expand All @@ -109,8 +114,9 @@ public async Task<RoleDialogModel> GetChatCompletions(Agent agent, List<RoleDial
Prompt = prompt,
Provider = Provider,
Model = _model,
PromptCount = value?.Usage?.InputTokenCount ?? 0,
CompletionCount = value?.Usage?.OutputTokenCount ?? 0
TextInputTokens = (tokenUsage?.InputTokenCount ?? 0) - (inputTokenDetails?.CachedTokenCount ?? 0),
CachedTextInputTokens = inputTokenDetails?.CachedTokenCount ?? 0,
TextOutputTokens = tokenUsage?.OutputTokenCount ?? 0
});
}

Expand Down Expand Up @@ -146,6 +152,9 @@ public async Task<bool> GetChatCompletionsAsync(Agent agent,
RenderedInstruction = string.Join("\r\n", renderedInstructions)
};

var tokenUsage = response?.Value?.Usage;
var inputTokenDetails = response?.Value?.Usage?.InputTokenDetails;

// After chat completion hook
foreach (var hook in hooks)
{
Expand All @@ -154,8 +163,9 @@ public async Task<bool> GetChatCompletionsAsync(Agent agent,
Prompt = prompt,
Provider = Provider,
Model = _model,
PromptCount = response.Value?.Usage?.InputTokenCount ?? 0,
CompletionCount = response.Value?.Usage?.OutputTokenCount ?? 0
TextInputTokens = (tokenUsage?.InputTokenCount ?? 0) - (inputTokenDetails?.CachedTokenCount ?? 0),
CachedTextInputTokens = inputTokenDetails?.CachedTokenCount ?? 0,
TextOutputTokens = tokenUsage?.OutputTokenCount ?? 0
});
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,14 +78,15 @@ public async Task<string> GetCompletion(string text, string agentId, string mess
CurrentAgentId = agentId,
MessageId = messageId
};

Task.WaitAll(contentHooks.Select(hook =>
hook.AfterGenerated(responseMessage, new TokenStatsModel
{
Prompt = text,
Provider = Provider,
Model = _model,
PromptCount = response.Usage?.PromptTokens ?? default,
CompletionCount = response.Usage?.CompletionTokens ?? default
TextInputTokens = response?.Usage?.PromptTokens ?? 0,
TextOutputTokens = response?.Usage?.CompletionTokens ?? 0
})).ToArray());

return completion.Trim();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,9 @@ public async Task<RoleDialogModel> GetChatCompletions(Agent agent, List<RoleDial
};
}

var tokenUsage = response?.Value?.Usage;
var inputTokenDetails = response?.Value?.Usage?.InputTokenDetails;

// After chat completion hook
foreach (var hook in contentHooks)
{
Expand All @@ -81,8 +84,9 @@ public async Task<RoleDialogModel> GetChatCompletions(Agent agent, List<RoleDial
Prompt = prompt,
Provider = Provider,
Model = _model,
PromptCount = response.Value?.Usage?.InputTokenCount ?? 0,
CompletionCount = response.Value?.Usage?.OutputTokenCount ?? 0
TextInputTokens = (tokenUsage?.InputTokenCount ?? 0) - (inputTokenDetails?.CachedTokenCount ?? 0),
CachedTextInputTokens = inputTokenDetails?.CachedTokenCount ?? 0,
TextOutputTokens = tokenUsage?.OutputTokenCount ?? 0
});
}

Expand Down Expand Up @@ -115,6 +119,9 @@ public async Task<bool> GetChatCompletionsAsync(Agent agent, List<RoleDialogMode
RenderedInstruction = string.Join("\r\n", renderedInstructions)
};

var tokenUsage = response?.Value?.Usage;
var inputTokenDetails = response?.Value?.Usage?.InputTokenDetails;

// After chat completion hook
foreach (var hook in hooks)
{
Expand All @@ -123,8 +130,9 @@ public async Task<bool> GetChatCompletionsAsync(Agent agent, List<RoleDialogMode
Prompt = prompt,
Provider = Provider,
Model = _model,
PromptCount = response.Value?.Usage?.InputTokenCount ?? 0,
CompletionCount = response.Value?.Usage?.OutputTokenCount ?? 0
TextInputTokens = (tokenUsage?.InputTokenCount ?? 0) - (inputTokenDetails?.CachedTokenCount ?? 0),
CachedTextInputTokens = inputTokenDetails?.CachedTokenCount ?? 0,
TextOutputTokens = tokenUsage?.OutputTokenCount ?? 0
});
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,15 +61,19 @@ public async Task<string> GetCompletion(string text, string agentId, string mess
MessageId = messageId
};

var tokenUsage = response?.Value?.Usage;
var inputTokenDetails = response?.Value?.Usage?.InputTokenDetails;

foreach (var hook in contentHooks)
{
await hook.AfterGenerated(responseMessage, new TokenStatsModel
{
Prompt = text,
Provider = Provider,
Model = _model,
PromptCount = response?.Value?.Usage?.InputTokenCount ?? default,
CompletionCount = response?.Value?.Usage?.OutputTokenCount ?? default
TextInputTokens = (tokenUsage?.InputTokenCount ?? 0) - (inputTokenDetails?.CachedTokenCount ?? 0),
CachedTextInputTokens = inputTokenDetails?.CachedTokenCount ?? 0,
TextOutputTokens = tokenUsage?.OutputTokenCount ?? 0
});
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,9 +81,8 @@ public async Task<RoleDialogModel> GetChatCompletions(Agent agent, List<RoleDial
Prompt = prompt,
Provider = Provider,
Model = _model,
PromptCount = response.UsageMetadata?.PromptTokenCount ?? 0,
CachedPromptCount = response.UsageMetadata?.CachedContentTokenCount ?? 0,
CompletionCount = response.UsageMetadata?.CandidatesTokenCount ?? 0
TextInputTokens = response?.UsageMetadata?.PromptTokenCount ?? 0,
TextOutputTokens = response?.UsageMetadata?.CandidatesTokenCount ?? 0
});
}

Expand Down Expand Up @@ -124,9 +123,8 @@ public async Task<bool> GetChatCompletionsAsync(Agent agent, List<RoleDialogMode
Prompt = prompt,
Provider = Provider,
Model = _model,
PromptCount = response?.UsageMetadata?.PromptTokenCount ?? 0,
CachedPromptCount = response.UsageMetadata?.CachedContentTokenCount ?? 0,
CompletionCount = response.UsageMetadata?.CandidatesTokenCount ?? 0
TextInputTokens = response?.UsageMetadata?.PromptTokenCount ?? 0,
TextOutputTokens = response?.UsageMetadata?.CandidatesTokenCount ?? 0
});
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,8 @@ public async Task<string> GetCompletion(string text, string agentId, string mess
Prompt = text,
Provider = Provider,
Model = _model,
PromptCount = response.UsageMetadata?.PromptTokenCount ?? 0,
CachedPromptCount = response.UsageMetadata?.CachedContentTokenCount ?? 0,
CompletionCount = response.UsageMetadata?.CandidatesTokenCount ?? 0
TextInputTokens = response?.UsageMetadata?.PromptTokenCount ?? 0,
TextOutputTokens = response?.UsageMetadata?.CandidatesTokenCount ?? 0
});
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,14 +46,14 @@ public async Task<string> GetCompletion(string text, string agentId, string mess
};

Task.WaitAll(contentHooks.Select(hook =>
hook.AfterGenerated(responseMessage, new TokenStatsModel
{
Prompt = text,
Provider = Provider,
Model = _model,
PromptCount = response.Usage.TotalTokens,
CompletionCount = response.Usage.OutputTokens
})).ToArray());
hook.AfterGenerated(responseMessage, new TokenStatsModel
{
Prompt = text,
Provider = Provider,
Model = _model,
TextInputTokens = response.Usage.InputTokens,
TextOutputTokens = response.Usage.OutputTokens
})).ToArray());

return response.LastMessageContent;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,8 @@ public async Task<RoleDialogModel> GetChatCompletions(Agent agent, List<RoleDial
Prompt = prompt,
Provider = Provider,
Model = _model,
PromptCount = response.usage.GetValueOrDefault("prompt_tokens"),
CompletionCount = response.usage.GetValueOrDefault("completion_tokens")
TextInputTokens = response.usage.GetValueOrDefault("prompt_tokens"),
TextOutputTokens = response.usage.GetValueOrDefault("completion_tokens")
});
}

Expand Down
Loading
Loading