Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions BotSharp.sln
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "BotSharp.Test.RealtimeVoice
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "BotSharp.Plugin.ChartHandler", "src\Plugins\BotSharp.Plugin.ChartHandler\BotSharp.Plugin.ChartHandler.csproj", "{0428DEAA-E4FE-4259-A6D8-6EDD1A9D0702}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "BotSharp.Plugin.ExcelHandler", "src\Plugins\BotSharp.Plugin.ExcelHandler\BotSharp.Plugin.ExcelHandler.csproj", "{FC63C875-E880-D8BB-B8B5-978AB7B62983}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Expand Down Expand Up @@ -609,6 +611,14 @@ Global
{0428DEAA-E4FE-4259-A6D8-6EDD1A9D0702}.Release|Any CPU.Build.0 = Release|Any CPU
{0428DEAA-E4FE-4259-A6D8-6EDD1A9D0702}.Release|x64.ActiveCfg = Release|Any CPU
{0428DEAA-E4FE-4259-A6D8-6EDD1A9D0702}.Release|x64.Build.0 = Release|Any CPU
{FC63C875-E880-D8BB-B8B5-978AB7B62983}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{FC63C875-E880-D8BB-B8B5-978AB7B62983}.Debug|Any CPU.Build.0 = Debug|Any CPU
{FC63C875-E880-D8BB-B8B5-978AB7B62983}.Debug|x64.ActiveCfg = Debug|Any CPU
{FC63C875-E880-D8BB-B8B5-978AB7B62983}.Debug|x64.Build.0 = Debug|Any CPU
{FC63C875-E880-D8BB-B8B5-978AB7B62983}.Release|Any CPU.ActiveCfg = Release|Any CPU
{FC63C875-E880-D8BB-B8B5-978AB7B62983}.Release|Any CPU.Build.0 = Release|Any CPU
{FC63C875-E880-D8BB-B8B5-978AB7B62983}.Release|x64.ActiveCfg = Release|Any CPU
{FC63C875-E880-D8BB-B8B5-978AB7B62983}.Release|x64.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
Expand Down Expand Up @@ -679,6 +689,7 @@ Global
{7C0C7D13-D161-4AB0-9C29-83A0F1FF990E} = {32FAFFFE-A4CB-4FEE-BF7C-84518BBC6DCC}
{B067B126-88CD-4282-BEEF-7369B64423EF} = {32FAFFFE-A4CB-4FEE-BF7C-84518BBC6DCC}
{0428DEAA-E4FE-4259-A6D8-6EDD1A9D0702} = {51AFE054-AE99-497D-A593-69BAEFB5106F}
{FC63C875-E880-D8BB-B8B5-978AB7B62983} = {51AFE054-AE99-497D-A593-69BAEFB5106F}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {A9969D89-C98B-40A5-A12B-FC87E55B3A19}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,11 @@ public class LlmModelSetting
/// </summary>
public ImageSetting? Image { get; set; }

/// <summary>
/// Settings for audio
/// </summary>
public AudioSetting? Audio { get; set; }

/// <summary>
/// Settings for llm cost
/// </summary>
Expand Down Expand Up @@ -128,6 +133,20 @@ public class ImageVariationSetting
}
#endregion

#region Audio model settings
/// <summary>
/// Settings for audio models
/// </summary>
public class AudioSetting
{
    /// <summary>
    /// Settings for audio transcription; null when not configured
    /// </summary>
    public AudioTranscriptionSetting? Transcription { get; set; }
}

/// <summary>
/// Settings for audio transcription
/// </summary>
public class AudioTranscriptionSetting
{
    /// <summary>
    /// Optional temperature for transcription requests
    /// (presumably the sampling temperature passed to the provider — confirm against the consumer)
    /// </summary>
    public float? Temperature { get; set; }

    /// <summary>
    /// Response format setting, expressed as a <see cref="ModelSettingBase"/>
    /// </summary>
    public ModelSettingBase? ResponseFormat { get; set; }

    /// <summary>
    /// Granularity setting, expressed as a <see cref="ModelSettingBase"/>
    /// (presumably timestamp granularity — confirm against the consumer)
    /// </summary>
    public ModelSettingBase? Granularity { get; set; }
}
#endregion

public class ModelSettingBase
{
public string? Default { get; set; }
Expand Down

This file was deleted.

171 changes: 171 additions & 0 deletions src/Plugins/BotSharp.Plugin.AudioHandler/Functions/ReadAudioFn.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
namespace BotSharp.Plugin.AudioHandler.Functions;

/// <summary>
/// Function callback that looks up user-uploaded audio files in the current conversation,
/// transcribes the files attached to the most recent dialog that has any, and writes the
/// combined transcript into the function message content.
/// </summary>
public class ReadAudioFn : IFunctionCallback
{
    public string Name => "util-audio-handle_audio_request";
    public string Indication => "Reading audio";

    private readonly IServiceProvider _services;
    private readonly IFileStorageService _fileStorage;
    private readonly ILogger<ReadAudioFn> _logger;
    private readonly BotSharpOptions _options;
    private readonly AudioHandlerSettings _settings;

    // Content types used to filter the files fetched from file storage.
    private readonly IEnumerable<string> _audioContentTypes = new List<string>
    {
        AudioType.mp3.ToFileType(),
        AudioType.wav.ToFileType(),
    };

    public ReadAudioFn(
        IServiceProvider services,
        ILogger<ReadAudioFn> logger,
        BotSharpOptions options,
        AudioHandlerSettings settings,
        IFileStorageService fileStorage)
    {
        _services = services;
        _logger = logger;
        _options = options;
        _settings = settings;
        _fileStorage = fileStorage;
    }

    /// <summary>
    /// Transcribes the audio found in the conversation and stores the result in
    /// <paramref name="message"/>.Content.
    /// </summary>
    /// <param name="message">The function-call message; its content is overwritten with the transcript.</param>
    /// <returns>Always <c>true</c>.</returns>
    public async Task<bool> Execute(RoleDialogModel message)
    {
        // The arguments are deserialized but not consumed here; the call is kept so that
        // malformed arguments still fail the same way as before.
        var args = JsonSerializer.Deserialize<LlmContextIn>(message.FunctionArgs, _options.JsonSerializerOptions);
        var conv = _services.GetRequiredService<IConversationService>();
        var routingCtx = _services.GetRequiredService<IRoutingContext>();

        // Prefer the routing context dialogs; fall back to the stored conversation history.
        var wholeDialogs = routingCtx.GetDialogs();
        if (wholeDialogs.IsNullOrEmpty())
        {
            wholeDialogs = conv.GetDialogHistory();
        }

        var dialogs = AssembleFiles(conv.ConversationId, wholeDialogs);
        var response = await GetAudioTranscription(dialogs);
        message.Content = response;

        // Detach the files that AssembleFiles attached to the dialogs.
        dialogs.ForEach(x => x.Files = null);
        return true;
    }

    /// <summary>
    /// Attaches the user-sourced audio files from file storage to the user dialogs they belong to.
    /// </summary>
    /// <param name="convId">Conversation id used to look up message files.</param>
    /// <param name="dialogs">Dialogs to decorate; the same list instance is returned.</param>
    /// <returns>The input dialogs, or an empty list when the input is null or empty.</returns>
    private List<RoleDialogModel> AssembleFiles(string convId, List<RoleDialogModel> dialogs)
    {
        if (dialogs.IsNullOrEmpty())
        {
            return new List<RoleDialogModel>();
        }

        var messageId = dialogs.Select(x => x.MessageId).Distinct().ToList();
        var audioFiles = _fileStorage.GetMessageFiles(convId, messageId, options: new()
        {
            Sources = [FileSource.User],
            ContentTypes = _audioContentTypes
        });

        foreach (var dialog in dialogs)
        {
            var found = audioFiles.Where(x => x.MessageId == dialog.MessageId
                                           && x.FileSource.IsEqualTo(FileSource.User)).ToList();

            // Only user dialogs that actually have matching files get files attached.
            if (found.IsNullOrEmpty() || !dialog.IsFromUser)
            {
                continue;
            }

            dialog.Files = found.Select(x => new BotSharpFile
            {
                ContentType = x.ContentType,
                FileUrl = x.FileUrl,
                FileStorageUrl = x.FileStorageUrl
            }).ToList();
        }

        return dialogs;
    }

    /// <summary>
    /// Transcribes every audio file attached to the last dialog that has files.
    /// </summary>
    /// <param name="dialogs">Dialogs previously processed by <see cref="AssembleFiles"/>.</param>
    /// <returns>
    /// The transcripts joined by blank lines, or a "no audio" notice when nothing was transcribed.
    /// </returns>
    private async Task<string> GetAudioTranscription(List<RoleDialogModel> dialogs)
    {
        var audioCompletion = PrepareModel();
        var dialog = dialogs.Where(x => !x.Files.IsNullOrEmpty()).LastOrDefault();
        var transcripts = new List<string>();

        if (dialog != null)
        {
            foreach (var file in dialog.Files)
            {
                if (string.IsNullOrWhiteSpace(file?.FileStorageUrl))
                {
                    continue;
                }

                var fileName = Path.GetFileName(file.FileStorageUrl);
                if (!VerifyAudioFileType(fileName))
                {
                    continue;
                }

                var binary = _fileStorage.GetFileBytes(file.FileStorageUrl);
                using var stream = binary.ToStream();
                stream.Position = 0;

                var result = await audioCompletion.TranscriptTextAsync(stream, fileName);
                transcripts.Add(result);
                stream.Close();

                // Short pause between consecutive transcription requests.
                await Task.Delay(100);
            }
        }

        if (transcripts.IsNullOrEmpty())
        {
            var msg = "No audio is found in the chat.";
            _logger.LogWarning(msg);
            transcripts.Add(msg);
        }

        return string.Join("\r\n\r\n", transcripts);
    }

    /// <summary>
    /// Resolves the audio transcriber for the configured provider and model.
    /// </summary>
    private IAudioTranscription PrepareModel()
    {
        var (provider, model) = GetLlmProviderModel();
        return CompletionProvider.GetAudioTranscriber(_services, provider: provider, model: model);
    }

    /// <summary>
    /// Checks whether a file name looks like a supported file: either its extension parses as an
    /// <c>AudioType</c> member, or <c>FileUtility.GetFileContentType</c> returns a non-empty content type.
    /// </summary>
    /// <param name="fileName">File name including extension.</param>
    private bool VerifyAudioFileType(string fileName)
    {
        // Invariant casing keeps extension matching independent of the current culture.
        var extension = Path.GetExtension(fileName).TrimStart('.').ToLowerInvariant();
        return Enum.TryParse<AudioType>(extension, out _)
            || !string.IsNullOrEmpty(FileUtility.GetFileContentType(fileName));
    }

    /// <summary>
    /// Picks the provider/model pair used for transcription. Precedence:
    /// conversation state, then plugin settings, then the built-in default.
    /// </summary>
    /// <returns>A (provider, model) tuple; both values are non-empty.</returns>
    private (string, string) GetLlmProviderModel()
    {
        var state = _services.GetRequiredService<IConversationStateService>();

        // The model must come from its own state key; reading the provider key twice
        // would return the provider name as the model.
        var provider = state.GetState("audio_read_llm_provider");
        var model = state.GetState("audio_read_llm_model");

        if (!string.IsNullOrEmpty(provider) && !string.IsNullOrEmpty(model))
        {
            return (provider, model);
        }

        provider = _settings?.Audio?.Reading?.LlmProvider;
        model = _settings?.Audio?.Reading?.LlmModel;

        if (!string.IsNullOrEmpty(provider) && !string.IsNullOrEmpty(model))
        {
            return (provider, model);
        }

        provider = "openai";
        model = "gpt-4o-mini-transcribe";

        return (provider, model);
    }
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,19 @@
using BotSharp.Abstraction.Models;

namespace BotSharp.Plugin.AudioHandler.Settings;

/// <summary>
/// Settings for the audio handler plugin
/// </summary>
public class AudioHandlerSettings
{
    /// <summary>
    /// Audio-related settings; null when not configured
    /// </summary>
    public AudioSettings? Audio { get; set; }
}

#region Audio
/// <summary>
/// Audio settings of the audio handler plugin
/// </summary>
public class AudioSettings
{
    /// <summary>
    /// Settings for reading audio; null when not configured
    /// </summary>
    public AudioReadSettings? Reading { get; set; }
}

/// <summary>
/// Settings for reading audio. Adds no members of its own;
/// everything is inherited from <see cref="LlmBase"/>.
/// </summary>
public class AudioReadSettings : LlmBase
{
}
#endregion
Loading
Loading