diff --git a/src/Infrastructure/BotSharp.Core/Files/Services/Instruct/FileInstructService.Pdf.cs b/src/Infrastructure/BotSharp.Core/Files/Services/Instruct/FileInstructService.Pdf.cs index cd703cc7b..b593e9497 100644 --- a/src/Infrastructure/BotSharp.Core/Files/Services/Instruct/FileInstructService.Pdf.cs +++ b/src/Infrastructure/BotSharp.Core/Files/Services/Instruct/FileInstructService.Pdf.cs @@ -1,7 +1,6 @@ using BotSharp.Abstraction.Files.Converters; using BotSharp.Abstraction.Instructs.Models; using BotSharp.Abstraction.Instructs; -using BotSharp.Abstraction.Infrastructures; namespace BotSharp.Core.Files.Services; @@ -22,14 +21,24 @@ public async Task ReadPdf(string text, List files, In try { + var provider = options?.Provider ?? "openai"; var pdfFiles = await DownloadFiles(sessionDir, files); - var images = await ConvertPdfToImages(pdfFiles); - if (images.IsNullOrEmpty()) return content; + + var targetFiles = pdfFiles; + if (provider != "google-ai") + { + targetFiles = await ConvertPdfToImages(pdfFiles); + } + + if (targetFiles.IsNullOrEmpty()) + { + return content; + } var innerAgentId = options?.AgentId ?? Guid.Empty.ToString(); var instruction = await GetAgentTemplate(innerAgentId, options?.TemplateName); - var completion = CompletionProvider.GetChatCompletion(_services, provider: options?.Provider ?? "openai", + var completion = CompletionProvider.GetChatCompletion(_services, provider: provider, model: options?.Model ?? "gpt-4o", multiModal: true); var message = await completion.GetChatCompletions(new Agent() { @@ -39,7 +48,7 @@ public async Task ReadPdf(string text, List files, In { new RoleDialogModel(AgentRole.User, text) { - Files = images.Select(x => new BotSharpFile { FileStorageUrl = x }).ToList() + Files = targetFiles.Select(x => new BotSharpFile { FileStorageUrl = x }).ToList() } }); diff --git a/src/Plugins/BotSharp.Plugin.FileHandler/Functions/ReadImageFn.cs b/src/Plugins/BotSharp.Plugin.FileHandler/Functions/ReadImageFn.cs index 6e42af30e..1a093797c 100644 --- a/src/Plugins/BotSharp.Plugin.FileHandler/Functions/ReadImageFn.cs +++ b/src/Plugins/BotSharp.Plugin.FileHandler/Functions/ReadImageFn.cs @@ -35,7 +35,7 @@ public async Task Execute(RoleDialogModel message) { Id = BuiltInAgentId.UtilityAssistant, Name = "Utility Agent", - Instruction = fromAgent?.Instruction ?? args.UserRequest ?? "Please describe the image(s).", + Instruction = fromAgent?.Instruction ?? args?.UserRequest ?? "Please describe the image(s).", TemplateDict = new Dictionary() }; diff --git a/src/Plugins/BotSharp.Plugin.FileHandler/Functions/ReadPdfFn.cs b/src/Plugins/BotSharp.Plugin.FileHandler/Functions/ReadPdfFn.cs index 42cc04f21..ab8ab8a95 100644 --- a/src/Plugins/BotSharp.Plugin.FileHandler/Functions/ReadPdfFn.cs +++ b/src/Plugins/BotSharp.Plugin.FileHandler/Functions/ReadPdfFn.cs @@ -1,3 +1,5 @@ +using BotSharp.Abstraction.Routing; + namespace BotSharp.Plugin.FileHandler.Functions; public class ReadPdfFn : IFunctionCallback @@ -25,20 +27,31 @@ public async Task Execute(RoleDialogModel message) { var args = JsonSerializer.Deserialize(message.FunctionArgs); var conv = _services.GetRequiredService(); + var routingCtx = _services.GetRequiredService(); var agentService = _services.GetRequiredService(); - var wholeDialogs = conv.GetDialogHistory(); - var dialogs = await AssembleFiles(conv.ConversationId, wholeDialogs); - var agent = await agentService.LoadAgent(BuiltInAgentId.UtilityAssistant); - var fileAgent = new Agent + Agent? fromAgent = null; + if (!string.IsNullOrEmpty(message.CurrentAgentId)) + { + fromAgent = await agentService.LoadAgent(message.CurrentAgentId); + } + + var agent = new Agent { - Id = agent?.Id ?? Guid.Empty.ToString(), - Name = agent?.Name ?? "Unkown", - Instruction = !string.IsNullOrWhiteSpace(args?.UserRequest) ? args.UserRequest : "Please describe the pdf file(s).", + Id = BuiltInAgentId.UtilityAssistant, + Name = "Utility Agent", + Instruction = fromAgent?.Instruction ?? args?.UserRequest ?? "Please describe the pdf file(s).", TemplateDict = new Dictionary() }; - var response = await GetChatCompletion(fileAgent, dialogs); + var wholeDialogs = routingCtx.GetDialogs(); + if (wholeDialogs.IsNullOrEmpty()) + { + wholeDialogs = conv.GetDialogHistory(); + } + + var dialogs = await AssembleFiles(conv.ConversationId, wholeDialogs); + var response = await GetChatCompletion(agent, dialogs); message.Content = response; return true; } diff --git a/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Chat/GeminiChatCompletionProvider.cs b/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Chat/GeminiChatCompletionProvider.cs index 608533623..9a23d8174 100644 --- a/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Chat/GeminiChatCompletionProvider.cs +++ b/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Chat/GeminiChatCompletionProvider.cs @@ -1,9 +1,6 @@ -using System.Text.Json.Nodes; -using BotSharp.Abstraction.Agents; -using BotSharp.Abstraction.Agents.Enums; -using BotSharp.Abstraction.Conversations; +using BotSharp.Abstraction.Files; +using BotSharp.Abstraction.Files.Utilities; using BotSharp.Abstraction.Hooks; -using BotSharp.Abstraction.Loggers; using GenerativeAI; using GenerativeAI.Core; using GenerativeAI.Types; @@ -43,7 +40,7 @@ public async Task GetChatCompletions(Agent agent, List GetChatCompletionsAsync(Agent agent, List GetChatCompletionsAsync(Agent agent, List GetChatCompletionsStreamingAsync(Agent agent, List conversations, Func onMessageReceived) { var client = ProviderHelper.GetGeminiClient(Provider, _model, _services); - var chatClient = client.CreateGenerativeModel(_model); + var chatClient = client.CreateGenerativeModel(_model.ToModelId()); var (prompt, messages) = PrepareOptions(chatClient,agent, conversations); var asyncEnumerable = chatClient.StreamContentAsync(messages); @@ -207,6 +204,10 @@ public void SetModelName(string model) { var agentService = _services.GetRequiredService(); var googleSettings = _services.GetRequiredService(); + var fileStorage = _services.GetRequiredService(); + var settingsService = _services.GetRequiredService(); + var settings = settingsService.GetSetting(Provider, _model); + var allowMultiModal = settings != null && settings.MultiModal; renderedInstructions = []; // Add settings @@ -298,7 +299,50 @@ public void SetModelName(string model) else if (message.Role == AgentRole.User) { var text = !string.IsNullOrWhiteSpace(message.Payload) ? message.Payload : message.Content; - contents.Add(new Content(text, AgentRole.User)); + var contentParts = new List { new() { Text = text } }; + + if (allowMultiModal && !message.Files.IsNullOrEmpty()) + { + foreach (var file in message.Files) + { + if (!string.IsNullOrEmpty(file.FileData)) + { + var (contentType, bytes) = FileUtility.GetFileInfoFromData(file.FileData); + contentParts.Add(new Part() + { + InlineData = new() + { + MimeType = contentType, + Data = Convert.ToBase64String(bytes) + } + }); + } + else if (!string.IsNullOrEmpty(file.FileStorageUrl)) + { + var contentType = FileUtility.GetFileContentType(file.FileStorageUrl); + var bytes = fileStorage.GetFileBytes(file.FileStorageUrl); + contentParts.Add(new Part() + { + InlineData = new() + { + MimeType = contentType, + Data = Convert.ToBase64String(bytes) + } + }); + } + else if (!string.IsNullOrEmpty(file.FileUrl)) + { + contentParts.Add(new Part() + { + FileData = new() + { + FileUri = file.FileUrl + } + }); + } + } + } + contents.Add(new Content(contentParts, AgentRole.User)); convPrompts.Add($"{AgentRole.User}: {text}"); } else if (message.Role == AgentRole.Assistant) diff --git a/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/ProviderHelper.cs b/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/ProviderHelper.cs index 28b4a03b1..7b6c25ea0 100644 --- a/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/ProviderHelper.cs +++ b/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/ProviderHelper.cs @@ -7,7 +7,7 @@ public static class ProviderHelper public static GenerativeAI.GoogleAi GetGeminiClient(string provider, string model, IServiceProvider services) { var aiSettings = services.GetRequiredService(); - if (aiSettings == null || aiSettings.Gemini ==null || string.IsNullOrEmpty(aiSettings.Gemini.ApiKey)) + if (string.IsNullOrEmpty(aiSettings?.Gemini?.ApiKey)) { var settingsService = services.GetRequiredService(); var settings = settingsService.GetSetting(provider, model); diff --git a/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Chat/ChatCompletionProvider.cs b/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Chat/ChatCompletionProvider.cs index 074c9b48a..d9591a0a5 100644 --- a/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Chat/ChatCompletionProvider.cs +++ b/src/Plugins/BotSharp.Plugin.OpenAI/Providers/Chat/ChatCompletionProvider.cs @@ -218,7 +218,6 @@ await onMessageReceived(new RoleDialogModel(choice.Role?.ToString() ?? ChatMessa protected (string, IEnumerable, ChatCompletionOptions) PrepareOptions(Agent agent, List conversations) { var agentService = _services.GetRequiredService(); - var state = _services.GetRequiredService(); var fileStorage = _services.GetRequiredService(); var settingsService = _services.GetRequiredService(); var settings = settingsService.GetSetting(Provider, _model);