Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Features/add file handler #545

Merged
merged 8 commits into from
Jul 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions BotSharp.sln
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,8 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "BotSharp.Plugin.OpenAI", "s
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "BotSharp.Plugin.EmailHandler", "src\Plugins\BotSharp.Plugin.EmailHandler\BotSharp.Plugin.EmailHandler.csproj", "{A72B3BEB-E14B-4917-BE44-97EAE4E122D2}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "BotSharp.Plugin.FileHandler", "src\Plugins\BotSharp.Plugin.FileHandler\BotSharp.Plugin.FileHandler.csproj", "{D6A99D4F-6248-419E-8A43-B38ADEBABA2C}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Expand Down Expand Up @@ -393,6 +395,14 @@ Global
{A72B3BEB-E14B-4917-BE44-97EAE4E122D2}.Release|Any CPU.Build.0 = Release|Any CPU
{A72B3BEB-E14B-4917-BE44-97EAE4E122D2}.Release|x64.ActiveCfg = Release|Any CPU
{A72B3BEB-E14B-4917-BE44-97EAE4E122D2}.Release|x64.Build.0 = Release|Any CPU
{D6A99D4F-6248-419E-8A43-B38ADEBABA2C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{D6A99D4F-6248-419E-8A43-B38ADEBABA2C}.Debug|Any CPU.Build.0 = Debug|Any CPU
{D6A99D4F-6248-419E-8A43-B38ADEBABA2C}.Debug|x64.ActiveCfg = Debug|Any CPU
{D6A99D4F-6248-419E-8A43-B38ADEBABA2C}.Debug|x64.Build.0 = Debug|Any CPU
{D6A99D4F-6248-419E-8A43-B38ADEBABA2C}.Release|Any CPU.ActiveCfg = Release|Any CPU
{D6A99D4F-6248-419E-8A43-B38ADEBABA2C}.Release|Any CPU.Build.0 = Release|Any CPU
{D6A99D4F-6248-419E-8A43-B38ADEBABA2C}.Release|x64.ActiveCfg = Release|Any CPU
{D6A99D4F-6248-419E-8A43-B38ADEBABA2C}.Release|x64.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
Expand Down Expand Up @@ -439,6 +449,7 @@ Global
{806A0B0E-FEFF-420E-B5B2-C9FCBF890A8C} = {D5293208-2BEF-42FC-A64C-5954F61720BA}
{6507D336-3A4D-41D4-81C0-2B900173A5FE} = {D5293208-2BEF-42FC-A64C-5954F61720BA}
{A72B3BEB-E14B-4917-BE44-97EAE4E122D2} = {51AFE054-AE99-497D-A593-69BAEFB5106F}
{D6A99D4F-6248-419E-8A43-B38ADEBABA2C} = {51AFE054-AE99-497D-A593-69BAEFB5106F}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {A9969D89-C98B-40A5-A12B-FC87E55B3A19}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@ namespace BotSharp.Abstraction.Files;
public interface IBotSharpFileService
{
string GetDirectory(string conversationId);
Task<IEnumerable<MessageFileModel>> GetChatImages(string conversationId, string source, IEnumerable<string> fileTypes, List<RoleDialogModel> conversations, int? offset = null);
Task<IEnumerable<MessageFileModel>> GetChatImages(string conversationId, string source,
IEnumerable<RoleDialogModel> conversations, IEnumerable<string> contentTypes,
bool includeScreenShot = false, int? offset = null);
IEnumerable<MessageFileModel> GetMessageFiles(string conversationId, IEnumerable<string> messageIds, string source, bool imageOnly = false);
string GetMessageFile(string conversationId, string messageId, string source, string index, string fileName);
IEnumerable<MessageFileModel> GetMessagesWithFile(string conversationId, IEnumerable<string> messageIds);
Expand Down
8 changes: 0 additions & 8 deletions src/Infrastructure/BotSharp.Core/BotSharp.Core.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,8 @@
<ItemGroup>
<None Remove="data\agents\6745151e-6d46-4a02-8de4-1c4f21c7da95\agent.json" />
<None Remove="data\agents\6745151e-6d46-4a02-8de4-1c4f21c7da95\functions\generate_image.json" />
<None Remove="data\agents\6745151e-6d46-4a02-8de4-1c4f21c7da95\functions\read_file.json" />
<None Remove="data\agents\6745151e-6d46-4a02-8de4-1c4f21c7da95\instruction.liquid" />
<None Remove="data\agents\6745151e-6d46-4a02-8de4-1c4f21c7da95\templates\generate_image.fn.liquid" />
<None Remove="data\agents\6745151e-6d46-4a02-8de4-1c4f21c7da95\templates\read_file.fn.liquid" />
<None Remove="data\agents\01dcc3e5-0af7-49e6-ad7a-a760bd12dc4b\agent.json" />
<None Remove="data\agents\01dcc3e5-0af7-49e6-ad7a-a760bd12dc4b\functions.json" />
<None Remove="data\agents\01dcc3e5-0af7-49e6-ad7a-a760bd12dc4b\functions\human_intervention_needed.json" />
Expand Down Expand Up @@ -159,12 +157,6 @@
<Content Include="data\agents\6745151e-6d46-4a02-8de4-1c4f21c7da95\instruction.liquid">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</Content>
<Content Include="data\agents\6745151e-6d46-4a02-8de4-1c4f21c7da95\functions\read_file.json">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</Content>
<Content Include="data\agents\6745151e-6d46-4a02-8de4-1c4f21c7da95\templates\read_file.fn.liquid">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</Content>
<Content Include="data\agents\6745151e-6d46-4a02-8de4-1c4f21c7da95\functions\generate_image.json">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</Content>
Expand Down
2 changes: 0 additions & 2 deletions src/Infrastructure/BotSharp.Core/Files/FilePlugin.cs
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,6 @@ public void RegisterDI(IServiceCollection services, IConfiguration config)
{
services.AddScoped<IBotSharpFileService, BotSharpFileService>();

services.AddScoped<IAgentHook, FileReaderHook>();
services.AddScoped<IAgentUtilityHook, FileReaderUtilityHook>();
services.AddScoped<IAgentHook, ImageGeneratorHook>();
services.AddScoped<IAgentUtilityHook, ImageGeneratorUtilityHook>();
}
Expand Down
52 changes: 0 additions & 52 deletions src/Infrastructure/BotSharp.Core/Files/Hooks/FileReaderHook.cs

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,124 +1,43 @@
using AspectInjector.Broker;
using BotSharp.Abstraction.Files.Converters;
using Microsoft.EntityFrameworkCore;
using System.IO;
using System.Threading;

namespace BotSharp.Core.Files.Services;

public partial class BotSharpFileService
{
public async Task<IEnumerable<MessageFileModel>> GetChatImages(string conversationId, string source, IEnumerable<string> fileTypes,
List<RoleDialogModel> conversations, int? offset = null)
public async Task<IEnumerable<MessageFileModel>> GetChatImages(string conversationId, string source,
IEnumerable<RoleDialogModel> conversations, IEnumerable<string> contentTypes,
bool includeScreenShot = false, int? offset = null)
{
var files = new List<MessageFileModel>();
if (string.IsNullOrEmpty(conversationId) || conversations.IsNullOrEmpty())
{
return new List<MessageFileModel>();
return files;
}

if (offset <= 0)
{
offset = MIN_OFFSET;
}
else if (offset > MAX_OFFSET)
{
offset = MAX_OFFSET;
}
var messageIds = GetMessageIds(conversations, offset);
var pathPrefix = Path.Combine(_baseDir, CONVERSATION_FOLDER, conversationId, FILE_FOLDER);

var messageIds = new List<string>();
if (offset.HasValue)
{
messageIds = conversations.Select(x => x.MessageId).Distinct().TakeLast(offset.Value).ToList();
}
else
{
messageIds = conversations.Select(x => x.MessageId).Distinct().ToList();
}

files = await GetMessageFiles(conversationId, messageIds, source, fileTypes);
return files;
}

private async Task<List<MessageFileModel>> GetMessageFiles(string conversationId, IEnumerable<string> messageIds, string source, IEnumerable<string> fileTypes)
{
var files = new List<MessageFileModel>();
if (string.IsNullOrEmpty(conversationId) || messageIds.IsNullOrEmpty() || fileTypes.IsNullOrEmpty()) return files;

var isNeedScreenShot = fileTypes.Any(x => _allowScreenShotTypes.Contains(x));
var onlyScreenShot = fileTypes.All(x => _allowScreenShotTypes.Contains(x));

try
foreach (var messageId in messageIds)
{
var preFixPath = Path.Combine(_baseDir, CONVERSATION_FOLDER, conversationId, FILE_FOLDER);
var dir = Path.Combine(pathPrefix, messageId, source);
if (!ExistDirectory(dir)) continue;

foreach (var messageId in messageIds)
foreach (var subDir in Directory.GetDirectories(dir))
{
var dir = Path.Combine(preFixPath, messageId, source);
if (!ExistDirectory(dir)) continue;
var file = Directory.GetFiles(subDir).FirstOrDefault();
if (file == null) continue;

foreach (var subDir in Directory.GetDirectories(dir))
{
var file = Directory.GetFiles(subDir).FirstOrDefault();
if (file == null) continue;
var contentType = GetFileContentType(file);
if (contentTypes?.Contains(contentType) != true) continue;

var index = subDir.Split(Path.DirectorySeparatorChar).Last();
var contentType = GetFileContentType(file);
var foundFiles = await GetMessageFiles(file, subDir, contentType, messageId, source, includeScreenShot);
if (foundFiles.IsNullOrEmpty()) continue;

if ((!isNeedScreenShot || (isNeedScreenShot && !onlyScreenShot)) && _allowedImageTypes.Contains(contentType))
{
var model = new MessageFileModel()
{
MessageId = messageId,
FileStorageUrl = file,
ContentType = contentType
};
files.Add(model);
}
else if ((isNeedScreenShot && !onlyScreenShot || onlyScreenShot) && !_allowedImageTypes.Contains(contentType))
{
var screenShotDir = Path.Combine(subDir, SCREENSHOT_FILE_FOLDER);
if (ExistDirectory(screenShotDir) && Directory.GetFiles(screenShotDir).Any())
{
foreach (var screenShot in Directory.GetFiles(screenShotDir))
{
contentType = GetFileContentType(screenShot);
if (!_allowedImageTypes.Contains(contentType)) continue;

var model = new MessageFileModel()
{
MessageId = messageId,
FileStorageUrl = screenShot,
ContentType = contentType
};
files.Add(model);
}
}
else
{
var screenShotPath = Path.Combine(subDir, SCREENSHOT_FILE_FOLDER);
var images = await ConvertPdfToImages(file, screenShotPath);

foreach (var image in images)
{
contentType = GetFileContentType(image);
var model = new MessageFileModel()
{
MessageId = messageId,
FileStorageUrl = image,
ContentType = contentType
};
files.Add(model);
}
}
}
}
files.AddRange(foundFiles);
}
}
catch (Exception ex)
{
_logger.LogWarning($"Error when reading conversation ({conversationId}) files: {ex.Message}\r\n{ex.InnerException}\r\n{ex.StackTrace}");
}

return files;
}
Expand All @@ -144,7 +63,7 @@ public IEnumerable<MessageFileModel> GetMessageFiles(string conversationId, IEnu
foreach (var file in Directory.GetFiles(subDir))
{
var contentType = GetFileContentType(file);
if (imageOnly && !_allowedImageTypes.Contains(contentType))
if (imageOnly && !_imageTypes.Contains(contentType))
{
continue;
}
Expand Down Expand Up @@ -327,6 +246,102 @@ private string GetConversationFileDirectory(string? conversationId, string? mess
return dir;
}

/// <summary>
/// Collects the distinct message ids of the given dialogs, optionally keeping
/// only the last <paramref name="offset"/> of them.
/// </summary>
/// <param name="conversations">Dialogs whose <c>MessageId</c> values are collected.</param>
/// <param name="offset">
/// Maximum number of trailing ids to keep. Values at or below zero become
/// <c>MIN_OFFSET</c>, values above <c>MAX_OFFSET</c> become <c>MAX_OFFSET</c>;
/// <c>null</c> keeps every id.
/// </param>
/// <returns>The distinct message ids; empty when there are no dialogs.</returns>
private IEnumerable<string> GetMessageIds(IEnumerable<RoleDialogModel> conversations, int? offset = null)
{
    if (conversations.IsNullOrEmpty())
    {
        return Enumerable.Empty<string>();
    }

    // Lifted comparisons on a null value are false, so a null limit falls
    // through both branches untouched and means "no limit".
    int? limit = offset;
    if (limit <= 0)
    {
        limit = MIN_OFFSET;
    }
    else if (limit > MAX_OFFSET)
    {
        limit = MAX_OFFSET;
    }

    var distinctIds = conversations.Select(x => x.MessageId).Distinct();

    return limit.HasValue
        ? distinctIds.TakeLast(limit.Value).ToList()
        : distinctIds.ToList();
}


/// <summary>
/// Builds the file models for a single stored message file.
/// </summary>
/// <remarks>
/// When the file is one of the known image types, or screenshots are not
/// requested, the file itself is returned. Otherwise the screenshot folder
/// next to the file is inspected: existing image screenshots are returned, and
/// if there are none and the file is a PDF, it is converted to images first.
/// Any exception is logged as a warning and whatever was collected so far is
/// returned.
/// </remarks>
/// <param name="file">Path of the stored file.</param>
/// <param name="fileDir">Directory that contains <paramref name="file"/>.</param>
/// <param name="contentType">Content type of <paramref name="file"/>.</param>
/// <param name="messageId">Id of the message the file belongs to.</param>
/// <param name="source">File source, copied to <c>FileSource</c> on every model.</param>
/// <param name="includeScreenShot">Whether non-image files are replaced by their screenshots.</param>
/// <returns>The models found for this file; possibly empty.</returns>
private async Task<IEnumerable<MessageFileModel>> GetMessageFiles(string file, string fileDir, string contentType,
    string messageId, string source, bool includeScreenShot)
{
    var files = new List<MessageFileModel>();

    // Single place where a model is assembled; messageId and source are shared
    // by every model produced for this file.
    MessageFileModel BuildModel(string path, string type) => new MessageFileModel()
    {
        MessageId = messageId,
        FileName = Path.GetFileName(path),
        FileStorageUrl = path,
        ContentType = type,
        FileSource = source
    };

    try
    {
        // Images, or callers that did not ask for screenshots, get the file as-is.
        if (_imageTypes.Contains(contentType) || !includeScreenShot)
        {
            files.Add(BuildModel(file, contentType));
            return files;
        }

        var screenShotDir = Path.Combine(fileDir, SCREENSHOT_FILE_FOLDER);

        // List the folder once so the emptiness check and the loop see the same snapshot.
        var screenShots = ExistDirectory(screenShotDir)
            ? Directory.GetFiles(screenShotDir)
            : Array.Empty<string>();

        if (screenShots.Length > 0)
        {
            foreach (var screenShot in screenShots)
            {
                var screenShotType = GetFileContentType(screenShot);
                if (!_imageTypes.Contains(screenShotType)) continue;

                files.Add(BuildModel(screenShot, screenShotType));
            }
        }
        else if (contentType == MediaTypeNames.Application.Pdf)
        {
            // No screenshots yet: convert the PDF, passing the screenshot folder as the image location.
            var images = await ConvertPdfToImages(file, screenShotDir);
            foreach (var image in images)
            {
                files.Add(BuildModel(image, GetFileContentType(image)));
            }
        }

        return files;
    }
    catch (Exception ex)
    {
        _logger.LogWarning($"Error when getting message files {file} (messageId: {messageId}), Error: {ex.Message}\r\n{ex.InnerException}");
        return files;
    }
}


private async Task<IEnumerable<string>> ConvertPdfToImages(string pdfLoc, string imageLoc)
{
var converters = _services.GetServices<IPdf2ImageConverter>();
Expand Down
Loading