diff --git a/src/Infrastructure/BotSharp.Abstraction/Files/IFileStorageService.cs b/src/Infrastructure/BotSharp.Abstraction/Files/IFileStorageService.cs index 8d7578706..3107ea47d 100644 --- a/src/Infrastructure/BotSharp.Abstraction/Files/IFileStorageService.cs +++ b/src/Infrastructure/BotSharp.Abstraction/Files/IFileStorageService.cs @@ -63,16 +63,17 @@ public interface IFileStorageService #endregion #region Knowledge - bool SaveKnowledgeBaseFile(string collectionName, string vectorStoreProvider, string fileId, string fileName, BinaryData fileData); + bool SaveKnowledgeBaseFile(string collectionName, string vectorStoreProvider, Guid fileId, string fileName, BinaryData fileData); /// - /// Delete files in a knowledge collection. If fileId is null, remove all files in the collection. + /// Delete files in a knowledge collection, given the vector store provider. If "fileId" is null, delete all files in the collection. /// /// + /// /// /// - bool DeleteKnowledgeFile(string collectionName, string vectorStoreProvider, string? fileId = null); - string GetKnowledgeBaseFileUrl(string collectionName, string fileId); - FileBinaryDataModel? GetKnowledgeBaseFileBinaryData(string collectionName, string vectorStoreProvider, string fileId); + bool DeleteKnowledgeFile(string collectionName, string vectorStoreProvider, Guid? fileId = null); + string GetKnowledgeBaseFileUrl(string collectionName, string vectorStoreProvider, Guid fileId, string fileName); + BinaryData? GetKnowledgeBaseFileBinaryData(string collectionName, string vectorStoreProvider, Guid fileId, string fileName); #endregion } diff --git a/src/Infrastructure/BotSharp.Abstraction/Files/Models/KnowledgeFileModel.cs b/src/Infrastructure/BotSharp.Abstraction/Files/Models/KnowledgeFileModel.cs index f69c8f50d..dcc214291 100644 --- a/src/Infrastructure/BotSharp.Abstraction/Files/Models/KnowledgeFileModel.cs +++ b/src/Infrastructure/BotSharp.Abstraction/Files/Models/KnowledgeFileModel.cs @@ -2,7 +2,7 @@ namespace BotSharp.Abstraction.Files.Models; public class KnowledgeFileModel { - public string FileId { get; set; } + public Guid FileId { get; set; } public string FileName { get; set; } public string FileExtension { get; set; } public string ContentType { get; set; } diff --git a/src/Infrastructure/BotSharp.Abstraction/Knowledges/IKnowledgeService.cs b/src/Infrastructure/BotSharp.Abstraction/Knowledges/IKnowledgeService.cs index 7a185b810..87ba03444 100644 --- a/src/Infrastructure/BotSharp.Abstraction/Knowledges/IKnowledgeService.cs +++ b/src/Infrastructure/BotSharp.Abstraction/Knowledges/IKnowledgeService.cs @@ -22,9 +22,9 @@ public interface IKnowledgeService #region Document Task UploadKnowledgeDocuments(string collectionName, IEnumerable files); - Task DeleteKnowledgeDocument(string collectionName, string fileId); + Task DeleteKnowledgeDocument(string collectionName, Guid fileId); Task> GetPagedKnowledgeDocuments(string collectionName, KnowledgeFileFilter filter); - Task GetKnowledgeDocumentBinaryData(string collectionName, string fileId); + Task GetKnowledgeDocumentBinaryData(string collectionName, Guid fileId); #endregion #region Common diff --git a/src/Infrastructure/BotSharp.Abstraction/Knowledges/Models/KnowledgeDocMetaData.cs b/src/Infrastructure/BotSharp.Abstraction/Knowledges/Models/KnowledgeDocMetaData.cs index c9a163f5f..9ae259df1 100644 --- a/src/Infrastructure/BotSharp.Abstraction/Knowledges/Models/KnowledgeDocMetaData.cs +++ b/src/Infrastructure/BotSharp.Abstraction/Knowledges/Models/KnowledgeDocMetaData.cs @@ -6,7 +6,7 @@ public class KnowledgeDocMetaData public string Collection { get; set; } [JsonPropertyName("file_id")] - public string FileId { get; set; } + public Guid FileId { get; set; } [JsonPropertyName("file_name")] public string FileName { get; set; } diff --git a/src/Infrastructure/BotSharp.Abstraction/Knowledges/Models/KnowledgeFileFilter.cs b/src/Infrastructure/BotSharp.Abstraction/Knowledges/Models/KnowledgeFileFilter.cs index a37766caa..8dd17461d 100644 --- a/src/Infrastructure/BotSharp.Abstraction/Knowledges/Models/KnowledgeFileFilter.cs +++ b/src/Infrastructure/BotSharp.Abstraction/Knowledges/Models/KnowledgeFileFilter.cs @@ -2,5 +2,5 @@ namespace BotSharp.Abstraction.Knowledges.Models; public class KnowledgeFileFilter : Pagination { - public IEnumerable? FileIds { get; set; } + public IEnumerable? FileIds { get; set; } } diff --git a/src/Infrastructure/BotSharp.Abstraction/Repositories/IBotSharpRepository.cs b/src/Infrastructure/BotSharp.Abstraction/Repositories/IBotSharpRepository.cs index b0957728f..b56dbfb64 100644 --- a/src/Infrastructure/BotSharp.Abstraction/Repositories/IBotSharpRepository.cs +++ b/src/Infrastructure/BotSharp.Abstraction/Repositories/IBotSharpRepository.cs @@ -112,7 +112,15 @@ public interface IBotSharpRepository bool DeleteKnowledgeCollectionConfig(string collectionName); IEnumerable GetKnowledgeCollectionConfigs(VectorCollectionConfigFilter filter); - public bool SaveKnolwedgeBaseFileMeta(KnowledgeDocMetaData metaData); - public PagedItems GetKnowledgeBaseFileMeta(string collectionName, string vectorStoreProvider, KnowledgeFileFilter filter); + bool SaveKnolwedgeBaseFileMeta(KnowledgeDocMetaData metaData); + /// + /// Delete file meta data in a knowledge collection, given the vector store provider. If "fileId" is null, delete all in the collection. + /// + /// + /// + /// + /// + bool DeleteKnolwedgeBaseFileMeta(string collectionName, string vectorStoreProvider, Guid? fileId = null); + PagedItems GetKnowledgeBaseFileMeta(string collectionName, string vectorStoreProvider, KnowledgeFileFilter filter); #endregion } diff --git a/src/Infrastructure/BotSharp.Core/Files/Services/Storage/LocalFileStorageService.KnowledgeBase.cs b/src/Infrastructure/BotSharp.Core/Files/Services/Storage/LocalFileStorageService.KnowledgeBase.cs index 8daabafb9..df5bc6d65 100644 --- a/src/Infrastructure/BotSharp.Core/Files/Services/Storage/LocalFileStorageService.KnowledgeBase.cs +++ b/src/Infrastructure/BotSharp.Core/Files/Services/Storage/LocalFileStorageService.KnowledgeBase.cs @@ -1,23 +1,21 @@ -using BotSharp.Abstraction.Knowledges.Models; using System.IO; namespace BotSharp.Core.Files.Services; public partial class LocalFileStorageService { - public bool SaveKnowledgeBaseFile(string collectionName, string vectorStoreProvider, string fileId, string fileName, BinaryData fileData) + public bool SaveKnowledgeBaseFile(string collectionName, string vectorStoreProvider, Guid fileId, string fileName, BinaryData fileData) { if (string.IsNullOrWhiteSpace(collectionName) - || string.IsNullOrWhiteSpace(vectorStoreProvider) - || string.IsNullOrWhiteSpace(fileId)) + || string.IsNullOrWhiteSpace(vectorStoreProvider)) { return false; } try { - var docDir = BuildKnowledgeCollectionDocumentDir(collectionName, vectorStoreProvider); - var dir = Path.Combine(docDir, fileId); + var docDir = BuildKnowledgeCollectionFileDir(collectionName, vectorStoreProvider); + var dir = Path.Combine(docDir, fileId.ToString()); if (ExistDirectory(dir)) { Directory.Delete(dir); @@ -40,7 +38,7 @@ public bool SaveKnowledgeBaseFile(string collectionName, string vectorStoreProvi } } - public bool DeleteKnowledgeFile(string collectionName, string vectorStoreProvider, string? fileId = null) + public bool DeleteKnowledgeFile(string collectionName, string vectorStoreProvider, Guid? fileId = null) { if (string.IsNullOrWhiteSpace(collectionName) || string.IsNullOrWhiteSpace(vectorStoreProvider)) @@ -48,16 +46,16 @@ public bool DeleteKnowledgeFile(string collectionName, string vectorStoreProvide return false; } - var dir = BuildKnowledgeCollectionDocumentDir(collectionName, vectorStoreProvider); + var dir = BuildKnowledgeCollectionFileDir(collectionName, vectorStoreProvider); if (!ExistDirectory(dir)) return false; - if (string.IsNullOrEmpty(fileId)) + if (fileId == null) { Directory.Delete(dir, true); } else { - var fileDir = Path.Combine(dir, fileId); + var fileDir = Path.Combine(dir, fileId.ToString()); if (ExistDirectory(fileDir)) { Directory.Delete(fileDir, true); @@ -67,10 +65,10 @@ public bool DeleteKnowledgeFile(string collectionName, string vectorStoreProvide return true; } - public string GetKnowledgeBaseFileUrl(string collectionName, string fileId) + public string GetKnowledgeBaseFileUrl(string collectionName, string vectorStoreProvider, Guid fileId, string fileName) { if (string.IsNullOrWhiteSpace(collectionName) - || string.IsNullOrWhiteSpace(fileId)) + || string.IsNullOrWhiteSpace(vectorStoreProvider)) { return string.Empty; } @@ -78,39 +76,31 @@ public string GetKnowledgeBaseFileUrl(string collectionName, string fileId) return $"/knowledge/document/{collectionName}/file/{fileId}"; } - public FileBinaryDataModel? GetKnowledgeBaseFileBinaryData(string collectionName, string vectorStoreProvider, string fileId) + public BinaryData? GetKnowledgeBaseFileBinaryData(string collectionName, string vectorStoreProvider, Guid fileId, string fileName) { if (string.IsNullOrWhiteSpace(collectionName) || string.IsNullOrWhiteSpace(vectorStoreProvider) - || string.IsNullOrWhiteSpace(fileId)) + || string.IsNullOrWhiteSpace(fileName)) { return null; } - var docDir = BuildKnowledgeCollectionDocumentDir(collectionName, vectorStoreProvider); - var fileDir = Path.Combine(docDir, fileId); + var docDir = BuildKnowledgeCollectionFileDir(collectionName, vectorStoreProvider); + var fileDir = Path.Combine(docDir, fileId.ToString()); if (!ExistDirectory(fileDir)) return null; - var metaFile = Path.Combine(fileDir, KNOWLEDGE_DOC_META_FILE); - var content = File.ReadAllText(metaFile); - var metaData = JsonSerializer.Deserialize(content, _jsonOptions); - var file = Path.Combine(fileDir, metaData.FileName); + var file = Path.Combine(fileDir, fileName); using var stream = new FileStream(file, FileMode.Open, FileAccess.Read); stream.Position = 0; - return new FileBinaryDataModel - { - FileName = metaData.FileName, - ContentType = metaData.ContentType, - FileBinaryData = BinaryData.FromStream(stream) - }; + return BinaryData.FromStream(stream); } #region Private methods - private string BuildKnowledgeCollectionDocumentDir(string collectionName, string vectorStoreProvider) + private string BuildKnowledgeCollectionFileDir(string collectionName, string vectorStoreProvider) { - return Path.Combine(_baseDir, KNOWLEDGE_FOLDER, KNOWLEDGE_DOC_FOLDER, vectorStoreProvider, collectionName); + return Path.Combine(_baseDir, KNOWLEDGE_FOLDER, KNOWLEDGE_DOC_FOLDER, vectorStoreProvider.CleanStr(), collectionName.CleanStr()); } #endregion } diff --git a/src/Infrastructure/BotSharp.Core/Files/Services/Storage/LocalFileStorageService.cs b/src/Infrastructure/BotSharp.Core/Files/Services/Storage/LocalFileStorageService.cs index 71ddecf5a..ea2a68ac7 100644 --- a/src/Infrastructure/BotSharp.Core/Files/Services/Storage/LocalFileStorageService.cs +++ b/src/Infrastructure/BotSharp.Core/Files/Services/Storage/LocalFileStorageService.cs @@ -21,7 +21,6 @@ public partial class LocalFileStorageService : IFileStorageService private const string TEXT_TO_SPEECH_FOLDER = "speeches"; private const string KNOWLEDGE_FOLDER = "knowledgebase"; private const string KNOWLEDGE_DOC_FOLDER = "document"; - private const string KNOWLEDGE_DOC_META_FILE = "meta.json"; private readonly JsonSerializerOptions _jsonOptions = new JsonSerializerOptions { diff --git a/src/Infrastructure/BotSharp.Core/Repository/BotSharpDbContext.cs b/src/Infrastructure/BotSharp.Core/Repository/BotSharpDbContext.cs index a55070c49..ae3ae408a 100644 --- a/src/Infrastructure/BotSharp.Core/Repository/BotSharpDbContext.cs +++ b/src/Infrastructure/BotSharp.Core/Repository/BotSharpDbContext.cs @@ -234,7 +234,7 @@ public bool SaveTranslationMemories(IEnumerable inputs) throw new NotImplementedException(); #endregion - #region Knowledge + #region KnowledgeBase public bool AddKnowledgeCollectionConfigs(List configs, bool reset = false) => throw new NotImplementedException(); @@ -247,6 +247,9 @@ public IEnumerable GetKnowledgeCollectionConfigs(VectorC public bool SaveKnolwedgeBaseFileMeta(KnowledgeDocMetaData metaData) => throw new NotImplementedException(); + public bool DeleteKnolwedgeBaseFileMeta(string collectionName, string vectorStoreProvider, Guid? fileId = null) => + throw new NotImplementedException(); + public PagedItems GetKnowledgeBaseFileMeta(string collectionName, string vectorStoreProvider, KnowledgeFileFilter filter) => throw new NotImplementedException(); #endregion diff --git a/src/Infrastructure/BotSharp.Core/Repository/FileRepository/FileRepository.KnowledgeBase.cs b/src/Infrastructure/BotSharp.Core/Repository/FileRepository/FileRepository.KnowledgeBase.cs index 28fc2dd09..3ef93da41 100644 --- a/src/Infrastructure/BotSharp.Core/Repository/FileRepository/FileRepository.KnowledgeBase.cs +++ b/src/Infrastructure/BotSharp.Core/Repository/FileRepository/FileRepository.KnowledgeBase.cs @@ -110,14 +110,13 @@ public bool SaveKnolwedgeBaseFileMeta(KnowledgeDocMetaData metaData) { if (metaData == null || string.IsNullOrWhiteSpace(metaData.Collection) - || string.IsNullOrWhiteSpace(metaData.VectorStoreProvider) - || string.IsNullOrWhiteSpace(metaData.FileId)) + || string.IsNullOrWhiteSpace(metaData.VectorStoreProvider)) { return false; } - var dir = BuildKnowledgeDocumentDir(metaData.Collection.CleanStr(), metaData.VectorStoreProvider.CleanStr()); - var docDir = Path.Combine(dir, metaData.FileId); + var dir = BuildKnowledgeCollectionFileDir(metaData.Collection, metaData.VectorStoreProvider); + var docDir = Path.Combine(dir, metaData.FileId.ToString()); if (!Directory.Exists(docDir)) { Directory.CreateDirectory(docDir); @@ -129,6 +128,33 @@ public bool SaveKnolwedgeBaseFileMeta(KnowledgeDocMetaData metaData) return true; } + public bool DeleteKnolwedgeBaseFileMeta(string collectionName, string vectorStoreProvider, Guid? fileId = null) + { + if (string.IsNullOrWhiteSpace(collectionName) + || string.IsNullOrWhiteSpace(vectorStoreProvider)) + { + return false; + } + + var dir = BuildKnowledgeCollectionFileDir(collectionName, vectorStoreProvider); + if (!Directory.Exists(dir)) return false; + + if (fileId == null) + { + Directory.Delete(dir, true); + } + else + { + var fileDir = Path.Combine(dir, fileId.ToString()); + if (Directory.Exists(fileDir)) + { + Directory.Delete(fileDir, true); + } + } + + return true; + } + public PagedItems GetKnowledgeBaseFileMeta(string collectionName, string vectorStoreProvider, KnowledgeFileFilter filter) { if (string.IsNullOrWhiteSpace(collectionName) @@ -137,7 +163,7 @@ public PagedItems GetKnowledgeBaseFileMeta(string collecti return new PagedItems(); } - var dir = BuildKnowledgeDocumentDir(collectionName, vectorStoreProvider); + var dir = BuildKnowledgeCollectionFileDir(collectionName, vectorStoreProvider); if (!Directory.Exists(dir)) { return new PagedItems(); @@ -181,9 +207,9 @@ private string BuildKnowledgeCollectionConfigDir() return Path.Combine(_dbSettings.FileRepository, KNOWLEDGE_FOLDER, VECTOR_FOLDER); } - private string BuildKnowledgeDocumentDir(string collectionName, string vectorStoreProvider) + private string BuildKnowledgeCollectionFileDir(string collectionName, string vectorStoreProvider) { - return Path.Combine(_dbSettings.FileRepository, KNOWLEDGE_FOLDER, KNOWLEDGE_DOC_FOLDER, vectorStoreProvider, collectionName); + return Path.Combine(_dbSettings.FileRepository, KNOWLEDGE_FOLDER, KNOWLEDGE_DOC_FOLDER, vectorStoreProvider.CleanStr(), collectionName.CleanStr()); } #endregion } diff --git a/src/Infrastructure/BotSharp.OpenAPI/Controllers/KnowledgeBaseController.cs b/src/Infrastructure/BotSharp.OpenAPI/Controllers/KnowledgeBaseController.cs index a2356faae..fa0df804b 100644 --- a/src/Infrastructure/BotSharp.OpenAPI/Controllers/KnowledgeBaseController.cs +++ b/src/Infrastructure/BotSharp.OpenAPI/Controllers/KnowledgeBaseController.cs @@ -137,7 +137,7 @@ public async Task UploadKnowledgeDocuments([FromRoute] } [HttpDelete("/knowledge/document/{collection}/delete/{fileId}")] - public async Task DeleteKnowledgeDocument([FromRoute] string collection, [FromRoute] string fileId) + public async Task DeleteKnowledgeDocument([FromRoute] string collection, [FromRoute] Guid fileId) { var response = await _knowledgeService.DeleteKnowledgeDocument(collection, fileId); return response; @@ -160,7 +160,7 @@ public async Task> GetPagedKnowledgeDocuments } [HttpGet("/knowledge/document/{collection}/file/{fileId}")] - public async Task GetKnowledgeDocument([FromRoute] string collection, [FromRoute] string fileId) + public async Task GetKnowledgeDocument([FromRoute] string collection, [FromRoute] Guid fileId) { var file = await _knowledgeService.GetKnowledgeDocumentBinaryData(collection, fileId); var stream = file.FileBinaryData.ToStream(); diff --git a/src/Infrastructure/BotSharp.OpenAPI/ViewModels/Knowledges/KnowledgeFileViewModel.cs b/src/Infrastructure/BotSharp.OpenAPI/ViewModels/Knowledges/KnowledgeFileViewModel.cs index 850863b1c..00ca83892 100644 --- a/src/Infrastructure/BotSharp.OpenAPI/ViewModels/Knowledges/KnowledgeFileViewModel.cs +++ b/src/Infrastructure/BotSharp.OpenAPI/ViewModels/Knowledges/KnowledgeFileViewModel.cs @@ -5,7 +5,7 @@ namespace BotSharp.OpenAPI.ViewModels.Knowledges; public class KnowledgeFileViewModel { [JsonPropertyName("file_id")] - public string FileId { get; set; } + public Guid FileId { get; set; } [JsonPropertyName("file_name")] public string FileName { get; set; } diff --git a/src/Plugins/BotSharp.Plugin.ChatHub/Hooks/WelcomeHook.cs b/src/Plugins/BotSharp.Plugin.ChatHub/Hooks/WelcomeHook.cs index 2e3f81b4d..fd0f9cc73 100644 --- a/src/Plugins/BotSharp.Plugin.ChatHub/Hooks/WelcomeHook.cs +++ b/src/Plugins/BotSharp.Plugin.ChatHub/Hooks/WelcomeHook.cs @@ -44,11 +44,7 @@ public override async Task OnUserAgentConnectedInitially(Conversation conversati foreach (var message in messages) { - var richContent = new RichContent(message) - { - Editor = message.RichType == RichTypeEnum.QuickReply ? EditorTypeEnum.None : EditorTypeEnum.Text, - }; - + var richContent = new RichContent(message); var json = JsonSerializer.Serialize(new ChatResponseModel() { ConversationId = conversation.Id, diff --git a/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Document.cs b/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Document.cs index 2fc23cbe6..e3aa88347 100644 --- a/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Document.cs +++ b/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Document.cs @@ -42,7 +42,7 @@ public async Task UploadKnowledgeDocuments(string colle var contents = await GetFileContent(contentType, bytes); // Save document - var fileId = Guid.NewGuid().ToString(); + var fileId = Guid.NewGuid(); var saved = SaveDocument(collectionName, vectorStoreProvider, fileId, file.FileName, bytes); if (!saved) { @@ -89,9 +89,9 @@ public async Task UploadKnowledgeDocuments(string colle } - public async Task DeleteKnowledgeDocument(string collectionName, string fileId) + public async Task DeleteKnowledgeDocument(string collectionName, Guid fileId) { - if (string.IsNullOrWhiteSpace(collectionName) || string.IsNullOrWhiteSpace(fileId)) + if (string.IsNullOrWhiteSpace(collectionName)) { return false; } @@ -104,21 +104,23 @@ public async Task DeleteKnowledgeDocument(string collectionName, string fi var vectorStoreProvider = _settings.VectorDb.Provider; // Get doc meta data - var pagedData = db.GetKnowledgeBaseFileMeta(collectionName, vectorStoreProvider, new KnowledgeFileFilter + var pageData = db.GetKnowledgeBaseFileMeta(collectionName, vectorStoreProvider, new KnowledgeFileFilter { - FileIds = new[] { fileId } + FileIds = [ fileId ], + Size = 1 }); // Delete doc - fileStorage.DeleteKnowledgeFile(collectionName.CleanStr(), vectorStoreProvider.CleanStr(), fileId); - - var found = pagedData?.Items?.FirstOrDefault(); + fileStorage.DeleteKnowledgeFile(collectionName, vectorStoreProvider, fileId); + + var found = pageData?.Items?.FirstOrDefault(); if (found != null && !found.VectorDataIds.IsNullOrEmpty()) { var guids = found.VectorDataIds.Where(x => Guid.TryParse(x, out _)).Select(x => Guid.Parse(x)).ToList(); await vectorDb.DeleteCollectionData(collectionName, guids); } - + + db.DeleteKnolwedgeBaseFileMeta(collectionName, vectorStoreProvider, fileId); return true; } catch (Exception ex) @@ -154,7 +156,7 @@ public async Task> GetPagedKnowledgeDocuments(str FileName = x.FileName, FileExtension = Path.GetExtension(x.FileName), ContentType = x.ContentType, - FileUrl = fileStorage.GetKnowledgeBaseFileUrl(collectionName, x.FileId) + FileUrl = fileStorage.GetKnowledgeBaseFileUrl(collectionName, vectorStoreProvider, x.FileId, x.FileName) })?.ToList() ?? new List(); return new PagedItems @@ -164,14 +166,29 @@ public async Task> GetPagedKnowledgeDocuments(str }; } - public async Task GetKnowledgeDocumentBinaryData(string collectionName, string fileId) + public async Task GetKnowledgeDocumentBinaryData(string collectionName, Guid fileId) { + var db = _services.GetRequiredService(); var fileStorage = _services.GetRequiredService(); var vectorStoreProvider = _settings.VectorDb.Provider; // Get doc binary data - var file = fileStorage.GetKnowledgeBaseFileBinaryData(collectionName.CleanStr(), vectorStoreProvider.CleanStr(), fileId); - return file; + var pageData = db.GetKnowledgeBaseFileMeta(collectionName, vectorStoreProvider, new KnowledgeFileFilter + { + FileIds = [ fileId ], + Size = 1 + }); + + var metaData = pageData?.Items?.FirstOrDefault(); + if (metaData == null) return null; + + var binaryData = fileStorage.GetKnowledgeBaseFileBinaryData(collectionName, vectorStoreProvider, fileId, metaData.FileName); + return new FileBinaryDataModel + { + FileName = metaData.FileName, + ContentType = metaData.ContentType, + FileBinaryData = binaryData + }; } @@ -246,16 +263,16 @@ private async Task> ReadPdf(byte[] bytes) #endregion - private bool SaveDocument(string collectionName, string vectorStoreProvider, string fileId, string fileName, byte[] bytes) + private bool SaveDocument(string collectionName, string vectorStoreProvider, Guid fileId, string fileName, byte[] bytes) { var fileStoreage = _services.GetRequiredService(); var data = BinaryData.FromBytes(bytes); - var saved = fileStoreage.SaveKnowledgeBaseFile(collectionName.CleanStr(), vectorStoreProvider.CleanStr(), fileId, fileName, data); + var saved = fileStoreage.SaveKnowledgeBaseFile(collectionName, vectorStoreProvider, fileId, fileName, data); return saved; } private async Task> SaveToVectorDb( - string collectionName, string fileId, string fileName, IEnumerable contents, + string collectionName, Guid fileId, string fileName, IEnumerable contents, string fileSource = KnowledgeDocSource.Api, string vectorDataSource = VectorDataSource.File) { if (contents.IsNullOrEmpty()) @@ -275,7 +292,7 @@ private async Task> SaveToVectorDb( var saved = await vectorDb.Upsert(collectionName, dataId, vector, content, new Dictionary { { KnowledgePayloadName.DataSource, vectorDataSource }, - { KnowledgePayloadName.FileId, fileId }, + { KnowledgePayloadName.FileId, fileId.ToString() }, { KnowledgePayloadName.FileName, fileName }, { KnowledgePayloadName.FileSource, fileSource }, { "textNumber", $"{i + 1}" } diff --git a/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Vector.cs b/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Vector.cs index c50c9ff84..ffd76c35f 100644 --- a/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Vector.cs +++ b/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Vector.cs @@ -92,7 +92,8 @@ public async Task DeleteVectorCollection(string collectionName) var vectorStoreProvider = _settings.VectorDb.Provider; db.DeleteKnowledgeCollectionConfig(collectionName); - fileStorage.DeleteKnowledgeFile(collectionName.CleanStr(), vectorStoreProvider.CleanStr()); + fileStorage.DeleteKnowledgeFile(collectionName, vectorStoreProvider); + db.DeleteKnolwedgeBaseFileMeta(collectionName, vectorStoreProvider); } return deleted; diff --git a/src/Plugins/BotSharp.Plugin.MongoStorage/Collections/KnowledgeCollectionFileDocument.cs b/src/Plugins/BotSharp.Plugin.MongoStorage/Collections/KnowledgeCollectionFileMetaDocument.cs similarity index 82% rename from src/Plugins/BotSharp.Plugin.MongoStorage/Collections/KnowledgeCollectionFileDocument.cs rename to src/Plugins/BotSharp.Plugin.MongoStorage/Collections/KnowledgeCollectionFileMetaDocument.cs index c517be92e..e18b6df78 100644 --- a/src/Plugins/BotSharp.Plugin.MongoStorage/Collections/KnowledgeCollectionFileDocument.cs +++ b/src/Plugins/BotSharp.Plugin.MongoStorage/Collections/KnowledgeCollectionFileMetaDocument.cs @@ -1,9 +1,9 @@ namespace BotSharp.Plugin.MongoStorage.Collections; -public class KnowledgeCollectionFileDocument : MongoBase +public class KnowledgeCollectionFileMetaDocument : MongoBase { public string Collection { get; set; } - public string FileId { get; set; } + public Guid FileId { get; set; } public string FileName { get; set; } public string FileSource { get; set; } public string ContentType { get; set; } diff --git a/src/Plugins/BotSharp.Plugin.MongoStorage/MongoDbContext.cs b/src/Plugins/BotSharp.Plugin.MongoStorage/MongoDbContext.cs index 7c2f14643..af7c8b8f1 100644 --- a/src/Plugins/BotSharp.Plugin.MongoStorage/MongoDbContext.cs +++ b/src/Plugins/BotSharp.Plugin.MongoStorage/MongoDbContext.cs @@ -157,6 +157,6 @@ public IMongoCollection TranslationMemories public IMongoCollection KnowledgeCollectionConfigs => Database.GetCollection($"{_collectionPrefix}_KnowledgeCollectionConfigs"); - public IMongoCollection KnowledgeCollectionFiles - => Database.GetCollection($"{_collectionPrefix}_KnowledgeCollectionFiles"); + public IMongoCollection KnowledgeCollectionFileMeta + => Database.GetCollection($"{_collectionPrefix}_KnowledgeCollectionFileMeta"); } diff --git a/src/Plugins/BotSharp.Plugin.MongoStorage/Repository/MongoRepository.KnowledgeBase.cs b/src/Plugins/BotSharp.Plugin.MongoStorage/Repository/MongoRepository.KnowledgeBase.cs index 489704544..353758023 100644 --- a/src/Plugins/BotSharp.Plugin.MongoStorage/Repository/MongoRepository.KnowledgeBase.cs +++ b/src/Plugins/BotSharp.Plugin.MongoStorage/Repository/MongoRepository.KnowledgeBase.cs @@ -120,13 +120,12 @@ public bool SaveKnolwedgeBaseFileMeta(KnowledgeDocMetaData metaData) { if (metaData == null || string.IsNullOrWhiteSpace(metaData.Collection) - || string.IsNullOrWhiteSpace(metaData.VectorStoreProvider) - || string.IsNullOrWhiteSpace(metaData.FileId)) + || string.IsNullOrWhiteSpace(metaData.VectorStoreProvider)) { return false; } - var doc = new KnowledgeCollectionFileDocument + var doc = new KnowledgeCollectionFileMetaDocument { Id = Guid.NewGuid().ToString(), Collection = metaData.Collection, @@ -140,10 +139,34 @@ public bool SaveKnolwedgeBaseFileMeta(KnowledgeDocMetaData metaData) CreateUserId = metaData.CreateUserId }; - _dc.KnowledgeCollectionFiles.InsertOne(doc); + _dc.KnowledgeCollectionFileMeta.InsertOne(doc); return true; } + public bool DeleteKnolwedgeBaseFileMeta(string collectionName, string vectorStoreProvider, Guid? fileId = null) + { + if (string.IsNullOrWhiteSpace(collectionName) + || string.IsNullOrWhiteSpace(vectorStoreProvider)) + { + return false; + } + + var builder = Builders.Filter; + var filters = new List>() + { + builder.Eq(x => x.Collection, collectionName), + builder.Eq(x => x.VectorStoreProvider, vectorStoreProvider) + }; + + if (fileId != null) + { + filters.Add(builder.Eq(x => x.FileId, fileId)); + } + + var res = _dc.KnowledgeCollectionFileMeta.DeleteMany(builder.And(filters)); + return res.DeletedCount > 0; + } + public PagedItems GetKnowledgeBaseFileMeta(string collectionName, string vectorStoreProvider, KnowledgeFileFilter filter) { if (string.IsNullOrWhiteSpace(collectionName) @@ -152,9 +175,8 @@ public PagedItems GetKnowledgeBaseFileMeta(string collecti return new PagedItems(); } - var builder = Builders.Filter; - - var docFilters = new List>() + var builder = Builders.Filter; + var docFilters = new List>() { builder.Eq(x => x.Collection, collectionName), builder.Eq(x => x.VectorStoreProvider, vectorStoreProvider) @@ -167,9 +189,9 @@ public PagedItems GetKnowledgeBaseFileMeta(string collecti } var filterDef = builder.And(docFilters); - var sortDef = Builders.Sort.Descending(x => x.CreateDate); - var docs = _dc.KnowledgeCollectionFiles.Find(filterDef).Sort(sortDef).Skip(filter.Offset).Limit(filter.Size).ToList(); - var count = _dc.KnowledgeCollectionFiles.CountDocuments(filterDef); + var sortDef = Builders.Sort.Descending(x => x.CreateDate); + var docs = _dc.KnowledgeCollectionFileMeta.Find(filterDef).Sort(sortDef).Skip(filter.Offset).Limit(filter.Size).ToList(); + var count = _dc.KnowledgeCollectionFileMeta.CountDocuments(filterDef); var files = docs?.Select(x => new KnowledgeDocMetaData { diff --git a/src/Plugins/BotSharp.Plugin.TencentCos/Services/TencentCosService.KnowledgeBase.cs b/src/Plugins/BotSharp.Plugin.TencentCos/Services/TencentCosService.KnowledgeBase.cs index cd5f80525..ada41c009 100644 --- a/src/Plugins/BotSharp.Plugin.TencentCos/Services/TencentCosService.KnowledgeBase.cs +++ b/src/Plugins/BotSharp.Plugin.TencentCos/Services/TencentCosService.KnowledgeBase.cs @@ -2,23 +2,101 @@ namespace BotSharp.Plugin.TencentCos.Services; public partial class TencentCosService { - public bool SaveKnowledgeBaseFile(string collectionName, string vectorStoreProvider, string fileId, string fileName, BinaryData fileData) + public bool SaveKnowledgeBaseFile(string collectionName, string vectorStoreProvider, Guid fileId, string fileName, BinaryData fileData) { - throw new NotImplementedException(); + if (string.IsNullOrWhiteSpace(collectionName) + || string.IsNullOrWhiteSpace(vectorStoreProvider)) + { + return false; + } + + try + { + var docDir = BuildKnowledgeCollectionFileDir(collectionName, vectorStoreProvider); + var dir = $"{docDir}/{fileId}"; + if (ExistDirectory(dir)) + { + _cosClient.BucketClient.DeleteDir(dir); + } + + var file = $"{dir}/{fileName}"; + var res = _cosClient.BucketClient.UploadBytes(file, fileData.ToArray()); + return res; + } + catch (Exception ex) + { + _logger.LogWarning($"Error when saving knowledge file " + + $"(Vector store provider: {vectorStoreProvider}, Collection: {collectionName}, File name: {fileName})." + + $"\r\n{ex.Message}\r\n{ex.InnerException}"); + return false; + } } - public bool DeleteKnowledgeFile(string collectionName, string vectorStoreProvider, string? fileId = null) + public bool DeleteKnowledgeFile(string collectionName, string vectorStoreProvider, Guid? fileId = null) { - throw new NotImplementedException(); + if (string.IsNullOrWhiteSpace(collectionName) + || string.IsNullOrWhiteSpace(vectorStoreProvider)) + { + return false; + } + + var dir = BuildKnowledgeCollectionFileDir(collectionName, vectorStoreProvider); + if (!ExistDirectory(dir)) return false; + + if (fileId == null) + { + _cosClient.BucketClient.DeleteDir(dir); + } + else + { + var fileDir = $"{dir}/{fileId}"; + if (ExistDirectory(fileDir)) + { + _cosClient.BucketClient.DeleteDir(fileDir); + } + } + + return true; } - public string GetKnowledgeBaseFileUrl(string collectionName, string fileId) + public string GetKnowledgeBaseFileUrl(string collectionName, string vectorStoreProvider, Guid fileId, string fileName) { - throw new NotImplementedException(); + if (string.IsNullOrWhiteSpace(collectionName) + || string.IsNullOrWhiteSpace(vectorStoreProvider) + || string.IsNullOrWhiteSpace(fileName)) + { + return string.Empty; + } + + var dir = BuildKnowledgeCollectionFileDir(vectorStoreProvider, collectionName); + return $"https://{_fullBuketName}.cos.{_settings.Region}.myqcloud.com/{dir}/{fileId}/{fileName}"; ; } - public FileBinaryDataModel? GetKnowledgeBaseFileBinaryData(string collectionName, string vectorStoreProvider, string fileId) + public BinaryData? GetKnowledgeBaseFileBinaryData(string collectionName, string vectorStoreProvider, Guid fileId, string fileName) + { + if (string.IsNullOrWhiteSpace(collectionName) + || string.IsNullOrWhiteSpace(vectorStoreProvider) + || string.IsNullOrWhiteSpace(fileName)) + { + return null; + } + + var docDir = BuildKnowledgeCollectionFileDir(collectionName, vectorStoreProvider); + var fileDir = $"{docDir}/{fileId}"; + if (!ExistDirectory(fileDir)) return null; + + var file = $"{fileDir}/{fileName}"; + var bytes = _cosClient.BucketClient.DownloadFileBytes(file); + if (bytes == null) return null; + + return BinaryData.FromBytes(bytes); + } + + + #region Private methods + private string BuildKnowledgeCollectionFileDir(string collectionName, string vectorStoreProvider) { - throw new NotImplementedException(); + return $"{KNOWLEDGE_FOLDER}/{KNOWLEDGE_DOC_FOLDER}/{vectorStoreProvider.CleanStr()}/{collectionName.CleanStr()}"; } + #endregion } diff --git a/src/Plugins/BotSharp.Plugin.TencentCos/Services/TencentCosService.cs b/src/Plugins/BotSharp.Plugin.TencentCos/Services/TencentCosService.cs index c48812f1f..788b51d90 100644 --- a/src/Plugins/BotSharp.Plugin.TencentCos/Services/TencentCosService.cs +++ b/src/Plugins/BotSharp.Plugin.TencentCos/Services/TencentCosService.cs @@ -28,6 +28,8 @@ public partial class TencentCosService : IFileStorageService private const string USER_AVATAR_FOLDER = "avatar"; private const string SESSION_FOLDER = "sessions"; private const string TEXT_TO_SPEECH_FOLDER = "speeches"; + private const string KNOWLEDGE_FOLDER = "knowledgebase"; + private const string KNOWLEDGE_DOC_FOLDER = "document"; public TencentCosService( TencentCosSettings settings,