From 9a7f2821a0e956f37c15ef802500566835d265cb Mon Sep 17 00:00:00 2001 From: Jicheng Lu <103353@smsassist.com> Date: Mon, 9 Sep 2024 10:00:46 -0500 Subject: [PATCH 01/14] clean code --- .../Knowledges/IKnowledgeService.cs | 4 +- .../Models/KnowledgeSearchResult.cs | 10 -- .../Controllers/KnowledgeBaseController.cs | 26 +-- .../Knowledges/KnowledgeSearchViewModel.cs | 12 -- .../Knowledges/SearchKnowledgeRequest.cs | 43 ----- .../Services/KnowledgeService.Create.cs | 69 ------- .../Services/KnowledgeService.Delete.cs | 42 ----- .../Services/KnowledgeService.Document.cs | 27 +++ .../Services/KnowledgeService.Get.cs | 103 ----------- .../Services/KnowledgeService.Graph.cs | 22 +++ .../Services/KnowledgeService.Update.cs | 31 ---- .../Services/KnowledgeService.Vector.cs | 170 ++++++++++++++++++ 12 files changed, 224 insertions(+), 335 deletions(-) delete mode 100644 src/Infrastructure/BotSharp.Abstraction/Knowledges/Models/KnowledgeSearchResult.cs delete mode 100644 src/Infrastructure/BotSharp.OpenAPI/ViewModels/Knowledges/KnowledgeSearchViewModel.cs delete mode 100644 src/Infrastructure/BotSharp.OpenAPI/ViewModels/Knowledges/SearchKnowledgeRequest.cs delete mode 100644 src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Create.cs delete mode 100644 src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Delete.cs create mode 100644 src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Document.cs delete mode 100644 src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Get.cs create mode 100644 src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Graph.cs delete mode 100644 src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Update.cs create mode 100644 src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Vector.cs diff --git a/src/Infrastructure/BotSharp.Abstraction/Knowledges/IKnowledgeService.cs b/src/Infrastructure/BotSharp.Abstraction/Knowledges/IKnowledgeService.cs index 1021ea1c0..ca0400866 100644 --- a/src/Infrastructure/BotSharp.Abstraction/Knowledges/IKnowledgeService.cs +++ b/src/Infrastructure/BotSharp.Abstraction/Knowledges/IKnowledgeService.cs @@ -19,6 +19,8 @@ public interface IKnowledgeService #region Graph Task SearchGraphKnowledge(string query, GraphSearchOptions options); - Task SearchKnowledge(string query, string collectionName, VectorSearchOptions vectorOptions, GraphSearchOptions graphOptions); + #endregion + + #region Document #endregion } diff --git a/src/Infrastructure/BotSharp.Abstraction/Knowledges/Models/KnowledgeSearchResult.cs b/src/Infrastructure/BotSharp.Abstraction/Knowledges/Models/KnowledgeSearchResult.cs deleted file mode 100644 index 771b070bc..000000000 --- a/src/Infrastructure/BotSharp.Abstraction/Knowledges/Models/KnowledgeSearchResult.cs +++ /dev/null @@ -1,10 +0,0 @@ -using BotSharp.Abstraction.Graph.Models; -using BotSharp.Abstraction.VectorStorage.Models; - -namespace BotSharp.Abstraction.Knowledges.Models; - -public class KnowledgeSearchResult -{ - public IEnumerable VectorResult { get; set; } - public GraphSearchResult GraphResult { get; set; } -} diff --git a/src/Infrastructure/BotSharp.OpenAPI/Controllers/KnowledgeBaseController.cs b/src/Infrastructure/BotSharp.OpenAPI/Controllers/KnowledgeBaseController.cs index 827c2d7ce..fd446bd27 100644 --- a/src/Infrastructure/BotSharp.OpenAPI/Controllers/KnowledgeBaseController.cs +++ b/src/Infrastructure/BotSharp.OpenAPI/Controllers/KnowledgeBaseController.cs @@ -143,29 +143,7 @@ public async Task SearchGraphKnowledge([FromBody] Searc #endregion - #region Knowledge - [HttpPost("/knowledge/search")] - public async Task SearchKnowledge([FromBody] SearchKnowledgeRequest request) - { - var vectorOptions = new VectorSearchOptions - { - Fields = request.VectorParams.Fields, - Limit = request.VectorParams.Limit ?? 5, - Confidence = request.VectorParams.Confidence ?? 0.5f, - WithVector = request.VectorParams.WithVector - }; - - var graphOptions = new GraphSearchOptions - { - Method = request.GraphParams.Method - }; - - var result = await _knowledgeService.SearchKnowledge(request.Text, request.VectorParams.Collection, vectorOptions, graphOptions); - return new KnowledgeSearchViewModel - { - VectorResult = result?.VectorResult?.Select(x => VectorKnowledgeViewModel.From(x)), - GraphResult = result?.GraphResult != null ? new GraphKnowledgeViewModel { Result = result.GraphResult.Result } : null - }; - } + #region Document + #endregion } diff --git a/src/Infrastructure/BotSharp.OpenAPI/ViewModels/Knowledges/KnowledgeSearchViewModel.cs b/src/Infrastructure/BotSharp.OpenAPI/ViewModels/Knowledges/KnowledgeSearchViewModel.cs deleted file mode 100644 index f862f1844..000000000 --- a/src/Infrastructure/BotSharp.OpenAPI/ViewModels/Knowledges/KnowledgeSearchViewModel.cs +++ /dev/null @@ -1,12 +0,0 @@ -using System.Text.Json.Serialization; - -namespace BotSharp.OpenAPI.ViewModels.Knowledges; - -public class KnowledgeSearchViewModel -{ - [JsonPropertyName("vector_result")] - public IEnumerable? VectorResult { get; set; } - - [JsonPropertyName("graph_result")] - public GraphKnowledgeViewModel? GraphResult { get; set; } -} diff --git a/src/Infrastructure/BotSharp.OpenAPI/ViewModels/Knowledges/SearchKnowledgeRequest.cs b/src/Infrastructure/BotSharp.OpenAPI/ViewModels/Knowledges/SearchKnowledgeRequest.cs deleted file mode 100644 index 20f80fec1..000000000 --- a/src/Infrastructure/BotSharp.OpenAPI/ViewModels/Knowledges/SearchKnowledgeRequest.cs +++ /dev/null @@ -1,43 +0,0 @@ -using System.Text.Json.Serialization; - -namespace BotSharp.OpenAPI.ViewModels.Knowledges; - -public class SearchKnowledgeRequest -{ - [JsonPropertyName("text")] - public string Text { get; set; } = string.Empty; - - #region Vector - [JsonPropertyName("vector_params")] - public VectorParam VectorParams { get; set; } - #endregion - - #region Graph - [JsonPropertyName("graph_params")] - public GraphParam GraphParams { get; set; } - #endregion -} - -public class VectorParam -{ - [JsonPropertyName("collection")] - public string Collection { get; set; } - - [JsonPropertyName("fields")] - public IEnumerable? Fields { get; set; } - - [JsonPropertyName("limit")] - public int? Limit { get; set; } = 5; - - [JsonPropertyName("confidence")] - public float? Confidence { get; set; } = 0.5f; - - [JsonPropertyName("with_vector")] - public bool WithVector { get; set; } -} - -public class GraphParam -{ - [JsonPropertyName("method")] - public string Method { get; set; } = string.Empty; -} \ No newline at end of file diff --git a/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Create.cs b/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Create.cs deleted file mode 100644 index 2c4004fc8..000000000 --- a/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Create.cs +++ /dev/null @@ -1,69 +0,0 @@ -namespace BotSharp.Plugin.KnowledgeBase.Services; - -public partial class KnowledgeService -{ - public async Task FeedVectorKnowledge(string collectionName, KnowledgeCreationModel knowledge) - { - var index = 0; - var lines = TextChopper.Chop(knowledge.Content, new ChunkOption - { - Size = 1024, - Conjunction = 32, - SplitByWord = true, - }); - - var db = GetVectorDb(); - var textEmbedding = GetTextEmbedding(collectionName); - - await db.CreateCollection(collectionName, textEmbedding.GetDimension()); - foreach (var line in lines) - { - var vec = await textEmbedding.GetVectorAsync(line); - await db.Upsert(collectionName, Guid.NewGuid(), vec, line); - index++; - Console.WriteLine($"Saved vector {index}/{lines.Count}: {line}\n"); - } - } - - public async Task CreateVectorCollection(string collectionName, int dimension) - { - try - { - if (string.IsNullOrWhiteSpace(collectionName)) - { - return false; - } - - var db = GetVectorDb(); - return await db.CreateCollection(collectionName, dimension); - } - catch (Exception ex) - { - _logger.LogWarning($"Error when creating a vector collection ({collectionName}). {ex.Message}\r\n{ex.InnerException}"); - return false; - } - } - - public async Task CreateVectorCollectionData(string collectionName, VectorCreateModel create) - { - try - { - if (string.IsNullOrWhiteSpace(collectionName) || string.IsNullOrWhiteSpace(create.Text)) - { - return false; - } - - var textEmbedding = GetTextEmbedding(collectionName); - var vector = await textEmbedding.GetVectorAsync(create.Text); - - var db = GetVectorDb(); - var guid = Guid.NewGuid(); - return await db.Upsert(collectionName, guid, vector, create.Text, create.Payload); - } - catch (Exception ex) - { - _logger.LogWarning($"Error when creating vector collection data. {ex.Message}\r\n{ex.InnerException}"); - return false; - } - } -} diff --git a/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Delete.cs b/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Delete.cs deleted file mode 100644 index b0973a996..000000000 --- a/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Delete.cs +++ /dev/null @@ -1,42 +0,0 @@ -namespace BotSharp.Plugin.KnowledgeBase.Services; - -public partial class KnowledgeService -{ - public async Task DeleteVectorCollection(string collectionName) - { - try - { - if (string.IsNullOrWhiteSpace(collectionName)) - { - return false; - } - - var db = GetVectorDb(); - return await db.DeleteCollection(collectionName); - } - catch (Exception ex) - { - _logger.LogWarning($"Error when deleting collection ({collectionName}). {ex.Message}\r\n{ex.InnerException}"); - return false; - } - } - - public async Task DeleteVectorCollectionData(string collectionName, string id) - { - try - { - if (!Guid.TryParse(id, out var guid)) - { - return false; - } - - var db = GetVectorDb(); - return await db.DeleteCollectionData(collectionName, guid); - } - catch (Exception ex) - { - _logger.LogWarning($"Error when deleting vector collection data ({collectionName}-{id}). {ex.Message}\r\n{ex.InnerException}"); - return false; - } - } -} diff --git a/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Document.cs b/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Document.cs new file mode 100644 index 000000000..0e4710bc9 --- /dev/null +++ b/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Document.cs @@ -0,0 +1,27 @@ +namespace BotSharp.Plugin.KnowledgeBase.Services; + +public partial class KnowledgeService +{ + public async Task FeedVectorKnowledge(string collectionName, KnowledgeCreationModel knowledge) + { + var index = 0; + var lines = TextChopper.Chop(knowledge.Content, new ChunkOption + { + Size = 1024, + Conjunction = 32, + SplitByWord = true, + }); + + var db = GetVectorDb(); + var textEmbedding = GetTextEmbedding(collectionName); + + await db.CreateCollection(collectionName, textEmbedding.GetDimension()); + foreach (var line in lines) + { + var vec = await textEmbedding.GetVectorAsync(line); + await db.Upsert(collectionName, Guid.NewGuid(), vec, line); + index++; + Console.WriteLine($"Saved vector {index}/{lines.Count}: {line}\n"); + } + } +} diff --git a/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Get.cs b/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Get.cs deleted file mode 100644 index dcffa9443..000000000 --- a/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Get.cs +++ /dev/null @@ -1,103 +0,0 @@ -namespace BotSharp.Plugin.KnowledgeBase.Services; - -public partial class KnowledgeService -{ - public async Task> GetVectorCollections() - { - try - { - var db = GetVectorDb(); - return await db.GetCollections(); - } - catch (Exception ex) - { - _logger.LogWarning($"Error when getting vector db collections. {ex.Message}\r\n{ex.InnerException}"); - return Enumerable.Empty(); - } - } - - public async Task> GetPagedVectorCollectionData(string collectionName, VectorFilter filter) - { - try - { - var db = GetVectorDb(); - var pagedResult = await db.GetPagedCollectionData(collectionName, filter); - return new StringIdPagedItems - { - Count = pagedResult.Count, - Items = pagedResult.Items.Select(x => VectorSearchResult.CopyFrom(x)), - NextId = pagedResult.NextId, - }; - } - catch (Exception ex) - { - _logger.LogWarning($"Error when getting vector knowledge collection data ({collectionName}). {ex.Message}\r\n{ex.InnerException}"); - return new StringIdPagedItems(); - } - } - - public async Task> SearchVectorKnowledge(string query, string collectionName, VectorSearchOptions options) - { - try - { - var textEmbedding = GetTextEmbedding(collectionName); - var vector = await textEmbedding.GetVectorAsync(query); - - // Vector search - var db = GetVectorDb(); - var found = await db.Search(collectionName, vector, options.Fields, limit: options.Limit ?? 5, confidence: options.Confidence ?? 0.5f, withVector: options.WithVector); - - var results = found.Select(x => VectorSearchResult.CopyFrom(x)).ToList(); - return results; - } - catch (Exception ex) - { - _logger.LogWarning($"Error when searching vector knowledge ({collectionName}). {ex.Message}\r\n{ex.InnerException}"); - return new List(); - } - } - - public async Task SearchGraphKnowledge(string query, GraphSearchOptions options) - { - try - { - var db = GetGraphDb(); - var found = await db.Search(query, options); - return new GraphSearchResult - { - Result = found.Result - }; - } - catch (Exception ex) - { - _logger.LogWarning($"Error when searching graph knowledge (Query: {query}). {ex.Message}\r\n{ex.InnerException}"); - return new GraphSearchResult(); - } - } - - public async Task SearchKnowledge(string query, string collectionName, VectorSearchOptions vectorOptions, GraphSearchOptions graphOptions) - { - try - { - var textEmbedding = GetTextEmbedding(collectionName); - var vector = await textEmbedding.GetVectorAsync(query); - - var vectorDb = GetVectorDb(); - var vectorRes = await vectorDb.Search(collectionName, vector, vectorOptions.Fields, limit: vectorOptions.Limit ?? 5, - confidence: vectorOptions.Confidence ?? 0.5f, withVector: vectorOptions.WithVector); - - var graphDb = GetGraphDb(); - var graphRes = await graphDb.Search(query, graphOptions); - return new KnowledgeSearchResult - { - VectorResult = vectorRes.Select(x => VectorSearchResult.CopyFrom(x)), - GraphResult = new GraphSearchResult { Result = graphRes.Result } - }; - } - catch (Exception ex) - { - _logger.LogWarning($"Error when searching knowledge (Vector collection: {collectionName}) (Query: {query}). {ex.Message}\r\n{ex.InnerException}"); - return new KnowledgeSearchResult(); - } - } -} diff --git a/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Graph.cs b/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Graph.cs new file mode 100644 index 000000000..7c8f7168b --- /dev/null +++ b/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Graph.cs @@ -0,0 +1,22 @@ +namespace BotSharp.Plugin.KnowledgeBase.Services; + +public partial class KnowledgeService +{ + public async Task SearchGraphKnowledge(string query, GraphSearchOptions options) + { + try + { + var db = GetGraphDb(); + var found = await db.Search(query, options); + return new GraphSearchResult + { + Result = found.Result + }; + } + catch (Exception ex) + { + _logger.LogWarning($"Error when searching graph knowledge (Query: {query}). {ex.Message}\r\n{ex.InnerException}"); + return new GraphSearchResult(); + } + } +} diff --git a/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Update.cs b/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Update.cs deleted file mode 100644 index 0fa1f5cd8..000000000 --- a/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Update.cs +++ /dev/null @@ -1,31 +0,0 @@ -namespace BotSharp.Plugin.KnowledgeBase.Services; - -public partial class KnowledgeService -{ - public async Task UpdateVectorCollectionData(string collectionName, VectorUpdateModel update) - { - try - { - if (string.IsNullOrWhiteSpace(collectionName) || string.IsNullOrWhiteSpace(update.Text) || !Guid.TryParse(update.Id, out var guid)) - { - return false; - } - - var db = GetVectorDb(); - var found = await db.GetCollectionData(collectionName, new List { guid }); - if (found.IsNullOrEmpty()) - { - return false; - } - - var textEmbedding = GetTextEmbedding(collectionName); - var vector = await textEmbedding.GetVectorAsync(update.Text); - return await db.Upsert(collectionName, guid, vector, update.Text, update.Payload); - } - catch (Exception ex) - { - _logger.LogWarning($"Error when updating vector collection data. {ex.Message}\r\n{ex.InnerException}"); - return false; - } - } -} diff --git a/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Vector.cs b/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Vector.cs new file mode 100644 index 000000000..9afd7c300 --- /dev/null +++ b/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Vector.cs @@ -0,0 +1,170 @@ +namespace BotSharp.Plugin.KnowledgeBase.Services; + +public partial class KnowledgeService +{ + #region Collection + public async Task CreateVectorCollection(string collectionName, int dimension) + { + try + { + if (string.IsNullOrWhiteSpace(collectionName)) + { + return false; + } + + var db = GetVectorDb(); + return await db.CreateCollection(collectionName, dimension); + } + catch (Exception ex) + { + _logger.LogWarning($"Error when creating a vector collection ({collectionName}). {ex.Message}\r\n{ex.InnerException}"); + return false; + } + } + + public async Task> GetVectorCollections() + { + try + { + var db = GetVectorDb(); + return await db.GetCollections(); + } + catch (Exception ex) + { + _logger.LogWarning($"Error when getting vector db collections. {ex.Message}\r\n{ex.InnerException}"); + return Enumerable.Empty(); + } + } + + public async Task DeleteVectorCollection(string collectionName) + { + try + { + if (string.IsNullOrWhiteSpace(collectionName)) + { + return false; + } + + var db = GetVectorDb(); + return await db.DeleteCollection(collectionName); + } + catch (Exception ex) + { + _logger.LogWarning($"Error when deleting collection ({collectionName}). {ex.Message}\r\n{ex.InnerException}"); + return false; + } + } + #endregion + + #region Collection data + public async Task CreateVectorCollectionData(string collectionName, VectorCreateModel create) + { + try + { + if (string.IsNullOrWhiteSpace(collectionName) || string.IsNullOrWhiteSpace(create.Text)) + { + return false; + } + + var textEmbedding = GetTextEmbedding(collectionName); + var vector = await textEmbedding.GetVectorAsync(create.Text); + + var db = GetVectorDb(); + var guid = Guid.NewGuid(); + return await db.Upsert(collectionName, guid, vector, create.Text, create.Payload); + } + catch (Exception ex) + { + _logger.LogWarning($"Error when creating vector collection data. {ex.Message}\r\n{ex.InnerException}"); + return false; + } + } + + public async Task UpdateVectorCollectionData(string collectionName, VectorUpdateModel update) + { + try + { + if (string.IsNullOrWhiteSpace(collectionName) || string.IsNullOrWhiteSpace(update.Text) || !Guid.TryParse(update.Id, out var guid)) + { + return false; + } + + var db = GetVectorDb(); + var found = await db.GetCollectionData(collectionName, new List { guid }); + if (found.IsNullOrEmpty()) + { + return false; + } + + var textEmbedding = GetTextEmbedding(collectionName); + var vector = await textEmbedding.GetVectorAsync(update.Text); + return await db.Upsert(collectionName, guid, vector, update.Text, update.Payload); + } + catch (Exception ex) + { + _logger.LogWarning($"Error when updating vector collection data. {ex.Message}\r\n{ex.InnerException}"); + return false; + } + } + + public async Task DeleteVectorCollectionData(string collectionName, string id) + { + try + { + if (!Guid.TryParse(id, out var guid)) + { + return false; + } + + var db = GetVectorDb(); + return await db.DeleteCollectionData(collectionName, guid); + } + catch (Exception ex) + { + _logger.LogWarning($"Error when deleting vector collection data ({collectionName}-{id}). {ex.Message}\r\n{ex.InnerException}"); + return false; + } + } + + public async Task> GetPagedVectorCollectionData(string collectionName, VectorFilter filter) + { + try + { + var db = GetVectorDb(); + var pagedResult = await db.GetPagedCollectionData(collectionName, filter); + return new StringIdPagedItems + { + Count = pagedResult.Count, + Items = pagedResult.Items.Select(x => VectorSearchResult.CopyFrom(x)), + NextId = pagedResult.NextId, + }; + } + catch (Exception ex) + { + _logger.LogWarning($"Error when getting vector knowledge collection data ({collectionName}). {ex.Message}\r\n{ex.InnerException}"); + return new StringIdPagedItems(); + } + } + + public async Task> SearchVectorKnowledge(string query, string collectionName, VectorSearchOptions options) + { + try + { + var textEmbedding = GetTextEmbedding(collectionName); + var vector = await textEmbedding.GetVectorAsync(query); + + // Vector search + var db = GetVectorDb(); + var found = await db.Search(collectionName, vector, options.Fields, limit: options.Limit ?? 5, confidence: options.Confidence ?? 0.5f, withVector: options.WithVector); + + var results = found.Select(x => VectorSearchResult.CopyFrom(x)).ToList(); + return results; + } + catch (Exception ex) + { + _logger.LogWarning($"Error when searching vector knowledge ({collectionName}). {ex.Message}\r\n{ex.InnerException}"); + return new List(); + } + } + #endregion +} From b1946256a1f1360aee5e11d257e7ae706af455c7 Mon Sep 17 00:00:00 2001 From: Jicheng Lu <103353@smsassist.com> Date: Mon, 9 Sep 2024 11:32:31 -0500 Subject: [PATCH 02/14] move vector collection to db --- .../Enums/KnowledgeCollectionType.cs | 7 +++++ .../Knowledges/IKnowledgeService.cs | 4 +++ .../Settings/KnowledgeBaseSettings.cs | 7 ----- .../Repositories/IBotSharpRepository.cs | 6 ++++ .../Models/VectorCollectionConfigModel.cs | 31 +++++++++++++++++++ .../Repository/BotSharpDbContext.cs | 9 ++++++ .../FileRepository.Knowledge.cs | 25 +++++++++++++++ .../FileRepository/FileRepository.cs | 3 ++ .../Controllers/KnowledgeBaseController.cs | 11 ++++++- .../Helpers/KnowledgeSettingHelper.cs | 23 +++++++++----- .../KnowledgeBasePlugin.cs | 3 +- .../Services/KnowledgeService.Common.cs | 11 +++++++ .../Repository/MongoRepository.Knowledge.cs | 12 +++++++ 13 files changed, 135 insertions(+), 17 deletions(-) create mode 100644 src/Infrastructure/BotSharp.Abstraction/Knowledges/Enums/KnowledgeCollectionType.cs create mode 100644 src/Infrastructure/BotSharp.Abstraction/VectorStorage/Models/VectorCollectionConfigModel.cs create mode 100644 src/Infrastructure/BotSharp.Core/Repository/FileRepository/FileRepository.Knowledge.cs create mode 100644 src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Common.cs create mode 100644 src/Plugins/BotSharp.Plugin.MongoStorage/Repository/MongoRepository.Knowledge.cs diff --git a/src/Infrastructure/BotSharp.Abstraction/Knowledges/Enums/KnowledgeCollectionType.cs b/src/Infrastructure/BotSharp.Abstraction/Knowledges/Enums/KnowledgeCollectionType.cs new file mode 100644 index 000000000..b63488964 --- /dev/null +++ b/src/Infrastructure/BotSharp.Abstraction/Knowledges/Enums/KnowledgeCollectionType.cs @@ -0,0 +1,7 @@ +namespace BotSharp.Abstraction.Knowledges.Enums; + +public static class KnowledgeCollectionType +{ + public static string QuestionAnswer = "question-answer"; + public static string Document = "document"; +} diff --git a/src/Infrastructure/BotSharp.Abstraction/Knowledges/IKnowledgeService.cs b/src/Infrastructure/BotSharp.Abstraction/Knowledges/IKnowledgeService.cs index ca0400866..ed3d7b3d2 100644 --- a/src/Infrastructure/BotSharp.Abstraction/Knowledges/IKnowledgeService.cs +++ b/src/Infrastructure/BotSharp.Abstraction/Knowledges/IKnowledgeService.cs @@ -23,4 +23,8 @@ public interface IKnowledgeService #region Document #endregion + + #region Common + Task RefreshVectorKnowledgeConfigs(VectorCollectionConfigsModel configs); + #endregion } diff --git a/src/Infrastructure/BotSharp.Abstraction/Knowledges/Settings/KnowledgeBaseSettings.cs b/src/Infrastructure/BotSharp.Abstraction/Knowledges/Settings/KnowledgeBaseSettings.cs index 217dafb9e..b589e5a6d 100644 --- a/src/Infrastructure/BotSharp.Abstraction/Knowledges/Settings/KnowledgeBaseSettings.cs +++ b/src/Infrastructure/BotSharp.Abstraction/Knowledges/Settings/KnowledgeBaseSettings.cs @@ -8,7 +8,6 @@ public class KnowledgeBaseSettings public SettingBase GraphDb { get; set; } public DefaultKnowledgeBaseSetting Default { get; set; } - public List Collections { get; set; } = new(); } public class DefaultKnowledgeBaseSetting @@ -17,12 +16,6 @@ public class DefaultKnowledgeBaseSetting public KnowledgeTextEmbeddingSetting TextEmbedding { get; set; } } -public class VectorCollectionSetting -{ - public string Name { get; set; } - public KnowledgeTextEmbeddingSetting TextEmbedding { get; set; } -} - public class KnowledgeTextEmbeddingSetting : SettingBase { public string Model { get; set; } diff --git a/src/Infrastructure/BotSharp.Abstraction/Repositories/IBotSharpRepository.cs b/src/Infrastructure/BotSharp.Abstraction/Repositories/IBotSharpRepository.cs index 0af440807..258fd3e6e 100644 --- a/src/Infrastructure/BotSharp.Abstraction/Repositories/IBotSharpRepository.cs +++ b/src/Infrastructure/BotSharp.Abstraction/Repositories/IBotSharpRepository.cs @@ -4,6 +4,7 @@ using BotSharp.Abstraction.Tasks.Models; using BotSharp.Abstraction.Translation.Models; using BotSharp.Abstraction.Users.Models; +using BotSharp.Abstraction.VectorStorage.Models; namespace BotSharp.Abstraction.Repositories; @@ -99,4 +100,9 @@ public interface IBotSharpRepository bool SaveTranslationMemories(IEnumerable inputs); #endregion + + #region Knowledge + bool SaveKnowledgeCollectionConfigs(List configs); + VectorCollectionConfig? GetKnowledgeCollectionConfig(string collectionName); + #endregion } diff --git a/src/Infrastructure/BotSharp.Abstraction/VectorStorage/Models/VectorCollectionConfigModel.cs b/src/Infrastructure/BotSharp.Abstraction/VectorStorage/Models/VectorCollectionConfigModel.cs new file mode 100644 index 000000000..45f2dfe03 --- /dev/null +++ b/src/Infrastructure/BotSharp.Abstraction/VectorStorage/Models/VectorCollectionConfigModel.cs @@ -0,0 +1,31 @@ +namespace BotSharp.Abstraction.VectorStorage.Models; + +public class VectorCollectionConfigsModel +{ + [JsonPropertyName("collections")] + public List Collections { get; set; } = new(); +} + +public class VectorCollectionConfig +{ + [JsonPropertyName("name")] + public string Name { get; set; } + + [JsonPropertyName("type")] + public string Type { get; set; } + + [JsonPropertyName("text_embedding")] + public KnowledgeEmbeddingConfig TextEmbedding { get; set; } +} + +public class KnowledgeEmbeddingConfig +{ + [JsonPropertyName("provider")] + public string Provider { get; set; } + + [JsonPropertyName("model")] + public string Model { get; set; } + + [JsonPropertyName("dimension")] + public int Dimension { get; set; } +} \ No newline at end of file diff --git a/src/Infrastructure/BotSharp.Core/Repository/BotSharpDbContext.cs b/src/Infrastructure/BotSharp.Core/Repository/BotSharpDbContext.cs index 5ce769012..ca5d810d9 100644 --- a/src/Infrastructure/BotSharp.Core/Repository/BotSharpDbContext.cs +++ b/src/Infrastructure/BotSharp.Core/Repository/BotSharpDbContext.cs @@ -3,6 +3,7 @@ using BotSharp.Abstraction.Tasks.Models; using BotSharp.Abstraction.Translation.Models; using BotSharp.Abstraction.Users.Models; +using BotSharp.Abstraction.VectorStorage.Models; using Microsoft.EntityFrameworkCore.Infrastructure; namespace BotSharp.Core.Repository; @@ -232,4 +233,12 @@ public IEnumerable GetTranslationMemories(IEnumerable inputs) => throw new NotImplementedException(); #endregion + + #region Knowledge + public bool SaveKnowledgeCollectionConfigs(List configs) => + throw new NotImplementedException(); + + public VectorCollectionConfig? GetKnowledgeCollectionConfig(string collectionName) => + throw new NotImplementedException(); + #endregion } diff --git a/src/Infrastructure/BotSharp.Core/Repository/FileRepository/FileRepository.Knowledge.cs b/src/Infrastructure/BotSharp.Core/Repository/FileRepository/FileRepository.Knowledge.cs new file mode 100644 index 000000000..f3108439e --- /dev/null +++ b/src/Infrastructure/BotSharp.Core/Repository/FileRepository/FileRepository.Knowledge.cs @@ -0,0 +1,25 @@ +using BotSharp.Abstraction.VectorStorage.Models; +using System.IO; + +namespace BotSharp.Core.Repository; + +public partial class FileRepository +{ + public bool SaveKnowledgeCollectionConfigs(List configs) + { + var dir = Path.Combine(_dbSettings.FileRepository, KNOWLEDGE_FOLDER, VECTOR_FOLDER); + if (!Directory.Exists(dir)) + { + Directory.CreateDirectory(dir); + } + + var configFile = Path.Combine(dir, COLLECTION_CONFIG_FILE); + File.WriteAllText(configFile, JsonSerializer.Serialize(configs ?? new(), _options)); + return true; + } + + public VectorCollectionConfig? GetKnowledgeCollectionConfig(string collectionName) + { + throw new NotImplementedException(); + } +} diff --git a/src/Infrastructure/BotSharp.Core/Repository/FileRepository/FileRepository.cs b/src/Infrastructure/BotSharp.Core/Repository/FileRepository/FileRepository.cs index 7e5b4d9db..9e220d60e 100644 --- a/src/Infrastructure/BotSharp.Core/Repository/FileRepository/FileRepository.cs +++ b/src/Infrastructure/BotSharp.Core/Repository/FileRepository/FileRepository.cs @@ -40,6 +40,9 @@ public partial class FileRepository : IBotSharpRepository private const string AGENT_RESPONSES_FOLDER = "responses"; private const string AGENT_TASKS_FOLDER = "tasks"; private const string USERS_FOLDER = "users"; + private const string KNOWLEDGE_FOLDER = "knowledge"; + private const string VECTOR_FOLDER = "vector"; + private const string COLLECTION_CONFIG_FILE = "collection-config.json"; public FileRepository( IServiceProvider services, diff --git a/src/Infrastructure/BotSharp.OpenAPI/Controllers/KnowledgeBaseController.cs b/src/Infrastructure/BotSharp.OpenAPI/Controllers/KnowledgeBaseController.cs index fd446bd27..2e804a79a 100644 --- a/src/Infrastructure/BotSharp.OpenAPI/Controllers/KnowledgeBaseController.cs +++ b/src/Infrastructure/BotSharp.OpenAPI/Controllers/KnowledgeBaseController.cs @@ -144,6 +144,15 @@ public async Task SearchGraphKnowledge([FromBody] Searc #region Document - + + #endregion + + #region Common + [HttpPost("/knowledge/vector/refresh-configs")] + public async Task RefreshVectorCollectionConfigs([FromBody] VectorCollectionConfigsModel request) + { + var saved = await _knowledgeService.RefreshVectorKnowledgeConfigs(request); + return saved ? "Success" : "Fail"; + } #endregion } diff --git a/src/Plugins/BotSharp.Plugin.KnowledgeBase/Helpers/KnowledgeSettingHelper.cs b/src/Plugins/BotSharp.Plugin.KnowledgeBase/Helpers/KnowledgeSettingHelper.cs index 48033c912..fd0452b1b 100644 --- a/src/Plugins/BotSharp.Plugin.KnowledgeBase/Helpers/KnowledgeSettingHelper.cs +++ b/src/Plugins/BotSharp.Plugin.KnowledgeBase/Helpers/KnowledgeSettingHelper.cs @@ -4,22 +4,29 @@ public static class KnowledgeSettingHelper { public static ITextEmbedding GetTextEmbeddingSetting(IServiceProvider services, string collectionName) { - var settings = services.GetRequiredService(); - var found = settings.Collections.FirstOrDefault(x => x.Name == collectionName)?.TextEmbedding; + var db = services.GetRequiredService(); + var config = db.GetKnowledgeCollectionConfig(collectionName); + var found = config?.TextEmbedding; + var provider = found?.Provider; + var model = found?.Model; + var dimension = found?.Dimension ?? 0; + if (found == null) { - found = settings.Default.TextEmbedding; + var settings = services.GetRequiredService(); + provider = settings.Default.TextEmbedding.Provider; + model = settings.Default.TextEmbedding.Model; + dimension = settings.Default.TextEmbedding.Dimension; } - var embedding = services.GetServices().FirstOrDefault(x => x.Provider == found.Provider); - var dimension = found.Dimension; + var embedding = services.GetServices().FirstOrDefault(x => x.Provider == provider); - if (found.Dimension <= 0) + if (dimension <= 0) { - dimension = GetLlmTextEmbeddingDimension(services, found.Provider, found.Model); + dimension = GetLlmTextEmbeddingDimension(services, provider, model); } - embedding.SetModelName(found.Model); + embedding.SetModelName(model); embedding.SetDimension(dimension); return embedding; } diff --git a/src/Plugins/BotSharp.Plugin.KnowledgeBase/KnowledgeBasePlugin.cs b/src/Plugins/BotSharp.Plugin.KnowledgeBase/KnowledgeBasePlugin.cs index 0f28aa5e1..35434e998 100644 --- a/src/Plugins/BotSharp.Plugin.KnowledgeBase/KnowledgeBasePlugin.cs +++ b/src/Plugins/BotSharp.Plugin.KnowledgeBase/KnowledgeBasePlugin.cs @@ -37,7 +37,8 @@ public bool AttachMenu(List menu) SubMenu = new List { new PluginMenuDef("Q & A", link: "page/knowledge-base/question-answer"), - new PluginMenuDef("Relationships", link: "page/knowledge-base/relationships") + new PluginMenuDef("Relationships", link: "page/knowledge-base/relationships"), + new PluginMenuDef("Documents", link: "page/knowledge-base/documents") } }); return true; diff --git a/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Common.cs b/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Common.cs new file mode 100644 index 000000000..51a746e66 --- /dev/null +++ b/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Common.cs @@ -0,0 +1,11 @@ +namespace BotSharp.Plugin.KnowledgeBase.Services; + +public partial class KnowledgeService +{ + public async Task RefreshVectorKnowledgeConfigs(VectorCollectionConfigsModel configs) + { + var db = _services.GetRequiredService(); + var saved = db.SaveKnowledgeCollectionConfigs(configs.Collections); + return await Task.FromResult(saved); + } +} diff --git a/src/Plugins/BotSharp.Plugin.MongoStorage/Repository/MongoRepository.Knowledge.cs b/src/Plugins/BotSharp.Plugin.MongoStorage/Repository/MongoRepository.Knowledge.cs new file mode 100644 index 000000000..121786d1e --- /dev/null +++ b/src/Plugins/BotSharp.Plugin.MongoStorage/Repository/MongoRepository.Knowledge.cs @@ -0,0 +1,12 @@ +using BotSharp.Abstraction.VectorStorage.Models; + +namespace BotSharp.Plugin.MongoStorage.Repository; + +public partial class MongoRepository +{ + public bool SaveKnowledgeCollectionConfigs(List configs) => + throw new NotImplementedException(); + + public VectorCollectionConfig? GetKnowledgeCollectionConfig(string collectionName) => + throw new NotImplementedException(); +} From 1c0818b11e513f3baac2a67c9babf4e870a42723 Mon Sep 17 00:00:00 2001 From: Jicheng Lu <103353@smsassist.com> Date: Mon, 9 Sep 2024 12:05:11 -0500 Subject: [PATCH 03/14] add mongo --- .../Repositories/IBotSharpRepository.cs | 2 +- .../Models/VectorCollectionConfigModel.cs | 6 +++ .../Repository/BotSharpDbContext.cs | 2 +- .../FileRepository.Knowledge.cs | 11 +++++- .../Services/KnowledgeService.Common.cs | 12 +++++- .../Services/KnowledgeService.cs | 3 ++ .../KnowledgeCollectionConfigDocument.cs | 10 +++++ .../KnowledgeEmbeddingConfigMongoModel.cs | 30 ++++++++++++++ .../MongoDbContext.cs | 3 ++ .../Repository/MongoRepository.Knowledge.cs | 39 +++++++++++++++++-- 10 files changed, 109 insertions(+), 9 deletions(-) create mode 100644 src/Plugins/BotSharp.Plugin.MongoStorage/Collections/KnowledgeCollectionConfigDocument.cs create mode 100644 src/Plugins/BotSharp.Plugin.MongoStorage/Models/KnowledgeEmbeddingConfigMongoModel.cs diff --git a/src/Infrastructure/BotSharp.Abstraction/Repositories/IBotSharpRepository.cs b/src/Infrastructure/BotSharp.Abstraction/Repositories/IBotSharpRepository.cs index 258fd3e6e..dd0b264a0 100644 --- a/src/Infrastructure/BotSharp.Abstraction/Repositories/IBotSharpRepository.cs +++ b/src/Infrastructure/BotSharp.Abstraction/Repositories/IBotSharpRepository.cs @@ -102,7 +102,7 @@ public interface IBotSharpRepository #endregion #region Knowledge - bool SaveKnowledgeCollectionConfigs(List configs); + bool ResetKnowledgeCollectionConfigs(List configs); VectorCollectionConfig? GetKnowledgeCollectionConfig(string collectionName); #endregion } diff --git a/src/Infrastructure/BotSharp.Abstraction/VectorStorage/Models/VectorCollectionConfigModel.cs b/src/Infrastructure/BotSharp.Abstraction/VectorStorage/Models/VectorCollectionConfigModel.cs index 45f2dfe03..45516ba30 100644 --- a/src/Infrastructure/BotSharp.Abstraction/VectorStorage/Models/VectorCollectionConfigModel.cs +++ b/src/Infrastructure/BotSharp.Abstraction/VectorStorage/Models/VectorCollectionConfigModel.cs @@ -16,6 +16,12 @@ public class VectorCollectionConfig [JsonPropertyName("text_embedding")] public KnowledgeEmbeddingConfig TextEmbedding { get; set; } + + [JsonPropertyName("create_date")] + public DateTime CreateDate { get; set; } = DateTime.UtcNow; + + [JsonPropertyName("create_user_id")] + public string CreateUserId { get; set; } = string.Empty; } public class KnowledgeEmbeddingConfig diff --git a/src/Infrastructure/BotSharp.Core/Repository/BotSharpDbContext.cs b/src/Infrastructure/BotSharp.Core/Repository/BotSharpDbContext.cs index ca5d810d9..a96d20dd6 100644 --- a/src/Infrastructure/BotSharp.Core/Repository/BotSharpDbContext.cs +++ b/src/Infrastructure/BotSharp.Core/Repository/BotSharpDbContext.cs @@ -235,7 +235,7 @@ public bool SaveTranslationMemories(IEnumerable inputs) #endregion #region Knowledge - public bool SaveKnowledgeCollectionConfigs(List configs) => + public bool ResetKnowledgeCollectionConfigs(List configs) => throw new NotImplementedException(); public VectorCollectionConfig? GetKnowledgeCollectionConfig(string collectionName) => diff --git a/src/Infrastructure/BotSharp.Core/Repository/FileRepository/FileRepository.Knowledge.cs b/src/Infrastructure/BotSharp.Core/Repository/FileRepository/FileRepository.Knowledge.cs index f3108439e..512eb965c 100644 --- a/src/Infrastructure/BotSharp.Core/Repository/FileRepository/FileRepository.Knowledge.cs +++ b/src/Infrastructure/BotSharp.Core/Repository/FileRepository/FileRepository.Knowledge.cs @@ -5,7 +5,7 @@ namespace BotSharp.Core.Repository; public partial class FileRepository { - public bool SaveKnowledgeCollectionConfigs(List configs) + public bool ResetKnowledgeCollectionConfigs(List configs) { var dir = Path.Combine(_dbSettings.FileRepository, KNOWLEDGE_FOLDER, VECTOR_FOLDER); if (!Directory.Exists(dir)) @@ -20,6 +20,13 @@ public bool SaveKnowledgeCollectionConfigs(List configs) public VectorCollectionConfig? GetKnowledgeCollectionConfig(string collectionName) { - throw new NotImplementedException(); + if (string.IsNullOrWhiteSpace(collectionName)) return null; + + var file = Path.Combine(_dbSettings.FileRepository, KNOWLEDGE_FOLDER, VECTOR_FOLDER, COLLECTION_CONFIG_FILE); + if (!File.Exists(file)) return null; + + var str = File.ReadAllText(file); + var configs = JsonSerializer.Deserialize>(str, _options) ?? new(); + return configs.FirstOrDefault(x => x.Name == collectionName); } } diff --git a/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Common.cs b/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Common.cs index 51a746e66..7d278773e 100644 --- a/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Common.cs +++ b/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Common.cs @@ -5,7 +5,17 @@ public partial class KnowledgeService public async Task RefreshVectorKnowledgeConfigs(VectorCollectionConfigsModel configs) { var db = _services.GetRequiredService(); - var saved = db.SaveKnowledgeCollectionConfigs(configs.Collections); + var collections = configs.Collections ?? new(); + var userService = _services.GetRequiredService(); + var user = await userService.GetUser(_user.Id); + + foreach (var collection in collections) + { + collection.CreateDate = DateTime.UtcNow; + collection.CreateUserId = user.Id; + } + + var saved = db.ResetKnowledgeCollectionConfigs(collections); return await Task.FromResult(saved); } } diff --git a/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.cs b/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.cs index c668e07eb..24db19b14 100644 --- a/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.cs +++ b/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.cs @@ -3,15 +3,18 @@ namespace BotSharp.Plugin.KnowledgeBase.Services; public partial class KnowledgeService : IKnowledgeService { private readonly IServiceProvider _services; + private readonly IUserIdentity _user; private readonly KnowledgeBaseSettings _settings; private readonly ILogger _logger; public KnowledgeService( IServiceProvider services, + IUserIdentity user, KnowledgeBaseSettings settings, ILogger logger) { _services = services; + _user = user; _settings = settings; _logger = logger; } diff --git a/src/Plugins/BotSharp.Plugin.MongoStorage/Collections/KnowledgeCollectionConfigDocument.cs b/src/Plugins/BotSharp.Plugin.MongoStorage/Collections/KnowledgeCollectionConfigDocument.cs new file mode 100644 index 000000000..1bf282e1e --- /dev/null +++ b/src/Plugins/BotSharp.Plugin.MongoStorage/Collections/KnowledgeCollectionConfigDocument.cs @@ -0,0 +1,10 @@ +namespace BotSharp.Plugin.MongoStorage.Collections; + +public class KnowledgeCollectionConfigDocument : MongoBase +{ + public string Name { get; set; } + public string Type { get; set; } + public KnowledgeEmbeddingConfigMongoModel TextEmbedding { get; set; } + public DateTime CreateDate { get; set; } + public string CreateUserId { get; set; } +} diff --git a/src/Plugins/BotSharp.Plugin.MongoStorage/Models/KnowledgeEmbeddingConfigMongoModel.cs b/src/Plugins/BotSharp.Plugin.MongoStorage/Models/KnowledgeEmbeddingConfigMongoModel.cs new file mode 100644 index 000000000..31049e5d4 --- /dev/null +++ b/src/Plugins/BotSharp.Plugin.MongoStorage/Models/KnowledgeEmbeddingConfigMongoModel.cs @@ -0,0 +1,30 @@ +using BotSharp.Abstraction.VectorStorage.Models; + +namespace BotSharp.Plugin.MongoStorage.Models; + +public class KnowledgeEmbeddingConfigMongoModel +{ + public string Provider { get; set; } + public string Model { get; set; } + public int Dimension { get; set; } + + public static KnowledgeEmbeddingConfigMongoModel ToMongoModel(KnowledgeEmbeddingConfig model) + { + return new KnowledgeEmbeddingConfigMongoModel + { + Provider = model.Provider, + Model = model.Model, + Dimension = model.Dimension + }; + } + + public static KnowledgeEmbeddingConfig ToDomainModel(KnowledgeEmbeddingConfigMongoModel model) + { + return new KnowledgeEmbeddingConfig + { + Provider = model.Provider, + Model = model.Model, + Dimension = model.Dimension + }; + } +} diff --git a/src/Plugins/BotSharp.Plugin.MongoStorage/MongoDbContext.cs b/src/Plugins/BotSharp.Plugin.MongoStorage/MongoDbContext.cs index 1c9f74672..649cd96fa 100644 --- a/src/Plugins/BotSharp.Plugin.MongoStorage/MongoDbContext.cs +++ b/src/Plugins/BotSharp.Plugin.MongoStorage/MongoDbContext.cs @@ -153,4 +153,7 @@ public IMongoCollection Plugins public IMongoCollection TranslationMemories => Database.GetCollection($"{_collectionPrefix}_TranslationMemories"); + + public IMongoCollection KnowledgeCollectionConfigs + => Database.GetCollection($"{_collectionPrefix}_KnowledgeCollectionConfigs"); } diff --git a/src/Plugins/BotSharp.Plugin.MongoStorage/Repository/MongoRepository.Knowledge.cs b/src/Plugins/BotSharp.Plugin.MongoStorage/Repository/MongoRepository.Knowledge.cs index 121786d1e..338670b36 100644 --- a/src/Plugins/BotSharp.Plugin.MongoStorage/Repository/MongoRepository.Knowledge.cs +++ b/src/Plugins/BotSharp.Plugin.MongoStorage/Repository/MongoRepository.Knowledge.cs @@ -4,9 +4,40 @@ namespace BotSharp.Plugin.MongoStorage.Repository; public partial class MongoRepository { - public bool SaveKnowledgeCollectionConfigs(List configs) => - throw new NotImplementedException(); + public bool ResetKnowledgeCollectionConfigs(List configs) + { + var docs = configs?.Select(x => new KnowledgeCollectionConfigDocument + { + Id = Guid.NewGuid().ToString(), + Name = x.Name, + Type = x.Type, + TextEmbedding = KnowledgeEmbeddingConfigMongoModel.ToMongoModel(x.TextEmbedding), + CreateDate = x.CreateDate, + CreateUserId = x.CreateUserId, + })?.ToList() ?? new List(); - public VectorCollectionConfig? GetKnowledgeCollectionConfig(string collectionName) => - throw new NotImplementedException(); + var filter = Builders.Filter.Empty; + _dc.KnowledgeCollectionConfigs.DeleteMany(filter); + _dc.KnowledgeCollectionConfigs.InsertMany(docs); + + return true; + } + + public VectorCollectionConfig? GetKnowledgeCollectionConfig(string collectionName) + { + if (string.IsNullOrWhiteSpace(collectionName)) return null; + + var filter = Builders.Filter.Eq(x => x.Name, collectionName); + var config = _dc.KnowledgeCollectionConfigs.Find(filter).FirstOrDefault(); + if (config == null) return null; + + return new VectorCollectionConfig + { + Name = config.Name, + Type = config.Type, + TextEmbedding = KnowledgeEmbeddingConfigMongoModel.ToDomainModel(config.TextEmbedding), + CreateDate = config.CreateDate, + CreateUserId = config.CreateUserId + }; + } } From 0cbe98693f4efce2d328d1eee857e65a86f58065 Mon Sep 17 00:00:00 2001 From: Jicheng Lu <103353@smsassist.com> Date: Mon, 9 Sep 2024 14:16:09 -0500 Subject: [PATCH 04/14] refine collection --- .../Knowledges/IKnowledgeService.cs | 4 +- .../Repositories/IBotSharpRepository.cs | 11 +++- .../Models/VectorCollectionConfigFilter.cs | 12 ++++ .../Models/VectorCollectionConfigModel.cs | 6 ++ .../VectorStorage/Models/VectorFilter.cs | 3 + .../Repository/BotSharpDbContext.cs | 7 ++- .../FileRepository.Knowledge.cs | 59 +++++++++++++++++-- .../Controllers/KnowledgeBaseController.cs | 12 ++-- .../CreateVectorCollectionRequest.cs | 21 +++++++ .../Helpers/KnowledgeSettingHelper.cs | 12 ++-- .../Services/KnowledgeService.Common.cs | 7 +-- .../Services/KnowledgeService.Vector.cs | 55 ++++++++++++++--- .../Services/KnowledgeService.cs | 16 +++++ .../Repository/MongoRepository.Knowledge.cs | 47 ++++++++++----- 14 files changed, 223 insertions(+), 49 deletions(-) create mode 100644 src/Infrastructure/BotSharp.Abstraction/VectorStorage/Models/VectorCollectionConfigFilter.cs create mode 100644 src/Infrastructure/BotSharp.OpenAPI/ViewModels/Knowledges/CreateVectorCollectionRequest.cs diff --git a/src/Infrastructure/BotSharp.Abstraction/Knowledges/IKnowledgeService.cs b/src/Infrastructure/BotSharp.Abstraction/Knowledges/IKnowledgeService.cs index ed3d7b3d2..64f6fdc08 100644 --- a/src/Infrastructure/BotSharp.Abstraction/Knowledges/IKnowledgeService.cs +++ b/src/Infrastructure/BotSharp.Abstraction/Knowledges/IKnowledgeService.cs @@ -6,9 +6,9 @@ namespace BotSharp.Abstraction.Knowledges; public interface IKnowledgeService { #region Vector - Task CreateVectorCollection(string collectionName, int dimension); + Task CreateVectorCollection(string collectionName, string collectionType, int dimension, string provider, string model); Task DeleteVectorCollection(string collectionName); - Task> GetVectorCollections(); + Task> GetVectorCollections(string type); Task> SearchVectorKnowledge(string query, string collectionName, VectorSearchOptions options); Task FeedVectorKnowledge(string collectionName, KnowledgeCreationModel model); Task> GetPagedVectorCollectionData(string collectionName, VectorFilter filter); diff --git a/src/Infrastructure/BotSharp.Abstraction/Repositories/IBotSharpRepository.cs b/src/Infrastructure/BotSharp.Abstraction/Repositories/IBotSharpRepository.cs index dd0b264a0..91fe9129f 100644 --- a/src/Infrastructure/BotSharp.Abstraction/Repositories/IBotSharpRepository.cs +++ b/src/Infrastructure/BotSharp.Abstraction/Repositories/IBotSharpRepository.cs @@ -102,7 +102,14 @@ public interface IBotSharpRepository #endregion #region Knowledge - bool ResetKnowledgeCollectionConfigs(List configs); - VectorCollectionConfig? GetKnowledgeCollectionConfig(string collectionName); + /// + /// Save knowledge collection configs. If reset is true, it will remove everything and then save the new configs. + /// + /// + /// + /// + bool AddKnowledgeCollectionConfigs(List configs, bool reset = false); + bool DeleteKnowledgeCollectionConfig(string collectionName); + IEnumerable GetKnowledgeCollectionConfigs(VectorCollectionConfigFilter filter); #endregion } diff --git a/src/Infrastructure/BotSharp.Abstraction/VectorStorage/Models/VectorCollectionConfigFilter.cs b/src/Infrastructure/BotSharp.Abstraction/VectorStorage/Models/VectorCollectionConfigFilter.cs new file mode 100644 index 000000000..338d30ccf --- /dev/null +++ b/src/Infrastructure/BotSharp.Abstraction/VectorStorage/Models/VectorCollectionConfigFilter.cs @@ -0,0 +1,12 @@ +namespace BotSharp.Abstraction.VectorStorage.Models; + +public class VectorCollectionConfigFilter +{ + public IEnumerable? CollectionNames { get; set; } + public IEnumerable? CollectionTypes { get; set; } + + public static VectorCollectionConfigFilter Empty() + { + return new VectorCollectionConfigFilter(); + } +} diff --git a/src/Infrastructure/BotSharp.Abstraction/VectorStorage/Models/VectorCollectionConfigModel.cs b/src/Infrastructure/BotSharp.Abstraction/VectorStorage/Models/VectorCollectionConfigModel.cs index 45516ba30..4fad3e33d 100644 --- a/src/Infrastructure/BotSharp.Abstraction/VectorStorage/Models/VectorCollectionConfigModel.cs +++ b/src/Infrastructure/BotSharp.Abstraction/VectorStorage/Models/VectorCollectionConfigModel.cs @@ -8,9 +8,15 @@ public class VectorCollectionConfigsModel public class VectorCollectionConfig { + /// + /// Must be unique + /// [JsonPropertyName("name")] public string Name { get; set; } + /// + /// Collection type, e.g., question-answer, document + /// [JsonPropertyName("type")] public string Type { get; set; } diff --git a/src/Infrastructure/BotSharp.Abstraction/VectorStorage/Models/VectorFilter.cs b/src/Infrastructure/BotSharp.Abstraction/VectorStorage/Models/VectorFilter.cs index 85b9dec29..78eb3691d 100644 --- a/src/Infrastructure/BotSharp.Abstraction/VectorStorage/Models/VectorFilter.cs +++ b/src/Infrastructure/BotSharp.Abstraction/VectorStorage/Models/VectorFilter.cs @@ -5,6 +5,9 @@ public class VectorFilter : StringIdPagination [JsonPropertyName("with_vector")] public bool WithVector { get; set; } + /// + /// For keyword search + /// [JsonPropertyName("search_pairs")] public IEnumerable? SearchPairs { get; set; } } \ No newline at end of file diff --git a/src/Infrastructure/BotSharp.Core/Repository/BotSharpDbContext.cs b/src/Infrastructure/BotSharp.Core/Repository/BotSharpDbContext.cs index a96d20dd6..3777b3e27 100644 --- a/src/Infrastructure/BotSharp.Core/Repository/BotSharpDbContext.cs +++ b/src/Infrastructure/BotSharp.Core/Repository/BotSharpDbContext.cs @@ -235,10 +235,13 @@ public bool SaveTranslationMemories(IEnumerable inputs) #endregion #region Knowledge - public bool ResetKnowledgeCollectionConfigs(List configs) => + public bool AddKnowledgeCollectionConfigs(List configs, bool reset = false) => throw new NotImplementedException(); - public VectorCollectionConfig? GetKnowledgeCollectionConfig(string collectionName) => + public bool DeleteKnowledgeCollectionConfig(string collectionName) => + throw new NotImplementedException(); + + public IEnumerable GetKnowledgeCollectionConfigs(VectorCollectionConfigFilter filter) => throw new NotImplementedException(); #endregion } diff --git a/src/Infrastructure/BotSharp.Core/Repository/FileRepository/FileRepository.Knowledge.cs b/src/Infrastructure/BotSharp.Core/Repository/FileRepository/FileRepository.Knowledge.cs index 512eb965c..b4069d330 100644 --- a/src/Infrastructure/BotSharp.Core/Repository/FileRepository/FileRepository.Knowledge.cs +++ b/src/Infrastructure/BotSharp.Core/Repository/FileRepository/FileRepository.Knowledge.cs @@ -5,7 +5,7 @@ namespace BotSharp.Core.Repository; public partial class FileRepository { - public bool ResetKnowledgeCollectionConfigs(List configs) + public bool AddKnowledgeCollectionConfigs(List configs, bool reset = false) { var dir = Path.Combine(_dbSettings.FileRepository, KNOWLEDGE_FOLDER, VECTOR_FOLDER); if (!Directory.Exists(dir)) @@ -14,19 +14,66 @@ public bool ResetKnowledgeCollectionConfigs(List configs } var configFile = Path.Combine(dir, COLLECTION_CONFIG_FILE); - File.WriteAllText(configFile, JsonSerializer.Serialize(configs ?? new(), _options)); + if (reset) + { + File.WriteAllText(configFile, JsonSerializer.Serialize(configs ?? new(), _options)); + return true; + } + + if (!File.Exists(configFile)) + { + File.Create(configFile); + } + + var str = File.ReadAllText(configFile); + var savedConfigs = JsonSerializer.Deserialize>(str, _options) ?? new(); + savedConfigs.AddRange(configs); + File.WriteAllText(configFile, JsonSerializer.Serialize(savedConfigs ?? new(), _options)); + return true; } - public VectorCollectionConfig? GetKnowledgeCollectionConfig(string collectionName) + public bool DeleteKnowledgeCollectionConfig(string collectionName) { - if (string.IsNullOrWhiteSpace(collectionName)) return null; + if (string.IsNullOrWhiteSpace(collectionName)) return false; + + var configFile = Path.Combine(_dbSettings.FileRepository, KNOWLEDGE_FOLDER, VECTOR_FOLDER, COLLECTION_CONFIG_FILE); + if (!File.Exists(configFile)) return false; + + var str = File.ReadAllText(configFile); + var savedConfigs = JsonSerializer.Deserialize>(str, _options) ?? new(); + savedConfigs = savedConfigs.Where(x => x.Name != collectionName).ToList(); + File.WriteAllText(configFile, JsonSerializer.Serialize(savedConfigs ?? new(), _options)); + + return true; + } + + public IEnumerable GetKnowledgeCollectionConfigs(VectorCollectionConfigFilter filter) + { + if (filter == null) + { + return Enumerable.Empty(); + } var file = Path.Combine(_dbSettings.FileRepository, KNOWLEDGE_FOLDER, VECTOR_FOLDER, COLLECTION_CONFIG_FILE); - if (!File.Exists(file)) return null; + if (!File.Exists(file)) + { + return Enumerable.Empty(); + } var str = File.ReadAllText(file); var configs = JsonSerializer.Deserialize>(str, _options) ?? new(); - return configs.FirstOrDefault(x => x.Name == collectionName); + + if (!filter.CollectionNames.IsNullOrEmpty()) + { + configs = configs.Where(x => filter.CollectionNames.Contains(x.Name)).ToList(); + } + + if (!filter.CollectionTypes.IsNullOrEmpty()) + { + configs = configs.Where(x => filter.CollectionTypes.Contains(x.Type)).ToList(); + } + + return configs; } } diff --git a/src/Infrastructure/BotSharp.OpenAPI/Controllers/KnowledgeBaseController.cs b/src/Infrastructure/BotSharp.OpenAPI/Controllers/KnowledgeBaseController.cs index 2e804a79a..51ddfa0bd 100644 --- a/src/Infrastructure/BotSharp.OpenAPI/Controllers/KnowledgeBaseController.cs +++ b/src/Infrastructure/BotSharp.OpenAPI/Controllers/KnowledgeBaseController.cs @@ -20,19 +20,19 @@ public KnowledgeBaseController(IKnowledgeService knowledgeService, IServiceProvi #region Vector [HttpGet("knowledge/vector/collections")] - public async Task> GetVectorCollections() + public async Task> GetVectorCollections([FromQuery] string type) { - return await _knowledgeService.GetVectorCollections(); + return await _knowledgeService.GetVectorCollections(type); } - [HttpPost("knowledge/vector/{collection}/create-collection/{dimension}")] - public async Task CreateVectorCollection([FromRoute] string collection, [FromRoute] int dimension) + [HttpPost("knowledge/vector/create-collection")] + public async Task CreateVectorCollection([FromBody] CreateVectorCollectionRequest request) { - return await _knowledgeService.CreateVectorCollection(collection, dimension); + return await _knowledgeService.CreateVectorCollection(request.CollectionName, request.CollectionType, request.Dimension, request.Provider, request.Model); } [HttpDelete("knowledge/vector/{collection}/delete-collection")] - public async Task GetVectorCollections([FromRoute] string collection) + public async Task DeleteVectorCollections([FromRoute] string collection) { return await _knowledgeService.DeleteVectorCollection(collection); } diff --git a/src/Infrastructure/BotSharp.OpenAPI/ViewModels/Knowledges/CreateVectorCollectionRequest.cs b/src/Infrastructure/BotSharp.OpenAPI/ViewModels/Knowledges/CreateVectorCollectionRequest.cs new file mode 100644 index 000000000..7df04941f --- /dev/null +++ b/src/Infrastructure/BotSharp.OpenAPI/ViewModels/Knowledges/CreateVectorCollectionRequest.cs @@ -0,0 +1,21 @@ +using System.Text.Json.Serialization; + +namespace BotSharp.OpenAPI.ViewModels.Knowledges; + +public class CreateVectorCollectionRequest +{ + [JsonPropertyName("collection_name")] + public string CollectionName { get; set; } + + [JsonPropertyName("collection_type")] + public string CollectionType { get; set; } + + [JsonPropertyName("provider")] + public string Provider { get; set; } + + [JsonPropertyName("model")] + public string Model { get; set; } + + [JsonPropertyName("dimension")] + public int Dimension { get; set; } +} diff --git a/src/Plugins/BotSharp.Plugin.KnowledgeBase/Helpers/KnowledgeSettingHelper.cs b/src/Plugins/BotSharp.Plugin.KnowledgeBase/Helpers/KnowledgeSettingHelper.cs index fd0452b1b..137579a79 100644 --- a/src/Plugins/BotSharp.Plugin.KnowledgeBase/Helpers/KnowledgeSettingHelper.cs +++ b/src/Plugins/BotSharp.Plugin.KnowledgeBase/Helpers/KnowledgeSettingHelper.cs @@ -5,10 +5,14 @@ public static class KnowledgeSettingHelper public static ITextEmbedding GetTextEmbeddingSetting(IServiceProvider services, string collectionName) { var db = services.GetRequiredService(); - var config = db.GetKnowledgeCollectionConfig(collectionName); - var found = config?.TextEmbedding; - var provider = found?.Provider; - var model = found?.Model; + var configs = db.GetKnowledgeCollectionConfigs(new VectorCollectionConfigFilter + { + CollectionNames = new[] { collectionName } + }); + + var found = configs?.FirstOrDefault()?.TextEmbedding; + var provider = found?.Provider ?? string.Empty; + var model = found?.Model ?? string.Empty; var dimension = found?.Dimension ?? 0; if (found == null) diff --git a/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Common.cs b/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Common.cs index 7d278773e..4995d0ca4 100644 --- a/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Common.cs +++ b/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Common.cs @@ -6,16 +6,15 @@ public async Task RefreshVectorKnowledgeConfigs(VectorCollectionConfigsMod { var db = _services.GetRequiredService(); var collections = configs.Collections ?? new(); - var userService = _services.GetRequiredService(); - var user = await userService.GetUser(_user.Id); + var userId = await GetUserId(); foreach (var collection in collections) { collection.CreateDate = DateTime.UtcNow; - collection.CreateUserId = user.Id; + collection.CreateUserId = userId; } - var saved = db.ResetKnowledgeCollectionConfigs(collections); + var saved = db.AddKnowledgeCollectionConfigs(collections, reset: true); return await Task.FromResult(saved); } } diff --git a/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Vector.cs b/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Vector.cs index 9afd7c300..32a8d72d2 100644 --- a/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Vector.cs +++ b/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Vector.cs @@ -3,7 +3,7 @@ namespace BotSharp.Plugin.KnowledgeBase.Services; public partial class KnowledgeService { #region Collection - public async Task CreateVectorCollection(string collectionName, int dimension) + public async Task CreateVectorCollection(string collectionName, string collectionType, int dimension, string provider, string model) { try { @@ -12,8 +12,32 @@ public async Task CreateVectorCollection(string collectionName, int dimens return false; } - var db = GetVectorDb(); - return await db.CreateCollection(collectionName, dimension); + var vectorDb = GetVectorDb(); + var created = await vectorDb.CreateCollection(collectionName, dimension); + if (created) + { + var db = _services.GetRequiredService(); + var userId = await GetUserId(); + + db.AddKnowledgeCollectionConfigs(new List + { + new VectorCollectionConfig + { + Name = collectionName, + Type = collectionType, + TextEmbedding = new KnowledgeEmbeddingConfig + { + Provider = provider, + Model = model, + Dimension = dimension + }, + CreateDate = DateTime.UtcNow, + CreateUserId = userId + } + }); + } + + return created; } catch (Exception ex) { @@ -22,12 +46,19 @@ public async Task CreateVectorCollection(string collectionName, int dimens } } - public async Task> GetVectorCollections() + public async Task> GetVectorCollections(string type) { try { - var db = GetVectorDb(); - return await db.GetCollections(); + var db = _services.GetRequiredService(); + var collectionNames = db.GetKnowledgeCollectionConfigs(new VectorCollectionConfigFilter + { + CollectionTypes = new[] { type } + }).Select(x => x.Name).ToList(); + + var vectorDb = GetVectorDb(); + var vectorCollections = await vectorDb.GetCollections(); + return vectorCollections.Where(x => collectionNames.Contains(x)); } catch (Exception ex) { @@ -45,8 +76,16 @@ public async Task DeleteVectorCollection(string collectionName) return false; } - var db = GetVectorDb(); - return await db.DeleteCollection(collectionName); + var vectorDb = GetVectorDb(); + var deleted = await vectorDb.DeleteCollection(collectionName); + + if (deleted) + { + var db = _services.GetRequiredService(); + db.DeleteKnowledgeCollectionConfig(collectionName); + } + + return deleted; } catch (Exception ex) { diff --git a/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.cs b/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.cs index 24db19b14..64ae2a47d 100644 --- a/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.cs +++ b/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.cs @@ -35,4 +35,20 @@ private ITextEmbedding GetTextEmbedding(string collection) { return KnowledgeSettingHelper.GetTextEmbeddingSetting(_services, collection); } + + private VectorCollectionConfig? GetVectorCollectionConfig(string collection) + { + var db = _services.GetRequiredService(); + return db.GetKnowledgeCollectionConfigs(new VectorCollectionConfigFilter + { + CollectionNames = new[] { collection } + })?.FirstOrDefault(); + } + + private async Task GetUserId() + { + var userService = _services.GetRequiredService(); + var user = await userService.GetUser(_user.Id); + return user.Id; + } } diff --git a/src/Plugins/BotSharp.Plugin.MongoStorage/Repository/MongoRepository.Knowledge.cs b/src/Plugins/BotSharp.Plugin.MongoStorage/Repository/MongoRepository.Knowledge.cs index 338670b36..b2d341dc6 100644 --- a/src/Plugins/BotSharp.Plugin.MongoStorage/Repository/MongoRepository.Knowledge.cs +++ b/src/Plugins/BotSharp.Plugin.MongoStorage/Repository/MongoRepository.Knowledge.cs @@ -4,7 +4,7 @@ namespace BotSharp.Plugin.MongoStorage.Repository; public partial class MongoRepository { - public bool ResetKnowledgeCollectionConfigs(List configs) + public bool AddKnowledgeCollectionConfigs(List configs, bool reset = false) { var docs = configs?.Select(x => new KnowledgeCollectionConfigDocument { @@ -16,28 +16,45 @@ public bool ResetKnowledgeCollectionConfigs(List configs CreateUserId = x.CreateUserId, })?.ToList() ?? new List(); - var filter = Builders.Filter.Empty; - _dc.KnowledgeCollectionConfigs.DeleteMany(filter); - _dc.KnowledgeCollectionConfigs.InsertMany(docs); + if (reset) + { + var filter = Builders.Filter.Empty; + _dc.KnowledgeCollectionConfigs.DeleteMany(filter); + } + _dc.KnowledgeCollectionConfigs.InsertMany(docs); return true; } - public VectorCollectionConfig? GetKnowledgeCollectionConfig(string collectionName) + public bool DeleteKnowledgeCollectionConfig(string collectionName) { - if (string.IsNullOrWhiteSpace(collectionName)) return null; + if (string.IsNullOrWhiteSpace(collectionName)) return false; var filter = Builders.Filter.Eq(x => x.Name, collectionName); - var config = _dc.KnowledgeCollectionConfigs.Find(filter).FirstOrDefault(); - if (config == null) return null; + var deleted = _dc.KnowledgeCollectionConfigs.DeleteMany(filter); + return deleted.DeletedCount > 0; + } - return new VectorCollectionConfig + public IEnumerable GetKnowledgeCollectionConfigs(VectorCollectionConfigFilter filter) + { + if (filter == null) { - Name = config.Name, - Type = config.Type, - TextEmbedding = KnowledgeEmbeddingConfigMongoModel.ToDomainModel(config.TextEmbedding), - CreateDate = config.CreateDate, - CreateUserId = config.CreateUserId - }; + return Enumerable.Empty(); + } + + var builder = Builders.Filter; + var filters = new List> { builder.Empty }; + + var configs = _dc.KnowledgeCollectionConfigs.Find(Builders.Filter.And(filters)).ToList(); + + + return configs.Select(x => new VectorCollectionConfig + { + Name = x.Name, + Type = x.Type, + TextEmbedding = KnowledgeEmbeddingConfigMongoModel.ToDomainModel(x.TextEmbedding), + CreateDate = x.CreateDate, + CreateUserId= x.CreateUserId + }); } } From 16c50075ee1c45b28858992b0ce858a3e6c742af Mon Sep 17 00:00:00 2001 From: Jicheng Lu <103353@smsassist.com> Date: Mon, 9 Sep 2024 17:42:09 -0500 Subject: [PATCH 05/14] add upload vector knowledge files --- .../Files/IFileStorageService.cs | 4 + .../Knowledges/IKnowledgeService.cs | 1 + .../Models/UploadKnowledgeResponse.cs | 10 +++ .../Utilities/StringExtensions.cs | 7 ++ .../LocalFileStorageService.Knowledge.cs | 34 ++++++++ .../Storage/LocalFileStorageService.cs | 2 + .../FileRepository/FileRepository.cs | 2 +- .../Controllers/ConversationController.cs | 3 +- .../Controllers/KnowledgeBaseController.cs | 51 ++++++----- .../VectorKnowledgeUploadRequest.cs | 6 ++ .../Provider/NativeWhisperProvider.cs | 2 +- .../Services/KnowledgeService.Document.cs | 85 +++++++++++++++++++ src/WebStarter/appsettings.json | 3 +- 13 files changed, 182 insertions(+), 28 deletions(-) create mode 100644 src/Infrastructure/BotSharp.Abstraction/Knowledges/Models/UploadKnowledgeResponse.cs create mode 100644 src/Infrastructure/BotSharp.Core/Files/Services/Storage/LocalFileStorageService.Knowledge.cs create mode 100644 src/Infrastructure/BotSharp.OpenAPI/ViewModels/Knowledges/VectorKnowledgeUploadRequest.cs diff --git a/src/Infrastructure/BotSharp.Abstraction/Files/IFileStorageService.cs b/src/Infrastructure/BotSharp.Abstraction/Files/IFileStorageService.cs index 373466c41..d16d5012d 100644 --- a/src/Infrastructure/BotSharp.Abstraction/Files/IFileStorageService.cs +++ b/src/Infrastructure/BotSharp.Abstraction/Files/IFileStorageService.cs @@ -61,4 +61,8 @@ public interface IFileStorageService bool SaveSpeechFile(string conversationId, string fileName, BinaryData data); BinaryData GetSpeechFile(string conversationId, string fileName); #endregion + + #region Knowledge + bool SaveKnowledgeFiles(string collectionName, string fileId, string fileName, Stream stream); + #endregion } diff --git a/src/Infrastructure/BotSharp.Abstraction/Knowledges/IKnowledgeService.cs b/src/Infrastructure/BotSharp.Abstraction/Knowledges/IKnowledgeService.cs index 64f6fdc08..3e4aecd8d 100644 --- a/src/Infrastructure/BotSharp.Abstraction/Knowledges/IKnowledgeService.cs +++ b/src/Infrastructure/BotSharp.Abstraction/Knowledges/IKnowledgeService.cs @@ -22,6 +22,7 @@ public interface IKnowledgeService #endregion #region Document + Task UploadVectorKnowledge(string collectionName, IEnumerable files); #endregion #region Common diff --git a/src/Infrastructure/BotSharp.Abstraction/Knowledges/Models/UploadKnowledgeResponse.cs b/src/Infrastructure/BotSharp.Abstraction/Knowledges/Models/UploadKnowledgeResponse.cs new file mode 100644 index 000000000..3f5df77a5 --- /dev/null +++ b/src/Infrastructure/BotSharp.Abstraction/Knowledges/Models/UploadKnowledgeResponse.cs @@ -0,0 +1,10 @@ +namespace BotSharp.Abstraction.Knowledges.Models; + +public class UploadKnowledgeResponse +{ + [JsonPropertyName("success")] + public IEnumerable Success { get; set; } = new List(); + + [JsonPropertyName("failed")] + public IEnumerable Failed { get; set; } = new List(); +} diff --git a/src/Infrastructure/BotSharp.Abstraction/Utilities/StringExtensions.cs b/src/Infrastructure/BotSharp.Abstraction/Utilities/StringExtensions.cs index 625a1f4b2..e5009698e 100644 --- a/src/Infrastructure/BotSharp.Abstraction/Utilities/StringExtensions.cs +++ b/src/Infrastructure/BotSharp.Abstraction/Utilities/StringExtensions.cs @@ -44,6 +44,13 @@ public static bool IsEqualTo(this string? str1, string? str2, StringComparison o return str1.Equals(str2, option); } + public static string RemoveWhiteSpaces(this string? str) + { + if (string.IsNullOrWhiteSpace(str)) return string.Empty; + + return str.Replace(" ", "").Replace("\t", "").Replace("\n", "").Replace("\r", ""); + } + public static string JsonContent(this string text) { var m = Regex.Match(text, @"\{(?:[^{}]|(?\{)|(?<-open>\}))+(?(open)(?!))\}"); diff --git a/src/Infrastructure/BotSharp.Core/Files/Services/Storage/LocalFileStorageService.Knowledge.cs b/src/Infrastructure/BotSharp.Core/Files/Services/Storage/LocalFileStorageService.Knowledge.cs new file mode 100644 index 000000000..f417654e9 --- /dev/null +++ b/src/Infrastructure/BotSharp.Core/Files/Services/Storage/LocalFileStorageService.Knowledge.cs @@ -0,0 +1,34 @@ +using System.IO; + +namespace BotSharp.Core.Files.Services; + +public partial class LocalFileStorageService +{ + public bool SaveKnowledgeFiles(string collectionName, string fileId, string fileName, Stream stream) + { + if (string.IsNullOrWhiteSpace(collectionName) || string.IsNullOrWhiteSpace(fileId)) + { + return false; + } + + try + { + var dir = Path.Combine(_baseDir, KNOWLEDGE_FOLDER, KNOWLEDGE_DOC_FOLDER, collectionName, fileId); + if (ExistDirectory(dir)) + { + Directory.Delete(dir); + } + Directory.CreateDirectory(dir); + + var filePath = Path.Combine(dir, fileName); + using var fs = File.Create(filePath); + stream.CopyTo(fs); + return true; + } + catch (Exception ex) + { + _logger.LogWarning($"Error when saving knowledge file (Collection: {collectionName}, File name: {fileName}). {ex.Message}\r\n{ex.InnerException}"); + return false; + } + } +} diff --git a/src/Infrastructure/BotSharp.Core/Files/Services/Storage/LocalFileStorageService.cs b/src/Infrastructure/BotSharp.Core/Files/Services/Storage/LocalFileStorageService.cs index 750803c42..177381095 100644 --- a/src/Infrastructure/BotSharp.Core/Files/Services/Storage/LocalFileStorageService.cs +++ b/src/Infrastructure/BotSharp.Core/Files/Services/Storage/LocalFileStorageService.cs @@ -19,6 +19,8 @@ public partial class LocalFileStorageService : IFileStorageService private const string USER_AVATAR_FOLDER = "avatar"; private const string SESSION_FOLDER = "sessions"; private const string TEXT_TO_SPEECH_FOLDER = "speeches"; + private const string KNOWLEDGE_FOLDER = "knowledgebase"; + private const string KNOWLEDGE_DOC_FOLDER = "document"; public LocalFileStorageService( BotSharpDatabaseSettings dbSettings, diff --git a/src/Infrastructure/BotSharp.Core/Repository/FileRepository/FileRepository.cs b/src/Infrastructure/BotSharp.Core/Repository/FileRepository/FileRepository.cs index 9e220d60e..713f1d8bc 100644 --- a/src/Infrastructure/BotSharp.Core/Repository/FileRepository/FileRepository.cs +++ b/src/Infrastructure/BotSharp.Core/Repository/FileRepository/FileRepository.cs @@ -40,7 +40,7 @@ public partial class FileRepository : IBotSharpRepository private const string AGENT_RESPONSES_FOLDER = "responses"; private const string AGENT_TASKS_FOLDER = "tasks"; private const string USERS_FOLDER = "users"; - private const string KNOWLEDGE_FOLDER = "knowledge"; + private const string KNOWLEDGE_FOLDER = "knowledgebase"; private const string VECTOR_FOLDER = "vector"; private const string COLLECTION_CONFIG_FILE = "collection-config.json"; diff --git a/src/Infrastructure/BotSharp.OpenAPI/Controllers/ConversationController.cs b/src/Infrastructure/BotSharp.OpenAPI/Controllers/ConversationController.cs index 565677f7c..b4aa32fb8 100644 --- a/src/Infrastructure/BotSharp.OpenAPI/Controllers/ConversationController.cs +++ b/src/Infrastructure/BotSharp.OpenAPI/Controllers/ConversationController.cs @@ -366,8 +366,7 @@ await conv.SendMessage(agentId, inputMsg, #region Files and attachments [HttpPost("/conversation/{conversationId}/attachments")] - public IActionResult UploadAttachments([FromRoute] string conversationId, - IFormFile[] files) + public IActionResult UploadAttachments([FromRoute] string conversationId, IFormFile[] files) { if (files != null && files.Length > 0) { diff --git a/src/Infrastructure/BotSharp.OpenAPI/Controllers/KnowledgeBaseController.cs b/src/Infrastructure/BotSharp.OpenAPI/Controllers/KnowledgeBaseController.cs index 51ddfa0bd..b5da8213a 100644 --- a/src/Infrastructure/BotSharp.OpenAPI/Controllers/KnowledgeBaseController.cs +++ b/src/Infrastructure/BotSharp.OpenAPI/Controllers/KnowledgeBaseController.cs @@ -99,29 +99,6 @@ public async Task DeleteVectorCollectionData([FromRoute] string collection { return await _knowledgeService.DeleteVectorCollectionData(collection, id); } - - [HttpPost("/knowledge/vector/{collection}/upload")] - public async Task UploadVectorKnowledge([FromRoute] string collection, IFormFile file, [FromForm] int? startPageNum, [FromForm] int? endPageNum) - { - var setttings = _services.GetRequiredService(); - var textConverter = _services.GetServices().FirstOrDefault(x => x.Provider == setttings.Pdf2TextConverter.Provider); - - var filePath = Path.GetTempFileName(); - using (var stream = new FileStream(filePath, FileMode.Create, FileAccess.Write, FileShare.None)) - { - await file.CopyToAsync(stream); - await stream.FlushAsync(); - } - - var content = await textConverter.ConvertPdfToText(filePath, startPageNum, endPageNum); - await _knowledgeService.FeedVectorKnowledge(collection, new KnowledgeCreationModel - { - Content = content - }); - - System.IO.File.Delete(filePath); - return Ok(new { count = 1, file.Length }); - } #endregion @@ -144,7 +121,35 @@ public async Task SearchGraphKnowledge([FromBody] Searc #region Document + //[HttpPost("/knowledge/vector/{collection}/upload")] + //public async Task UploadVectorKnowledge([FromRoute] string collection, IFormFile file, [FromForm] int? startPageNum, [FromForm] int? endPageNum) + //{ + // var setttings = _services.GetRequiredService(); + // var textConverter = _services.GetServices().FirstOrDefault(x => x.Provider == setttings.Pdf2TextConverter.Provider); + + // var filePath = Path.GetTempFileName(); + // using (var stream = new FileStream(filePath, FileMode.Create, FileAccess.Write, FileShare.None)) + // { + // await file.CopyToAsync(stream); + // await stream.FlushAsync(); + // } + + // var content = await textConverter.ConvertPdfToText(filePath, startPageNum, endPageNum); + // await _knowledgeService.FeedVectorKnowledge(collection, new KnowledgeCreationModel + // { + // Content = content + // }); + + // System.IO.File.Delete(filePath); + // return Ok(new { count = 1, file.Length }); + //} + [HttpPost("/knowledge/vector/{collection}/upload")] + public async Task UploadVectorKnowledge([FromRoute] string collection, [FromBody] VectorKnowledgeUploadRequest request) + { + var response = await _knowledgeService.UploadVectorKnowledge(collection, request.Files); + return response; + } #endregion #region Common diff --git a/src/Infrastructure/BotSharp.OpenAPI/ViewModels/Knowledges/VectorKnowledgeUploadRequest.cs b/src/Infrastructure/BotSharp.OpenAPI/ViewModels/Knowledges/VectorKnowledgeUploadRequest.cs new file mode 100644 index 000000000..c49bdec11 --- /dev/null +++ b/src/Infrastructure/BotSharp.OpenAPI/ViewModels/Knowledges/VectorKnowledgeUploadRequest.cs @@ -0,0 +1,6 @@ +namespace BotSharp.OpenAPI.ViewModels.Knowledges; + +public class VectorKnowledgeUploadRequest +{ + public IEnumerable Files { get; set; } = new List(); +} diff --git a/src/Plugins/BotSharp.Plugin.AudioHandler/Provider/NativeWhisperProvider.cs b/src/Plugins/BotSharp.Plugin.AudioHandler/Provider/NativeWhisperProvider.cs index e6ac230e9..1947ebc4c 100644 --- a/src/Plugins/BotSharp.Plugin.AudioHandler/Provider/NativeWhisperProvider.cs +++ b/src/Plugins/BotSharp.Plugin.AudioHandler/Provider/NativeWhisperProvider.cs @@ -14,7 +14,7 @@ public class NativeWhisperProvider : IAudioCompletion private readonly IFileStorageService _fileStorage; private readonly ILogger _logger; - public string Provider => "native"; + public string Provider => "native-whisper"; public NativeWhisperProvider( BotSharpDatabaseSettings dbSettings, diff --git a/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Document.cs b/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Document.cs index 0e4710bc9..ddbca650e 100644 --- a/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Document.cs +++ b/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Document.cs @@ -1,7 +1,89 @@ +using BotSharp.Abstraction.Files; +using BotSharp.Abstraction.Files.Models; +using BotSharp.Abstraction.Files.Utilities; + namespace BotSharp.Plugin.KnowledgeBase.Services; public partial class KnowledgeService { + public async Task UploadVectorKnowledge(string collectionName, IEnumerable files) + { + if (string.IsNullOrWhiteSpace(collectionName)) + { + return new UploadKnowledgeResponse + { + Success = [], + Failed = files.Select(x => x.FileName) + }; + } + + var fileStoreage = _services.GetRequiredService(); + var cleanCollectionName = collectionName.RemoveWhiteSpaces(); + var successFiles = new List(); + var failedFiles = new List(); + + foreach (var file in files) + { + if (string.IsNullOrWhiteSpace(file.FileData) || string.IsNullOrWhiteSpace(file.FileName)) + { + continue; + } + + var dataIds = new List(); + + try + { + // Chop text + var (contentType, bytes) = FileUtility.GetFileInfoFromData(file.FileData); + using var stream = new MemoryStream(bytes); + using var reader = new StreamReader(stream); + var content = await reader.ReadToEndAsync(); + + // Save file + var fileId = Guid.NewGuid().ToString(); + var saved = fileStoreage.SaveKnowledgeFiles(cleanCollectionName, fileId, file.FileName, stream); + reader.Close(); + stream.Close(); + + if (!saved) + { + failedFiles.Add(file.FileName); + continue; + } + + // Text embedding + var vectorDb = GetVectorDb(); + var textEmbedding = GetTextEmbedding(collectionName); + var vector = await textEmbedding.GetVectorAsync(content); + + // Save to vector db + var dataId = Guid.NewGuid(); + await vectorDb.Upsert(collectionName, dataId, vector, content, new Dictionary + { + { "fileName", file.FileName }, + { "fileId", fileId }, + { "page", "0" } + }); + + dataIds.Add(dataId.ToString()); + successFiles.Add(file.FileName); + } + catch (Exception ex) + { + _logger.LogError($"Error when processing knowledge file ({file.FileName}). {ex.Message}\r\n{ex.InnerException}"); + failedFiles.Add(file.FileName); + continue; + } + } + + return new UploadKnowledgeResponse + { + Success = successFiles, + Failed = failedFiles + }; + } + + public async Task FeedVectorKnowledge(string collectionName, KnowledgeCreationModel knowledge) { var index = 0; @@ -24,4 +106,7 @@ public async Task FeedVectorKnowledge(string collectionName, KnowledgeCreationMo Console.WriteLine($"Saved vector {index}/{lines.Count}: {line}\n"); } } + + #region Private methods + #endregion } diff --git a/src/WebStarter/appsettings.json b/src/WebStarter/appsettings.json index 4fcc745ca..a755c1188 100644 --- a/src/WebStarter/appsettings.json +++ b/src/WebStarter/appsettings.json @@ -141,7 +141,8 @@ "Enable": true, "BatchSize": 50, "MessageLimit": 2, - "BufferHours": 12 + "BufferHours": 12, + "ExcludeAgentIds": [] }, "RateLimit": { "MaxConversationPerDay": 100, From 4cdb1981ab486d7fde4341657fe147103c867e4b Mon Sep 17 00:00:00 2001 From: Jicheng Lu <103353@smsassist.com> Date: Tue, 10 Sep 2024 14:02:25 -0500 Subject: [PATCH 06/14] sync file and vector --- .../Files/IFileStorageService.cs | 18 ++- .../Files/Models/ExternalFileModel.cs | 7 ++ .../{InputFileModel.cs => FileDataModel.cs} | 2 +- .../Knowledges/IKnowledgeService.cs | 4 +- .../Knowledges/Models/KnowledgeDocMetaData.cs | 24 ++++ .../Models/UploadKnowledgeResponse.cs | 8 ++ .../Repositories/IBotSharpRepository.cs | 2 +- .../Utilities/StringExtensions.cs | 2 +- .../VectorStorage/IVectorDb.cs | 2 +- .../Models/VectorCollectionConfigFilter.cs | 1 + .../Models/VectorCollectionConfigModel.cs | 15 ++- .../LocalFileStorageService.Conversation.cs | 2 +- .../LocalFileStorageService.Knowledge.cs | 34 ------ .../LocalFileStorageService.KnowledgeBase.cs | 113 ++++++++++++++++++ .../Storage/LocalFileStorageService.User.cs | 2 +- .../Storage/LocalFileStorageService.cs | 9 ++ ...dge.cs => FileRepository.KnowledgeBase.cs} | 53 ++++++-- .../Controllers/KnowledgeBaseController.cs | 38 ++---- .../Controllers/UserController.cs | 2 +- .../Conversations/InputMessageFiles.cs | 2 +- .../VectorKnowledgeUploadRequest.cs | 2 +- .../Functions/EditImageFn.cs | 4 +- .../Functions/GenerateImageFn.cs | 2 +- .../MemVecDb/MemoryVectorDb.cs | 2 +- .../Services/KnowledgeService.Common.cs | 8 -- .../Services/KnowledgeService.Document.cs | 102 ++++++++++++++-- .../Services/KnowledgeService.Vector.cs | 21 +++- .../Providers/FaissDb.cs | 2 +- .../KnowledgeCollectionConfigDocument.cs | 3 +- .../KnowledgeVectorStorageConfigMongoModel.cs | 24 ++++ .../Repository/MongoRepository.Knowledge.cs | 60 ---------- .../MongoRepository.KnowledgeBase.cs | 113 ++++++++++++++++++ .../BotSharp.Plugin.Qdrant/QdrantDb.cs | 35 ++++-- .../SemanticKernelMemoryStoreProvider.cs | 14 +-- 34 files changed, 530 insertions(+), 202 deletions(-) create mode 100644 src/Infrastructure/BotSharp.Abstraction/Files/Models/ExternalFileModel.cs rename src/Infrastructure/BotSharp.Abstraction/Files/Models/{InputFileModel.cs => FileDataModel.cs} (91%) create mode 100644 src/Infrastructure/BotSharp.Abstraction/Knowledges/Models/KnowledgeDocMetaData.cs delete mode 100644 src/Infrastructure/BotSharp.Core/Files/Services/Storage/LocalFileStorageService.Knowledge.cs create mode 100644 src/Infrastructure/BotSharp.Core/Files/Services/Storage/LocalFileStorageService.KnowledgeBase.cs rename src/Infrastructure/BotSharp.Core/Repository/FileRepository/{FileRepository.Knowledge.cs => FileRepository.KnowledgeBase.cs} (57%) create mode 100644 src/Plugins/BotSharp.Plugin.MongoStorage/Models/KnowledgeVectorStorageConfigMongoModel.cs delete mode 100644 src/Plugins/BotSharp.Plugin.MongoStorage/Repository/MongoRepository.Knowledge.cs create mode 100644 src/Plugins/BotSharp.Plugin.MongoStorage/Repository/MongoRepository.KnowledgeBase.cs diff --git a/src/Infrastructure/BotSharp.Abstraction/Files/IFileStorageService.cs b/src/Infrastructure/BotSharp.Abstraction/Files/IFileStorageService.cs index d16d5012d..277e122a3 100644 --- a/src/Infrastructure/BotSharp.Abstraction/Files/IFileStorageService.cs +++ b/src/Infrastructure/BotSharp.Abstraction/Files/IFileStorageService.cs @@ -37,7 +37,7 @@ public interface IFileStorageService IEnumerable GetMessageFiles(string conversationId, IEnumerable messageIds, string source, IEnumerable? contentTypes = null); string GetMessageFile(string conversationId, string messageId, string source, string index, string fileName); IEnumerable GetMessagesWithFile(string conversationId, IEnumerable messageIds); - bool SaveMessageFiles(string conversationId, string messageId, string source, List files); + bool SaveMessageFiles(string conversationId, string messageId, string source, List files); /// /// Delete files under messages @@ -54,7 +54,7 @@ public interface IFileStorageService #region User string GetUserAvatar(); - bool SaveUserAvatar(InputFileModel file); + bool SaveUserAvatar(FileDataModel file); #endregion #region Speech @@ -63,6 +63,18 @@ public interface IFileStorageService #endregion #region Knowledge - bool SaveKnowledgeFiles(string collectionName, string fileId, string fileName, Stream stream); + bool SaveKnowledgeBaseFile(string collectionName, string vectorStoreProvider, string fileId, string fileName, Stream stream); + + /// + /// Delete files in a knowledge collection. If fileId is null, remove all files in the collection. + /// + /// + /// + /// + bool DeleteKnowledgeFile(string collectionName, string vectorStoreProvider, string? fileId = null); + + bool SaveKnolwedgeBaseFileMeta(string collectionName, string vectorStoreProvider,string fileId, KnowledgeDocMetaData metaData); + + KnowledgeDocMetaData? GetKnowledgeBaseFileMeta(string collectionName, string vectorStoreProvider, string fileId); #endregion } diff --git a/src/Infrastructure/BotSharp.Abstraction/Files/Models/ExternalFileModel.cs b/src/Infrastructure/BotSharp.Abstraction/Files/Models/ExternalFileModel.cs new file mode 100644 index 000000000..6cd2c75d8 --- /dev/null +++ b/src/Infrastructure/BotSharp.Abstraction/Files/Models/ExternalFileModel.cs @@ -0,0 +1,7 @@ +namespace BotSharp.Abstraction.Files.Models; + +public class ExternalFileModel : FileDataModel +{ + [JsonPropertyName("file_url")] + public string FileUrl { get; set; } = string.Empty; +} diff --git a/src/Infrastructure/BotSharp.Abstraction/Files/Models/InputFileModel.cs b/src/Infrastructure/BotSharp.Abstraction/Files/Models/FileDataModel.cs similarity index 91% rename from src/Infrastructure/BotSharp.Abstraction/Files/Models/InputFileModel.cs rename to src/Infrastructure/BotSharp.Abstraction/Files/Models/FileDataModel.cs index 7eb350e3d..d2a2e1f0d 100644 --- a/src/Infrastructure/BotSharp.Abstraction/Files/Models/InputFileModel.cs +++ b/src/Infrastructure/BotSharp.Abstraction/Files/Models/FileDataModel.cs @@ -1,6 +1,6 @@ namespace BotSharp.Abstraction.Files.Models; -public class InputFileModel : FileBase +public class FileDataModel : FileBase { /// /// File name with extension diff --git a/src/Infrastructure/BotSharp.Abstraction/Knowledges/IKnowledgeService.cs b/src/Infrastructure/BotSharp.Abstraction/Knowledges/IKnowledgeService.cs index 3e4aecd8d..35bb77ba2 100644 --- a/src/Infrastructure/BotSharp.Abstraction/Knowledges/IKnowledgeService.cs +++ b/src/Infrastructure/BotSharp.Abstraction/Knowledges/IKnowledgeService.cs @@ -10,7 +10,6 @@ public interface IKnowledgeService Task DeleteVectorCollection(string collectionName); Task> GetVectorCollections(string type); Task> SearchVectorKnowledge(string query, string collectionName, VectorSearchOptions options); - Task FeedVectorKnowledge(string collectionName, KnowledgeCreationModel model); Task> GetPagedVectorCollectionData(string collectionName, VectorFilter filter); Task DeleteVectorCollectionData(string collectionName, string id); Task CreateVectorCollectionData(string collectionName, VectorCreateModel create); @@ -22,7 +21,8 @@ public interface IKnowledgeService #endregion #region Document - Task UploadVectorKnowledge(string collectionName, IEnumerable files); + Task UploadKnowledgeDocuments(string collectionName, IEnumerable files); + Task DeleteKnowledgeDocument(string collectionName, string fileId); #endregion #region Common diff --git a/src/Infrastructure/BotSharp.Abstraction/Knowledges/Models/KnowledgeDocMetaData.cs b/src/Infrastructure/BotSharp.Abstraction/Knowledges/Models/KnowledgeDocMetaData.cs new file mode 100644 index 000000000..c74f3b051 --- /dev/null +++ b/src/Infrastructure/BotSharp.Abstraction/Knowledges/Models/KnowledgeDocMetaData.cs @@ -0,0 +1,24 @@ +using BotSharp.Abstraction.VectorStorage.Models; + +namespace BotSharp.Abstraction.Knowledges.Models; + +public class KnowledgeDocMetaData +{ + [JsonPropertyName("collection")] + public string Collection { get; set; } + + [JsonPropertyName("file_name")] + public string FileName { get; set; } + + [JsonPropertyName("content_type")] + public string ContentType { get; set; } + + [JsonPropertyName("vector_data_ids")] + public IEnumerable VectorDataIds { get; set; } = new List(); + + [JsonPropertyName("create_date")] + public DateTime CreateDate { get; set; } = DateTime.UtcNow; + + [JsonPropertyName("create_user_id")] + public string CreateUserId { get; set; } +} diff --git a/src/Infrastructure/BotSharp.Abstraction/Knowledges/Models/UploadKnowledgeResponse.cs b/src/Infrastructure/BotSharp.Abstraction/Knowledges/Models/UploadKnowledgeResponse.cs index 3f5df77a5..a1dad405e 100644 --- a/src/Infrastructure/BotSharp.Abstraction/Knowledges/Models/UploadKnowledgeResponse.cs +++ b/src/Infrastructure/BotSharp.Abstraction/Knowledges/Models/UploadKnowledgeResponse.cs @@ -7,4 +7,12 @@ public class UploadKnowledgeResponse [JsonPropertyName("failed")] public IEnumerable Failed { get; set; } = new List(); + + [JsonPropertyName("is_success")] + public bool IsSuccess { + get + { + return !Success.IsNullOrEmpty() && Failed.IsNullOrEmpty(); + } + } } diff --git a/src/Infrastructure/BotSharp.Abstraction/Repositories/IBotSharpRepository.cs b/src/Infrastructure/BotSharp.Abstraction/Repositories/IBotSharpRepository.cs index 1088e027b..67952f54f 100644 --- a/src/Infrastructure/BotSharp.Abstraction/Repositories/IBotSharpRepository.cs +++ b/src/Infrastructure/BotSharp.Abstraction/Repositories/IBotSharpRepository.cs @@ -101,7 +101,7 @@ public interface IBotSharpRepository #endregion - #region Knowledge + #region KnowledgeBase /// /// Save knowledge collection configs. If reset is true, it will remove everything and then save the new configs. /// diff --git a/src/Infrastructure/BotSharp.Abstraction/Utilities/StringExtensions.cs b/src/Infrastructure/BotSharp.Abstraction/Utilities/StringExtensions.cs index e5009698e..65f7b78e7 100644 --- a/src/Infrastructure/BotSharp.Abstraction/Utilities/StringExtensions.cs +++ b/src/Infrastructure/BotSharp.Abstraction/Utilities/StringExtensions.cs @@ -44,7 +44,7 @@ public static bool IsEqualTo(this string? str1, string? str2, StringComparison o return str1.Equals(str2, option); } - public static string RemoveWhiteSpaces(this string? str) + public static string CleanStr(this string? str) { if (string.IsNullOrWhiteSpace(str)) return string.Empty; diff --git a/src/Infrastructure/BotSharp.Abstraction/VectorStorage/IVectorDb.cs b/src/Infrastructure/BotSharp.Abstraction/VectorStorage/IVectorDb.cs index cedacb46c..2fbc628b7 100644 --- a/src/Infrastructure/BotSharp.Abstraction/VectorStorage/IVectorDb.cs +++ b/src/Infrastructure/BotSharp.Abstraction/VectorStorage/IVectorDb.cs @@ -13,5 +13,5 @@ public interface IVectorDb Task DeleteCollection(string collectionName); Task Upsert(string collectionName, Guid id, float[] vector, string text, Dictionary? payload = null); Task> Search(string collectionName, float[] vector, IEnumerable? fields, int limit = 5, float confidence = 0.5f, bool withVector = false); - Task DeleteCollectionData(string collectionName, Guid id); + Task DeleteCollectionData(string collectionName, List ids); } diff --git a/src/Infrastructure/BotSharp.Abstraction/VectorStorage/Models/VectorCollectionConfigFilter.cs b/src/Infrastructure/BotSharp.Abstraction/VectorStorage/Models/VectorCollectionConfigFilter.cs index 338d30ccf..8b612bbaf 100644 --- a/src/Infrastructure/BotSharp.Abstraction/VectorStorage/Models/VectorCollectionConfigFilter.cs +++ b/src/Infrastructure/BotSharp.Abstraction/VectorStorage/Models/VectorCollectionConfigFilter.cs @@ -4,6 +4,7 @@ public class VectorCollectionConfigFilter { public IEnumerable? CollectionNames { get; set; } public IEnumerable? CollectionTypes { get; set; } + public IEnumerable? VectorStroageProviders { get; set; } public static VectorCollectionConfigFilter Empty() { diff --git a/src/Infrastructure/BotSharp.Abstraction/VectorStorage/Models/VectorCollectionConfigModel.cs b/src/Infrastructure/BotSharp.Abstraction/VectorStorage/Models/VectorCollectionConfigModel.cs index 4fad3e33d..4709e0e6c 100644 --- a/src/Infrastructure/BotSharp.Abstraction/VectorStorage/Models/VectorCollectionConfigModel.cs +++ b/src/Infrastructure/BotSharp.Abstraction/VectorStorage/Models/VectorCollectionConfigModel.cs @@ -20,14 +20,11 @@ public class VectorCollectionConfig [JsonPropertyName("type")] public string Type { get; set; } + [JsonPropertyName("vector_storage")] + public VectorStorageConfig VectorStorage { get; set; } + [JsonPropertyName("text_embedding")] public KnowledgeEmbeddingConfig TextEmbedding { get; set; } - - [JsonPropertyName("create_date")] - public DateTime CreateDate { get; set; } = DateTime.UtcNow; - - [JsonPropertyName("create_user_id")] - public string CreateUserId { get; set; } = string.Empty; } public class KnowledgeEmbeddingConfig @@ -40,4 +37,10 @@ public class KnowledgeEmbeddingConfig [JsonPropertyName("dimension")] public int Dimension { get; set; } +} + +public class VectorStorageConfig +{ + [JsonPropertyName("provider")] + public string Provider { get; set; } } \ No newline at end of file diff --git a/src/Infrastructure/BotSharp.Core/Files/Services/Storage/LocalFileStorageService.Conversation.cs b/src/Infrastructure/BotSharp.Core/Files/Services/Storage/LocalFileStorageService.Conversation.cs index 5638179cf..44bfb9f1a 100644 --- a/src/Infrastructure/BotSharp.Core/Files/Services/Storage/LocalFileStorageService.Conversation.cs +++ b/src/Infrastructure/BotSharp.Core/Files/Services/Storage/LocalFileStorageService.Conversation.cs @@ -118,7 +118,7 @@ public IEnumerable GetMessagesWithFile(string conversationId, return foundMsgs; } - public bool SaveMessageFiles(string conversationId, string messageId, string source, List files) + public bool SaveMessageFiles(string conversationId, string messageId, string source, List files) { if (files.IsNullOrEmpty()) return false; diff --git a/src/Infrastructure/BotSharp.Core/Files/Services/Storage/LocalFileStorageService.Knowledge.cs b/src/Infrastructure/BotSharp.Core/Files/Services/Storage/LocalFileStorageService.Knowledge.cs deleted file mode 100644 index f417654e9..000000000 --- a/src/Infrastructure/BotSharp.Core/Files/Services/Storage/LocalFileStorageService.Knowledge.cs +++ /dev/null @@ -1,34 +0,0 @@ -using System.IO; - -namespace BotSharp.Core.Files.Services; - -public partial class LocalFileStorageService -{ - public bool SaveKnowledgeFiles(string collectionName, string fileId, string fileName, Stream stream) - { - if (string.IsNullOrWhiteSpace(collectionName) || string.IsNullOrWhiteSpace(fileId)) - { - return false; - } - - try - { - var dir = Path.Combine(_baseDir, KNOWLEDGE_FOLDER, KNOWLEDGE_DOC_FOLDER, collectionName, fileId); - if (ExistDirectory(dir)) - { - Directory.Delete(dir); - } - Directory.CreateDirectory(dir); - - var filePath = Path.Combine(dir, fileName); - using var fs = File.Create(filePath); - stream.CopyTo(fs); - return true; - } - catch (Exception ex) - { - _logger.LogWarning($"Error when saving knowledge file (Collection: {collectionName}, File name: {fileName}). {ex.Message}\r\n{ex.InnerException}"); - return false; - } - } -} diff --git a/src/Infrastructure/BotSharp.Core/Files/Services/Storage/LocalFileStorageService.KnowledgeBase.cs b/src/Infrastructure/BotSharp.Core/Files/Services/Storage/LocalFileStorageService.KnowledgeBase.cs new file mode 100644 index 000000000..26d2735a9 --- /dev/null +++ b/src/Infrastructure/BotSharp.Core/Files/Services/Storage/LocalFileStorageService.KnowledgeBase.cs @@ -0,0 +1,113 @@ +using BotSharp.Abstraction.Knowledges.Models; +using System.IO; + +namespace BotSharp.Core.Files.Services; + +public partial class LocalFileStorageService +{ + public bool SaveKnowledgeBaseFile(string collectionName, string vectorStoreProvider, string fileId, string fileName, Stream stream) + { + if (string.IsNullOrWhiteSpace(collectionName) + || string.IsNullOrWhiteSpace(vectorStoreProvider) + || string.IsNullOrWhiteSpace(fileId)) + { + return false; + } + + try + { + var docDir = BuildKnowledgeCollectionDocumentDir(collectionName, vectorStoreProvider); + var dir = Path.Combine(docDir, fileId); + if (ExistDirectory(dir)) + { + Directory.Delete(dir); + } + Directory.CreateDirectory(dir); + + var filePath = Path.Combine(dir, fileName); + using var fs = File.Create(filePath); + stream.CopyTo(fs); + return true; + } + catch (Exception ex) + { + _logger.LogWarning($"Error when saving knowledge file " + + $"(Vector store provider: {vectorStoreProvider}, Collection: {collectionName}, File name: {fileName})." + + $"\r\n{ex.Message}\r\n{ex.InnerException}"); + return false; + } + } + + public bool DeleteKnowledgeFile(string collectionName, string vectorStoreProvider, string? fileId = null) + { + if (string.IsNullOrWhiteSpace(collectionName) + || string.IsNullOrWhiteSpace(vectorStoreProvider)) + { + return false; + } + + var dir = BuildKnowledgeCollectionDocumentDir(collectionName, vectorStoreProvider); + if (!ExistDirectory(dir)) return false; + + if (string.IsNullOrEmpty(fileId)) + { + Directory.Delete(dir, true); + } + else + { + var fileDir = Path.Combine(dir, fileId); + if (ExistDirectory(fileDir)) + { + Directory.Delete(fileDir, true); + } + } + + return true; + } + + public bool SaveKnolwedgeBaseFileMeta(string collectionName, string vectorStoreProvider, string fileId, KnowledgeDocMetaData metaData) + { + if (string.IsNullOrWhiteSpace(collectionName) + || string.IsNullOrWhiteSpace(vectorStoreProvider) + || string.IsNullOrWhiteSpace(fileId)) + { + return false; + } + + var docDir = BuildKnowledgeCollectionDocumentDir(collectionName, vectorStoreProvider); + var dir = Path.Combine(docDir, fileId); + if (!ExistDirectory(dir)) + { + Directory.CreateDirectory(dir); + } + + var metaFile = Path.Combine(dir, KNOWLEDGE_DOC_META_FILE); + var content = JsonSerializer.Serialize(metaData, _jsonOptions); + File.WriteAllText(metaFile, content); + return true; + } + + public KnowledgeDocMetaData? GetKnowledgeBaseFileMeta(string collectionName, string vectorStoreProvider, string fileId) + { + if (string.IsNullOrWhiteSpace(collectionName) || string.IsNullOrWhiteSpace(fileId)) + { + return null; + } + + var docDir = BuildKnowledgeCollectionDocumentDir(collectionName, vectorStoreProvider); + var metaFile = Path.Combine(docDir, fileId, KNOWLEDGE_DOC_META_FILE); + if (!File.Exists(metaFile)) + { + return null; + } + + var content = File.ReadAllText(metaFile); + var metaData = JsonSerializer.Deserialize(content, _jsonOptions); + return metaData; + } + + private string BuildKnowledgeCollectionDocumentDir(string collectionName, string vectorStoreProvider) + { + return Path.Combine(_baseDir, KNOWLEDGE_FOLDER, KNOWLEDGE_DOC_FOLDER, vectorStoreProvider, collectionName); + } +} diff --git a/src/Infrastructure/BotSharp.Core/Files/Services/Storage/LocalFileStorageService.User.cs b/src/Infrastructure/BotSharp.Core/Files/Services/Storage/LocalFileStorageService.User.cs index d1e962cd4..97da2d6f1 100644 --- a/src/Infrastructure/BotSharp.Core/Files/Services/Storage/LocalFileStorageService.User.cs +++ b/src/Infrastructure/BotSharp.Core/Files/Services/Storage/LocalFileStorageService.User.cs @@ -16,7 +16,7 @@ public string GetUserAvatar() return found; } - public bool SaveUserAvatar(InputFileModel file) + public bool SaveUserAvatar(FileDataModel file) { if (file == null || string.IsNullOrEmpty(file.FileData)) return false; diff --git a/src/Infrastructure/BotSharp.Core/Files/Services/Storage/LocalFileStorageService.cs b/src/Infrastructure/BotSharp.Core/Files/Services/Storage/LocalFileStorageService.cs index 177381095..71ddecf5a 100644 --- a/src/Infrastructure/BotSharp.Core/Files/Services/Storage/LocalFileStorageService.cs +++ b/src/Infrastructure/BotSharp.Core/Files/Services/Storage/LocalFileStorageService.cs @@ -21,6 +21,15 @@ public partial class LocalFileStorageService : IFileStorageService private const string TEXT_TO_SPEECH_FOLDER = "speeches"; private const string KNOWLEDGE_FOLDER = "knowledgebase"; private const string KNOWLEDGE_DOC_FOLDER = "document"; + private const string KNOWLEDGE_DOC_META_FILE = "meta.json"; + + private readonly JsonSerializerOptions _jsonOptions = new JsonSerializerOptions + { + PropertyNameCaseInsensitive = true, + PropertyNamingPolicy = JsonNamingPolicy.CamelCase, + WriteIndented = true, + AllowTrailingCommas = true + }; public LocalFileStorageService( BotSharpDatabaseSettings dbSettings, diff --git a/src/Infrastructure/BotSharp.Core/Repository/FileRepository/FileRepository.Knowledge.cs b/src/Infrastructure/BotSharp.Core/Repository/FileRepository/FileRepository.KnowledgeBase.cs similarity index 57% rename from src/Infrastructure/BotSharp.Core/Repository/FileRepository/FileRepository.Knowledge.cs rename to src/Infrastructure/BotSharp.Core/Repository/FileRepository/FileRepository.KnowledgeBase.cs index b4069d330..6356869ef 100644 --- a/src/Infrastructure/BotSharp.Core/Repository/FileRepository/FileRepository.Knowledge.cs +++ b/src/Infrastructure/BotSharp.Core/Repository/FileRepository/FileRepository.KnowledgeBase.cs @@ -7,13 +7,13 @@ public partial class FileRepository { public bool AddKnowledgeCollectionConfigs(List configs, bool reset = false) { - var dir = Path.Combine(_dbSettings.FileRepository, KNOWLEDGE_FOLDER, VECTOR_FOLDER); - if (!Directory.Exists(dir)) + var vectorDir = BuildKnowledgeCollectionConfigDir(); + if (!Directory.Exists(vectorDir)) { - Directory.CreateDirectory(dir); + Directory.CreateDirectory(vectorDir); } - var configFile = Path.Combine(dir, COLLECTION_CONFIG_FILE); + var configFile = Path.Combine(vectorDir, COLLECTION_CONFIG_FILE); if (reset) { File.WriteAllText(configFile, JsonSerializer.Serialize(configs ?? new(), _options)); @@ -27,7 +27,24 @@ public bool AddKnowledgeCollectionConfigs(List configs, var str = File.ReadAllText(configFile); var savedConfigs = JsonSerializer.Deserialize>(str, _options) ?? new(); - savedConfigs.AddRange(configs); + + // Update if collection already exists, otherwise insert + foreach (var config in configs) + { + if (string.IsNullOrWhiteSpace(config.Name)) continue; + + var found = savedConfigs.FirstOrDefault(x => x.Name == config.Name); + if (found != null) + { + found.TextEmbedding = config.TextEmbedding; + found.Type = config.Type; + } + else + { + savedConfigs.Add(config); + } + } + File.WriteAllText(configFile, JsonSerializer.Serialize(savedConfigs ?? new(), _options)); return true; @@ -37,7 +54,8 @@ public bool DeleteKnowledgeCollectionConfig(string collectionName) { if (string.IsNullOrWhiteSpace(collectionName)) return false; - var configFile = Path.Combine(_dbSettings.FileRepository, KNOWLEDGE_FOLDER, VECTOR_FOLDER, COLLECTION_CONFIG_FILE); + var vectorDir = BuildKnowledgeCollectionConfigDir(); + var configFile = Path.Combine(vectorDir, COLLECTION_CONFIG_FILE); if (!File.Exists(configFile)) return false; var str = File.ReadAllText(configFile); @@ -55,15 +73,18 @@ public IEnumerable GetKnowledgeCollectionConfigs(VectorC return Enumerable.Empty(); } - var file = Path.Combine(_dbSettings.FileRepository, KNOWLEDGE_FOLDER, VECTOR_FOLDER, COLLECTION_CONFIG_FILE); - if (!File.Exists(file)) + var vectorDir = BuildKnowledgeCollectionConfigDir(); + var configFile = Path.Combine(vectorDir, COLLECTION_CONFIG_FILE); + if (!File.Exists(configFile)) { return Enumerable.Empty(); } - var str = File.ReadAllText(file); - var configs = JsonSerializer.Deserialize>(str, _options) ?? new(); + // Get data + var content = File.ReadAllText(configFile); + var configs = JsonSerializer.Deserialize>(content, _options) ?? new(); + // Apply filters if (!filter.CollectionNames.IsNullOrEmpty()) { configs = configs.Where(x => filter.CollectionNames.Contains(x.Name)).ToList(); @@ -74,6 +95,18 @@ public IEnumerable GetKnowledgeCollectionConfigs(VectorC configs = configs.Where(x => filter.CollectionTypes.Contains(x.Type)).ToList(); } + if (!filter.VectorStroageProviders.IsNullOrEmpty()) + { + configs = configs.Where(x => filter.VectorStroageProviders.Contains(x.VectorStorage?.Provider)).ToList(); + } + return configs; } + + #region Private methods + private string BuildKnowledgeCollectionConfigDir() + { + return Path.Combine(_dbSettings.FileRepository, KNOWLEDGE_FOLDER, VECTOR_FOLDER); + } + #endregion } diff --git a/src/Infrastructure/BotSharp.OpenAPI/Controllers/KnowledgeBaseController.cs b/src/Infrastructure/BotSharp.OpenAPI/Controllers/KnowledgeBaseController.cs index b5da8213a..17d21964d 100644 --- a/src/Infrastructure/BotSharp.OpenAPI/Controllers/KnowledgeBaseController.cs +++ b/src/Infrastructure/BotSharp.OpenAPI/Controllers/KnowledgeBaseController.cs @@ -32,7 +32,7 @@ public async Task CreateVectorCollection([FromBody] CreateVectorCollection } [HttpDelete("knowledge/vector/{collection}/delete-collection")] - public async Task DeleteVectorCollections([FromRoute] string collection) + public async Task DeleteVectorCollection([FromRoute] string collection) { return await _knowledgeService.DeleteVectorCollection(collection); } @@ -121,33 +121,17 @@ public async Task SearchGraphKnowledge([FromBody] Searc #region Document - //[HttpPost("/knowledge/vector/{collection}/upload")] - //public async Task UploadVectorKnowledge([FromRoute] string collection, IFormFile file, [FromForm] int? startPageNum, [FromForm] int? endPageNum) - //{ - // var setttings = _services.GetRequiredService(); - // var textConverter = _services.GetServices().FirstOrDefault(x => x.Provider == setttings.Pdf2TextConverter.Provider); - - // var filePath = Path.GetTempFileName(); - // using (var stream = new FileStream(filePath, FileMode.Create, FileAccess.Write, FileShare.None)) - // { - // await file.CopyToAsync(stream); - // await stream.FlushAsync(); - // } - - // var content = await textConverter.ConvertPdfToText(filePath, startPageNum, endPageNum); - // await _knowledgeService.FeedVectorKnowledge(collection, new KnowledgeCreationModel - // { - // Content = content - // }); - - // System.IO.File.Delete(filePath); - // return Ok(new { count = 1, file.Length }); - //} - - [HttpPost("/knowledge/vector/{collection}/upload")] - public async Task UploadVectorKnowledge([FromRoute] string collection, [FromBody] VectorKnowledgeUploadRequest request) + [HttpPost("/knowledge/document/{collection}/upload")] + public async Task UploadKnowledgeDocuments([FromRoute] string collection, [FromBody] VectorKnowledgeUploadRequest request) { - var response = await _knowledgeService.UploadVectorKnowledge(collection, request.Files); + var response = await _knowledgeService.UploadKnowledgeDocuments(collection, request.Files); + return response; + } + + [HttpDelete("/knowledge/document/{collection}/delete/{fileId}")] + public async Task DeleteKnowledgeDocument([FromRoute] string collection, [FromRoute] string fileId) + { + var response = await _knowledgeService.DeleteKnowledgeDocument(collection, fileId); return response; } #endregion diff --git a/src/Infrastructure/BotSharp.OpenAPI/Controllers/UserController.cs b/src/Infrastructure/BotSharp.OpenAPI/Controllers/UserController.cs index 163993349..a7b481e28 100644 --- a/src/Infrastructure/BotSharp.OpenAPI/Controllers/UserController.cs +++ b/src/Infrastructure/BotSharp.OpenAPI/Controllers/UserController.cs @@ -138,7 +138,7 @@ public async Task ModifyUserPhone([FromQuery] string phone) public bool UploadUserAvatar([FromBody] UserAvatarModel input) { var fileStorage = _services.GetRequiredService(); - var file = new InputFileModel + var file = new FileDataModel { FileName = input.FileName, FileData = input.FileData, diff --git a/src/Infrastructure/BotSharp.OpenAPI/ViewModels/Conversations/InputMessageFiles.cs b/src/Infrastructure/BotSharp.OpenAPI/ViewModels/Conversations/InputMessageFiles.cs index fdf088bbc..e1d709592 100644 --- a/src/Infrastructure/BotSharp.OpenAPI/ViewModels/Conversations/InputMessageFiles.cs +++ b/src/Infrastructure/BotSharp.OpenAPI/ViewModels/Conversations/InputMessageFiles.cs @@ -3,5 +3,5 @@ namespace BotSharp.OpenAPI.ViewModels.Conversations; public class InputMessageFiles { public List States { get; set; } = new(); - public List Files { get; set; } = new(); + public List Files { get; set; } = new(); } diff --git a/src/Infrastructure/BotSharp.OpenAPI/ViewModels/Knowledges/VectorKnowledgeUploadRequest.cs b/src/Infrastructure/BotSharp.OpenAPI/ViewModels/Knowledges/VectorKnowledgeUploadRequest.cs index c49bdec11..9b36366a7 100644 --- a/src/Infrastructure/BotSharp.OpenAPI/ViewModels/Knowledges/VectorKnowledgeUploadRequest.cs +++ b/src/Infrastructure/BotSharp.OpenAPI/ViewModels/Knowledges/VectorKnowledgeUploadRequest.cs @@ -2,5 +2,5 @@ namespace BotSharp.OpenAPI.ViewModels.Knowledges; public class VectorKnowledgeUploadRequest { - public IEnumerable Files { get; set; } = new List(); + public IEnumerable Files { get; set; } = new List(); } diff --git a/src/Plugins/BotSharp.Plugin.FileHandler/Functions/EditImageFn.cs b/src/Plugins/BotSharp.Plugin.FileHandler/Functions/EditImageFn.cs index dc4ddef03..cb7139f0d 100644 --- a/src/Plugins/BotSharp.Plugin.FileHandler/Functions/EditImageFn.cs +++ b/src/Plugins/BotSharp.Plugin.FileHandler/Functions/EditImageFn.cs @@ -99,9 +99,9 @@ private void SaveGeneratedImage(ImageGeneration? image) { if (image == null) return; - var files = new List() + var files = new List() { - new InputFileModel + new FileDataModel { FileName = $"{Guid.NewGuid()}.png", FileData = $"data:{MediaTypeNames.Image.Png};base64,{image.ImageData}" diff --git a/src/Plugins/BotSharp.Plugin.FileHandler/Functions/GenerateImageFn.cs b/src/Plugins/BotSharp.Plugin.FileHandler/Functions/GenerateImageFn.cs index 9592ef80c..c3e8c147b 100644 --- a/src/Plugins/BotSharp.Plugin.FileHandler/Functions/GenerateImageFn.cs +++ b/src/Plugins/BotSharp.Plugin.FileHandler/Functions/GenerateImageFn.cs @@ -77,7 +77,7 @@ private void SaveGeneratedImages(List? images) { if (images.IsNullOrEmpty()) return; - var files = images.Where(x => !string.IsNullOrEmpty(x?.ImageData)).Select(x => new InputFileModel + var files = images.Where(x => !string.IsNullOrEmpty(x?.ImageData)).Select(x => new FileDataModel { FileName = $"{Guid.NewGuid()}.png", FileData = $"data:{MediaTypeNames.Image.Png};base64,{x.ImageData}" diff --git a/src/Plugins/BotSharp.Plugin.KnowledgeBase/MemVecDb/MemoryVectorDb.cs b/src/Plugins/BotSharp.Plugin.KnowledgeBase/MemVecDb/MemoryVectorDb.cs index 17a2c7572..61a56477f 100644 --- a/src/Plugins/BotSharp.Plugin.KnowledgeBase/MemVecDb/MemoryVectorDb.cs +++ b/src/Plugins/BotSharp.Plugin.KnowledgeBase/MemVecDb/MemoryVectorDb.cs @@ -74,7 +74,7 @@ public async Task Upsert(string collectionName, Guid id, float[] vector, s return true; } - public async Task DeleteCollectionData(string collectionName, Guid id) + public async Task DeleteCollectionData(string collectionName, List ids) { return await Task.FromResult(false); } diff --git a/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Common.cs b/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Common.cs index 4995d0ca4..7fc31f2d2 100644 --- a/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Common.cs +++ b/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Common.cs @@ -6,14 +6,6 @@ public async Task RefreshVectorKnowledgeConfigs(VectorCollectionConfigsMod { var db = _services.GetRequiredService(); var collections = configs.Collections ?? new(); - var userId = await GetUserId(); - - foreach (var collection in collections) - { - collection.CreateDate = DateTime.UtcNow; - collection.CreateUserId = userId; - } - var saved = db.AddKnowledgeCollectionConfigs(collections, reset: true); return await Task.FromResult(saved); } diff --git a/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Document.cs b/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Document.cs index ddbca650e..7a94cdad7 100644 --- a/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Document.cs +++ b/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Document.cs @@ -1,12 +1,13 @@ using BotSharp.Abstraction.Files; using BotSharp.Abstraction.Files.Models; using BotSharp.Abstraction.Files.Utilities; +using System.Net.Http; namespace BotSharp.Plugin.KnowledgeBase.Services; public partial class KnowledgeService { - public async Task UploadVectorKnowledge(string collectionName, IEnumerable files) + public async Task UploadKnowledgeDocuments(string collectionName, IEnumerable files) { if (string.IsNullOrWhiteSpace(collectionName)) { @@ -18,30 +19,32 @@ public async Task UploadVectorKnowledge(string collecti } var fileStoreage = _services.GetRequiredService(); - var cleanCollectionName = collectionName.RemoveWhiteSpaces(); + var userId = await GetUserId(); + var vectorStoreProvider = _settings.VectorDb.Provider; var successFiles = new List(); var failedFiles = new List(); foreach (var file in files) { - if (string.IsNullOrWhiteSpace(file.FileData) || string.IsNullOrWhiteSpace(file.FileName)) + if (string.IsNullOrWhiteSpace(file.FileData) + && string.IsNullOrWhiteSpace(file.FileUrl)) { continue; } - var dataIds = new List(); - try { - // Chop text - var (contentType, bytes) = FileUtility.GetFileInfoFromData(file.FileData); + var dataIds = new List(); + + // Chop text (to do) + var (contentType, bytes) = await GetFileInfo(file); using var stream = new MemoryStream(bytes); using var reader = new StreamReader(stream); var content = await reader.ReadToEndAsync(); // Save file var fileId = Guid.NewGuid().ToString(); - var saved = fileStoreage.SaveKnowledgeFiles(cleanCollectionName, fileId, file.FileName, stream); + var saved = fileStoreage.SaveKnowledgeBaseFile(collectionName.CleanStr(), vectorStoreProvider.CleanStr(), fileId, file.FileName, stream); reader.Close(); stream.Close(); @@ -58,15 +61,31 @@ public async Task UploadVectorKnowledge(string collecti // Save to vector db var dataId = Guid.NewGuid(); - await vectorDb.Upsert(collectionName, dataId, vector, content, new Dictionary + saved = await vectorDb.Upsert(collectionName, dataId, vector, content, new Dictionary { { "fileName", file.FileName }, { "fileId", fileId }, { "page", "0" } }); - dataIds.Add(dataId.ToString()); - successFiles.Add(file.FileName); + if (saved) + { + dataIds.Add(dataId.ToString()); + fileStoreage.SaveKnolwedgeBaseFileMeta(collectionName.CleanStr(), vectorStoreProvider.CleanStr(), fileId, new KnowledgeDocMetaData + { + Collection = collectionName, + FileName = file.FileName, + ContentType = contentType, + VectorDataIds = dataIds, + CreateDate = DateTime.UtcNow, + CreateUserId = userId + }); + successFiles.Add(file.FileName); + } + else + { + failedFiles.Add(file.FileName); + } } catch (Exception ex) { @@ -84,6 +103,39 @@ public async Task UploadVectorKnowledge(string collecti } + public async Task DeleteKnowledgeDocument(string collectionName, string fileId) + { + if (string.IsNullOrWhiteSpace(collectionName) || string.IsNullOrWhiteSpace(fileId)) + { + return false; + } + + try + { + var fileStorage = _services.GetRequiredService(); + var vectorDb = GetVectorDb(); + var vectorStoreProvider = _settings.VectorDb.Provider; + + fileStorage.DeleteKnowledgeFile(collectionName.CleanStr(), vectorStoreProvider.CleanStr(), fileId); + var metaData = fileStorage.GetKnowledgeBaseFileMeta(collectionName.CleanStr(), vectorStoreProvider.CleanStr(), fileId); + + if (metaData != null && !metaData.VectorDataIds.IsNullOrEmpty()) + { + var guids = metaData.VectorDataIds.Where(x => Guid.TryParse(x, out _)).Select(x => Guid.Parse(x)).ToList(); + await vectorDb.DeleteCollectionData(collectionName, guids); + } + + return true; + } + catch (Exception ex) + { + _logger.LogWarning($"Error when deleting knowledge document " + + $"(Collection: {collectionName}, File id: {fileId})" + + $"\r\n{ex.Message}\r\n{ex.InnerException}"); + return false; + } + } + public async Task FeedVectorKnowledge(string collectionName, KnowledgeCreationModel knowledge) { var index = 0; @@ -108,5 +160,33 @@ public async Task FeedVectorKnowledge(string collectionName, KnowledgeCreationMo } #region Private methods + /// + /// Get file content type and file bytes + /// + /// + /// + private async Task<(string, byte[])> GetFileInfo(ExternalFileModel file) + { + if (file == null) + { + return (string.Empty, new byte[0]); + } + + if (!string.IsNullOrWhiteSpace(file.FileUrl)) + { + var http = _services.GetRequiredService(); + var contentType = FileUtility.GetFileContentType(file.FileName); + using var client = http.CreateClient(); + var bytes = await client.GetByteArrayAsync(file.FileUrl); + return (contentType, bytes); + } + else if (!string.IsNullOrWhiteSpace(file.FileData)) + { + var (contentType, bytes) = FileUtility.GetFileInfoFromData(file.FileData); + return (contentType, bytes); + } + + return (string.Empty, new byte[0]); + } #endregion } diff --git a/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Vector.cs b/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Vector.cs index 32a8d72d2..c30606df1 100644 --- a/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Vector.cs +++ b/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Vector.cs @@ -1,3 +1,5 @@ +using BotSharp.Abstraction.Files; + namespace BotSharp.Plugin.KnowledgeBase.Services; public partial class KnowledgeService @@ -25,14 +27,16 @@ public async Task CreateVectorCollection(string collectionName, string col { Name = collectionName, Type = collectionType, + VectorStorage = new VectorStorageConfig + { + Provider = _settings.VectorDb.Provider + }, TextEmbedding = new KnowledgeEmbeddingConfig { Provider = provider, Model = model, Dimension = dimension - }, - CreateDate = DateTime.UtcNow, - CreateUserId = userId + } } }); } @@ -53,7 +57,8 @@ public async Task> GetVectorCollections(string type) var db = _services.GetRequiredService(); var collectionNames = db.GetKnowledgeCollectionConfigs(new VectorCollectionConfigFilter { - CollectionTypes = new[] { type } + CollectionTypes = new[] { type }, + VectorStroageProviders = new[] { _settings.VectorDb.Provider } }).Select(x => x.Name).ToList(); var vectorDb = GetVectorDb(); @@ -82,7 +87,11 @@ public async Task DeleteVectorCollection(string collectionName) if (deleted) { var db = _services.GetRequiredService(); + var fileStorage = _services.GetRequiredService(); + var vectorStoreProvider = _settings.VectorDb.Provider; + db.DeleteKnowledgeCollectionConfig(collectionName); + fileStorage.DeleteKnowledgeFile(collectionName.CleanStr(), vectorStoreProvider.CleanStr()); } return deleted; @@ -156,7 +165,7 @@ public async Task DeleteVectorCollectionData(string collectionName, string } var db = GetVectorDb(); - return await db.DeleteCollectionData(collectionName, guid); + return await db.DeleteCollectionData(collectionName, new List { guid }); } catch (Exception ex) { @@ -202,7 +211,7 @@ public async Task> SearchVectorKnowledge(string catch (Exception ex) { _logger.LogWarning($"Error when searching vector knowledge ({collectionName}). {ex.Message}\r\n{ex.InnerException}"); - return new List(); + return Enumerable.Empty(); } } #endregion diff --git a/src/Plugins/BotSharp.Plugin.MetaAI/Providers/FaissDb.cs b/src/Plugins/BotSharp.Plugin.MetaAI/Providers/FaissDb.cs index 58fb18512..817deb363 100644 --- a/src/Plugins/BotSharp.Plugin.MetaAI/Providers/FaissDb.cs +++ b/src/Plugins/BotSharp.Plugin.MetaAI/Providers/FaissDb.cs @@ -48,7 +48,7 @@ public Task Upsert(string collectionName, Guid id, float[] vector, string throw new NotImplementedException(); } - public Task DeleteCollectionData(string collectionName, Guid id) + public Task DeleteCollectionData(string collectionName, List ids) { throw new NotImplementedException(); } diff --git a/src/Plugins/BotSharp.Plugin.MongoStorage/Collections/KnowledgeCollectionConfigDocument.cs b/src/Plugins/BotSharp.Plugin.MongoStorage/Collections/KnowledgeCollectionConfigDocument.cs index 1bf282e1e..2519a627b 100644 --- a/src/Plugins/BotSharp.Plugin.MongoStorage/Collections/KnowledgeCollectionConfigDocument.cs +++ b/src/Plugins/BotSharp.Plugin.MongoStorage/Collections/KnowledgeCollectionConfigDocument.cs @@ -4,7 +4,6 @@ public class KnowledgeCollectionConfigDocument : MongoBase { public string Name { get; set; } public string Type { get; set; } + public KnowledgeVectorStorageConfigMongoModel VectorStorage { get; set; } public KnowledgeEmbeddingConfigMongoModel TextEmbedding { get; set; } - public DateTime CreateDate { get; set; } - public string CreateUserId { get; set; } } diff --git a/src/Plugins/BotSharp.Plugin.MongoStorage/Models/KnowledgeVectorStorageConfigMongoModel.cs b/src/Plugins/BotSharp.Plugin.MongoStorage/Models/KnowledgeVectorStorageConfigMongoModel.cs new file mode 100644 index 000000000..809886f7e --- /dev/null +++ b/src/Plugins/BotSharp.Plugin.MongoStorage/Models/KnowledgeVectorStorageConfigMongoModel.cs @@ -0,0 +1,24 @@ +using BotSharp.Abstraction.VectorStorage.Models; + +namespace BotSharp.Plugin.MongoStorage.Models; + +public class KnowledgeVectorStorageConfigMongoModel +{ + public string Provider { get; set; } + + public static KnowledgeVectorStorageConfigMongoModel ToMongoModel(VectorStorageConfig model) + { + return new KnowledgeVectorStorageConfigMongoModel + { + Provider = model.Provider + }; + } + + public static VectorStorageConfig ToDomainModel(KnowledgeVectorStorageConfigMongoModel model) + { + return new VectorStorageConfig + { + Provider = model.Provider + }; + } +} diff --git a/src/Plugins/BotSharp.Plugin.MongoStorage/Repository/MongoRepository.Knowledge.cs b/src/Plugins/BotSharp.Plugin.MongoStorage/Repository/MongoRepository.Knowledge.cs deleted file mode 100644 index b2d341dc6..000000000 --- a/src/Plugins/BotSharp.Plugin.MongoStorage/Repository/MongoRepository.Knowledge.cs +++ /dev/null @@ -1,60 +0,0 @@ -using BotSharp.Abstraction.VectorStorage.Models; - -namespace BotSharp.Plugin.MongoStorage.Repository; - -public partial class MongoRepository -{ - public bool AddKnowledgeCollectionConfigs(List configs, bool reset = false) - { - var docs = configs?.Select(x => new KnowledgeCollectionConfigDocument - { - Id = Guid.NewGuid().ToString(), - Name = x.Name, - Type = x.Type, - TextEmbedding = KnowledgeEmbeddingConfigMongoModel.ToMongoModel(x.TextEmbedding), - CreateDate = x.CreateDate, - CreateUserId = x.CreateUserId, - })?.ToList() ?? new List(); - - if (reset) - { - var filter = Builders.Filter.Empty; - _dc.KnowledgeCollectionConfigs.DeleteMany(filter); - } - - _dc.KnowledgeCollectionConfigs.InsertMany(docs); - return true; - } - - public bool DeleteKnowledgeCollectionConfig(string collectionName) - { - if (string.IsNullOrWhiteSpace(collectionName)) return false; - - var filter = Builders.Filter.Eq(x => x.Name, collectionName); - var deleted = _dc.KnowledgeCollectionConfigs.DeleteMany(filter); - return deleted.DeletedCount > 0; - } - - public IEnumerable GetKnowledgeCollectionConfigs(VectorCollectionConfigFilter filter) - { - if (filter == null) - { - return Enumerable.Empty(); - } - - var builder = Builders.Filter; - var filters = new List> { builder.Empty }; - - var configs = _dc.KnowledgeCollectionConfigs.Find(Builders.Filter.And(filters)).ToList(); - - - return configs.Select(x => new VectorCollectionConfig - { - Name = x.Name, - Type = x.Type, - TextEmbedding = KnowledgeEmbeddingConfigMongoModel.ToDomainModel(x.TextEmbedding), - CreateDate = x.CreateDate, - CreateUserId= x.CreateUserId - }); - } -} diff --git a/src/Plugins/BotSharp.Plugin.MongoStorage/Repository/MongoRepository.KnowledgeBase.cs b/src/Plugins/BotSharp.Plugin.MongoStorage/Repository/MongoRepository.KnowledgeBase.cs new file mode 100644 index 000000000..73352dee3 --- /dev/null +++ b/src/Plugins/BotSharp.Plugin.MongoStorage/Repository/MongoRepository.KnowledgeBase.cs @@ -0,0 +1,113 @@ +using BotSharp.Abstraction.VectorStorage.Models; + +namespace BotSharp.Plugin.MongoStorage.Repository; + +public partial class MongoRepository +{ + public bool AddKnowledgeCollectionConfigs(List configs, bool reset = false) + { + var filter = Builders.Filter.Empty; + var docs = configs?.Where(x => !string.IsNullOrWhiteSpace(x.Name)) + .Select(x => new KnowledgeCollectionConfigDocument + { + Id = Guid.NewGuid().ToString(), + Name = x.Name, + Type = x.Type, + TextEmbedding = KnowledgeEmbeddingConfigMongoModel.ToMongoModel(x.TextEmbedding) + })?.ToList() ?? new List(); + + if (reset) + { + _dc.KnowledgeCollectionConfigs.DeleteMany(filter); + _dc.KnowledgeCollectionConfigs.InsertMany(docs); + return true; + } + + // Update if collection already exists, otherwise insert. + var insertDocs = new List(); + var updateDocs = new List(); + + var names = docs.Select(x => x.Name).ToList(); + filter = Builders.Filter.In(x => x.Name, names); + var savedConfigs = _dc.KnowledgeCollectionConfigs.Find(filter).ToList(); + + foreach (var doc in docs) + { + var found = savedConfigs.FirstOrDefault(x => x.Name == doc.Name); + if (found != null) + { + found.Type = doc.Type; + found.VectorStorage = doc.VectorStorage; + found.TextEmbedding = doc.TextEmbedding; + updateDocs.Add(found); + } + else + { + insertDocs.Add(doc); + } + } + + if (!insertDocs.IsNullOrEmpty()) + { + _dc.KnowledgeCollectionConfigs.InsertMany(docs); + } + + if (!updateDocs.IsNullOrEmpty()) + { + foreach (var doc in updateDocs) + { + filter = Builders.Filter.Eq(x => x.Id, doc.Id); + _dc.KnowledgeCollectionConfigs.ReplaceOne(filter, doc); + } + } + + return true; + } + + public bool DeleteKnowledgeCollectionConfig(string collectionName) + { + if (string.IsNullOrWhiteSpace(collectionName)) return false; + + var filter = Builders.Filter.Eq(x => x.Name, collectionName); + var deleted = _dc.KnowledgeCollectionConfigs.DeleteMany(filter); + return deleted.DeletedCount > 0; + } + + public IEnumerable GetKnowledgeCollectionConfigs(VectorCollectionConfigFilter filter) + { + if (filter == null) + { + return Enumerable.Empty(); + } + + var builder = Builders.Filter; + var filters = new List> { builder.Empty }; + + // Apply filters + if (!filter.CollectionNames.IsNullOrEmpty()) + { + filters.Add(builder.In(x => x.Name, filter.CollectionNames)); + } + + if (!filter.CollectionTypes.IsNullOrEmpty()) + { + filters.Add(builder.In(x => x.Type, filter.CollectionTypes)); + } + + if (!filter.VectorStroageProviders.IsNullOrEmpty()) + { + filters.Add(builder.In(x => x.VectorStorage.Provider, filter.VectorStroageProviders)); + } + + // Get data + var configs = _dc.KnowledgeCollectionConfigs.Find(Builders.Filter.And(filters)).ToList(); + + return configs.Select(x => new VectorCollectionConfig + { + Name = x.Name, + Type = x.Type, + VectorStorage = KnowledgeVectorStorageConfigMongoModel.ToDomainModel(x.VectorStorage), + TextEmbedding = KnowledgeEmbeddingConfigMongoModel.ToDomainModel(x.TextEmbedding) + }); + } +} diff --git a/src/Plugins/BotSharp.Plugin.Qdrant/QdrantDb.cs b/src/Plugins/BotSharp.Plugin.Qdrant/QdrantDb.cs index be89fec05..b9563a1b9 100644 --- a/src/Plugins/BotSharp.Plugin.Qdrant/QdrantDb.cs +++ b/src/Plugins/BotSharp.Plugin.Qdrant/QdrantDb.cs @@ -1,5 +1,6 @@ using BotSharp.Abstraction.Utilities; using BotSharp.Abstraction.VectorStorage.Models; +using Microsoft.Extensions.Logging; using Qdrant.Client; using Qdrant.Client.Grpc; @@ -10,12 +11,15 @@ public class QdrantDb : IVectorDb private QdrantClient _client; private readonly QdrantSetting _setting; private readonly IServiceProvider _services; + private readonly ILogger _logger; public QdrantDb( QdrantSetting setting, + ILogger logger, IServiceProvider services) { _setting = setting; + _logger = logger; _services = services; } @@ -35,21 +39,28 @@ private QdrantClient GetClient() return _client; } - public async Task CreateCollection(string collectionName, int dim) + public async Task CreateCollection(string collectionName, int dimension) { var client = GetClient(); var exist = await DoesCollectionExist(client, collectionName); if (exist) return false; - // Create a new collection - await client.CreateCollectionAsync(collectionName, new VectorParams() + try { - Size = (ulong)dim, - Distance = Distance.Cosine - }); - - return true; + // Create a new collection + await client.CreateCollectionAsync(collectionName, new VectorParams() + { + Size = (ulong)dimension, + Distance = Distance.Cosine + }); + return true; + } + catch (Exception ex) + { + _logger.LogWarning($"Error when create collection (Name: {collectionName}, Dimension: {dimension})."); + return false; + } } public async Task DeleteCollection(string collectionName) @@ -145,8 +156,6 @@ public async Task> GetCollectionData(string co }); } - - public async Task Upsert(string collectionName, Guid id, float[] vector, string text, Dictionary? payload = null) { // Insert vectors @@ -216,10 +225,12 @@ public async Task> Search(string collectionNam return results; } - public async Task DeleteCollectionData(string collectionName, Guid id) + public async Task DeleteCollectionData(string collectionName, List ids) { + if (ids.IsNullOrEmpty()) return false; + var client = GetClient(); - var result = await client.DeleteAsync(collectionName, id); + var result = await client.DeleteAsync(collectionName, ids); return result.Status == UpdateStatus.Completed; } diff --git a/src/Plugins/BotSharp.Plugin.SemanticKernel/SemanticKernelMemoryStoreProvider.cs b/src/Plugins/BotSharp.Plugin.SemanticKernel/SemanticKernelMemoryStoreProvider.cs index 4912e1276..3d8a8455c 100644 --- a/src/Plugins/BotSharp.Plugin.SemanticKernel/SemanticKernelMemoryStoreProvider.cs +++ b/src/Plugins/BotSharp.Plugin.SemanticKernel/SemanticKernelMemoryStoreProvider.cs @@ -4,6 +4,7 @@ using Microsoft.SemanticKernel.Memory; using System; using System.Collections.Generic; +using System.Linq; using System.Threading.Tasks; namespace BotSharp.Plugin.SemanticKernel @@ -84,16 +85,15 @@ public async Task Upsert(string collectionName, Guid id, float[] vector, s return true; } - public async Task DeleteCollectionData(string collectionName, Guid id) + public async Task DeleteCollectionData(string collectionName, List ids) { + if (ids.IsNullOrEmpty()) return false; + var exist = await _memoryStore.DoesCollectionExistAsync(collectionName); + if (!exist) return false; - if (exist) - { - await _memoryStore.RemoveAsync(collectionName, id.ToString()); - return true; - } - return false; + await _memoryStore.RemoveBatchAsync(collectionName, ids.Select(x => x.ToString())); + return true; } } } From 1dbae69018ad90f7a554eb8ff3e6e3005b1be26c Mon Sep 17 00:00:00 2001 From: Jicheng Lu <103353@smsassist.com> Date: Tue, 10 Sep 2024 14:03:55 -0500 Subject: [PATCH 07/14] minor change --- .../BotSharp.Plugin.Planner/Functions/PrimaryStagePlanFn.cs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Plugins/BotSharp.Plugin.Planner/Functions/PrimaryStagePlanFn.cs b/src/Plugins/BotSharp.Plugin.Planner/Functions/PrimaryStagePlanFn.cs index 593e80536..71f2bf815 100644 --- a/src/Plugins/BotSharp.Plugin.Planner/Functions/PrimaryStagePlanFn.cs +++ b/src/Plugins/BotSharp.Plugin.Planner/Functions/PrimaryStagePlanFn.cs @@ -21,7 +21,6 @@ public async Task Execute(RoleDialogModel message) var state = _services.GetRequiredService(); var knowledgeService = _services.GetRequiredService(); var knowledgeSettings = _services.GetRequiredService(); - var fn = _services.GetRequiredService(); state.SetState("max_tokens", "4096"); var task = JsonSerializer.Deserialize(message.FunctionArgs); From 2a3bea2ceaa9998580535bb86aa52b2a9cbd801b Mon Sep 17 00:00:00 2001 From: Jicheng Lu <103353@smsassist.com> Date: Tue, 10 Sep 2024 14:33:42 -0500 Subject: [PATCH 08/14] add knowledge file list --- .../Files/IFileStorageService.cs | 2 + .../Files/Models/KnowledgeFileModel.cs | 10 ++++ .../Knowledges/IKnowledgeService.cs | 1 + .../Knowledges/Models/KnowledgeDocMetaData.cs | 3 + .../LocalFileStorageService.KnowledgeBase.cs | 55 ++++++++++++++++++- .../Controllers/KnowledgeBaseController.cs | 20 +++++++ .../Knowledges/KnowledgeFileViewModel.cs | 33 +++++++++++ .../Services/KnowledgeService.Document.cs | 16 ++++++ 8 files changed, 139 insertions(+), 1 deletion(-) create mode 100644 src/Infrastructure/BotSharp.Abstraction/Files/Models/KnowledgeFileModel.cs create mode 100644 src/Infrastructure/BotSharp.OpenAPI/ViewModels/Knowledges/KnowledgeFileViewModel.cs diff --git a/src/Infrastructure/BotSharp.Abstraction/Files/IFileStorageService.cs b/src/Infrastructure/BotSharp.Abstraction/Files/IFileStorageService.cs index 277e122a3..62cab86f1 100644 --- a/src/Infrastructure/BotSharp.Abstraction/Files/IFileStorageService.cs +++ b/src/Infrastructure/BotSharp.Abstraction/Files/IFileStorageService.cs @@ -76,5 +76,7 @@ public interface IFileStorageService bool SaveKnolwedgeBaseFileMeta(string collectionName, string vectorStoreProvider,string fileId, KnowledgeDocMetaData metaData); KnowledgeDocMetaData? GetKnowledgeBaseFileMeta(string collectionName, string vectorStoreProvider, string fileId); + + IEnumerable GetKnowledgeBaseFiles(string collectionName, string vectorStoreProvider); #endregion } diff --git a/src/Infrastructure/BotSharp.Abstraction/Files/Models/KnowledgeFileModel.cs b/src/Infrastructure/BotSharp.Abstraction/Files/Models/KnowledgeFileModel.cs new file mode 100644 index 000000000..f69c8f50d --- /dev/null +++ b/src/Infrastructure/BotSharp.Abstraction/Files/Models/KnowledgeFileModel.cs @@ -0,0 +1,10 @@ +namespace BotSharp.Abstraction.Files.Models; + +public class KnowledgeFileModel +{ + public string FileId { get; set; } + public string FileName { get; set; } + public string FileExtension { get; set; } + public string ContentType { get; set; } + public string FileUrl { get; set; } +} diff --git a/src/Infrastructure/BotSharp.Abstraction/Knowledges/IKnowledgeService.cs b/src/Infrastructure/BotSharp.Abstraction/Knowledges/IKnowledgeService.cs index 35bb77ba2..207519ea5 100644 --- a/src/Infrastructure/BotSharp.Abstraction/Knowledges/IKnowledgeService.cs +++ b/src/Infrastructure/BotSharp.Abstraction/Knowledges/IKnowledgeService.cs @@ -23,6 +23,7 @@ public interface IKnowledgeService #region Document Task UploadKnowledgeDocuments(string collectionName, IEnumerable files); Task DeleteKnowledgeDocument(string collectionName, string fileId); + Task> GetKnowledgeDocuments(string collectionName); #endregion #region Common diff --git a/src/Infrastructure/BotSharp.Abstraction/Knowledges/Models/KnowledgeDocMetaData.cs b/src/Infrastructure/BotSharp.Abstraction/Knowledges/Models/KnowledgeDocMetaData.cs index c74f3b051..955b9a06d 100644 --- a/src/Infrastructure/BotSharp.Abstraction/Knowledges/Models/KnowledgeDocMetaData.cs +++ b/src/Infrastructure/BotSharp.Abstraction/Knowledges/Models/KnowledgeDocMetaData.cs @@ -7,6 +7,9 @@ public class KnowledgeDocMetaData [JsonPropertyName("collection")] public string Collection { get; set; } + [JsonPropertyName("file_id")] + public string FileId { get; set; } + [JsonPropertyName("file_name")] public string FileName { get; set; } diff --git a/src/Infrastructure/BotSharp.Core/Files/Services/Storage/LocalFileStorageService.KnowledgeBase.cs b/src/Infrastructure/BotSharp.Core/Files/Services/Storage/LocalFileStorageService.KnowledgeBase.cs index 26d2735a9..bcb83c887 100644 --- a/src/Infrastructure/BotSharp.Core/Files/Services/Storage/LocalFileStorageService.KnowledgeBase.cs +++ b/src/Infrastructure/BotSharp.Core/Files/Services/Storage/LocalFileStorageService.KnowledgeBase.cs @@ -1,3 +1,6 @@ +using AspectInjector.Broker; +using BotSharp.Abstraction.Conversations.Models; +using BotSharp.Abstraction.Files.Models; using BotSharp.Abstraction.Knowledges.Models; using System.IO; @@ -89,7 +92,9 @@ public bool SaveKnolwedgeBaseFileMeta(string collectionName, string vectorStoreP public KnowledgeDocMetaData? GetKnowledgeBaseFileMeta(string collectionName, string vectorStoreProvider, string fileId) { - if (string.IsNullOrWhiteSpace(collectionName) || string.IsNullOrWhiteSpace(fileId)) + if (string.IsNullOrWhiteSpace(collectionName) + || string.IsNullOrWhiteSpace(vectorStoreProvider) + || string.IsNullOrWhiteSpace(fileId)) { return null; } @@ -106,8 +111,56 @@ public bool SaveKnolwedgeBaseFileMeta(string collectionName, string vectorStoreP return metaData; } + public IEnumerable GetKnowledgeBaseFiles(string collectionName, string vectorStoreProvider) + { + if (string.IsNullOrWhiteSpace(collectionName) + || string.IsNullOrWhiteSpace(vectorStoreProvider)) + { + return Enumerable.Empty(); + } + + var docDir = BuildKnowledgeCollectionDocumentDir(collectionName, vectorStoreProvider); + if (!ExistDirectory(docDir)) + { + return Enumerable.Empty(); + } + + var files = new List(); + foreach (var folder in Directory.GetDirectories(docDir)) + { + var metaFile = Path.Combine(docDir, folder, KNOWLEDGE_DOC_META_FILE); + if (!File.Exists(metaFile)) continue; + + var content = File.ReadAllText(metaFile); + var metaData = JsonSerializer.Deserialize(content, _jsonOptions); + if (metaData == null) continue; + + var fileName = Path.GetFileNameWithoutExtension(metaData.FileName); + var fileExtension = Path.GetExtension(metaData.FileName); + + files.Add(new KnowledgeFileModel + { + FileId = metaData.FileId, + FileName = metaData.FileName, + FileExtension = fileExtension.Substring(1), + ContentType = FileUtility.GetFileContentType(metaData.FileName), + FileUrl = BuildKnowledgeFileUrl(collectionName, vectorStoreProvider, metaData.FileId) + }); + } + + return files; + } + + + #region Private methods private string BuildKnowledgeCollectionDocumentDir(string collectionName, string vectorStoreProvider) { return Path.Combine(_baseDir, KNOWLEDGE_FOLDER, KNOWLEDGE_DOC_FOLDER, vectorStoreProvider, collectionName); } + + private string BuildKnowledgeFileUrl(string collectionName, string vectorProvider, string fileId) + { + return $"/knowledge/file/{vectorProvider}/{collectionName}/{fileId}"; + } + #endregion } diff --git a/src/Infrastructure/BotSharp.OpenAPI/Controllers/KnowledgeBaseController.cs b/src/Infrastructure/BotSharp.OpenAPI/Controllers/KnowledgeBaseController.cs index 17d21964d..b11aaab41 100644 --- a/src/Infrastructure/BotSharp.OpenAPI/Controllers/KnowledgeBaseController.cs +++ b/src/Infrastructure/BotSharp.OpenAPI/Controllers/KnowledgeBaseController.cs @@ -1,3 +1,4 @@ +using BotSharp.Abstraction.Files.Constants; using BotSharp.Abstraction.Graph.Models; using BotSharp.Abstraction.Knowledges.Models; using BotSharp.Abstraction.VectorStorage.Models; @@ -134,8 +135,16 @@ public async Task DeleteKnowledgeDocument([FromRoute] string collection, [ var response = await _knowledgeService.DeleteKnowledgeDocument(collection, fileId); return response; } + + [HttpGet("/knowledge/document/{collection}/list")] + public async Task> GetKnowledgeDocuments([FromRoute] string collection) + { + var files = await _knowledgeService.GetKnowledgeDocuments(collection); + return files.Select(x => KnowledgeFileViewModel.From(x)); + } #endregion + #region Common [HttpPost("/knowledge/vector/refresh-configs")] public async Task RefreshVectorCollectionConfigs([FromBody] VectorCollectionConfigsModel request) @@ -144,4 +153,15 @@ public async Task RefreshVectorCollectionConfigs([FromBody] VectorCollec return saved ? "Success" : "Fail"; } #endregion + + + #region Private methods + private FileContentResult BuildFileResult(string file) + { + using Stream stream = System.IO.File.Open(file, FileMode.Open, FileAccess.Read, FileShare.Read); + var bytes = new byte[stream.Length]; + stream.Read(bytes, 0, (int)stream.Length); + return File(bytes, "application/octet-stream", Path.GetFileName(file)); + } + #endregion } diff --git a/src/Infrastructure/BotSharp.OpenAPI/ViewModels/Knowledges/KnowledgeFileViewModel.cs b/src/Infrastructure/BotSharp.OpenAPI/ViewModels/Knowledges/KnowledgeFileViewModel.cs new file mode 100644 index 000000000..850863b1c --- /dev/null +++ b/src/Infrastructure/BotSharp.OpenAPI/ViewModels/Knowledges/KnowledgeFileViewModel.cs @@ -0,0 +1,33 @@ +using System.Text.Json.Serialization; + +namespace BotSharp.OpenAPI.ViewModels.Knowledges; + +public class KnowledgeFileViewModel +{ + [JsonPropertyName("file_id")] + public string FileId { get; set; } + + [JsonPropertyName("file_name")] + public string FileName { get; set; } + + [JsonPropertyName("file_extension")] + public string FileExtension { get; set; } + + [JsonPropertyName("content_type")] + public string ContentType { get; set; } + + [JsonPropertyName("file_url")] + public string FileUrl { get; set; } + + public static KnowledgeFileViewModel From(KnowledgeFileModel model) + { + return new KnowledgeFileViewModel + { + FileId = model.FileId, + FileName = model.FileName, + FileExtension = model.FileExtension, + ContentType = model.ContentType, + FileUrl = model.FileUrl + }; + } +} diff --git a/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Document.cs b/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Document.cs index 7a94cdad7..e98b2634b 100644 --- a/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Document.cs +++ b/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Document.cs @@ -74,6 +74,7 @@ public async Task UploadKnowledgeDocuments(string colle fileStoreage.SaveKnolwedgeBaseFileMeta(collectionName.CleanStr(), vectorStoreProvider.CleanStr(), fileId, new KnowledgeDocMetaData { Collection = collectionName, + FileId = fileId, FileName = file.FileName, ContentType = contentType, VectorDataIds = dataIds, @@ -136,6 +137,21 @@ public async Task DeleteKnowledgeDocument(string collectionName, string fi } } + + public async Task> GetKnowledgeDocuments(string collectionName) + { + if (string.IsNullOrWhiteSpace(collectionName)) + { + return Enumerable.Empty(); + } + + var fileStorage = _services.GetRequiredService(); + var vectorStoreProvider = _settings.VectorDb.Provider; + var files = fileStorage.GetKnowledgeBaseFiles(collectionName.CleanStr(), vectorStoreProvider.CleanStr()); + return files; + } + + public async Task FeedVectorKnowledge(string collectionName, KnowledgeCreationModel knowledge) { var index = 0; From 0cebb56a3e9f60dcc5d3f1cac38a79260568b84a Mon Sep 17 00:00:00 2001 From: Jicheng Lu <103353@smsassist.com> Date: Tue, 10 Sep 2024 15:07:56 -0500 Subject: [PATCH 09/14] add file binary data --- .../Files/IFileStorageService.cs | 2 ++ .../Files/Models/FileBinaryDataModel.cs | 7 ++++ .../Knowledges/IKnowledgeService.cs | 1 + .../LocalFileStorageService.KnowledgeBase.cs | 35 ++++++++++++++++--- .../Controllers/KnowledgeBaseController.cs | 19 +++++++--- .../Services/KnowledgeService.Document.cs | 8 +++++ 6 files changed, 63 insertions(+), 9 deletions(-) create mode 100644 src/Infrastructure/BotSharp.Abstraction/Files/Models/FileBinaryDataModel.cs diff --git a/src/Infrastructure/BotSharp.Abstraction/Files/IFileStorageService.cs b/src/Infrastructure/BotSharp.Abstraction/Files/IFileStorageService.cs index 62cab86f1..d87da6bd4 100644 --- a/src/Infrastructure/BotSharp.Abstraction/Files/IFileStorageService.cs +++ b/src/Infrastructure/BotSharp.Abstraction/Files/IFileStorageService.cs @@ -78,5 +78,7 @@ public interface IFileStorageService KnowledgeDocMetaData? GetKnowledgeBaseFileMeta(string collectionName, string vectorStoreProvider, string fileId); IEnumerable GetKnowledgeBaseFiles(string collectionName, string vectorStoreProvider); + + FileBinaryDataModel? GetKnowledgeBaseFileBinaryData(string collectionName, string vectorStoreProvider, string fileId); #endregion } diff --git a/src/Infrastructure/BotSharp.Abstraction/Files/Models/FileBinaryDataModel.cs b/src/Infrastructure/BotSharp.Abstraction/Files/Models/FileBinaryDataModel.cs new file mode 100644 index 000000000..bc7bee8ca --- /dev/null +++ b/src/Infrastructure/BotSharp.Abstraction/Files/Models/FileBinaryDataModel.cs @@ -0,0 +1,7 @@ +namespace BotSharp.Abstraction.Files.Models; + +public class FileBinaryDataModel +{ + public string FileName { get; set; } + public BinaryData FileBinaryData { get; set; } +} diff --git a/src/Infrastructure/BotSharp.Abstraction/Knowledges/IKnowledgeService.cs b/src/Infrastructure/BotSharp.Abstraction/Knowledges/IKnowledgeService.cs index 207519ea5..926a7bb8b 100644 --- a/src/Infrastructure/BotSharp.Abstraction/Knowledges/IKnowledgeService.cs +++ b/src/Infrastructure/BotSharp.Abstraction/Knowledges/IKnowledgeService.cs @@ -24,6 +24,7 @@ public interface IKnowledgeService Task UploadKnowledgeDocuments(string collectionName, IEnumerable files); Task DeleteKnowledgeDocument(string collectionName, string fileId); Task> GetKnowledgeDocuments(string collectionName); + Task GetKnowledgeDocumentBinaryData(string collectionName, string fileId); #endregion #region Common diff --git a/src/Infrastructure/BotSharp.Core/Files/Services/Storage/LocalFileStorageService.KnowledgeBase.cs b/src/Infrastructure/BotSharp.Core/Files/Services/Storage/LocalFileStorageService.KnowledgeBase.cs index bcb83c887..521507a0e 100644 --- a/src/Infrastructure/BotSharp.Core/Files/Services/Storage/LocalFileStorageService.KnowledgeBase.cs +++ b/src/Infrastructure/BotSharp.Core/Files/Services/Storage/LocalFileStorageService.KnowledgeBase.cs @@ -2,6 +2,7 @@ using BotSharp.Abstraction.Conversations.Models; using BotSharp.Abstraction.Files.Models; using BotSharp.Abstraction.Knowledges.Models; +using StackExchange.Redis; using System.IO; namespace BotSharp.Core.Files.Services; @@ -128,7 +129,7 @@ public IEnumerable GetKnowledgeBaseFiles(string collectionNa var files = new List(); foreach (var folder in Directory.GetDirectories(docDir)) { - var metaFile = Path.Combine(docDir, folder, KNOWLEDGE_DOC_META_FILE); + var metaFile = Path.Combine(folder, KNOWLEDGE_DOC_META_FILE); if (!File.Exists(metaFile)) continue; var content = File.ReadAllText(metaFile); @@ -144,13 +145,39 @@ public IEnumerable GetKnowledgeBaseFiles(string collectionNa FileName = metaData.FileName, FileExtension = fileExtension.Substring(1), ContentType = FileUtility.GetFileContentType(metaData.FileName), - FileUrl = BuildKnowledgeFileUrl(collectionName, vectorStoreProvider, metaData.FileId) + FileUrl = BuildKnowledgeFileUrl(collectionName, metaData.FileId) }); } return files; } + public FileBinaryDataModel? GetKnowledgeBaseFileBinaryData(string collectionName, string vectorStoreProvider, string fileId) + { + if (string.IsNullOrWhiteSpace(collectionName) + || string.IsNullOrWhiteSpace(vectorStoreProvider) + || string.IsNullOrWhiteSpace(fileId)) + { + return null; + } + + var docDir = BuildKnowledgeCollectionDocumentDir(collectionName, vectorStoreProvider); + var fileDir = Path.Combine(docDir, fileId); + if (!ExistDirectory(fileDir)) return null; + + var metaFile = Path.Combine(fileDir, KNOWLEDGE_DOC_META_FILE); + var content = File.ReadAllText(metaFile); + var metaData = JsonSerializer.Deserialize(content, _jsonOptions); + using var stream = new FileStream(fileDir, FileMode.Open, FileAccess.Read); + stream.Position = 0; + + return new FileBinaryDataModel + { + FileName = metaData.FileName, + FileBinaryData = BinaryData.FromStream(stream) + }; + } + #region Private methods private string BuildKnowledgeCollectionDocumentDir(string collectionName, string vectorStoreProvider) @@ -158,9 +185,9 @@ private string BuildKnowledgeCollectionDocumentDir(string collectionName, string return Path.Combine(_baseDir, KNOWLEDGE_FOLDER, KNOWLEDGE_DOC_FOLDER, vectorStoreProvider, collectionName); } - private string BuildKnowledgeFileUrl(string collectionName, string vectorProvider, string fileId) + private string BuildKnowledgeFileUrl(string collectionName, string fileId) { - return $"/knowledge/file/{vectorProvider}/{collectionName}/{fileId}"; + return $"/knowledge/file/{collectionName}/file/{fileId}"; } #endregion } diff --git a/src/Infrastructure/BotSharp.OpenAPI/Controllers/KnowledgeBaseController.cs b/src/Infrastructure/BotSharp.OpenAPI/Controllers/KnowledgeBaseController.cs index b11aaab41..ebaed0d0c 100644 --- a/src/Infrastructure/BotSharp.OpenAPI/Controllers/KnowledgeBaseController.cs +++ b/src/Infrastructure/BotSharp.OpenAPI/Controllers/KnowledgeBaseController.cs @@ -142,6 +142,13 @@ public async Task> GetKnowledgeDocuments([Fr var files = await _knowledgeService.GetKnowledgeDocuments(collection); return files.Select(x => KnowledgeFileViewModel.From(x)); } + + [HttpGet("/knowledge/document/{collection}/file/{fileId}")] + public async Task GetKnowledgeDocument([FromRoute] string collection, [FromRoute] string fileId) + { + var file = await _knowledgeService.GetKnowledgeDocumentBinaryData(collection, fileId); + return BuildFileResult(file); + } #endregion @@ -156,12 +163,14 @@ public async Task RefreshVectorCollectionConfigs([FromBody] VectorCollec #region Private methods - private FileContentResult BuildFileResult(string file) + private FileContentResult BuildFileResult(FileBinaryDataModel? file) { - using Stream stream = System.IO.File.Open(file, FileMode.Open, FileAccess.Read, FileShare.Read); - var bytes = new byte[stream.Length]; - stream.Read(bytes, 0, (int)stream.Length); - return File(bytes, "application/octet-stream", Path.GetFileName(file)); + if (file == null) + { + return File(new byte[0], "application/octet-stream", "error.txt"); + } + + return File(file.FileBinaryData.ToArray(), "application/octet-stream", file.FileName); } #endregion } diff --git a/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Document.cs b/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Document.cs index e98b2634b..bc0fd5bfa 100644 --- a/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Document.cs +++ b/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Document.cs @@ -151,6 +151,14 @@ public async Task> GetKnowledgeDocuments(string return files; } + public async Task GetKnowledgeDocumentBinaryData(string collectionName, string fileId) + { + var fileStorage = _services.GetRequiredService(); + var vectorStoreProvider = _settings.VectorDb.Provider; + var file = fileStorage.GetKnowledgeBaseFileBinaryData(collectionName.CleanStr(), vectorStoreProvider.CleanStr(), fileId); + return file; + } + public async Task FeedVectorKnowledge(string collectionName, KnowledgeCreationModel knowledge) { From 6bfb2523f5974646031197f55c7299c6869bcaca Mon Sep 17 00:00:00 2001 From: Jicheng Lu <103353@smsassist.com> Date: Tue, 10 Sep 2024 15:11:40 -0500 Subject: [PATCH 10/14] add cos --- .../TencentCosService.Conversation.cs | 2 +- .../TencentCosService.KnowledgeBase.cs | 36 +++++++++++++++++++ .../Services/TencentCosService.User.cs | 2 +- .../Services/TencentCosService.cs | 1 + 4 files changed, 39 insertions(+), 2 deletions(-) create mode 100644 src/Plugins/BotSharp.Plugin.TencentCos/Services/TencentCosService.KnowledgeBase.cs diff --git a/src/Plugins/BotSharp.Plugin.TencentCos/Services/TencentCosService.Conversation.cs b/src/Plugins/BotSharp.Plugin.TencentCos/Services/TencentCosService.Conversation.cs index ff8dfa17c..844afb654 100644 --- a/src/Plugins/BotSharp.Plugin.TencentCos/Services/TencentCosService.Conversation.cs +++ b/src/Plugins/BotSharp.Plugin.TencentCos/Services/TencentCosService.Conversation.cs @@ -115,7 +115,7 @@ public IEnumerable GetMessagesWithFile(string conversationId, return foundMsgs; } - public bool SaveMessageFiles(string conversationId, string messageId, string source, List files) + public bool SaveMessageFiles(string conversationId, string messageId, string source, List files) { if (files.IsNullOrEmpty()) return false; diff --git a/src/Plugins/BotSharp.Plugin.TencentCos/Services/TencentCosService.KnowledgeBase.cs b/src/Plugins/BotSharp.Plugin.TencentCos/Services/TencentCosService.KnowledgeBase.cs new file mode 100644 index 000000000..9d521142f --- /dev/null +++ b/src/Plugins/BotSharp.Plugin.TencentCos/Services/TencentCosService.KnowledgeBase.cs @@ -0,0 +1,36 @@ +using BotSharp.Abstraction.Knowledges.Models; + +namespace BotSharp.Plugin.TencentCos.Services; + +public partial class TencentCosService +{ + public bool SaveKnowledgeBaseFile(string collectionName, string vectorStoreProvider, string fileId, string fileName, Stream stream) + { + throw new NotImplementedException(); + } + + public bool DeleteKnowledgeFile(string collectionName, string vectorStoreProvider, string? fileId = null) + { + throw new NotImplementedException(); + } + + public bool SaveKnolwedgeBaseFileMeta(string collectionName, string vectorStoreProvider, string fileId, KnowledgeDocMetaData metaData) + { + throw new NotImplementedException(); + } + + public KnowledgeDocMetaData? GetKnowledgeBaseFileMeta(string collectionName, string vectorStoreProvider, string fileId) + { + throw new NotImplementedException(); + } + + public IEnumerable GetKnowledgeBaseFiles(string collectionName, string vectorStoreProvider) + { + throw new NotImplementedException(); + } + + public FileBinaryDataModel? GetKnowledgeBaseFileBinaryData(string collectionName, string vectorStoreProvider, string fileId) + { + throw new NotImplementedException(); + } +} diff --git a/src/Plugins/BotSharp.Plugin.TencentCos/Services/TencentCosService.User.cs b/src/Plugins/BotSharp.Plugin.TencentCos/Services/TencentCosService.User.cs index a23085396..d955342da 100644 --- a/src/Plugins/BotSharp.Plugin.TencentCos/Services/TencentCosService.User.cs +++ b/src/Plugins/BotSharp.Plugin.TencentCos/Services/TencentCosService.User.cs @@ -16,7 +16,7 @@ public string GetUserAvatar() return found; } - public bool SaveUserAvatar(InputFileModel file) + public bool SaveUserAvatar(FileDataModel file) { if (file == null || string.IsNullOrEmpty(file.FileData)) return false; diff --git a/src/Plugins/BotSharp.Plugin.TencentCos/Services/TencentCosService.cs b/src/Plugins/BotSharp.Plugin.TencentCos/Services/TencentCosService.cs index c48812f1f..fe7339231 100644 --- a/src/Plugins/BotSharp.Plugin.TencentCos/Services/TencentCosService.cs +++ b/src/Plugins/BotSharp.Plugin.TencentCos/Services/TencentCosService.cs @@ -1,4 +1,5 @@ using BotSharp.Abstraction.Files; +using BotSharp.Abstraction.Knowledges.Models; using BotSharp.Abstraction.Users; using BotSharp.Plugin.TencentCos.Settings; using System.Net.Mime; From 39571b2f9b6b5eeb3c55d685f2ce92e16b5ada21 Mon Sep 17 00:00:00 2001 From: Jicheng Lu <103353@smsassist.com> Date: Wed, 11 Sep 2024 15:48:05 -0500 Subject: [PATCH 11/14] chop text --- .../LocalFileStorageService.KnowledgeBase.cs | 8 +- .../Helpers/TextChopper.cs | 28 +++-- .../Services/KnowledgeService.Document.cs | 108 +++++++++++++----- 3 files changed, 101 insertions(+), 43 deletions(-) diff --git a/src/Infrastructure/BotSharp.Core/Files/Services/Storage/LocalFileStorageService.KnowledgeBase.cs b/src/Infrastructure/BotSharp.Core/Files/Services/Storage/LocalFileStorageService.KnowledgeBase.cs index 521507a0e..1cb804a70 100644 --- a/src/Infrastructure/BotSharp.Core/Files/Services/Storage/LocalFileStorageService.KnowledgeBase.cs +++ b/src/Infrastructure/BotSharp.Core/Files/Services/Storage/LocalFileStorageService.KnowledgeBase.cs @@ -1,8 +1,4 @@ -using AspectInjector.Broker; -using BotSharp.Abstraction.Conversations.Models; -using BotSharp.Abstraction.Files.Models; using BotSharp.Abstraction.Knowledges.Models; -using StackExchange.Redis; using System.IO; namespace BotSharp.Core.Files.Services; @@ -29,8 +25,10 @@ public bool SaveKnowledgeBaseFile(string collectionName, string vectorStoreProvi Directory.CreateDirectory(dir); var filePath = Path.Combine(dir, fileName); - using var fs = File.Create(filePath); + using var fs = new FileStream(filePath, FileMode.Create, FileAccess.Write); stream.CopyTo(fs); + fs.Flush(); + fs.Close(); return true; } catch (Exception ex) diff --git a/src/Plugins/BotSharp.Plugin.KnowledgeBase/Helpers/TextChopper.cs b/src/Plugins/BotSharp.Plugin.KnowledgeBase/Helpers/TextChopper.cs index 9e9b5f9dc..8421e5371 100644 --- a/src/Plugins/BotSharp.Plugin.KnowledgeBase/Helpers/TextChopper.cs +++ b/src/Plugins/BotSharp.Plugin.KnowledgeBase/Helpers/TextChopper.cs @@ -14,40 +14,48 @@ public static List Chop(string content, ChunkOption option) private static List ChopByWord(string content, ChunkOption option) { var chunks = new List(); + var words = content.Split(' ').Where(x => !string.IsNullOrWhiteSpace(x)).ToList(); - var words = content.Split(' ') - .Where(x => !string.IsNullOrWhiteSpace(x)) - .ToList(); - - var chunk = ""; + var chunk = string.Empty; for (int i = 0; i < words.Count; i++) { chunk += words[i] + " "; if (chunk.Length > option.Size) { chunks.Add(chunk.Trim()); - chunk = ""; + chunk = string.Empty; i -= option.Conjunction; } } + if (chunks.IsNullOrEmpty() && !string.IsNullOrEmpty(chunk)) + { + chunks.Add(chunk); + } + return chunks; } private static List ChopByChar(string content, ChunkOption option) { var chunks = new List(); + var chunk = string.Empty; var currentPos = 0; + while (currentPos < content.Length) { - var len = content.Length - currentPos > option.Size ? - option.Size : - content.Length - currentPos; - var chunk = content.Substring(currentPos, len); + var len = content.Length - currentPos > option.Size ? option.Size : content.Length - currentPos; + chunk = content.Substring(currentPos, len); chunks.Add(chunk); // move backward currentPos += option.Size - option.Conjunction; } + + if (chunks.IsNullOrEmpty() && !string.IsNullOrEmpty(chunk)) + { + chunks.Add(chunk); + } + return chunks; } } diff --git a/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Document.cs b/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Document.cs index bc0fd5bfa..27a2e590a 100644 --- a/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Document.cs +++ b/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Document.cs @@ -2,6 +2,7 @@ using BotSharp.Abstraction.Files.Models; using BotSharp.Abstraction.Files.Utilities; using System.Net.Http; +using System.Net.Mime; namespace BotSharp.Plugin.KnowledgeBase.Services; @@ -34,43 +35,23 @@ public async Task UploadKnowledgeDocuments(string colle try { - var dataIds = new List(); - - // Chop text (to do) + // Get document info var (contentType, bytes) = await GetFileInfo(file); - using var stream = new MemoryStream(bytes); - using var reader = new StreamReader(stream); - var content = await reader.ReadToEndAsync(); - - // Save file + var contents = await GetFileContent(contentType, bytes); + + // Save document var fileId = Guid.NewGuid().ToString(); - var saved = fileStoreage.SaveKnowledgeBaseFile(collectionName.CleanStr(), vectorStoreProvider.CleanStr(), fileId, file.FileName, stream); - reader.Close(); - stream.Close(); - + var saved = SaveDocument(collectionName, vectorStoreProvider, fileId, file.FileName, bytes); if (!saved) { failedFiles.Add(file.FileName); continue; } - // Text embedding - var vectorDb = GetVectorDb(); - var textEmbedding = GetTextEmbedding(collectionName); - var vector = await textEmbedding.GetVectorAsync(content); - // Save to vector db - var dataId = Guid.NewGuid(); - saved = await vectorDb.Upsert(collectionName, dataId, vector, content, new Dictionary - { - { "fileName", file.FileName }, - { "fileId", fileId }, - { "page", "0" } - }); - - if (saved) + var dataIds = await SaveToVectorDb(collectionName, fileId, file.FileName, contents); + if (!dataIds.IsNullOrEmpty()) { - dataIds.Add(dataId.ToString()); fileStoreage.SaveKnolwedgeBaseFileMeta(collectionName.CleanStr(), vectorStoreProvider.CleanStr(), fileId, new KnowledgeDocMetaData { Collection = collectionName, @@ -117,8 +98,8 @@ public async Task DeleteKnowledgeDocument(string collectionName, string fi var vectorDb = GetVectorDb(); var vectorStoreProvider = _settings.VectorDb.Provider; - fileStorage.DeleteKnowledgeFile(collectionName.CleanStr(), vectorStoreProvider.CleanStr(), fileId); var metaData = fileStorage.GetKnowledgeBaseFileMeta(collectionName.CleanStr(), vectorStoreProvider.CleanStr(), fileId); + fileStorage.DeleteKnowledgeFile(collectionName.CleanStr(), vectorStoreProvider.CleanStr(), fileId); if (metaData != null && !metaData.VectorDataIds.IsNullOrEmpty()) { @@ -212,5 +193,76 @@ public async Task FeedVectorKnowledge(string collectionName, KnowledgeCreationMo return (string.Empty, new byte[0]); } + + private async Task> GetFileContent(string contentType, byte[] bytes) + { + var results = new List(); + + if (contentType.IsEqualTo(MediaTypeNames.Text.Plain)) + { + using var stream = new MemoryStream(bytes); + using var reader = new StreamReader(stream); + var content = await reader.ReadToEndAsync(); + + var lines = TextChopper.Chop(content, new ChunkOption + { + Size = 1024, + Conjunction = 32, + SplitByWord = true, + }); + + reader.Close(); + stream.Close(); + results.AddRange(lines); + } + else if (contentType.IsEqualTo(MediaTypeNames.Application.Pdf)) + { + // to do + } + + return results; + } + + private bool SaveDocument(string collectionName, string vectorStoreProvider, string fileId, string fileName, byte[] bytes) + { + var fileStoreage = _services.GetRequiredService(); + using var stream = new MemoryStream(bytes); + stream.Position = 0; + + var saved = fileStoreage.SaveKnowledgeBaseFile(collectionName.CleanStr(), vectorStoreProvider.CleanStr(), fileId, fileName, stream); + stream.Close(); + return saved; + } + + private async Task> SaveToVectorDb(string collectionName, string fileId, string fileName, IEnumerable contents) + { + if (contents.IsNullOrEmpty()) + { + return Enumerable.Empty(); + } + + var dataIds = new List(); + var vectorDb = GetVectorDb(); + var textEmbedding = GetTextEmbedding(collectionName); + + for (int i = 0; i < contents.Count(); i++) + { + var content = contents.ElementAt(i); + var vector = await textEmbedding.GetVectorAsync(content); + var dataId = Guid.NewGuid(); + var saved = await vectorDb.Upsert(collectionName, dataId, vector, content, new Dictionary + { + { "fileName", fileName }, + { "fileId", fileId }, + { "textNumber", $"{i + 1}" } + }); + + if (!saved) continue; + + dataIds.Add(dataId.ToString()); + } + + return dataIds; + } #endregion } From f161ab6e6c44281c04d3f00f149857f5f1d2876a Mon Sep 17 00:00:00 2001 From: Jicheng Lu <103353@smsassist.com> Date: Wed, 11 Sep 2024 16:07:17 -0500 Subject: [PATCH 12/14] add missing --- .../Helpers/KnowledgeSettingHelper.cs | 5 +++-- .../Services/KnowledgeService.cs | 13 ++----------- .../Repository/MongoRepository.KnowledgeBase.cs | 1 + 3 files changed, 6 insertions(+), 13 deletions(-) diff --git a/src/Plugins/BotSharp.Plugin.KnowledgeBase/Helpers/KnowledgeSettingHelper.cs b/src/Plugins/BotSharp.Plugin.KnowledgeBase/Helpers/KnowledgeSettingHelper.cs index 137579a79..08ad2c495 100644 --- a/src/Plugins/BotSharp.Plugin.KnowledgeBase/Helpers/KnowledgeSettingHelper.cs +++ b/src/Plugins/BotSharp.Plugin.KnowledgeBase/Helpers/KnowledgeSettingHelper.cs @@ -4,10 +4,12 @@ public static class KnowledgeSettingHelper { public static ITextEmbedding GetTextEmbeddingSetting(IServiceProvider services, string collectionName) { + var settings = services.GetRequiredService(); var db = services.GetRequiredService(); var configs = db.GetKnowledgeCollectionConfigs(new VectorCollectionConfigFilter { - CollectionNames = new[] { collectionName } + CollectionNames = [collectionName], + VectorStroageProviders = [settings.VectorDb.Provider] }); var found = configs?.FirstOrDefault()?.TextEmbedding; @@ -17,7 +19,6 @@ public static ITextEmbedding GetTextEmbeddingSetting(IServiceProvider services, if (found == null) { - var settings = services.GetRequiredService(); provider = settings.Default.TextEmbedding.Provider; model = settings.Default.TextEmbedding.Model; dimension = settings.Default.TextEmbedding.Dimension; diff --git a/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.cs b/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.cs index 64ae2a47d..57003c5f7 100644 --- a/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.cs +++ b/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.cs @@ -31,18 +31,9 @@ private IGraphDb GetGraphDb() return db; } - private ITextEmbedding GetTextEmbedding(string collection) + private ITextEmbedding GetTextEmbedding(string collectionName) { - return KnowledgeSettingHelper.GetTextEmbeddingSetting(_services, collection); - } - - private VectorCollectionConfig? GetVectorCollectionConfig(string collection) - { - var db = _services.GetRequiredService(); - return db.GetKnowledgeCollectionConfigs(new VectorCollectionConfigFilter - { - CollectionNames = new[] { collection } - })?.FirstOrDefault(); + return KnowledgeSettingHelper.GetTextEmbeddingSetting(_services, collectionName); } private async Task GetUserId() diff --git a/src/Plugins/BotSharp.Plugin.MongoStorage/Repository/MongoRepository.KnowledgeBase.cs b/src/Plugins/BotSharp.Plugin.MongoStorage/Repository/MongoRepository.KnowledgeBase.cs index 73352dee3..b4c6e3b99 100644 --- a/src/Plugins/BotSharp.Plugin.MongoStorage/Repository/MongoRepository.KnowledgeBase.cs +++ b/src/Plugins/BotSharp.Plugin.MongoStorage/Repository/MongoRepository.KnowledgeBase.cs @@ -13,6 +13,7 @@ public bool AddKnowledgeCollectionConfigs(List configs, Id = Guid.NewGuid().ToString(), Name = x.Name, Type = x.Type, + VectorStorage = KnowledgeVectorStorageConfigMongoModel.ToMongoModel(x.VectorStorage), TextEmbedding = KnowledgeEmbeddingConfigMongoModel.ToMongoModel(x.TextEmbedding) })?.ToList() ?? new List(); From 4538e612167ef0e38df78615c51b96d524f9ddd2 Mon Sep 17 00:00:00 2001 From: Jicheng Lu <103353@smsassist.com> Date: Wed, 11 Sep 2024 16:08:46 -0500 Subject: [PATCH 13/14] minor change --- .../BackgroundServices/ConversationTimeoutService.cs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/Infrastructure/BotSharp.OpenAPI/BackgroundServices/ConversationTimeoutService.cs b/src/Infrastructure/BotSharp.OpenAPI/BackgroundServices/ConversationTimeoutService.cs index a409ea149..4e76dc069 100644 --- a/src/Infrastructure/BotSharp.OpenAPI/BackgroundServices/ConversationTimeoutService.cs +++ b/src/Infrastructure/BotSharp.OpenAPI/BackgroundServices/ConversationTimeoutService.cs @@ -62,7 +62,11 @@ private async Task CleanIdleConversationsAsync() if (cleanSetting == null || !cleanSetting.Enable) return; var conversationService = scope.ServiceProvider.GetRequiredService(); - var conversationIds = await conversationService.GetIdleConversations(cleanSetting.BatchSize, cleanSetting.MessageLimit, cleanSetting.BufferHours, cleanSetting.ExcludeAgentIds); + var batchSize = cleanSetting.BatchSize; + var limit = cleanSetting.MessageLimit; + var bufferHours = cleanSetting.BufferHours; + var excludeAgentIds = cleanSetting.ExcludeAgentIds ?? new List(); + var conversationIds = await conversationService.GetIdleConversations(batchSize, limit, bufferHours, excludeAgentIds); if (!conversationIds.IsNullOrEmpty()) { From 4232d162438257578dd2c169fd0914f14e991115 Mon Sep 17 00:00:00 2001 From: Jicheng Lu <103353@smsassist.com> Date: Wed, 11 Sep 2024 16:12:48 -0500 Subject: [PATCH 14/14] add comments --- .../Helpers/KnowledgeSettingHelper.cs | 3 ++ .../Services/KnowledgeService.Document.cs | 38 +++++-------------- 2 files changed, 13 insertions(+), 28 deletions(-) diff --git a/src/Plugins/BotSharp.Plugin.KnowledgeBase/Helpers/KnowledgeSettingHelper.cs b/src/Plugins/BotSharp.Plugin.KnowledgeBase/Helpers/KnowledgeSettingHelper.cs index 08ad2c495..48742f0a5 100644 --- a/src/Plugins/BotSharp.Plugin.KnowledgeBase/Helpers/KnowledgeSettingHelper.cs +++ b/src/Plugins/BotSharp.Plugin.KnowledgeBase/Helpers/KnowledgeSettingHelper.cs @@ -6,6 +6,8 @@ public static ITextEmbedding GetTextEmbeddingSetting(IServiceProvider services, { var settings = services.GetRequiredService(); var db = services.GetRequiredService(); + + // Get collection config from db var configs = db.GetKnowledgeCollectionConfigs(new VectorCollectionConfigFilter { CollectionNames = [collectionName], @@ -24,6 +26,7 @@ public static ITextEmbedding GetTextEmbeddingSetting(IServiceProvider services, dimension = settings.Default.TextEmbedding.Dimension; } + // Set up text embedding var embedding = services.GetServices().FirstOrDefault(x => x.Provider == provider); if (dimension <= 0) diff --git a/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Document.cs b/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Document.cs index 27a2e590a..2e614a684 100644 --- a/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Document.cs +++ b/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/KnowledgeService.Document.cs @@ -10,12 +10,12 @@ public partial class KnowledgeService { public async Task UploadKnowledgeDocuments(string collectionName, IEnumerable files) { - if (string.IsNullOrWhiteSpace(collectionName)) + if (string.IsNullOrWhiteSpace(collectionName) || files.IsNullOrEmpty()) { return new UploadKnowledgeResponse { Success = [], - Failed = files.Select(x => x.FileName) + Failed = files?.Select(x => x.FileName) ?? new List() }; } @@ -98,7 +98,9 @@ public async Task DeleteKnowledgeDocument(string collectionName, string fi var vectorDb = GetVectorDb(); var vectorStoreProvider = _settings.VectorDb.Provider; + // Get doc meta data var metaData = fileStorage.GetKnowledgeBaseFileMeta(collectionName.CleanStr(), vectorStoreProvider.CleanStr(), fileId); + // Delete doc fileStorage.DeleteKnowledgeFile(collectionName.CleanStr(), vectorStoreProvider.CleanStr(), fileId); if (metaData != null && !metaData.VectorDataIds.IsNullOrEmpty()) @@ -128,6 +130,8 @@ public async Task> GetKnowledgeDocuments(string var fileStorage = _services.GetRequiredService(); var vectorStoreProvider = _settings.VectorDb.Provider; + + // Get doc meta data var files = fileStorage.GetKnowledgeBaseFiles(collectionName.CleanStr(), vectorStoreProvider.CleanStr()); return files; } @@ -136,34 +140,13 @@ public async Task> GetKnowledgeDocuments(string { var fileStorage = _services.GetRequiredService(); var vectorStoreProvider = _settings.VectorDb.Provider; + + // Get doc binary data var file = fileStorage.GetKnowledgeBaseFileBinaryData(collectionName.CleanStr(), vectorStoreProvider.CleanStr(), fileId); return file; } - public async Task FeedVectorKnowledge(string collectionName, KnowledgeCreationModel knowledge) - { - var index = 0; - var lines = TextChopper.Chop(knowledge.Content, new ChunkOption - { - Size = 1024, - Conjunction = 32, - SplitByWord = true, - }); - - var db = GetVectorDb(); - var textEmbedding = GetTextEmbedding(collectionName); - - await db.CreateCollection(collectionName, textEmbedding.GetDimension()); - foreach (var line in lines) - { - var vec = await textEmbedding.GetVectorAsync(line); - await db.Upsert(collectionName, Guid.NewGuid(), vec, line); - index++; - Console.WriteLine($"Saved vector {index}/{lines.Count}: {line}\n"); - } - } - #region Private methods /// /// Get file content type and file bytes @@ -203,6 +186,8 @@ private async Task> GetFileContent(string contentType, byte[ using var stream = new MemoryStream(bytes); using var reader = new StreamReader(stream); var content = await reader.ReadToEndAsync(); + reader.Close(); + stream.Close(); var lines = TextChopper.Chop(content, new ChunkOption { @@ -210,9 +195,6 @@ private async Task> GetFileContent(string contentType, byte[ Conjunction = 32, SplitByWord = true, }); - - reader.Close(); - stream.Close(); results.AddRange(lines); } else if (contentType.IsEqualTo(MediaTypeNames.Application.Pdf))