Skip to content

Commit

Permalink
Merge pull request #633 from iceljc/features/add-knowledge-docs
Browse files Browse the repository at this point in the history
Features/add knowledge docs
  • Loading branch information
iceljc authored Sep 11, 2024
2 parents 0bb42d7 + 4232d16 commit 057190a
Show file tree
Hide file tree
Showing 64 changed files with 1,460 additions and 429 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ public interface IFileStorageService
IEnumerable<MessageFileModel> GetMessageFiles(string conversationId, IEnumerable<string> messageIds, string source, IEnumerable<string>? contentTypes = null);
string GetMessageFile(string conversationId, string messageId, string source, string index, string fileName);
IEnumerable<MessageFileModel> GetMessagesWithFile(string conversationId, IEnumerable<string> messageIds);
bool SaveMessageFiles(string conversationId, string messageId, string source, List<InputFileModel> files);
bool SaveMessageFiles(string conversationId, string messageId, string source, List<FileDataModel> files);

/// <summary>
/// Delete files under messages
Expand All @@ -54,11 +54,31 @@ public interface IFileStorageService

#region User
string GetUserAvatar();
bool SaveUserAvatar(InputFileModel file);
bool SaveUserAvatar(FileDataModel file);
#endregion

#region Speech
bool SaveSpeechFile(string conversationId, string fileName, BinaryData data);
BinaryData GetSpeechFile(string conversationId, string fileName);
#endregion

#region Knowledge
bool SaveKnowledgeBaseFile(string collectionName, string vectorStoreProvider, string fileId, string fileName, Stream stream);

/// <summary>
/// Delete files in a knowledge collection. If fileId is null, remove all files in the collection.
/// </summary>
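/// <param name="collectionName">Name of the knowledge collection.</param>
/// <param name="vectorStoreProvider">Vector store provider of the collection.</param>
/// <param name="fileId">Id of the file to delete; when null, all files in the collection are removed.</param>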
/// <returns></returns>
bool DeleteKnowledgeFile(string collectionName, string vectorStoreProvider, string? fileId = null);

bool SaveKnolwedgeBaseFileMeta(string collectionName, string vectorStoreProvider,string fileId, KnowledgeDocMetaData metaData);

KnowledgeDocMetaData? GetKnowledgeBaseFileMeta(string collectionName, string vectorStoreProvider, string fileId);

IEnumerable<KnowledgeFileModel> GetKnowledgeBaseFiles(string collectionName, string vectorStoreProvider);

FileBinaryDataModel? GetKnowledgeBaseFileBinaryData(string collectionName, string vectorStoreProvider, string fileId);
#endregion
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
namespace BotSharp.Abstraction.Files.Models;

/// <summary>
/// A <see cref="FileDataModel"/> that additionally carries a file URL.
/// </summary>
public class ExternalFileModel : FileDataModel
{
/// <summary>
/// URL of the file, serialized as "file_url". Defaults to an empty string.
/// </summary>
[JsonPropertyName("file_url")]
public string FileUrl { get; set; } = string.Empty;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
namespace BotSharp.Abstraction.Files.Models;

/// <summary>
/// Pairs a file name with the file's binary content.
/// </summary>
public class FileBinaryDataModel
{
/// <summary>
/// Name of the file. No default is assigned, so it is null until set.
/// </summary>
public string FileName { get; set; }

/// <summary>
/// Binary content of the file. No default is assigned, so it is null until set.
/// </summary>
public BinaryData FileBinaryData { get; set; }
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
namespace BotSharp.Abstraction.Files.Models;

public class InputFileModel : FileBase
public class FileDataModel : FileBase
{
/// <summary>
/// File name with extension
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
namespace BotSharp.Abstraction.Files.Models;

/// <summary>
/// Describes a single file stored in a knowledge base collection.
/// None of the properties has a default value, so each is null until set.
/// </summary>
public class KnowledgeFileModel
{
/// <summary>Identifier of the file.</summary>
public string FileId { get; set; }

/// <summary>Name of the file.</summary>
public string FileName { get; set; }

/// <summary>
/// Extension of the file.
/// NOTE(review): whether a leading dot is included is not defined here; confirm against the producer.
/// </summary>
public string FileExtension { get; set; }

/// <summary>Content type of the file.</summary>
public string ContentType { get; set; }

/// <summary>URL of the file.</summary>
public string FileUrl { get; set; }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
namespace BotSharp.Abstraction.Knowledges.Enums;

/// <summary>
/// Well-known knowledge collection type identifiers.
/// </summary>
/// <remarks>
/// The values are declared as constants so that no caller can reassign them at
/// runtime; as constants they can also be used in switch labels and attribute arguments.
/// </remarks>
public static class KnowledgeCollectionType
{
    /// <summary>Collection type identifier "question-answer".</summary>
    public const string QuestionAnswer = "question-answer";

    /// <summary>Collection type identifier "document".</summary>
    public const string Document = "document";
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,10 @@ namespace BotSharp.Abstraction.Knowledges;
public interface IKnowledgeService
{
#region Vector
Task<bool> CreateVectorCollection(string collectionName, int dimension);
Task<bool> CreateVectorCollection(string collectionName, string collectionType, int dimension, string provider, string model);
Task<bool> DeleteVectorCollection(string collectionName);
Task<IEnumerable<string>> GetVectorCollections();
Task<IEnumerable<string>> GetVectorCollections(string type);
Task<IEnumerable<VectorSearchResult>> SearchVectorKnowledge(string query, string collectionName, VectorSearchOptions options);
Task FeedVectorKnowledge(string collectionName, KnowledgeCreationModel model);
Task<StringIdPagedItems<VectorSearchResult>> GetPagedVectorCollectionData(string collectionName, VectorFilter filter);
Task<bool> DeleteVectorCollectionData(string collectionName, string id);
Task<bool> CreateVectorCollectionData(string collectionName, VectorCreateModel create);
Expand All @@ -19,6 +18,16 @@ public interface IKnowledgeService

#region Graph
Task<GraphSearchResult> SearchGraphKnowledge(string query, GraphSearchOptions options);
Task<KnowledgeSearchResult> SearchKnowledge(string query, string collectionName, VectorSearchOptions vectorOptions, GraphSearchOptions graphOptions);
#endregion

#region Document
Task<UploadKnowledgeResponse> UploadKnowledgeDocuments(string collectionName, IEnumerable<ExternalFileModel> files);
Task<bool> DeleteKnowledgeDocument(string collectionName, string fileId);
Task<IEnumerable<KnowledgeFileModel>> GetKnowledgeDocuments(string collectionName);
Task<FileBinaryDataModel?> GetKnowledgeDocumentBinaryData(string collectionName, string fileId);
#endregion

#region Common
Task<bool> RefreshVectorKnowledgeConfigs(VectorCollectionConfigsModel configs);
#endregion
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
using BotSharp.Abstraction.VectorStorage.Models;

namespace BotSharp.Abstraction.Knowledges.Models;

/// <summary>
/// Metadata describing a knowledge document file.
/// Each property is serialized under the JSON name given by its attribute.
/// </summary>
public class KnowledgeDocMetaData
{
/// <summary>Collection name, serialized as "collection".</summary>
[JsonPropertyName("collection")]
public string Collection { get; set; }

/// <summary>Identifier of the file, serialized as "file_id".</summary>
[JsonPropertyName("file_id")]
public string FileId { get; set; }

/// <summary>Name of the file, serialized as "file_name".</summary>
[JsonPropertyName("file_name")]
public string FileName { get; set; }

/// <summary>Content type of the file, serialized as "content_type".</summary>
[JsonPropertyName("content_type")]
public string ContentType { get; set; }

/// <summary>
/// Vector data ids, serialized as "vector_data_ids". Defaults to an empty list.
/// NOTE(review): presumably the ids of the vector entries created from this document; confirm against the writer.
/// </summary>
[JsonPropertyName("vector_data_ids")]
public IEnumerable<string> VectorDataIds { get; set; } = new List<string>();

/// <summary>
/// Creation timestamp, serialized as "create_date".
/// Defaults to the UTC time at which the instance is constructed.
/// </summary>
[JsonPropertyName("create_date")]
public DateTime CreateDate { get; set; } = DateTime.UtcNow;

/// <summary>Identifier of the creating user, serialized as "create_user_id".</summary>
[JsonPropertyName("create_user_id")]
public string CreateUserId { get; set; }
}

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
namespace BotSharp.Abstraction.Knowledges.Models;

/// <summary>
/// Outcome of a knowledge document upload, split into succeeded and failed entries.
/// </summary>
public class UploadKnowledgeResponse
{
    /// <summary>
    /// Entries that succeeded, serialized as "success". Defaults to an empty list.
    /// </summary>
    [JsonPropertyName("success")]
    public IEnumerable<string> Success { get; set; } = new List<string>();

    /// <summary>
    /// Entries that failed, serialized as "failed". Defaults to an empty list.
    /// </summary>
    [JsonPropertyName("failed")]
    public IEnumerable<string> Failed { get; set; } = new List<string>();

    /// <summary>
    /// True only when <see cref="Success"/> has at least one entry and
    /// <see cref="Failed"/> is null or empty. Serialized as "is_success".
    /// </summary>
    [JsonPropertyName("is_success")]
    public bool IsSuccess => !Success.IsNullOrEmpty() && Failed.IsNullOrEmpty();
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ public class KnowledgeBaseSettings
public SettingBase GraphDb { get; set; }

public DefaultKnowledgeBaseSetting Default { get; set; }
public List<VectorCollectionSetting> Collections { get; set; } = new();
}

public class DefaultKnowledgeBaseSetting
Expand All @@ -17,12 +16,6 @@ public class DefaultKnowledgeBaseSetting
public KnowledgeTextEmbeddingSetting TextEmbedding { get; set; }
}

public class VectorCollectionSetting
{
public string Name { get; set; }
public KnowledgeTextEmbeddingSetting TextEmbedding { get; set; }
}

public class KnowledgeTextEmbeddingSetting : SettingBase
{
public string Model { get; set; }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
using BotSharp.Abstraction.Tasks.Models;
using BotSharp.Abstraction.Translation.Models;
using BotSharp.Abstraction.Users.Models;
using BotSharp.Abstraction.VectorStorage.Models;

namespace BotSharp.Abstraction.Repositories;

Expand Down Expand Up @@ -99,4 +100,16 @@ public interface IBotSharpRepository
bool SaveTranslationMemories(IEnumerable<TranslationMemoryInput> inputs);

#endregion

#region KnowledgeBase
/// <summary>
/// Save knowledge collection configs. If reset is true, it will remove everything and then save the new configs.
/// </summary>
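/// <param name="configs">The knowledge collection configs to save.</param>
/// <param name="reset">When true, remove everything first and then save the new configs.</param>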
/// <returns></returns>
bool AddKnowledgeCollectionConfigs(List<VectorCollectionConfig> configs, bool reset = false);
bool DeleteKnowledgeCollectionConfig(string collectionName);
IEnumerable<VectorCollectionConfig> GetKnowledgeCollectionConfigs(VectorCollectionConfigFilter filter);
#endregion
}
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,13 @@ public static bool IsEqualTo(this string? str1, string? str2, StringComparison o
return str1.Equals(str2, option);
}

/// <summary>
/// Strips every space, tab, line-feed and carriage-return character from a string.
/// </summary>
/// <param name="str">The text to clean; may be null.</param>
/// <returns>
/// An empty string when <paramref name="str"/> is null, empty or consists only of
/// white-space; otherwise the input with the four characters above removed.
/// </returns>
public static string CleanStr(this string? str)
{
    if (string.IsNullOrWhiteSpace(str))
    {
        return string.Empty;
    }

    // Cutting the text at each unwanted character and gluing the pieces back
    // together drops exactly those characters and nothing else.
    var fragments = str.Split(' ', '\t', '\n', '\r');
    return string.Concat(fragments);
}

public static string JsonContent(this string text)
{
var m = Regex.Match(text, @"\{(?:[^{}]|(?<open>\{)|(?<-open>\}))+(?(open)(?!))\}");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,5 @@ public interface IVectorDb
Task<bool> DeleteCollection(string collectionName);
Task<bool> Upsert(string collectionName, Guid id, float[] vector, string text, Dictionary<string, string>? payload = null);
Task<IEnumerable<VectorCollectionData>> Search(string collectionName, float[] vector, IEnumerable<string>? fields, int limit = 5, float confidence = 0.5f, bool withVector = false);
Task<bool> DeleteCollectionData(string collectionName, Guid id);
Task<bool> DeleteCollectionData(string collectionName, List<Guid> ids);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
namespace BotSharp.Abstraction.VectorStorage.Models;

/// <summary>
/// Filter criteria for querying vector collection configs.
/// Every criterion is optional and is null by default.
/// </summary>
public class VectorCollectionConfigFilter
{
    /// <summary>Collection names to filter by, or null.</summary>
    public IEnumerable<string>? CollectionNames { get; set; }

    /// <summary>Collection types to filter by, or null.</summary>
    public IEnumerable<string>? CollectionTypes { get; set; }

    /// <summary>
    /// Vector storage providers to filter by, or null.
    /// The spelling of the property name is kept as-is because it is part of the public surface.
    /// </summary>
    public IEnumerable<string>? VectorStroageProviders { get; set; }

    /// <summary>
    /// Creates a new filter with no criteria set.
    /// </summary>
    /// <returns>A fresh instance whose properties are all null.</returns>
    public static VectorCollectionConfigFilter Empty() => new();
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
namespace BotSharp.Abstraction.VectorStorage.Models;

/// <summary>
/// Wrapper around a list of vector collection configs.
/// </summary>
public class VectorCollectionConfigsModel
{
/// <summary>
/// The collection configs, serialized as "collections". Defaults to an empty list.
/// </summary>
[JsonPropertyName("collections")]
public List<VectorCollectionConfig> Collections { get; set; } = new();
}

/// <summary>
/// Configuration of a single vector collection: its name, type, vector storage
/// settings and text embedding settings.
/// </summary>
public class VectorCollectionConfig
{
/// <summary>
/// Collection name, serialized as "name". Must be unique.
/// </summary>
[JsonPropertyName("name")]
public string Name { get; set; }

/// <summary>
/// Collection type, e.g., question-answer, document. Serialized as "type".
/// </summary>
[JsonPropertyName("type")]
public string Type { get; set; }

/// <summary>
/// Vector storage settings, serialized as "vector_storage".
/// </summary>
[JsonPropertyName("vector_storage")]
public VectorStorageConfig VectorStorage { get; set; }

/// <summary>
/// Text embedding settings, serialized as "text_embedding".
/// </summary>
[JsonPropertyName("text_embedding")]
public KnowledgeEmbeddingConfig TextEmbedding { get; set; }
}

/// <summary>
/// Embedding settings: provider, model and dimension.
/// </summary>
public class KnowledgeEmbeddingConfig
{
/// <summary>Embedding provider, serialized as "provider".</summary>
[JsonPropertyName("provider")]
public string Provider { get; set; }

/// <summary>Embedding model, serialized as "model".</summary>
[JsonPropertyName("model")]
public string Model { get; set; }

/// <summary>Embedding dimension, serialized as "dimension". Defaults to 0.</summary>
[JsonPropertyName("dimension")]
public int Dimension { get; set; }
}

/// <summary>
/// Vector storage settings.
/// </summary>
public class VectorStorageConfig
{
/// <summary>Vector storage provider, serialized as "provider".</summary>
[JsonPropertyName("provider")]
public string Provider { get; set; }
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ public class VectorFilter : StringIdPagination
[JsonPropertyName("with_vector")]
public bool WithVector { get; set; }

/// <summary>
/// For keyword search
/// </summary>
[JsonPropertyName("search_pairs")]
public IEnumerable<KeyValue>? SearchPairs { get; set; }
}
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ public IEnumerable<MessageFileModel> GetMessagesWithFile(string conversationId,
return foundMsgs;
}

public bool SaveMessageFiles(string conversationId, string messageId, string source, List<InputFileModel> files)
public bool SaveMessageFiles(string conversationId, string messageId, string source, List<FileDataModel> files)
{
if (files.IsNullOrEmpty()) return false;

Expand Down
Loading

0 comments on commit 057190a

Please sign in to comment.