Skip to content

Commit

Permalink
Merge pull request #644 from iceljc/features/add-knowledge-docs
Browse files Browse the repository at this point in the history
refine knowledge doc
  • Loading branch information
iceljc authored Sep 17, 2024
2 parents e775ebb + 16904e0 commit cd96a4d
Show file tree
Hide file tree
Showing 20 changed files with 240 additions and 97 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -63,16 +63,17 @@ public interface IFileStorageService
#endregion

#region Knowledge
bool SaveKnowledgeBaseFile(string collectionName, string vectorStoreProvider, string fileId, string fileName, BinaryData fileData);
bool SaveKnowledgeBaseFile(string collectionName, string vectorStoreProvider, Guid fileId, string fileName, BinaryData fileData);

/// <summary>
/// Delete files in a knowledge collection. If fileId is null, remove all files in the collection.
/// Delete files in a knowledge collection, given the vector store provider. If "fileId" is null, delete all files in the collection.
/// </summary>
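/// <param name="collectionName">Name of the knowledge collection whose files are deleted.</param>
/// <param name="vectorStoreProvider">Name of the vector store provider the collection is stored under.</param>
/// <param name="fileId">Id of the file to delete; when null, every file in the collection is deleted.</param>
/// <returns>True when the request is accepted and processed; false when it is rejected (for example, a blank collection name or provider).</returns>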
bool DeleteKnowledgeFile(string collectionName, string vectorStoreProvider, string? fileId = null);
string GetKnowledgeBaseFileUrl(string collectionName, string fileId);
FileBinaryDataModel? GetKnowledgeBaseFileBinaryData(string collectionName, string vectorStoreProvider, string fileId);
bool DeleteKnowledgeFile(string collectionName, string vectorStoreProvider, Guid? fileId = null);
string GetKnowledgeBaseFileUrl(string collectionName, string vectorStoreProvider, Guid fileId, string fileName);
BinaryData? GetKnowledgeBaseFileBinaryData(string collectionName, string vectorStoreProvider, Guid fileId, string fileName);
#endregion
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ namespace BotSharp.Abstraction.Files.Models;

public class KnowledgeFileModel
{
public string FileId { get; set; }
public Guid FileId { get; set; }
public string FileName { get; set; }
public string FileExtension { get; set; }
public string ContentType { get; set; }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@ public interface IKnowledgeService

#region Document
Task<UploadKnowledgeResponse> UploadKnowledgeDocuments(string collectionName, IEnumerable<ExternalFileModel> files);
Task<bool> DeleteKnowledgeDocument(string collectionName, string fileId);
Task<bool> DeleteKnowledgeDocument(string collectionName, Guid fileId);
Task<PagedItems<KnowledgeFileModel>> GetPagedKnowledgeDocuments(string collectionName, KnowledgeFileFilter filter);
Task<FileBinaryDataModel?> GetKnowledgeDocumentBinaryData(string collectionName, string fileId);
Task<FileBinaryDataModel?> GetKnowledgeDocumentBinaryData(string collectionName, Guid fileId);
#endregion

#region Common
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ public class KnowledgeDocMetaData
public string Collection { get; set; }

[JsonPropertyName("file_id")]
public string FileId { get; set; }
public Guid FileId { get; set; }

[JsonPropertyName("file_name")]
public string FileName { get; set; }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@ namespace BotSharp.Abstraction.Knowledges.Models;

public class KnowledgeFileFilter : Pagination
{
public IEnumerable<string>? FileIds { get; set; }
public IEnumerable<Guid>? FileIds { get; set; }
}
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,15 @@ public interface IBotSharpRepository
bool DeleteKnowledgeCollectionConfig(string collectionName);
IEnumerable<VectorCollectionConfig> GetKnowledgeCollectionConfigs(VectorCollectionConfigFilter filter);

public bool SaveKnolwedgeBaseFileMeta(KnowledgeDocMetaData metaData);
public PagedItems<KnowledgeDocMetaData> GetKnowledgeBaseFileMeta(string collectionName, string vectorStoreProvider, KnowledgeFileFilter filter);
bool SaveKnolwedgeBaseFileMeta(KnowledgeDocMetaData metaData);
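/// <summary>
/// Delete file metadata in a knowledge collection, given the vector store provider. If "fileId" is null, delete the metadata of all files in the collection.
/// </summary>
/// <param name="collectionName">Name of the knowledge collection whose file metadata is deleted.</param>
/// <param name="vectorStoreProvider">Name of the vector store provider the collection is stored under.</param>
/// <param name="fileId">Id of the file whose metadata is deleted; when null, the metadata of every file in the collection is deleted.</param>
/// <returns>True when the request is accepted and processed; false when it is rejected (for example, a blank collection name or provider).</returns>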
bool DeleteKnolwedgeBaseFileMeta(string collectionName, string vectorStoreProvider, Guid? fileId = null);
PagedItems<KnowledgeDocMetaData> GetKnowledgeBaseFileMeta(string collectionName, string vectorStoreProvider, KnowledgeFileFilter filter);
#endregion
}
Original file line number Diff line number Diff line change
@@ -1,23 +1,21 @@
using BotSharp.Abstraction.Knowledges.Models;
using System.IO;

namespace BotSharp.Core.Files.Services;

public partial class LocalFileStorageService
{
public bool SaveKnowledgeBaseFile(string collectionName, string vectorStoreProvider, string fileId, string fileName, BinaryData fileData)
public bool SaveKnowledgeBaseFile(string collectionName, string vectorStoreProvider, Guid fileId, string fileName, BinaryData fileData)
{
if (string.IsNullOrWhiteSpace(collectionName)
|| string.IsNullOrWhiteSpace(vectorStoreProvider)
|| string.IsNullOrWhiteSpace(fileId))
|| string.IsNullOrWhiteSpace(vectorStoreProvider))
{
return false;
}

try
{
var docDir = BuildKnowledgeCollectionDocumentDir(collectionName, vectorStoreProvider);
var dir = Path.Combine(docDir, fileId);
var docDir = BuildKnowledgeCollectionFileDir(collectionName, vectorStoreProvider);
var dir = Path.Combine(docDir, fileId.ToString());
if (ExistDirectory(dir))
{
Directory.Delete(dir);
Expand All @@ -40,24 +38,24 @@ public bool SaveKnowledgeBaseFile(string collectionName, string vectorStoreProvi
}
}

public bool DeleteKnowledgeFile(string collectionName, string vectorStoreProvider, string? fileId = null)
public bool DeleteKnowledgeFile(string collectionName, string vectorStoreProvider, Guid? fileId = null)
{
if (string.IsNullOrWhiteSpace(collectionName)
|| string.IsNullOrWhiteSpace(vectorStoreProvider))
{
return false;
}

var dir = BuildKnowledgeCollectionDocumentDir(collectionName, vectorStoreProvider);
var dir = BuildKnowledgeCollectionFileDir(collectionName, vectorStoreProvider);
if (!ExistDirectory(dir)) return false;

if (string.IsNullOrEmpty(fileId))
if (fileId == null)
{
Directory.Delete(dir, true);
}
else
{
var fileDir = Path.Combine(dir, fileId);
var fileDir = Path.Combine(dir, fileId.ToString());
if (ExistDirectory(fileDir))
{
Directory.Delete(fileDir, true);
Expand All @@ -67,50 +65,42 @@ public bool DeleteKnowledgeFile(string collectionName, string vectorStoreProvide
return true;
}

public string GetKnowledgeBaseFileUrl(string collectionName, string fileId)
public string GetKnowledgeBaseFileUrl(string collectionName, string vectorStoreProvider, Guid fileId, string fileName)
{
if (string.IsNullOrWhiteSpace(collectionName)
|| string.IsNullOrWhiteSpace(fileId))
|| string.IsNullOrWhiteSpace(vectorStoreProvider))
{
return string.Empty;
}

return $"/knowledge/document/{collectionName}/file/{fileId}";
}

public FileBinaryDataModel? GetKnowledgeBaseFileBinaryData(string collectionName, string vectorStoreProvider, string fileId)
public BinaryData? GetKnowledgeBaseFileBinaryData(string collectionName, string vectorStoreProvider, Guid fileId, string fileName)
{
if (string.IsNullOrWhiteSpace(collectionName)
|| string.IsNullOrWhiteSpace(vectorStoreProvider)
|| string.IsNullOrWhiteSpace(fileId))
|| string.IsNullOrWhiteSpace(fileName))
{
return null;
}

var docDir = BuildKnowledgeCollectionDocumentDir(collectionName, vectorStoreProvider);
var fileDir = Path.Combine(docDir, fileId);
var docDir = BuildKnowledgeCollectionFileDir(collectionName, vectorStoreProvider);
var fileDir = Path.Combine(docDir, fileId.ToString());
if (!ExistDirectory(fileDir)) return null;

var metaFile = Path.Combine(fileDir, KNOWLEDGE_DOC_META_FILE);
var content = File.ReadAllText(metaFile);
var metaData = JsonSerializer.Deserialize<KnowledgeDocMetaData>(content, _jsonOptions);
var file = Path.Combine(fileDir, metaData.FileName);
var file = Path.Combine(fileDir, fileName);
using var stream = new FileStream(file, FileMode.Open, FileAccess.Read);
stream.Position = 0;

return new FileBinaryDataModel
{
FileName = metaData.FileName,
ContentType = metaData.ContentType,
FileBinaryData = BinaryData.FromStream(stream)
};
return BinaryData.FromStream(stream);
}


#region Private methods
private string BuildKnowledgeCollectionDocumentDir(string collectionName, string vectorStoreProvider)
private string BuildKnowledgeCollectionFileDir(string collectionName, string vectorStoreProvider)
{
return Path.Combine(_baseDir, KNOWLEDGE_FOLDER, KNOWLEDGE_DOC_FOLDER, vectorStoreProvider, collectionName);
return Path.Combine(_baseDir, KNOWLEDGE_FOLDER, KNOWLEDGE_DOC_FOLDER, vectorStoreProvider.CleanStr(), collectionName.CleanStr());
}
#endregion
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ public partial class LocalFileStorageService : IFileStorageService
private const string TEXT_TO_SPEECH_FOLDER = "speeches";
private const string KNOWLEDGE_FOLDER = "knowledgebase";
private const string KNOWLEDGE_DOC_FOLDER = "document";
private const string KNOWLEDGE_DOC_META_FILE = "meta.json";

private readonly JsonSerializerOptions _jsonOptions = new JsonSerializerOptions
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,7 @@ public bool SaveTranslationMemories(IEnumerable<TranslationMemoryInput> inputs)
throw new NotImplementedException();
#endregion

#region Knowledge
#region KnowledgeBase
/// <summary>
/// Placeholder: this member is not implemented and always throws <see cref="NotImplementedException"/>.
/// </summary>
public bool AddKnowledgeCollectionConfigs(List<VectorCollectionConfig> configs, bool reset = false)
{
    throw new NotImplementedException();
}

Expand All @@ -247,6 +247,9 @@ public IEnumerable<VectorCollectionConfig> GetKnowledgeCollectionConfigs(VectorC
/// <summary>
/// Placeholder: this member is not implemented and always throws <see cref="NotImplementedException"/>.
/// </summary>
public bool SaveKnolwedgeBaseFileMeta(KnowledgeDocMetaData metaData)
{
    throw new NotImplementedException();
}

/// <summary>
/// Placeholder: this member is not implemented and always throws <see cref="NotImplementedException"/>.
/// </summary>
public bool DeleteKnolwedgeBaseFileMeta(string collectionName, string vectorStoreProvider, Guid? fileId = null)
{
    throw new NotImplementedException();
}

/// <summary>
/// Placeholder: this member is not implemented and always throws <see cref="NotImplementedException"/>.
/// </summary>
public PagedItems<KnowledgeDocMetaData> GetKnowledgeBaseFileMeta(string collectionName, string vectorStoreProvider, KnowledgeFileFilter filter)
{
    throw new NotImplementedException();
}
#endregion
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -110,14 +110,13 @@ public bool SaveKnolwedgeBaseFileMeta(KnowledgeDocMetaData metaData)
{
if (metaData == null
|| string.IsNullOrWhiteSpace(metaData.Collection)
|| string.IsNullOrWhiteSpace(metaData.VectorStoreProvider)
|| string.IsNullOrWhiteSpace(metaData.FileId))
|| string.IsNullOrWhiteSpace(metaData.VectorStoreProvider))
{
return false;
}

var dir = BuildKnowledgeDocumentDir(metaData.Collection.CleanStr(), metaData.VectorStoreProvider.CleanStr());
var docDir = Path.Combine(dir, metaData.FileId);
var dir = BuildKnowledgeCollectionFileDir(metaData.Collection, metaData.VectorStoreProvider);
var docDir = Path.Combine(dir, metaData.FileId.ToString());
if (!Directory.Exists(docDir))
{
Directory.CreateDirectory(docDir);
Expand All @@ -129,6 +128,33 @@ public bool SaveKnolwedgeBaseFileMeta(KnowledgeDocMetaData metaData)
return true;
}

/// <summary>
/// Removes the stored directory for a knowledge collection, or for a single file inside it.
/// When <paramref name="fileId"/> is null the whole collection directory is deleted recursively;
/// otherwise only the sub-directory named after the file id is deleted, if it exists.
/// </summary>
/// <param name="collectionName">Collection name used to build the collection directory path.</param>
/// <param name="vectorStoreProvider">Vector store provider name used to build the collection directory path.</param>
/// <param name="fileId">Id of the file whose directory is removed; null removes the whole collection directory.</param>
/// <returns>
/// False when either name is null/blank or the collection directory does not exist; true otherwise
/// (including the case where the file's own directory was already absent).
/// </returns>
public bool DeleteKnolwedgeBaseFileMeta(string collectionName, string vectorStoreProvider, Guid? fileId = null)
{
    var hasCollection = !string.IsNullOrWhiteSpace(collectionName);
    var hasProvider = !string.IsNullOrWhiteSpace(vectorStoreProvider);
    if (!hasCollection || !hasProvider)
    {
        return false;
    }

    var collectionDir = BuildKnowledgeCollectionFileDir(collectionName, vectorStoreProvider);
    if (!Directory.Exists(collectionDir))
    {
        return false;
    }

    // No file id given: drop the entire collection directory and everything under it.
    if (!fileId.HasValue)
    {
        Directory.Delete(collectionDir, true);
        return true;
    }

    // A specific file: its directory is named after the id; a missing directory is not an error.
    var targetDir = Path.Combine(collectionDir, fileId.ToString());
    if (Directory.Exists(targetDir))
    {
        Directory.Delete(targetDir, true);
    }

    return true;
}

public PagedItems<KnowledgeDocMetaData> GetKnowledgeBaseFileMeta(string collectionName, string vectorStoreProvider, KnowledgeFileFilter filter)
{
if (string.IsNullOrWhiteSpace(collectionName)
Expand All @@ -137,7 +163,7 @@ public PagedItems<KnowledgeDocMetaData> GetKnowledgeBaseFileMeta(string collecti
return new PagedItems<KnowledgeDocMetaData>();
}

var dir = BuildKnowledgeDocumentDir(collectionName, vectorStoreProvider);
var dir = BuildKnowledgeCollectionFileDir(collectionName, vectorStoreProvider);
if (!Directory.Exists(dir))
{
return new PagedItems<KnowledgeDocMetaData>();
Expand Down Expand Up @@ -181,9 +207,9 @@ private string BuildKnowledgeCollectionConfigDir()
return Path.Combine(_dbSettings.FileRepository, KNOWLEDGE_FOLDER, VECTOR_FOLDER);
}

private string BuildKnowledgeDocumentDir(string collectionName, string vectorStoreProvider)
private string BuildKnowledgeCollectionFileDir(string collectionName, string vectorStoreProvider)
{
return Path.Combine(_dbSettings.FileRepository, KNOWLEDGE_FOLDER, KNOWLEDGE_DOC_FOLDER, vectorStoreProvider, collectionName);
return Path.Combine(_dbSettings.FileRepository, KNOWLEDGE_FOLDER, KNOWLEDGE_DOC_FOLDER, vectorStoreProvider.CleanStr(), collectionName.CleanStr());
}
#endregion
}
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ public async Task<UploadKnowledgeResponse> UploadKnowledgeDocuments([FromRoute]
}

[HttpDelete("/knowledge/document/{collection}/delete/{fileId}")]
public async Task<bool> DeleteKnowledgeDocument([FromRoute] string collection, [FromRoute] string fileId)
public async Task<bool> DeleteKnowledgeDocument([FromRoute] string collection, [FromRoute] Guid fileId)
{
var response = await _knowledgeService.DeleteKnowledgeDocument(collection, fileId);
return response;
Expand All @@ -160,7 +160,7 @@ public async Task<PagedItems<KnowledgeFileViewModel>> GetPagedKnowledgeDocuments
}

[HttpGet("/knowledge/document/{collection}/file/{fileId}")]
public async Task<IActionResult> GetKnowledgeDocument([FromRoute] string collection, [FromRoute] string fileId)
public async Task<IActionResult> GetKnowledgeDocument([FromRoute] string collection, [FromRoute] Guid fileId)
{
var file = await _knowledgeService.GetKnowledgeDocumentBinaryData(collection, fileId);
var stream = file.FileBinaryData.ToStream();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ namespace BotSharp.OpenAPI.ViewModels.Knowledges;
public class KnowledgeFileViewModel
{
[JsonPropertyName("file_id")]
public string FileId { get; set; }
public Guid FileId { get; set; }

[JsonPropertyName("file_name")]
public string FileName { get; set; }
Expand Down
6 changes: 1 addition & 5 deletions src/Plugins/BotSharp.Plugin.ChatHub/Hooks/WelcomeHook.cs
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,7 @@ public override async Task OnUserAgentConnectedInitially(Conversation conversati

foreach (var message in messages)
{
var richContent = new RichContent<IRichMessage>(message)
{
Editor = message.RichType == RichTypeEnum.QuickReply ? EditorTypeEnum.None : EditorTypeEnum.Text,
};

var richContent = new RichContent<IRichMessage>(message);
var json = JsonSerializer.Serialize(new ChatResponseModel()
{
ConversationId = conversation.Id,
Expand Down
Loading

0 comments on commit cd96a4d

Please sign in to comment.