Skip to content

Commit cd96a4d

Browse files
authored
Merge pull request #644 from iceljc/features/add-knowledge-docs
refine knowledge doc
2 parents e775ebb + 16904e0 commit cd96a4d

File tree

20 files changed: 240 additions and 97 deletions.

src/Infrastructure/BotSharp.Abstraction/Files/IFileStorageService.cs

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -63,16 +63,17 @@ public interface IFileStorageService
6363
#endregion
6464

6565
#region Knowledge
66-
bool SaveKnowledgeBaseFile(string collectionName, string vectorStoreProvider, string fileId, string fileName, BinaryData fileData);
66+
bool SaveKnowledgeBaseFile(string collectionName, string vectorStoreProvider, Guid fileId, string fileName, BinaryData fileData);
6767

6868
/// <summary>
69-
/// Delete files in a knowledge collection. If fileId is null, remove all files in the collection.
69+
/// Delete files in a knowledge collection for the given vector store provider. If "fileId" is null, delete all files in the collection.
7070
/// </summary>
7171
/// <param name="collectionName"></param>
72+
/// <param name="vectorStoreProvider"></param>
7273
/// <param name="fileId"></param>
7374
/// <returns></returns>
74-
bool DeleteKnowledgeFile(string collectionName, string vectorStoreProvider, string? fileId = null);
75-
string GetKnowledgeBaseFileUrl(string collectionName, string fileId);
76-
FileBinaryDataModel? GetKnowledgeBaseFileBinaryData(string collectionName, string vectorStoreProvider, string fileId);
75+
bool DeleteKnowledgeFile(string collectionName, string vectorStoreProvider, Guid? fileId = null);
76+
string GetKnowledgeBaseFileUrl(string collectionName, string vectorStoreProvider, Guid fileId, string fileName);
77+
BinaryData? GetKnowledgeBaseFileBinaryData(string collectionName, string vectorStoreProvider, Guid fileId, string fileName);
7778
#endregion
7879
}

src/Infrastructure/BotSharp.Abstraction/Files/Models/KnowledgeFileModel.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ namespace BotSharp.Abstraction.Files.Models;
22

33
public class KnowledgeFileModel
44
{
5-
public string FileId { get; set; }
5+
public Guid FileId { get; set; }
66
public string FileName { get; set; }
77
public string FileExtension { get; set; }
88
public string ContentType { get; set; }

src/Infrastructure/BotSharp.Abstraction/Knowledges/IKnowledgeService.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,9 @@ public interface IKnowledgeService
2222

2323
#region Document
2424
Task<UploadKnowledgeResponse> UploadKnowledgeDocuments(string collectionName, IEnumerable<ExternalFileModel> files);
25-
Task<bool> DeleteKnowledgeDocument(string collectionName, string fileId);
25+
Task<bool> DeleteKnowledgeDocument(string collectionName, Guid fileId);
2626
Task<PagedItems<KnowledgeFileModel>> GetPagedKnowledgeDocuments(string collectionName, KnowledgeFileFilter filter);
27-
Task<FileBinaryDataModel?> GetKnowledgeDocumentBinaryData(string collectionName, string fileId);
27+
Task<FileBinaryDataModel?> GetKnowledgeDocumentBinaryData(string collectionName, Guid fileId);
2828
#endregion
2929

3030
#region Common

src/Infrastructure/BotSharp.Abstraction/Knowledges/Models/KnowledgeDocMetaData.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ public class KnowledgeDocMetaData
66
public string Collection { get; set; }
77

88
[JsonPropertyName("file_id")]
9-
public string FileId { get; set; }
9+
public Guid FileId { get; set; }
1010

1111
[JsonPropertyName("file_name")]
1212
public string FileName { get; set; }

src/Infrastructure/BotSharp.Abstraction/Knowledges/Models/KnowledgeFileFilter.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,5 @@ namespace BotSharp.Abstraction.Knowledges.Models;
22

33
public class KnowledgeFileFilter : Pagination
44
{
5-
public IEnumerable<string>? FileIds { get; set; }
5+
public IEnumerable<Guid>? FileIds { get; set; }
66
}

src/Infrastructure/BotSharp.Abstraction/Repositories/IBotSharpRepository.cs

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,15 @@ public interface IBotSharpRepository
112112
bool DeleteKnowledgeCollectionConfig(string collectionName);
113113
IEnumerable<VectorCollectionConfig> GetKnowledgeCollectionConfigs(VectorCollectionConfigFilter filter);
114114

115-
public bool SaveKnolwedgeBaseFileMeta(KnowledgeDocMetaData metaData);
116-
public PagedItems<KnowledgeDocMetaData> GetKnowledgeBaseFileMeta(string collectionName, string vectorStoreProvider, KnowledgeFileFilter filter);
115+
bool SaveKnolwedgeBaseFileMeta(KnowledgeDocMetaData metaData);
116+
/// <summary>
117+
/// Delete file metadata in a knowledge collection for the given vector store provider. If "fileId" is null, delete the metadata of all files in the collection.
118+
/// </summary>
119+
/// <param name="collectionName"></param>
120+
/// <param name="vectorStoreProvider"></param>
121+
/// <param name="fileId"></param>
122+
/// <returns></returns>
123+
bool DeleteKnolwedgeBaseFileMeta(string collectionName, string vectorStoreProvider, Guid? fileId = null);
124+
PagedItems<KnowledgeDocMetaData> GetKnowledgeBaseFileMeta(string collectionName, string vectorStoreProvider, KnowledgeFileFilter filter);
117125
#endregion
118126
}
Lines changed: 18 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,21 @@
1-
using BotSharp.Abstraction.Knowledges.Models;
21
using System.IO;
32

43
namespace BotSharp.Core.Files.Services;
54

65
public partial class LocalFileStorageService
76
{
8-
public bool SaveKnowledgeBaseFile(string collectionName, string vectorStoreProvider, string fileId, string fileName, BinaryData fileData)
7+
public bool SaveKnowledgeBaseFile(string collectionName, string vectorStoreProvider, Guid fileId, string fileName, BinaryData fileData)
98
{
109
if (string.IsNullOrWhiteSpace(collectionName)
11-
|| string.IsNullOrWhiteSpace(vectorStoreProvider)
12-
|| string.IsNullOrWhiteSpace(fileId))
10+
|| string.IsNullOrWhiteSpace(vectorStoreProvider))
1311
{
1412
return false;
1513
}
1614

1715
try
1816
{
19-
var docDir = BuildKnowledgeCollectionDocumentDir(collectionName, vectorStoreProvider);
20-
var dir = Path.Combine(docDir, fileId);
17+
var docDir = BuildKnowledgeCollectionFileDir(collectionName, vectorStoreProvider);
18+
var dir = Path.Combine(docDir, fileId.ToString());
2119
if (ExistDirectory(dir))
2220
{
2321
Directory.Delete(dir);
@@ -40,24 +38,24 @@ public bool SaveKnowledgeBaseFile(string collectionName, string vectorStoreProvi
4038
}
4139
}
4240

43-
public bool DeleteKnowledgeFile(string collectionName, string vectorStoreProvider, string? fileId = null)
41+
public bool DeleteKnowledgeFile(string collectionName, string vectorStoreProvider, Guid? fileId = null)
4442
{
4543
if (string.IsNullOrWhiteSpace(collectionName)
4644
|| string.IsNullOrWhiteSpace(vectorStoreProvider))
4745
{
4846
return false;
4947
}
5048

51-
var dir = BuildKnowledgeCollectionDocumentDir(collectionName, vectorStoreProvider);
49+
var dir = BuildKnowledgeCollectionFileDir(collectionName, vectorStoreProvider);
5250
if (!ExistDirectory(dir)) return false;
5351

54-
if (string.IsNullOrEmpty(fileId))
52+
if (fileId == null)
5553
{
5654
Directory.Delete(dir, true);
5755
}
5856
else
5957
{
60-
var fileDir = Path.Combine(dir, fileId);
58+
var fileDir = Path.Combine(dir, fileId.ToString());
6159
if (ExistDirectory(fileDir))
6260
{
6361
Directory.Delete(fileDir, true);
@@ -67,50 +65,42 @@ public bool DeleteKnowledgeFile(string collectionName, string vectorStoreProvide
6765
return true;
6866
}
6967

70-
public string GetKnowledgeBaseFileUrl(string collectionName, string fileId)
68+
public string GetKnowledgeBaseFileUrl(string collectionName, string vectorStoreProvider, Guid fileId, string fileName)
7169
{
7270
if (string.IsNullOrWhiteSpace(collectionName)
73-
|| string.IsNullOrWhiteSpace(fileId))
71+
|| string.IsNullOrWhiteSpace(vectorStoreProvider))
7472
{
7573
return string.Empty;
7674
}
7775

7876
return $"/knowledge/document/{collectionName}/file/{fileId}";
7977
}
8078

81-
public FileBinaryDataModel? GetKnowledgeBaseFileBinaryData(string collectionName, string vectorStoreProvider, string fileId)
79+
public BinaryData? GetKnowledgeBaseFileBinaryData(string collectionName, string vectorStoreProvider, Guid fileId, string fileName)
8280
{
8381
if (string.IsNullOrWhiteSpace(collectionName)
8482
|| string.IsNullOrWhiteSpace(vectorStoreProvider)
85-
|| string.IsNullOrWhiteSpace(fileId))
83+
|| string.IsNullOrWhiteSpace(fileName))
8684
{
8785
return null;
8886
}
8987

90-
var docDir = BuildKnowledgeCollectionDocumentDir(collectionName, vectorStoreProvider);
91-
var fileDir = Path.Combine(docDir, fileId);
88+
var docDir = BuildKnowledgeCollectionFileDir(collectionName, vectorStoreProvider);
89+
var fileDir = Path.Combine(docDir, fileId.ToString());
9290
if (!ExistDirectory(fileDir)) return null;
9391

94-
var metaFile = Path.Combine(fileDir, KNOWLEDGE_DOC_META_FILE);
95-
var content = File.ReadAllText(metaFile);
96-
var metaData = JsonSerializer.Deserialize<KnowledgeDocMetaData>(content, _jsonOptions);
97-
var file = Path.Combine(fileDir, metaData.FileName);
92+
var file = Path.Combine(fileDir, fileName);
9893
using var stream = new FileStream(file, FileMode.Open, FileAccess.Read);
9994
stream.Position = 0;
10095

101-
return new FileBinaryDataModel
102-
{
103-
FileName = metaData.FileName,
104-
ContentType = metaData.ContentType,
105-
FileBinaryData = BinaryData.FromStream(stream)
106-
};
96+
return BinaryData.FromStream(stream);
10797
}
10898

10999

110100
#region Private methods
111-
private string BuildKnowledgeCollectionDocumentDir(string collectionName, string vectorStoreProvider)
101+
private string BuildKnowledgeCollectionFileDir(string collectionName, string vectorStoreProvider)
112102
{
113-
return Path.Combine(_baseDir, KNOWLEDGE_FOLDER, KNOWLEDGE_DOC_FOLDER, vectorStoreProvider, collectionName);
103+
return Path.Combine(_baseDir, KNOWLEDGE_FOLDER, KNOWLEDGE_DOC_FOLDER, vectorStoreProvider.CleanStr(), collectionName.CleanStr());
114104
}
115105
#endregion
116106
}

src/Infrastructure/BotSharp.Core/Files/Services/Storage/LocalFileStorageService.cs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ public partial class LocalFileStorageService : IFileStorageService
2121
private const string TEXT_TO_SPEECH_FOLDER = "speeches";
2222
private const string KNOWLEDGE_FOLDER = "knowledgebase";
2323
private const string KNOWLEDGE_DOC_FOLDER = "document";
24-
private const string KNOWLEDGE_DOC_META_FILE = "meta.json";
2524

2625
private readonly JsonSerializerOptions _jsonOptions = new JsonSerializerOptions
2726
{

src/Infrastructure/BotSharp.Core/Repository/BotSharpDbContext.cs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,7 @@ public bool SaveTranslationMemories(IEnumerable<TranslationMemoryInput> inputs)
234234
throw new NotImplementedException();
235235
#endregion
236236

237-
#region Knowledge
237+
#region KnowledgeBase
238238
public bool AddKnowledgeCollectionConfigs(List<VectorCollectionConfig> configs, bool reset = false) =>
239239
throw new NotImplementedException();
240240

@@ -247,6 +247,9 @@ public IEnumerable<VectorCollectionConfig> GetKnowledgeCollectionConfigs(VectorC
247247
public bool SaveKnolwedgeBaseFileMeta(KnowledgeDocMetaData metaData) =>
248248
throw new NotImplementedException();
249249

250+
public bool DeleteKnolwedgeBaseFileMeta(string collectionName, string vectorStoreProvider, Guid? fileId = null) =>
251+
throw new NotImplementedException();
252+
250253
public PagedItems<KnowledgeDocMetaData> GetKnowledgeBaseFileMeta(string collectionName, string vectorStoreProvider, KnowledgeFileFilter filter) =>
251254
throw new NotImplementedException();
252255
#endregion

src/Infrastructure/BotSharp.Core/Repository/FileRepository/FileRepository.KnowledgeBase.cs

Lines changed: 33 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -110,14 +110,13 @@ public bool SaveKnolwedgeBaseFileMeta(KnowledgeDocMetaData metaData)
110110
{
111111
if (metaData == null
112112
|| string.IsNullOrWhiteSpace(metaData.Collection)
113-
|| string.IsNullOrWhiteSpace(metaData.VectorStoreProvider)
114-
|| string.IsNullOrWhiteSpace(metaData.FileId))
113+
|| string.IsNullOrWhiteSpace(metaData.VectorStoreProvider))
115114
{
116115
return false;
117116
}
118117

119-
var dir = BuildKnowledgeDocumentDir(metaData.Collection.CleanStr(), metaData.VectorStoreProvider.CleanStr());
120-
var docDir = Path.Combine(dir, metaData.FileId);
118+
var dir = BuildKnowledgeCollectionFileDir(metaData.Collection, metaData.VectorStoreProvider);
119+
var docDir = Path.Combine(dir, metaData.FileId.ToString());
121120
if (!Directory.Exists(docDir))
122121
{
123122
Directory.CreateDirectory(docDir);
@@ -129,6 +128,33 @@ public bool SaveKnolwedgeBaseFileMeta(KnowledgeDocMetaData metaData)
129128
return true;
130129
}
131130

131+
public bool DeleteKnolwedgeBaseFileMeta(string collectionName, string vectorStoreProvider, Guid? fileId = null)
132+
{
133+
if (string.IsNullOrWhiteSpace(collectionName)
134+
|| string.IsNullOrWhiteSpace(vectorStoreProvider))
135+
{
136+
return false;
137+
}
138+
139+
var dir = BuildKnowledgeCollectionFileDir(collectionName, vectorStoreProvider);
140+
if (!Directory.Exists(dir)) return false;
141+
142+
if (fileId == null)
143+
{
144+
Directory.Delete(dir, true);
145+
}
146+
else
147+
{
148+
var fileDir = Path.Combine(dir, fileId.ToString());
149+
if (Directory.Exists(fileDir))
150+
{
151+
Directory.Delete(fileDir, true);
152+
}
153+
}
154+
155+
return true;
156+
}
157+
132158
public PagedItems<KnowledgeDocMetaData> GetKnowledgeBaseFileMeta(string collectionName, string vectorStoreProvider, KnowledgeFileFilter filter)
133159
{
134160
if (string.IsNullOrWhiteSpace(collectionName)
@@ -137,7 +163,7 @@ public PagedItems<KnowledgeDocMetaData> GetKnowledgeBaseFileMeta(string collecti
137163
return new PagedItems<KnowledgeDocMetaData>();
138164
}
139165

140-
var dir = BuildKnowledgeDocumentDir(collectionName, vectorStoreProvider);
166+
var dir = BuildKnowledgeCollectionFileDir(collectionName, vectorStoreProvider);
141167
if (!Directory.Exists(dir))
142168
{
143169
return new PagedItems<KnowledgeDocMetaData>();
@@ -181,9 +207,9 @@ private string BuildKnowledgeCollectionConfigDir()
181207
return Path.Combine(_dbSettings.FileRepository, KNOWLEDGE_FOLDER, VECTOR_FOLDER);
182208
}
183209

184-
private string BuildKnowledgeDocumentDir(string collectionName, string vectorStoreProvider)
210+
private string BuildKnowledgeCollectionFileDir(string collectionName, string vectorStoreProvider)
185211
{
186-
return Path.Combine(_dbSettings.FileRepository, KNOWLEDGE_FOLDER, KNOWLEDGE_DOC_FOLDER, vectorStoreProvider, collectionName);
212+
return Path.Combine(_dbSettings.FileRepository, KNOWLEDGE_FOLDER, KNOWLEDGE_DOC_FOLDER, vectorStoreProvider.CleanStr(), collectionName.CleanStr());
187213
}
188214
#endregion
189215
}

0 commit comments

Comments
 (0)