diff --git a/src/Infrastructure/BotSharp.OpenAPI/Controllers/KnowledgeBaseController.cs b/src/Infrastructure/BotSharp.OpenAPI/Controllers/KnowledgeBaseController.cs index 89f26f45e..6ad463b36 100644 --- a/src/Infrastructure/BotSharp.OpenAPI/Controllers/KnowledgeBaseController.cs +++ b/src/Infrastructure/BotSharp.OpenAPI/Controllers/KnowledgeBaseController.cs @@ -1,5 +1,6 @@ using BotSharp.Abstraction.Knowledges.Models; using BotSharp.Abstraction.Knowledges.Settings; +using Microsoft.AspNetCore.Http; namespace BotSharp.OpenAPI.Controllers; @@ -16,13 +17,6 @@ public KnowledgeBaseController(IKnowledgeService knowledgeService, IServiceProvi _services = services; } - [HttpPost("/knowledge-base/embed")] - public async Task EmbedKnowledge() - { - var chunks = await _knowledgeService.CollectChunkedKnowledge(); - await _knowledgeService.EmbedKnowledge(chunks); - } - [HttpGet("/knowledge/{agentId}")] public async Task> RetrieveKnowledge([FromRoute] string agentId, [FromQuery(Name = "q")] string question) { @@ -70,8 +64,13 @@ public async Task FeedKnowledge([FromRoute] string agentId, List< foreach (var formFile in files) { var filePath = Path.GetTempFileName(); - using var stream = System.IO.File.Create(filePath); - await formFile.CopyToAsync(stream); + + + using (var stream = new FileStream(filePath, FileMode.Create, FileAccess.Write, FileShare.None)) + { + await formFile.CopyToAsync(stream); + await stream.FlushAsync(); // Ensure all data is written to the file + } var content = await textConverter.ConvertPdfToText(filePath, startPageNum, endPageNum); @@ -84,6 +83,9 @@ await _knowledgeService.Feed(new KnowledgeFeedModel AgentId = agentId, Content = content }); + + // Delete the temp file after processing to clean up + System.IO.File.Delete(filePath); } return Ok(new { count = files.Count, size }); diff --git a/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/PigPdf2TextConverter.cs b/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/PigPdf2TextConverter.cs index 58fb15529..17706c136 100644 --- a/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/PigPdf2TextConverter.cs +++ b/src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/PigPdf2TextConverter.cs @@ -5,28 +5,24 @@ namespace BotSharp.Plugin.KnowledgeBase.Services; public class PigPdf2TextConverter : IPdf2TextConverter { - public async Task ConvertPdfToText(string filePath, int? startPageNum, int? endPageNum) + public Task ConvertPdfToText(string filePath, int? startPageNum, int? endPageNum) { - return await OpenPdfDocumentAsync(filePath, startPageNum, endPageNum); + // since PdfDocument.Open is not async, we dont need to make this method async + // if you need this method to be async, consider wrapping the call in Task.Run for CPU-bound work + return Task.FromResult(OpenPdfDocument(filePath, startPageNum, endPageNum)); } - private async Task OpenPdfDocumentAsync(string filePath, int? startPageNum, int? endPageNum) + private string OpenPdfDocument(string filePath, int? startPageNum, int? endPageNum) { - var document = PdfDocument.Open(filePath); - var content = ""; + using var fileStream = new FileStream(filePath, FileMode.Open, FileAccess.Read, FileShare.Read); + using var document = PdfDocument.Open(fileStream); + var content = new StringBuilder(); foreach (Page page in document.GetPages()) { - if (startPageNum.HasValue && page.Number < startPageNum.Value) - { - continue; - } - - if (endPageNum.HasValue && page.Number > endPageNum.Value) - { - continue; - } - content += page.Text; + if (startPageNum.HasValue && page.Number < startPageNum.Value) continue; + if (endPageNum.HasValue && page.Number > endPageNum.Value) continue; + content.Append(page.Text); } - return content; + return content.ToString(); } }