From bc764cf1da122e973145c418d9b164ffdda091e8 Mon Sep 17 00:00:00 2001
From: Jicheng Lu <103353@smsassist.com>
Date: Fri, 28 Jun 2024 16:16:51 -0500
Subject: [PATCH] add pdf converter
---
.../Files/Converters/IPdf2ImageConverter.cs | 12 +++
.../BotSharp.Core/BotSharp.Core.csproj | 8 ++
.../Files/BotSharpFileService.Conversation.cs | 87 ++++++++++---------
.../Files/Converters/PdfiumConverter.cs | 39 +++++++++
.../BotSharp.Core/Files/FilePlugin.cs | 3 +
5 files changed, 110 insertions(+), 39 deletions(-)
create mode 100644 src/Infrastructure/BotSharp.Abstraction/Files/Converters/IPdf2ImageConverter.cs
create mode 100644 src/Infrastructure/BotSharp.Core/Files/Converters/PdfiumConverter.cs
diff --git a/src/Infrastructure/BotSharp.Abstraction/Files/Converters/IPdf2ImageConverter.cs b/src/Infrastructure/BotSharp.Abstraction/Files/Converters/IPdf2ImageConverter.cs
new file mode 100644
index 000000000..54ad3a6da
--- /dev/null
+++ b/src/Infrastructure/BotSharp.Abstraction/Files/Converters/IPdf2ImageConverter.cs
@@ -0,0 +1,12 @@
+namespace BotSharp.Abstraction.Files.Converters;
+
+public interface IPdf2ImageConverter
+{
+    /// <summary>
+    /// Convert pdf pages to images, and return a list of image file paths
+    /// </summary>
+    /// <param name="pdfLocation">Pdf file location</param>
+    /// <param name="imageFolderLocation">Image folder location</param>
+    /// <returns>File paths of the generated images</returns>
+    Task<IEnumerable<string>> ConvertPdfToImages(string pdfLocation, string imageFolderLocation);
+}
diff --git a/src/Infrastructure/BotSharp.Core/BotSharp.Core.csproj b/src/Infrastructure/BotSharp.Core/BotSharp.Core.csproj
index 51c73bd3d..e8a9aa7af 100644
--- a/src/Infrastructure/BotSharp.Core/BotSharp.Core.csproj
+++ b/src/Infrastructure/BotSharp.Core/BotSharp.Core.csproj
@@ -181,11 +181,19 @@
+
+
+
+
+
+
+
+
diff --git a/src/Infrastructure/BotSharp.Core/Files/BotSharpFileService.Conversation.cs b/src/Infrastructure/BotSharp.Core/Files/BotSharpFileService.Conversation.cs
index 61e121459..a072e6fe2 100644
--- a/src/Infrastructure/BotSharp.Core/Files/BotSharpFileService.Conversation.cs
+++ b/src/Infrastructure/BotSharp.Core/Files/BotSharpFileService.Conversation.cs
@@ -1,5 +1,5 @@
-using BotSharp.Abstraction.Browsing;
-using BotSharp.Abstraction.Browsing.Models;
+using BotSharp.Abstraction.Files.Converters;
+using BotSharp.Core.Files.Converters;
using Microsoft.EntityFrameworkCore;
using System.IO;
using System.Linq;
@@ -51,18 +51,8 @@ private async Task> GetMessageFiles(string conversationId
try
{
- var msgInfo = new MessageInfo
- {
- ContextId = Guid.NewGuid().ToString()
- };
- var web = _services.GetRequiredService();
var preFixPath = Path.Combine(_baseDir, CONVERSATION_FOLDER, conversationId, FILE_FOLDER);
- if (isNeedScreenShot)
- {
- await web.LaunchBrowser(msgInfo);
- }
-
foreach (var messageId in messageIds)
{
var dir = Path.Combine(preFixPath, messageId, source);
@@ -91,40 +81,40 @@ private async Task> GetMessageFiles(string conversationId
var screenShotDir = Path.Combine(subDir, SCREENSHOT_FILE_FOLDER);
if (ExistDirectory(screenShotDir) && Directory.GetFiles(screenShotDir).Any())
{
- file = Directory.GetFiles(screenShotDir).First();
- contentType = GetFileContentType(file);
-
- var model = new MessageFileModel()
+ foreach (var screenShot in Directory.GetFiles(screenShotDir))
{
- MessageId = messageId,
- FileStorageUrl = file,
- ContentType = contentType
- };
- files.Add(model);
+ contentType = GetFileContentType(screenShot);
+ if (!_allowedImageTypes.Contains(contentType)) continue;
+
+ var model = new MessageFileModel()
+ {
+ MessageId = messageId,
+ FileStorageUrl = screenShot,
+ ContentType = contentType
+ };
+ files.Add(model);
+ }
}
else
{
- await web.GoToPage(msgInfo, new PageActionArgs { Url = file });
- var path = Path.Combine(subDir, SCREENSHOT_FILE_FOLDER, $"{Guid.NewGuid()}.png");
- await web.ScreenshotAsync(msgInfo, path);
- contentType = GetFileContentType(path);
+ var screenShotPath = Path.Combine(subDir, SCREENSHOT_FILE_FOLDER);
+ var images = await ConvertPdfToImages(file, screenShotPath);
- var model = new MessageFileModel()
+ foreach (var image in images)
{
- MessageId = messageId,
- FileStorageUrl = path,
- ContentType = contentType
- };
- files.Add(model);
+ contentType = GetFileContentType(image);
+ var model = new MessageFileModel()
+ {
+ MessageId = messageId,
+ FileStorageUrl = image,
+ ContentType = contentType
+ };
+ files.Add(model);
+ }
}
}
}
}
-
- if (isNeedScreenShot)
- {
- await web.CloseBrowser(msgInfo.ContextId);
- }
}
catch (Exception ex)
{
@@ -227,9 +217,13 @@ public bool SaveMessageFiles(string conversationId, string messageId, string sou
Directory.CreateDirectory(subDir);
}
- using var fs = new FileStream(Path.Combine(subDir, file.FileName), FileMode.Create);
- fs.Write(bytes, 0, bytes.Length);
- fs.Flush(true);
+ using (var fs = new FileStream(Path.Combine(subDir, file.FileName), FileMode.Create))
+ {
+ fs.Write(bytes, 0, bytes.Length);
+ fs.Flush(true);
+ fs.Close();
+ Thread.Sleep(100);
+ }
}
return true;
@@ -318,5 +312,20 @@ private string GetConversationFileDirectory(string? conversationId, string? mess
var dir = Path.Combine(_baseDir, CONVERSATION_FOLDER, conversationId);
return dir;
}
+
+    // Picks a registered IPdf2ImageConverter and converts the pdf at pdfLoc into images under imageLoc.
+    // Any converter other than the built-in PdfiumConverter is preferred; PdfiumConverter is the fallback.
+    // Returns the converter's image paths, or an empty sequence when no converter is registered.
+    private async Task<IEnumerable<string>> ConvertPdfToImages(string pdfLoc, string imageLoc)
+    {
+        var converters = _services.GetServices<IPdf2ImageConverter>();
+        if (converters.IsNullOrEmpty()) return Enumerable.Empty<string>();
+
+        // Compare actual types rather than type-name strings so a same-named class elsewhere is not mistaken for the built-in one.
+        var converter = converters.FirstOrDefault(x => x.GetType() != typeof(PdfiumConverter))
+            ?? converters.First();
+
+        return await converter.ConvertPdfToImages(pdfLoc, imageLoc);
+    }
#endregion
}
diff --git a/src/Infrastructure/BotSharp.Core/Files/Converters/PdfiumConverter.cs b/src/Infrastructure/BotSharp.Core/Files/Converters/PdfiumConverter.cs
new file mode 100644
index 000000000..4ee0e3da3
--- /dev/null
+++ b/src/Infrastructure/BotSharp.Core/Files/Converters/PdfiumConverter.cs
@@ -0,0 +1,39 @@
+using BotSharp.Abstraction.Files.Converters;
+using PdfiumViewer;
+using System.IO;
+
+namespace BotSharp.Core.Files.Converters;
+
+/// <summary>Built-in <see cref="IPdf2ImageConverter"/> that saves each pdf page as a png using PdfiumViewer.</summary>
+public class PdfiumConverter : IPdf2ImageConverter
+{
+    /// <inheritdoc/>
+    public async Task<IEnumerable<string>> ConvertPdfToImages(string pdfLocation, string imageFolderLocation)
+    {
+        var paths = new List<string>();
+        if (string.IsNullOrWhiteSpace(imageFolderLocation)) return paths;
+
+        // Recreate the output folder so it only contains images from this run.
+        if (Directory.Exists(imageFolderLocation))
+        {
+            Directory.Delete(imageFolderLocation, true);
+        }
+        Directory.CreateDirectory(imageFolderLocation);
+
+        // A single guid is shared by all page files produced by this call.
+        var guid = Guid.NewGuid().ToString();
+        using (var document = PdfDocument.Load(pdfLocation))
+        {
+            for (var page = 0; page < document.PageCount; page++)
+            {
+                var size = document.PageSizes[page];
+                using var image = document.Render(page, (int)size.Width, (int)size.Height, 96, 96, true);
+                var imagePath = Path.Combine(imageFolderLocation, $"{guid}_pg_{page + 1}.png");
+                image.Save(imagePath, System.Drawing.Imaging.ImageFormat.Png);
+                paths.Add(imagePath);
+            }
+        }
+
+        return await Task.FromResult(paths);
+    }
+}
diff --git a/src/Infrastructure/BotSharp.Core/Files/FilePlugin.cs b/src/Infrastructure/BotSharp.Core/Files/FilePlugin.cs
index 3110c8d5b..a32560515 100644
--- a/src/Infrastructure/BotSharp.Core/Files/FilePlugin.cs
+++ b/src/Infrastructure/BotSharp.Core/Files/FilePlugin.cs
@@ -1,3 +1,5 @@
+using BotSharp.Abstraction.Files.Converters;
+using BotSharp.Core.Files.Converters;
using BotSharp.Core.Files.Hooks;
using Microsoft.Extensions.Configuration;
@@ -18,5 +20,6 @@ public void RegisterDI(IServiceCollection services, IConfiguration config)
services.AddScoped();
services.AddScoped();
+        services.AddScoped<IPdf2ImageConverter, PdfiumConverter>();
}
}