Skip to content

Commit e94e55b

Browse files
authored
Merge pull request #519 from iceljc/features/add-pdf-converter
add pdf converter
2 parents 9162e81 + bc764cf commit e94e55b

File tree

5 files changed

+110
-39
lines changed

5 files changed

+110
-39
lines changed
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
namespace BotSharp.Abstraction.Files.Converters;
2+
3+
/// <summary>
/// Contract for a converter that turns the pages of a pdf file into image files.
/// </summary>
public interface IPdf2ImageConverter
4+
{
5+
/// <summary>
6+
/// Convert pdf pages to images, and return a list of image file paths
7+
/// </summary>
8+
/// <param name="pdfLocation">Pdf file location</param>
9+
/// <param name="imageFolderLocation">Image folder location</param>
10+
/// <returns>Paths of the image files produced from the pdf pages</returns>
11+
Task<IEnumerable<string>> ConvertPdfToImages(string pdfLocation, string imageFolderLocation);
12+
}

src/Infrastructure/BotSharp.Core/BotSharp.Core.csproj

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -181,11 +181,19 @@
181181
<PackageReference Include="Fluid.Core" Version="2.8.0" />
182182
<PackageReference Include="Microsoft.AspNetCore.StaticFiles" Version="2.2.0" />
183183
<PackageReference Include="Nanoid" Version="3.0.0" />
184+
<PackageReference Include="PdfiumViewer" Version="2.13.0" />
185+
<PackageReference Include="PdfiumViewer.Native.x86.v8-xfa" Version="2018.4.8.256" />
186+
<PackageReference Include="PdfiumViewer.Native.x86_64.v8-xfa" Version="2018.4.8.256" />
184187
<PackageReference Include="RedLock.net" Version="2.3.2" />
188+
<PackageReference Include="System.Drawing.Common" Version="8.0.6" />
185189
</ItemGroup>
186190

187191
<ItemGroup>
188192
<ProjectReference Include="..\BotSharp.Abstraction\BotSharp.Abstraction.csproj" />
189193
</ItemGroup>
190194

195+
<ItemGroup>
196+
<Folder Include="Translation\Models\" />
197+
</ItemGroup>
198+
191199
</Project>

src/Infrastructure/BotSharp.Core/Files/BotSharpFileService.Conversation.cs

Lines changed: 48 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
using BotSharp.Abstraction.Browsing;
2-
using BotSharp.Abstraction.Browsing.Models;
1+
using BotSharp.Abstraction.Files.Converters;
2+
using BotSharp.Core.Files.Converters;
33
using Microsoft.EntityFrameworkCore;
44
using System.IO;
55
using System.Linq;
@@ -51,18 +51,8 @@ private async Task<List<MessageFileModel>> GetMessageFiles(string conversationId
5151

5252
try
5353
{
54-
var msgInfo = new MessageInfo
55-
{
56-
ContextId = Guid.NewGuid().ToString()
57-
};
58-
var web = _services.GetRequiredService<IWebBrowser>();
5954
var preFixPath = Path.Combine(_baseDir, CONVERSATION_FOLDER, conversationId, FILE_FOLDER);
6055

61-
if (isNeedScreenShot)
62-
{
63-
await web.LaunchBrowser(msgInfo);
64-
}
65-
6656
foreach (var messageId in messageIds)
6757
{
6858
var dir = Path.Combine(preFixPath, messageId, source);
@@ -91,40 +81,40 @@ private async Task<List<MessageFileModel>> GetMessageFiles(string conversationId
9181
var screenShotDir = Path.Combine(subDir, SCREENSHOT_FILE_FOLDER);
9282
if (ExistDirectory(screenShotDir) && Directory.GetFiles(screenShotDir).Any())
9383
{
94-
file = Directory.GetFiles(screenShotDir).First();
95-
contentType = GetFileContentType(file);
96-
97-
var model = new MessageFileModel()
84+
foreach (var screenShot in Directory.GetFiles(screenShotDir))
9885
{
99-
MessageId = messageId,
100-
FileStorageUrl = file,
101-
ContentType = contentType
102-
};
103-
files.Add(model);
86+
contentType = GetFileContentType(screenShot);
87+
if (!_allowedImageTypes.Contains(contentType)) continue;
88+
89+
var model = new MessageFileModel()
90+
{
91+
MessageId = messageId,
92+
FileStorageUrl = screenShot,
93+
ContentType = contentType
94+
};
95+
files.Add(model);
96+
}
10497
}
10598
else
10699
{
107-
await web.GoToPage(msgInfo, new PageActionArgs { Url = file });
108-
var path = Path.Combine(subDir, SCREENSHOT_FILE_FOLDER, $"{Guid.NewGuid()}.png");
109-
await web.ScreenshotAsync(msgInfo, path);
110-
contentType = GetFileContentType(path);
100+
var screenShotPath = Path.Combine(subDir, SCREENSHOT_FILE_FOLDER);
101+
var images = await ConvertPdfToImages(file, screenShotPath);
111102

112-
var model = new MessageFileModel()
103+
foreach (var image in images)
113104
{
114-
MessageId = messageId,
115-
FileStorageUrl = path,
116-
ContentType = contentType
117-
};
118-
files.Add(model);
105+
contentType = GetFileContentType(image);
106+
var model = new MessageFileModel()
107+
{
108+
MessageId = messageId,
109+
FileStorageUrl = image,
110+
ContentType = contentType
111+
};
112+
files.Add(model);
113+
}
119114
}
120115
}
121116
}
122117
}
123-
124-
if (isNeedScreenShot)
125-
{
126-
await web.CloseBrowser(msgInfo.ContextId);
127-
}
128118
}
129119
catch (Exception ex)
130120
{
@@ -227,9 +217,13 @@ public bool SaveMessageFiles(string conversationId, string messageId, string sou
227217
Directory.CreateDirectory(subDir);
228218
}
229219

230-
using var fs = new FileStream(Path.Combine(subDir, file.FileName), FileMode.Create);
231-
fs.Write(bytes, 0, bytes.Length);
232-
fs.Flush(true);
220+
using (var fs = new FileStream(Path.Combine(subDir, file.FileName), FileMode.Create))
221+
{
222+
fs.Write(bytes, 0, bytes.Length);
223+
fs.Flush(true);
224+
fs.Close();
225+
Thread.Sleep(100);
226+
}
233227
}
234228

235229
return true;
@@ -318,5 +312,20 @@ private string GetConversationFileDirectory(string? conversationId, string? mess
318312
var dir = Path.Combine(_baseDir, CONVERSATION_FOLDER, conversationId);
319313
return dir;
320314
}
315+
316+
private async Task<IEnumerable<string>> ConvertPdfToImages(string pdfLoc, string imageLoc)
317+
{
318+
var converters = _services.GetServices<IPdf2ImageConverter>();
319+
if (converters.IsNullOrEmpty()) return Enumerable.Empty<string>();
320+
321+
var converter = converters.FirstOrDefault(x => x.GetType().Name != typeof(PdfiumConverter).Name);
322+
if (converter == null)
323+
{
324+
converter = converters.FirstOrDefault(x => x.GetType().Name == typeof(PdfiumConverter).Name);
325+
if (converter == null) return Enumerable.Empty<string>();
326+
}
327+
328+
return await converter.ConvertPdfToImages(pdfLoc, imageLoc);
329+
}
321330
#endregion
322331
}
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
using BotSharp.Abstraction.Files.Converters;
2+
using PdfiumViewer;
3+
using System.IO;
4+
5+
namespace BotSharp.Core.Files.Converters;
6+
7+
/// <summary>
/// <see cref="IPdf2ImageConverter"/> implementation that renders each pdf page to a PNG file with PdfiumViewer.
/// </summary>
public class PdfiumConverter : IPdf2ImageConverter
{
    /// <summary>
    /// Render every page of a pdf into its own PNG file inside the given folder.
    /// The folder is deleted (recursively) and recreated on each call, so it only
    /// ever contains the images of the latest conversion.
    /// </summary>
    /// <param name="pdfLocation">Pdf file location</param>
    /// <param name="imageFolderLocation">Image folder location; its previous content is removed</param>
    /// <returns>
    /// Paths of the generated PNG files in page order; an empty list when
    /// <paramref name="imageFolderLocation"/> is null or whitespace
    /// </returns>
    public async Task<IEnumerable<string>> ConvertPdfToImages(string pdfLocation, string imageFolderLocation)
    {
        var paths = new List<string>();
        if (string.IsNullOrWhiteSpace(imageFolderLocation)) return paths;

        // Open the pdf BEFORE touching the output folder: if the document cannot be
        // loaded, the exception propagates as before but previously generated
        // images are no longer wiped out by a conversion that never happens.
        using (var document = PdfDocument.Load(pdfLocation))
        {
            if (Directory.Exists(imageFolderLocation))
            {
                Directory.Delete(imageFolderLocation, true);
            }
            Directory.CreateDirectory(imageFolderLocation);

            // One guid per conversion keeps the files of a single run grouped together.
            var guid = Guid.NewGuid().ToString();
            var pages = document.PageCount;

            for (var page = 0; page < pages; page++)
            {
                var size = document.PageSizes[page];
                using (var image = document.Render(page, (int)size.Width, (int)size.Height, 96, 96, true))
                {
                    // File names are 1-based to match human page numbering.
                    var imagePath = Path.Combine(imageFolderLocation, $"{guid}_pg_{page + 1}.png");
                    image.Save(imagePath, System.Drawing.Imaging.ImageFormat.Png);
                    paths.Add(imagePath);
                }
            }
        }

        return await Task.FromResult(paths);
    }
}

src/Infrastructure/BotSharp.Core/Files/FilePlugin.cs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
using BotSharp.Abstraction.Files.Converters;
2+
using BotSharp.Core.Files.Converters;
13
using BotSharp.Core.Files.Hooks;
24
using Microsoft.Extensions.Configuration;
35

@@ -18,5 +20,6 @@ public void RegisterDI(IServiceCollection services, IConfiguration config)
1820

1921
services.AddScoped<IAgentHook, FileAnalyzerHook>();
2022
services.AddScoped<IAgentToolHook, FileAnalyzerToolHook>();
23+
services.AddScoped<IPdf2ImageConverter, PdfiumConverter>();
2124
}
2225
}

0 commit comments

Comments
 (0)