diff --git a/.gitignore b/.gitignore
index 530b14105f74..07c766a8acbf 100644
--- a/.gitignore
+++ b/.gitignore
@@ -477,4 +477,5 @@ playwright-report/
# Static Web App deployment config
swa-cli.config.json
**/copilot-chat-app/webapp/build
-**/copilot-chat-app/webapp/node_modules
\ No newline at end of file
+**/copilot-chat-app/webapp/node_modules
+**/copilot-chat-app/webapi/data/eng.traineddata
diff --git a/samples/apps/copilot-chat-app/webapi/CopilotChat/Controllers/DocumentImportController.cs b/samples/apps/copilot-chat-app/webapi/CopilotChat/Controllers/DocumentImportController.cs
index d88d89825a62..d53020553534 100644
--- a/samples/apps/copilot-chat-app/webapi/CopilotChat/Controllers/DocumentImportController.cs
+++ b/samples/apps/copilot-chat-app/webapi/CopilotChat/Controllers/DocumentImportController.cs
@@ -18,6 +18,8 @@
using SemanticKernel.Service.CopilotChat.Models;
using SemanticKernel.Service.CopilotChat.Options;
using SemanticKernel.Service.CopilotChat.Storage;
+using SemanticKernel.Service.Services;
+using Tesseract;
using UglyToad.PdfPig;
using UglyToad.PdfPig.DocumentLayoutAnalysis.TextExtractor;
using static SemanticKernel.Service.CopilotChat.Models.MemorySource;
@@ -44,6 +46,21 @@ private enum SupportedFileType
/// .pdf
///
Pdf,
+
+ ///
+ /// .jpg
+ ///
+ Jpg,
+
+ ///
+ /// .png
+ ///
+ Png,
+
+ ///
+ /// .tif or .tiff
+ ///
+ Tiff
};
private readonly ILogger _logger;
@@ -54,6 +71,7 @@ private enum SupportedFileType
private readonly ChatParticipantRepository _participantRepository;
private const string GlobalDocumentUploadedClientCall = "GlobalDocumentUploaded";
private const string ChatDocumentUploadedClientCall = "ChatDocumentUploaded";
+ private readonly ITesseractEngine _tesseractEngine;
///
/// Initializes a new instance of the class.
@@ -64,7 +82,8 @@ public DocumentImportController(
ChatSessionRepository sessionRepository,
ChatMemorySourceRepository sourceRepository,
ChatMessageRepository messageRepository,
- ChatParticipantRepository participantRepository)
+ ChatParticipantRepository participantRepository,
+ ITesseractEngine tesseractEngine)
{
this._logger = logger;
this._options = documentMemoryOptions.Value;
@@ -72,6 +91,7 @@ public DocumentImportController(
this._sourceRepository = sourceRepository;
this._messageRepository = messageRepository;
this._participantRepository = participantRepository;
+ this._tesseractEngine = tesseractEngine;
}
///
@@ -259,6 +279,14 @@ private async Task ImportDocumentHelperAsync(IKernel kernel, IForm
case SupportedFileType.Pdf:
documentContent = this.ReadPdfFile(formFile);
break;
+ case SupportedFileType.Jpg:
+ case SupportedFileType.Png:
+ case SupportedFileType.Tiff:
+ {
+ documentContent = await this.ReadTextFromImageFileAsync(formFile);
+ break;
+ }
+
default:
// This should never happen. Validation should have already caught this.
return ImportResult.Fail();
@@ -391,10 +419,35 @@ private SupportedFileType GetFileType(string fileName)
{
".txt" => SupportedFileType.Txt,
".pdf" => SupportedFileType.Pdf,
+ ".jpg" => SupportedFileType.Jpg,
+ ".jpeg" => SupportedFileType.Jpg,
+ ".png" => SupportedFileType.Png,
+ ".tif" => SupportedFileType.Tiff,
+ ".tiff" => SupportedFileType.Tiff,
_ => throw new ArgumentOutOfRangeException($"Unsupported file type: {extension}"),
};
}
+    /// <summary>
+    /// Reads the text content from an image file by running it through the OCR engine.
+    /// </summary>
+    /// <param name="file">An IFormFile object.</param>
+    /// <returns>A string of the content of the file.</returns>
+    private async Task<string> ReadTextFromImageFileAsync(IFormFile file)
+    {
+        await using var ms = new MemoryStream();
+        await file.CopyToAsync(ms);
+
+        // A single buffer copy is enough; the bytes do not need a second stream round-trip.
+        using var img = Pix.LoadFromMemory(ms.ToArray());
+        // The engine is a shared singleton that handles one page at a time, so serialize access.
+        lock (this._tesseractEngine)
+        {
+            using var page = this._tesseractEngine.Process(img);
+            return page.GetText();
+        }
+    }
+
///
/// Read the content of a text file.
///
diff --git a/samples/apps/copilot-chat-app/webapi/CopilotChat/Extensions/ServiceExtensions.cs b/samples/apps/copilot-chat-app/webapi/CopilotChat/Extensions/ServiceExtensions.cs
index 1b9b341fd7c3..eff7b3854faf 100644
--- a/samples/apps/copilot-chat-app/webapi/CopilotChat/Extensions/ServiceExtensions.cs
+++ b/samples/apps/copilot-chat-app/webapi/CopilotChat/Extensions/ServiceExtensions.cs
@@ -11,6 +11,8 @@
using SemanticKernel.Service.CopilotChat.Options;
using SemanticKernel.Service.CopilotChat.Storage;
using SemanticKernel.Service.Options;
+using SemanticKernel.Service.Services;
+using Tesseract;
namespace SemanticKernel.Service.CopilotChat.Extensions;
@@ -68,13 +70,50 @@ public static IServiceCollection AddCopilotChatOptions(this IServiceCollection s
.ValidateOnStart()
.PostConfigure(TrimStringProperties);
+ // OCR support options
+ services.AddOptions()
+ .Bind(configuration.GetSection(OcrSupportOptions.PropertyName))
+ .ValidateOnStart()
+ .PostConfigure(TrimStringProperties);
+
+ return services;
+ }
+
+    /// <summary>
+    /// Adds persistent OCR support service.
+    /// </summary>
+    /// <param name="services">The service collection to register the OCR engine with.</param>
+    /// <returns>The same <paramref name="services"/> instance, so calls can be chained.</returns>
+    /// <exception cref="InvalidOperationException">Thrown when the engine is first resolved and OcrSupport:Type is not a supported value.</exception>
+    public static IServiceCollection AddPersistentOcrSupport(this IServiceCollection services)
+    {
+        // Read the options from the application's own container when the engine is first
+        // requested, rather than calling BuildServiceProvider() here, which would create a
+        // second, never-disposed container just to read configuration.
+        services.AddSingleton<ITesseractEngine>(sp =>
+        {
+            OcrSupportOptions ocrSupportConfig = sp.GetRequiredService<IOptions<OcrSupportOptions>>().Value;
+
+            switch (ocrSupportConfig.Type)
+            {
+                case OcrSupportOptions.OcrSupportType.Tesseract:
+                    return new TesseractEngineWrapper(new TesseractEngine(ocrSupportConfig.Tesseract!.FilePath, ocrSupportConfig.Tesseract!.Language, EngineMode.Default));
+
+                case OcrSupportOptions.OcrSupportType.None:
+                    return new NullTesseractEngine();
+
+                default:
+                    throw new InvalidOperationException($"Unsupported OcrSupport:Type '{ocrSupportConfig.Type}'");
+            }
+        });
+
+        return services;
+    }
///
/// Add persistent chat store services.
///
- public static void AddPersistentChatStore(this IServiceCollection services)
+ public static IServiceCollection AddPersistentChatStore(this IServiceCollection services)
{
IStorageContext chatSessionStorageContext;
IStorageContext chatMessageStorageContext;
@@ -144,6 +183,8 @@ public static void AddPersistentChatStore(this IServiceCollection services)
services.AddSingleton(new ChatMessageRepository(chatMessageStorageContext));
services.AddSingleton(new ChatMemorySourceRepository(chatMemorySourceStorageContext));
services.AddSingleton(new ChatParticipantRepository(chatParticipantStorageContext));
+
+ return services;
}
///
diff --git a/samples/apps/copilot-chat-app/webapi/CopilotChat/Options/OcrSupportOptions.cs b/samples/apps/copilot-chat-app/webapi/CopilotChat/Options/OcrSupportOptions.cs
new file mode 100644
index 000000000000..a1744b47b8c6
--- /dev/null
+++ b/samples/apps/copilot-chat-app/webapi/CopilotChat/Options/OcrSupportOptions.cs
@@ -0,0 +1,37 @@
+// Copyright (c) Microsoft. All rights reserved.
+
+using SemanticKernel.Service.Options;
+
+namespace SemanticKernel.Service.CopilotChat.Options;
+
+/// <summary>
+/// Ocr Support Configuration Options
+/// </summary>
+public class OcrSupportOptions
+{
+ public const string PropertyName = "OcrSupport";
+
+ public enum OcrSupportType
+ {
+ /// <summary>
+ /// No OCR Support
+ /// </summary>
+ None,
+
+ /// <summary>
+ /// Tesseract OCR Support
+ /// </summary>
+ Tesseract
+ }
+
+ /// <summary>
+ /// Gets or sets the type of OCR support to use. Defaults to <see cref="OcrSupportType.None"/>.
+ /// </summary>
+ public OcrSupportType Type { get; set; } = OcrSupportType.None;
+
+ /// <summary>
+ /// Gets or sets the configuration for the Tesseract OCR support (marked as required when <see cref="Type"/> is Tesseract).
+ /// </summary>
+ [RequiredOnPropertyValue(nameof(Type), OcrSupportType.Tesseract)]
+ public TesseractOptions? Tesseract { get; set; }
+}
diff --git a/samples/apps/copilot-chat-app/webapi/CopilotChat/Options/TesseractOptions.cs b/samples/apps/copilot-chat-app/webapi/CopilotChat/Options/TesseractOptions.cs
new file mode 100644
index 000000000000..0fe50f104667
--- /dev/null
+++ b/samples/apps/copilot-chat-app/webapi/CopilotChat/Options/TesseractOptions.cs
@@ -0,0 +1,26 @@
+// Copyright (c) Microsoft. All rights reserved.
+
+using System.ComponentModel.DataAnnotations;
+using SemanticKernel.Service.Options;
+
+namespace SemanticKernel.Service.CopilotChat.Options;
+
+/// <summary>
+/// Configuration options for Tesseract OCR support.
+/// </summary>
+public sealed class TesseractOptions
+{
+ public const string PropertyName = "Tesseract";
+
+ /// <summary>
+ /// The file path where the Tesseract language file is stored (e.g. "./data")
+ /// </summary>
+ [Required, NotEmptyOrWhitespace]
+ public string? FilePath { get; set; } = string.Empty;
+
+ /// <summary>
+ /// The language file prefix name (e.g. "eng")
+ /// </summary>
+ [Required, NotEmptyOrWhitespace]
+ public string? Language { get; set; } = string.Empty;
+}
diff --git a/samples/apps/copilot-chat-app/webapi/CopilotChatWebApi.csproj b/samples/apps/copilot-chat-app/webapi/CopilotChatWebApi.csproj
index 95f4dd6f790e..33b3c81fd550 100644
--- a/samples/apps/copilot-chat-app/webapi/CopilotChatWebApi.csproj
+++ b/samples/apps/copilot-chat-app/webapi/CopilotChatWebApi.csproj
@@ -62,6 +62,8 @@
all
runtime; build; native; contentfiles; analyzers; buildtransitive
+
+
@@ -69,4 +71,10 @@
<_Parameter1>false
+
+
+
+ PreserveNewest
+
+
diff --git a/samples/apps/copilot-chat-app/webapi/CopilotChatWebApi.sln b/samples/apps/copilot-chat-app/webapi/CopilotChatWebApi.sln
new file mode 100644
index 000000000000..9d08ac20fb52
--- /dev/null
+++ b/samples/apps/copilot-chat-app/webapi/CopilotChatWebApi.sln
@@ -0,0 +1,25 @@
+
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio Version 17
+VisualStudioVersion = 17.5.33530.505
+MinimumVisualStudioVersion = 10.0.40219.1
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "CopilotChatWebApi", "CopilotChatWebApi.csproj", "{35CC3A68-E577-4B21-B94C-BF674F8FA505}"
+EndProject
+Global
+ GlobalSection(SolutionConfigurationPlatforms) = preSolution
+ Debug|Any CPU = Debug|Any CPU
+ Release|Any CPU = Release|Any CPU
+ EndGlobalSection
+ GlobalSection(ProjectConfigurationPlatforms) = postSolution
+ {35CC3A68-E577-4B21-B94C-BF674F8FA505}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {35CC3A68-E577-4B21-B94C-BF674F8FA505}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {35CC3A68-E577-4B21-B94C-BF674F8FA505}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {35CC3A68-E577-4B21-B94C-BF674F8FA505}.Release|Any CPU.Build.0 = Release|Any CPU
+ EndGlobalSection
+ GlobalSection(SolutionProperties) = preSolution
+ HideSolutionNode = FALSE
+ EndGlobalSection
+ GlobalSection(ExtensibilityGlobals) = postSolution
+ SolutionGuid = {1F60AC39-60D2-4CD2-B2FC-71E174DDFC1A}
+ EndGlobalSection
+EndGlobal
diff --git a/samples/apps/copilot-chat-app/webapi/Program.cs b/samples/apps/copilot-chat-app/webapi/Program.cs
index dd707dfc59d4..4f4a988638d3 100644
--- a/samples/apps/copilot-chat-app/webapi/Program.cs
+++ b/samples/apps/copilot-chat-app/webapi/Program.cs
@@ -47,7 +47,8 @@ public static async Task Main(string[] args)
builder.Services
.AddCopilotChatOptions(builder.Configuration)
.AddCopilotChatPlannerServices()
- .AddPersistentChatStore();
+ .AddPersistentChatStore()
+ .AddPersistentOcrSupport();
// Add SignalR as the real time relay service
builder.Services.AddSignalR();
diff --git a/samples/apps/copilot-chat-app/webapi/README.md b/samples/apps/copilot-chat-app/webapi/README.md
index 425bf3dcb35d..36fd9918eabb 100644
--- a/samples/apps/copilot-chat-app/webapi/README.md
+++ b/samples/apps/copilot-chat-app/webapi/README.md
@@ -9,8 +9,9 @@ while allowing user interfaces to be developed using frontend frameworks such as
Before you get started, make sure you have the following requirements in place:
1. [.NET 6.0](https://dotnet.microsoft.com/en-us/download/dotnet/6.0) for building and deploying .NET 6 projects.
-2. Update the properties in `./appsettings.json` to configure your Azure OpenAI resource or OpenAI account.
-3. Generate and trust a localhost developer certificate.
+2. **(Optional)** [Visual Studio Code](http://aka.ms/vscode) or [Visual Studio](http://aka.ms/vsdownload).
+3. Update the properties in `./appsettings.json` to configure your Azure OpenAI resource or OpenAI account.
+4. Generate and trust a localhost developer certificate.
- For Windows and Mac run
```bash
dotnet dev-certs https --trust
@@ -25,7 +26,9 @@ Before you get started, make sure you have the following requirements in place:
> To clean your system of the developer certificate, run `dotnet run dev-certs https --clean`
-4. **(Optional)** [Visual Studio Code](http://aka.ms/vscode) or [Visual Studio](http://aka.ms/vsdownload).
+5. **(Optional)** To enable support for uploading image file formats such as png, jpg and tiff, we have included the [Tesseract](https://www.nuget.org/packages/Tesseract) nuget package.
+ - You will need to obtain one or more [tessdata language data files](https://github.com/tesseract-ocr/tessdata) such as `eng.traineddata` and add them to your `./data` directory, or to the directory specified by the `Tesseract.FilePath` setting in `./appsettings.json`.
+ - Set each language data file's `Copy to Output Directory` property to `Copy if newer` so that it is available to the service at runtime.
# Start the WebApi Service
diff --git a/samples/apps/copilot-chat-app/webapi/Services/ITesseractEngine.cs b/samples/apps/copilot-chat-app/webapi/Services/ITesseractEngine.cs
new file mode 100644
index 000000000000..d3318d9757b7
--- /dev/null
+++ b/samples/apps/copilot-chat-app/webapi/Services/ITesseractEngine.cs
@@ -0,0 +1,26 @@
+// Copyright (c) Microsoft. All rights reserved.
+
+using Tesseract;
+
+namespace SemanticKernel.Service.Services;
+
+/// <summary>
+/// Wrapper for the Tesseract engine.
+/// </summary>
+public interface ITesseractEngine
+{
+ /// <summary>
+ /// Processes the specific image.
+ /// </summary>
+ /// <param name="image">The image to process.</param>
+ /// <returns>
+ /// The page produced for the image. Dispose it once its text has been read.
+ /// </returns>
+ /// <exception cref="System.NotImplementedException">Thrown by the implementation used when OCR is not configured.</exception>
+ /// <remarks>
+ /// You can only have one result iterator open at any one time.
+ /// Unlike the Tesseract method this mirrors, no page layout analysis
+ /// (page segmentation mode) parameter is exposed here.
+ /// </remarks>
+ Page Process(Pix image);
+}
diff --git a/samples/apps/copilot-chat-app/webapi/Services/NullTesseractEngine.cs b/samples/apps/copilot-chat-app/webapi/Services/NullTesseractEngine.cs
new file mode 100644
index 000000000000..d1aff4dc38f0
--- /dev/null
+++ b/samples/apps/copilot-chat-app/webapi/Services/NullTesseractEngine.cs
@@ -0,0 +1,23 @@
+// Copyright (c) Microsoft. All rights reserved.
+
+using System;
+using Tesseract;
+
+namespace SemanticKernel.Service.Services;
+
+/// <summary>
+/// Stand-in <see cref="ITesseractEngine"/> used to mock the TesseractEngine
+/// in the event that the Tesseract language file is not installed.
+/// </summary>
+public class NullTesseractEngine : ITesseractEngine
+{
+    /// <summary>
+    /// Always fails, telling the user that they need to install the Tesseract language file.
+    /// </summary>
+    /// <param name="image">Not used.</param>
+    /// <returns>Never returns normally.</returns>
+    /// <exception cref="NotImplementedException">Always thrown.</exception>
+    public Page Process(Pix image) =>
+        // No OCR engine is available, so fail with a message that points at the setup instructions.
+        throw new NotImplementedException("You must have the Tesseract language file to use the image upload feature. See the README.md");
+}
diff --git a/samples/apps/copilot-chat-app/webapi/Services/TesseractEngineWrapper.cs b/samples/apps/copilot-chat-app/webapi/Services/TesseractEngineWrapper.cs
new file mode 100644
index 000000000000..ff7b4f088254
--- /dev/null
+++ b/samples/apps/copilot-chat-app/webapi/Services/TesseractEngineWrapper.cs
@@ -0,0 +1,37 @@
+// Copyright (c) Microsoft. All rights reserved.
+
+using System;
+using Tesseract;
+
+namespace SemanticKernel.Service.Services;
+
+/// <summary>
+/// Adapter that exposes a <see cref="Tesseract.TesseractEngine"/> from the
+/// Tesseract OCR library through <see cref="ITesseractEngine"/>.
+/// </summary>
+/// <remarks>
+/// This allows the TesseractEngine to be mocked in the event that the Tesseract language file is not installed.
+/// </remarks>
+public class TesseractEngineWrapper : ITesseractEngine
+{
+    /// <summary>
+    /// Creates a new wrapper around an already-constructed TesseractEngine.
+    /// </summary>
+    /// <param name="tesseractEngine">The engine that calls are delegated to.</param>
+    /// <exception cref="ArgumentNullException">Thrown when <paramref name="tesseractEngine"/> is null.</exception>
+    public TesseractEngineWrapper(TesseractEngine tesseractEngine)
+    {
+        // Reject a missing engine up front instead of failing later inside Process.
+        this.TesseractEngine = tesseractEngine
+            ?? throw new ArgumentNullException(nameof(tesseractEngine));
+    }
+
+    /// <summary>
+    /// Gets the wrapped engine that was supplied to the constructor.
+    /// </summary>
+    public TesseractEngine TesseractEngine { get; }
+
+    /// <inheritdoc/>
+    public Page Process(Pix image)
+        => this.TesseractEngine.Process(image);
+}
diff --git a/samples/apps/copilot-chat-app/webapi/appsettings.json b/samples/apps/copilot-chat-app/webapi/appsettings.json
index 8b1c6bc59e6e..c345f9c1f5b3 100644
--- a/samples/apps/copilot-chat-app/webapi/appsettings.json
+++ b/samples/apps/copilot-chat-app/webapi/appsettings.json
@@ -143,6 +143,21 @@
"FileSizeLimit": 4000000,
"FileCountLimit": 10
},
+  //
+  // OCR support is used for allowing end users to upload images containing text in addition to text based documents.
+  // - Supported Types are "none" or "tesseract". With "none", image uploads fail with an error that points to the README.
+  // - When using Tesseract OCR Support (In order to upload image file formats such as png, jpg and tiff)
+  //   - Obtain language data files here: https://github.com/tesseract-ocr/tessdata .
+  //   - Add these files to your `data` folder or the path specified in the "FilePath" property and set the "Copy to Output Directory" value to "Copy if newer".
+  //
+  "OcrSupport": {
+    "Type": "none",
+    "Tesseract": {
+      "Language": "eng",
+      "FilePath": "./data"
+    }
+  },
+
//
// ChatSkill prompts are used to generate responses to user messages.
// - CompletionTokenLimit is the token limit of the chat model, see https://platform.openai.com/docs/models/overview
diff --git a/samples/apps/copilot-chat-app/webapi/data/README.md b/samples/apps/copilot-chat-app/webapi/data/README.md
new file mode 100644
index 000000000000..68b97f51dee3
--- /dev/null
+++ b/samples/apps/copilot-chat-app/webapi/data/README.md
@@ -0,0 +1,6 @@
+# Tesseract OCR Support
+
+This API supports the ability to upload image file formats such as png, jpg and tiff via the [Tesseract](https://www.nuget.org/packages/Tesseract) nuget package.
+You will need to obtain one or more [tessdata language data files](https://github.com/tesseract-ocr/tessdata) such as `eng.traineddata` and add them to your `./data` directory, or to the directory specified by the `Tesseract.FilePath` setting in `./appsettings.json`.
+
+If you do not add any `.traineddata` files, you will receive a runtime exception when attempting to upload one of these image formats.
\ No newline at end of file
diff --git a/samples/apps/copilot-chat-app/webapp/src/components/FileUploader.tsx b/samples/apps/copilot-chat-app/webapp/src/components/FileUploader.tsx
index dbeb3c771e53..6a83307e177d 100644
--- a/samples/apps/copilot-chat-app/webapp/src/components/FileUploader.tsx
+++ b/samples/apps/copilot-chat-app/webapp/src/components/FileUploader.tsx
@@ -45,7 +45,7 @@ export const FileUploader: React.FC = forwardRef
);
},
diff --git a/samples/apps/copilot-chat-app/webapp/src/components/chat/ChatInput.tsx b/samples/apps/copilot-chat-app/webapp/src/components/chat/ChatInput.tsx
index ed2433a607ac..8c12fe304895 100644
--- a/samples/apps/copilot-chat-app/webapp/src/components/chat/ChatInput.tsx
+++ b/samples/apps/copilot-chat-app/webapp/src/components/chat/ChatInput.tsx
@@ -224,7 +224,7 @@ export const ChatInput: React.FC = ({ isDraggingOver, onDragLeav
type="file"
ref={documentFileRef}
style={{ display: 'none' }}
- accept=".txt,.pdf"
+ accept=".txt,.pdf,.jpg,.jpeg,.png,.tif,.tiff"
multiple={true}
onChange={() => {
handleImport();