diff --git a/.gitignore b/.gitignore index 530b14105f74..07c766a8acbf 100644 --- a/.gitignore +++ b/.gitignore @@ -477,4 +477,5 @@ playwright-report/ # Static Web App deployment config swa-cli.config.json **/copilot-chat-app/webapp/build -**/copilot-chat-app/webapp/node_modules \ No newline at end of file +**/copilot-chat-app/webapp/node_modules +**/copilot-chat-app/webapi/data/eng.traineddata diff --git a/samples/apps/copilot-chat-app/webapi/CopilotChat/Controllers/DocumentImportController.cs b/samples/apps/copilot-chat-app/webapi/CopilotChat/Controllers/DocumentImportController.cs index d88d89825a62..d53020553534 100644 --- a/samples/apps/copilot-chat-app/webapi/CopilotChat/Controllers/DocumentImportController.cs +++ b/samples/apps/copilot-chat-app/webapi/CopilotChat/Controllers/DocumentImportController.cs @@ -18,6 +18,8 @@ using SemanticKernel.Service.CopilotChat.Models; using SemanticKernel.Service.CopilotChat.Options; using SemanticKernel.Service.CopilotChat.Storage; +using SemanticKernel.Service.Services; +using Tesseract; using UglyToad.PdfPig; using UglyToad.PdfPig.DocumentLayoutAnalysis.TextExtractor; using static SemanticKernel.Service.CopilotChat.Models.MemorySource; @@ -44,6 +46,21 @@ private enum SupportedFileType /// .pdf /// Pdf, + + /// + /// .jpg + /// + Jpg, + + /// + /// .png + /// + Png, + + /// + /// .tif or .tiff + /// + Tiff }; private readonly ILogger _logger; @@ -54,6 +71,7 @@ private enum SupportedFileType private readonly ChatParticipantRepository _participantRepository; private const string GlobalDocumentUploadedClientCall = "GlobalDocumentUploaded"; private const string ChatDocumentUploadedClientCall = "ChatDocumentUploaded"; + private readonly ITesseractEngine _tesseractEngine; /// /// Initializes a new instance of the class. 
@@ -64,7 +82,8 @@ public DocumentImportController( ChatSessionRepository sessionRepository, ChatMemorySourceRepository sourceRepository, ChatMessageRepository messageRepository, - ChatParticipantRepository participantRepository) + ChatParticipantRepository participantRepository, + ITesseractEngine tesseractEngine) { this._logger = logger; this._options = documentMemoryOptions.Value; @@ -72,6 +91,7 @@ public DocumentImportController( this._sourceRepository = sourceRepository; this._messageRepository = messageRepository; this._participantRepository = participantRepository; + this._tesseractEngine = tesseractEngine; } /// @@ -259,6 +279,14 @@ private async Task ImportDocumentHelperAsync(IKernel kernel, IForm case SupportedFileType.Pdf: documentContent = this.ReadPdfFile(formFile); break; + case SupportedFileType.Jpg: + case SupportedFileType.Png: + case SupportedFileType.Tiff: + { + documentContent = await this.ReadTextFromImageFileAsync(formFile); + break; + } + default: // This should never happen. Validation should have already caught this. return ImportResult.Fail(); @@ -391,10 +419,35 @@ private SupportedFileType GetFileType(string fileName) { ".txt" => SupportedFileType.Txt, ".pdf" => SupportedFileType.Pdf, + ".jpg" => SupportedFileType.Jpg, + ".jpeg" => SupportedFileType.Jpg, + ".png" => SupportedFileType.Png, + ".tif" => SupportedFileType.Tiff, + ".tiff" => SupportedFileType.Tiff, _ => throw new ArgumentOutOfRangeException($"Unsupported file type: {extension}"), }; } + /// + /// Reads the text content from an image file. + /// + /// An IFormFile object. + /// A string of the content of the file. 
+ private async Task ReadTextFromImageFileAsync(IFormFile file) + { + await using (var ms = new MemoryStream()) + { + await file.CopyToAsync(ms); + var fileBytes = ms.ToArray(); + await using var imgStream = new MemoryStream(fileBytes); + + using var img = Pix.LoadFromMemory(imgStream.ToArray()); + + using var page = this._tesseractEngine.Process(img); + return page.GetText(); + } + } + /// /// Read the content of a text file. /// diff --git a/samples/apps/copilot-chat-app/webapi/CopilotChat/Extensions/ServiceExtensions.cs b/samples/apps/copilot-chat-app/webapi/CopilotChat/Extensions/ServiceExtensions.cs index 1b9b341fd7c3..eff7b3854faf 100644 --- a/samples/apps/copilot-chat-app/webapi/CopilotChat/Extensions/ServiceExtensions.cs +++ b/samples/apps/copilot-chat-app/webapi/CopilotChat/Extensions/ServiceExtensions.cs @@ -11,6 +11,8 @@ using SemanticKernel.Service.CopilotChat.Options; using SemanticKernel.Service.CopilotChat.Storage; using SemanticKernel.Service.Options; +using SemanticKernel.Service.Services; +using Tesseract; namespace SemanticKernel.Service.CopilotChat.Extensions; @@ -68,13 +70,50 @@ public static IServiceCollection AddCopilotChatOptions(this IServiceCollection s .ValidateOnStart() .PostConfigure(TrimStringProperties); + // OCR support options + services.AddOptions() + .Bind(configuration.GetSection(OcrSupportOptions.PropertyName)) + .ValidateOnStart() + .PostConfigure(TrimStringProperties); + + return services; + } + + /// + /// Adds persistent OCR support service. 
+ /// + /// + public static IServiceCollection AddPersistentOcrSupport(this IServiceCollection services) + { + OcrSupportOptions ocrSupportConfig = services.BuildServiceProvider().GetRequiredService>().Value; + + switch (ocrSupportConfig.Type) + { + case OcrSupportOptions.OcrSupportType.Tesseract: + { + services.AddSingleton(sp => new TesseractEngineWrapper(new TesseractEngine(ocrSupportConfig.Tesseract!.FilePath, ocrSupportConfig.Tesseract!.Language, EngineMode.Default))); + break; + } + + case OcrSupportOptions.OcrSupportType.None: + { + services.AddSingleton(sp => new NullTesseractEngine()); + break; + } + + default: + { + throw new InvalidOperationException($"Unsupported OcrSupport:Type '{ocrSupportConfig.Type}'"); + } + } + return services; } /// /// Add persistent chat store services. /// - public static void AddPersistentChatStore(this IServiceCollection services) + public static IServiceCollection AddPersistentChatStore(this IServiceCollection services) { IStorageContext chatSessionStorageContext; IStorageContext chatMessageStorageContext; @@ -144,6 +183,8 @@ public static void AddPersistentChatStore(this IServiceCollection services) services.AddSingleton(new ChatMessageRepository(chatMessageStorageContext)); services.AddSingleton(new ChatMemorySourceRepository(chatMemorySourceStorageContext)); services.AddSingleton(new ChatParticipantRepository(chatParticipantStorageContext)); + + return services; } /// diff --git a/samples/apps/copilot-chat-app/webapi/CopilotChat/Options/OcrSupportOptions.cs b/samples/apps/copilot-chat-app/webapi/CopilotChat/Options/OcrSupportOptions.cs new file mode 100644 index 000000000000..a1744b47b8c6 --- /dev/null +++ b/samples/apps/copilot-chat-app/webapi/CopilotChat/Options/OcrSupportOptions.cs @@ -0,0 +1,37 @@ +// Copyright (c) Microsoft. All rights reserved. 
+ +using SemanticKernel.Service.Options; + +namespace SemanticKernel.Service.CopilotChat.Options; + +/// +/// Ocr Support Configuration Options +/// +public class OcrSupportOptions +{ + public const string PropertyName = "OcrSupport"; + + public enum OcrSupportType + { + /// + /// No OCR Support + /// + None, + + /// + /// Tesseract OCR Support + /// + Tesseract + } + + /// + /// Gets or sets the type of OCR support to use. + /// + public OcrSupportType Type { get; set; } = OcrSupportType.None; + + /// + /// Gets or sets the configuration for the Tesseract OCR support. + /// + [RequiredOnPropertyValue(nameof(Type), OcrSupportType.Tesseract)] + public TesseractOptions? Tesseract { get; set; } +} diff --git a/samples/apps/copilot-chat-app/webapi/CopilotChat/Options/TesseractOptions.cs b/samples/apps/copilot-chat-app/webapi/CopilotChat/Options/TesseractOptions.cs new file mode 100644 index 000000000000..0fe50f104667 --- /dev/null +++ b/samples/apps/copilot-chat-app/webapi/CopilotChat/Options/TesseractOptions.cs @@ -0,0 +1,26 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System.ComponentModel.DataAnnotations; +using SemanticKernel.Service.Options; + +namespace SemanticKernel.Service.CopilotChat.Options; + +/// +/// Configuration options for Tesseract OCR support. +/// +public sealed class TesseractOptions +{ + public const string PropertyName = "Tesseract"; + + /// + /// The file path where the Tesseract language file is stored (e.g. "./data") + /// + [Required, NotEmptyOrWhitespace] + public string? FilePath { get; set; } = string.Empty; + + /// + /// The language file prefix name (e.g. "eng") + /// + [Required, NotEmptyOrWhitespace] + public string? 
Language { get; set; } = string.Empty; +} diff --git a/samples/apps/copilot-chat-app/webapi/CopilotChatWebApi.csproj b/samples/apps/copilot-chat-app/webapi/CopilotChatWebApi.csproj index 95f4dd6f790e..33b3c81fd550 100644 --- a/samples/apps/copilot-chat-app/webapi/CopilotChatWebApi.csproj +++ b/samples/apps/copilot-chat-app/webapi/CopilotChatWebApi.csproj @@ -62,6 +62,8 @@ all runtime; build; native; contentfiles; analyzers; buildtransitive + + @@ -69,4 +71,10 @@ <_Parameter1>false + + + + PreserveNewest + + diff --git a/samples/apps/copilot-chat-app/webapi/CopilotChatWebApi.sln b/samples/apps/copilot-chat-app/webapi/CopilotChatWebApi.sln new file mode 100644 index 000000000000..9d08ac20fb52 --- /dev/null +++ b/samples/apps/copilot-chat-app/webapi/CopilotChatWebApi.sln @@ -0,0 +1,25 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 17 +VisualStudioVersion = 17.5.33530.505 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "CopilotChatWebApi", "CopilotChatWebApi.csproj", "{35CC3A68-E577-4B21-B94C-BF674F8FA505}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Release|Any CPU = Release|Any CPU + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {35CC3A68-E577-4B21-B94C-BF674F8FA505}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {35CC3A68-E577-4B21-B94C-BF674F8FA505}.Debug|Any CPU.Build.0 = Debug|Any CPU + {35CC3A68-E577-4B21-B94C-BF674F8FA505}.Release|Any CPU.ActiveCfg = Release|Any CPU + {35CC3A68-E577-4B21-B94C-BF674F8FA505}.Release|Any CPU.Build.0 = Release|Any CPU + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {1F60AC39-60D2-4CD2-B2FC-71E174DDFC1A} + EndGlobalSection +EndGlobal diff --git a/samples/apps/copilot-chat-app/webapi/Program.cs 
b/samples/apps/copilot-chat-app/webapi/Program.cs index dd707dfc59d4..4f4a988638d3 100644 --- a/samples/apps/copilot-chat-app/webapi/Program.cs +++ b/samples/apps/copilot-chat-app/webapi/Program.cs @@ -47,7 +47,8 @@ public static async Task Main(string[] args) builder.Services .AddCopilotChatOptions(builder.Configuration) .AddCopilotChatPlannerServices() - .AddPersistentChatStore(); + .AddPersistentChatStore() + .AddPersistentOcrSupport(); // Add SignalR as the real time relay service builder.Services.AddSignalR(); diff --git a/samples/apps/copilot-chat-app/webapi/README.md b/samples/apps/copilot-chat-app/webapi/README.md index 425bf3dcb35d..36fd9918eabb 100644 --- a/samples/apps/copilot-chat-app/webapi/README.md +++ b/samples/apps/copilot-chat-app/webapi/README.md @@ -9,8 +9,9 @@ while allowing user interfaces to be developed using frontend frameworks such as Before you get started, make sure you have the following requirements in place: 1. [.NET 6.0](https://dotnet.microsoft.com/en-us/download/dotnet/6.0) for building and deploying .NET 6 projects. -2. Update the properties in `./appsettings.json` to configure your Azure OpenAI resource or OpenAI account. -3. Generate and trust a localhost developer certificate. +2. **(Optional)** [Visual Studio Code](http://aka.ms/vscode) or [Visual Studio](http://aka.ms/vsdownload). +3. Update the properties in `./appsettings.json` to configure your Azure OpenAI resource or OpenAI account. +4. Generate and trust a localhost developer certificate. - For Windows and Mac run ```bash dotnet dev-certs https --trust @@ -25,7 +26,9 @@ Before you get started, make sure you have the following requirements in place: > To clean your system of the developer certificate, run `dotnet run dev-certs https --clean` -4. **(Optional)** [Visual Studio Code](http://aka.ms/vscode) or [Visual Studio](http://aka.ms/vsdownload). +5. 
**(Optional)** To enable support for uploading image file formats such as png, jpg and tiff, we have included the [Tesseract](https://www.nuget.org/packages/Tesseract) nuget package. + - You will need to obtain one or more [tessdata language data files](https://github.com/tesseract-ocr/tessdata) such as `eng.traineddata` and add them to your `./data` directory or to the directory specified by the `OcrSupport:Tesseract:FilePath` setting in `./appsettings.json`. + - Set the `Copy to Output Directory` value to `Copy if newer`. # Start the WebApi Service diff --git a/samples/apps/copilot-chat-app/webapi/Services/ITesseractEngine.cs b/samples/apps/copilot-chat-app/webapi/Services/ITesseractEngine.cs new file mode 100644 index 000000000000..d3318d9757b7 --- /dev/null +++ b/samples/apps/copilot-chat-app/webapi/Services/ITesseractEngine.cs @@ -0,0 +1,26 @@ +// Copyright (c) Microsoft. All rights reserved. + +using Tesseract; + +namespace SemanticKernel.Service.Services; + +/// +/// Wrapper for the Tesseract engine. +/// +public interface ITesseractEngine +{ + // + // Summary: + // Processes the specific image. + // + // Parameters: + // image: + // The image to process. + // + // pageSegMode: + // The page layout analysis method to use. + // + // Remarks: + // You can only have one result iterator open at any one time. + Page Process(Pix image); +} diff --git a/samples/apps/copilot-chat-app/webapi/Services/NullTesseractEngine.cs b/samples/apps/copilot-chat-app/webapi/Services/NullTesseractEngine.cs new file mode 100644 index 000000000000..d1aff4dc38f0 --- /dev/null +++ b/samples/apps/copilot-chat-app/webapi/Services/NullTesseractEngine.cs @@ -0,0 +1,23 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using Tesseract; + +namespace SemanticKernel.Service.Services; + +/// +/// Used to mock the TesseractEngine in the event that the Tesseract language file is not installed. 
+/// +public class NullTesseractEngine : ITesseractEngine +{ + /// + /// Throws an exception to let the user know they need to install the Tesseract language file. + /// + /// Not used + /// This will always throw a NotImplementedException + /// + public Page Process(Pix image) + { + throw new NotImplementedException("You must have the Tesseract language file to use the image upload feature. See the README.md"); + } +} diff --git a/samples/apps/copilot-chat-app/webapi/Services/TesseractEngineWrapper.cs b/samples/apps/copilot-chat-app/webapi/Services/TesseractEngineWrapper.cs new file mode 100644 index 000000000000..ff7b4f088254 --- /dev/null +++ b/samples/apps/copilot-chat-app/webapi/Services/TesseractEngineWrapper.cs @@ -0,0 +1,37 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using Tesseract; + +namespace SemanticKernel.Service.Services; + +/// +/// Wrapper for the TesseractEngine within the Tesseract OCR library. This is used to allow the TesseractEngine to be mocked in the event that the Tesseract language file is not installed. +/// +public class TesseractEngineWrapper : ITesseractEngine +{ + /// + /// Creates a new instance of the TesseractEngineWrapper passing in a valid TesseractEngine. + /// + /// + public TesseractEngineWrapper(TesseractEngine tesseractEngine) + { + if (tesseractEngine == null) + { + throw new ArgumentNullException(nameof(tesseractEngine)); + } + + this.TesseractEngine = tesseractEngine; + } + + /// + /// Passes the TesseractEngine to the wrapper. 
+ /// + public TesseractEngine TesseractEngine { get; } + + /// + public Page Process(Pix image) + { + return this.TesseractEngine.Process(image); + } +} diff --git a/samples/apps/copilot-chat-app/webapi/appsettings.json b/samples/apps/copilot-chat-app/webapi/appsettings.json index 8b1c6bc59e6e..c345f9c1f5b3 100644 --- a/samples/apps/copilot-chat-app/webapi/appsettings.json +++ b/samples/apps/copilot-chat-app/webapi/appsettings.json @@ -143,6 +143,21 @@ "FileSizeLimit": 4000000, "FileCountLimit": 10 }, + // + // OCR support is used for allowing end users to upload images containing text in addition to text based documents. + // - Supported Types are "none" or "tesseract". + // - When using Tesseract OCR Support (In order to upload image file formats such as png, jpg and tiff) + // - Obtain language data files here: https://github.com/tesseract-ocr/tessdata . + // - Add these files to your `data` folder or the path specified in the "FilePath" property and set the "Copy to Output Directory" value to "Copy if newer". + // +"OcrSupport": { + "Type": "tesseract", + "Tesseract": { + "Language": "eng", + "FilePath": "./data" + } + }, + // // ChatSkill prompts are used to generate responses to user messages. // - CompletionTokenLimit is the token limit of the chat model, see https://platform.openai.com/docs/models/overview diff --git a/samples/apps/copilot-chat-app/webapi/data/README.md b/samples/apps/copilot-chat-app/webapi/data/README.md new file mode 100644 index 000000000000..68b97f51dee3 --- /dev/null +++ b/samples/apps/copilot-chat-app/webapi/data/README.md @@ -0,0 +1,6 @@ +# Tesseract OCR Support + +This API supports the ability to upload image file formats such as png, jpg and tiff via the [Tesseract](https://www.nuget.org/packages/Tesseract) nuget package. 
+You will need to obtain one or more [tessdata language data files](https://github.com/tesseract-ocr/tessdata) such as `eng.traineddata` and add them to your `./data` directory or to the directory specified by the `OcrSupport:Tesseract:FilePath` setting in `./appsettings.json`. + +If you do not add any `.traineddata` files, you will receive a runtime exception when attempting to upload one of these image formats. \ No newline at end of file diff --git a/samples/apps/copilot-chat-app/webapp/src/components/FileUploader.tsx b/samples/apps/copilot-chat-app/webapp/src/components/FileUploader.tsx index dbeb3c771e53..6a83307e177d 100644 --- a/samples/apps/copilot-chat-app/webapp/src/components/FileUploader.tsx +++ b/samples/apps/copilot-chat-app/webapp/src/components/FileUploader.tsx @@ -45,7 +45,7 @@ export const FileUploader: React.FC = forwardRef ); }, diff --git a/samples/apps/copilot-chat-app/webapp/src/components/chat/ChatInput.tsx b/samples/apps/copilot-chat-app/webapp/src/components/chat/ChatInput.tsx index ed2433a607ac..8c12fe304895 100644 --- a/samples/apps/copilot-chat-app/webapp/src/components/chat/ChatInput.tsx +++ b/samples/apps/copilot-chat-app/webapp/src/components/chat/ChatInput.tsx @@ -224,7 +224,7 @@ export const ChatInput: React.FC = ({ isDraggingOver, onDragLeav type="file" ref={documentFileRef} style={{ display: 'none' }} - accept=".txt,.pdf" + accept=".txt,.pdf,.jpg,.jpeg,.png,.tif,.tiff" multiple={true} onChange={() => { handleImport();