-
Notifications
You must be signed in to change notification settings - Fork 4.4k
Copilot Chat: Feature/tesseract ocr Issue #1440 #1491
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
gitri-ms
merged 23 commits into
microsoft:main
from
davearlin:feature/tesseract_ocr_copilot_chat
Jul 12, 2023
Merged
Changes from all commits
Commits
Show all changes
23 commits
Select commit
Hold shift + click to select a range
d2cb642
Add support for rasterized images using Tesseract OSS library.
davearlin 1993eaa
Include ability to import rasterized images via Tesseract library.
davearlin e1379ed
Get latest from main repo
davearlin 6da132c
Merge from main
davearlin 080215e
Merge branch 'main' into feature/tesseract_ocr_copilot_chat
davearlin 2bac559
Merge branch 'main' into feature/tesseract_ocr_copilot_chat
davearlin d53f9b4
Updates to make more Eviden branded.
davearlin be45939
Merge from main
davearlin 76c71aa
Merge changes from main repo.
davearlin 2670605
Change lifetime of TesseractEngine to a singleton.
davearlin 6897d8e
Merge branch 'microsoft:main' into main
davearlin dad5e5d
Merge branch 'microsoft:main' into feature/tesseract_ocr_copilot_chat
davearlin 1c5dae1
Merge branch 'feature/tesseract_ocr_copilot_chat'
davearlin dc9e1e0
Clean up to conform the Tessearct OCR support to existing standards a…
davearlin cb78cd5
Undo custom / fork changes
davearlin d209518
Merge branch 'feature/tesseract_ocr_copilot_chat'
davearlin 7e499a6
Update App.tsx
davearlin a0c16b9
Update App.tsx to remove unneeded subtitle.
davearlin 7d9fd28
Update App.tsx
davearlin 6cda3e2
Merge branch 'main' into feature/tesseract_ocr_copilot_chat
gitri-ms ab60f29
Merge branch 'main' into feature/tesseract_ocr_copilot_chat
gitri-ms e8f6ffc
Minor fixes from merge and formatting
gitri-ms 9a03ab4
Merge branch 'main' into feature/tesseract_ocr_copilot_chat
TaoChenOSU File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
37 changes: 37 additions & 0 deletions
37
samples/apps/copilot-chat-app/webapi/CopilotChat/Options/OcrSupportOptions.cs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,37 @@ | ||
| // Copyright (c) Microsoft. All rights reserved. | ||
|
|
||
| using SemanticKernel.Service.Options; | ||
|
|
||
| namespace SemanticKernel.Service.CopilotChat.Options; | ||
|
|
||
| /// <summary> | ||
| /// OCR support configuration options. | ||
| /// </summary> | ||
| public class OcrSupportOptions | ||
| { | ||
| public const string PropertyName = "OcrSupport"; | ||
|
|
||
| public enum OcrSupportType | ||
| { | ||
| /// <summary> | ||
| /// No OCR Support | ||
| /// </summary> | ||
| None, | ||
|
|
||
| /// <summary> | ||
| /// Tesseract OCR Support | ||
| /// </summary> | ||
| Tesseract | ||
| } | ||
|
|
||
| /// <summary> | ||
| /// Gets or sets the type of OCR support to use. | ||
| /// </summary> | ||
| public OcrSupportType Type { get; set; } = OcrSupportType.None; | ||
|
|
||
| /// <summary> | ||
| /// Gets or sets the configuration for the Tesseract OCR support. | ||
| /// </summary> | ||
| [RequiredOnPropertyValue(nameof(Type), OcrSupportType.Tesseract)] | ||
| public TesseractOptions? Tesseract { get; set; } | ||
| } |
26 changes: 26 additions & 0 deletions
26
samples/apps/copilot-chat-app/webapi/CopilotChat/Options/TesseractOptions.cs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,26 @@ | ||
| // Copyright (c) Microsoft. All rights reserved. | ||
|
|
||
| using System.ComponentModel.DataAnnotations; | ||
| using SemanticKernel.Service.Options; | ||
|
|
||
| namespace SemanticKernel.Service.CopilotChat.Options; | ||
|
|
||
| /// <summary> | ||
| /// Configuration options for Tesseract OCR support. | ||
| /// </summary> | ||
| public sealed class TesseractOptions | ||
| { | ||
| public const string PropertyName = "Tesseract"; | ||
|
|
||
| /// <summary> | ||
| /// The file path where the Tesseract language file is stored (e.g. "./data") | ||
| /// </summary> | ||
| [Required, NotEmptyOrWhitespace] | ||
| public string? FilePath { get; set; } = string.Empty; | ||
|
|
||
| /// <summary> | ||
| /// The language file prefix name (e.g. "eng") | ||
| /// </summary> | ||
| [Required, NotEmptyOrWhitespace] | ||
| public string? Language { get; set; } = string.Empty; | ||
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
25 changes: 25 additions & 0 deletions
25
samples/apps/copilot-chat-app/webapi/CopilotChatWebApi.sln
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,25 @@ | ||
| | ||
| Microsoft Visual Studio Solution File, Format Version 12.00 | ||
gitri-ms marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| # Visual Studio Version 17 | ||
| VisualStudioVersion = 17.5.33530.505 | ||
| MinimumVisualStudioVersion = 10.0.40219.1 | ||
| Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "CopilotChatWebApi", "CopilotChatWebApi.csproj", "{35CC3A68-E577-4B21-B94C-BF674F8FA505}" | ||
| EndProject | ||
| Global | ||
| GlobalSection(SolutionConfigurationPlatforms) = preSolution | ||
| Debug|Any CPU = Debug|Any CPU | ||
| Release|Any CPU = Release|Any CPU | ||
| EndGlobalSection | ||
| GlobalSection(ProjectConfigurationPlatforms) = postSolution | ||
| {35CC3A68-E577-4B21-B94C-BF674F8FA505}.Debug|Any CPU.ActiveCfg = Debug|Any CPU | ||
| {35CC3A68-E577-4B21-B94C-BF674F8FA505}.Debug|Any CPU.Build.0 = Debug|Any CPU | ||
| {35CC3A68-E577-4B21-B94C-BF674F8FA505}.Release|Any CPU.ActiveCfg = Release|Any CPU | ||
| {35CC3A68-E577-4B21-B94C-BF674F8FA505}.Release|Any CPU.Build.0 = Release|Any CPU | ||
| EndGlobalSection | ||
| GlobalSection(SolutionProperties) = preSolution | ||
| HideSolutionNode = FALSE | ||
| EndGlobalSection | ||
| GlobalSection(ExtensibilityGlobals) = postSolution | ||
| SolutionGuid = {1F60AC39-60D2-4CD2-B2FC-71E174DDFC1A} | ||
| EndGlobalSection | ||
| EndGlobal | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
26 changes: 26 additions & 0 deletions
26
samples/apps/copilot-chat-app/webapi/Services/ITesseractEngine.cs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,26 @@ | ||
| // Copyright (c) Microsoft. All rights reserved. | ||
|
|
||
| using Tesseract; | ||
|
|
||
| namespace SemanticKernel.Service.Services; | ||
|
|
||
| /// <summary> | ||
| /// Wrapper for the Tesseract engine. | ||
| /// </summary> | ||
| public interface ITesseractEngine | ||
| { | ||
| /// <summary> | ||
| /// Processes the specified image. | ||
| /// </summary> | ||
| /// <param name="image">The image to process.</param> | ||
| /// <returns>The page produced by processing the image.</returns> | ||
| /// <remarks> | ||
| /// You can only have one result iterator open at any one time. | ||
| /// This wrapper does not expose a pageSegMode (page layout analysis method) parameter. | ||
| /// </remarks> | ||
| Page Process(Pix image); | ||
| } |
23 changes: 23 additions & 0 deletions
23
samples/apps/copilot-chat-app/webapi/Services/NullTesseractEngine.cs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,23 @@ | ||
| // Copyright (c) Microsoft. All rights reserved. | ||
|
|
||
| using System; | ||
| using Tesseract; | ||
|
|
||
| namespace SemanticKernel.Service.Services; | ||
|
|
||
| /// <summary> | ||
| /// Used to mock the TesseractEngine in the event that the Tesseract language file is not installed. | ||
| /// </summary> | ||
| public class NullTesseractEngine : ITesseractEngine | ||
| { | ||
| /// <summary> | ||
| /// Throws an exception to let the user know they need to install the Tesseract language file. | ||
| /// </summary> | ||
| /// <param name="image">Not used</param> | ||
| /// <returns>Never returns normally; this method always throws.</returns> | ||
| /// <exception cref="NotImplementedException">Always thrown, to tell the user that the Tesseract language file is required.</exception> | ||
| public Page Process(Pix image) | ||
| { | ||
| throw new NotImplementedException("You must have the Tesseract language file to use the image upload feature. See the README.md"); | ||
| } | ||
| } |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.