Skip to content

Commit

Permalink
Merge branch 'release_branch_DEVSIX-5576' into master-rc
Browse files Browse the repository at this point in the history
  • Loading branch information
iText-CI committed Sep 29, 2021
2 parents d143306 + fb9aa93 commit 923548d
Show file tree
Hide file tree
Showing 84 changed files with 3,568 additions and 1,872 deletions.
4 changes: 2 additions & 2 deletions pdfocr-api/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<groupId>com.itextpdf</groupId>
<artifactId>pdfocr-root</artifactId>
<version>1.0.3</version>
<version>2.0.0</version>
</parent>

<artifactId>pdfocr-api</artifactId>
Expand Down Expand Up @@ -47,4 +47,4 @@
</resource>
</resources>
</build>
</project>
</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
/*
This file is part of the iText (R) project.
Copyright (c) 1998-2021 iText Group NV
Authors: iText Software.
This program is offered under a commercial and under the AGPL license.
For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
AGPL licensing:
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package com.itextpdf.pdfocr;

import com.itextpdf.commons.actions.AbstractITextEvent;
import com.itextpdf.commons.actions.AbstractProductITextEvent;
import com.itextpdf.commons.actions.confirmations.EventConfirmationType;
import com.itextpdf.commons.actions.sequence.SequenceId;

/**
* Base class for helpers that work with events. This class is for internal usage.
*
* <p>
* It extends {@link AbstractITextEvent} and declares the operations every concrete helper
* has to provide: handling a product event, exposing a sequence id and exposing an
* event confirmation type.
*/
public abstract class AbstractPdfOcrEventHelper extends AbstractITextEvent {

/**
* Handles the given product event.
*
* @param event the event to handle
*/
public abstract void onEvent(AbstractProductITextEvent event);

/**
* Returns the sequence id.
*
* @return the sequence id
*/
public abstract SequenceId getSequenceId();

/**
* Returns the confirmation type of the event.
*
* @return the event confirmation type
*/
public abstract EventConfirmationType getConfirmationType();
}
27 changes: 27 additions & 0 deletions pdfocr-api/src/main/java/com/itextpdf/pdfocr/IOcrEngine.java
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,21 @@ public interface IOcrEngine {
*/
Map<Integer, List<TextInfo>> doImageOcr(File input);

/**
* Reads data from the provided input image file and returns retrieved data
* in the format described below.
*
* <p>
* Overload of {@link #doImageOcr(File)} that additionally accepts an
* {@link OcrProcessContext}.
*
* @param input input image {@link java.io.File}
* @param ocrProcessContext OCR processing context
*
* @return {@link java.util.Map} where key is {@link java.lang.Integer}
* representing the number of the page and value is
* {@link java.util.List} of {@link TextInfo} elements where each
* {@link TextInfo} element contains a word or a line and its 4
* coordinates (bbox)
*/
Map<Integer, List<TextInfo>> doImageOcr(File input, OcrProcessContext ocrProcessContext);

/**
* Performs OCR using provided {@link IOcrEngine} for the given list of
* input images and saves output to a text file using provided path.
Expand All @@ -58,4 +73,16 @@ public interface IOcrEngine {
* @param txtFile file to be created
*/
void createTxtFile(List<File> inputImages, File txtFile);

/**
* Performs OCR with this {@link IOcrEngine} for the given list of
* input images and saves output to a text file using provided path.
* Note that a human reading order is not guaranteed
* due to possible specifics of input images (multi column layout, tables etc.).
*
* @param inputImages {@link java.util.List} of images to be OCRed
* @param txtFile file to be created
* @param ocrProcessContext OCR processing context
*/
void createTxtFile(List<File> inputImages, File txtFile, OcrProcessContext ocrProcessContext);
}
45 changes: 45 additions & 0 deletions pdfocr-api/src/main/java/com/itextpdf/pdfocr/IProductAware.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
/*
This file is part of the iText (R) project.
Copyright (c) 1998-2021 iText Group NV
Authors: iText Software.
This program is offered under a commercial and under the AGPL license.
For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
AGPL licensing:
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package com.itextpdf.pdfocr;

import com.itextpdf.commons.actions.data.ProductData;

/**
* Interface for objects that expose information about product data and meta info.
*/
public interface IProductAware {

/**
* Gets the container with meta info.
*
* @return the held meta info container
*/
PdfOcrMetaInfoContainer getMetaInfoContainer();

/**
* Gets the object containing information about the product.
*
* @return the product data
*/
ProductData getProductData();
}
Loading

0 comments on commit 923548d

Please sign in to comment.