diff --git a/Samples/OCRTest/PHP/OCRTest.php b/Samples/OCRTest/PHP/OCRTest.php index 67e31719..708ecc87 100644 --- a/Samples/OCRTest/PHP/OCRTest.php +++ b/Samples/OCRTest/PHP/OCRTest.php @@ -23,6 +23,9 @@ // The location of the OCR Module PDFNet::AddResourceSearchPath("../../../Lib/"); + + // If the IRIS OCR module is available, will use that instead of the default + $use_iris = OCRModule::IsIRISModuleAvailable(); if(!OCRModule::IsModuleAvailable()) { echo "Unable to run OCRTest: PDFTron SDK OCR module not available.\n ---------------------------------------------------------------\n @@ -33,17 +36,22 @@ } else { //-------------------------------------------------------------------------------- - // Example 1) Process image without specifying options, default language - English - is used - - + // Example 1) Process image // A) Setup empty destination doc + $doc = new PDFDoc(); - // B) Run OCR on the .png with options + // B) Use the IRIS OCR engine if available - OCRModule::ImageToPDF($doc, $input_path."psychomachia_excerpt.png", NULL); + $opts = new OCROptions(); + if ($use_iris) { + $opts->SetOCREngine("iris"); + } - // C) check the result + // C) Run OCR on the .png with options + OCRModule::ImageToPDF($doc, $input_path."psychomachia_excerpt.png", $opts); + + // D) Check the result $doc->Save($output_path."psychomachia_excerpt.pdf", 0); @@ -60,6 +68,9 @@ // B) Setup options with multiple target languages, English will always be considered as secondary language $opts = new OCROptions(); + if ($use_iris) { + $opts->SetOCREngine("iris"); + } $opts->AddLang("deu"); $opts->AddLang("fra"); $opts->AddLang("eng"); @@ -85,6 +96,9 @@ // B) Setup options with a single language and an ignore zone $opts = new OCROptions(); + if ($use_iris) { + $opts->SetOCREngine("iris"); + } $opts->AddLang("deu"); $ignore_zones = new RectCollection(); @@ -112,6 +126,9 @@ // B) Setup options with a single language plus text/ignore zones $opts = new OCROptions(); + if ($use_iris) { + $opts->SetOCREngine("iris"); + } $opts->AddLang("eng"); $ignore_zones = new RectCollection(); @@ -160,17 +177,24 @@ $doc = new PDFDoc($input_path."zero_value_test_no_text.pdf"); - // B) Run OCR on the .pdf with default English language + // B) Use the IRIS OCR engine if available + + $opts = new OCROptions(); + if ($use_iris) { + $opts->SetOCREngine("iris"); + } + + // C) Run OCR on the .pdf with default English language - $json = OCRModule::GetOCRJsonFromPDF($doc, NULL); + $json = OCRModule::GetOCRJsonFromPDF($doc, $opts); - // C) Post-processing step (whatever it might be) + // D) Post-processing step (whatever it might be) echo "Have OCR result JSON, re-applying to PDF \n"; OCRModule::ApplyOCRJsonToPDF($doc, $json); - // D) check the result + // E) check the result $doc->Save($output_path."zero_value_test_no_text.pdf", 0); @@ -184,55 +208,33 @@ $doc = new PDFDoc(); - // B) Run OCR on the .tif with default English language, extracting OCR results in XML format. Note that + // B) Use the IRIS OCR engine if available + + $opts = new OCROptions(); + if ($use_iris) { + $opts->SetOCREngine("iris"); + } + + // C) Run OCR on the .tif with default English language, extracting OCR results in XML format. Note that // in the process we convert the source image into PDF. We reuse this PDF document later to add hidden text layer to it. - $xml = OCRModule::GetOCRXmlFromImage($doc, $input_path."physics.tif", NULL); + $xml = OCRModule::GetOCRXmlFromImage($doc, $input_path."physics.tif", $opts); - // C) Post-processing step (whatever it might be) + // D) Post-processing step (whatever it might be) echo "Have OCR result XML, re-applying to PDF \n"; OCRModule::ApplyOCRXmlToPDF($doc, $xml); - // D) check the result + // E) check the result $doc->Save($output_path."physics.pdf", 0); echo "Example 6: extracting and applying OCR XML from physics.tif \n"; echo "Done. \n"; - - - //-------------------------------------------------------------------------------- - // Example 7) Resolution can be manually set, when DPI missing from metadata or is wrong - - // A) Setup empty destination doc - - $doc = new PDFDoc(); - - // B) Setup options with a text zone - - $opts = new OCROptions(); - $text_zones = new RectCollection(); - $text_zones->AddRect(new Rect(140.0, 870.0, 310.0, 920.0)); - $opts->AddTextZonesForPage($text_zones, 1); - - // C) Manually override DPI - - $opts->AddDPI(100); - - // D) Run OCR on the .jpg with options - - OCRModule::ImageToPDF($doc, $input_path."corrupted_dpi.jpg", $opts); - - // E) check the result - - $doc->Save($output_path."corrupted_dpi.pdf", 0); - - echo "Example 7: converting image with corrupted resolution metadata corrupted_dpi.jpg to pdf with searchable text \n"; - } + PDFNet::Terminate(); ?>