diff --git a/samples/bugs/Issue450.pdf b/samples/bugs/Issue450.pdf new file mode 100644 index 00000000..86997d07 Binary files /dev/null and b/samples/bugs/Issue450.pdf differ diff --git a/src/Smalot/PdfParser/Page.php b/src/Smalot/PdfParser/Page.php index 2365ac6b..fcc2a773 100644 --- a/src/Smalot/PdfParser/Page.php +++ b/src/Smalot/PdfParser/Page.php @@ -240,6 +240,12 @@ public function getTextArray(self $page = null): array $header = new Header([], $this->document); $contents = new PDFObject($this->document, $header, $new_content, $this->config); + } else { + try { + $contents->getTextArray($this); + } catch (\Throwable $e) { + return $contents->getTextArray(); + } } } elseif ($contents instanceof ElementArray) { // Create a virtual global content. @@ -342,11 +348,7 @@ public function extractDecodedRawData(array $extractedRawData = null): array continue; } $tmpText = $data[$i]['c']; - $decodedText = ''; - if (isset($currentFont)) { - $decodedText = $currentFont->decodeOctal($tmpText); - //$tmpText = $currentFont->decodeHexadecimal($tmpText, false); - } + $decodedText = isset($currentFont) ? $currentFont->decodeOctal($tmpText) : $tmpText; $decodedText = str_replace( ['\\\\', '\(', '\)', '\n', '\r', '\t', '\ '], ['\\', '(', ')', "\n", "\r", "\t", ' '], diff --git a/tests/Integration/PageTest.php b/tests/Integration/PageTest.php index ed9365ee..2eb9043e 100644 --- a/tests/Integration/PageTest.php +++ b/tests/Integration/PageTest.php @@ -570,4 +570,45 @@ public function testGetTextXY() $result = $page->getTextXY(174, 827, 1, 1); $this->assertStringContainsString('Purchase 2', $result[0][1]); } + + public function testExtractDecodedRawDataIssue450() + { + $filename = $this->rootDir.'/samples/bugs/Issue450.pdf'; + $parser = $this->getParserInstance(); + $document = $parser->parseFile($filename); + $pages = $document->getPages(); + $page = $pages[0]; + $extractedDecodedRawData = $page->extractDecodedRawData(); + $this->assertIsArray($extractedDecodedRawData); + $this->assertGreaterThan(3, \count($extractedDecodedRawData)); + $this->assertIsArray($extractedDecodedRawData[3]); + $this->assertEquals('TJ', $extractedDecodedRawData[3]['o']); + $this->assertIsArray($extractedDecodedRawData[3]['c']); + $this->assertIsArray($extractedDecodedRawData[3]['c'][0]); + $this->assertEquals(3, \count($extractedDecodedRawData[3]['c'][0])); + $this->assertEquals('{signature:signer505906:Please+Sign+Here}', $extractedDecodedRawData[3]['c'][0]['c']); + } + + public function testGetDataTmIssue450() + { + $filename = $this->rootDir.'/samples/bugs/Issue450.pdf'; + $parser = $this->getParserInstance(); + $document = $parser->parseFile($filename); + $pages = $document->getPages(); + $page = $pages[0]; + $dataTm = $page->getDataTm(); + $this->assertIsArray($dataTm); + $this->assertEquals(1, \count($dataTm)); + $this->assertIsArray($dataTm[0]); + $this->assertEquals(2, \count($dataTm[0])); + $this->assertIsArray($dataTm[0][0]); + $this->assertEquals(6, \count($dataTm[0][0])); + $this->assertEquals(1, $dataTm[0][0][0]); + $this->assertEquals(0, $dataTm[0][0][1]); + $this->assertEquals(0, $dataTm[0][0][2]); + $this->assertEquals(1, $dataTm[0][0][3]); + $this->assertEquals(67.5, $dataTm[0][0][4]); + $this->assertEquals(756.25, $dataTm[0][0][5]); + $this->assertEquals('{signature:signer505906:Please+Sign+Here}', $dataTm[0][1]); + } } diff --git a/tests/Integration/ParserTest.php b/tests/Integration/ParserTest.php index 5228cf99..5a5c8885 100644 --- a/tests/Integration/ParserTest.php +++ b/tests/Integration/ParserTest.php @@ -321,7 +321,7 @@ public function testRetainImageContentImpact() } $filename = $this->rootDir.'/samples/bugs/Issue104a.pdf'; - $iterations = 1; + $iterations = 2; /* * check default (= true) @@ -335,7 +335,7 @@ public function testRetainImageContentImpact() } $usedMemory = memory_get_usage(true); - $this->assertTrue($usedMemory > 100000000, 'Memory is only '.$usedMemory); + $this->assertTrue($usedMemory > 200000000, 'Memory is only '.$usedMemory); $this->assertTrue(null != $document && 0 < \strlen($document->getText())); // force garbage collection @@ -359,7 +359,7 @@ public function testRetainImageContentImpact() * note: the following memory value is set manually and may differ from system to system. * it must be high enough to not produce a false negative though. */ - $this->assertTrue($usedMemory < 106000000, 'Memory is '.$usedMemory); + $this->assertTrue($usedMemory < 107000000, 'Memory is '.$usedMemory); $this->assertTrue(0 < \strlen($document->getText())); } }