diff --git a/src/Smalot/PdfParser/RawData/RawDataParser.php b/src/Smalot/PdfParser/RawData/RawDataParser.php index ec8f600b..77630897 100644 --- a/src/Smalot/PdfParser/RawData/RawDataParser.php +++ b/src/Smalot/PdfParser/RawData/RawDataParser.php @@ -756,7 +756,7 @@ protected function getRawObject(string $pdfData, int $offset = 0, array $headerD // start stream object $objtype = 'stream'; $offset += 6; - if (1 == preg_match('/^([\r]?[\n])/isU', substr($pdfData, $offset, 4), $matches)) { + if (1 == preg_match('/^( *[\r]?[\n])/isU', substr($pdfData, $offset, 4), $matches)) { $offset += \strlen($matches[0]); // we get stream length here to later help preg_match test less data diff --git a/tests/PHPUnit/Integration/RawData/RawDataParserTest.php b/tests/PHPUnit/Integration/RawData/RawDataParserTest.php index 17c64d30..dec70977 100644 --- a/tests/PHPUnit/Integration/RawData/RawDataParserTest.php +++ b/tests/PHPUnit/Integration/RawData/RawDataParserTest.php @@ -87,6 +87,25 @@ public function testGetRawObjectIssue372(): void ], $result ); + + // Test that spaces after a 'stream' declaration are absorbed + // See: https://github.com/smalot/pdfparser/issues/641 + $data = 'stream '."\n"; + $data .= 'streamdata'."\n"; + $data .= 'endstream'."\n"; + $data .= 'endobj'; + + $result = $this->fixture->exposeGetRawObject($data); + + // Value 'streamdata'."\n" would be empty string without the fix + $this->assertEquals( + [ + 'stream', + 'streamdata'."\n", + 19, + ], + $result + ); } /**