diff --git a/src/core/parser.js b/src/core/parser.js index db0c4533d6143..954bec8ba9b75 100644 --- a/src/core/parser.js +++ b/src/core/parser.js @@ -148,7 +148,8 @@ var Parser = (function ParserClosure() { * @returns {number} The inline stream length. */ findDefaultInlineStreamEnd(stream) { - const E = 0x45, I = 0x49, SPACE = 0x20, LF = 0xA, CR = 0xD, n = 5; + const E = 0x45, I = 0x49, SPACE = 0x20, LF = 0xA, CR = 0xD; + const n = 10, NUL = 0x0; let startPos = stream.pos, state = 0, ch, maybeEIPos; while ((ch = stream.getByte()) !== -1) { if (state === 0) { @@ -159,10 +160,23 @@ var Parser = (function ParserClosure() { assert(state === 2); if (ch === SPACE || ch === LF || ch === CR) { maybeEIPos = stream.pos; - // Let's check the next `n` bytes are ASCII... just be sure. + // Let's check that the next `n` bytes are ASCII... just to be sure. let followingBytes = stream.peekBytes(n); - for (let i = 0; i < n; i++) { + for (let i = 0, ii = followingBytes.length; i < ii; i++) { ch = followingBytes[i]; + if (ch === NUL && followingBytes[i + 1] !== NUL) { + // NUL bytes are not supposed to occur *outside* of inline + // images, but some PDF generators violate that assumption, + // thus breaking the EI detection heuristics used below. + // + // However, we can't unconditionally treat NUL bytes as "ASCII", + // since that *could* result in inline images being truncated. + // + // To attempt to address this, we'll still treat any *sequence* + // of NUL bytes as non-ASCII, but for a *single* NUL byte we'll + // continue checking the `followingBytes` (fixes issue8823.pdf). + continue; + } if (ch !== LF && ch !== CR && (ch < SPACE || ch > 0x7F)) { // Not a LF, CR, SPACE or any visible ASCII character, i.e. // it's binary stuff. Resetting the state. diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index 8e19cdd285171..78c790ade606c 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -58,6 +58,7 @@ !issue8697.pdf !issue8707.pdf !issue8798r.pdf +!issue8823.pdf !bad-PageLabels.pdf !filled-background.pdf !ArabicCIDTrueType.pdf diff --git a/test/pdfs/issue8823.pdf b/test/pdfs/issue8823.pdf new file mode 100644 index 0000000000000..9878a62ba342c Binary files /dev/null and b/test/pdfs/issue8823.pdf differ diff --git a/test/test_manifest.json b/test/test_manifest.json index 1b39b8ad67aa3..de8aa12000f60 100644 --- a/test/test_manifest.json +++ b/test/test_manifest.json @@ -2976,9 +2976,15 @@ { "id": "issue8798", "file": "pdfs/issue8798r.pdf", "md5": "3a0e29f013d9edcceb5d852e37738a77", + "link": false, + "rounds": 1, + "type": "eq" + }, + { "id": "issue8823", + "file": "pdfs/issue8823.pdf", + "md5": "ad02d4aa374b315bf1766038d002d57a", + "link": false, "rounds": 1, - "lastPage": 1, - "link": true, "type": "eq" }, { "id": "issue8613",