Skip to content

Commit

Permalink
Merge pull request #9827 from Snuffleupagus/misc-corrupt-pdf-fixes
Browse files Browse the repository at this point in the history
Fix various corrupt PDF files (issue 9252, issue 9418)
  • Loading branch information
timvandermeij authored Jun 21, 2018
2 parents a278c5a + 56e3648 commit 98ea39f
Show file tree
Hide file tree
Showing 7 changed files with 104 additions and 20 deletions.
56 changes: 47 additions & 9 deletions src/core/obj.js
Original file line number Diff line number Diff line change
Expand Up @@ -855,7 +855,16 @@ var XRef = (function XRefClosure() {
}
trailerDict.assignXref(this);
this.trailer = trailerDict;
var encrypt = trailerDict.get('Encrypt');

let encrypt;
try {
encrypt = trailerDict.get('Encrypt');
} catch (ex) {
if (ex instanceof MissingDataException) {
throw ex;
}
warn(`XRef.parse - Invalid "Encrypt" reference: "${ex}".`);
}
if (isDict(encrypt)) {
var ids = trailerDict.get('ID');
var fileId = (ids && ids.length) ? ids[0] : '';
Expand All @@ -868,8 +877,22 @@ var XRef = (function XRefClosure() {
this.pdfManager.password);
}

// get the root dictionary (catalog) object
if (!(this.root = trailerDict.get('Root'))) {
// Get the root dictionary (catalog) object, and do some basic validation.
let root;
try {
root = trailerDict.get('Root');
} catch (ex) {
if (ex instanceof MissingDataException) {
throw ex;
}
warn(`XRef.parse - Invalid "Root" reference: "${ex}".`);
}
if (isDict(root) && root.has('Pages')) {
this.root = root;
} else {
if (!recoveryMode) {
throw new XRefParseException();
}
throw new FormatError('Invalid root reference');
}
},
Expand Down Expand Up @@ -1208,7 +1231,7 @@ var XRef = (function XRefClosure() {
break;
}
}
startPos += contentLength;
startPos = endPos;
}
let content = buffer.subarray(position, position + contentLength);

Expand Down Expand Up @@ -1237,7 +1260,7 @@ var XRef = (function XRefClosure() {
this.readXRef(/* recoveryMode */ true);
}
// finding main trailer
var dict;
let trailerDict;
for (i = 0, ii = trailers.length; i < ii; ++i) {
stream.pos = trailers[i];
var parser = new Parser(new Lexer(stream), /* allowStreams = */ true,
Expand All @@ -1247,18 +1270,33 @@ var XRef = (function XRefClosure() {
continue;
}
// read the trailer dictionary
dict = parser.getObj();
let dict = parser.getObj();
if (!isDict(dict)) {
continue;
}
// Do some basic validation of the trailer/root dictionary candidate.
let rootDict;
try {
rootDict = dict.get('Root');
} catch (ex) {
if (ex instanceof MissingDataException) {
throw ex;
}
continue;
}
if (!isDict(rootDict) || !rootDict.has('Pages')) {
continue;
}
// taking the first one with 'ID'
if (dict.has('ID')) {
return dict;
}
// The current dictionary is a candidate, but continue searching.
trailerDict = dict;
}
// no tailer with 'ID', taking last one (if exists)
if (dict) {
return dict;
// No trailer with 'ID', taking last one (if exists).
if (trailerDict) {
return trailerDict;
}
// nothing helps
throw new InvalidPDFException('Invalid PDF structure');
Expand Down
22 changes: 15 additions & 7 deletions src/core/parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ import {
PredictorStream, RunLengthStream
} from './stream';
import {
assert, FormatError, info, isNum, isString, MissingDataException, StreamType,
warn
assert, FormatError, info, isNum, isSpace, isString, MissingDataException,
StreamType, warn
} from '../shared/util';
import {
Cmd, Dict, EOF, isCmd, isDict, isEOF, isName, Name, Ref
Expand Down Expand Up @@ -721,7 +721,7 @@ var Lexer = (function LexerClosure() {
var ch = this.currentChar;
var eNotation = false;
var divideBy = 0; // different from 0 if it's a floating point value
var sign = 1;
var sign = 0;

if (ch === 0x2D) { // '-'
sign = -1;
Expand All @@ -732,10 +732,7 @@ var Lexer = (function LexerClosure() {
ch = this.nextChar();
}
} else if (ch === 0x2B) { // '+'
ch = this.nextChar();
}
if (ch === 0x2E) { // '.'
divideBy = 10;
sign = 1;
ch = this.nextChar();
}
if (ch === 0x0A || ch === 0x0D) { // LF, CR
Expand All @@ -744,11 +741,22 @@ var Lexer = (function LexerClosure() {
ch = this.nextChar();
} while (ch === 0x0A || ch === 0x0D);
}
if (ch === 0x2E) { // '.'
divideBy = 10;
ch = this.nextChar();
}
if (ch < 0x30 || ch > 0x39) { // '0' - '9'
if (divideBy === 10 && sign === 0 &&
(isSpace(ch) || ch === /* EOF = */ -1)) {
// This is consistent with Adobe Reader (fixes issue9252.pdf).
warn('Lexer.getNumber - treating a single decimal point as zero.');
return 0;
}
throw new FormatError(
`Invalid number: ${String.fromCharCode(ch)} (charCode ${ch})`);
}

sign = sign || 1;
var baseValue = ch - 0x30; // '0'
var powerValue = 0;
var powerValueSign = 1;
Expand Down
2 changes: 2 additions & 0 deletions test/pdfs/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,10 @@
!issue8823.pdf
!issue9084.pdf
!issue9105_reduced.pdf
!issue9252.pdf
!issue9262_reduced.pdf
!issue9291.pdf
!issue9418.pdf
!issue9458.pdf
!bad-PageLabels.pdf
!decodeACSuccessive.pdf
Expand Down
Binary file added test/pdfs/issue9252.pdf
Binary file not shown.
Binary file added test/pdfs/issue9418.pdf
Binary file not shown.
14 changes: 14 additions & 0 deletions test/test_manifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -741,6 +741,20 @@
"lastPage": 1,
"type": "eq"
},
{ "id": "issue9252",
"file": "pdfs/issue9252.pdf",
"md5": "c7d039d808d9344a95d2c9cfa7586ca3",
"rounds": 1,
"link": false,
"type": "eq"
},
{ "id": "issue9418",
"file": "pdfs/issue9418.pdf",
"md5": "32ecad8098acb1938539d47944ecb54b",
"rounds": 1,
"link": false,
"type": "eq"
},
{ "id": "issue9262",
"file": "pdfs/issue9262_reduced.pdf",
"md5": "5347ce2d7b3866625c22e115fd90e0de",
Expand Down
30 changes: 26 additions & 4 deletions test/unit/parser_spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
*/

import { Lexer, Linearization } from '../../src/core/parser';
import { FormatError } from '../../src/shared/util';
import { Name } from '../../src/core/primitives';
import { StringStream } from '../../src/core/stream';

Expand Down Expand Up @@ -58,11 +59,32 @@ describe('parser', function() {

it('should ignore line-breaks between operator and digit in number',
function() {
var input = new StringStream('-\r\n205.88');
var lexer = new Lexer(input);
var result = lexer.getNumber();
let minusInput = new StringStream('-\r\n205.88');
let minusLexer = new Lexer(minusInput);

expect(result).toEqual(-205.88);
expect(minusLexer.getNumber()).toEqual(-205.88);

let plusInput = new StringStream('+\r\n205.88');
let plusLexer = new Lexer(plusInput);

expect(plusLexer.getNumber()).toEqual(205.88);
});

// Checks that `Lexer.getNumber` returns 0 for an input consisting of a single
// '.', and that it throws a `FormatError` when the '.' is preceded by a sign
// or followed by another '.'.
it('should treat a single decimal point as zero', function() {
// Lenient case: the stream contains nothing but a decimal point.
let input = new StringStream('.');
let lexer = new Lexer(input);

expect(lexer.getNumber()).toEqual(0);

// Strict cases: a doubled decimal point, and a decimal point after an
// explicit '-'/'+' sign, with or without a CR LF between sign and point.
let numbers = ['..', '-.', '+.', '-\r\n.', '+\r\n.'];
for (let number of numbers) {
// These intentionally shadow the outer `input`/`lexer`, so that every case
// is parsed from a fresh stream with a fresh lexer.
let input = new StringStream(number);
let lexer = new Lexer(input);

// The call is wrapped in a function so that the matcher can invoke it and
// inspect the thrown error's type and message.
expect(function() {
return lexer.getNumber();
}).toThrowError(FormatError, /^Invalid number:\s/);
}
});

it('should handle glued numbers and operators', function() {
Expand Down

0 comments on commit 98ea39f

Please sign in to comment.