Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Fix various corrupt PDF files (issue 9252, issue 9418) #9827

Merged
merged 6 commits into from
Jun 21, 2018
56 changes: 47 additions & 9 deletions src/core/obj.js
Original file line number Diff line number Diff line change
Expand Up @@ -855,7 +855,16 @@ var XRef = (function XRefClosure() {
}
trailerDict.assignXref(this);
this.trailer = trailerDict;
var encrypt = trailerDict.get('Encrypt');

let encrypt;
try {
encrypt = trailerDict.get('Encrypt');
} catch (ex) {
if (ex instanceof MissingDataException) {
throw ex;
}
warn(`XRef.parse - Invalid "Encrypt" reference: "${ex}".`);
}
if (isDict(encrypt)) {
var ids = trailerDict.get('ID');
var fileId = (ids && ids.length) ? ids[0] : '';
Expand All @@ -868,8 +877,22 @@ var XRef = (function XRefClosure() {
this.pdfManager.password);
}

// get the root dictionary (catalog) object
if (!(this.root = trailerDict.get('Root'))) {
// Get the root dictionary (catalog) object, and do some basic validation.
let root;
try {
root = trailerDict.get('Root');
} catch (ex) {
if (ex instanceof MissingDataException) {
throw ex;
}
warn(`XRef.parse - Invalid "Root" reference: "${ex}".`);
}
if (isDict(root) && root.has('Pages')) {
this.root = root;
} else {
if (!recoveryMode) {
throw new XRefParseException();
}
throw new FormatError('Invalid root reference');
}
},
Expand Down Expand Up @@ -1208,7 +1231,7 @@ var XRef = (function XRefClosure() {
break;
}
}
startPos += contentLength;
startPos = endPos;
}
let content = buffer.subarray(position, position + contentLength);

Expand Down Expand Up @@ -1237,7 +1260,7 @@ var XRef = (function XRefClosure() {
this.readXRef(/* recoveryMode */ true);
}
// finding main trailer
var dict;
let trailerDict;
for (i = 0, ii = trailers.length; i < ii; ++i) {
stream.pos = trailers[i];
var parser = new Parser(new Lexer(stream), /* allowStreams = */ true,
Expand All @@ -1247,18 +1270,33 @@ var XRef = (function XRefClosure() {
continue;
}
// read the trailer dictionary
dict = parser.getObj();
let dict = parser.getObj();
if (!isDict(dict)) {
continue;
}
// Do some basic validation of the trailer/root dictionary candidate.
let rootDict;
try {
rootDict = dict.get('Root');
} catch (ex) {
if (ex instanceof MissingDataException) {
throw ex;
}
continue;
}
if (!isDict(rootDict) || !rootDict.has('Pages')) {
continue;
}
// taking the first one with 'ID'
if (dict.has('ID')) {
return dict;
}
// The current dictionary is a candidate, but continue searching.
trailerDict = dict;
}
// no tailer with 'ID', taking last one (if exists)
if (dict) {
return dict;
// No trailer with 'ID', taking last one (if exists).
if (trailerDict) {
return trailerDict;
}
// nothing helps
throw new InvalidPDFException('Invalid PDF structure');
Expand Down
22 changes: 15 additions & 7 deletions src/core/parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ import {
PredictorStream, RunLengthStream
} from './stream';
import {
assert, FormatError, info, isNum, isString, MissingDataException, StreamType,
warn
assert, FormatError, info, isNum, isSpace, isString, MissingDataException,
StreamType, warn
} from '../shared/util';
import {
Cmd, Dict, EOF, isCmd, isDict, isEOF, isName, Name, Ref
Expand Down Expand Up @@ -721,7 +721,7 @@ var Lexer = (function LexerClosure() {
var ch = this.currentChar;
var eNotation = false;
var divideBy = 0; // different from 0 if it's a floating point value
var sign = 1;
var sign = 0;

if (ch === 0x2D) { // '-'
sign = -1;
Expand All @@ -732,10 +732,7 @@ var Lexer = (function LexerClosure() {
ch = this.nextChar();
}
} else if (ch === 0x2B) { // '+'
ch = this.nextChar();
}
if (ch === 0x2E) { // '.'
divideBy = 10;
sign = 1;
ch = this.nextChar();
}
if (ch === 0x0A || ch === 0x0D) { // LF, CR
Expand All @@ -744,11 +741,22 @@ var Lexer = (function LexerClosure() {
ch = this.nextChar();
} while (ch === 0x0A || ch === 0x0D);
}
if (ch === 0x2E) { // '.'
divideBy = 10;
ch = this.nextChar();
}
if (ch < 0x30 || ch > 0x39) { // '0' - '9'
if (divideBy === 10 && sign === 0 &&
(isSpace(ch) || ch === /* EOF = */ -1)) {
// This is consistent with Adobe Reader (fixes issue9252.pdf).
warn('Lexer.getNumber - treating a single decimal point as zero.');
return 0;
}
throw new FormatError(
`Invalid number: ${String.fromCharCode(ch)} (charCode ${ch})`);
}

sign = sign || 1;
var baseValue = ch - 0x30; // '0'
var powerValue = 0;
var powerValueSign = 1;
Expand Down
2 changes: 2 additions & 0 deletions test/pdfs/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,10 @@
!issue8823.pdf
!issue9084.pdf
!issue9105_reduced.pdf
!issue9252.pdf
!issue9262_reduced.pdf
!issue9291.pdf
!issue9418.pdf
!issue9458.pdf
!bad-PageLabels.pdf
!decodeACSuccessive.pdf
Expand Down
Binary file added test/pdfs/issue9252.pdf
Binary file not shown.
Binary file added test/pdfs/issue9418.pdf
Binary file not shown.
14 changes: 14 additions & 0 deletions test/test_manifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -741,6 +741,20 @@
"lastPage": 1,
"type": "eq"
},
{ "id": "issue9252",
"file": "pdfs/issue9252.pdf",
"md5": "c7d039d808d9344a95d2c9cfa7586ca3",
"rounds": 1,
"link": false,
"type": "eq"
},
{ "id": "issue9418",
"file": "pdfs/issue9418.pdf",
"md5": "32ecad8098acb1938539d47944ecb54b",
"rounds": 1,
"link": false,
"type": "eq"
},
{ "id": "issue9262",
"file": "pdfs/issue9262_reduced.pdf",
"md5": "5347ce2d7b3866625c22e115fd90e0de",
Expand Down
30 changes: 26 additions & 4 deletions test/unit/parser_spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
*/

import { Lexer, Linearization } from '../../src/core/parser';
import { FormatError } from '../../src/shared/util';
import { Name } from '../../src/core/primitives';
import { StringStream } from '../../src/core/stream';

Expand Down Expand Up @@ -58,11 +59,32 @@ describe('parser', function() {

it('should ignore line-breaks between operator and digit in number',
function() {
var input = new StringStream('-\r\n205.88');
var lexer = new Lexer(input);
var result = lexer.getNumber();
let minusInput = new StringStream('-\r\n205.88');
let minusLexer = new Lexer(minusInput);

expect(result).toEqual(-205.88);
expect(minusLexer.getNumber()).toEqual(-205.88);

let plusInput = new StringStream('+\r\n205.88');
let plusLexer = new Lexer(plusInput);

expect(plusLexer.getNumber()).toEqual(205.88);
});

// Checks Lexer.getNumber() on inputs built around a bare decimal point:
// an unsigned lone '.' must lex as 0, every other variant must throw.
it('should treat a single decimal point as zero', function() {
// Accepted case: the whole input is a single '.' with no sign.
let input = new StringStream('.');
let lexer = new Lexer(input);

expect(lexer.getNumber()).toEqual(0);

// Rejected cases: a doubled point, a signed point, and a sign separated
// from the point by a CR/LF line-break.
let numbers = ['..', '-.', '+.', '-\r\n.', '+\r\n.'];
for (let number of numbers) {
// These block-scoped `let` bindings shadow the outer input/lexer above.
let input = new StringStream(number);
let lexer = new Lexer(input);

// Wrapped in a function so the matcher can invoke it and catch the throw;
// the error must be a FormatError whose message starts "Invalid number: ".
expect(function() {
return lexer.getNumber();
}).toThrowError(FormatError, /^Invalid number:\s/);
}
});

it('should handle glued numbers and operators', function() {
Expand Down