Skip to content

Commit

Permalink
Merge pull request #10392 from Snuffleupagus/checkFirstPage
Browse files Browse the repository at this point in the history
Check that the first page can be successfully loaded, to try and ascertain the validity of the XRef table (issue 7496, issue 10326)
  • Loading branch information
timvandermeij authored Dec 29, 2018
2 parents d3868e1 + 60bcce1 commit e53877f
Show file tree
Hide file tree
Showing 7 changed files with 69 additions and 37 deletions.
16 changes: 15 additions & 1 deletion src/core/document.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
import {
assert, FormatError, getInheritableProperty, info, isArrayBuffer, isBool,
isNum, isSpace, isString, MissingDataException, OPS, shadow, stringToBytes,
stringToPDFString, Util, warn
stringToPDFString, Util, warn, XRefEntryException, XRefParseException
} from '../shared/util';
import { Catalog, ObjectLoader, XRef } from './obj';
import { Dict, isDict, isName, isStream, Ref } from './primitives';
Expand Down Expand Up @@ -649,6 +649,20 @@ var PDFDocument = (function PDFDocumentClosure() {
});
},

checkFirstPage() {
return this.getPage(0).catch((reason) => {
if (reason instanceof XRefEntryException) {
// Clear out the various caches to ensure that we haven't stored any
// inconsistent and/or incorrect state, since that could easily break
// subsequent `this.getPage` calls.
this._pagePromises.length = 0;
this.cleanup();

throw new XRefParseException();
}
});
},

cleanup: function PDFDocument_cleanup() {
return this.catalog.cleanup();
},
Expand Down
18 changes: 8 additions & 10 deletions src/core/obj.js
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ import {
bytesToString, createPromiseCapability, createValidAbsoluteUrl, FormatError,
info, InvalidPDFException, isBool, isNum, isString, MissingDataException,
PermissionFlag, shadow, stringToPDFString, stringToUTF8String,
toRomanNumerals, unreachable, warn, XRefParseException
toRomanNumerals, unreachable, warn, XRefEntryException, XRefParseException
} from '../shared/util';
import {
Dict, isCmd, isDict, isName, isRef, isRefsEqual, isStream, Ref, RefSet,
Expand Down Expand Up @@ -1473,7 +1473,7 @@ var XRef = (function XRefClosure() {
if (xrefEntry.uncompressed) {
xrefEntry = this.fetchUncompressed(ref, xrefEntry, suppressEncryption);
} else {
xrefEntry = this.fetchCompressed(xrefEntry, suppressEncryption);
xrefEntry = this.fetchCompressed(ref, xrefEntry, suppressEncryption);
}
if (isDict(xrefEntry)) {
xrefEntry.objId = ref.toString();
Expand All @@ -1483,12 +1483,11 @@ var XRef = (function XRefClosure() {
return xrefEntry;
},

fetchUncompressed: function XRef_fetchUncompressed(ref, xrefEntry,
suppressEncryption) {
fetchUncompressed(ref, xrefEntry, suppressEncryption = false) {
var gen = ref.gen;
var num = ref.num;
if (xrefEntry.gen !== gen) {
throw new FormatError('inconsistent generation in XRef');
throw new XRefEntryException(`Inconsistent generation in XRef: ${ref}`);
}
var stream = this.stream.makeSubStream(xrefEntry.offset +
this.stream.start);
Expand All @@ -1504,7 +1503,7 @@ var XRef = (function XRefClosure() {
obj2 = parseInt(obj2, 10);
}
if (obj1 !== num || obj2 !== gen || !isCmd(obj3)) {
throw new FormatError('bad XRef entry');
throw new XRefEntryException(`Bad (uncompressed) XRef entry: ${ref}`);
}
if (obj3.cmd !== 'obj') {
// some bad PDFs use "obj1234" and really mean 1234
Expand All @@ -1514,7 +1513,7 @@ var XRef = (function XRefClosure() {
return num;
}
}
throw new FormatError('bad XRef entry');
throw new XRefEntryException(`Bad (uncompressed) XRef entry: ${ref}`);
}
if (this.encrypt && !suppressEncryption) {
xrefEntry = parser.getObj(this.encrypt.createCipherTransform(num, gen));
Expand All @@ -1527,8 +1526,7 @@ var XRef = (function XRefClosure() {
return xrefEntry;
},

fetchCompressed: function XRef_fetchCompressed(xrefEntry,
suppressEncryption) {
fetchCompressed(ref, xrefEntry, suppressEncryption = false) {
var tableOffset = xrefEntry.offset;
var stream = this.fetch(new Ref(tableOffset, 0));
if (!isStream(stream)) {
Expand Down Expand Up @@ -1573,7 +1571,7 @@ var XRef = (function XRefClosure() {
}
xrefEntry = entries[xrefEntry.gen];
if (xrefEntry === undefined) {
throw new FormatError('bad XRef entry for compressed object');
throw new XRefEntryException(`Bad (compressed) XRef entry: ${ref}`);
}
return xrefEntry;
},
Expand Down
41 changes: 15 additions & 26 deletions src/core/worker.js
Original file line number Diff line number Diff line change
Expand Up @@ -258,33 +258,22 @@ var WorkerMessageHandler = {
WorkerTasks.splice(i, 1);
}

function loadDocument(recoveryMode) {
var loadDocumentCapability = createPromiseCapability();

var parseSuccess = function parseSuccess() {
Promise.all([
pdfManager.ensureDoc('numPages'),
pdfManager.ensureDoc('fingerprint'),
]).then(function([numPages, fingerprint]) {
loadDocumentCapability.resolve({
numPages,
fingerprint,
});
}, parseFailure);
};

var parseFailure = function parseFailure(e) {
loadDocumentCapability.reject(e);
};

pdfManager.ensureDoc('checkHeader', []).then(function() {
pdfManager.ensureDoc('parseStartXRef', []).then(function() {
pdfManager.ensureDoc('parse', [recoveryMode]).then(
parseSuccess, parseFailure);
}, parseFailure);
}, parseFailure);
async function loadDocument(recoveryMode) {
await pdfManager.ensureDoc('checkHeader');
await pdfManager.ensureDoc('parseStartXRef');
await pdfManager.ensureDoc('parse', [recoveryMode]);

if (!recoveryMode) {
// Check that at least the first page can be successfully loaded,
// since otherwise the XRef table is definitely not valid.
await pdfManager.ensureDoc('checkFirstPage');
}

return loadDocumentCapability.promise;
const [numPages, fingerprint] = await Promise.all([
pdfManager.ensureDoc('numPages'),
pdfManager.ensureDoc('fingerprint'),
]);
return { numPages, fingerprint, };
}

function getPdfManager(data, evaluatorOptions) {
Expand Down
13 changes: 13 additions & 0 deletions src/shared/util.js
Original file line number Diff line number Diff line change
Expand Up @@ -472,6 +472,18 @@ var MissingDataException = (function MissingDataExceptionClosure() {
return MissingDataException;
})();

const XRefEntryException = (function XRefEntryExceptionClosure() {
function XRefEntryException(msg) {
this.message = msg;
}

XRefEntryException.prototype = new Error();
XRefEntryException.prototype.name = 'XRefEntryException';
XRefEntryException.constructor = XRefEntryException;

return XRefEntryException;
})();

var XRefParseException = (function XRefParseExceptionClosure() {
function XRefParseException(msg) {
this.message = msg;
Expand Down Expand Up @@ -1033,6 +1045,7 @@ export {
UnknownErrorException,
Util,
toRomanNumerals,
XRefEntryException,
XRefParseException,
FormatError,
arrayByteLength,
Expand Down
1 change: 1 addition & 0 deletions test/pdfs/issue10326.pdf.link
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
https://github.com/mozilla/pdf.js/files/2643238/test.1.pdf
1 change: 1 addition & 0 deletions test/pdfs/issue7496.pdf.link
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
https://github.com/mozilla/pdf.js/files/369694/repro-pdf.pdf
16 changes: 16 additions & 0 deletions test/test_manifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -1275,6 +1275,22 @@
"rounds": 1,
"type": "eq"
},
{ "id": "issue7496",
"file": "pdfs/issue7496.pdf",
"md5": "b422981ae781166e75c0fb4c3634ed96",
"link": true,
"rounds": 1,
"lastPage": 1,
"type": "load"
},
{ "id": "issue10326",
"file": "pdfs/issue10326.pdf",
"md5": "015c13b09ef735ea1204f38992c60487",
"link": true,
"rounds": 1,
"lastPage": 1,
"type": "load"
},
{ "id": "issue7544",
"file": "pdfs/issue7544.pdf",
"md5": "87e3a9fc7d6a6c1bd5b53af6926ce48e",
Expand Down

0 comments on commit e53877f

Please sign in to comment.