Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[api-minor] Validate the /Pages-tree /Count entry during document initialization (issue 14303) #14311

Merged
merged 1 commit into from
Nov 27, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 21 additions & 5 deletions src/core/catalog.js
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import {
import {
collectActions,
MissingDataException,
PageDictMissingException,
recoverJsURL,
toRomanNumerals,
} from "./core_utils.js";
Expand Down Expand Up @@ -70,6 +71,7 @@ class Catalog {
if (!isDict(this._catDict)) {
throw new FormatError("Catalog object is not a dictionary.");
}
this._actualNumPages = null;

this.fontCache = new RefSetCache();
this.builtInCMapCache = new Map();
Expand Down Expand Up @@ -534,14 +536,26 @@ class Catalog {
};
}

get numPages() {
setActualNumPages(num = null) {
this._actualNumPages = num;
}

get hasActualNumPages() {
return this._actualNumPages !== null;
}

get _pagesCount() {
const obj = this.toplevelPagesDict.get("Count");
if (!Number.isInteger(obj)) {
throw new FormatError(
"Page count in top-level pages dictionary is not an integer."
);
}
return shadow(this, "numPages", obj);
return shadow(this, "_pagesCount", obj);
}

get numPages() {
return this.hasActualNumPages ? this._actualNumPages : this._pagesCount;
}

get destinations() {
Expand Down Expand Up @@ -1071,7 +1085,7 @@ class Catalog {
});
}

getPageDict(pageIndex) {
getPageDict(pageIndex, skipCount = false) {
const capability = createPromiseCapability();
const nodesToVisit = [this._catDict.getRaw("Pages")];
const visitedNodes = new RefSet();
Expand Down Expand Up @@ -1133,7 +1147,7 @@ class Catalog {
}

count = currentNode.get("Count");
if (Number.isInteger(count) && count >= 0) {
if (Number.isInteger(count) && count >= 0 && !skipCount) {
// Cache the Kids count, since it can reduce redundant lookups in
// documents where all nodes are found at *one* level of the tree.
const objId = currentNode.objId;
Expand Down Expand Up @@ -1177,7 +1191,9 @@ class Catalog {
nodesToVisit.push(kids[last]);
}
}
capability.reject(new Error(`Page index ${pageIndex} not found.`));
capability.reject(
new PageDictMissingException(`Page index ${pageIndex} not found.`)
);
}
next();
return capability.promise;
Expand Down
7 changes: 7 additions & 0 deletions src/core/core_utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,12 @@ class MissingDataException extends BaseException {
}
}

class PageDictMissingException extends BaseException {
constructor(msg) {
super(msg, "PageDictMissingException");
}
}

class ParserEOFException extends BaseException {
constructor(msg) {
super(msg, "ParserEOFException");
Expand Down Expand Up @@ -541,6 +547,7 @@ export {
isWhiteSpace,
log2,
MissingDataException,
PageDictMissingException,
ParserEOFException,
parseXFAPath,
readInt8,
Expand Down
69 changes: 65 additions & 4 deletions src/core/document.js
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ import {
getInheritableProperty,
isWhiteSpace,
MissingDataException,
PageDictMissingException,
validateCSSFont,
XRefEntryException,
XRefParseException,
Expand Down Expand Up @@ -787,7 +788,9 @@ class PDFDocument {

get numPages() {
let num = 0;
if (this.xfaFactory) {
if (this.catalog.hasActualNumPages) {
num = this.catalog.numPages;
} else if (this.xfaFactory) {
// num is a Promise.
num = this.xfaFactory.getNumPages();
} else if (this.linearization) {
Expand Down Expand Up @@ -1331,8 +1334,13 @@ class PDFDocument {
return promise;
}

checkFirstPage() {
return this.getPage(0).catch(async reason => {
async checkFirstPage(recoveryMode = false) {
if (recoveryMode) {
return;
}
try {
await this.getPage(0);
} catch (reason) {
if (reason instanceof XRefEntryException) {
// Clear out the various caches to ensure that we haven't stored any
// inconsistent and/or incorrect state, since that could easily break
Expand All @@ -1342,7 +1350,60 @@ class PDFDocument {

throw new XRefParseException();
}
});
}
}

async checkLastPage(recoveryMode = false) {
this.catalog.setActualNumPages(); // Ensure that it's always reset.
let numPages;

try {
await Promise.all([
this.pdfManager.ensureDoc("xfaFactory"),
this.pdfManager.ensureDoc("linearization"),
this.pdfManager.ensureCatalog("numPages"),
]);

if (this.xfaFactory) {
return; // The Page count is always calculated for XFA-documents.
} else if (this.linearization) {
numPages = this.linearization.numPages;
} else {
numPages = this.catalog.numPages;
}

if (numPages === 1) {
return;
} else if (!Number.isInteger(numPages)) {
throw new FormatError("Page count is not an integer.");
}
await this.getPage(numPages - 1);
} catch (reason) {
warn(`checkLastPage - invalid /Pages tree /Count: ${numPages}.`);
// Clear out the various caches to ensure that we haven't stored any
// inconsistent and/or incorrect state, since that could easily break
// subsequent `this.getPage` calls.
await this.cleanup();

let pageIndex = 1; // The first page was already loaded.
while (true) {
try {
await this.getPage(pageIndex, /* skipCount = */ true);
} catch (reasonLoop) {
if (reasonLoop instanceof PageDictMissingException) {
break;
}
if (reasonLoop instanceof XRefEntryException) {
if (!recoveryMode) {
throw new XRefParseException();
}
break;
}
}
pageIndex++;
}
this.catalog.setActualNumPages(pageIndex);
}
}

fontFallback(id, handler) {
Expand Down
11 changes: 6 additions & 5 deletions src/core/worker.js
Original file line number Diff line number Diff line change
Expand Up @@ -170,11 +170,12 @@ class WorkerMessageHandler {
await pdfManager.ensureDoc("parseStartXRef");
await pdfManager.ensureDoc("parse", [recoveryMode]);

if (!recoveryMode) {
// Check that at least the first page can be successfully loaded,
// since otherwise the XRef table is definitely not valid.
await pdfManager.ensureDoc("checkFirstPage");
}
// Check that at least the first page can be successfully loaded,
// since otherwise the XRef table is definitely not valid.
await pdfManager.ensureDoc("checkFirstPage", [recoveryMode]);
// Check that the last page can be sucessfully loaded, to ensure that
// `numPages` is correct, and fallback to walking the entire /Pages-tree.
await pdfManager.ensureDoc("checkLastPage", [recoveryMode]);

const isPureXfa = await pdfManager.ensureDoc("isPureXfa");
if (isPureXfa) {
Expand Down
2 changes: 2 additions & 0 deletions test/pdfs/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -490,3 +490,5 @@
!PDFBOX-4352-0.pdf
!REDHAT-1531897-0.pdf
!xfa_issue14315.pdf
!poppler-67295-0.pdf
!poppler-85140-0.pdf
Binary file added test/pdfs/poppler-67295-0.pdf
Binary file not shown.
Binary file added test/pdfs/poppler-85140-0.pdf
Binary file not shown.
40 changes: 38 additions & 2 deletions test/unit/api_spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import {
PasswordResponses,
PermissionFlag,
StreamType,
UnknownErrorException,
} from "../../src/shared/util.js";
import {
buildGetDocumentParams,
Expand Down Expand Up @@ -478,6 +479,38 @@ describe("api", function () {

await loadingTask.destroy();
});

it("creates pdf doc from PDF files, with bad /Pages tree /Count", async function () {
const loadingTask1 = getDocument(
buildGetDocumentParams("poppler-67295-0.pdf")
);
const loadingTask2 = getDocument(
buildGetDocumentParams("poppler-85140-0.pdf")
);
expect(loadingTask1 instanceof PDFDocumentLoadingTask).toEqual(true);
expect(loadingTask2 instanceof PDFDocumentLoadingTask).toEqual(true);

const pdfDocument1 = await loadingTask1.promise;
const pdfDocument2 = await loadingTask2.promise;

expect(pdfDocument1.numPages).toEqual(1);
expect(pdfDocument2.numPages).toEqual(1);

const pageA = await pdfDocument1.getPage(1);
expect(pageA instanceof PDFPageProxy).toEqual(true);

try {
await pdfDocument2.getPage(1);

// Shouldn't get here.
expect(false).toEqual(true);
} catch (reason) {
expect(reason instanceof UnknownErrorException).toEqual(true);
expect(reason.message).toEqual("Bad (uncompressed) XRef entry: 3R");
}

await Promise.all([loadingTask1.destroy(), loadingTask2.destroy()]);
});
});

describe("PDFWorker", function () {
Expand Down Expand Up @@ -683,7 +716,7 @@ describe("api", function () {
throw new Error("shall fail for invalid page");
},
function (reason) {
expect(reason instanceof Error).toEqual(true);
expect(reason instanceof UnknownErrorException).toEqual(true);
expect(reason.message).toEqual(
"Pages tree contains circular reference."
);
Expand Down Expand Up @@ -724,7 +757,10 @@ describe("api", function () {
// Shouldn't get here.
expect(false).toEqual(true);
} catch (reason) {
expect(reason instanceof Error).toEqual(true);
expect(reason instanceof UnknownErrorException).toEqual(true);
expect(reason.message).toEqual(
"The reference does not point to a /Page dictionary."
);
}
});

Expand Down