Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[api-minor] Clear all caches in XRef.indexObjects, and improve /Root dictionary validation in XRef.parse (issue 14303) #14338

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 19 additions & 6 deletions src/core/xref.js
Original file line number Diff line number Diff line change
Expand Up @@ -107,14 +107,26 @@ class XRef {
}
warn(`XRef.parse - Invalid "Root" reference: "${ex}".`);
}
if (root instanceof Dict && root.has("Pages")) {
this.root = root;
} else {
if (!recoveryMode) {
throw new XRefParseException();
if (root instanceof Dict) {
try {
const pages = root.get("Pages");
if (pages instanceof Dict) {
this.root = root;
return;
}
} catch (ex) {
if (ex instanceof MissingDataException) {
throw ex;
}
warn(`XRef.parse - Invalid "Pages" reference: "${ex}".`);
}
throw new FormatError("Invalid root reference");
}

if (!recoveryMode) {
throw new XRefParseException();
}
// Even recovery failed, there's nothing more we can do here.
throw new InvalidPDFException("Invalid Root reference.");
}

processXRefTable(parser) {
Expand Down Expand Up @@ -417,6 +429,7 @@ class XRef {

// Clear out any existing entries, since they may be bogus.
this.entries.length = 0;
this._cacheMap.clear();

const stream = this.stream;
stream.pos = 0;
Expand Down
84 changes: 47 additions & 37 deletions test/unit/api_spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -445,7 +445,7 @@ describe("api", function () {
await Promise.all([loadingTask1.destroy(), loadingTask2.destroy()]);
});

it("creates pdf doc from PDF file with bad XRef table", async function () {
it("creates pdf doc from PDF file with bad XRef entry", async function () {
// A corrupt PDF file, where the XRef table have (some) bogus entries.
const loadingTask = getDocument(
buildGetDocumentParams("PDFBOX-4352-0.pdf", {
Expand All @@ -468,6 +468,26 @@ describe("api", function () {
await loadingTask.destroy();
});

it("creates pdf doc from PDF file with bad XRef header", async function () {
const loadingTask = getDocument(
buildGetDocumentParams("GHOSTSCRIPT-698804-1-fuzzed.pdf")
);
expect(loadingTask instanceof PDFDocumentLoadingTask).toEqual(true);

const pdfDocument = await loadingTask.promise;
expect(pdfDocument.numPages).toEqual(1);

const page = await pdfDocument.getPage(1);
expect(page instanceof PDFPageProxy).toEqual(true);

const opList = await page.getOperatorList();
expect(opList.fnArray.length).toEqual(0);
expect(opList.argsArray.length).toEqual(0);
expect(opList.lastChunk).toEqual(true);

await loadingTask.destroy();
});

it("creates pdf doc from PDF file with bad XRef byteWidths", async function () {
// A corrupt PDF file, where the XRef /W-array have (some) bogus entries.
const loadingTask = getDocument(
Expand All @@ -488,37 +508,49 @@ describe("api", function () {
await loadingTask.destroy();
});

it("creates pdf doc from PDF file with inaccessible /Pages tree", async function () {
const loadingTask = getDocument(
buildGetDocumentParams("poppler-395-0-fuzzed.pdf")
);
expect(loadingTask instanceof PDFDocumentLoadingTask).toEqual(true);

try {
await loadingTask.promise;

// Shouldn't get here.
expect(false).toEqual(true);
} catch (reason) {
expect(reason instanceof InvalidPDFException).toEqual(true);
expect(reason.message).toEqual("Invalid Root reference.");
}

await loadingTask.destroy();
});

it("creates pdf doc from PDF files, with bad /Pages tree /Count", async function () {
const loadingTask1 = getDocument(
buildGetDocumentParams("poppler-67295-0.pdf")
);
const loadingTask2 = getDocument(
buildGetDocumentParams("poppler-85140-0.pdf")
);
const loadingTask3 = getDocument(
buildGetDocumentParams("poppler-395-0-fuzzed.pdf")
);
const loadingTask4 = getDocument(
buildGetDocumentParams("GHOSTSCRIPT-698804-1-fuzzed.pdf")
);

expect(loadingTask1 instanceof PDFDocumentLoadingTask).toEqual(true);
expect(loadingTask2 instanceof PDFDocumentLoadingTask).toEqual(true);
expect(loadingTask3 instanceof PDFDocumentLoadingTask).toEqual(true);
expect(loadingTask4 instanceof PDFDocumentLoadingTask).toEqual(true);

const pdfDocument1 = await loadingTask1.promise;
const pdfDocument2 = await loadingTask2.promise;
const pdfDocument3 = await loadingTask3.promise;
const pdfDocument4 = await loadingTask4.promise;

expect(pdfDocument1.numPages).toEqual(1);
expect(pdfDocument2.numPages).toEqual(1);
expect(pdfDocument3.numPages).toEqual(1);
expect(pdfDocument4.numPages).toEqual(1);

const pageA = await pdfDocument1.getPage(1);
expect(pageA instanceof PDFPageProxy).toEqual(true);
const page = await pdfDocument1.getPage(1);
expect(page instanceof PDFPageProxy).toEqual(true);

const opList = await page.getOperatorList();
expect(opList.fnArray.length).toBeGreaterThan(5);
expect(opList.argsArray.length).toBeGreaterThan(5);
expect(opList.lastChunk).toEqual(true);

try {
await pdfDocument2.getPage(1);
Expand All @@ -529,28 +561,6 @@ describe("api", function () {
expect(reason instanceof UnknownErrorException).toEqual(true);
expect(reason.message).toEqual("Bad (uncompressed) XRef entry: 3R");
}
try {
await pdfDocument3.getPage(1);

// Shouldn't get here.
expect(false).toEqual(true);
} catch (reason) {
expect(reason instanceof UnknownErrorException).toEqual(true);
expect(reason.message).toEqual(
"Page dictionary kid reference points to wrong type of object."
);
}
try {
await pdfDocument4.getPage(1);

// Shouldn't get here.
expect(false).toEqual(true);
} catch (reason) {
expect(reason instanceof UnknownErrorException).toEqual(true);
expect(reason.message).toEqual(
"Page dictionary kid reference points to wrong type of object."
);
}

await Promise.all([loadingTask1.destroy(), loadingTask2.destroy()]);
});
Expand Down