Skip to content

Commit

Permalink
Always flush the current item with MarkedContent stuff when getting text (mozilla#15094)
Browse files Browse the repository at this point in the history
  • Loading branch information
calixteman committed Jun 25, 2022
1 parent 23fcdab commit f161929
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 3 deletions.
5 changes: 3 additions & 2 deletions src/core/evaluator.js
Original file line number Diff line number Diff line change
Expand Up @@ -3290,6 +3290,7 @@ class PartialEvaluator {
);
return;
case OPS.beginMarkedContent:
flushTextContentItem();
if (includeMarkedContent) {
textContent.items.push({
type: "beginMarkedContent",
Expand All @@ -3298,8 +3299,8 @@ class PartialEvaluator {
}
break;
case OPS.beginMarkedContentProps:
flushTextContentItem();
if (includeMarkedContent) {
flushTextContentItem();
let mcid = null;
if (args[1] instanceof Dict) {
mcid = args[1].get("MCID");
Expand All @@ -3314,8 +3315,8 @@ class PartialEvaluator {
}
break;
case OPS.endMarkedContent:
flushTextContentItem();
if (includeMarkedContent) {
flushTextContentItem();
textContent.items.push({
type: "endMarkedContent",
});
Expand Down
22 changes: 21 additions & 1 deletion test/unit/api_spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,9 @@ describe("api", function () {
}

/**
 * Concatenate the text of a list of text-content items into one string.
 *
 * Items that carry no `str` property (for example marked-content markers
 * such as `{ type: "endMarkedContent" }`) contribute an empty string rather
 * than the literal text "undefined".
 *
 * @param {Array<Object>} items - Text-content items; each may have a `str`
 *   string and a `hasEOL` flag.
 * @returns {string} Every `str` in order, with "\n" appended after each item
 *   whose `hasEOL` is truthy.
 */
function mergeText(items) {
  return items
    .map(chunk => (chunk.str ?? "") + (chunk.hasEOL ? "\n" : ""))
    .join("");
}

describe("getDocument", function () {
Expand Down Expand Up @@ -2275,6 +2277,24 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`)
await loadingTask.destroy();
});

it("gets text content with or without includeMarkedContent, and compare (issue 15094)", async function () {
  // Regression check: the merged page text must be the same whether or not
  // marked-content items are included in the text content.
  const loadingTask = getDocument(buildGetDocumentParams("pdf.pdf"));
  const pdfDoc = await loadingTask.promise;
  const pdfPage = await pdfDoc.getPage(568);

  // Fetch the page's text content with the given flag and flatten it.
  const extractText = async includeMarkedContent => {
    const { items } = await pdfPage.getTextContent({ includeMarkedContent });
    return mergeText(items);
  };

  // Keep the two extractions sequential, plain text first.
  const textWithoutMC = await extractText(false);
  const textWithMC = await extractText(true);

  expect(textWithoutMC).toEqual(textWithMC);

  await loadingTask.destroy();
});

it("gets empty structure tree", async function () {
const tree = await page.getStructTree();

Expand Down

0 comments on commit f161929

Please sign in to comment.