Skip to content

Commit

Permalink
Improve text-selection for Type3 fonts with bogus /FontBBox-entries (issue 14999)
Browse files Browse the repository at this point in the history

This extends PR 13461 by also building a fallback bounding box for Type3 fonts that contain a much-too-small /FontBBox-entry.

*Please note:* While this patch improves things overall, copy-and-pasting still doesn't work perfectly for this document. In particular, the lowercase letter "c" cannot be selected/copied. However, this can be reproduced in both Adobe Reader and PDFium (in Google Chrome) too, since it's caused by a lack of proper /ToUnicode-data in the PDF document.
  • Loading branch information
Snuffleupagus committed Jul 5, 2022
1 parent a1ac1a6 commit 05fa303
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 5 deletions.
19 changes: 14 additions & 5 deletions src/core/evaluator.js
Original file line number Diff line number Diff line change
Expand Up @@ -4404,8 +4404,10 @@ class TranslatedFont {
const fontResources = this.dict.get("Resources") || resources;
const charProcOperatorList = Object.create(null);

const isEmptyBBox =
!translatedFont.bbox || isArrayEqual(translatedFont.bbox, [0, 0, 0, 0]);
const fontBBox = Util.normalizeRect(translatedFont.bbox || [0, 0, 0, 0]),
width = fontBBox[2] - fontBBox[0],
height = fontBBox[3] - fontBBox[1];
const fontBBoxSize = Math.hypot(width, height);

for (const key of charProcs.getKeys()) {
loadCharProcsPromise = loadCharProcsPromise.then(() => {
Expand All @@ -4426,7 +4428,7 @@ class TranslatedFont {
// colour-related parameters) in the graphics state;
// any use of such operators shall be ignored."
if (operatorList.fnArray[0] === OPS.setCharWidthAndBounds) {
this._removeType3ColorOperators(operatorList, isEmptyBBox);
this._removeType3ColorOperators(operatorList, fontBBoxSize);
}
charProcOperatorList[key] = operatorList.getIR();

Expand Down Expand Up @@ -4454,7 +4456,7 @@ class TranslatedFont {
/**
* @private
*/
_removeType3ColorOperators(operatorList, isEmptyBBox = false) {
_removeType3ColorOperators(operatorList, fontBBoxSize = NaN) {
if (
typeof PDFJSDev === "undefined" ||
PDFJSDev.test("!PRODUCTION || TESTING")
Expand All @@ -4467,12 +4469,19 @@ class TranslatedFont {
const charBBox = Util.normalizeRect(operatorList.argsArray[0].slice(2)),
width = charBBox[2] - charBBox[0],
height = charBBox[3] - charBBox[1];
const charBBoxSize = Math.hypot(width, height);

if (width === 0 || height === 0) {
// Skip the d1 operator when its bounds are bogus (fixes issue14953.pdf).
operatorList.fnArray.splice(0, 1);
operatorList.argsArray.splice(0, 1);
} else if (isEmptyBBox) {
} else if (
fontBBoxSize === 0 ||
Math.round(charBBoxSize / fontBBoxSize) >= 10
) {
// Override the fontBBox when it's undefined/empty, or when it's at least
// (approximately) one order of magnitude smaller than the charBBox
// (fixes issue14999_reduced.pdf).
if (!this._bbox) {
this._bbox = [Infinity, Infinity, -Infinity, -Infinity];
}
Expand Down
2 changes: 2 additions & 0 deletions test/pdfs/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@
!issue13916.pdf
!issue14023.pdf
!issue14438.pdf
!issue14999_reduced.pdf
!bad-PageLabels.pdf
!decodeACSuccessive.pdf
!issue13003.pdf
Expand Down Expand Up @@ -328,6 +329,7 @@
!bug903856.pdf
!issue14618.pdf
!bug850854.pdf
!issue14999_reduced.pdf
!issue12810.pdf
!bug866395.pdf
!issue12010_reduced.pdf
Expand Down
Binary file added test/pdfs/issue14999_reduced.pdf
Binary file not shown.
6 changes: 6 additions & 0 deletions test/test_manifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -2848,6 +2848,12 @@
"link": false,
"type": "text"
},
{ "id": "issue14999",
"file": "pdfs/issue14999_reduced.pdf",
"md5": "a4e664e734f6869aa66245e72d448874",
"rounds": 1,
"type": "text"
},
{ "id": "issue6901-eq",
"file": "pdfs/issue6901.pdf",
"md5": "1a0604b1a7a3aaf2162b425a9a84230b",
Expand Down

0 comments on commit 05fa303

Please sign in to comment.