Skip to content

Commit

Permalink
Ignore (rather than crash on) broken references in annotations
Browse files Browse the repository at this point in the history
Tentative fix for issue #42

Also:
 * use PSLiteral in ANNOT_SUBTYPES rather than hashing the same name strings
 * improve typing of _mkannotation, and placate mypy
  • Loading branch information
0xabu committed Oct 20, 2021
1 parent 1d4dbcc commit 51af886
Showing 1 changed file with 13 additions and 7 deletions.
20 changes: 13 additions & 7 deletions pdfannots/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,12 @@

logger = logging.getLogger(__name__)

ANNOT_SUBTYPES = {e.name: e for e in AnnotationType}
ANNOT_SUBTYPES: typing.Dict[PSLiteral, AnnotationType] = {
PSLiteralTable.intern(e.name): e for e in AnnotationType}


def _mkannotation(
pa: typing.Any,
pa: typing.Dict[str, typing.Any],
page: Page
) -> typing.Optional[Annotation]:
"""
Expand All @@ -44,10 +45,15 @@ def _mkannotation(
"""

subtype = pa.get('Subtype')
try:
annot_type = ANNOT_SUBTYPES[subtype.name]
except (TypeError, KeyError):
# subtype is missing (None), or is an unknown/unsupported type
annot_type = None
if isinstance(subtype, PSLiteral):
try:
annot_type = ANNOT_SUBTYPES[subtype]
except KeyError:
pass

if annot_type is None:
logger.warning("Unsupported annotation subtype: %r", subtype)
return None

contents = pa.get('Contents')
Expand Down Expand Up @@ -445,7 +451,7 @@ def emit_progress(msg: str) -> None:
# Construct Annotation objects, and append them to the page.
for pa in pdftypes.resolve1(pdfpage.annots) if pdfpage.annots else []:
if isinstance(pa, pdftypes.PDFObjRef):
annot = _mkannotation(pa.resolve(), page)
annot = _mkannotation(pdftypes.dict_value(pa), page)
if annot is not None:
page.annots.append(annot)
else:
Expand Down

0 comments on commit 51af886

Please sign in to comment.