Skip to content

Commit

Permalink
ROB: Handle missing /Type entry in Page tree (#1845)
Browse files Browse the repository at this point in the history
/Type is mandatory in page tree nodes according to the PDF specification. Hence, dealing with files that omit it is a robustness improvement.
Acrobat Reader can open such PDF documents as well.

Fixes #500
  • Loading branch information
pubpub-zz authored May 20, 2023
1 parent e4ef5b9 commit 29e7eb9
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 3 deletions.
6 changes: 5 additions & 1 deletion pypdf/_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -1187,9 +1187,13 @@ def _flatten(
pages = catalog["/Pages"].get_object() # type: ignore
self.flattened_pages = []

t = "/Pages"
if PA.TYPE in pages:
t = pages[PA.TYPE] # type: ignore
# if pdf has no type, considered as a page if /Kids is missing
elif PA.KIDS not in pages:
t = "/Page"
else:
t = "/Pages"

if t == "/Pages":
for attr in inheritable_page_attributes:
Expand Down
20 changes: 18 additions & 2 deletions tests/test_page.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,13 @@ def test_page_operations(pdf_path, password):
assert abs(t.ctm[4] + 100) < 0.01
assert abs(t.ctm[5] - 50) < 0.01

transformation = Transformation().rotate(90).scale(1).translate(1, 1).transform(Transformation((1, 0, 0, -1, 0, 0)))
transformation = (
Transformation()
.rotate(90)
.scale(1)
.translate(1, 1)
.transform(Transformation((1, 0, 0, -1, 0, 0)))
)
page.add_transformation(transformation, expand=True)
page.add_transformation((1, 0, 0, 0, 0, 0))
page.scale(2, 2)
Expand Down Expand Up @@ -178,7 +184,10 @@ def test_transformation_equivalence2():
w.append(reader_add)
height = reader_add.pages[0].mediabox.height
w.pages[0].merge_transformed_page(
reader_base.pages[0], Transformation().transform(Transformation((1, 0, 0, -1, 0, height))), False, False
reader_base.pages[0],
Transformation().transform(Transformation((1, 0, 0, -1, 0, height))),
False,
False,
)
# No special assert: Visual check the page has been increased and all is visible (box+graph)

Expand Down Expand Up @@ -1111,3 +1120,10 @@ def test_pages_printing():
pdf_path = RESOURCE_ROOT / "crazyones.pdf"
reader = PdfReader(pdf_path)
assert str(reader.pages) == "[PageObject(0)]"


def test_pdf_pages_missing_type():
    """Check that a page is still reachable after its /Type entry is deleted.

    The first kid of the page tree has its "/Type" key removed, then the first
    page is looked up through ``reader.pages``.
    """
    reader = PdfReader(RESOURCE_ROOT / "crazyones.pdf")
    first_kid = reader.trailer["/Root"]["/Pages"]["/Kids"][0].get_object()
    del first_kid["/Type"]
    # No assertion on the result: the lookup itself must complete without raising.
    reader.pages[0]

0 comments on commit 29e7eb9

Please sign in to comment.