Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

ROB: merge documents with named destinations with invalid page #2857

Merged
merged 4 commits into from
Sep 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions pypdf/_doc_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -855,7 +855,7 @@ def _get_outline(
# §12.3.3 Document outline, entries in the outline dictionary
if not is_null_or_none(lines) and "/First" in lines:
node = cast(DictionaryObject, lines["/First"])
self._namedDests = self._get_named_destinations()
self._named_destinations = self._get_named_destinations()

if node is None:
return outline
Expand Down Expand Up @@ -996,7 +996,7 @@ def _build_outline_item(self, node: DictionaryObject) -> Optional[Destination]:
# TODO : keep named destination instead of replacing it ?
try:
outline_item = self._build_destination(
title, self._namedDests[dest].dest_array
title, self._named_destinations[dest].dest_array
)
except KeyError:
# named destination not found in Name Dict
Expand Down
15 changes: 12 additions & 3 deletions pypdf/_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -2699,10 +2699,13 @@ def merge(
position += 1
srcpages[pg.indirect_reference.idnum].original_page = pg

reader._namedDests = (
reader._named_destinations = (
reader.named_destinations
) # need for the outline processing below
for dest in reader._namedDests.values():

arr: Any

def _process_named_dests(dest: Any) -> None:
arr = dest.dest_array
if "/Names" in self._root_object and dest["/Title"] in cast(
List[Any],
Expand All @@ -2718,7 +2721,10 @@ def merge(
elif isinstance(dest["/Page"], int):
# the page reference is a page number normally not a PDF Reference
# page numbers as int are normally accepted only in external goto
p = reader.pages[dest["/Page"]]
try:
p = reader.pages[dest["/Page"]]
except IndexError:
return
assert p.indirect_reference is not None
try:
arr[NumberObject(0)] = NumberObject(
Expand All @@ -2733,6 +2739,9 @@ def merge(
].indirect_reference
self.add_named_destination_array(dest["/Title"], arr)

for dest in reader._named_destinations.values():
_process_named_dests(dest)

outline_item_typ: TreeObject
if outline_item is not None:
outline_item_typ = cast(
Expand Down
12 changes: 12 additions & 0 deletions tests/test_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -2468,3 +2468,15 @@ def test_increment_writer(caplog):
assert writer.metadata is None
b = BytesIO()
writer.write(b)


@pytest.mark.enable_socket()
def test_append_pdf_with_dest_without_page(caplog):
    """
    Regression test for #2842.

    Downloads the sample PDF attached to the issue, appends it to a fresh
    writer, and checks that the append completes, that the named destination
    "/__WKANCHOR_8" is absent from the result, and that exactly three named
    destinations remain.

    NOTE(review): "/__WKANCHOR_8" is presumably the destination whose page
    cannot be resolved in the source document -- confirm against the sample
    file.
    """
    pdf_bytes = get_data_from_url(
        "https://github.com/user-attachments/files/16990834/test.pdf",
        name="iss2842.pdf",
    )
    source = PdfReader(BytesIO(pdf_bytes))

    merged = PdfWriter()
    merged.append(source)

    # The destination in question must not have been copied into the writer.
    assert "/__WKANCHOR_8" not in merged.named_destinations
    # All other named destinations of the sample document are still present.
    assert len(merged.named_destinations) == 3
Loading