Skip to content

Commit 103f0f9

Browse files
BUG: Fix missing "PreventGC" when cloning (#3520)
Closes #3450.

Co-authored-by: Thiago Bellini Ribeiro <thiago@bellini.dev>
1 parent 5dd8a42 commit 103f0f9

File tree

2 files changed

+61
-1
lines changed

2 files changed

+61
-1
lines changed

pypdf/generic/_base.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,7 @@ def _reference_clone(
170170
if ind is not None:
171171
if id(ind.pdf) not in pdf_dest._id_translated:
172172
pdf_dest._id_translated[id(ind.pdf)] = {}
173-
pdf_dest._id_translated[id(ind.pdf)]["PreventGC"] = ind.pdf # type: ignore
173+
pdf_dest._id_translated[id(ind.pdf)]["PreventGC"] = ind.pdf # type: ignore[index]
174174
if (
175175
not force_duplicate
176176
and ind.idnum in pdf_dest._id_translated[id(ind.pdf)]
@@ -346,6 +346,7 @@ def clone(
346346
return self
347347
if id(self.pdf) not in pdf_dest._id_translated:
348348
pdf_dest._id_translated[id(self.pdf)] = {}
349+
pdf_dest._id_translated[id(self.pdf)]["PreventGC"] = self.pdf # type: ignore[index]
349350

350351
if self.idnum in pdf_dest._id_translated[id(self.pdf)]:
351352
dup = pdf_dest.get_object(pdf_dest._id_translated[id(self.pdf)][self.idnum])

tests/test_generic.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
"""Test the pypdf.generic module."""
22

33
import codecs
4+
import gc
5+
import weakref
46
from base64 import a85encode
57
from copy import deepcopy
68
from io import BytesIO
@@ -916,6 +918,63 @@ def test_cloning(caplog):
916918
assert isinstance(obj21.get("/Test2"), IndirectObject)
917919

918920

921+
def test_cloning_indirect_obj_keeps_hard_reference():
    """
    Regression test for #3450.

    After an IndirectObject has been cloned into a second writer, the PDF
    it points to must stay alive even when every local reference to it is
    dropped. Otherwise the source could be deallocated and `id(obj)` could
    later return the same value for a different object.
    """
    source_writer = PdfWriter()
    reference = IndirectObject(1, 0, source_writer)

    # Observe the source PDF through a weak reference so the test itself
    # does not keep it alive.
    source_alive = weakref.ref(reference.pdf)
    assert source_alive() is not None

    target_writer = PdfWriter()
    reference.clone(target_writer)

    # Drop every local strong reference to the source and force a
    # collection; the clone is expected to have retained its own hard
    # reference, so the weak reference must still resolve.
    del reference, source_writer
    gc.collect()
    assert source_alive() is not None
947+
948+
949+
def test_cloning_null_obj_keeps_hard_reference():
    """
    Check that cloning a NullObject keeps its source PDF alive.

    The NullObject is given an indirect reference into a first writer and
    is then cloned into a second one. Once all local references are
    dropped, the source PDF must still be reachable; otherwise it could be
    deallocated and `id(obj)` could return the same value for a different
    object.
    """
    source_writer = PdfWriter()
    reference = IndirectObject(1, 0, source_writer)
    null_object = NullObject()
    null_object.indirect_reference = reference

    # Observe the source PDF through a weak reference so the test itself
    # does not keep it alive.
    source_alive = weakref.ref(reference.pdf)
    assert source_alive() is not None

    target_writer = PdfWriter()
    null_object.clone(target_writer)

    # Drop every local strong reference to the source and force a
    # collection; the clone is expected to have retained its own hard
    # reference, so the weak reference must still resolve.
    del reference, source_writer, null_object
    gc.collect()
    assert source_alive() is not None
976+
977+
919978
@pytest.mark.enable_socket
920979
def test_append_with_indirectobject_not_pointing(caplog):
921980
"""

0 commit comments

Comments
 (0)