diff --git a/pypdf/_protocols.py b/pypdf/_protocols.py
index 85e9e0a56..ba6cd8a3c 100644
--- a/pypdf/_protocols.py
+++ b/pypdf/_protocols.py
@@ -73,6 +73,9 @@ def get_object(self, indirect_reference: Any) -> Optional[PdfObjectProtocol]:
     def write(self, stream: Union[Path, StrByteType]) -> Tuple[bool, IO]:
         ...
 
+    def _add_object(self, obj: Any) -> Any:
+        ...
+
     @property
     def pages(self) -> List[Any]:
         ...
diff --git a/pypdf/generic/_base.py b/pypdf/generic/_base.py
index f75e66dd6..be3d71c45 100644
--- a/pypdf/generic/_base.py
+++ b/pypdf/generic/_base.py
@@ -277,7 +277,10 @@ def clone(
                 obj = NullObject()
                 assert isinstance(self, (IndirectObject,))
                 obj.indirect_reference = self
-            dup = obj.clone(pdf_dest, force_duplicate, ignore_fields)
+            dup = pdf_dest._add_object(
+                obj.clone(pdf_dest, force_duplicate, ignore_fields)
+            )
+        # asserts added to prevent errors in mypy
         assert dup is not None
         assert dup.indirect_reference is not None
         return dup.indirect_reference
diff --git a/pypdf/generic/_data_structures.py b/pypdf/generic/_data_structures.py
index 1fd196027..b8aaf12d4 100644
--- a/pypdf/generic/_data_structures.py
+++ b/pypdf/generic/_data_structures.py
@@ -750,7 +750,10 @@ def _clone(
             if decoded_self is None:
                 self.decoded_self = None
             else:
-                self.decoded_self = decoded_self.clone(pdf_dest, True, ignore_fields)  # type: ignore[assignment]
+                self.decoded_self = cast(
+                    "DecodedStreamObject",
+                    decoded_self.clone(pdf_dest, force_duplicate, ignore_fields),
+                )
         except Exception:
             pass
         super()._clone(src, pdf_dest, force_duplicate, ignore_fields)
diff --git a/tests/test_writer.py b/tests/test_writer.py
index 10943c509..5066eecb6 100644
--- a/tests/test_writer.py
+++ b/tests/test_writer.py
@@ -1292,3 +1292,14 @@ def test_iss1723():
     in_pdf = PdfReader(BytesIO(get_pdf_from_url(url, name=name)))
     out_pdf = PdfWriter()
     out_pdf.append(in_pdf, (3, 5))
+
+
+@pytest.mark.enable_socket()
+def test_iss1767():
+    # Regression test with a buggy PDF in which the object 389,0 exists three
+    # times: twice to define the catalog and once as an XObject, inducing a
+    # loop when cloning.
+    url = "https://github.com/py-pdf/pypdf/files/11138472/test.pdf"
+    name = "iss1767.pdf"
+    in_pdf = PdfReader(BytesIO(get_pdf_from_url(url, name=name)))
+    PdfWriter(clone_from=in_pdf)