diff --git a/docs/user/handle-attachments.md b/docs/user/handle-attachments.md index 6b4aec63e..fd2186a5e 100644 --- a/docs/user/handle-attachments.md +++ b/docs/user/handle-attachments.md @@ -71,3 +71,16 @@ embedded_file.write("output.pdf") The same functionality is available if you iterate over the attachments of a writer using `writer.attachment_list`. + +## Delete Attachments + +To delete an attachment, call its `delete()` method. The following example adds an attachment and then removes it again: + +```python +from pypdf import PdfWriter + +writer = PdfWriter(clone_from="example.pdf") +attachment = writer.add_attachment(filename="test.txt", data=b"Hello World!") +attachment.delete() +assert list(writer.attachment_list) == [] +``` diff --git a/pypdf/generic/_files.py b/pypdf/generic/_files.py index ecdb61351..4ac6fdcfb 100644 --- a/pypdf/generic/_files.py +++ b/pypdf/generic/_files.py @@ -7,7 +7,7 @@ from pypdf.constants import CatalogAttributes as CA from pypdf.constants import FileSpecificationDictionaryEntries from pypdf.constants import PageAttributes as PG -from pypdf.errors import PdfReadError +from pypdf.errors import PdfReadError, PyPdfError from pypdf.generic import ( ArrayObject, ByteStringObject, @@ -36,14 +36,16 @@ class EmbeddedFile: Further information on embedded files can be found in section 7.11 of the PDF 2.0 specification. """ - def __init__(self, name: str, pdf_object: DictionaryObject) -> None: + def __init__(self, name: str, pdf_object: DictionaryObject, parent: ArrayObject | None = None) -> None: """ Args: name: The (primary) name as provided in the name tree. pdf_object: The corresponding PDF object to allow retrieving further data. + parent: The names array holding this file's name and file specification entries. Required for deleting the file.
""" self._name = name self.pdf_object = pdf_object + self._parent = parent @property def name(self) -> str: @@ -105,7 +107,7 @@ def _create_new(cls, writer: PdfWriter, name: str, content: str | bytes) -> Embe names_array.extend([create_string_object(name), filespec]) # Return an EmbeddedFile instance - return cls(name=name, pdf_object=filespec) + return cls(name=name, pdf_object=filespec, parent=names_array) @property def alternative_name(self) -> str | None: @@ -276,6 +278,17 @@ def checksum(self, value: ByteStringObject | None) -> None: else: params[NameObject("/CheckSum")] = value + def delete(self) -> None: + """Delete the file from the document.""" + if not self._parent: + raise PyPdfError("Parent required to delete file from document.") + if self.pdf_object not in self._parent: + raise PyPdfError("File not found in parent object.") + index = self._parent.index(self.pdf_object) + self._parent.pop(index) # Reference. + self._parent.pop(index - 1) # Name. + self.pdf_object = DictionaryObject() # Invalidate. + def __repr__(self) -> str: return f"<{self.__class__.__name__} name={self.name!r}>" @@ -296,7 +309,7 @@ def _load_from_names(cls, names: ArrayObject) -> Generator[EmbeddedFile]: # Skip plain strings and retrieve them as `direct_name` by index. 
file_dictionary = name.get_object() direct_name = names[i - 1].get_object() - yield EmbeddedFile(name=direct_name, pdf_object=file_dictionary) + yield EmbeddedFile(name=direct_name, pdf_object=file_dictionary, parent=names) @classmethod def _load(cls, catalog: DictionaryObject) -> Generator[EmbeddedFile]: diff --git a/tests/generic/test_files.py b/tests/generic/test_files.py index 8f104bc56..92001e2da 100644 --- a/tests/generic/test_files.py +++ b/tests/generic/test_files.py @@ -8,7 +8,7 @@ import pytest from pypdf import PdfReader, PdfWriter -from pypdf.errors import PdfReadError +from pypdf.errors import PdfReadError, PyPdfError from pypdf.generic import ( ByteStringObject, DictionaryObject, @@ -394,3 +394,30 @@ def test_embedded_file_null_object_handling(): assert embedded_file.subtype is None assert embedded_file.size is None assert embedded_file.checksum is None + + +def test_embedded_file__delete_without_parent(): + attachment = EmbeddedFile(name="test.txt", pdf_object=DictionaryObject()) + with pytest.raises(PyPdfError, match=r"^Parent required to delete file from document\.$"): + attachment.delete() + + +def test_embedded_file__delete_known(): + writer = PdfWriter() + writer.add_blank_page(100, 100) + attachment = writer.add_attachment("test.txt", b"content") + writer.add_attachment("test2.txt", b"content2") + + attachments = list(writer.attachment_list) + assert len(attachments) == 2 + attachment.delete() + with pytest.raises(PdfReadError, match=r"^/EF entry not found: {}$"): + _ = attachment.content + + attachments = list(writer.attachment_list) + assert len(attachments) == 1 + assert attachments[0].name == "test2.txt" + + # Delete second time. + with pytest.raises(PyPdfError, match=r"^File not found in parent object\.$"): + attachment.delete()