Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions docs/user/handle-attachments.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,3 +71,16 @@ embedded_file.write("output.pdf")

The same functionality is available if you iterate over the attachments of a writer
using `writer.attachment_list`.

## Delete Attachments

To delete an attachment, call `delete()` on the corresponding attachment object:

```python
from pypdf import PdfWriter

writer = PdfWriter(clone_from="example.pdf")
attachment = writer.add_attachment(filename="test.txt", data=b"Hello World!")
attachment.delete()
assert list(writer.attachment_list) == []
```
21 changes: 17 additions & 4 deletions pypdf/generic/_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from pypdf.constants import CatalogAttributes as CA
from pypdf.constants import FileSpecificationDictionaryEntries
from pypdf.constants import PageAttributes as PG
from pypdf.errors import PdfReadError
from pypdf.errors import PdfReadError, PyPdfError
from pypdf.generic import (
ArrayObject,
ByteStringObject,
Expand Down Expand Up @@ -36,14 +36,16 @@ class EmbeddedFile:

Further information on embedded files can be found in section 7.11 of the PDF 2.0 specification.
"""
def __init__(self, name: str, pdf_object: DictionaryObject) -> None:
def __init__(self, name: str, pdf_object: DictionaryObject, parent: ArrayObject | None = None) -> None:
    """
    Store the name, the backing PDF object and the optional parent array.

    Args:
        name: The (primary) name as provided in the name tree.
        pdf_object: The corresponding PDF object to allow retrieving further data.
        parent: The parent list, i.e. the array this entry belongs to.
            ``None`` if the instance is not associated with one.
    """
    self._parent = parent
    self.pdf_object = pdf_object
    self._name = name

@property
def name(self) -> str:
Expand Down Expand Up @@ -105,7 +107,7 @@ def _create_new(cls, writer: PdfWriter, name: str, content: str | bytes) -> Embe
names_array.extend([create_string_object(name), filespec])

# Return an EmbeddedFile instance
return cls(name=name, pdf_object=filespec)
return cls(name=name, pdf_object=filespec, parent=names_array)

@property
def alternative_name(self) -> str | None:
Expand Down Expand Up @@ -276,6 +278,17 @@ def checksum(self, value: ByteStringObject | None) -> None:
else:
params[NameObject("/CheckSum")] = value

def delete(self) -> None:
    """
    Delete the file from the document.

    Removes this file's entry, together with the name stored directly in
    front of it, from the parent array. Afterwards ``pdf_object`` is replaced
    by an empty dictionary so that this instance no longer exposes the
    deleted data.

    Raises:
        PyPdfError: If no parent array is known, or if the file is not
            (or no longer) part of the parent array.
    """
    parent = self._parent
    # Compare against None explicitly: an empty array is falsy but is still a
    # known parent and has to be reported as "not found" below.
    if parent is None:
        raise PyPdfError("Parent required to delete file from document.")

    # The array may hold the dictionary itself or only a reference to it
    # (loaded entries are resolved with ``get_object()`` before being wrapped),
    # so look for both forms.
    # NOTE(review): assumes the resolved object exposes its reference as
    # ``indirect_reference``; objects without that attribute simply skip
    # the second lookup.
    reference = getattr(self.pdf_object, "indirect_reference", None)
    if self.pdf_object in parent:
        index = parent.index(self.pdf_object)
    elif reference is not None and reference in parent:
        index = parent.index(reference)
    else:
        raise PyPdfError("File not found in parent object.")

    parent.pop(index)  # Reference.
    if index > 0:
        # Guard against ``pop(-1)``, which would drop the unrelated last
        # element if the entry has no preceding name.
        parent.pop(index - 1)  # Name.
    self.pdf_object = DictionaryObject()  # Invalidate.

def __repr__(self) -> str:
    """Return a short debug representation showing the class and the file name."""
    representation = f"<{self.__class__.__name__} name={self.name!r}>"
    return representation

Expand All @@ -296,7 +309,7 @@ def _load_from_names(cls, names: ArrayObject) -> Generator[EmbeddedFile]:
# Skip plain strings and retrieve them as `direct_name` by index.
file_dictionary = name.get_object()
direct_name = names[i - 1].get_object()
yield EmbeddedFile(name=direct_name, pdf_object=file_dictionary)
yield EmbeddedFile(name=direct_name, pdf_object=file_dictionary, parent=names)

@classmethod
def _load(cls, catalog: DictionaryObject) -> Generator[EmbeddedFile]:
Expand Down
29 changes: 28 additions & 1 deletion tests/generic/test_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import pytest

from pypdf import PdfReader, PdfWriter
from pypdf.errors import PdfReadError
from pypdf.errors import PdfReadError, PyPdfError
from pypdf.generic import (
ByteStringObject,
DictionaryObject,
Expand Down Expand Up @@ -394,3 +394,30 @@ def test_embedded_file_null_object_handling():
assert embedded_file.subtype is None
assert embedded_file.size is None
assert embedded_file.checksum is None


def test_embedded_file__delete_without_parent():
    """An instance created without a parent array cannot be deleted."""
    orphan = EmbeddedFile(name="test.txt", pdf_object=DictionaryObject())
    with pytest.raises(
        PyPdfError,
        match=r"^Parent required to delete file from document\.$",
    ):
        orphan.delete()


def test_embedded_file__delete_known():
    """Deleting one of two attachments removes only that one; repeating it fails."""
    pdf_writer = PdfWriter()
    pdf_writer.add_blank_page(100, 100)
    first = pdf_writer.add_attachment("test.txt", b"content")
    pdf_writer.add_attachment("test2.txt", b"content2")

    before = list(pdf_writer.attachment_list)
    assert len(before) == 2

    first.delete()
    # The deleted instance is invalidated, so its content is no longer available.
    with pytest.raises(PdfReadError, match=r"^/EF entry not found: {}$"):
        _ = first.content

    remaining = list(pdf_writer.attachment_list)
    assert len(remaining) == 1
    assert remaining[0].name == "test2.txt"

    # Delete second time.
    with pytest.raises(PyPdfError, match=r"^File not found in parent object\.$"):
        first.delete()
Loading