Skip to content

Commit

Permalink
fix but still some mypy
Browse files Browse the repository at this point in the history
  • Loading branch information
pubpub-zz committed Nov 1, 2023
1 parent 991c07f commit a0ee1a4
Show file tree
Hide file tree
Showing 6 changed files with 110 additions and 192 deletions.
105 changes: 4 additions & 101 deletions pypdf/_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@
Dict,
Iterable,
List,
Mapping,
Optional,
Tuple,
Union,
Expand Down Expand Up @@ -86,7 +85,7 @@
)
from .generic import (
ArrayObject,
AttachmentBytes,
AttachmentBytesDictionary,
BooleanObject,
ContentStream,
DecodedStreamObject,
Expand All @@ -102,11 +101,9 @@
NullObject,
NumberObject,
PdfObject,
StreamObject,
TextStringObject,
TreeObject,
ViewerPreferences,
get_from_file_specification,
read_object,
)
from .types import OutlineType, PagemodeType
Expand Down Expand Up @@ -2238,13 +2235,10 @@ def attachments_names(self) -> List[str]:
Returns:
List of names
"""
ef = self._get_embedded_files_root()
if ef is None:
return []
return ef.list_keys()
return self.attachments.keys()

@property
def attachments(self) -> Mapping[str, AttachmentBytes]:
def attachments(self) -> AttachmentBytesDictionary:
"""
extracts the /EF entries as bytes from the embedded files
Returns:
Expand All @@ -2255,98 +2249,7 @@ def attachments(self) -> Mapping[str, AttachmentBytes]:
Note:
If you want to access /RF
"""
ef = self._get_embedded_files_root()
if ef is None:
return {}
d: Dict[str, AttachmentBytes] = {}
for k, v in ef.list_items().items():
if len(v) > 1:
logger_warning(
"Unexpected amout of entries in attachments, please report"
"and share the file for analysis with pypdf dev team",
__name__,
)
d[k] = AttachmentBytes(cast(DictionaryObject, v[0].get_object()))
return d

def _list_attachments(self) -> List[str]:
"""
Retrieves the list of filenames of file attachments.
Returns:
list of filenames
"""
ef = self._get_embedded_files_root()
if ef:
lst = ef.list_keys()
else:
lst = []
"""
for ip, p in enumerate(self.pages):
for a in [_a.get_object()
for _a in p.get("/Annots",[])]:
if _a.get_object().get("/Subtype","") != "/FileAttachements":
continue
lst.append(f"$page_{ip}.{get_name_from_file_specification(_a)}")
"""
return lst

def _get_attachment_list(self, name: str) -> List[Union[bytes, Dict[str, bytes]]]:
out = self._get_attachments(name)[name]
if isinstance(out, list):
return out
return [out]

def _get_attachments(
self, filename: Optional[str] = None
) -> Dict[str, List[Union[bytes, Dict[str, bytes]]]]:
"""
Retrieves all or selected file attachments of the PDF as a dictionary of file names
and the file data as a bytestring.
Args:
filename: If filename is None, then a dictionary of all attachments
will be returned, where the key is the filename and the value
is the content. Otherwise, a dictionary with just a single key
- the filename - and its content will be returned.
Returns:
dictionary of filename -> Union[bytestring or List[ByteString]]
if the filename exists multiple times a List of the different version will be provided
"""
ef = self._get_embedded_files_root()
if ef is None:
return {}
if filename is None:
return {k: v if len(v) > 1 else v[0] for k, v in self.attachments.items()} # type: ignore
else:
lst = ef.list_get(filename)
if lst is None:
return {}
lst = cast(DictionaryObject, lst.get_object())
efo = cast(DictionaryObject, lst["/EF"].get_object())
rst = cast(
StreamObject,
get_from_file_specification(efo).get_object(),
).get_data()
if isinstance(rst, str):
rst = rst.encode()
if "/RF" not in lst:
return {filename: [rst]}
else:
rst2 = {"": rst} # /EF will be returned by empty key
lst = cast(
ArrayObject,
get_from_file_specification(
cast(DictionaryObject, lst["/RF"].get_object())
),
)
for i in range(0, len(lst), 2):
t = cast(StreamObject, lst[i + 1].get_object()).get_data()
if isinstance(t, str):
t = t.encode()
rst2[lst[i]] = t
return {filename: [rst2]}
return AttachmentBytesDictionary(self._get_embedded_files_root())


class PdfFileReader(PdfReader): # deprecated
Expand Down
62 changes: 20 additions & 42 deletions pypdf/_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@
Dict,
Iterable,
List,
Mapping,
Optional,
Pattern,
Tuple,
Expand Down Expand Up @@ -96,6 +95,7 @@
from .generic import (
PAGE_FIT,
ArrayObject,
AttachmentBytesDictionary,
BooleanObject,
ByteStringObject,
ContentStream,
Expand Down Expand Up @@ -740,48 +740,26 @@ def _create_attachment_root(self) -> NameTree:
return node

@property
def embedded_files(self) -> Optional[Mapping[str, List[PdfObject]]]:
ef = self._get_embedded_files_root()
if ef:
return ef.list_items()
else:
return None

def _list_attachments(self) -> List[str]:
ef = self._get_embedded_files_root()
if ef:
return ef.list_keys()
else:
return []
def attachments_names(self) -> List[str]:
"""
Returns:
List of names
"""
return self.attachments.keys()

@property
def attachments(self) -> Mapping[str, List[Union[bytes, Dict[str, bytes]]]]:
ef = self._get_embedded_files_root()
if ef:
d: Dict[str, List[Union[bytes, Dict[str, bytes]]]] = {}
for k, v in ef.list_items().items():
if isinstance(v, list):
if k not in d:
d[k] = []
for e in v:
e = cast(DictionaryObject, e.get_object())
if "/EF" in e:
d[k].append(e["/EF"]["/F"].get_data()) # type: ignore
elif "/RF" in e:
r = cast(
ArrayObject, cast(DictionaryObject, e["/RF"])["/F"]
)
di = {}
i = 0
while i < len(r):
di[cast(str, r[i])] = cast(
bytes, r[i + 1].get_object().get_data()
)
i += 2
d[k].append(di)
return d
else:
return {}
def attachments(self) -> AttachmentBytesDictionary:
"""
extracts the /EF entries as bytes from the embedded files
Returns:
Dictionary with the filenames as keys and the file content as bytes,
extra data can be accessed with AttachmentBytes extra properties (.name,
.list_rf_names(), .get_embeddedfile(), .all_files)
Note:
If you want to access /RF
"""
return AttachmentBytesDictionary(self._get_embedded_files_root())

def add_attachment(
self,
Expand All @@ -808,7 +786,7 @@ def add_attachment(
Returns:
The filespec DictionaryObject
"""
if not overwrite and filename in self._list_attachments():
if not overwrite and filename in self.attachments_names:
return None
if fname is None:
st = filename.replace("/", "\\/").replace("\\\\/", "\\/")
Expand Down
2 changes: 2 additions & 0 deletions pypdf/generic/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
PREFERED_ATTACHMENT,
ArrayObject,
AttachmentBytes,
AttachmentBytesDictionary,
ContentStream,
DecodedStreamObject,
Destination,
Expand Down Expand Up @@ -440,6 +441,7 @@ def link(
# Data structures
"ArrayObject",
"AttachmentBytes",
"AttachmentBytesDictionary",
"DictionaryObject",
"TreeObject",
"StreamObject",
Expand Down
Loading

0 comments on commit a0ee1a4

Please sign in to comment.