fix but still some mypy

py-pdf · Nov 1, 2023 · a0ee1a4 · a0ee1a4
1 parent 991c07f
commit a0ee1a4
Show file tree

Hide file tree

Showing 6 changed files with 110 additions and 192 deletions.
diff --git a/pypdf/_reader.py b/pypdf/_reader.py
@@ -40,7 +40,6 @@
  Dict,
  Iterable,
  List,
- Mapping,
  Optional,
  Tuple,
  Union,
@@ -86,7 +85,7 @@
 )
 from .generic import (
  ArrayObject,
- AttachmentBytes,
+ AttachmentBytesDictionary,
  BooleanObject,
  ContentStream,
  DecodedStreamObject,
@@ -102,11 +101,9 @@
  NullObject,
  NumberObject,
  PdfObject,
- StreamObject,
  TextStringObject,
  TreeObject,
  ViewerPreferences,
- get_from_file_specification,
  read_object,
 )
 from .types import OutlineType, PagemodeType
@@ -2238,13 +2235,10 @@ def attachments_names(self) -> List[str]:
  Returns:
  List of names
  """
- ef = self._get_embedded_files_root()
- if ef is None:
- return []
- return ef.list_keys()
+ return self.attachments.keys()
 
  @property
- def attachments(self) -> Mapping[str, AttachmentBytes]:
+ def attachments(self) -> AttachmentBytesDictionary:
  """
  extracts the /EF entries as bytes from the embedded files
  Returns:
@@ -2255,98 +2249,7 @@ def attachments(self) -> Mapping[str, AttachmentBytes]:
  Note:
  If you want to access /RF
  """
- ef = self._get_embedded_files_root()
- if ef is None:
- return {}
- d: Dict[str, AttachmentBytes] = {}
- for k, v in ef.list_items().items():
- if len(v) > 1:
- logger_warning(
- "Unexpected amout of entries in attachments, please report"
- "and share the file for analysis with pypdf dev team",
- __name__,
- )
- d[k] = AttachmentBytes(cast(DictionaryObject, v[0].get_object()))
- return d
-
- def _list_attachments(self) -> List[str]:
- """
- Retrieves the list of filenames of file attachments.
-
- Returns:
- list of filenames
- """
- ef = self._get_embedded_files_root()
- if ef:
- lst = ef.list_keys()
- else:
- lst = []
- """
- for ip, p in enumerate(self.pages):
- for a in [_a.get_object()
- for _a in p.get("/Annots",[])]:
- if _a.get_object().get("/Subtype","") != "/FileAttachements":
- continue
- lst.append(f"$page_{ip}.{get_name_from_file_specification(_a)}")
- """
- return lst
-
- def _get_attachment_list(self, name: str) -> List[Union[bytes, Dict[str, bytes]]]:
- out = self._get_attachments(name)[name]
- if isinstance(out, list):
- return out
- return [out]
-
- def _get_attachments(
- self, filename: Optional[str] = None
- ) -> Dict[str, List[Union[bytes, Dict[str, bytes]]]]:
- """
- Retrieves all or selected file attachments of the PDF as a dictionary of file names
- and the file data as a bytestring.
-
- Args:
- filename: If filename is None, then a dictionary of all attachments
- will be returned, where the key is the filename and the value
- is the content. Otherwise, a dictionary with just a single key
- - the filename - and its content will be returned.
-
- Returns:
- dictionary of filename -> Union[bytestring or List[ByteString]]
- if the filename exists multiple times a List of the different version will be provided
- """
- ef = self._get_embedded_files_root()
- if ef is None:
- return {}
- if filename is None:
- return {k: v if len(v) > 1 else v[0] for k, v in self.attachments.items()} # type: ignore
- else:
- lst = ef.list_get(filename)
- if lst is None:
- return {}
- lst = cast(DictionaryObject, lst.get_object())
- efo = cast(DictionaryObject, lst["/EF"].get_object())
- rst = cast(
- StreamObject,
- get_from_file_specification(efo).get_object(),
- ).get_data()
- if isinstance(rst, str):
- rst = rst.encode()
- if "/RF" not in lst:
- return {filename: [rst]}
- else:
- rst2 = {"": rst} # /EF will be returned by empty key
- lst = cast(
- ArrayObject,
- get_from_file_specification(
- cast(DictionaryObject, lst["/RF"].get_object())
- ),
- )
- for i in range(0, len(lst), 2):
- t = cast(StreamObject, lst[i + 1].get_object()).get_data()
- if isinstance(t, str):
- t = t.encode()
- rst2[lst[i]] = t
- return {filename: [rst2]}
+ return AttachmentBytesDictionary(self._get_embedded_files_root())
 
 
 class PdfFileReader(PdfReader): # deprecated

diff --git a/pypdf/_writer.py b/pypdf/_writer.py
@@ -46,7 +46,6 @@
  Dict,
  Iterable,
  List,
- Mapping,
  Optional,
  Pattern,
  Tuple,
@@ -96,6 +95,7 @@
 from .generic import (
  PAGE_FIT,
  ArrayObject,
+ AttachmentBytesDictionary,
  BooleanObject,
  ByteStringObject,
  ContentStream,
@@ -740,48 +740,26 @@ def _create_attachment_root(self) -> NameTree:
  return node
 
  @property
- def embedded_files(self) -> Optional[Mapping[str, List[PdfObject]]]:
- ef = self._get_embedded_files_root()
- if ef:
- return ef.list_items()
- else:
- return None
-
- def _list_attachments(self) -> List[str]:
- ef = self._get_embedded_files_root()
- if ef:
- return ef.list_keys()
- else:
- return []
+ def attachments_names(self) -> List[str]:
+ """
+ Returns:
+ List of names
+ """
+ return self.attachments.keys()
 
  @property
- def attachments(self) -> Mapping[str, List[Union[bytes, Dict[str, bytes]]]]:
- ef = self._get_embedded_files_root()
- if ef:
- d: Dict[str, List[Union[bytes, Dict[str, bytes]]]] = {}
- for k, v in ef.list_items().items():
- if isinstance(v, list):
- if k not in d:
- d[k] = []
- for e in v:
- e = cast(DictionaryObject, e.get_object())
- if "/EF" in e:
- d[k].append(e["/EF"]["/F"].get_data()) # type: ignore
- elif "/RF" in e:
- r = cast(
- ArrayObject, cast(DictionaryObject, e["/RF"])["/F"]
- )
- di = {}
- i = 0
- while i < len(r):
- di[cast(str, r[i])] = cast(
- bytes, r[i + 1].get_object().get_data()
- )
- i += 2
- d[k].append(di)
- return d
- else:
- return {}
+ def attachments(self) -> AttachmentBytesDictionary:
+ """
+ extracts the /EF entries as bytes from the embedded files
+ Returns:
+ Dictionary with the filenames as keys and the file content as bytes,
+ extra data can be accessed with AttachmentBytes extra properties (.name,
+ .list_rf_names(), .get_embeddedfile(), .all_files)
+
+ Note:
+ If you want to access /RF
+ """
+ return AttachmentBytesDictionary(self._get_embedded_files_root())
 
  def add_attachment(
  self,
@@ -808,7 +786,7 @@ def add_attachment(
  Returns:
  The filespec DictionaryObject
  """
- if not overwrite and filename in self._list_attachments():
+ if not overwrite and filename in self.attachments_names:
  return None
  if fname is None:
  st = filename.replace("/", "\\/").replace("\\\\/", "\\/")

diff --git a/pypdf/generic/__init__.py b/pypdf/generic/__init__.py
@@ -49,6 +49,7 @@
  PREFERED_ATTACHMENT,
  ArrayObject,
  AttachmentBytes,
+ AttachmentBytesDictionary,
  ContentStream,
  DecodedStreamObject,
  Destination,
@@ -440,6 +441,7 @@ def link(
  # Data structures
  "ArrayObject",
  "AttachmentBytes",
+ "AttachmentBytesDictionary",
  "DictionaryObject",
  "TreeObject",
  "StreamObject",