MAINT: Simplify test with None and NullObject (#2829)

py-pdf · Sep 14, 2024 · 8ebd311 · 8ebd311
1 parent 1bbc301
commit 8ebd311
Show file tree

Hide file tree

Showing 13 changed files with 88 additions and 42 deletions.
diff --git a/pypdf/_cmap.py b/pypdf/_cmap.py
@@ -7,8 +7,8 @@
 from .generic import (
  DecodedStreamObject,
  DictionaryObject,
- NullObject,
  StreamObject,
+ is_null_or_none,
 )
 
 
@@ -468,7 +468,7 @@ def compute_space_width(
  cpt += 1
  sp_width = m / max(1, cpt) / 2
 
- if sp_width is None or isinstance(sp_width, NullObject):
+ if is_null_or_none(sp_width):
  sp_width = 0.0
  return sp_width
 
@@ -482,8 +482,9 @@ def type1_alternative(
  if "/FontDescriptor" not in ft:
  return map_dict, space_code, int_entry
  ft_desc = cast(DictionaryObject, ft["/FontDescriptor"]).get("/FontFile")
- if ft_desc is None:
+ if is_null_or_none(ft_desc):
  return map_dict, space_code, int_entry
+ assert ft_desc is not None, "mypy"
  txt = ft_desc.get_object().get_data()
  txt = txt.split(b"eexec\n")[0] # only clear part
  txt = txt.split(b"/Encoding")[1] # to get the encoding part

diff --git a/pypdf/_doc_common.py b/pypdf/_doc_common.py
@@ -85,6 +85,7 @@
  TreeObject,
  ViewerPreferences,
  create_string_object,
+ is_null_or_none,
 )
 from .types import OutlineType, PagemodeType
 from .xmp import XmpInformation
@@ -761,7 +762,7 @@ def _get_inherited(obj: DictionaryObject, key: str) -> Any:
  field = cast(DictionaryObject, field.indirect_reference.get_object()) # type: ignore
  except Exception as exc:
  raise ValueError("field type is invalid") from exc
- if _get_inherited(field, "/FT") is None:
+ if is_null_or_none(_get_inherited(field, "/FT")):
  raise ValueError("field is not valid")
  ret = []
  if field.get("/Subtype", "") == "/Widget":
@@ -852,7 +853,7 @@ def _get_outline(
  return outline
 
  # §12.3.3 Document outline, entries in the outline dictionary
- if lines is not None and "/First" in lines:
+ if not is_null_or_none(lines) and "/First" in lines:
  node = cast(DictionaryObject, lines["/First"])
  self._namedDests = self._get_named_destinations()
 

diff --git a/pypdf/_page.py b/pypdf/_page.py
@@ -84,6 +84,7 @@
  PdfObject,
  RectangleObject,
  StreamObject,
+ is_null_or_none,
 )
 
 try:
@@ -101,7 +102,7 @@ def _get_rectangle(self: Any, name: str, defaults: Iterable[str]) -> RectangleOb
  retval: Union[None, RectangleObject, IndirectObject] = self.get(name)
  if isinstance(retval, RectangleObject):
  return retval
- if retval is None:
+ if is_null_or_none(retval):
  for d in defaults:
  retval = self.get(d)
  if retval is not None:
@@ -492,7 +493,8 @@ def __init__(
  self.inline_images: Optional[Dict[str, ImageFile]] = None
  # below Union for mypy but actually Optional[List[str]]
  self.indirect_reference = indirect_reference
- if indirect_reference is not None:
+ if not is_null_or_none(indirect_reference):
+ assert indirect_reference is not None, "mypy"
  self.update(cast(DictionaryObject, indirect_reference.get_object()))
 
  def hash_bin(self) -> int:
@@ -731,9 +733,10 @@ def _get_inline_images(self) -> Dict[str, ImageFile]:
  entries will be identified as ~1~
  """
  content = self.get_contents()
- if content is None:
+ if is_null_or_none(content):
  return {}
  imgs_data = []
+ assert content is not None, "mypy"
  for param, ope in content.operations:
  if ope == b"INLINE IMAGE":
  imgs_data.append(
@@ -1063,7 +1066,7 @@ def replace_contents(
  for i in range(len(content)):
  content[i] = self.indirect_reference.pdf._add_object(content[i])
 
- if content is None:
+ if is_null_or_none(content):
  if PG.CONTENTS not in self:
  return
  else:
@@ -1084,6 +1087,7 @@ def replace_contents(
  # this will be fixed with the _add_object
  self[NameObject(PG.CONTENTS)] = content
  else:
+ assert content is not None, "mypy"
  content.indirect_reference = self[
  PG.CONTENTS
  ].indirect_reference # TODO: in a future may required generation management
@@ -2218,10 +2222,11 @@ def extract_text(
  if extraction_mode not in ["plain", "layout"]:
  raise ValueError(f"Invalid text extraction mode '{extraction_mode}'")
  if extraction_mode == "layout":
- for visitor in ("visitor_operand_before",
- "visitor_operand_after",
- "visitor_text",
- ):
+ for visitor in (
+ "visitor_operand_before",
+ "visitor_operand_after",
+ "visitor_text",
+ ):
  if locals()[visitor]:
  logger_warning(
  f"Argument {visitor} is ignored in layout mode",

diff --git a/pypdf/_page_labels.py b/pypdf/_page_labels.py
@@ -62,7 +62,13 @@
 
 from ._protocols import PdfCommonDocProtocol
 from ._utils import logger_warning
-from .generic import ArrayObject, DictionaryObject, NullObject, NumberObject
+from .generic import (
+ ArrayObject,
+ DictionaryObject,
+ NullObject,
+ NumberObject,
+ is_null_or_none,
+)
 
 
 def number2uppercase_roman_numeral(num: int) -> str:
@@ -180,11 +186,13 @@ def index2label(reader: PdfCommonDocProtocol, index: int) -> str:
  # kid = {'/Limits': [0, 63], '/Nums': [0, {'/P': 'C1'}, ...]}
  limits = cast(List[int], kid["/Limits"])
  if limits[0] <= index <= limits[1]:
- if kid.get("/Kids", None) is not None:
+ if not is_null_or_none(kid.get("/Kids", None)):
  # Recursive definition.
  level += 1
  if level == 100: # pragma: no cover
- raise NotImplementedError("Too deep nesting is not supported.")
+ raise NotImplementedError(
+ "Too deep nesting is not supported."
+ )
  number_tree = kid
  # Exit the inner `for` loop and continue at the next level with the
  # next iteration of the `while` loop.
@@ -195,10 +203,7 @@ def index2label(reader: PdfCommonDocProtocol, index: int) -> str:
  # and continue with the fallback.
  break
 
- logger_warning(
- f"Could not reliably determine page label for {index}.",
- __name__
- )
+ logger_warning(f"Could not reliably determine page label for {index}.", __name__)
  return str(index + 1) # Fallback if neither /Nums nor /Kids is in the number_tree
 
 

diff --git a/pypdf/_reader.py b/pypdf/_reader.py
@@ -79,6 +79,7 @@
  PdfObject,
  StreamObject,
  TextStringObject,
+ is_null_or_none,
  read_object,
 )
 from .xmp import XmpInformation
@@ -206,11 +207,11 @@ def _info(self) -> Optional[DictionaryObject]:
  /Info Dictionary; None if the entry does not exist
  """
  info = self.trailer.get(TK.INFO, None)
- if info is None:
+ if is_null_or_none(info):
  return None
  else:
  info = info.get_object()
- if info is None:
+ if info == None: # noqa: E711
  raise PdfReadError(
  "Trailer not found or does not point to document information directory"
  )
@@ -225,7 +226,7 @@ def _ID(self) -> Optional[ArrayObject]:
  /ID array; None if the entry does not exist
  """
  id = self.trailer.get(TK.ID, None)
- return None if id is None else cast(ArrayObject, id.get_object())
+ return None if is_null_or_none(id) else cast(ArrayObject, id.get_object())
 
  def _repr_mimebundle_(
  self,
@@ -298,8 +299,9 @@ def _get_page_number_by_indirect(
  x.indirect_reference.idnum: i for i, x in enumerate(self.pages) # type: ignore
  }
 
- if indirect_reference is None or isinstance(indirect_reference, NullObject):
+ if is_null_or_none(indirect_reference):
  return None
+ assert isinstance(indirect_reference, (int, IndirectObject)), "mypy"
  if isinstance(indirect_reference, int):
  idnum = indirect_reference
  else:

diff --git a/pypdf/_writer.py b/pypdf/_writer.py
@@ -107,6 +107,7 @@
  ViewerPreferences,
  create_string_object,
  hex_to_rgb,
+ is_null_or_none,
 )
 from .pagerange import PageRange, PageRangeSpec
 from .types import (
@@ -499,7 +500,7 @@ def _add_page(
  cast(ArrayObject, node[PA.KIDS]).append(page.indirect_reference)
  self.flattened_pages.append(page)
  cpt = 1000
- while node is not None:
+ while not is_null_or_none(node):
  node = cast(DictionaryObject, node.get_object())
  node[NameObject(PA.COUNT)] = NumberObject(cast(int, node[PA.COUNT]) + 1)
  node = node.get(PA.PARENT, None)
@@ -612,8 +613,9 @@ def _get_page_number_by_indirect(
  The page number or None
  """
  # to provide same function as in PdfReader
- if indirect_reference is None or isinstance(indirect_reference, NullObject):
+ if is_null_or_none(indirect_reference):
  return None
+ assert indirect_reference is not None, "mypy"
  if isinstance(indirect_reference, int):
  indirect_reference = IndirectObject(indirect_reference, 0, self)
  obj = indirect_reference.get_object()
@@ -928,7 +930,7 @@ def _update_field_annotation(
  )
  dr = dr.get_object().get("/Font", DictionaryObject()).get_object()
  font_res = dr.get(font_name, None)
- if font_res is not None:
+ if not is_null_or_none(font_res):
  font_res = cast(DictionaryObject, font_res.get_object())
  font_subtype, _, font_encoding, font_map = build_char_map_from_dict(
  200, font_res
@@ -1566,9 +1568,9 @@ def metadata(self) -> Optional[DocumentInformation]:
  Retrieve/set the PDF file's document information dictionary, if it exists.
 
  Args:
- value: Dictionary with the entries to set. If None, remove the /Info entry from the PDF.
+ value: dict with the entries to be set. if None : remove the /Info entry from the pdf.
 
- Note that some PDF files use (XMP) metadata streams instead of document
+ Note that some PDF files use (xmp)metadata streams instead of document
  information dictionaries, and these metadata streams will not be
  accessed by this function.
  """
@@ -2981,7 +2983,7 @@ def _get_filtered_outline(
  if node is None:
  node = NullObject()
  node = node.get_object()
- if node is None or isinstance(node, NullObject):
+ if is_null_or_none(node):
  node = DictionaryObject()
  if node.get("/Type", "") == "/Outlines" or "/Title" not in node:
  node = node.get("/First", None)

diff --git a/pypdf/filters.py b/pypdf/filters.py
@@ -746,9 +746,7 @@ def _xobj_to_image(x_object_obj: Dict[str, Any]) -> Tuple[Optional[str], bytes,
  )
 
  # for error reporting
- if (
- hasattr(x_object_obj, "indirect_reference") and x_object_obj is None
- ): # pragma: no cover
+ if x_object_obj is None: # pragma: no cover
  obj_as_text = x_object_obj.indirect_reference.__repr__()
  else:
  obj_as_text = x_object_obj.__repr__()

diff --git a/pypdf/generic/__init__.py b/pypdf/generic/__init__.py
@@ -46,6 +46,7 @@
  PdfObject,
  TextStringObject,
  encode_pdfdocencoding,
+ is_null_or_none,
 )
 from ._data_structures import (
  ArrayObject,
@@ -235,6 +236,7 @@ def link(
  "encode_pdfdocencoding",
  "decode_pdfdocencoding",
  "hex_to_rgb",
+ "is_null_or_none",
  "read_hex_string_from_stream",
  "read_string_from_stream",
 ]
diff --git a/pypdf/generic/_base.py b/pypdf/generic/_base.py
@@ -214,6 +214,16 @@ def __repr__(self) -> str:
  return "NullObject"
 
 
+def is_null_or_none(x: Any) -> bool:
+ """
+ Check whether a value is Python ``None`` or stands for a PDF null object.
+
+ Args:
+ x: Any value. Only instances of PdfObject are inspected further, by
+ calling their get_object() method.
+
+ Returns:
+ True if x is None, or if x is a PdfObject whose get_object() result
+ is a NullObject; False otherwise.
+ """
+ # NOTE(review): a PdfObject whose get_object() returns plain None yields
+ # False here, because only a NullObject result is tested - confirm whether
+ # that case can occur for the callers of this helper.
+ return x is None or (
+ isinstance(x, PdfObject) and isinstance(x.get_object(), NullObject)
+ )
+
+
 class BooleanObject(PdfObject):
  def __init__(self, value: Any) -> None:
  self.value = value

diff --git a/pypdf/generic/_data_structures.py b/pypdf/generic/_data_structures.py
@@ -79,6 +79,7 @@
  NumberObject,
  PdfObject,
  TextStringObject,
+ is_null_or_none,
 )
 from ._fit import Fit
 from ._image_inline import (
@@ -451,7 +452,7 @@ def xmp_metadata(self) -> Optional[XmpInformationProtocol]:
  from ..xmp import XmpInformation
 
  metadata = self.get("/Metadata", None)
- if metadata is None:
+ if is_null_or_none(metadata):
  return None
  metadata = metadata.get_object()
 
@@ -651,7 +652,7 @@ def children(self) -> Iterable[Any]:
  if child == self[NameObject("/Last")]:
  return
  child_ref = child.get(NameObject("/Next")) # type: ignore
- if child_ref is None:
+ if is_null_or_none(child_ref):
  return
  child = child_ref.get_object()
 
@@ -661,8 +662,9 @@ def add_child(self, child: Any, pdf: PdfWriterProtocol) -> None:
  def inc_parent_counter_default(
  self, parent: Union[None, IndirectObject, "TreeObject"], n: int
  ) -> None:
- if parent is None:
+ if is_null_or_none(parent):
  return
+ assert parent is not None, "mypy"
  parent = cast("TreeObject", parent.get_object())
  if "/Count" in parent:
  parent[NameObject("/Count")] = NumberObject(
@@ -673,8 +675,9 @@ def inc_parent_counter_default(
  def inc_parent_counter_outline(
  self, parent: Union[None, IndirectObject, "TreeObject"], n: int
  ) -> None:
- if parent is None:
+ if is_null_or_none(parent):
  return
+ assert parent is not None, "mypy"
  parent = cast("TreeObject", parent.get_object())
  # BooleanObject requires comparison with == not is
  opn = parent.get("/%is_open%", True) == True # noqa

diff --git a/pypdf/generic/_fit.py b/pypdf/generic/_fit.py
@@ -1,5 +1,7 @@
 from typing import Any, Optional, Tuple, Union
 
+from ._base import is_null_or_none
+
 
 class Fit:
  def __init__(
@@ -9,8 +11,7 @@ def __init__(
 
  self.fit_type = NameObject(fit_type)
  self.fit_args = [
- NullObject() if a is None or isinstance(a, NullObject) else FloatObject(a)
- for a in fit_args
+ NullObject() if is_null_or_none(a) else FloatObject(a) for a in fit_args
  ]
 
  @classmethod