Skip to content

Commit

Permalink
MAINT: Simplify test with None and NullObject (#2829)
Browse files Browse the repository at this point in the history
  • Loading branch information
pubpub-zz authored Sep 14, 2024
1 parent 1bbc301 commit 8ebd311
Show file tree
Hide file tree
Showing 13 changed files with 88 additions and 42 deletions.
7 changes: 4 additions & 3 deletions pypdf/_cmap.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
from .generic import (
DecodedStreamObject,
DictionaryObject,
NullObject,
StreamObject,
is_null_or_none,
)


Expand Down Expand Up @@ -468,7 +468,7 @@ def compute_space_width(
cpt += 1
sp_width = m / max(1, cpt) / 2

if sp_width is None or isinstance(sp_width, NullObject):
if is_null_or_none(sp_width):
sp_width = 0.0
return sp_width

Expand All @@ -482,8 +482,9 @@ def type1_alternative(
if "/FontDescriptor" not in ft:
return map_dict, space_code, int_entry
ft_desc = cast(DictionaryObject, ft["/FontDescriptor"]).get("/FontFile")
if ft_desc is None:
if is_null_or_none(ft_desc):
return map_dict, space_code, int_entry
assert ft_desc is not None, "mypy"
txt = ft_desc.get_object().get_data()
txt = txt.split(b"eexec\n")[0] # only clear part
txt = txt.split(b"/Encoding")[1] # to get the encoding part
Expand Down
5 changes: 3 additions & 2 deletions pypdf/_doc_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@
TreeObject,
ViewerPreferences,
create_string_object,
is_null_or_none,
)
from .types import OutlineType, PagemodeType
from .xmp import XmpInformation
Expand Down Expand Up @@ -761,7 +762,7 @@ def _get_inherited(obj: DictionaryObject, key: str) -> Any:
field = cast(DictionaryObject, field.indirect_reference.get_object()) # type: ignore
except Exception as exc:
raise ValueError("field type is invalid") from exc
if _get_inherited(field, "/FT") is None:
if is_null_or_none(_get_inherited(field, "/FT")):
raise ValueError("field is not valid")
ret = []
if field.get("/Subtype", "") == "/Widget":
Expand Down Expand Up @@ -852,7 +853,7 @@ def _get_outline(
return outline

# §12.3.3 Document outline, entries in the outline dictionary
if lines is not None and "/First" in lines:
if not is_null_or_none(lines) and "/First" in lines:
node = cast(DictionaryObject, lines["/First"])
self._namedDests = self._get_named_destinations()

Expand Down
21 changes: 13 additions & 8 deletions pypdf/_page.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@
PdfObject,
RectangleObject,
StreamObject,
is_null_or_none,
)

try:
Expand All @@ -101,7 +102,7 @@ def _get_rectangle(self: Any, name: str, defaults: Iterable[str]) -> RectangleOb
retval: Union[None, RectangleObject, IndirectObject] = self.get(name)
if isinstance(retval, RectangleObject):
return retval
if retval is None:
if is_null_or_none(retval):
for d in defaults:
retval = self.get(d)
if retval is not None:
Expand Down Expand Up @@ -492,7 +493,8 @@ def __init__(
self.inline_images: Optional[Dict[str, ImageFile]] = None
# below Union for mypy but actually Optional[List[str]]
self.indirect_reference = indirect_reference
if indirect_reference is not None:
if not is_null_or_none(indirect_reference):
assert indirect_reference is not None, "mypy"
self.update(cast(DictionaryObject, indirect_reference.get_object()))

def hash_bin(self) -> int:
Expand Down Expand Up @@ -731,9 +733,10 @@ def _get_inline_images(self) -> Dict[str, ImageFile]:
entries will be identified as ~1~
"""
content = self.get_contents()
if content is None:
if is_null_or_none(content):
return {}
imgs_data = []
assert content is not None, "mypy"
for param, ope in content.operations:
if ope == b"INLINE IMAGE":
imgs_data.append(
Expand Down Expand Up @@ -1063,7 +1066,7 @@ def replace_contents(
for i in range(len(content)):
content[i] = self.indirect_reference.pdf._add_object(content[i])

if content is None:
if is_null_or_none(content):
if PG.CONTENTS not in self:
return
else:
Expand All @@ -1084,6 +1087,7 @@ def replace_contents(
# this will be fixed with the _add_object
self[NameObject(PG.CONTENTS)] = content
else:
assert content is not None, "mypy"
content.indirect_reference = self[
PG.CONTENTS
].indirect_reference # TODO: in a future may required generation management
Expand Down Expand Up @@ -2218,10 +2222,11 @@ def extract_text(
if extraction_mode not in ["plain", "layout"]:
raise ValueError(f"Invalid text extraction mode '{extraction_mode}'")
if extraction_mode == "layout":
for visitor in ("visitor_operand_before",
"visitor_operand_after",
"visitor_text",
):
for visitor in (
"visitor_operand_before",
"visitor_operand_after",
"visitor_text",
):
if locals()[visitor]:
logger_warning(
f"Argument {visitor} is ignored in layout mode",
Expand Down
19 changes: 12 additions & 7 deletions pypdf/_page_labels.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,13 @@

from ._protocols import PdfCommonDocProtocol
from ._utils import logger_warning
from .generic import ArrayObject, DictionaryObject, NullObject, NumberObject
from .generic import (
ArrayObject,
DictionaryObject,
NullObject,
NumberObject,
is_null_or_none,
)


def number2uppercase_roman_numeral(num: int) -> str:
Expand Down Expand Up @@ -180,11 +186,13 @@ def index2label(reader: PdfCommonDocProtocol, index: int) -> str:
# kid = {'/Limits': [0, 63], '/Nums': [0, {'/P': 'C1'}, ...]}
limits = cast(List[int], kid["/Limits"])
if limits[0] <= index <= limits[1]:
if kid.get("/Kids", None) is not None:
if not is_null_or_none(kid.get("/Kids", None)):
# Recursive definition.
level += 1
if level == 100: # pragma: no cover
raise NotImplementedError("Too deep nesting is not supported.")
raise NotImplementedError(
"Too deep nesting is not supported."
)
number_tree = kid
# Exit the inner `for` loop and continue at the next level with the
# next iteration of the `while` loop.
Expand All @@ -195,10 +203,7 @@ def index2label(reader: PdfCommonDocProtocol, index: int) -> str:
# and continue with the fallback.
break

logger_warning(
f"Could not reliably determine page label for {index}.",
__name__
)
logger_warning(f"Could not reliably determine page label for {index}.", __name__)
return str(index + 1) # Fallback if neither /Nums nor /Kids is in the number_tree


Expand Down
10 changes: 6 additions & 4 deletions pypdf/_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@
PdfObject,
StreamObject,
TextStringObject,
is_null_or_none,
read_object,
)
from .xmp import XmpInformation
Expand Down Expand Up @@ -206,11 +207,11 @@ def _info(self) -> Optional[DictionaryObject]:
/Info Dictionary; None if the entry does not exist
"""
info = self.trailer.get(TK.INFO, None)
if info is None:
if is_null_or_none(info):
return None
else:
info = info.get_object()
if info is None:
if info == None: # noqa: E711
raise PdfReadError(
"Trailer not found or does not point to document information directory"
)
Expand All @@ -225,7 +226,7 @@ def _ID(self) -> Optional[ArrayObject]:
/ID array; None if the entry does not exist
"""
id = self.trailer.get(TK.ID, None)
return None if id is None else cast(ArrayObject, id.get_object())
return None if is_null_or_none(id) else cast(ArrayObject, id.get_object())

def _repr_mimebundle_(
self,
Expand Down Expand Up @@ -298,8 +299,9 @@ def _get_page_number_by_indirect(
x.indirect_reference.idnum: i for i, x in enumerate(self.pages) # type: ignore
}

if indirect_reference is None or isinstance(indirect_reference, NullObject):
if is_null_or_none(indirect_reference):
return None
assert isinstance(indirect_reference, (int, IndirectObject)), "mypy"
if isinstance(indirect_reference, int):
idnum = indirect_reference
else:
Expand Down
14 changes: 8 additions & 6 deletions pypdf/_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@
ViewerPreferences,
create_string_object,
hex_to_rgb,
is_null_or_none,
)
from .pagerange import PageRange, PageRangeSpec
from .types import (
Expand Down Expand Up @@ -499,7 +500,7 @@ def _add_page(
cast(ArrayObject, node[PA.KIDS]).append(page.indirect_reference)
self.flattened_pages.append(page)
cpt = 1000
while node is not None:
while not is_null_or_none(node):
node = cast(DictionaryObject, node.get_object())
node[NameObject(PA.COUNT)] = NumberObject(cast(int, node[PA.COUNT]) + 1)
node = node.get(PA.PARENT, None)
Expand Down Expand Up @@ -612,8 +613,9 @@ def _get_page_number_by_indirect(
The page number or None
"""
# to provide same function as in PdfReader
if indirect_reference is None or isinstance(indirect_reference, NullObject):
if is_null_or_none(indirect_reference):
return None
assert indirect_reference is not None, "mypy"
if isinstance(indirect_reference, int):
indirect_reference = IndirectObject(indirect_reference, 0, self)
obj = indirect_reference.get_object()
Expand Down Expand Up @@ -928,7 +930,7 @@ def _update_field_annotation(
)
dr = dr.get_object().get("/Font", DictionaryObject()).get_object()
font_res = dr.get(font_name, None)
if font_res is not None:
if not is_null_or_none(font_res):
font_res = cast(DictionaryObject, font_res.get_object())
font_subtype, _, font_encoding, font_map = build_char_map_from_dict(
200, font_res
Expand Down Expand Up @@ -1566,9 +1568,9 @@ def metadata(self) -> Optional[DocumentInformation]:
Retrieve/set the PDF file's document information dictionary, if it exists.
Args:
value: Dictionary with the entries to set. If None, remove the /Info entry from the PDF.
value: Dictionary with the entries to be set. If None, the /Info entry is removed from the PDF.
Note that some PDF files use (XMP) metadata streams instead of document
Note that some PDF files use (XMP) metadata streams instead of document
information dictionaries, and these metadata streams will not be
accessed by this function.
"""
Expand Down Expand Up @@ -2981,7 +2983,7 @@ def _get_filtered_outline(
if node is None:
node = NullObject()
node = node.get_object()
if node is None or isinstance(node, NullObject):
if is_null_or_none(node):
node = DictionaryObject()
if node.get("/Type", "") == "/Outlines" or "/Title" not in node:
node = node.get("/First", None)
Expand Down
4 changes: 1 addition & 3 deletions pypdf/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -746,9 +746,7 @@ def _xobj_to_image(x_object_obj: Dict[str, Any]) -> Tuple[Optional[str], bytes,
)

# for error reporting
if (
hasattr(x_object_obj, "indirect_reference") and x_object_obj is None
): # pragma: no cover
if x_object_obj is None: # pragma: no cover
obj_as_text = x_object_obj.indirect_reference.__repr__()
else:
obj_as_text = x_object_obj.__repr__()
Expand Down
2 changes: 2 additions & 0 deletions pypdf/generic/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
PdfObject,
TextStringObject,
encode_pdfdocencoding,
is_null_or_none,
)
from ._data_structures import (
ArrayObject,
Expand Down Expand Up @@ -235,6 +236,7 @@ def link(
"encode_pdfdocencoding",
"decode_pdfdocencoding",
"hex_to_rgb",
"is_null_or_none",
"read_hex_string_from_stream",
"read_string_from_stream",
]
10 changes: 10 additions & 0 deletions pypdf/generic/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,16 @@ def __repr__(self) -> str:
return "NullObject"


def is_null_or_none(x: Any) -> bool:
    """
    Tell whether a value stands for "nothing" in the PDF object model.

    Args:
        x: Any value; typically a PdfObject (possibly an indirect
            reference) or None.

    Returns:
        True if x is None, or if x is a PdfObject whose ``get_object()``
        result is None or a NullObject; False otherwise (including for
        any value that is not a PdfObject).
    """
    if x is None:
        return True
    if not isinstance(x, PdfObject):
        return False
    # Resolve a single time and reuse the result for both checks below.
    resolved = x.get_object()
    # NOTE(review): get_object() appears able to return None (callers in
    # this code base compare its result against None), e.g. for an indirect
    # reference to an undefined object. Treat that the same as NullObject
    # instead of reporting the value as usable.
    return resolved is None or isinstance(resolved, NullObject)


class BooleanObject(PdfObject):
def __init__(self, value: Any) -> None:
self.value = value
Expand Down
11 changes: 7 additions & 4 deletions pypdf/generic/_data_structures.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@
NumberObject,
PdfObject,
TextStringObject,
is_null_or_none,
)
from ._fit import Fit
from ._image_inline import (
Expand Down Expand Up @@ -451,7 +452,7 @@ def xmp_metadata(self) -> Optional[XmpInformationProtocol]:
from ..xmp import XmpInformation

metadata = self.get("/Metadata", None)
if metadata is None:
if is_null_or_none(metadata):
return None
metadata = metadata.get_object()

Expand Down Expand Up @@ -651,7 +652,7 @@ def children(self) -> Iterable[Any]:
if child == self[NameObject("/Last")]:
return
child_ref = child.get(NameObject("/Next")) # type: ignore
if child_ref is None:
if is_null_or_none(child_ref):
return
child = child_ref.get_object()

Expand All @@ -661,8 +662,9 @@ def add_child(self, child: Any, pdf: PdfWriterProtocol) -> None:
def inc_parent_counter_default(
self, parent: Union[None, IndirectObject, "TreeObject"], n: int
) -> None:
if parent is None:
if is_null_or_none(parent):
return
assert parent is not None, "mypy"
parent = cast("TreeObject", parent.get_object())
if "/Count" in parent:
parent[NameObject("/Count")] = NumberObject(
Expand All @@ -673,8 +675,9 @@ def inc_parent_counter_default(
def inc_parent_counter_outline(
self, parent: Union[None, IndirectObject, "TreeObject"], n: int
) -> None:
if parent is None:
if is_null_or_none(parent):
return
assert parent is not None, "mypy"
parent = cast("TreeObject", parent.get_object())
# BooleanObject requires comparison with == not is
opn = parent.get("/%is_open%", True) == True # noqa
Expand Down
5 changes: 3 additions & 2 deletions pypdf/generic/_fit.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from typing import Any, Optional, Tuple, Union

from ._base import is_null_or_none


class Fit:
def __init__(
Expand All @@ -9,8 +11,7 @@ def __init__(

self.fit_type = NameObject(fit_type)
self.fit_args = [
NullObject() if a is None or isinstance(a, NullObject) else FloatObject(a)
for a in fit_args
NullObject() if is_null_or_none(a) else FloatObject(a) for a in fit_args
]

@classmethod
Expand Down
Loading

0 comments on commit 8ebd311

Please sign in to comment.