DOC: Resolve build warnings (#2380)

This fixes #1941 by cleaning up wrong references and adding new docs.

Some notes about this:

* Building the docs now works without prior package installation.
* The intersphinx mapping will now always use the objects of the Python version used for the docs build.
* The intersphinx mapping for *Pillow* has been added.
* Links to private methods in the developer docs have been replaced by inline code as they are not part of the Sphinx docs.
* Some broken/outdated/strange docstrings have been fixed.
* New classes have been added to the docs. Module-specific classes have been added to the modules where they are being used directly, some generic classes/modules have been put into new files/pages.
* We have to exclude some members to avoid duplicate definitions.
py-pdf · Dec 30, 2023 · 2cdc0d5 · 2cdc0d5
1 parent 2f4f705
commit 2cdc0d5
Show file tree

Hide file tree

Showing 14 changed files with 109 additions and 33 deletions.
diff --git a/docs/conf.py b/docs/conf.py
@@ -14,11 +14,11 @@
 import shutil
 import sys
 
-import pypdf as py_pkg
-
 sys.path.insert(0, os.path.abspath("."))
 sys.path.insert(0, os.path.abspath("../"))
 
+import pypdf as py_pkg # noqa: E402
+
 shutil.copyfile("../CHANGELOG.md", "meta/CHANGELOG.md")
 shutil.copyfile("../CONTRIBUTORS.md", "meta/CONTRIBUTORS.md")
 
@@ -57,8 +57,10 @@
  "myst_parser",
 ]
 
+python_version = ".".join(map(str, sys.version_info[:2]))
 intersphinx_mapping = {
- "python": ("https://docs.python.org/3.8", None),
+ "python": (f"https://docs.python.org/{python_version}", None),
+ "Pillow": ("https://pillow.readthedocs.io/en/latest/", None),
 }
 
 nitpick_ignore_regex = [

diff --git a/docs/dev/pypdf-parsing.md b/docs/dev/pypdf-parsing.md
@@ -20,9 +20,7 @@ structure of parsing:
  decodes these content streams by applying filters (e.g., `FlateDecode`,
  `LZWDecode`) specified in the stream's dictionary. This is only done when the
  object is requested via {py:meth}`PdfReader.get_object
- <pypdf.PdfReader.get_object>` in the
- {py:meth}`PdfReader._get_object_from_stream
- <pypdf.PdfReader._get_object_from_stream>` method.
+ <pypdf.PdfReader.get_object>` in the `PdfReader._get_object_from_stream` method.
 
 ## References
 

diff --git a/docs/index.rst b/docs/index.rst
@@ -61,6 +61,9 @@ You can contribute to `pypdf on GitHub <https://github.com/py-pdf/pypdf>`_.
  modules/annotations
  modules/Fit
  modules/PaperSize
+ modules/constants
+ modules/errors
+ modules/generic
 
 .. toctree::
  :caption: Developer Guide

diff --git a/docs/modules/PageObject.rst b/docs/modules/PageObject.rst
@@ -5,3 +5,15 @@ The PageObject Class
  :members:
  :undoc-members:
  :show-inheritance:
+
+.. autoclass:: pypdf._utils.ImageFile
+ :members:
+ :undoc-members:
+ :show-inheritance:
+ :exclude-members: IndirectObject
+
+.. autoclass:: pypdf._utils.File
+ :members:
+ :undoc-members:
+ :show-inheritance:
+ :exclude-members: IndirectObject
diff --git a/docs/modules/PdfReader.rst b/docs/modules/PdfReader.rst
@@ -5,3 +5,8 @@ The PdfReader Class
  :members:
  :undoc-members:
  :show-inheritance:
+
+.. autoclass:: pypdf.PasswordType
+ :members:
+ :undoc-members:
+ :show-inheritance:
diff --git a/docs/modules/PdfWriter.rst b/docs/modules/PdfWriter.rst
@@ -5,3 +5,8 @@ The PdfWriter Class
  :members:
  :undoc-members:
  :show-inheritance:
+
+.. autoclass:: pypdf.ObjectDeletionFlag
+ :members:
+ :undoc-members:
+ :show-inheritance:
diff --git a/docs/modules/constants.rst b/docs/modules/constants.rst
@@ -0,0 +1,17 @@
+Constants
+---------
+
+.. autoclass:: pypdf.constants.AnnotationFlag
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+.. autoclass:: pypdf.constants.ImageType
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+.. autoclass:: pypdf.constants.PageLabelStyle
+ :members:
+ :undoc-members:
+ :show-inheritance:
diff --git a/docs/modules/errors.rst b/docs/modules/errors.rst
@@ -0,0 +1,7 @@
+Errors
+------
+
+.. automodule:: pypdf.errors
+ :members:
+ :undoc-members:
+ :show-inheritance:
diff --git a/docs/modules/generic.rst b/docs/modules/generic.rst
@@ -0,0 +1,25 @@
+Generic PDF objects
+-------------------
+
+.. automodule:: pypdf.generic
+ :members:
+ :undoc-members:
+ :show-inheritance:
+ :exclude-members: Destination, Field, Fit, RectangleObject
+
+.. autoclass:: pypdf._protocols.PdfObjectProtocol
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+
+.. autoclass:: pypdf._protocols.PdfReaderProtocol
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+
+.. autoclass:: pypdf._protocols.PdfWriterProtocol
+ :members:
+ :undoc-members:
+ :show-inheritance:
diff --git a/pypdf/_page.py b/pypdf/_page.py
@@ -303,8 +303,8 @@ class PageObject(DictionaryObject):
  """
  PageObject represents a single page within a PDF file.
 
- Typically this object will be created by accessing the
- :meth:`get_page()<pypdf.PdfReader.get_page>` method of the
+ Typically these objects will be created by accessing the
+ :attr:`pages<pypdf.PdfReader.pages>` property of the
  :class:`PdfReader<pypdf.PdfReader>` class, but it is
  also possible to create an empty page with the
  :meth:`create_blank_page()<pypdf._page.PageObject.create_blank_page>` static method.

diff --git a/pypdf/_reader.py b/pypdf/_reader.py
@@ -298,6 +298,7 @@ def __init__(
  ) -> None:
  self.strict = strict
  self.flattened_pages: Optional[List[PageObject]] = None
+ #: Storage of parsed PDF objects.
  self.resolved_objects: Dict[Tuple[Any, Any], Optional[PdfObject]] = {}
  self.xref_index = 0
  self._page_id2num: Optional[
@@ -962,7 +963,7 @@ def _build_outline_item(self, node: DictionaryObject) -> Optional[Destination]:
 
  @property
  def pages(self) -> List[PageObject]:
- """Read-only property that emulates a list of :py:class:`Page<pypdf._page.Page>` objects."""
+ """Read-only property that emulates a list of :py:class:`PageObject<pypdf._page.PageObject>` objects."""
  return _VirtualList(self._get_num_pages, self._get_page) # type: ignore
 
  @property

diff --git a/pypdf/_utils.py b/pypdf/_utils.py
@@ -537,7 +537,7 @@ def replace(self, new_image: Any, **kwargs: Any) -> None:
  Replace the Image with a new PIL image.
 
  Args:
- new_image (Image.Image): The new PIL image to replace the existing image.
+ new_image (PIL.Image.Image): The new PIL image to replace the existing image.
  **kwargs: Additional keyword arguments to pass to `Image.Image.save()`.
 
  Raises:

diff --git a/pypdf/_writer.py b/pypdf/_writer.py
@@ -1049,7 +1049,7 @@ def generate_file_identifiers(self) -> None:
  When a file is first written, both identifiers shall be set to the same value.
  If both identifiers match when a file reference is resolved, it is very
  likely that the correct and unchanged file has been found. If only the first
-  identifier matches, a different version of the correct file has been found.
+ identifier matches, a different version of the correct file has been found.
  See 14.4 "File Identifiers".
  """
  if self._ID:
@@ -2792,14 +2792,16 @@ def set_page_label(
  Args:
  page_index_from: page index of the beginning of the range starting from 0
  page_index_to: page index of the end of the range starting from 0
- style: The numbering style to be used for the numeric portion of each page label:
- '/D' Decimal arabic numerals
- '/R' Uppercase roman numerals
- '/r' Lowercase roman numerals
- '/A' Uppercase letters (A to Z for the first 26 pages,
- AA to ZZ for the next 26, and so on)
- '/a' Lowercase letters (a to z for the first 26 pages,
- aa to zz for the next 26, and so on)
+ style: The numbering style to be used for the numeric portion of each page label:
+
+ * ``/D`` Decimal arabic numerals
+ * ``/R`` Uppercase roman numerals
+ * ``/r`` Lowercase roman numerals
+ * ``/A`` Uppercase letters (A to Z for the first 26 pages,
+ AA to ZZ for the next 26, and so on)
+ * ``/a`` Lowercase letters (a to z for the first 26 pages,
+ aa to zz for the next 26, and so on)
+
  prefix: The label prefix for page labels in this range.
  start: The value of the numeric portion for the first page label
  in the range.

diff --git a/pypdf/generic/_base.py b/pypdf/generic/_base.py
@@ -73,22 +73,21 @@ def clone(
  ignore_fields: Optional[Sequence[Union[str, int]]] = (),
  ) -> "PdfObject":
  """
- clone object into pdf_dest (PdfWriterProtocol which is an interface for PdfWriter)
- force_duplicate: in standard if the object has been already cloned and reference,
- the copy is returned; when force_duplicate == True,
- a new copy is always performed
- ignore_fields : list/tuple of Fields names (for dictionaries that will
- be ignored during cloning (apply also to childs duplication)
- if fields are to be considered for a limited number of levels
- you have to add it as integer:
- eg [1,"/B","/TOTO"] means "/B" will be ignored at first level only
- but "/TOTO" on all levels
- in standard, clone function call _reference_clone (see _reference)
+ Clone object into pdf_dest (PdfWriterProtocol which is an interface for PdfWriter).
+
+ By default, this method will call ``_reference_clone`` (see ``_reference``).
+
 
  Args:
- pdf_dest:
- force_duplicate: (Default value = False)
- ignore_fields:
+ pdf_dest: Target to clone to.
+ force_duplicate: By default, if the object has already been cloned and referenced,
+ the copy will be returned; when ``True``, a new copy will be created.
+ (Default value = ``False``)
+ ignore_fields: List/tuple of field names (for dictionaries) that will be ignored
+ during cloning (applies to children duplication as well). If fields are to be
+ considered for a limited number of levels, you have to add that number as an integer
+ before them; for example, ``[1,"/B","/TOTO"]`` means that ``"/B"`` will be ignored at the first
+ level only but ``"/TOTO"`` on all levels.
 
  Returns:
  The cloned PdfObject