Skip to content

Commit

Permalink
ENH: Add support for pathlib.Path in PdfMerger.merge (#1190)
Browse files (browse the repository at this point in the history)
Replace many os.path usages with pathlib
  • Loading branch information
MartinThoma authored Jul 31, 2022
1 parent ab01f14 commit 42ae312
Show file tree
Hide file tree
Showing 13 changed files with 170 additions and 179 deletions.
9 changes: 5 additions & 4 deletions PyPDF2/_merger.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
# POSSIBILITY OF SUCH DAMAGE.

from io import BytesIO, FileIO, IOBase
from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional, Tuple, Union, cast

from ._encryption import Encryption
Expand Down Expand Up @@ -99,7 +100,7 @@ def __init__(self, strict: bool = False) -> None:
def merge(
self,
position: int,
fileobj: Union[StrByteType, PdfReader],
fileobj: Union[Path, StrByteType, PdfReader],
outline_item: Optional[str] = None,
pages: Optional[PageRangeSpec] = None,
import_outline: bool = True,
Expand Down Expand Up @@ -184,7 +185,7 @@ def merge(
self.pages[position:position] = srcpages

def _create_stream(
self, fileobj: Union[StrByteType, PdfReader]
self, fileobj: Union[Path, StrByteType, PdfReader]
) -> Tuple[IOBase, bool, Optional[Encryption]]:
# This parameter is passed to self.inputs.append and means
# that the stream used was created in this method.
Expand All @@ -198,7 +199,7 @@ def _create_stream(
# If fileobj is none of the above types, it is not modified
encryption_obj = None
stream: IOBase
if isinstance(fileobj, str):
if isinstance(fileobj, (str, Path)):
stream = FileIO(fileobj, "rb")
my_file = True
elif isinstance(fileobj, PdfReader):
Expand All @@ -224,7 +225,7 @@ def _create_stream(
@deprecate_bookmark(bookmark="outline_item", import_bookmarks="import_outline")
def append(
self,
fileobj: Union[StrByteType, PdfReader],
fileobj: Union[StrByteType, PdfReader, Path],
outline_item: Optional[str] = None,
pages: Union[None, PageRange, Tuple[int, int], Tuple[int, int, int]] = None,
import_outline: bool = True,
Expand Down
21 changes: 11 additions & 10 deletions tests/bench.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,18 @@
import os
from pathlib import Path

import PyPDF2
from PyPDF2 import PdfReader, Transformation
from PyPDF2.generic import Destination

TESTS_ROOT = os.path.abspath(os.path.dirname(__file__))
PROJECT_ROOT = os.path.dirname(TESTS_ROOT)
RESOURCE_ROOT = os.path.join(PROJECT_ROOT, "resources")
SAMPLE_ROOT = os.path.join(PROJECT_ROOT, "sample-files")
TESTS_ROOT = Path(__file__).parent.resolve()
PROJECT_ROOT = TESTS_ROOT.parent
RESOURCE_ROOT = PROJECT_ROOT / "resources"
SAMPLE_ROOT = PROJECT_ROOT / "sample-files"


def page_ops(pdf_path, password):
pdf_path = os.path.join(RESOURCE_ROOT, pdf_path)
pdf_path = RESOURCE_ROOT / pdf_path

reader = PdfReader(pdf_path)

Expand Down Expand Up @@ -50,10 +51,10 @@ def test_page_operations(benchmark):


def merge():
pdf_path = os.path.join(RESOURCE_ROOT, "crazyones.pdf")
outline = os.path.join(RESOURCE_ROOT, "pdflatex-outline.pdf")
pdf_forms = os.path.join(RESOURCE_ROOT, "pdflatex-forms.pdf")
pdf_pw = os.path.join(RESOURCE_ROOT, "libreoffice-writer-password.pdf")
pdf_path = RESOURCE_ROOT / "crazyones.pdf"
outline = RESOURCE_ROOT / "pdflatex-outline.pdf"
pdf_forms = RESOURCE_ROOT / "pdflatex-forms.pdf"
pdf_pw = RESOURCE_ROOT / "libreoffice-writer-password.pdf"

file_merger = PyPDF2.PdfMerger()

Expand Down Expand Up @@ -126,5 +127,5 @@ def text_extraction(pdf_path):


def test_text_extraction(benchmark):
file_path = os.path.join(SAMPLE_ROOT, "009-pdflatex-geotopo/GeoTopo.pdf")
file_path = SAMPLE_ROOT / "009-pdflatex-geotopo/GeoTopo.pdf"
benchmark(text_extraction, file_path)
9 changes: 5 additions & 4 deletions tests/test_basic_features.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
import os
from pathlib import Path

from PyPDF2 import PdfReader, PdfWriter

TESTS_ROOT = os.path.abspath(os.path.dirname(__file__))
PROJECT_ROOT = os.path.dirname(TESTS_ROOT)
RESOURCE_ROOT = os.path.join(PROJECT_ROOT, "resources")
TESTS_ROOT = Path(__file__).parent.resolve()
PROJECT_ROOT = TESTS_ROOT.parent
RESOURCE_ROOT = PROJECT_ROOT / "resources"


def test_basic_features():
pdf_path = os.path.join(RESOURCE_ROOT, "crazyones.pdf")
pdf_path = RESOURCE_ROOT / "crazyones.pdf"
reader = PdfReader(pdf_path)
writer = PdfWriter()

Expand Down
20 changes: 10 additions & 10 deletions tests/test_encryption.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import os
from pathlib import Path

import pytest

Expand All @@ -14,9 +14,9 @@
except ImportError:
HAS_PYCRYPTODOME = False

TESTS_ROOT = os.path.abspath(os.path.dirname(__file__))
PROJECT_ROOT = os.path.dirname(TESTS_ROOT)
RESOURCE_ROOT = os.path.join(PROJECT_ROOT, "resources")
TESTS_ROOT = Path(__file__).parent.resolve()
PROJECT_ROOT = TESTS_ROOT.parent
RESOURCE_ROOT = PROJECT_ROOT / "resources"


@pytest.mark.parametrize(
Expand Down Expand Up @@ -51,7 +51,7 @@
],
)
def test_encryption(name, requres_pycryptodome):
inputfile = os.path.join(RESOURCE_ROOT, "encryption", name)
inputfile = RESOURCE_ROOT / "encryption" / name
if requres_pycryptodome and not HAS_PYCRYPTODOME:
with pytest.raises(DependencyError) as exc:
ipdf = PyPDF2.PdfReader(inputfile)
Expand All @@ -61,7 +61,7 @@ def test_encryption(name, requres_pycryptodome):
return
else:
ipdf = PyPDF2.PdfReader(inputfile)
if inputfile.endswith("unencrypted.pdf"):
if str(inputfile).endswith("unencrypted.pdf"):
assert not ipdf.is_encrypted
else:
assert ipdf.is_encrypted
Expand Down Expand Up @@ -91,7 +91,7 @@ def test_encryption(name, requres_pycryptodome):
def test_both_password(name, user_passwd, owner_passwd):
from PyPDF2 import PasswordType

inputfile = os.path.join(RESOURCE_ROOT, "encryption", name)
inputfile = RESOURCE_ROOT / "encryption" / name
ipdf = PyPDF2.PdfReader(inputfile)
assert ipdf.is_encrypted
assert ipdf.decrypt(user_passwd) == PasswordType.USER_PASSWORD
Expand All @@ -113,7 +113,7 @@ def test_get_page_of_encrypted_file_new_algorithm(pdffile, password):
This is a regression test for issue 327:
IndexError for get_page() of decrypted file
"""
path = os.path.join(RESOURCE_ROOT, pdffile)
path = RESOURCE_ROOT / pdffile
PyPDF2.PdfReader(path, password=password).pages[0]


Expand All @@ -133,7 +133,7 @@ def test_get_page_of_encrypted_file_new_algorithm(pdffile, password):
@pytest.mark.skipif(not HAS_PYCRYPTODOME, reason="No pycryptodome")
def test_encryption_merge(names):
pdf_merger = PyPDF2.PdfMerger()
files = [os.path.join(RESOURCE_ROOT, "encryption", x) for x in names]
files = [RESOURCE_ROOT / "encryption" / x for x in names]
pdfs = [PyPDF2.PdfReader(x) for x in files]
for pdf in pdfs:
if pdf.is_encrypted:
Expand All @@ -157,7 +157,7 @@ def test_encrypt_decrypt_class(cryptcls):


def test_decrypt_not_decrypted_pdf():
path = os.path.join(RESOURCE_ROOT, "crazyones.pdf")
path = RESOURCE_ROOT / "crazyones.pdf"
with pytest.raises(PdfReadError) as exc:
PdfReader(path, password="nonexistant")
assert exc.value.args[0] == "Not encrypted file"
15 changes: 8 additions & 7 deletions tests/test_generic.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os
from io import BytesIO
from pathlib import Path
from unittest.mock import patch

import pytest
Expand Down Expand Up @@ -33,9 +34,9 @@

from . import get_pdf_from_url

TESTS_ROOT = os.path.abspath(os.path.dirname(__file__))
PROJECT_ROOT = os.path.dirname(TESTS_ROOT)
RESOURCE_ROOT = os.path.join(PROJECT_ROOT, "resources")
TESTS_ROOT = Path(__file__).parent.resolve()
PROJECT_ROOT = TESTS_ROOT.parent
RESOURCE_ROOT = PROJECT_ROOT / "resources"


def test_float_object_exception():
Expand Down Expand Up @@ -395,7 +396,7 @@ def test_remove_child_not_in_tree():


def test_remove_child_in_tree():
pdf = os.path.join(RESOURCE_ROOT, "form.pdf")
pdf = RESOURCE_ROOT / "form.pdf"

tree = TreeObject()
reader = PdfReader(pdf)
Expand Down Expand Up @@ -503,7 +504,7 @@ def test_issue_997(mock_logger_warning):

def test_annotation_builder_free_text():
# Arrange
pdf_path = os.path.join(RESOURCE_ROOT, "crazyones.pdf")
pdf_path = RESOURCE_ROOT / "crazyones.pdf"
reader = PdfReader(pdf_path)
page = reader.pages[0]
writer = PdfWriter()
Expand Down Expand Up @@ -533,7 +534,7 @@ def test_annotation_builder_free_text():

def test_annotation_builder_line():
# Arrange
pdf_path = os.path.join(RESOURCE_ROOT, "crazyones.pdf")
pdf_path = RESOURCE_ROOT / "crazyones.pdf"
reader = PdfReader(pdf_path)
page = reader.pages[0]
writer = PdfWriter()
Expand All @@ -558,7 +559,7 @@ def test_annotation_builder_line():

def test_annotation_builder_link():
# Arrange
pdf_path = os.path.join(RESOURCE_ROOT, "outline-without-title.pdf")
pdf_path = RESOURCE_ROOT / "outline-without-title.pdf"
reader = PdfReader(pdf_path)
page = reader.pages[0]
writer = PdfWriter()
Expand Down
10 changes: 5 additions & 5 deletions tests/test_javascript.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
import os
from pathlib import Path

import pytest

from PyPDF2 import PdfReader, PdfWriter

# Configure path environment
TESTS_ROOT = os.path.abspath(os.path.dirname(__file__))
PROJECT_ROOT = os.path.dirname(TESTS_ROOT)
RESOURCE_ROOT = os.path.join(PROJECT_ROOT, "resources")
TESTS_ROOT = Path(__file__).parent.resolve()
PROJECT_ROOT = TESTS_ROOT.parent
RESOURCE_ROOT = PROJECT_ROOT / "resources"


@pytest.fixture()
def pdf_file_writer():
reader = PdfReader(os.path.join(RESOURCE_ROOT, "crazyones.pdf"))
reader = PdfReader(RESOURCE_ROOT / "crazyones.pdf")
writer = PdfWriter()
writer.append_pages_from_reader(reader)
return writer
Expand Down
31 changes: 14 additions & 17 deletions tests/test_merger.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import os
import sys
from io import BytesIO
from pathlib import Path

import pytest

Expand All @@ -10,18 +11,18 @@

from . import get_pdf_from_url

TESTS_ROOT = os.path.abspath(os.path.dirname(__file__))
PROJECT_ROOT = os.path.dirname(TESTS_ROOT)
RESOURCE_ROOT = os.path.join(PROJECT_ROOT, "resources")
TESTS_ROOT = Path(__file__).parent.resolve()
PROJECT_ROOT = TESTS_ROOT.parent
RESOURCE_ROOT = PROJECT_ROOT / "resources"

sys.path.append(PROJECT_ROOT)
sys.path.append(str(PROJECT_ROOT))


def test_merge():
pdf_path = os.path.join(RESOURCE_ROOT, "crazyones.pdf")
outline = os.path.join(RESOURCE_ROOT, "pdflatex-outline.pdf")
pdf_forms = os.path.join(RESOURCE_ROOT, "pdflatex-forms.pdf")
pdf_pw = os.path.join(RESOURCE_ROOT, "libreoffice-writer-password.pdf")
pdf_path = RESOURCE_ROOT / "crazyones.pdf"
outline = RESOURCE_ROOT / "pdflatex-outline.pdf"
pdf_forms = RESOURCE_ROOT / "pdflatex-forms.pdf"
pdf_pw = RESOURCE_ROOT / "libreoffice-writer-password.pdf"

merger = PyPDF2.PdfMerger()

Expand Down Expand Up @@ -122,7 +123,7 @@ def test_merge():

def test_merge_page_exception():
merger = PyPDF2.PdfMerger()
pdf_path = os.path.join(RESOURCE_ROOT, "crazyones.pdf")
pdf_path = RESOURCE_ROOT / "crazyones.pdf"
with pytest.raises(TypeError) as exc:
merger.merge(0, pdf_path, pages="a:b")
assert exc.value.args[0] == '"pages" must be a tuple of (start, stop[, step])'
Expand All @@ -131,14 +132,14 @@ def test_merge_page_exception():

def test_merge_page_tuple():
merger = PyPDF2.PdfMerger()
pdf_path = os.path.join(RESOURCE_ROOT, "crazyones.pdf")
pdf_path = RESOURCE_ROOT / "crazyones.pdf"
merger.merge(0, pdf_path, pages=(0, 1))
merger.close()


def test_merge_write_closed_fh():
merger = PyPDF2.PdfMerger()
pdf_path = os.path.join(RESOURCE_ROOT, "crazyones.pdf")
pdf_path = RESOURCE_ROOT / "crazyones.pdf"
merger.append(pdf_path)

err_closed = "close() was called and thus the writer cannot be used anymore"
Expand Down Expand Up @@ -313,9 +314,7 @@ def test_iss1145():


def test_deprecate_bookmark_decorator_warning():
reader = PdfReader(
os.path.join(RESOURCE_ROOT, "outlines-with-invalid-destinations.pdf")
)
reader = PdfReader(RESOURCE_ROOT / "outlines-with-invalid-destinations.pdf")
merger = PdfMerger()
with pytest.warns(
UserWarning,
Expand All @@ -326,9 +325,7 @@ def test_deprecate_bookmark_decorator_warning():

@pytest.mark.filterwarnings("ignore::UserWarning")
def test_deprecate_bookmark_decorator_output():
reader = PdfReader(
os.path.join(RESOURCE_ROOT, "outlines-with-invalid-destinations.pdf")
)
reader = PdfReader(RESOURCE_ROOT / "outlines-with-invalid-destinations.pdf")
merger = PdfMerger()
merger.merge(0, reader, import_bookmarks=True)
first_oi_title = 'Valid Destination: Action /GoTo Named Destination "section.1"'
Expand Down
Loading

0 comments on commit 42ae312

Please sign in to comment.