From d5c7465184c555d465e8f00b77e3ca2e8a9013d2 Mon Sep 17 00:00:00 2001 From: "Cimon Lucas (LCM)" Date: Tue, 10 Oct 2023 10:56:18 +0200 Subject: [PATCH] Adding unit tests --- .github/workflows/github-ci.yaml | 24 +++--- .pre-commit-config.yaml | 12 +-- pdfly/cat.py | 39 ++++++--- requirements/ci.txt | 47 ++++------ requirements/dev.in | 1 + requirements/dev.txt | 144 +++++++++++++++++-------------- tests/__init__.py | 1 + tests/conftest.py | 31 +++++++ tests/test_cat.py | 82 ++++++++++++++++++ tests/test_cli.py | 22 +++++ tests/test_extract_images.py | 25 ++++++ 11 files changed, 300 insertions(+), 128 deletions(-) create mode 100644 tests/__init__.py create mode 100644 tests/conftest.py create mode 100644 tests/test_cat.py create mode 100644 tests/test_cli.py create mode 100644 tests/test_extract_images.py diff --git a/.github/workflows/github-ci.yaml b/.github/workflows/github-ci.yaml index 0f9a45d..4abd7c2 100644 --- a/.github/workflows/github-ci.yaml +++ b/.github/workflows/github-ci.yaml @@ -25,24 +25,22 @@ jobs: python-version: ${{ matrix.python-version }} - name: Upgrade pip - run: | - python -m pip install --upgrade pip + run: python -m pip install --upgrade pip - name: Install requirements (python 3) - run: | - pip install -r requirements/ci.txt + run: pip install -r requirements/ci.txt - name: Install pdfly - run: | - pip install . + run: pip install . - - name: Test with black + - name: Lint with black run: black --check . - - name: Test with flake8 - run: | - flake8 . --exclude build + - name: Lint with flake8 + run: flake8 . --exclude build - - name: Test with mypy - run: | - mypy . --ignore-missing-imports --exclude build + - name: Lint with mypy + run: mypy . --ignore-missing-imports --exclude build + + - name: Run tests + run: pytest -vv diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c56e580..bef76d2 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,7 +1,7 @@ # pre-commit run --all-files repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.4.0 + rev: v4.5.0 hooks: - id: check-ast - id: check-byte-order-marker @@ -22,27 +22,27 @@ repos: # hooks: # - id: mypy - repo: https://github.com/psf/black - rev: 23.1.0 + rev: 23.9.1 hooks: - id: black args: [--target-version, py36] - repo: https://github.com/asottile/blacken-docs - rev: 1.13.0 + rev: 1.16.0 hooks: - id: blacken-docs additional_dependencies: [black==22.1.0] - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: 'v0.0.254' + rev: 'v0.0.292' hooks: - id: ruff args: [--fix] - repo: https://github.com/asottile/pyupgrade - rev: v3.3.1 + rev: v3.15.0 hooks: - id: pyupgrade args: [--py36-plus] - repo: https://github.com/pycqa/flake8 - rev: 6.0.0 + rev: 6.1.0 hooks: - id: flake8 args: ["--ignore", "E,W,F"] diff --git a/pdfly/cat.py b/pdfly/cat.py index 3bc88cb..dfecebd 100644 --- a/pdfly/cat.py +++ b/pdfly/cat.py @@ -42,10 +42,9 @@ # see https://github.com/py-pdf/pypdf/LICENSE -import os +import os, sys import traceback from pathlib import Path -from sys import exit, stderr, stdout from typing import List from pypdf import PdfMerger, parse_filename_page_ranges @@ -54,29 +53,43 @@ def main( filename: Path, fn_pgrgs: List[str], output: Path, verbose: bool ) -> None: - fn_pgrgs_l = list(fn_pgrgs) - fn_pgrgs_l.insert(0, str(filename)) - filename_page_ranges = parse_filename_page_ranges(fn_pgrgs_l) # type: ignore + filename_page_ranges = parse_filepaths_and_pagerange_args(filename, fn_pgrgs) if output: output_fh = open(output, "wb") else: - stdout.flush() - output_fh = os.fdopen(stdout.fileno(), "wb") + sys.stdout.flush() + output_fh = os.fdopen(sys.stdout.fileno(), "wb") merger = PdfMerger() in_fs = {} try: for filename, page_range in filename_page_ranges: # type: ignore if verbose: - print(filename, page_range, file=stderr) + print(filename, page_range, file=sys.stderr) if filename not in in_fs: in_fs[filename] = open(filename, "rb") merger.append(in_fs[filename], pages=page_range) + merger.write(output_fh) except Exception: - print(traceback.format_exc(), file=stderr) - print(f"Error while reading {filename}", file=stderr) - exit(1) - merger.write(output_fh) - output_fh.close() + print(traceback.format_exc(), file=sys.stderr) + print(f"Error while reading {filename}", file=sys.stderr) + sys.exit(1) + finally: + output_fh.close() # In 3.0, input files must stay open until output is written. # Not closing the in_fs because this script exits now. + + +def parse_filepaths_and_pagerange_args(filename: Path, fn_pgrgs: List[str]): + fn_pgrgs_l = list(fn_pgrgs) + fn_pgrgs_l.insert(0, str(filename)) + filename_page_ranges, invalid_filepaths = [], [] + for filename, page_range in parse_filename_page_ranges(fn_pgrgs_l): # type: ignore + if Path(filename).is_file(): + filename_page_ranges.append((filename, page_range)) + else: + invalid_filepaths.append(filename) + if invalid_filepaths: + print(f"Invalid file path or page range provided: {' '.join(invalid_filepaths)}", file=sys.stderr) + sys.exit(2) + return filename_page_ranges diff --git a/requirements/ci.txt b/requirements/ci.txt index 3a1039b..1bc6f3b 100644 --- a/requirements/ci.txt +++ b/requirements/ci.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.7 +# This file is autogenerated by pip-compile with Python 3.8 # by the following command: # # pip-compile requirements/ci.in @@ -8,65 +8,50 @@ astor==0.8.1 # via flake8-simplify attrs==23.1.0 # via flake8-bugbear -black==23.3.0 +black==23.9.1 # via -r requirements/ci.in -click==8.1.5 +click==8.1.7 # via black -flake8==5.0.4 +flake8==6.1.0 # via # -r requirements/ci.in # flake8-bugbear # flake8-comprehensions # flake8-isort # flake8-simplify -flake8-bugbear==23.3.12 +flake8-bugbear==23.9.16 # via -r requirements/ci.in -flake8-comprehensions==3.13.0 +flake8-comprehensions==3.14.0 # via -r requirements/ci.in -flake8-isort==6.0.0 +flake8-isort==6.1.0 # via -r requirements/ci.in -flake8-simplify==0.20.0 +flake8-simplify==0.21.0 # via -r requirements/ci.in -importlib-metadata==4.2.0 - # via - # attrs - # click - # flake8 - # flake8-comprehensions - # flake8-simplify -isort==5.11.5 +isort==5.12.0 # via flake8-isort mccabe==0.7.0 # via flake8 -mypy==1.4.1 +mypy==1.5.1 # via -r requirements/ci.in mypy-extensions==1.0.0 # via # black # mypy -packaging==23.1 +packaging==23.2 # via black -pathspec==0.11.1 +pathspec==0.11.2 # via black -platformdirs==3.9.0 +platformdirs==3.11.0 # via black -pycodestyle==2.9.1 +pycodestyle==2.11.0 # via flake8 -pyflakes==2.5.0 +pyflakes==3.1.0 # via flake8 tomli==2.0.1 # via # black # mypy -typed-ast==1.5.5 - # via - # black - # mypy -typing-extensions==4.7.1 +typing-extensions==4.8.0 # via # black - # importlib-metadata # mypy - # platformdirs -zipp==3.15.0 - # via importlib-metadata diff --git a/requirements/dev.in b/requirements/dev.in index 6a2b12f..8c6fc66 100644 --- a/requirements/dev.in +++ b/requirements/dev.in @@ -2,5 +2,6 @@ black pip-tools pre-commit pytest-cov +pytest-timeout twine wheel diff --git a/requirements/dev.txt b/requirements/dev.txt index fc9bcf7..6a5e2d9 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -1,135 +1,149 @@ # -# This file is autogenerated by pip-compile with python 3.10 -# To update, run: +# This file is autogenerated by pip-compile with Python 3.8 +# by the following command: # # pip-compile requirements/dev.in # -attrs==21.4.0 - # via pytest -black==22.3.0 +black==23.9.1 # via -r requirements/dev.in -bleach==5.0.0 - # via readme-renderer -certifi==2021.10.8 +build==1.0.3 + # via pip-tools +certifi==2023.7.22 # via requests -cffi==1.15.0 +cffi==1.16.0 # via cryptography -cfgv==3.3.1 +cfgv==3.4.0 # via pre-commit -charset-normalizer==2.0.12 +charset-normalizer==3.3.0 # via requests -click==8.1.2 +click==8.1.7 # via # black # pip-tools -commonmark==0.9.1 - # via rich -coverage[toml]==6.3.2 +coverage[toml]==7.3.2 # via pytest-cov -cryptography==36.0.2 +cryptography==41.0.4 # via secretstorage -distlib==0.3.4 +distlib==0.3.7 # via virtualenv -docutils==0.18.1 +docutils==0.20.1 # via readme-renderer -filelock==3.6.0 +exceptiongroup==1.1.3 + # via pytest +filelock==3.12.4 # via virtualenv -identify==2.4.12 +identify==2.5.30 # via pre-commit -idna==3.3 +idna==3.4 # via requests -importlib-metadata==4.11.3 +importlib-metadata==6.8.0 # via + # build # keyring # twine -iniconfig==1.1.1 +importlib-resources==6.1.0 + # via keyring +iniconfig==2.0.0 # via pytest +jaraco-classes==3.3.0 + # via keyring jeepney==0.8.0 # via # keyring # secretstorage -keyring==23.5.0 +keyring==24.2.0 # via twine -mypy-extensions==0.4.3 +markdown-it-py==3.0.0 + # via rich +mdurl==0.1.2 + # via markdown-it-py +more-itertools==10.1.0 + # via jaraco-classes +mypy-extensions==1.0.0 # via black -nodeenv==1.6.0 +nh3==0.2.14 + # via readme-renderer +nodeenv==1.8.0 # via pre-commit -packaging==21.3 - # via pytest -pathspec==0.9.0 +packaging==23.2 + # via + # black + # build + # pytest +pathspec==0.11.2 # via black -pep517==0.12.0 - # via pip-tools -pip-tools==6.6.0 +pip-tools==7.3.0 # via -r requirements/dev.in -pkginfo==1.8.2 +pkginfo==1.9.6 # via twine -platformdirs==2.5.1 +platformdirs==3.11.0 # via # black # virtualenv -pluggy==1.0.0 +pluggy==1.3.0 # via pytest -pre-commit==2.18.1 +pre-commit==3.4.0 # via -r requirements/dev.in -py==1.11.0 - # via pytest pycparser==2.21 # via cffi -pygments==2.11.2 +pygments==2.16.1 # via # readme-renderer # rich -pyparsing==3.0.8 - # via packaging -pytest==7.1.1 - # via pytest-cov -pytest-cov==3.0.0 +pyproject-hooks==1.0.0 + # via build +pytest==7.4.2 + # via + # pytest-cov + # pytest-timeout +pytest-cov==4.1.0 # via -r requirements/dev.in -pyyaml==6.0 +pytest-timeout==2.2.0 + # via -r requirements/dev.in +pyyaml==6.0.1 # via pre-commit -readme-renderer==34.0 +readme-renderer==42.0 # via twine -requests==2.27.1 +requests==2.31.0 # via # requests-toolbelt # twine -requests-toolbelt==0.9.1 +requests-toolbelt==1.0.0 # via twine rfc3986==2.0.0 # via twine -rich==12.2.0 +rich==13.6.0 # via twine -secretstorage==3.3.1 +secretstorage==3.3.3 # via keyring -six==1.16.0 - # via - # bleach - # virtualenv -toml==0.10.2 - # via pre-commit tomli==2.0.1 # via # black + # build # coverage - # pep517 + # pip-tools + # pyproject-hooks # pytest -twine==4.0.0 +twine==4.0.2 # via -r requirements/dev.in -urllib3==1.26.9 +typing-extensions==4.8.0 + # via + # black + # rich +urllib3==2.0.6 # via # requests # twine -virtualenv==20.14.1 +virtualenv==20.24.5 # via pre-commit -webencodings==0.5.1 - # via bleach -wheel==0.37.1 +wheel==0.41.2 # via # -r requirements/dev.in # pip-tools -zipp==3.8.0 - # via importlib-metadata +zipp==3.17.0 + # via + # importlib-metadata + # importlib-resources # The following packages are considered to be unsafe in a requirements file: # pip diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1 @@ + diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..c33a4d0 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,31 @@ +"""Utilities and fixtures that are available automatically for all tests.""" + +import os +from pathlib import Path + +from pdfly.cli import entry_point + +TESTS_ROOT = Path(__file__).parent.resolve() +PROJECT_ROOT = TESTS_ROOT.parent +RESOURCES_ROOT = PROJECT_ROOT / "resources" + +def run_cli(args): + try: + entry_point(args) + except SystemExit as error: + return error.code + +try: + from contextlib import chdir +except ImportError: # Fallback when not available (< Python 3.11): + from contextlib import contextmanager + + @contextmanager + def chdir(dir_path): + """Non thread-safe context manager to change the current working directory.""" + cwd = Path.cwd() + os.chdir(dir_path) + try: + yield + finally: + os.chdir(cwd) diff --git a/tests/test_cat.py b/tests/test_cat.py new file mode 100644 index 0000000..a68225a --- /dev/null +++ b/tests/test_cat.py @@ -0,0 +1,82 @@ +import pytest + +from pypdf import PdfReader + +from .conftest import chdir, run_cli, RESOURCES_ROOT + + +def test_cat_incorrect_number_of_args(capsys, tmp_path): + with chdir(tmp_path): + exit_code = run_cli(["cat", str(RESOURCES_ROOT / "box.pdf")]) + assert exit_code == 2 + captured = capsys.readouterr() + assert "Missing argument" in captured.err + + +def test_cat_two_files_ok(capsys, tmp_path): + with chdir(tmp_path): + exit_code = run_cli( + [ + "cat", + str(RESOURCES_ROOT / "box.pdf"), + str(RESOURCES_ROOT / "jpeg.pdf"), + "--output", + "./out.pdf", + ] + ) + captured = capsys.readouterr() + assert exit_code == 0, captured + assert not captured.err + reader = PdfReader(tmp_path / "out.pdf") + assert len(reader.pages) == 2 + + +def test_cat_subset_ok(capsys, tmp_path): + with chdir(tmp_path): + exit_code = run_cli( + [ + "cat", + str(RESOURCES_ROOT / "GeoBase_NHNC1_Data_Model_UML_EN.pdf"), + "13:15", + "--output", + "./out.pdf", + ] + ) + captured = capsys.readouterr() + assert exit_code == 0, captured + assert not captured.err + reader = PdfReader(tmp_path / "out.pdf") + assert len(reader.pages) == 2 + + +@pytest.mark.parametrize( + "page_range", + ["a", "-", "1-", "1-1-1", "1:1:1:1"], +) +def test_cat_subset_invalid_args(capsys, tmp_path, page_range): + with chdir(tmp_path): + exit_code = run_cli([ + "cat", + str(RESOURCES_ROOT / "jpeg.pdf"), + page_range, + "--output", + "./out.pdf", + ]) + captured = capsys.readouterr() + assert exit_code == 2, captured + assert "Invalid file path or page range provided" in captured.err + + +@pytest.mark.skip(reason="This check is not implemented yet") +def test_cat_subset_warn_on_missing_pages(capsys, tmp_path): + with chdir(tmp_path): + exit_code = run_cli([ + "cat", + str(RESOURCES_ROOT / "jpeg.pdf"), + "2", + "--output", + "./out.pdf", + ]) + captured = capsys.readouterr() + assert exit_code == 0, captured + assert "WARN" in captured.out diff --git a/tests/test_cli.py b/tests/test_cli.py new file mode 100644 index 0000000..bf44f35 --- /dev/null +++ b/tests/test_cli.py @@ -0,0 +1,22 @@ +import sys +from subprocess import check_output + +from pypdf import __version__ as pypdf_version + +from .conftest import run_cli + + +def test_pypdf_cli_can_be_invoked_as_a_module(): + stdout = check_output( + [sys.executable, "-m", "pdfly", "--help"] # noqa: S603 + ).decode() + assert "pdfly [OPTIONS] COMMAND [ARGS]..." in stdout + assert "pdfly is a pure-python cli application for manipulating PDF files." in stdout + + +def test_pypdf_cli_version(capsys): + exit_code = run_cli(["--version"]) + captured = capsys.readouterr() + assert not captured.err + assert pypdf_version in captured.out + assert exit_code == 0 diff --git a/tests/test_extract_images.py b/tests/test_extract_images.py new file mode 100644 index 0000000..328235f --- /dev/null +++ b/tests/test_extract_images.py @@ -0,0 +1,25 @@ +import pytest + +from .conftest import chdir, run_cli, RESOURCES_ROOT + + +def test_extract_images_jpg_png(capsys, tmp_path): + with chdir(tmp_path): + run_cli( + [ + "extract-images", + str(RESOURCES_ROOT / "GeoBase_NHNC1_Data_Model_UML_EN.pdf"), + ] + ) + captured = capsys.readouterr() + assert not captured.err + assert "Extracted 3 images" in captured.out + + +@pytest.mark.xfail() # There is currently a bug there +def test_extract_images_monochrome(capsys, tmp_path): + with chdir(tmp_path): + run_cli(["extract-images", str(RESOURCES_ROOT / "box.pdf")]) + captured = capsys.readouterr() + assert not captured.err + assert "Image extracted" in captured.out