From d0cc612e3b8747e5df9a44762ce47d2fb65022ee Mon Sep 17 00:00:00 2001
From: Martin Thoma
Date: Sat, 9 Apr 2022 23:08:40 +0200
Subject: [PATCH] Initial commit adding CI, pre-commit + 2-up.py

---
Review note: 2-up.py and pdf-image-extractor.py were revised during review
(odd page counts, closing of files, optional page_index argument). The blob
ids on the "index" lines below were not regenerated.

 .github/workflows/code-quality.yaml | 44 ++++++++++++++
 .isort.cfg                          |  7 +++
 .pre-commit-config.yaml             | 38 ++++++++++++
 2-up.py                             | 52 ++++++++++++++++
 README.md                           | 20 +++++-
 ci.in                               |  8 +++
 ci.txt                              | 58 ++++++++++++++++++
 pdf-image-extractor.py              | 94 +++++++++++++++++++++++++++++
 8 files changed, 319 insertions(+), 2 deletions(-)
 create mode 100644 .github/workflows/code-quality.yaml
 create mode 100644 .isort.cfg
 create mode 100644 .pre-commit-config.yaml
 create mode 100644 2-up.py
 create mode 100644 ci.in
 create mode 100644 ci.txt
 create mode 100644 pdf-image-extractor.py

diff --git a/.github/workflows/code-quality.yaml b/.github/workflows/code-quality.yaml
new file mode 100644
index 0000000..0784cbe
--- /dev/null
+++ b/.github/workflows/code-quality.yaml
@@ -0,0 +1,44 @@
+# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
+# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
+
+name: Unit Tests
+
+on:
+  push:
+    branches: [ main ]
+  pull_request:
+    branches: [ main ]
+
+jobs:
+  build:
+
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: ["3.8", "3.9", "3.10"]
+
+    steps:
+    - uses: actions/checkout@v2
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v2
+      with:
+        python-version: ${{ matrix.python-version }}
+
+    - name: Upgrade pip
+      run: |
+        python -m pip install --upgrade pip
+
+    - name: Install requirements (python 3)
+      run: |
+        pip install -r ci.txt
+
+    - name: Test with black
+      run: black --check .
+
+    - name: Test with flake8
+      run: |
+        flake8
+
+    - name: Test with mypy
+      run: |
+        mypy . --ignore-missing-imports
diff --git a/.isort.cfg b/.isort.cfg
new file mode 100644
index 0000000..ea049eb
--- /dev/null
+++ b/.isort.cfg
@@ -0,0 +1,7 @@
+[settings]
+line_length=79
+indent='    '
+multi_line_output=3
+length_sort=0
+include_trailing_comma=True
+skip=docs
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..a403a1b
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,38 @@
+# pre-commit run --all-files
+repos:
+-   repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.2.0
+    hooks:
+    -   id: check-ast
+    -   id: check-byte-order-marker
+    -   id: check-case-conflict
+    -   id: check-docstring-first
+    -   id: check-yaml
+    -   id: debug-statements
+    -   id: end-of-file-fixer
+    -   id: trailing-whitespace
+    -   id: mixed-line-ending
+    -   id: check-added-large-files
+        args: ['--maxkb=1000']
+-   repo: https://gitlab.com/pycqa/flake8
+    rev: 3.9.2
+    hooks:
+    -   id: flake8
+-   repo: https://github.com/pre-commit/mirrors-mypy
+    rev: v0.942
+    hooks:
+    -   id: mypy
+-   repo: https://github.com/psf/black
+    rev: 22.3.0
+    hooks:
+    -   id: black
+-   repo: https://github.com/asottile/pyupgrade
+    rev: v2.31.1
+    hooks:
+    -   id: pyupgrade
+        args: [--py36-plus]
+-   repo: https://github.com/asottile/blacken-docs
+    rev: v1.12.1
+    hooks:
+    -   id: blacken-docs
+        additional_dependencies: [black==22.1.0]
diff --git a/2-up.py b/2-up.py
new file mode 100644
index 0000000..24b89a8
--- /dev/null
+++ b/2-up.py
@@ -0,0 +1,52 @@
+"""
+Create a booklet-style PDF from a single input.
+
+Pairs of two pages will be put on one page (left and right). If the input has
+an odd number of pages, the last page is put on a page of its own.
+
+usage: python 2-up.py input_file output_file
+"""
+
+import sys
+
+from PyPDF2 import PdfFileReader, PdfFileWriter
+
+
+def main() -> None:
+    """Merge the pages of the input PDF pairwise and write the result."""
+    if len(sys.argv) != 3:
+        print("usage: python 2-up.py input_file output_file")
+        sys.exit(1)
+    input_path, output_path = sys.argv[1], sys.argv[2]
+
+    print("2-up input " + input_path)
+    writer = PdfFileWriter()
+    # The reader resolves page data from the stream on demand, so the input
+    # has to stay open until the output has been written.
+    with open(input_path, "rb") as input_fp:
+        reader = PdfFileReader(input_fp)
+        page_count = reader.getNumPages()
+        for page_index in range(0, page_count - 1, 2):
+            lhs = reader.getPage(page_index)
+            rhs = reader.getPage(page_index + 1)
+            # Put rhs directly to the right of lhs; expand=True (the last
+            # argument) grows lhs so that both pages fit on it.
+            x_offset = lhs.mediaBox.getUpperRight_x()
+            lhs.mergeTranslatedPage(rhs, x_offset, 0, True)
+            writer.addPage(lhs)
+            print(page_index, end=" ", flush=True)
+        if page_count % 2 == 1:
+            # The loop above only handles complete pairs; keep the unpaired
+            # last page instead of silently dropping it.
+            writer.addPage(reader.getPage(page_count - 1))
+            print(page_count - 1, end=" ", flush=True)
+        print()
+
+        print(f"writing {output_path}")
+        with open(output_path, "wb") as output_fp:
+            writer.write(output_fp)
+    print("done.")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/README.md b/README.md
index 486e35e..33d1058 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,18 @@
-# py-pdf-community-snippets
-Python code examples how to interact with PDF files
+[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
+
+# Python PDF Community Snippets
+
+The PPCS (Python PDF Community Snippets) is a collection of Python code snippets
+that show how to use Python to interact with PDF documents - reading, writing,
+analyzing, and modifying. Whatever you can imagine.
+
+This is NOT only limited to PyPDF2!
+
+## Rules
+
+1. Python Scripts that do something with PDF files only.
+2. All scripts are under the BSD license. If you add a script you agree to that.
+3. All scripts are self-contained. They don't use other scripts in the repository, but they are allowed to use PyPI packages
+4. All scripts are at most 1000 lines long.
+5. All scripts have a reasonable docstring that explains what the script is good for.
+6. All scripts pass CI ([black formatting](https://pypi.org/project/black/), [mypy](https://pypi.org/project/mypy/), [flake8](https://pypi.org/project/flake8/)) - don't worry, we will help you with that one!
diff --git a/ci.in b/ci.in
new file mode 100644
index 0000000..8d38cec
--- /dev/null
+++ b/ci.in
@@ -0,0 +1,8 @@
+black
+flake8
+flake8-bugbear
+flake8-comprehensions
+flake8-isort
+flake8-no-implicit-concat
+flake8-simplify
+mypy
diff --git a/ci.txt b/ci.txt
new file mode 100644
index 0000000..1375a4e
--- /dev/null
+++ b/ci.txt
@@ -0,0 +1,58 @@
+#
+# This file is autogenerated by pip-compile with python 3.10
+# To update, run:
+#
+#    pip-compile ci.in
+#
+astor==0.8.1
+    # via flake8-simplify
+attrs==21.4.0
+    # via flake8-bugbear
+black==22.3.0
+    # via -r ci.in
+click==8.1.2
+    # via black
+flake8==4.0.1
+    # via
+    #   -r ci.in
+    #   flake8-bugbear
+    #   flake8-comprehensions
+    #   flake8-isort
+    #   flake8-no-implicit-concat
+    #   flake8-simplify
+flake8-bugbear==22.3.23
+    # via -r ci.in
+flake8-comprehensions==3.8.0
+    # via -r ci.in
+flake8-isort==4.1.1
+    # via -r ci.in
+flake8-no-implicit-concat==0.3.3
+    # via -r ci.in
+flake8-simplify==0.19.2
+    # via -r ci.in
+isort==5.10.1
+    # via flake8-isort
+mccabe==0.6.1
+    # via flake8
+mypy==0.942
+    # via -r ci.in
+mypy-extensions==0.4.3
+    # via
+    #   black
+    #   mypy
+pathspec==0.9.0
+    # via black
+platformdirs==2.5.1
+    # via black
+pycodestyle==2.8.0
+    # via flake8
+pyflakes==2.4.0
+    # via flake8
+testfixtures==6.18.5
+    # via flake8-isort
+tomli==2.0.1
+    # via
+    #   black
+    #   mypy
+typing-extensions==4.1.1
+    # via mypy
diff --git a/pdf-image-extractor.py b/pdf-image-extractor.py
new file mode 100644
index 0000000..9b9536c
--- /dev/null
+++ b/pdf-image-extractor.py
@@ -0,0 +1,94 @@
+"""
+Extract images from PDF without resampling or altering.
+
+Adapted from work by Sylvain Pelissier
+http://stackoverflow.com/questions/2693820/extract-images-from-pdf-without-resampling-in-python
+
+usage: python pdf-image-extractor.py input_file [page_index]
+
+page_index is zero-based and defaults to 30.
+"""
+
+import sys
+from typing import Any
+
+import PyPDF2
+from PIL import Image
+
+# Zero-based index of the page that is read when no page_index is given.
+# Kept at 30 so that existing invocations behave as before.
+DEFAULT_PAGE_INDEX = 30
+
+
+def write_bytes(path: str, data: bytes) -> None:
+    """Write data to path unchanged."""
+    with open(path, "wb") as fp:
+        fp.write(data)
+
+
+def save_image(name: str, image: Any) -> None:
+    """
+    Store one image XObject in the current directory.
+
+    The file is called name plus an extension that depends on the /Filter
+    of the image. Images with a /Filter other than the ones below are
+    skipped.
+    """
+    size = (image["/Width"], image["/Height"])
+    data = image.getData()
+    mode = "RGB" if image["/ColorSpace"] == "/DeviceRGB" else "P"
+
+    if "/Filter" not in image:
+        Image.frombytes(mode, size, data).save(name + ".png")
+        return
+
+    image_filter = image["/Filter"]
+    if image_filter == "/FlateDecode":
+        img = Image.frombytes(mode, size, data)
+        if "/SMask" in image:  # add alpha channel
+            alpha = Image.frombytes("L", size, image["/SMask"].getData())
+            img.putalpha(alpha)
+        img.save(name + ".png")
+    elif image_filter == "/DCTDecode":
+        write_bytes(name + ".jpg", data)
+    elif image_filter == "/JPXDecode":
+        write_bytes(name + ".jp2", data)
+    elif image_filter == "/CCITTFaxDecode":
+        write_bytes(name + ".tiff", data)
+
+
+def main() -> None:
+    """Extract the images of one page of the PDF given on the command line."""
+    if len(sys.argv) not in (2, 3):
+        print(f"\nUsage: python {sys.argv[0]} input_file [page_index]\n")
+        sys.exit(1)
+    pdf = sys.argv[1]
+    page_index = DEFAULT_PAGE_INDEX
+    if len(sys.argv) == 3:
+        try:
+            page_index = int(sys.argv[2])
+        except ValueError:
+            print(f"page_index must be an integer, got {sys.argv[2]!r}")
+            sys.exit(1)
+
+    with open(pdf, "rb") as fp:
+        reader = PyPDF2.PdfFileReader(fp)
+        page_count = reader.getNumPages()
+        if not 0 <= page_index < page_count:
+            print(f"page {page_index} does not exist ({page_count} pages)")
+            sys.exit(1)
+        page = reader.getPage(page_index)
+
+        if "/XObject" in page["/Resources"]:
+            x_objects = page["/Resources"]["/XObject"].getObject()
+            for name in x_objects:
+                if x_objects[name]["/Subtype"] == "/Image":
+                    # XObject names start with "/", which is not wanted in
+                    # the file name.
+                    save_image(name[1:], x_objects[name])
+        else:
+            print("No image found.")
+
+
+if __name__ == "__main__":
+    main()