Skip to content

Commit

Permalink
Initial commit adding CI, pre-commit + 2-up.py
Browse files Browse the repository at this point in the history
  • Loading branch information
MartinThoma committed Apr 9, 2022
1 parent 7a845c0 commit d0cc612
Show file tree
Hide file tree
Showing 8 changed files with 267 additions and 2 deletions.
44 changes: 44 additions & 0 deletions .github/workflows/code-quality.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# This workflow installs the CI dependencies and checks the code with black, flake8 and mypy on a variety of Python versions
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions

name: Unit Tests

on:
push:
branches: [ main ]
pull_request:
branches: [ main ]

jobs:
build:

runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.8", "3.9", "3.10"]

steps:
- uses: actions/checkout@v2
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}

- name: Upgrade pip
run: |
python -m pip install --upgrade pip
- name: Install requirements (python 3)
run: |
pip install -r ci.txt
- name: Test with black
run: black --check .

- name: Test with flake8
run: |
flake8
- name: Test with mypy
run: |
mypy . --ignore-missing-imports
7 changes: 7 additions & 0 deletions .isort.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
[settings]
line_length=79
indent=' '
multi_line_output=3
length_sort=0
include_trailing_comma=True
skip=docs
38 changes: 38 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# pre-commit run --all-files
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.2.0
hooks:
- id: check-ast
- id: check-byte-order-marker
- id: check-case-conflict
- id: check-docstring-first
- id: check-yaml
- id: debug-statements
- id: end-of-file-fixer
- id: trailing-whitespace
- id: mixed-line-ending
- id: check-added-large-files
args: ['--maxkb=1000']
- repo: https://gitlab.com/pycqa/flake8
rev: 3.9.2
hooks:
- id: flake8
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v0.942
hooks:
- id: mypy
- repo: https://github.com/psf/black
rev: 22.3.0
hooks:
- id: black
- repo: https://github.com/asottile/pyupgrade
rev: v2.31.1
hooks:
- id: pyupgrade
args: [--py36-plus]
- repo: https://github.com/asottile/blacken-docs
rev: v1.12.1
hooks:
- id: blacken-docs
additional_dependencies: [black==22.1.0]
35 changes: 35 additions & 0 deletions 2-up.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
"""
Create a booklet-style PDF from a single input.
Pairs of two pages will be put on one page (left and right)
usage: python 2-up.py input_file output_file
"""

from PyPDF2 import PdfFileWriter, PdfFileReader
import sys


def main():
    """Put consecutive pairs of pages side by side on a single page.

    The input path is read from ``sys.argv[1]`` and the output path from
    ``sys.argv[2]``. Pages 0/1, 2/3, ... are merged so that the second page
    of each pair sits to the right of the first. If the document has an odd
    number of pages, the last page is appended on its own instead of being
    dropped.

    Exits with status 1 and prints a usage line when the number of
    command-line arguments is wrong.
    """
    if len(sys.argv) != 3:
        print("usage: python 2-up.py input_file output_file")
        sys.exit(1)

    input_path, output_path = sys.argv[1], sys.argv[2]
    print("2-up input " + input_path)

    # The input stays open until the output has been written: the reader is
    # handed the stream itself and may still need it while pages are
    # serialized.
    with open(input_path, "rb") as in_fp:
        reader = PdfFileReader(in_fp)
        writer = PdfFileWriter()
        page_count = reader.getNumPages()

        for index in range(0, page_count - 1, 2):
            lhs = reader.getPage(index)
            rhs = reader.getPage(index + 1)
            # Shift the right-hand page by the width of the left-hand one;
            # the final ``True`` asks PyPDF2 to expand the page to fit both.
            offset_x = lhs.mediaBox.getUpperRight_x()
            lhs.mergeTranslatedPage(rhs, offset_x, 0, True)
            writer.addPage(lhs)
            # Progress indicator on one line (the old trailing comma was a
            # Python 2 idiom that does nothing useful in Python 3).
            print(f"{index} ", end="", flush=True)

        if page_count % 2:
            # Odd page count: keep the unpaired last page.
            writer.addPage(reader.getPage(page_count - 1))

        print()  # terminate the progress line
        print(f"writing {output_path}")
        with open(output_path, "wb") as fp:
            writer.write(fp)
    print("done.")


if __name__ == "__main__":
    main()
20 changes: 18 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,18 @@
# py-pdf-community-snippets
Python code examples how to interact with PDF files
[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)

# Python PDF Community Snippets

The PPCS (Python PDF Community Snippets) is a collection of Python code snippets
that show how to use Python to interact with PDF documents - reading, writing,
analyzing, and modifying. Whatever you can imagine.

This is NOT limited to PyPDF2!

## Rules

1. Only Python scripts that do something with PDF files.
2. All scripts are under the BSD license. If you add a script you agree to that.
3. All scripts are self-contained. They don't use other scripts in the repository, but they are allowed to use PyPI packages.
4. All scripts are at most 1000 lines long.
5. All scripts have a reasonable docstring that explains what the script is good for.
6. All scripts pass CI ([black formatting](https://pypi.org/project/black/), [mypy](https://pypi.org/project/mypy/), [flake8](https://pypi.org/project/flake8/)) - don't worry, we will help you with that one!
8 changes: 8 additions & 0 deletions ci.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
black
flake8
flake8-bugbear
flake8-comprehensions
flake8-isort
flake8-no-implicit-concat
flake8-simplify
mypy
58 changes: 58 additions & 0 deletions ci.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#
# This file is autogenerated by pip-compile with python 3.10
# To update, run:
#
# pip-compile ci.in
#
astor==0.8.1
# via flake8-simplify
attrs==21.4.0
# via flake8-bugbear
black==22.3.0
# via -r ci.in
click==8.1.2
# via black
flake8==4.0.1
# via
# -r ci.in
# flake8-bugbear
# flake8-comprehensions
# flake8-isort
# flake8-no-implicit-concat
# flake8-simplify
flake8-bugbear==22.3.23
# via -r ci.in
flake8-comprehensions==3.8.0
# via -r ci.in
flake8-isort==4.1.1
# via -r ci.in
flake8-no-implicit-concat==0.3.3
# via -r ci.in
flake8-simplify==0.19.2
# via -r ci.in
isort==5.10.1
# via flake8-isort
mccabe==0.6.1
# via flake8
mypy==0.942
# via -r ci.in
mypy-extensions==0.4.3
# via
# black
# mypy
pathspec==0.9.0
# via black
platformdirs==2.5.1
# via black
pycodestyle==2.8.0
# via flake8
pyflakes==2.4.0
# via flake8
testfixtures==6.18.5
# via flake8-isort
tomli==2.0.1
# via
# black
# mypy
typing-extensions==4.1.1
# via mypy
59 changes: 59 additions & 0 deletions pdf-image-extractor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
"""
Extract images from PDF without resampling or altering.
Adapted from work by Sylvain Pelissier
http://stackoverflow.com/questions/2693820/extract-images-from-pdf-without-resampling-in-python
"""

import sys
import PyPDF2
from PIL import Image

# Zero-based page index inspected when none is given on the command line.
# This is the value the script originally had hard-coded.
DEFAULT_PAGE_INDEX = 30

# Stream filters whose bytes are written to disk unchanged, mapped to the
# file extension used for them.
# NOTE(review): raw /CCITTFaxDecode data may need a TIFF header before
# viewers accept it - confirm.
RAW_STREAM_EXTENSIONS = {
    "/DCTDecode": ".jpg",
    "/JPXDecode": ".jp2",
    "/CCITTFaxDecode": ".tiff",
}


def raw_extension(image_filter):
    """Return the extension for a pass-through filter, or None.

    Compares by equality instead of hashing because ``/Filter`` may be an
    array (unhashable) rather than a single name.
    """
    for filter_name, extension in RAW_STREAM_EXTENSIONS.items():
        if image_filter == filter_name:
            return extension
    return None


def save_image(stem, image):
    """Write one image XObject to ``<stem>.<ext>`` in the working directory.

    Images without a ``/Filter`` and ``/FlateDecode`` images are rebuilt
    with Pillow and saved as PNG; ``/DCTDecode``, ``/JPXDecode`` and
    ``/CCITTFaxDecode`` streams are written out as-is. Any other filter
    is skipped, as before.
    """
    size = (image["/Width"], image["/Height"])
    data = image.getData()
    mode = "RGB" if image["/ColorSpace"] == "/DeviceRGB" else "P"

    if "/Filter" not in image:
        Image.frombytes(mode, size, data).save(stem + ".png")
        return

    image_filter = image["/Filter"]
    if image_filter == "/FlateDecode":
        picture = Image.frombytes(mode, size, data)
        if "/SMask" in image:  # add alpha channel
            alpha = Image.frombytes("L", size, image["/SMask"].getData())
            picture.putalpha(alpha)
        picture.save(stem + ".png")
        return

    extension = raw_extension(image_filter)
    if extension is not None:
        with open(stem + extension, "wb") as out_file:
            out_file.write(data)


def extract_page_images(pdf_path, page_index=DEFAULT_PAGE_INDEX):
    """Extract every image XObject found on one page of ``pdf_path``.

    Prints "No image found." when the page resources have no ``/XObject``
    entry.

    Raises:
        IndexError: if ``page_index`` is outside the document's page range.
    """
    with open(pdf_path, "rb") as pdf_file:
        reader = PyPDF2.PdfFileReader(pdf_file)
        page_count = reader.getNumPages()
        if not 0 <= page_index < page_count:
            raise IndexError(
                f"page index {page_index} is out of range; "
                f"the document has {page_count} page(s)"
            )
        resources = reader.getPage(page_index)["/Resources"]

        if "/XObject" not in resources:
            print("No image found.")
            return

        x_objects = resources["/XObject"].getObject()
        for name in x_objects:
            candidate = x_objects[name]
            if candidate["/Subtype"] == "/Image":
                # Object names start with "/", which is dropped for the
                # output file name.
                save_image(name[1:], candidate)


def main(argv=None):
    """Command-line entry point: ``input_file [page_index]``.

    ``argv`` defaults to ``sys.argv``. Exits with status 1 on wrong usage,
    a non-integer page index, or a page index outside the document.
    """
    argv = sys.argv if argv is None else argv
    usage = f"\nUsage: python {argv[0]} input_file [page_index]\n"
    if len(argv) not in (2, 3):
        print(usage)
        sys.exit(1)

    page_index = DEFAULT_PAGE_INDEX
    if len(argv) == 3:
        try:
            page_index = int(argv[2])
        except ValueError:
            print(usage)
            sys.exit(1)

    try:
        extract_page_images(argv[1], page_index)
    except IndexError as exc:
        print(exc)
        sys.exit(1)


if __name__ == "__main__":
    main()

0 comments on commit d0cc612

Please sign in to comment.