From d0cc612e3b8747e5df9a44762ce47d2fb65022ee Mon Sep 17 00:00:00 2001
From: Martin Thoma <info@martin-thoma.de>
Date: Sat, 9 Apr 2022 23:08:40 +0200
Subject: [PATCH] Initial commit adding CI, pre-commit + 2-up.py

---
 .github/workflows/code-quality.yaml | 44 +++++++++++++++++++++
 .isort.cfg                          |  7 ++++
 .pre-commit-config.yaml             | 38 +++++++++++++++++++
 2-up.py                             | 35 +++++++++++++++++
 README.md                           | 20 +++++++++-
 ci.in                               |  8 ++++
 ci.txt                              | 58 ++++++++++++++++++++++++++++
 pdf-image-extractor.py              | 59 +++++++++++++++++++++++++++++
 8 files changed, 267 insertions(+), 2 deletions(-)
 create mode 100644 .github/workflows/code-quality.yaml
 create mode 100644 .isort.cfg
 create mode 100644 .pre-commit-config.yaml
 create mode 100644 2-up.py
 create mode 100644 ci.in
 create mode 100644 ci.txt
 create mode 100644 pdf-image-extractor.py

diff --git a/.github/workflows/code-quality.yaml b/.github/workflows/code-quality.yaml
new file mode 100644
index 0000000..0784cbe
--- /dev/null
+++ b/.github/workflows/code-quality.yaml
@@ -0,0 +1,44 @@
+# This workflow installs the CI requirements and runs the code-quality checks (black, flake8, mypy) with a variety of Python versions
+# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
+
+name: Unit Tests
+
+on:
+  push:
+    branches: [ main ]
+  pull_request:
+    branches: [ main ]
+
+jobs:
+  build:
+
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: ["3.8", "3.9", "3.10"]
+
+    steps:
+    - uses: actions/checkout@v2
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v2
+      with:
+        python-version: ${{ matrix.python-version }}
+
+    - name: Upgrade pip
+      run: |
+        python -m pip install --upgrade pip
+
+    - name: Install requirements (python 3)
+      run: |
+        pip install -r ci.txt
+
+    - name: Test with black
+      run: black --check .
+
+    - name: Test with flake8
+      run: |
+        flake8
+
+    - name: Test with mypy
+      run: |
+        mypy . --ignore-missing-imports
diff --git a/.isort.cfg b/.isort.cfg
new file mode 100644
index 0000000..ea049eb
--- /dev/null
+++ b/.isort.cfg
@@ -0,0 +1,7 @@
+[settings]
+line_length=79
+indent='    '
+multi_line_output=3
+length_sort=0
+include_trailing_comma=True
+skip=docs
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..a403a1b
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,38 @@
+# pre-commit run --all-files
+repos:
+-   repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.2.0
+    hooks:
+    -   id: check-ast
+    -   id: check-byte-order-marker
+    -   id: check-case-conflict
+    -   id: check-docstring-first
+    -   id: check-yaml
+    -   id: debug-statements
+    -   id: end-of-file-fixer
+    -   id: trailing-whitespace
+    -   id: mixed-line-ending
+    -   id: check-added-large-files
+        args: ['--maxkb=1000']
+-   repo: https://github.com/PyCQA/flake8
+    rev: 3.9.2
+    hooks:
+    -   id: flake8
+-   repo: https://github.com/pre-commit/mirrors-mypy
+    rev: v0.942
+    hooks:
+    -   id: mypy
+-   repo: https://github.com/psf/black
+    rev: 22.3.0
+    hooks:
+    -   id: black
+-   repo: https://github.com/asottile/pyupgrade
+    rev: v2.31.1
+    hooks:
+    -   id: pyupgrade
+        args: [--py36-plus]
+-   repo: https://github.com/asottile/blacken-docs
+    rev: v1.12.1
+    hooks:
+    -   id: blacken-docs
+        additional_dependencies: [black==22.3.0]
diff --git a/2-up.py b/2-up.py
new file mode 100644
index 0000000..24b89a8
--- /dev/null
+++ b/2-up.py
@@ -0,0 +1,50 @@
+"""
+Create a booklet-style PDF from a single input.
+
+Pairs of two pages will be put on one page (left and right). If the input
+has an odd number of pages, the last page is kept on a page of its own.
+
+usage: python 2-up.py input_file output_file
+"""
+
+import sys
+
+from PyPDF2 import PdfFileReader, PdfFileWriter
+
+
+def main():
+    """
+    Read the input PDF, merge each pair of pages and write the output PDF.
+
+    The input and output paths are taken from sys.argv[1] and sys.argv[2].
+    The script exits with status 1 if the argument count is wrong.
+    """
+    if len(sys.argv) != 3:
+        print("usage: python 2-up.py input_file output_file")
+        sys.exit(1)
+    print("2-up input " + sys.argv[1])
+    writer = PdfFileWriter()
+    # Keep the input open until the output is written: PyPDF2 may still
+    # read page content from the input stream while writing.
+    with open(sys.argv[1], "rb") as fp_in:
+        reader = PdfFileReader(fp_in)
+        num_pages = reader.getNumPages()
+        for index in range(0, num_pages, 2):
+            lhs = reader.getPage(index)
+            # An odd page count leaves the last page without a partner.
+            if index + 1 < num_pages:
+                rhs = reader.getPage(index + 1)
+                offset_x = lhs.mediaBox.getUpperRight_x()
+                lhs.mergeTranslatedPage(rhs, offset_x, 0, True)
+            writer.addPage(lhs)
+            print(index, end=" ", flush=True)
+        print()
+
+        print(f"writing {sys.argv[2]}")
+        with open(sys.argv[2], "wb") as fp:
+            writer.write(fp)
+    print("done.")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/README.md b/README.md
index 486e35e..33d1058 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,18 @@
-# py-pdf-community-snippets
-Python code examples how to interact with PDF files
+[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
+
+# Python PDF Community Snippets
+
+The PPCS (Python PDF Community Snippets) is a collection of Python code snippets
+that show how to use Python to interact with PDF documents - reading, writing,
+analyzing, and modifying. Whatever you can imagine.
+
+This is NOT limited to PyPDF2!
+
+## Rules
+
+1. Python Scripts that do something with PDF files only.
+2. All scripts are under the BSD license. If you add a script you agree to that.
+3. All scripts are self-contained. They don't use other scripts in the repository, but they are allowed to use PyPI packages.
+4. All scripts are at most 1000 lines long.
+5. All scripts have a reasonable docstring that explains what the script is good for.
+6. All scripts pass CI ([black formatting](https://pypi.org/project/black/), [mypy](https://pypi.org/project/mypy/), [flake8](https://pypi.org/project/flake8/)) - don't worry, we will help you with that one!
diff --git a/ci.in b/ci.in
new file mode 100644
index 0000000..8d38cec
--- /dev/null
+++ b/ci.in
@@ -0,0 +1,8 @@
+black
+flake8
+flake8-bugbear
+flake8-comprehensions
+flake8-isort
+flake8-no-implicit-concat
+flake8-simplify
+mypy
diff --git a/ci.txt b/ci.txt
new file mode 100644
index 0000000..1375a4e
--- /dev/null
+++ b/ci.txt
@@ -0,0 +1,58 @@
+#
+# This file is autogenerated by pip-compile with python 3.10
+# To update, run:
+#
+#    pip-compile ci.in
+#
+astor==0.8.1
+    # via flake8-simplify
+attrs==21.4.0
+    # via flake8-bugbear
+black==22.3.0
+    # via -r ci.in
+click==8.1.2
+    # via black
+flake8==4.0.1
+    # via
+    #   -r ci.in
+    #   flake8-bugbear
+    #   flake8-comprehensions
+    #   flake8-isort
+    #   flake8-no-implicit-concat
+    #   flake8-simplify
+flake8-bugbear==22.3.23
+    # via -r ci.in
+flake8-comprehensions==3.8.0
+    # via -r ci.in
+flake8-isort==4.1.1
+    # via -r ci.in
+flake8-no-implicit-concat==0.3.3
+    # via -r ci.in
+flake8-simplify==0.19.2
+    # via -r ci.in
+isort==5.10.1
+    # via flake8-isort
+mccabe==0.6.1
+    # via flake8
+mypy==0.942
+    # via -r ci.in
+mypy-extensions==0.4.3
+    # via
+    #   black
+    #   mypy
+pathspec==0.9.0
+    # via black
+platformdirs==2.5.1
+    # via black
+pycodestyle==2.8.0
+    # via flake8
+pyflakes==2.4.0
+    # via flake8
+testfixtures==6.18.5
+    # via flake8-isort
+tomli==2.0.1
+    # via
+    #   black
+    #   mypy
+typing-extensions==4.1.1
+    # via mypy
diff --git a/pdf-image-extractor.py b/pdf-image-extractor.py
new file mode 100644
index 0000000..9b9536c
--- /dev/null
+++ b/pdf-image-extractor.py
@@ -0,0 +1,97 @@
+"""
+Extract images from PDF without resampling or altering.
+
+Every page of the input is searched. Images are written to the current
+directory as page<N>_<name>.<ext>, where N is the 1-based page number.
+
+Adapted from work by Sylvain Pelissier
+http://stackoverflow.com/questions/2693820/extract-images-from-pdf-without-resampling-in-python
+
+usage: python pdf-image-extractor.py input_file
+"""
+
+import sys
+
+import PyPDF2
+from PIL import Image
+
+# Filters whose stream data is written to disk as-is, mapped to the file
+# extension used for the output file.
+RAW_FILTER_EXTENSIONS = {
+    "/DCTDecode": ".jpg",
+    "/JPXDecode": ".jp2",
+    "/CCITTFaxDecode": ".tiff",
+}
+
+
+def save_image(stem, image):
+    """
+    Write one image XObject to disk.
+
+    stem is the output file name without extension; image is the image
+    XObject. Returns True if a file was written and False if the image
+    uses a filter this script does not handle.
+    """
+    size = (image["/Width"], image["/Height"])
+    data = image.getData()
+    mode = "RGB" if image["/ColorSpace"] == "/DeviceRGB" else "P"
+
+    if "/Filter" not in image:
+        Image.frombytes(mode, size, data).save(stem + ".png")
+        return True
+
+    image_filter = image["/Filter"]
+    if image_filter == "/FlateDecode":
+        img = Image.frombytes(mode, size, data)
+        if "/SMask" in image:  # add alpha channel
+            alpha = Image.frombytes("L", size, image["/SMask"].getData())
+            img.putalpha(alpha)
+        img.save(stem + ".png")
+        return True
+
+    # str() keeps the lookup safe if /Filter is not a single name.
+    extension = RAW_FILTER_EXTENSIONS.get(str(image_filter))
+    if extension is None:
+        print(f"Skipping {stem}: unsupported filter {image_filter}")
+        return False
+    with open(stem + extension, "wb") as fp:
+        fp.write(data)
+    return True
+
+
+def extract_images(pdf_path):
+    """
+    Save the images of every page of pdf_path.
+
+    Returns the number of image files written.
+    """
+    saved = 0
+    with open(pdf_path, "rb") as fp:
+        reader = PyPDF2.PdfFileReader(fp)
+        for page_index in range(reader.getNumPages()):
+            resources = reader.getPage(page_index)["/Resources"]
+            if "/XObject" not in resources:
+                continue
+            x_objects = resources["/XObject"].getObject()
+            for name in x_objects:
+                x_object = x_objects[name]
+                if x_object["/Subtype"] != "/Image":
+                    continue
+                # name[1:] drops the leading "/" of the PDF name.
+                stem = f"page{page_index + 1}_{name[1:]}"
+                if save_image(stem, x_object):
+                    saved += 1
+    return saved
+
+
+def main():
+    """Parse the command line and extract the images of the given PDF."""
+    if len(sys.argv) != 2:
+        print(f"\nUsage: python {sys.argv[0]} input_file\n")
+        sys.exit(1)
+    if extract_images(sys.argv[1]) == 0:
+        print("No image found.")
+
+
+if __name__ == "__main__":
+    main()