Merge branch 'main' into patch-1

py-pdf · Apr 15, 2022 · 7a6284a · 7a6284a
2 parents 77e83a2 + 012709f
commit 7a6284a
Show file tree

Hide file tree

Showing 33 changed files with 521 additions and 328 deletions.
diff --git a/.github/workflows/github-ci.yaml b/.github/workflows/github-ci.yaml
@@ -0,0 +1,98 @@
+# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
+# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
+
+name: CI
+
+on:
+  push:
+    branches: [ main ]
+  pull_request:
+    branches: [ main ]
+
+jobs:
+  tests:
+    name: pytest on ${{ matrix.python-version }}
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: ["2.7", "3.6", "3.7", "3.8", "3.9", "3.10"]
+
+    steps:
+    - name: Checkout Code
+      uses: actions/checkout@v3
+    - name: Setup Python
+      uses: actions/setup-python@v3
+      with:
+        python-version: ${{ matrix.python-version }}
+    - name: Upgrade pip
+      run: |
+        python -m pip install --upgrade pip
+    - name: Install requirements (Python 3)
+      if: matrix.python-version != '2.7'  # Python 2 gets a hand-picked package list in the next step instead
+      run: |
+        pip install -r requirements/ci.txt
+    - name: Install requirements (Python 2)
+      if: matrix.python-version == '2.7'  # installs only what the pytest/coverage step needs; no flake8
+      run: |
+        pip install pillow pytest coverage
+    - name: Install PyPDF2
+      run: |
+        pip install .
+    - name: Test with flake8
+      run: |
+        flake8 . --ignore=E203,W503,W504,E,F403,F405
+      if: matrix.python-version != '2.7'  # skipped on 2.7: the Python 2 install step does not install flake8
+    - name: Test with pytest
+      run: |
+        python -m coverage run --parallel-mode -m pytest Tests -vv
+    - name: Upload coverage data
+      uses: actions/upload-artifact@v3
+      with:
+        name: coverage-data
+        path: .coverage.*
+        if-no-files-found: ignore
+
+  package:
+    name: Build & verify package
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v3
+      - uses: actions/setup-python@v3
+        with:
+          python-version: "3.10"  # newest version in the tests matrix; env.PYTHON_LATEST is not defined in this workflow
+
+      - run: python -m pip install build twine check-wheel-contents
+      - run: python -m build --sdist --wheel .
+      - run: ls -l dist
+      - run: check-wheel-contents dist/*.whl
+      - name: Check long_description
+        run: python -m twine check dist/*
+
+  coverage:
+    name: Combine & check coverage.
+    runs-on: ubuntu-latest
+    needs: tests
+
+    steps:
+      - uses: actions/checkout@v3
+      - uses: actions/setup-python@v3
+        with:
+          # Use the newest Python from the tests matrix, so it understands all syntax.
+          python-version: "3.10"
+
+      - run: python -m pip install --upgrade coverage[toml]
+
+      - uses: actions/download-artifact@v3
+        with:
+          name: coverage-data
+
+      - name: Combine coverage & create xml report
+        run: |
+          python -m coverage combine
+          python -m coverage xml
+      - name: Upload Coverage to Codecov
+        uses: codecov/codecov-action@v2
+        with:
+          token: ${{ secrets.CODECOV_TOKEN }}
+          files: ./coverage.xml
diff --git a/.github/workflows/unit-tests.yaml b/.github/workflows/unit-tests.yaml
diff --git a/.gitignore b/.gitignore
@@ -4,10 +4,13 @@
 .tox
 build
 .idea/*
-.coverage
 *.egg-info/
 dist/*
 
+# Code coverage artifacts
+.coverage*
+coverage.xml
+
 # Editors / IDEs
 .vscode/
 

diff --git a/.travis.yml b/.travis.yml
diff --git a/Makefile b/Makefile
@@ -14,4 +14,4 @@ clean:
 	rm -rf Tests/__pycache__ PyPDF2/__pycache__ Image9.png htmlcov docs/_build dist dont_commit_merged.pdf dont_commit_writer.pdf PyPDF2.egg-info PyPDF2_pdfLocation.txt
 
 test:
-	pytest Tests/tests.py Tests --cov --cov-report term-missing -vv --cov-report html
+	pytest Tests --cov --cov-report term-missing -vv --cov-report html
diff --git a/PyPDF2/filters.py b/PyPDF2/filters.py
@@ -1,5 +1,3 @@
-# vim: sw=4:expandtab:foldmethod=marker
-#
 # Copyright (c) 2006, Mathieu Fenniak
 # All rights reserved.
 #
@@ -40,7 +38,7 @@
     from cStringIO import StringIO
 else:
     from io import StringIO
-    import struct
+import struct
 
 try:
     import zlib
@@ -356,6 +354,10 @@ def decode(data, decodeParms=None):
 class CCITTFaxDecode(object):
     def decode(data, decodeParms=None, height=0):
         if decodeParms:
+            from PyPDF2.generic import ArrayObject
+            if isinstance(decodeParms, ArrayObject):
+                if len(decodeParms) == 1:
+                    decodeParms = decodeParms[0]
             if decodeParms.get("/K", 1) == -1:
                 CCITTgroup = 4
             else:
@@ -451,6 +453,10 @@ def _xobj_to_image(x_object_obj):
             img_byte_arr = io.BytesIO()
             img.save(img_byte_arr, format="PNG")
             data = img_byte_arr.getvalue()
+        elif x_object_obj["/Filter"] in (["/LZWDecode"], ['/ASCII85Decode'], ['/CCITTFaxDecode']):
+            from PyPDF2.utils import b_
+            extension = ".png"
+            data = b_(data)
         elif x_object_obj["/Filter"] == "/DCTDecode":
             extension = ".jpg"
         elif x_object_obj["/Filter"] == "/JPXDecode":

diff --git a/PyPDF2/generic.py b/PyPDF2/generic.py
@@ -44,6 +44,8 @@
 import decimal
 import codecs
 
+from PyPDF2.utils import ERR_STREAM_TRUNCATED_PREMATURELY
+
 ObjectPrefix = b_('/<[tf(n%')
 NumberSigns = b_('+-')
 IndirectPattern = re.compile(b_(r"[+-]?(\d+)\s+(\d+)\s+R[^a-zA-Z]"))
@@ -199,17 +201,15 @@ def readFromStream(stream, pdf):
         while True:
             tok = stream.read(1)
             if not tok:
-                # stream has truncated prematurely
-                raise PdfStreamError("Stream has ended unexpectedly")
+                raise PdfStreamError(ERR_STREAM_TRUNCATED_PREMATURELY)
             if tok.isspace():
                 break
             idnum += tok
         generation = b_("")
         while True:
             tok = stream.read(1)
             if not tok:
-                # stream has truncated prematurely
-                raise PdfStreamError("Stream has ended unexpectedly")
+                raise PdfStreamError(ERR_STREAM_TRUNCATED_PREMATURELY)
             if tok.isspace():
                 if not generation:
                     continue
@@ -273,10 +273,11 @@ def readFromStream(stream):
     readFromStream = staticmethod(readFromStream)
 
 
-##
-# Given a string (either a "str" or "unicode"), create a ByteStringObject or a
-# TextStringObject to represent the string.
 def createStringObject(string):
+    """
+    Given a string (either a "str" or "unicode"), create a ByteStringObject or a
+    TextStringObject to represent the string.
+    """
     if isinstance(string, utils.string_type):
         return TextStringObject(string)
     elif isinstance(string, utils.bytes_type):
@@ -306,8 +307,7 @@ def readHexStringFromStream(stream):
     while True:
         tok = readNonWhitespace(stream)
         if not tok:
-            # stream has truncated prematurely
-            raise PdfStreamError("Stream has ended unexpectedly")
+            raise PdfStreamError(ERR_STREAM_TRUNCATED_PREMATURELY)
         if tok == b_(">"):
             break
         x += tok
@@ -328,8 +328,7 @@ def readStringFromStream(stream):
     while True:
         tok = stream.read(1)
         if not tok:
-            # stream has truncated prematurely
-            raise PdfStreamError("Stream has ended unexpectedly")
+            raise PdfStreamError(ERR_STREAM_TRUNCATED_PREMATURELY)
         if tok == b_("("):
             parens += 1
         elif tok == b_(")"):
@@ -392,16 +391,17 @@ def readStringFromStream(stream):
     return createStringObject(txt)
 
 
-##
-# Represents a string object where the text encoding could not be determined.
-# This occurs quite often, as the PDF spec doesn't provide an alternate way to
-# represent strings -- for example, the encryption data stored in files (like
-# /O) is clearly not text, but is still stored in a "String" object.
 class ByteStringObject(utils.bytes_type, PdfObject):
+    """
+    Represents a string object where the text encoding could not be determined.
+    This occurs quite often, as the PDF spec doesn't provide an alternate way to
+    represent strings -- for example, the encryption data stored in files (like
+    /O) is clearly not text, but is still stored in a "String" object.
+    """
 
     ##
     # For compatibility with TextStringObject.original_bytes.  This method
-    # returns self.
+    # returns self.
     original_bytes = property(lambda self: self)
 
     def writeToStream(self, stream, encryption_key):
@@ -413,12 +413,14 @@ def writeToStream(self, stream, encryption_key):
         stream.write(b_(">"))
 
 
-##
-# Represents a string object that has been decoded into a real unicode string.
-# If read from a PDF document, this string appeared to match the
-# PDFDocEncoding, or contained a UTF-16BE BOM mark to cause UTF-16 decoding to
-# occur.
 class TextStringObject(utils.string_type, PdfObject):
+    """
+    Represents a string object that has been decoded into a real unicode string.
+    If read from a PDF document, this string appeared to match the
+    PDFDocEncoding, or contained a UTF-16BE BOM mark to cause UTF-16 decoding to
+    occur.
+    """
+
     autodetect_pdfdocencoding = False
     autodetect_utf16 = False
 
@@ -569,8 +571,7 @@ def readFromStream(stream, pdf):
                 skipOverComment(stream)
                 continue
             if not tok:
-                # stream has truncated prematurely
-                raise PdfStreamError("Stream has ended unexpectedly")
+                raise PdfStreamError(ERR_STREAM_TRUNCATED_PREMATURELY)
 
             if debug: print(("Tok:", tok))
             if tok == b_(">"):

diff --git a/PyPDF2/merger.py b/PyPDF2/merger.py
@@ -1,5 +1,3 @@
-# vim: sw=4:expandtab:foldmethod=marker
-#
 # Copyright (c) 2006, Mathieu Fenniak
 # All rights reserved.
 #
@@ -119,13 +117,13 @@ def merge(self, position, fileobj, bookmark=None, pages=None, import_bookmarks=T
             fileobj = StreamIO(filecontent)
             my_file = True
         elif isinstance(fileobj, PdfFileReader):
+            if hasattr(fileobj, '_decryption_key'):
+                decryption_key = fileobj._decryption_key
             orig_tell = fileobj.stream.tell()
             fileobj.stream.seek(0)
             filecontent = StreamIO(fileobj.stream.read())
             fileobj.stream.seek(orig_tell) # reset the stream to its original location
             fileobj = filecontent
-            if hasattr(fileobj, '_decryption_key'):
-                decryption_key = fileobj._decryption_key
             my_file = True
 
         # Create a new PdfFileReader instance using the stream

diff --git a/PyPDF2/pdf.py b/PyPDF2/pdf.py
@@ -1,7 +1,5 @@
 # -*- coding: utf-8 -*-
 #
-# vim: sw=4:expandtab:foldmethod=marker
-#
 # Copyright (c) 2006, Mathieu Fenniak
 # Copyright (c) 2007, Ashish Kulkarni <kulkarni.ashish@gmail.com>
 #
@@ -1637,7 +1635,7 @@ def _getObjectFromStream(self, indirectReference):
                 streamData.seek(0, 0)
                 lines = streamData.readlines()
                 for i in range(0, len(lines)):
-                    print((lines[i]))
+                    print(lines[i])
                 streamData.seek(pos, 0)
             try:
                 obj = readObject(streamData, self)
@@ -2588,11 +2586,6 @@ def mergeRotatedScaledTranslatedPage(self, page2, rotation, scale, tx, ty, expan
                                                  ctm[1][0], ctm[1][1],
                                                  ctm[2][0], ctm[2][1]], expand)
 
-    ##
-    # Applys a transformation matrix the page.
-    #
-    # @param ctm   A 6 elements tuple containing the operands of the
-    #              transformation matrix
     def addTransformation(self, ctm):
         """
         Applies a transformation matrix to the page.