Skip to content

Commit

Permalink
Merge branch 'main' into patch-1
Browse files Browse the repository at this point in the history
  • Loading branch information
MartinThoma authored Apr 15, 2022
2 parents 77e83a2 + 012709f commit 7a6284a
Show file tree
Hide file tree
Showing 33 changed files with 521 additions and 328 deletions.
98 changes: 98 additions & 0 deletions .github/workflows/github-ci.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
#
# Jobs:
#   tests    - run flake8 (Python 3 only) and pytest under coverage on every
#              Python version in the matrix, uploading the raw coverage data.
#   package  - build the sdist and wheel and verify their contents/metadata.
#   coverage - combine the per-version coverage data and upload it to Codecov.

name: CI

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]

env:
  # Read via ${{ env.PYTHON_LATEST }} by the "package" and "coverage" jobs.
  # Without this definition the expression evaluates to an empty string.
  # Keep it quoted: an unquoted 3.10 is parsed by YAML as the float 3.1.
  PYTHON_LATEST: "3.10"

jobs:
  tests:
    name: pytest on ${{ matrix.python-version }}
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # Versions are quoted so that "3.10" is not read as the number 3.1.
        python-version: ["2.7", "3.6", "3.7", "3.8", "3.9", "3.10"]

    steps:
      - name: Checkout Code
        uses: actions/checkout@v3
      - name: Setup Python
        uses: actions/setup-python@v3
        with:
          python-version: ${{ matrix.python-version }}
      - name: Upgrade pip
        run: |
          python -m pip install --upgrade pip
      - name: Install requirements (Python 3)
        if: matrix.python-version != '2.7'
        run: |
          pip install -r requirements/ci.txt
      # Python 2.7 gets a hand-picked minimal set instead of requirements/ci.txt.
      - name: Install requirements (Python 2)
        if: matrix.python-version == '2.7'
        run: |
          pip install pillow pytest coverage
      - name: Install PyPDF2
        run: |
          pip install .
      # flake8 is skipped on 2.7, where it is not installed by the step above.
      - name: Test with flake8
        run: |
          flake8 . --ignore=E203,W503,W504,E,F403,F405
        if: matrix.python-version != '2.7'
      # --parallel-mode writes one .coverage.* data file per run so that the
      # "coverage" job can combine the results of all matrix entries.
      - name: Test with pytest
        run: |
          python -m coverage run --parallel-mode -m pytest Tests -vv
      - name: Upload coverage data
        uses: actions/upload-artifact@v3
        with:
          name: coverage-data
          path: .coverage.*
          if-no-files-found: ignore

  package:
    name: Build & verify package
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v3
      - uses: actions/setup-python@v3
        with:
          python-version: ${{env.PYTHON_LATEST}}

      - run: python -m pip install build twine check-wheel-contents
      - run: python -m build --sdist --wheel .
      - run: ls -l dist
      - run: check-wheel-contents dist/*.whl
      - name: Check long_description
        run: python -m twine check dist/*

  coverage:
    name: Combine & check coverage.
    runs-on: ubuntu-latest
    # Needs the coverage-data artifact produced by every "tests" matrix entry.
    needs: tests

    steps:
      - uses: actions/checkout@v3
      - uses: actions/setup-python@v3
        with:
          # Use latest Python, so it understands all syntax.
          python-version: ${{env.PYTHON_LATEST}}

      - run: python -m pip install --upgrade coverage[toml]

      - uses: actions/download-artifact@v3
        with:
          name: coverage-data

      - name: Combine coverage & create xml report
        run: |
          python -m coverage combine
          python -m coverage xml
      - name: Upload Coverage to Codecov
        uses: codecov/codecov-action@v2
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
          files: ./coverage.xml
51 changes: 0 additions & 51 deletions .github/workflows/unit-tests.yaml

This file was deleted.

5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,13 @@
.tox
build
.idea/*
.coverage
*.egg-info/
dist/*

# Code coverage artifacts
.coverage*
coverage.xml

# Editors / IDEs
.vscode/

Expand Down
19 changes: 0 additions & 19 deletions .travis.yml

This file was deleted.

2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,4 @@ clean:
rm -rf Tests/__pycache__ PyPDF2/__pycache__ Image9.png htmlcov docs/_build dist dont_commit_merged.pdf dont_commit_writer.pdf PyPDF2.egg-info PyPDF2_pdfLocation.txt

test:
pytest Tests/tests.py Tests --cov --cov-report term-missing -vv --cov-report html
pytest Tests --cov --cov-report term-missing -vv --cov-report html
12 changes: 9 additions & 3 deletions PyPDF2/filters.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
# vim: sw=4:expandtab:foldmethod=marker
#
# Copyright (c) 2006, Mathieu Fenniak
# All rights reserved.
#
Expand Down Expand Up @@ -40,7 +38,7 @@
from cStringIO import StringIO
else:
from io import StringIO
import struct
import struct

try:
import zlib
Expand Down Expand Up @@ -356,6 +354,10 @@ def decode(data, decodeParms=None):
class CCITTFaxDecode(object):
def decode(data, decodeParms=None, height=0):
if decodeParms:
from PyPDF2.generic import ArrayObject
if isinstance(decodeParms, ArrayObject):
if len(decodeParms) == 1:
decodeParms = decodeParms[0]
if decodeParms.get("/K", 1) == -1:
CCITTgroup = 4
else:
Expand Down Expand Up @@ -451,6 +453,10 @@ def _xobj_to_image(x_object_obj):
img_byte_arr = io.BytesIO()
img.save(img_byte_arr, format="PNG")
data = img_byte_arr.getvalue()
elif x_object_obj["/Filter"] in (["/LZWDecode"], ['/ASCII85Decode'], ['/CCITTFaxDecode']):
from PyPDF2.utils import b_
extension = ".png"
data = b_(data)
elif x_object_obj["/Filter"] == "/DCTDecode":
extension = ".jpg"
elif x_object_obj["/Filter"] == "/JPXDecode":
Expand Down
49 changes: 25 additions & 24 deletions PyPDF2/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@
import decimal
import codecs

from PyPDF2.utils import ERR_STREAM_TRUNCATED_PREMATURELY

ObjectPrefix = b_('/<[tf(n%')
NumberSigns = b_('+-')
IndirectPattern = re.compile(b_(r"[+-]?(\d+)\s+(\d+)\s+R[^a-zA-Z]"))
Expand Down Expand Up @@ -199,17 +201,15 @@ def readFromStream(stream, pdf):
while True:
tok = stream.read(1)
if not tok:
# stream has truncated prematurely
raise PdfStreamError("Stream has ended unexpectedly")
raise PdfStreamError(ERR_STREAM_TRUNCATED_PREMATURELY)
if tok.isspace():
break
idnum += tok
generation = b_("")
while True:
tok = stream.read(1)
if not tok:
# stream has truncated prematurely
raise PdfStreamError("Stream has ended unexpectedly")
raise PdfStreamError(ERR_STREAM_TRUNCATED_PREMATURELY)
if tok.isspace():
if not generation:
continue
Expand Down Expand Up @@ -273,10 +273,11 @@ def readFromStream(stream):
readFromStream = staticmethod(readFromStream)


##
# Given a string (either a "str" or "unicode"), create a ByteStringObject or a
# TextStringObject to represent the string.
def createStringObject(string):
"""
Given a string (either a "str" or "unicode"), create a ByteStringObject or a
TextStringObject to represent the string.
"""
if isinstance(string, utils.string_type):
return TextStringObject(string)
elif isinstance(string, utils.bytes_type):
Expand Down Expand Up @@ -306,8 +307,7 @@ def readHexStringFromStream(stream):
while True:
tok = readNonWhitespace(stream)
if not tok:
# stream has truncated prematurely
raise PdfStreamError("Stream has ended unexpectedly")
raise PdfStreamError(ERR_STREAM_TRUNCATED_PREMATURELY)
if tok == b_(">"):
break
x += tok
Expand All @@ -328,8 +328,7 @@ def readStringFromStream(stream):
while True:
tok = stream.read(1)
if not tok:
# stream has truncated prematurely
raise PdfStreamError("Stream has ended unexpectedly")
raise PdfStreamError(ERR_STREAM_TRUNCATED_PREMATURELY)
if tok == b_("("):
parens += 1
elif tok == b_(")"):
Expand Down Expand Up @@ -392,16 +391,17 @@ def readStringFromStream(stream):
return createStringObject(txt)


##
# Represents a string object where the text encoding could not be determined.
# This occurs quite often, as the PDF spec doesn't provide an alternate way to
# represent strings -- for example, the encryption data stored in files (like
# /O) is clearly not text, but is still stored in a "String" object.
class ByteStringObject(utils.bytes_type, PdfObject):
"""
Represents a string object where the text encoding could not be determined.
This occurs quite often, as the PDF spec doesn't provide an alternate way to
represent strings -- for example, the encryption data stored in files (like
/O) is clearly not text, but is still stored in a "String" object.
"""

##
# For compatibility with TextStringObject.original_bytes. This method
# returns self.
# self.
original_bytes = property(lambda self: self)

def writeToStream(self, stream, encryption_key):
Expand All @@ -413,12 +413,14 @@ def writeToStream(self, stream, encryption_key):
stream.write(b_(">"))


##
# Represents a string object that has been decoded into a real unicode string.
# If read from a PDF document, this string appeared to match the
# PDFDocEncoding, or contained a UTF-16BE BOM mark to cause UTF-16 decoding to
# occur.
class TextStringObject(utils.string_type, PdfObject):
"""
Represents a string object that has been decoded into a real unicode string.
If read from a PDF document, this string appeared to match the
PDFDocEncoding, or contained a UTF-16BE BOM mark to cause UTF-16 decoding to
occur.
"""

autodetect_pdfdocencoding = False
autodetect_utf16 = False

Expand Down Expand Up @@ -569,8 +571,7 @@ def readFromStream(stream, pdf):
skipOverComment(stream)
continue
if not tok:
# stream has truncated prematurely
raise PdfStreamError("Stream has ended unexpectedly")
raise PdfStreamError(ERR_STREAM_TRUNCATED_PREMATURELY)

if debug: print(("Tok:", tok))
if tok == b_(">"):
Expand Down
6 changes: 2 additions & 4 deletions PyPDF2/merger.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
# vim: sw=4:expandtab:foldmethod=marker
#
# Copyright (c) 2006, Mathieu Fenniak
# All rights reserved.
#
Expand Down Expand Up @@ -119,13 +117,13 @@ def merge(self, position, fileobj, bookmark=None, pages=None, import_bookmarks=T
fileobj = StreamIO(filecontent)
my_file = True
elif isinstance(fileobj, PdfFileReader):
if hasattr(fileobj, '_decryption_key'):
decryption_key = fileobj._decryption_key
orig_tell = fileobj.stream.tell()
fileobj.stream.seek(0)
filecontent = StreamIO(fileobj.stream.read())
fileobj.stream.seek(orig_tell) # reset the stream to its original location
fileobj = filecontent
if hasattr(fileobj, '_decryption_key'):
decryption_key = fileobj._decryption_key
my_file = True

# Create a new PdfFileReader instance using the stream
Expand Down
9 changes: 1 addition & 8 deletions PyPDF2/pdf.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
# -*- coding: utf-8 -*-
#
# vim: sw=4:expandtab:foldmethod=marker
#
# Copyright (c) 2006, Mathieu Fenniak
# Copyright (c) 2007, Ashish Kulkarni <kulkarni.ashish@gmail.com>
#
Expand Down Expand Up @@ -1637,7 +1635,7 @@ def _getObjectFromStream(self, indirectReference):
streamData.seek(0, 0)
lines = streamData.readlines()
for i in range(0, len(lines)):
print((lines[i]))
print(lines[i])
streamData.seek(pos, 0)
try:
obj = readObject(streamData, self)
Expand Down Expand Up @@ -2588,11 +2586,6 @@ def mergeRotatedScaledTranslatedPage(self, page2, rotation, scale, tx, ty, expan
ctm[1][0], ctm[1][1],
ctm[2][0], ctm[2][1]], expand)

##
# Applies a transformation matrix to the page.
#
# @param ctm A 6 elements tuple containing the operands of the
# transformation matrix
def addTransformation(self, ctm):
"""
Applies a transformation matrix to the page.
Expand Down
Loading

0 comments on commit 7a6284a

Please sign in to comment.