Skip to content

Commit

Permalink
ROB: Cope with invalid length in streams (#861)
Browse files Browse the repository at this point in the history
In case of invalid extract stream data looking for endstream

Closes #301
  • Loading branch information
pubpub-zz authored May 7, 2022
1 parent b580a45 commit e48bc6d
Show file tree
Hide file tree
Showing 4 changed files with 76 additions and 19 deletions.
28 changes: 28 additions & 0 deletions PyPDF2/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -578,6 +578,29 @@ def writeToStream(self, stream, encryption_key):

@staticmethod
def readFromStream(stream, pdf):
def getNextObjPos(p, p1, remGens, pdf):
l = pdf.xref[remGens[0]]
for o in l:
if p1 > l[o] and p < l[o]:
p1 = l[o]
if len(remGens) == 1:
return p1
else:
return getNextObjPos(p, p1, remGens[1:], pdf)

def readUnsizedFromSteam(stream, pdf):
# we are just pointing at beginning of the stream
eon = getNextObjPos(stream.tell(), 2**32, [g for g in pdf.xref], pdf) - 1
curr = stream.tell()
rw = stream.read(eon - stream.tell())
p = rw.find(b_("endstream"))
if p < 0:
raise PdfReadError(
f"Unable to find 'endstream' marker for obj starting at {curr}."
)
stream.seek(curr + p + 9)
return rw[: p - 1]

tmp = stream.read(2)
if tmp != b_("<<"):
raise PdfReadError(
Expand Down Expand Up @@ -641,6 +664,7 @@ def readFromStream(stream, pdf):
t = stream.tell()
length = pdf.getObject(length)
stream.seek(t, 0)
pstart = stream.tell()
data["__streamdata__"] = stream.read(length)
e = readNonWhitespace(stream)
ndstream = stream.read(8)
Expand All @@ -657,6 +681,10 @@ def readFromStream(stream, pdf):
if end == b_("endstream"):
# we found it by looking back one character further.
data["__streamdata__"] = data["__streamdata__"][:-1]
elif not pdf.strict:
stream.seek(pstart, 0)
data["__streamdata__"] = readUnsizedFromSteam(stream, pdf)
pos = stream.tell()
else:
stream.seek(pos, 0)
raise PdfReadError(
Expand Down
Binary file added resources/issue-301.pdf
Binary file not shown.
53 changes: 36 additions & 17 deletions tests/test_generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,31 +280,50 @@ def test_DictionaryObject_read_from_stream_stream_no_newline():
assert exc.value.args[0] == "Stream data must be followed by a newline"


def test_DictionaryObject_read_from_stream_stream_no_stream_length():
@pytest.mark.parametrize(("strict"), [(True), (False)])
def test_DictionaryObject_read_from_stream_stream_no_stream_length(strict):
stream = BytesIO(b"<< /S /GoTo >>stream\n")
pdf = None

class tst: # to replace pdf
strict = False

pdf = tst()
pdf.strict = strict
with pytest.raises(PdfReadError) as exc:
DictionaryObject.readFromStream(stream, pdf)
assert exc.value.args[0] == "Stream length not defined"


def test_DictionaryObject_read_from_stream_stream_stream_missing_endstream2():
stream = BytesIO(b"<< /S /GoTo /Length 10 >>stream\n ")
pdf = None
with pytest.raises(PdfReadError) as exc:
DictionaryObject.readFromStream(stream, pdf)
assert (
exc.value.args[0]
== "Unable to find 'endstream' marker after stream at byte 0x21."
)
@pytest.mark.parametrize(
("strict", "length", "shouldFail"),
[
(True, 6, False),
(True, 10, False),
(True, 4, True),
(False, 6, False),
(False, 10, False),
],
)
def test_DictionaryObject_read_from_stream_stream_stream_valid(
strict, length, shouldFail
):
stream = BytesIO(b"<< /S /GoTo /Length %d >>stream\nBT /F1\nendstream\n" % length)

class tst: # to replace pdf
strict = True

pdf = tst()
pdf.strict = strict
with pytest.raises(PdfReadError) as exc:
do = DictionaryObject.readFromStream(stream, pdf)
# TODO: What should happen with the stream?
assert do == {"/S": "/GoTo"}
if length in (6, 10):
assert b"BT /F1" in do._StreamObject__data
raise PdfReadError("__ALLGOOD__")
print(exc.value)
assert shouldFail ^ (exc.value.args[0] == "__ALLGOOD__")

def test_DictionaryObject_read_from_stream_stream_stream_valid():
stream = BytesIO(b"<< /S /GoTo /Length 10 >>stream\nBT /F1\nendstream\n")
pdf = None
do = DictionaryObject.readFromStream(stream, pdf)
# TODO: What should happen with the stream?
assert do == {"/S": "/GoTo"}


def test_RectangleObject():
Expand Down
14 changes: 12 additions & 2 deletions tests/test_writer.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os
from io import BytesIO

import pytest

Expand Down Expand Up @@ -334,8 +335,6 @@ def test_add_link():

def test_io_streams():
"""This is the example from the docs ("Streaming data")."""
# Arrange
from io import BytesIO

filepath = os.path.join(RESOURCE_ROOT, "pdflatex-outline.pdf")
with open(filepath, "rb") as fh:
Expand All @@ -359,3 +358,14 @@ def test_regression_issue670():
pdf_writer.addPage(reader.getPage(0))
with open("dont_commit_issue670.pdf", "wb") as f_pdf:
pdf_writer.write(f_pdf)

def test_issue301():
"""
Test with invalid stream length object
"""
with open(os.path.join(RESOURCE_ROOT, "issue-301.pdf"), "rb") as f:
r = PdfFileReader(f)
w = PdfFileWriter()
w.appendPagesFromReader(r)
o = BytesIO()
w.write(o)

0 comments on commit e48bc6d

Please sign in to comment.