Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 15 additions & 7 deletions pypdf/generic/_image_inline.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,18 +148,26 @@ def extract_inline_DCT(stream: StreamType) -> bytes:
Extract DCT (JPEG) stream from inline image.
The stream will be moved onto the EI.
"""
def read(length: int) -> bytes:
    """
    Read exactly ``length`` bytes from ``stream`` or fail.

    ``stream`` is not defined here; it is taken from the surrounding scope.

    Raises:
        PdfReadError: If the stream yields ``None`` or a chunk whose size
            differs from ``length``.
    """
    chunk = stream.read(length)
    # A short read (including 0 bytes for a non-zero request) means the end
    # of the file was reached; ``None`` means a non-blocking stream had no
    # bytes available. Both are treated as a premature end of the stream.
    if chunk is not None and len(chunk) == length:
        return chunk
    raise PdfReadError("Unexpected end of stream")

data_out: bytes = b""
# Read Blocks of data (ID/Size/data) up to ID=FF/D9
# https://www.digicamsoft.com/itu/itu-t81-36.html
notfirst = False
not_first = False
while True:
c = stream.read(1)
if notfirst or (c == b"\xff"):
c = read(1)
if not_first or (c == b"\xff"):
data_out += c
if c != b"\xff":
continue
notfirst = True
c = stream.read(1)
not_first = True
c = read(1)
data_out += c
if c == b"\xff":
stream.seek(-1, 1) # pragma: no cover
Expand All @@ -172,10 +180,10 @@ def extract_inline_DCT(stream: StreamType) -> bytes:
b"\xda\xdb\xdc\xdd\xde\xdf"
b"\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xfe"
):
c = stream.read(2)
c = read(2)
data_out += c
sz = c[0] * 256 + c[1]
data_out += stream.read(sz - 2)
data_out += read(sz - 2)

ei_tok = read_non_whitespace(stream)
ei_tok += stream.read(2)
Expand Down
16 changes: 16 additions & 0 deletions tests/generic/test_image_inline.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
"""Test the pypdf.generic._image_inline module."""
from io import BytesIO

import pytest

from pypdf import PdfReader
from pypdf.errors import PdfReadError
from pypdf.generic._image_inline import is_followed_by_binary_data
from tests import get_data_from_url


def test_is_followed_by_binary_data():
Expand Down Expand Up @@ -59,3 +64,14 @@ def test_is_followed_by_binary_data():

stream = BytesIO(b"1234.56 42 13 37 10 20 c\n")
assert not is_followed_by_binary_data(stream)


@pytest.mark.enable_socket
def test_extract_inline_dct__early_end_of_file():
    """Loading the first image of the sample PDF must raise ``PdfReadError``."""
    pdf_bytes = get_data_from_url(
        "https://github.com/user-attachments/files/23056988/inline_dct__early_eof.pdf",
        name="inline_dct__early_eof.pdf",
    )
    first_page = PdfReader(BytesIO(pdf_bytes)).pages[0]

    # The whole access chain stays inside the context manager so that an error
    # raised at any step of it is the one being matched.
    with pytest.raises(PdfReadError, match=r"^Unexpected end of stream$"):
        first_page.images[0].image.load()
Loading