Skip to content

Commit

Permalink
ENH: add decode_as_image() to ContentStreams
Browse files Browse the repository at this point in the history
  • Loading branch information
pubpub-zz committed May 1, 2024
1 parent b1b55e6 commit 854c467
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 0 deletions.
20 changes: 20 additions & 0 deletions pypdf/generic/_data_structures.py
Original file line number Diff line number Diff line change
Expand Up @@ -947,6 +947,26 @@ def flate_encode(self, level: int = -1) -> "EncodedStreamObject":
retval._data = FlateDecode.encode(b_(self._data), level)
return retval

def decode_as_image(self) -> Any:
    """
    Attempt to decode this stream object as an image.

    When the stream's ``/Subtype`` entry is not ``/Image`` a warning is
    logged, but decoding is attempted anyway.

    Returns:
        The image produced by ``_xobj_to_image`` (a PIL image when a proper
        decoding has been found), or ``None`` when no extension is reported.
    """
    from ..filters import _xobj_to_image

    if self.get("/Subtype", "") != "/Image":
        # Prefer the indirect reference to identify the object in the log;
        # fall back to the object's repr when that attribute is missing.
        try:
            warning = f"{self.indirect_reference} does not seems to be an Image"  # pragma: no cover
        except AttributeError:
            warning = f"{self!r} object does not seems to be an Image"  # pragma: no cover
        logger_warning(warning, __name__)

    # Only the extension (as a success marker) and the image are used here.
    ext, _raw_bytes, picture = _xobj_to_image(self)
    return None if ext is None else picture  # pragma: no cover


class DecodedStreamObject(StreamObject):
    """
    Stream object subclass that adds no members of its own.

    All behaviour is inherited unchanged from ``StreamObject``.
    """
Expand Down
21 changes: 21 additions & 0 deletions tests/test_images.py
Original file line number Diff line number Diff line change
Expand Up @@ -346,3 +346,24 @@ def test_corrupted_jpeg_iss2266(pdf, pdf_name, images, images_name, filtr):
print(fn) # noqa: T201
img = Image.open(BytesIO(zf.read(fn)))
assert image_similarity(reader.pages[p].images[i].image, img) >= 0.99


@pytest.mark.enable_socket()
def test_extract_image_from_object(caplog):
    """
    Check ``decode_as_image()`` on an image XObject and on a non-image stream.

    The image XObject must decode to a PIL image. The page content stream is
    not an image: decoding it must log a warning and raise, both with its
    original ``indirect_reference`` and after that attribute is overridden.
    """
    url = "https://github.com/py-pdf/pypdf/files/15176076/B2.pdf"
    name = "iss2613.pdf"
    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
    pattern = reader.pages[0]["/Resources"]["/Pattern"]["/P1"]
    image = pattern["/Resources"]["/XObject"]["/X1"].decode_as_image()
    assert isinstance(image, Image.Image)

    # Fetch the content stream outside of ``pytest.raises`` so that only the
    # decoding call itself is expected to fail.
    co = reader.pages[0].get_contents()
    with pytest.raises(Exception):
        co.decode_as_image()
    assert "does not seems to be an Image" in caplog.text

    caplog.clear()
    # Reuse the same object: fetching the contents again would discard the
    # reference set here and merely repeat the previous check.
    co.indirect_reference = "for_test"
    with pytest.raises(Exception):
        co.decode_as_image()
    assert "for_test does not seems to be an Image" in caplog.text

0 comments on commit 854c467

Please sign in to comment.