diff --git a/pypdf/generic/_data_structures.py b/pypdf/generic/_data_structures.py
index 3ca761403..cae9a2c04 100644
--- a/pypdf/generic/_data_structures.py
+++ b/pypdf/generic/_data_structures.py
@@ -947,6 +947,33 @@ def flate_encode(self, level: int = -1) -> "EncodedStreamObject":
         retval._data = FlateDecode.encode(b_(self._data), level)
         return retval
 
+    def decode_as_image(self) -> Any:
+        """
+        Try to decode the stream object as an image.
+
+        A warning is logged when ``/Subtype`` is not ``/Image``; decoding is
+        still attempted afterwards.
+
+        Returns:
+            The image object produced by ``_xobj_to_image`` (a PIL image when
+            decoding succeeded), or ``None`` if no extension was determined.
+        """
+        # Imported here rather than at module level, presumably to avoid a
+        # circular import with ``filters``.
+        from ..filters import _xobj_to_image
+
+        if self.get("/Subtype", "") != "/Image":
+            try:
+                msg = f"{self.indirect_reference} does not seems to be an Image"  # pragma: no cover
+            except AttributeError:
+                msg = f"{self!r} object does not seems to be an Image"  # pragma: no cover
+            logger_warning(msg, __name__)
+        # The raw byte stream (second item) is not needed here.
+        extension, _, img = _xobj_to_image(self)
+        if extension is None:
+            return None  # pragma: no cover
+        return img
+
 
 class DecodedStreamObject(StreamObject):
     pass
diff --git a/tests/test_images.py b/tests/test_images.py
index ad694d669..df64d0cfe 100644
--- a/tests/test_images.py
+++ b/tests/test_images.py
@@ -346,3 +346,29 @@ def test_corrupted_jpeg_iss2266(pdf, pdf_name, images, images_name, filtr):
             print(fn)  # noqa: T201
             img = Image.open(BytesIO(zf.read(fn)))
             assert image_similarity(reader.pages[p].images[i].image, img) >= 0.99
+
+
+@pytest.mark.enable_socket()
+def test_extract_image_from_object(caplog):
+    """Decode an image XObject directly; a non-image stream must warn and fail."""
+    url = "https://github.com/py-pdf/pypdf/files/15176076/B2.pdf"
+    name = "iss2613.pdf"
+    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
+    image = reader.pages[0]["/Resources"]["/Pattern"]["/P1"]["/Resources"]["/XObject"][
+        "/X1"
+    ].decode_as_image()
+    assert isinstance(image, Image.Image)
+
+    # A page content stream is not an image: a warning is logged and decoding fails.
+    co = reader.pages[0].get_contents()
+    with pytest.raises(Exception):
+        co.decode_as_image()
+    assert "does not seems to be an Image" in caplog.text
+    caplog.clear()
+
+    # Same object, now carrying an indirect reference: it must not be fetched
+    # again, otherwise the attribute set here would be discarded.
+    co.indirect_reference = "for_test"
+    with pytest.raises(Exception):
+        co.decode_as_image()
+    assert "for_test does not seems to be an Image" in caplog.text