Skip to content

Commit

Permalink
MAINT: Move tests for images to own module (#2071)
Browse files Browse the repository at this point in the history
  • Loading branch information
MartinThoma authored Aug 8, 2023
1 parent 3f18d77 commit aad26dd
Show file tree
Hide file tree
Showing 4 changed files with 111 additions and 26 deletions.
2 changes: 1 addition & 1 deletion sample-files
109 changes: 109 additions & 0 deletions tests/test_images.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
"""
Tests which ensure that image extraction works properly go here.
Typically, tests in here should compare the extracted images count, names,
and/or the actual image data with the expected value.
"""

from io import BytesIO
from pathlib import Path

import pytest

from pypdf import PdfReader
from pypdf._page import PageObject

from . import get_pdf_from_url

# Directory containing this test module, as a resolved path.
TESTS_ROOT = Path(__file__).parent.resolve()
# Parent directory of TESTS_ROOT.
PROJECT_ROOT = TESTS_ROOT.parent
# The "resources" directory directly below PROJECT_ROOT.
RESOURCE_ROOT = PROJECT_ROOT / "resources"
# The "sample-files" directory directly below PROJECT_ROOT; the parametrized
# image extraction test builds its input and expected-output paths from it.
SAMPLE_ROOT = PROJECT_ROOT / "sample-files"


@pytest.mark.enable_socket()
def test_image_new_property():
    """
    Exercise the ``images`` property on the first page of a downloaded PDF.

    Checks the list of image keys, the number of items, lookups by integer
    index, string key, tuple key and slice, and the exceptions raised for an
    unsupported key type, an out-of-range index and an unknown image id.
    """
    pdf_name = "pdf_font_garbled.pdf"
    pdf_url = "https://github.com/py-pdf/pypdf/files/11219022/pdf_font_garbled.pdf"
    reader = PdfReader(BytesIO(get_pdf_from_url(pdf_url, name=pdf_name)))
    first_page = reader.pages[0]

    # Plain string keys come first, followed by two-element list keys.
    expected_keys = [
        "/I0",
        "/I1",
        "/I2",
        "/I3",
        "/I4",
        "/I5",
        "/I6",
        "/I7",
        "/I8",
        "/I9",
        ["/TPL1", "/Image5"],
        ["/TPL2", "/Image53"],
        ["/TPL2", "/Image37"],
        ["/TPL2", "/Image49"],
        ["/TPL2", "/Image51"],
        ["/TPL2", "/Image39"],
        ["/TPL2", "/Image57"],
        ["/TPL2", "/Image55"],
        ["/TPL2", "/Image43"],
        ["/TPL2", "/Image30"],
        ["/TPL2", "/Image22"],
        ["/TPL2", "/Image41"],
        ["/TPL2", "/Image47"],
        ["/TPL2", "/Image45"],
        ["/TPL3", "/Image65"],
        ["/TPL3", "/Image30"],
        ["/TPL3", "/Image61"],
        ["/TPL4", "/Image30"],
        ["/TPL5", "/Image30"],
        ["/TPL6", "/Image30"],
        ["/TPL7", "/Image30"],
        ["/TPL8", "/Image30"],
        ["/TPL9", "/Image30"],
        ["/TPL10", "/Image30"],
        ["/TPL11", "/Image30"],
        ["/TPL12", "/Image30"],
    ]
    assert first_page.images.keys() == expected_keys
    assert len(first_page.images.items()) == 36

    # Lookup by positive index, negative index and tuple key.
    assert first_page.images[0].name == "I0.png"
    assert len(first_page.images[-1].data) == 15168
    assert first_page.images["/TPL1", "/Image5"].image.format == "JPEG"

    # The image's indirect reference must resolve to the page's XObject entry.
    referenced_object = first_page.images["/I0"].indirect_reference.get_object()
    assert referenced_object == first_page["/Resources"]["/XObject"]["/I0"]

    # Slicing only needs to be iterable without raising.
    list(first_page.images[0:2])

    with pytest.raises(TypeError):
        first_page.images[b"0"]
    with pytest.raises(IndexError):
        first_page.images[9999]
    # just for test coverage:
    with pytest.raises(KeyError):
        first_page._get_image(["test"], first_page)
    assert list(PageObject(None, None).images) == []


@pytest.mark.parametrize(
    ("src", "page_index", "image_key", "expected"),
    [
        (
            SAMPLE_ROOT / "009-pdflatex-geotopo/GeoTopo.pdf",
            23,
            "/Im2",
            SAMPLE_ROOT / "009-pdflatex-geotopo/page-23-Im2.png",
        ),
        # (SAMPLE_ROOT / "009-pdflatex-geotopo/GeoTopo.pdf", 30, '/Fm22',
        # SAMPLE_ROOT / "009-pdflatex-geotopo/page-30-Fm22.png"),
    ],
)
@pytest.mark.samples()
def test_image_extraction(src, page_index, image_key, expected):
    """
    Compare the data of one extracted image with a stored reference file.

    The image is looked up by ``image_key`` on page ``page_index`` of the PDF
    at ``src``; its bytes must equal the content of the file at ``expected``.
    """
    reader = PdfReader(src)
    extracted = reader.pages[page_index].images[image_key]

    if not expected.exists():
        # A little helper for test generation: dump the extracted image under
        # a relative file name. Reading the missing reference file below still
        # raises, so the test does not pass silently.
        Path(f"page-{page_index}-{extracted.name}").write_bytes(extracted.data)

    reference_data = expected.read_bytes()
    assert extracted.data == reference_data
25 changes: 0 additions & 25 deletions tests/test_page.py
Original file line number Diff line number Diff line change
Expand Up @@ -1184,31 +1184,6 @@ def test_pdf_pages_missing_type():
writer.pages[0]


@pytest.mark.enable_socket()
def test_image_new_property():
    """
    Smoke-test the ``images`` property on the first page of a downloaded PDF.

    Most accesses only have to complete without raising; their results are
    discarded. The indirect reference of ``/I0`` is compared with the page's
    XObject entry, and the error cases are checked explicitly.
    """
    pdf_name = "pdf_font_garbled.pdf"
    pdf_url = "https://github.com/py-pdf/pypdf/files/11219022/pdf_font_garbled.pdf"
    reader = PdfReader(BytesIO(get_pdf_from_url(pdf_url, name=pdf_name)))
    first_page = reader.pages[0]

    # Results are intentionally unused: these statements only exercise the
    # accessors.
    first_page.images.keys()
    first_page.images.items()
    first_page.images[0].name
    first_page.images[-1].data
    first_page.images["/TPL1", "/Image5"].image

    referenced_object = first_page.images["/I0"].indirect_reference.get_object()
    assert referenced_object == first_page["/Resources"]["/XObject"]["/I0"]

    list(first_page.images[0:2])

    with pytest.raises(TypeError):
        first_page.images[b"0"]
    with pytest.raises(IndexError):
        first_page.images[9999]
    # just for test coverage:
    with pytest.raises(KeyError):
        first_page._get_image(["test"], first_page)
    assert list(PageObject(None, None).images) == []


@pytest.mark.samples()
def test_compression():
"""Test for issue #1897"""
Expand Down
1 change: 1 addition & 0 deletions tests/test_pdfa.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ def document_information_has_analoguos_xml(src: BytesIO) -> bool:
return document_information_has_analoguos_xml(src)


@pytest.mark.samples()
@pytest.mark.parametrize(
("src", "diagnostic_write_name"),
[
Expand Down

0 comments on commit aad26dd

Please sign in to comment.