Skip to content

Commit

Permalink
BUG: Inverse color in CMYK images (#1947)
Browse files Browse the repository at this point in the history
Closes #1895

CMYK image without decode requires reverting scale (see PDF 1.7 spec, p243, §2, last sentence)
  • Loading branch information
pubpub-zz authored Jul 8, 2023
1 parent c239073 commit 8753663
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 2 deletions.
19 changes: 19 additions & 0 deletions pypdf/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -871,6 +871,25 @@ def _handle_jpx(
elif lfilters is None:
img, image_format, extension = Image.frombytes(mode, size, data), "PNG", ".png"

# CMYK image without decode requires reverting scale (cf. PDF 1.7 spec, p243, §2, last sentence)
decode = x_object_obj.get(
IA.DECODE, ([1.0, 0.0] * 4) if img.mode == "CMYK" else None
)
if (
isinstance(color_space, ArrayObject)
and color_space[0].get_object() == "/Indexed"
):
decode = None # decode is meaningless for Indexed
if decode is not None and not all(decode[i] == i % 2 for i in range(len(decode))):
lut: List[int] = []
for i in range(0, len(decode), 2):
dmin = decode[i]
dmax = decode[i + 1]
lut.extend(
round(255.0 * (j / 255.0 * (dmax - dmin) + dmin)) for j in range(256)
)
img = img.point(lut)

if IA.S_MASK in x_object_obj: # add alpha channel
alpha = _xobj_to_image(x_object_obj[IA.S_MASK])[2]
if img.size != alpha.size:
Expand Down
27 changes: 25 additions & 2 deletions tests/test_filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -366,8 +366,8 @@ def test_tiff_predictor():


@pytest.mark.enable_socket()
def test_cmyk():
"""Decode cmyk with transparency"""
def test_rgba():
"""Decode rgb with transparency"""
url = "https://corpora.tika.apache.org/base/docs/govdocs1/972/972174.pdf"
name = "tika-972174.pdf"
reader = PdfReader(BytesIO(get_pdf_from_url(url, name=name)))
Expand All @@ -385,6 +385,29 @@ def test_cmyk():
assert d < 0.01


@pytest.mark.enable_socket()
def test_cmyk():
    """Decode a CMYK image and compare it against a reference picture.

    The first image of the second page of the sample document is extracted
    and compared pixel-wise with a downloaded reference image; the test
    passes when the normalised difference is below 0.01.

    The sample document is encrypted, so the test is skipped (rather than
    silently reported as passed) when ``Crypto`` is not installed.
    """
    try:
        from Crypto.Cipher import AES  # noqa: F401
    except ImportError:
        # Without AES support the encrypted file cannot be read; report the
        # test as skipped so the missing coverage is visible in the results.
        pytest.skip("the file is encrypted and requires Crypto.Cipher.AES")

    url = "https://github.com/py-pdf/pypdf/files/11962229/DB-5368770_Vitocal_200-G.pdf"
    name = "Vitocal.pdf"
    reader = PdfReader(BytesIO(get_pdf_from_url(url, name=name)))

    # NOTE: the reference URL ends in ".jpg" although the local name ends in
    # ".png"; the name is kept unchanged.
    url_png = "https://user-images.githubusercontent.com/4083478/251283945-38c5b92c-cf94-473c-bb57-a51b74fc39be.jpg"
    name_png = "VitocalImage.png"
    refimg = Image.open(
        BytesIO(get_pdf_from_url(url_png, name=name_png))
    )  # not a pdf but it works

    data = reader.pages[1].images[0]
    diff = ImageChops.difference(data.image, refimg)

    # Root of the summed squared per-channel differences, divided by the
    # number of pixels.
    squared_error = sum(
        c0 * c0 + c1 * c1 + c2 * c2 + c3 * c3 for c0, c1, c2, c3 in diff.getdata()
    )
    distance = sqrt(squared_error) / (diff.size[0] * diff.size[1])
    assert distance < 0.01


@pytest.mark.enable_socket()
def test_iss1863():
"""Test doc from iss1863"""
Expand Down

0 comments on commit 8753663

Please sign in to comment.