Skip to content

Commit

Permalink
Pleasing VeraPDF / fixing ICC profiles insertion / warn on CMYK not s…
Browse files Browse the repository at this point in the history
…upported - close #697
  • Loading branch information
Lucas-C committed Feb 28, 2023
1 parent e9dfb18 commit 667a295
Show file tree
Hide file tree
Showing 10 changed files with 55 additions and 55 deletions.
2 changes: 1 addition & 1 deletion docs/index.md
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ or [open a discussion](https://github.com/PyFPDF/fpdf2/discussions).
### Related ###

* Looking for alternative libraries? Check out [this detailed list of PDF-related Python libs by Patrick Maupin (`pdfrw` author)](https://github.com/pmaupin/pdfrw#other-libraries).
There is also [borb](https://github.com/jorisschellekens/borb), [PyPDF2](https://github.com/py-pdf/PyPDF2), [pikepdf](https://github.com/pikepdf/pikepdf), [WeasyPrint](https://github.com/Kozea/WeasyPrint), [pydyf](https://pypi.org/project/pydyf/) and [PyMuPDF](https://pymupdf.readthedocs.io/en/latest/index.html): [features comparison](https://pymupdf.readthedocs.io/en/latest/about.html), [examples](https://github.com/pymupdf/PyMuPDF-Utilities/tree/master/examples#examples), [Jupyter notebooks](https://github.com/pymupdf/PyMuPDF-Utilities/tree/master/jupyter-notebooks)
There is also [borb](https://github.com/jorisschellekens/borb), [PyPDF2](https://github.com/py-pdf/PyPDF2), [pikepdf](https://github.com/pikepdf/pikepdf), [WeasyPrint](https://github.com/Kozea/WeasyPrint), [pydyf](https://pypi.org/project/pydyf/) and [PyMuPDF](https://pymupdf.readthedocs.io/en/latest/index.html): [features comparison](https://pymupdf.readthedocs.io/en/latest/about.html), [examples](https://github.com/pymupdf/PyMuPDF-Utilities/tree/master/examples#examples), [Jupyter notebooks](https://github.com/pymupdf/PyMuPDF-Utilities/tree/master/jupyter-notebooks).
We have some documentation about combining `fpdf2` with [`borb`](CombineWithBorb.md), [`pdfrw`](CombineWithPdfrw.md), & [`PyPDF2`](CombineWithPyPDF2.md).
* [Create PDFs with Python](https://www.youtube.com/playlist?list=PLjNQtX45f0dR9K2sMJ5ad9wVjqslNBIC0) : a series of tutorial videos by bvalgard
* [digidigital/Extensions-and-Scripts-for-pyFPDF-fpdf2](https://github.com/digidigital/Extensions-and-Scripts-for-pyFPDF-fpdf2) : scripts ported from PHP to add transparency to elements of the page or part of an image, allow to write circular text,
Expand Down
27 changes: 15 additions & 12 deletions fpdf/image_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -314,17 +314,22 @@ def _decode_base64_image(base64Image):
]


def iccp_is_valid(iccp):
"""
checks the validity of an iccp profile
"""
def is_iccp_valid(iccp, filename):
"Checks the validity of an ICC profile"
try:
iccp_io = BytesIO(iccp)
profile = ImageCms.getOpenProfile(iccp_io)
ImageCms.getProfileInfo(profile)
return True
profile = ImageCms.getOpenProfile(BytesIO(iccp))
except ImageCms.PyCMSError:
LOGGER.warning("Invalid ICC Profile in file %s", filename)
return False
color_space = profile.profile.xcolor_space.strip()
if color_space not in ("GRAY", "RGB"):
LOGGER.warning(
"Unsupported color space %s in ICC Profile of file %s - cf. issue #711",
color_space,
filename,
)
return False
return True


def get_img_info(filename, img=None, image_filter="AUTO", dims=None):
Expand Down Expand Up @@ -371,10 +376,8 @@ def get_img_info(filename, img=None, image_filter="AUTO", dims=None):

iccp = None
if "icc_profile" in img.info:
iccp = img.info.get("icc_profile")
if not iccp_is_valid(iccp):
LOGGER.error("ICCP for %s is invalid", filename)
iccp = None
if is_iccp_valid(img.info["icc_profile"], filename):
iccp = img.info["icc_profile"]

if img_raw_data is not None and not img_altered:
# if we can use the original image bytes directly we do (JPEG and group4 TIFF only):
Expand Down
5 changes: 4 additions & 1 deletion fpdf/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -754,6 +754,7 @@ def _ensure_iccp(self, img_info):
iccp_content = iccp_c
break
assert iccp_content is not None
# Note: n should be 4 if the profile ColorSpace is CMYK
iccp_obj = PDFICCPObject(
contents=iccp_content, n=img_info["dpn"], alternate=img_info["cs"]
)
Expand All @@ -770,11 +771,13 @@ def _add_image(self, info):
["/Indexed", "/DeviceRGB", f"{len(info['pal']) // 3 - 1}"]
)
elif iccp_i is not None:
# indexed images are not supposed to have ICC profiles
iccp_pdf_i = self._ensure_iccp(info)
color_space = PDFArray(["/ICCBased", str(iccp_pdf_i), str("0"), "R"])
elif color_space == "DeviceCMYK":
decode = "[1 0 1 0 1 0 1 0]"
raise NotImplementedError(
"fpdf2 does not support DeviceCMYK ColorSpace yet - cf. issue #711"
)

decode_parms = f"<<{info['dp']} /BitsPerComponent {info['bpc']}>>"
img_obj = PDFXObject(
Expand Down
15 changes: 8 additions & 7 deletions scripts/checker_commons.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ def aggregate(pdf_filepath, report, aggregated_report_filepath):
"errors": defaultdict(list),
}
try:
with open(aggregated_report_filepath) as agg_file:
with open(aggregated_report_filepath, encoding="utf8") as agg_file:
prev_agg_report = json.load(agg_file)
agg_report["failures"].update(prev_agg_report["failures"])
agg_report["errors"].update(prev_agg_report["errors"])
Expand All @@ -22,33 +22,34 @@ def aggregate(pdf_filepath, report, aggregated_report_filepath):
else:
for error in report.get("errors", []):
agg_report["errors"][error].append(pdf_filepath)
with open(aggregated_report_filepath, "w") as agg_file:
with open(aggregated_report_filepath, "w", encoding="utf8") as agg_file:
json.dump(agg_report, agg_file)


def print_aggregated_report(
aggregated_report_filepath, checks_details_url, ignore_whitelist_filepath
):
with open(aggregated_report_filepath) as agg_file:
with open(aggregated_report_filepath, encoding="utf8") as agg_file:
agg_report = json.load(agg_file)
if "version" in agg_report:
print(agg_report["version"])
print("Documentation on the checks:", checks_details_url)
print("# AGGREGATED REPORT #")
if agg_report["failures"]:
print("Failures:")
for failure, pdf_filepaths in agg_report["failures"].items():
for failure, pdf_filepaths in sorted(agg_report["failures"].items()):
print(f"- {failure} ({len(pdf_filepaths)}): {', '.join(pdf_filepaths)}")
print("Errors:")
sort_key = lambda error: -len(error[1])
for error, pdf_filepaths in sorted(agg_report["errors"].items(), key=sort_key):
for error, pdf_filepaths in sorted(
sorted(agg_report["errors"].items(), key=lambda error: -len(error[1]))
):
print(f"- {error} ({len(pdf_filepaths)}): {', '.join(pdf_filepaths)}")
fail_on_unexpected_check_failure(agg_report, ignore_whitelist_filepath)


def fail_on_unexpected_check_failure(agg_report, ignore_whitelist_filepath):
"exit(1) if there is any non-passing & non-whitelisted error remaining"
with open(ignore_whitelist_filepath) as ignore_file:
with open(ignore_whitelist_filepath, encoding="utf8") as ignore_file:
ignore = json.load(ignore_file)
errors = set(agg_report["errors"].keys()) - set(ignore["errors"].keys())
if agg_report["failures"] or errors:
Expand Down
4 changes: 2 additions & 2 deletions scripts/verapdf-ignore.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
"6.1.2-2": "REASON: this required byte value greater than 127 at the beginning of a file seemingly only appears in the ISO 19005-1:2005 spec and no other public one",
"6.1.3-1": "REASON: fpdf2 does not currently define a file /ID in the trailer section",
"6.1.3-2": "REASON: fpdf2 allows to create encrypted files",
"6.2.3-2": "REASON: fpdf2 does not currently support defining ICC profiles",
"6.2.3-4": "REASON: fpdf2 does not currently support defining ICC profiles",
"6.2.3-1": "REASON: false positive - fpdf2 does not currently produce any ICCInputProfile",
"6.2.3-2": "REASON: fpdf2 does not currently support PDF/A",
"6.3.4-1": "REASON: fpdf2 still allows using the PostScript standard 14 fonts. Quoting PDF 1.7 spec from 2006: Beginning with PDF 1.5, the special treatment given to the standard 14 fonts is deprecated. All fonts used in a PDF document should be represented using a com- plete font descriptor. For backwards capability, viewer applications must still provide the special treatment identified for the standard 14 fonts.",
"6.3.5-3": "FIXME: corresponding GitHub issue -> https://github.com/PyFPDF/fpdf2/issues/88",
"6.4-1": "REASON: enabled by default, can be disabled by setting pdf.allow_images_transparency = False",
Expand Down
21 changes: 10 additions & 11 deletions scripts/verapdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,21 +15,20 @@

AGGREGATED_REPORT_FILEPATH = "verapdf-aggregated.json"
IGNORE_WHITELIST_FILEPATH = "scripts/verapdf-ignore.json"
CHECKS_DETAILS_URL = "https://docs.verapdf.org/validation/pdfa-part1/ & https://docs.verapdf.org/validation/pdfa-parts-2-and-3/"
CHECKS_DETAILS_URL = "https://docs.verapdf.org/validation/"
BAT_EXT = ".bat" if sys.platform in ("cygwin", "win32") else ""


def analyze_pdf_file(pdf_filepath):
output = run(
[
"verapdf/verapdf" + BAT_EXT,
"--format",
"text",
"-v",
pdf_filepath,
],
stdout=PIPE,
).stdout.decode()
command = [
"verapdf/verapdf" + BAT_EXT,
"--format",
"text",
"-v",
pdf_filepath,
]
# print(" ".join(command))
output = run(command, check=False, stdout=PIPE).stdout.decode()
report = parse_output(output)
aggregate(pdf_filepath, report, AGGREGATED_REPORT_FILEPATH)

Expand Down
3 changes: 2 additions & 1 deletion test/image/image_types/test_insert_images.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,11 +132,12 @@ def test_insert_jpg_icc(tmp_path):
assert_pdf_equal(pdf, HERE / "image_types_insert_jpg_icc.pdf", tmp_path)


def test_insert_jpg_invalid_icc(tmp_path):
def test_insert_jpg_invalid_icc(caplog, tmp_path):
pdf = fpdf.FPDF()
pdf.add_page(format=(448, 498))
pdf.set_margin(0)
pdf.image(HERE / "insert_images_insert_jpg_icc_invalid.jpg", x=0, y=0, h=498)
assert "Invalid ICC Profile in file" in caplog.text
assert_pdf_equal(pdf, HERE / "image_types_insert_jpg_icc_invalid.pdf", tmp_path)


Expand Down
Binary file modified test/image/png_images/image_png_insert_png_files.pdf
Binary file not shown.
29 changes: 11 additions & 18 deletions test/image/png_images/test_png_file.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,21 @@
from pathlib import Path

import fpdf
from fpdf import FPDF
from test.conftest import assert_pdf_equal


HERE = Path(__file__).resolve().parent


def test_insert_png_files(tmp_path):
pdf = fpdf.FPDF(unit="pt")
pdf.compress = False

not_supported = {
"e59ec0cfb8ab64558099543dc19f8378.png", # Interlacing not supported:
"6c853ed9dacd5716bc54eb59cec30889.png", # 16-bit depth not supported:
"ac6343a98f8edabfcc6e536dd75aacb0.png", # Interlacing not supported:
"93e6127b9c4e7a99459c558b81d31bc5.png", # Interlacing not supported:
"18f9baf3834980f4b80a3e82ad45be48.png", # Interlacing not supported:
"51a4d21670dc8dfa8ffc9e54afd62f5f.png", # Interlacing not supported:
}

def test_insert_png_files(caplog, tmp_path):
pdf = FPDF()
for path in sorted(HERE.glob("*.png")):
if path.name not in not_supported:
pdf.add_page()
pdf.image(str(path), x=0, y=0, w=0, h=0)

pdf.add_page()
pdf.image(str(path), x=0, y=0, w=0, h=0)
# Note: 7 of those images have an ICC profile, and there are only 5 distinct ICC profiles among them
assert_pdf_equal(pdf, HERE / "image_png_insert_png_files.pdf", tmp_path)

assert "Unsupported color space CMYK in ICC Profile of file" in caplog.text
# Note: the warning above comes from the following files, for which ImageMagick also raises warnings:
# identify-im6.q16: iCCP: profile 'icc': 'CMYK': invalid ICC profile color space `test/image/png_images/0839d93f8e77e21acd0ac40a80b14b7b.png'
# identify-im6.q16: iCCP: profile 'icc': 'CMYK': invalid ICC profile color space `test/image/png_images/1ebd73c1d3fbc89782f29507364128fc.png'
4 changes: 2 additions & 2 deletions test/image/test_url_images.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@
from test.conftest import assert_pdf_equal

HERE = Path(__file__).resolve().parent
PNG_IMG_URL = "https://upload.wikimedia.org/wikipedia/commons/7/70/Example.png"


def test_png_url(tmp_path):
pdf = fpdf.FPDF()
pdf.add_page()
png = "https://upload.wikimedia.org/wikipedia/commons/7/70/Example.png"
pdf.image(png, x=15, y=15, w=30, h=25)
pdf.image(PNG_IMG_URL, x=15, y=15, w=30, h=25)
assert_pdf_equal(pdf, HERE / "image_png_url.pdf", tmp_path)

0 comments on commit 667a295

Please sign in to comment.