def convert_svg2png(
    src: str | pathlib.Path | io.BytesIO,
    dst: pathlib.Path | IO[bytes],
    width: int | None = None,
    height: int | None = None,
) -> None:
    """Convert a SVG to a PNG

    Output width and height might be specified if resize is needed.
    PNG background is transparent.

    Arguments:
        src: SVG source. A `str` or `pathlib.Path` is handed to cairosvg as its
            `url` argument; any other value is expected to expose `getvalue()`
            (e.g. `io.BytesIO`) and its content is passed as `bytestring`.
        dst: where to write the PNG. A `pathlib.Path` is passed to cairosvg as
            `write_to`; any other value gets the PNG bytes via `dst.write()`.
        width: requested output width, forwarded as `output_width`. Falsy
            values (`None`, `0`) are not forwarded.
        height: requested output height, forwarded as `output_height`. Falsy
            values (`None`, `0`) are not forwarded.

    Raises:
        Exception: cairosvg did not return `bytes` when no `write_to` target
            was given (only checked when `dst` is not a `pathlib.Path`).
    """
    # local annotation only: values are a str (url), bytes (bytestring)
    # or int (output dimensions)
    kwargs: dict[str, str | bytes | int] = {}

    # source: path-like values go through `url`, in-memory ones as raw bytes
    if isinstance(src, (str, pathlib.Path)):
        kwargs["url"] = str(src)
    else:
        kwargs["bytestring"] = src.getvalue()

    # only forward dimensions which have been requested
    if width:
        kwargs["output_width"] = width
    if height:
        kwargs["output_height"] = height

    # path destination: cairosvg writes the file by itself
    if isinstance(dst, pathlib.Path):
        cairosvg.svg2png(write_to=str(dst), **kwargs)
        return

    # file-like destination: fetch the PNG bytes and write them ourselves
    result = cairosvg.svg2png(**kwargs)
    if not isinstance(result, bytes):
        raise Exception(
            "Unexpected type returned by cairosvg.svg2png"
        )  # pragma: no cover
    dst.write(result)
def test_convert_svg_io_src_path_dst(svg_image: pathlib.Path, tmp_path: pathlib.Path):
    """In-memory SVG source is converted to a PNG file on disk"""
    src = io.BytesIO(svg_image.read_bytes())
    dst = tmp_path / "test.png"
    convert_svg2png(src, dst)
    # context manager so that the handle opened by PIL on dst is released
    with Image.open(dst) as dst_image:
        assert dst_image.format == "PNG"


def test_convert_svg_io_src_io_dst(svg_image: pathlib.Path):
    """In-memory SVG source is converted to an in-memory PNG"""
    src = io.BytesIO(svg_image.read_bytes())
    dst = io.BytesIO()
    convert_svg2png(src, dst)
    with Image.open(dst) as dst_image:
        assert dst_image.format == "PNG"


def test_convert_svg_path_src_path_dst(svg_image: pathlib.Path, tmp_path: pathlib.Path):
    """SVG file is converted to a PNG file of the requested 96x96 size"""
    src = svg_image
    dst = tmp_path / "test.png"
    convert_svg2png(src, dst, width=96, height=96)
    # context manager so that the handle opened by PIL on dst is released
    with Image.open(dst) as dst_image:
        assert dst_image.format == "PNG"
        assert dst_image.width == 96
        assert dst_image.height == 96


def test_convert_svg_path_src_io_dst(svg_image: pathlib.Path):
    """SVG file is converted to an in-memory PNG of the requested 96x96 size"""
    src = svg_image
    dst = io.BytesIO()
    convert_svg2png(src, dst, width=96, height=96)
    with Image.open(dst) as dst_image:
        assert dst_image.format == "PNG"
        assert dst_image.width == 96
        assert dst_image.height == 96
"JPEG"), ("gif", "GIF"), ("webp", "WEBP")], + [("png", "PNG"), ("jpg", "JPEG"), ("gif", "GIF"), ("webp", "WEBP"), ("svg", "SVG")], ) def test_format_for_real_images_content_bytes( - png_image, jpg_image, gif_image, webp_image, tmp_path, fmt, expected + png_image, jpg_image, gif_image, webp_image, svg_image, tmp_path, fmt, expected ): src, _ = get_src_dst( tmp_path, @@ -624,6 +673,7 @@ def test_format_for_real_images_content_bytes( jpg_image=jpg_image, gif_image=gif_image, webp_image=webp_image, + svg_image=svg_image, ) assert format_for(io.BytesIO(src.read_bytes()), from_suffix=False) == expected @@ -635,6 +685,7 @@ def test_format_for_real_images_content_bytes( ("image.jpg", "JPEG"), ("image.gif", "GIF"), ("image.webp", "WEBP"), + ("image.svg", "SVG"), ("image.raster", None), ], )