From 7e2893106e75fefdfddc967a0dfadb1573840445 Mon Sep 17 00:00:00 2001 From: Jeremy Singer-Vine Date: Wed, 19 Jul 2023 08:56:18 -0400 Subject: [PATCH] Add `antialias` parameter to `Page.to_image(...)` ... and associated methods. Thanks to @cmdlineluser for flagging. For details, see: https://github.com/jsvine/pdfplumber/discussions/899#discussioncomment-6464765 --- README.md | 7 ++++--- pdfplumber/display.py | 7 ++++++- pdfplumber/page.py | 5 ++++- tests/test_display.py | 4 ++++ 4 files changed, 18 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index e12589eb..bf7b3d70 100644 --- a/README.md +++ b/README.md @@ -255,9 +255,10 @@ If you pass the `pdfminer.six`-handling `laparams` parameter to `pdfplumber.open To turn any page (including cropped pages) into an `PageImage` object, call `my_page.to_image()`. You can optionally pass *one* of the following keyword arguments: -- `resolution`: The desired number pixels per inch. Defaults to 72. -- `width`: The desired image width in pixels. -- `height`: The desired image width in pixels. +- `resolution`: The desired number of pixels per inch. Default: `72`. Type: `int`. +- `width`: The desired image width in pixels. Default: unset, determined by `resolution`. Type: `int`. +- `height`: The desired image height in pixels. Default: unset, determined by `resolution`. Type: `int`. +- `antialias`: Whether to use antialiasing when creating the image. Setting to `True` creates images with less-jagged text and graphics, but with larger file sizes. Default: `False`. Type: `bool`. 
For instance: diff --git a/pdfplumber/display.py b/pdfplumber/display.py index b5cd9acf..9b826424 100644 --- a/pdfplumber/display.py +++ b/pdfplumber/display.py @@ -38,6 +38,7 @@ def get_page_image( page_ix: int, resolution: Union[int, float], password: Optional[str], + antialias: bool = False, ) -> PIL.Image.Image: # If we are working with a file object saved to disk if hasattr(stream, "name"): @@ -54,7 +55,9 @@ def get_page_image( input_data=src, password=password, scale=resolution / 72, - no_smoothtext=True, + no_smoothtext=not antialias, + no_smoothpath=not antialias, + no_smoothimage=not antialias, # Non-modifiable arguments renderer=pypdfium2._helpers.page.PdfPage.render, converter=pypdfium2.PdfBitmap.to_pil, @@ -73,6 +76,7 @@ def __init__( page: "Page", original: Optional[PIL.Image.Image] = None, resolution: Union[int, float] = DEFAULT_RESOLUTION, + antialias: bool = False, ): self.page = page if original is None: @@ -80,6 +84,7 @@ def __init__( stream=page.pdf.stream, page_ix=page.page_number - 1, resolution=resolution, + antialias=antialias, password=page.pdf.password, ) else: diff --git a/pdfplumber/page.py b/pdfplumber/page.py index 87df397c..521002f4 100644 --- a/pdfplumber/page.py +++ b/pdfplumber/page.py @@ -460,6 +460,7 @@ def to_image( resolution: Optional[Union[int, float]] = None, width: Optional[Union[int, float]] = None, height: Optional[Union[int, float]] = None, + antialias: bool = False, ) -> "PageImage": """ You can pass a maximum of 1 of the following: @@ -479,7 +480,9 @@ def to_image( elif height is not None: resolution = 72 * height / self.height - return PageImage(self, resolution=resolution or DEFAULT_RESOLUTION) + return PageImage( + self, resolution=resolution or DEFAULT_RESOLUTION, antialias=antialias + ) def to_dict(self, object_types: Optional[List[str]] = None) -> Dict[str, Any]: if object_types is None: diff --git a/tests/test_display.py b/tests/test_display.py index dcb7a54c..88bc0cd5 100644 --- a/tests/test_display.py +++ 
b/tests/test_display.py @@ -97,6 +97,10 @@ def test_no_quantize(self): self.im.save(b, "PNG", quantize=False) assert len(b.getvalue()) > len(self.im._repr_png_()) + def test_antialias(self): + aa = self.pdf.pages[0].to_image(antialias=True) + assert len(aa._repr_png_()) > len(self.im._repr_png_()) + def test_decompression_bomb(self): original_max = PIL.Image.MAX_IMAGE_PIXELS PIL.Image.MAX_IMAGE_PIXELS = 10