Skip to content

Commit 7e28931

Browse files
committed
Add antialias parameter to Page.to_image(...)
... and associated methods. Thanks to @cmdlineluser for flagging. For details, see: #899 (reply in thread)
1 parent f3c628a commit 7e28931

File tree

4 files changed

+18
-5
lines changed

4 files changed

+18
-5
lines changed

README.md

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -255,9 +255,10 @@ If you pass the `pdfminer.six`-handling `laparams` parameter to `pdfplumber.open
255255

256256
To turn any page (including cropped pages) into a `PageImage` object, call `my_page.to_image()`. You can optionally pass *one* of the following keyword arguments:
257257

258-
- `resolution`: The desired number pixels per inch. Defaults to 72.
259-
- `width`: The desired image width in pixels.
260-
- `height`: The desired image width in pixels.
258+
- `resolution`: The desired number of pixels per inch. Default: `72`. Type: `int`.
259+
- `width`: The desired image width in pixels. Default: unset, determined by `resolution`. Type: `int`.
260+
- `height`: The desired image height in pixels. Default: unset, determined by `resolution`. Type: `int`.
261+
- `antialias`: Whether to use antialiasing when creating the image. Setting to `True` creates images with less-jagged text and graphics, but with larger file sizes. Default: `False`. Type: `bool`.
261262

262263
For instance:
263264

pdfplumber/display.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ def get_page_image(
3838
page_ix: int,
3939
resolution: Union[int, float],
4040
password: Optional[str],
41+
antialias: bool = False,
4142
) -> PIL.Image.Image:
4243
# If we are working with a file object saved to disk
4344
if hasattr(stream, "name"):
@@ -54,7 +55,9 @@ def get_page_image(
5455
input_data=src,
5556
password=password,
5657
scale=resolution / 72,
57-
no_smoothtext=True,
58+
no_smoothtext=not antialias,
59+
no_smoothpath=not antialias,
60+
no_smoothimage=not antialias,
5861
# Non-modifiable arguments
5962
renderer=pypdfium2._helpers.page.PdfPage.render,
6063
converter=pypdfium2.PdfBitmap.to_pil,
@@ -73,13 +76,15 @@ def __init__(
7376
page: "Page",
7477
original: Optional[PIL.Image.Image] = None,
7578
resolution: Union[int, float] = DEFAULT_RESOLUTION,
79+
antialias: bool = False,
7680
):
7781
self.page = page
7882
if original is None:
7983
self.original = get_page_image(
8084
stream=page.pdf.stream,
8185
page_ix=page.page_number - 1,
8286
resolution=resolution,
87+
antialias=antialias,
8388
password=page.pdf.password,
8489
)
8590
else:

pdfplumber/page.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -460,6 +460,7 @@ def to_image(
460460
resolution: Optional[Union[int, float]] = None,
461461
width: Optional[Union[int, float]] = None,
462462
height: Optional[Union[int, float]] = None,
463+
antialias: bool = False,
463464
) -> "PageImage":
464465
"""
465466
You can pass a maximum of 1 of the following:
@@ -479,7 +480,9 @@ def to_image(
479480
elif height is not None:
480481
resolution = 72 * height / self.height
481482

482-
return PageImage(self, resolution=resolution or DEFAULT_RESOLUTION)
483+
return PageImage(
484+
self, resolution=resolution or DEFAULT_RESOLUTION, antialias=antialias
485+
)
483486

484487
def to_dict(self, object_types: Optional[List[str]] = None) -> Dict[str, Any]:
485488
if object_types is None:

tests/test_display.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,10 @@ def test_no_quantize(self):
9797
self.im.save(b, "PNG", quantize=False)
9898
assert len(b.getvalue()) > len(self.im._repr_png_())
9999

100+
def test_antialias(self):
101+
aa = self.pdf.pages[0].to_image(antialias=True)
102+
assert len(aa._repr_png_()) > len(self.im._repr_png_())
103+
100104
def test_decompression_bomb(self):
101105
original_max = PIL.Image.MAX_IMAGE_PIXELS
102106
PIL.Image.MAX_IMAGE_PIXELS = 10

0 commit comments

Comments
 (0)