From 22494e8da2504ec95ac2bf280e5f7ffe8c64b042 Mon Sep 17 00:00:00 2001 From: Jeremy Singer-Vine Date: Sun, 4 Aug 2024 13:14:12 -0400 Subject: [PATCH] Make Page.crop(...) also crop .annots/.hyperlinks h/t @Safrone in #1171 --- CHANGELOG.md | 1 + pdfplumber/page.py | 6 +++++- tests/test_basics.py | 4 ++++ 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ce93880a..3ba79a3b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ All notable changes to this project will be documented in this file. The format ### Fixed - Fix error on getting `.annots`/`.hyperlinks` from `CroppedPage` (due to missing `.rotation` and `.initial_doctop` attributes) (h/t @Safrone). ([#1171](https://github.com/jsvine/pdfplumber/issues/1171) + [e5737d2](https://github.com/jsvine/pdfplumber/commit/e5737d2)) +- Fix problem where `Page.crop(...)` was not cropping `.annots`/`.hyperlinks` (h/t @Safrone). ([#1171](https://github.com/jsvine/pdfplumber/issues/1171)) - Fix calculation of coordinates for `.annots` on `CroppedPage`s. ([0bbb340](https://github.com/jsvine/pdfplumber/commit/0bbb340)) - Dereference structure element attributes (h/t @dhdaines). ([#1169](https://github.com/jsvine/pdfplumber/pull/1169)) - Fix `Page.get_attr(...)` so that it fully resolves references before determining whether the attribute's value is `None` (h/t @zzhangyun + @mkl-public). 
([#1176](https://github.com/jsvine/pdfplumber/issues/1176)) diff --git a/pdfplumber/page.py b/pdfplumber/page.py index e3db62f2..e9ae725e 100644 --- a/pdfplumber/page.py +++ b/pdfplumber/page.py @@ -329,7 +329,11 @@ def parse(annot: T_obj) -> T_obj: return parsed raw = resolve_all(self.page_obj.annots) or [] - return list(map(parse, raw)) + parsed = list(map(parse, raw)) + if isinstance(self, CroppedPage): + return self._crop_fn(parsed) + else: + return parsed @property def hyperlinks(self) -> T_obj_list: diff --git a/tests/test_basics.py b/tests/test_basics.py index ff186f5f..de983fcb 100644 --- a/tests/test_basics.py +++ b/tests/test_basics.py @@ -69,6 +69,10 @@ def test_annots_cropped(self): assert len(cropped.annots) == 13 assert len(cropped.hyperlinks) == 1 + h0_bbox = pdfplumber.utils.obj_to_bbox(page.hyperlinks[0]) + cropped = page.crop(h0_bbox) + assert len(cropped.annots) == len(cropped.hyperlinks) == 1 + def test_annots_rotated(self): def get_annot(filename, n=0): path = os.path.join(HERE, "pdfs", filename)