Skip to content

Commit 0de6da9

Browse files
committed
Fix bug for re-crops that use relative=True (#914)
When using relative=True for a re-crop, pdfplumber was passing the wrong bounding box to the cropping function. This commit fixes that bug and also refactors CroppedPage.__init__(...) for clarity and consistency's sake.
1 parent 474f74c commit 0de6da9

File tree

2 files changed

+21
-13
lines changed

2 files changed

+21
-13
lines changed

pdfplumber/page.py

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -383,14 +383,10 @@ def outside_bbox(
383383
"""
384384
Same as .crop, except only includes objects fully outside the bbox
385385
"""
386-
p = CroppedPage(
386+
return CroppedPage(
387387
self, bbox, relative=relative, strict=strict, crop_fn=utils.outside_bbox
388388
)
389389

390-
# Reset, because this operation should not actually change bbox
391-
p.bbox = self.bbox
392-
return p
393-
394390
def filter(self, test_function: Callable[[T_obj], bool]) -> "FilteredPage":
395391
return FilteredPage(self, test_function)
396392

@@ -490,27 +486,31 @@ class CroppedPage(DerivedPage):
490486
def __init__(
491487
self,
492488
parent_page: Page,
493-
bbox: T_bbox,
489+
crop_bbox: T_bbox,
494490
crop_fn: Callable[[T_obj_list, T_bbox], T_obj_list] = utils.crop_to_bbox,
495491
relative: bool = False,
496492
strict: bool = True,
497493
):
498494
if relative:
499495
o_x0, o_top, _, _ = parent_page.bbox
500-
x0, top, x1, bottom = bbox
501-
self.bbox = (x0 + o_x0, top + o_top, x1 + o_x0, bottom + o_top)
502-
else:
503-
self.bbox = bbox
496+
x0, top, x1, bottom = crop_bbox
497+
crop_bbox = (x0 + o_x0, top + o_top, x1 + o_x0, bottom + o_top)
504498

505499
if strict:
506-
test_proposed_bbox(self.bbox, parent_page.bbox)
500+
test_proposed_bbox(crop_bbox, parent_page.bbox)
507501

508502
def _crop_fn(objs: T_obj_list) -> T_obj_list:
509-
return crop_fn(objs, bbox)
503+
return crop_fn(objs, crop_bbox)
504+
505+
super().__init__(parent_page)
510506

511507
self._crop_fn = _crop_fn
512508

513-
super().__init__(parent_page)
509+
# Note: testing for original function passed, not _crop_fn
510+
if crop_fn is utils.outside_bbox:
511+
self.bbox = parent_page.bbox
512+
else:
513+
self.bbox = crop_bbox
514514

515515
@property
516516
def objects(self) -> Dict[str, T_obj_list]:

tests/test_basics.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,14 @@ def test_relative_crop(self):
102102
(0.5 * float(bottom.width), 0, bottom.width, bottom.height), relative=True
103103
)
104104

105+
# An extra test for issue #914, in which relative crops were
106+
# using the wrong bboxes for cropping, leading to empty object-lists
107+
crop_right = page.crop((page.width / 2, 0, page.width, page.height))
108+
crop_right_again_rel = crop_right.crop(
109+
(0, 0, crop_right.width / 2, page.height), relative=True
110+
)
111+
assert len(crop_right_again_rel.chars)
112+
105113
def test_invalid_crops(self):
106114
page = self.pdf.pages[0]
107115
with pytest.raises(ValueError):

0 commit comments

Comments
 (0)