Skip to content

Commit

Permalink
Fix bug for re-crops that use relative=True (#914)
Browse files Browse the repository at this point in the history
When using relative=True for a re-crop, pdfplumber was passing the wrong
bounding box to the cropping function. This commit fixes that bug and
also refactors CroppedPage.__init__(...) for clarity and consistency's
sake.
  • Loading branch information
jsvine committed Jul 2, 2023
1 parent 474f74c commit 0de6da9
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 13 deletions.
26 changes: 13 additions & 13 deletions pdfplumber/page.py
Original file line number Diff line number Diff line change
Expand Up @@ -383,14 +383,10 @@ def outside_bbox(
"""
Same as .crop, except only includes objects outside the bbox
"""
p = CroppedPage(
return CroppedPage(
self, bbox, relative=relative, strict=strict, crop_fn=utils.outside_bbox
)

# Reset, because this operation should not actually change bbox
p.bbox = self.bbox
return p

def filter(self, test_function: Callable[[T_obj], bool]) -> "FilteredPage":
return FilteredPage(self, test_function)

Expand Down Expand Up @@ -490,27 +486,31 @@ class CroppedPage(DerivedPage):
def __init__(
self,
parent_page: Page,
bbox: T_bbox,
crop_bbox: T_bbox,
crop_fn: Callable[[T_obj_list, T_bbox], T_obj_list] = utils.crop_to_bbox,
relative: bool = False,
strict: bool = True,
):
if relative:
o_x0, o_top, _, _ = parent_page.bbox
x0, top, x1, bottom = bbox
self.bbox = (x0 + o_x0, top + o_top, x1 + o_x0, bottom + o_top)
else:
self.bbox = bbox
x0, top, x1, bottom = crop_bbox
crop_bbox = (x0 + o_x0, top + o_top, x1 + o_x0, bottom + o_top)

if strict:
test_proposed_bbox(self.bbox, parent_page.bbox)
test_proposed_bbox(crop_bbox, parent_page.bbox)

def _crop_fn(objs: T_obj_list) -> T_obj_list:
return crop_fn(objs, bbox)
return crop_fn(objs, crop_bbox)

super().__init__(parent_page)

self._crop_fn = _crop_fn

super().__init__(parent_page)
# Note: testing for original function passed, not _crop_fn
if crop_fn is utils.outside_bbox:
self.bbox = parent_page.bbox
else:
self.bbox = crop_bbox

@property
def objects(self) -> Dict[str, T_obj_list]:
Expand Down
8 changes: 8 additions & 0 deletions tests/test_basics.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,14 @@ def test_relative_crop(self):
(0.5 * float(bottom.width), 0, bottom.width, bottom.height), relative=True
)

# An extra test for issue #914, in which relative crops were
# using the wrong bboxes for cropping, leading to empty object-lists
crop_right = page.crop((page.width / 2, 0, page.width, page.height))
crop_right_again_rel = crop_right.crop(
(0, 0, crop_right.width / 2, page.height), relative=True
)
assert len(crop_right_again_rel.chars)

def test_invalid_crops(self):
page = self.pdf.pages[0]
with pytest.raises(ValueError):
Expand Down

0 comments on commit 0de6da9

Please sign in to comment.