Skip to content

Commit

Permalink
BUG: Line breaks are not generated due to incorrect calculation of text leading (#2890)
Browse files Browse the repository at this point in the history

Closes #2262.
  • Loading branch information
ssjkamei authored Oct 4, 2024
1 parent abb62ac commit fcb103a
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 1 deletion.
3 changes: 2 additions & 1 deletion pypdf/_page.py
Original file line number Diff line number Diff line change
Expand Up @@ -1965,7 +1965,8 @@ def process_operation(operator: bytes, operands: List[Any]) -> None:
elif operator == b"Tw":
space_scale = 1.0 + float(operands[0])
elif operator == b"TL":
TL = float(operands[0])
scale_x = math.sqrt(tm_matrix[0]**2 + tm_matrix[2]**2)
TL = float(operands[0]) * font_size * scale_x
elif operator == b"Tf":
if text != "":
output += text # .translate(cmap)
Expand Down
8 changes: 8 additions & 0 deletions tests/test_text_extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,3 +211,11 @@ def test_space_position_calculation():
page = reader.pages[3]
extracted = page.extract_text()
assert "Shortly after the Geneva BOF session, the" in extracted


def test_text_leading_height_unit():
    """
    Regression test for #2262.

    Extracts the text of the first page of ``toy.pdf`` and checks that
    ``Something[cited]`` is immediately followed by a line break.
    """
    first_page = PdfReader(RESOURCE_ROOT / "toy.pdf").pages[0]
    # The trailing newline is the point of the check: the issue was that
    # line breaks were not generated during extraction.
    assert "Something[cited]\n" in first_page.extract_text()

0 comments on commit fcb103a

Please sign in to comment.