Skip to content

Commit

Permalink
BUG: Missing spaces in extract_text() method (py-pdf#1328) calculation efficiency
Browse files Browse the repository at this point in the history
  • Loading branch information
ssjkamei committed Sep 24, 2024
1 parent fd1c489 commit 2873b9e
Showing 1 changed file with 3 additions and 2 deletions.
5 changes: 3 additions & 2 deletions pypdf/_page.py
Original file line number Diff line number Diff line change
Expand Up @@ -1985,12 +1985,13 @@ def process_operation(operator: bytes, operands: List[Any]) -> None:
process_operation(b"TL", [-operands[1]])
process_operation(b"Td", operands)
elif operator == b"TJ":
# The space width may be smaller than the font width, so the width should be 95%.
_confirm_space_width = _space_width * 0.95
for op in operands[0]:
if isinstance(op, (str, bytes)):
process_operation(b"Tj", [op])
# The space width may be smaller than the font width, so the width should be 95%.
if isinstance(op, (int, float, NumberObject, FloatObject)) and (
(abs(float(op) / 0.95) >= _space_width)
(abs(float(op)) >= _confirm_space_width)
and (len(text) > 0)
and (text[-1] != " ")
):
Expand Down

0 comments on commit 2873b9e

Please sign in to comment.