Skip to content

Commit

Permalink
BUG: Fix error in cmap extraction (#1544)
Browse files Browse the repository at this point in the history
Fixes #1533 and, belatedly, #1091
  • Loading branch information
pubpub-zz authored Jan 21, 2023
1 parent a6aad31 commit c1f8742
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 4 deletions.
9 changes: 5 additions & 4 deletions pypdf/_cmap.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,13 +280,11 @@ def parse_bfrange(
) -> Union[None, Tuple[int, int]]:
lst = [x for x in line.split(b" ") if x]
closure_found = False
nbi = max(len(lst[0]), len(lst[1]))
map_dict[-1] = ceil(nbi / 2)
fmt = b"%%0%dX" % (map_dict[-1] * 2)
if multiline_rg is not None:
fmt = b"%%0%dX" % (map_dict[-1] * 2)
a = multiline_rg[0] # a, b not in the current line
b = multiline_rg[1]
for sq in lst[1:]:
for sq in lst[0:]:
if sq == b"]":
closure_found = True
break
Expand All @@ -301,6 +299,9 @@ def parse_bfrange(
else:
a = int(lst[0], 16)
b = int(lst[1], 16)
nbi = max(len(lst[0]), len(lst[1]))
map_dict[-1] = ceil(nbi / 2)
fmt = b"%%0%dX" % (map_dict[-1] * 2)
if lst[2] == b"[":
for sq in lst[3:]:
if sq == b"]":
Expand Down
10 changes: 10 additions & 0 deletions tests/test_cmap.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import pytest

from pypdf import PdfReader
from pypdf._cmap import build_char_map
from pypdf.errors import PdfReadWarning

from . import get_pdf_from_url
Expand Down Expand Up @@ -102,3 +103,12 @@ def test_iss1379():
name = "02voc.pdf"
reader = PdfReader(BytesIO(get_pdf_from_url(url, name=name)))
reader.pages[2].extract_text()


@pytest.mark.external
def test_iss1533():
    """Regression test for issue #1533 (error in cmap extraction).

    Downloads the sample PDF attached to the issue, checks that text
    extraction on the first page completes without raising, and then
    checks that the character map built for that page translates the
    code ``"\\x01"`` to ``"Ü"``.
    """
    pdf_bytes = get_pdf_from_url(
        "https://github.com/py-pdf/pypdf/files/10376149/iss1533.pdf",
        name="iss1533.pdf",
    )
    reader = PdfReader(BytesIO(pdf_bytes))

    # Extraction itself is the first check: it must simply not raise.
    reader.pages[0].extract_text()

    # Element 3 of build_char_map's result is indexed by character code here;
    # presumably it is the code -> unicode mapping (confirm in pypdf/_cmap.py).
    char_map_result = build_char_map("/F", 200, reader.pages[0])
    assert char_map_result[3]["\x01"] == "Ü"

0 comments on commit c1f8742

Please sign in to comment.