Skip to content

Commit 82faf98

Browse files
mbierma and mbierma authored
ROB: Silently ignore Adobe Ascii85 whitespace for suffix detection (#3528)
Based on the PDF standard: "the ASCII85Decode filter shall ignore all white-space characters".

Co-authored-by: mbierma <3448579-mbierma@users.noreply.gitlab.com>
1 parent cd172d9 commit 82faf98

File tree

2 files changed

+9
-0
lines changed

2 files changed

+9
-0
lines changed

pypdf/filters.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -482,6 +482,8 @@ def decode(
482482
if isinstance(data, str):
483483
data = data.encode()
484484
data = data.strip(WHITESPACES_AS_BYTES)
485+
if len(data) > 2 and data.endswith(b">"):
486+
data = data[:-1].rstrip(WHITESPACES_AS_BYTES) + data[-1:]
485487
try:
486488
return a85decode(data, adobe=True, ignorechars=WHITESPACES_AS_BYTES)
487489
except ValueError as error:

tests/test_filters.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -649,6 +649,13 @@ def test_ascii85decode__non_recoverable(caplog):
649649
assert caplog.text == ""
650650

651651

652+
def test_ascii85decode__ignore_whitespaces(caplog):
    """
    Whitespace inside the Adobe ``~>`` end marker must be silently ignored.

    The input places a newline between ``~`` and ``>``. Decoding must
    return the payload and must not emit any log output.
    """
    data = b"Cqa;:3k~\n>"
    result = ASCII85Decode.decode(data)
    assert result == b"l\xbe`\x8d:"
    # "Silently" is part of the contract: a fallback that decodes the data
    # but logs a warning must not satisfy this test.
    assert caplog.text == ""
657+
658+
652659
@pytest.mark.enable_socket
653660
def test_ccitt_fax_decode__black_is_1():
654661
url = "https://github.com/user-attachments/files/19288881/imagemagick-CCITTFaxDecode_BlackIs1-true.pdf"

0 commit comments

Comments
 (0)