From 18fb00743202c848a405e0f928500c662d5af14c Mon Sep 17 00:00:00 2001 From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com> Date: Fri, 27 Sep 2024 18:38:58 +0200 Subject: [PATCH 1/2] BUG: Cope with unbalanced delimiters in dictionary object closes #2877 --- pypdf/generic/_data_structures.py | 2 ++ tests/test_reader.py | 9 +++++++++ 2 files changed, 11 insertions(+) diff --git a/pypdf/generic/_data_structures.py b/pypdf/generic/_data_structures.py index 2a004c15b..58a3477fa 100644 --- a/pypdf/generic/_data_structures.py +++ b/pypdf/generic/_data_structures.py @@ -570,6 +570,8 @@ def read_unsized_from_stream( try: try: key = read_object(stream, pdf) + if isinstance(key, NullObject): + break if not isinstance(key, NameObject): raise PdfReadError( f"Expecting a NameObject for key but found {key!r}" diff --git a/tests/test_reader.py b/tests/test_reader.py index 9fb898ab0..7ac0ec8ce 100644 --- a/tests/test_reader.py +++ b/tests/test_reader.py @@ -1703,3 +1703,12 @@ def test_space_in_names_to_continue_processing(caplog): reader = PdfReader(BytesIO(b), strict=True) with pytest.raises(PdfReadError): obj = reader.get_object(70) + + +@pytest.mark.enable_socket() +def test_unbalanced_brackets_in_dictionnaryobject(caplog): + """Cf #2877""" + url = "https://github.com/user-attachments/files/17162634/7f40cb209fb97d1782bffcefc5e7be40.pdf" + name = "iss2877.pdf" # reused + reader = PdfReader(BytesIO(get_data_from_url(url, name=name))) + assert len(reader.pages) == 43 # note: /Count = 46 but 3 kids are None From 43286230a1db40592b48485a3c1a14e1713de749 Mon Sep 17 00:00:00 2001 From: Stefan <96178532+stefan6419846@users.noreply.github.com> Date: Fri, 27 Sep 2024 19:10:42 +0200 Subject: [PATCH 2/2] Update tests/test_reader.py --- tests/test_reader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_reader.py b/tests/test_reader.py index 7ac0ec8ce..30da20adb 100644 --- a/tests/test_reader.py +++ b/tests/test_reader.py @@ -1706,7 +1706,7 @@ def 
test_space_in_names_to_continue_processing(caplog): @pytest.mark.enable_socket() -def test_unbalanced_brackets_in_dictionnaryobject(caplog): +def test_unbalanced_brackets_in_dictionary_object(caplog): """Cf #2877""" url = "https://github.com/user-attachments/files/17162634/7f40cb209fb97d1782bffcefc5e7be40.pdf" name = "iss2877.pdf" # reused