Skip to content

Commit

Permalink
TST: Add MCVE showing outline title issue (#1123)
Browse files Browse the repository at this point in the history
See #1121
  • Loading branch information
MartinThoma authored Jul 17, 2022
1 parent ed5ecd9 commit 5ddf4cb
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 1 deletion.
2 changes: 1 addition & 1 deletion sample-files
62 changes: 62 additions & 0 deletions tests/test_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
PdfReadWarning,
)
from PyPDF2.filters import _xobj_to_image
from PyPDF2.generic import Destination

from . import get_pdf_from_url

Expand All @@ -30,6 +31,7 @@
# Absolute path of the directory that contains this test module.
TESTS_ROOT = os.path.abspath(os.path.dirname(__file__))
# Parent directory of the tests directory.
PROJECT_ROOT = os.path.dirname(TESTS_ROOT)
# "resources" directory directly under PROJECT_ROOT (plain string path).
RESOURCE_ROOT = os.path.join(PROJECT_ROOT, "resources")
# "sample-files" directory directly under PROJECT_ROOT; unlike the constants
# above this is a Path object, so it is combined with "/" rather than
# os.path.join.
EXTERNAL_ROOT = Path(PROJECT_ROOT) / "sample-files"


@pytest.mark.parametrize(
Expand Down Expand Up @@ -872,3 +874,63 @@ def test_outline_font_format():
name = "tika-924546.pdf"
reader = PdfReader(BytesIO(get_pdf_from_url(url, name=name)))
assert reader.outlines[0].font_format == 2


@pytest.mark.xfail(reason="#1121")
def test_outline_title_issue_1121():
    """Compare the outline titles of the sample PDF with the expected tree.

    Marked ``xfail`` with reason ``#1121``: the assertion is currently
    expected to fail.
    """
    reader = PdfReader(EXTERNAL_ROOT / "014-outlines/mistitled_outlines_example.pdf")

    def get_titles_only(outlines):
        """Return the titles of ``outlines``, keeping the list nesting.

        Each ``Destination`` contributes its ``title``; every other entry
        is processed recursively and contributes a nested list.

        Raises:
            ValueError: if ``outlines`` (at any nesting level) is not a list.
        """
        if not isinstance(outlines, list):
            raise ValueError(f"got {type(outlines)}")
        return [
            entry.title if isinstance(entry, Destination) else get_titles_only(entry)
            for entry in outlines
        ]

    # A nested list holds the children of the entry directly before it.
    expected_titles = [
        "First",
        [
            "Second",
            "Third",
            "Fourth",
            [
                "Fifth",
                "Sixth",
            ],
            "Seventh",
            [
                "Eighth",
                "Ninth",
            ],
        ],
        "Tenth",
        [
            "Eleventh",
            "Twelfth",
            "Thirteenth",
            "Fourteenth",
        ],
        "Fifteenth",
        [
            "Sixteenth",
            "Seventeenth",
        ],
        "Eighteenth",
        "Nineteenth",
        [
            "Twentieth",
            "Twenty-first",
            "Twenty-second",
            "Twenty-third",
            "Twenty-fourth",
            "Twenty-fifth",
            "Twenty-sixth",
            "Twenty-seventh",
        ],
    ]

    assert get_titles_only(reader.outlines) == expected_titles

0 comments on commit 5ddf4cb

Please sign in to comment.