Skip to content

Commit

Permalink
BUG: Avoid isolating the graphics state multiple times (fixes py-pdf#…
Browse files Browse the repository at this point in the history
  • Loading branch information
stefan6419846 committed Sep 28, 2023
1 parent 552c8e0 commit 89debe6
Show file tree
Hide file tree
Showing 2 changed files with 72 additions and 0 deletions.
21 changes: 21 additions & 0 deletions pypdf/generic/_data_structures.py
Original file line number Diff line number Diff line change
Expand Up @@ -1023,6 +1023,8 @@ def __init__(
super().set_data(b_(stream_data))
self.forced_encoding = forced_encoding

self._has_isolated_graphics_state: Optional[bool] = None

def clone(
self,
pdf_dest: Any,
Expand Down Expand Up @@ -1229,12 +1231,31 @@ def operations(self, operations: List[Tuple[Any, Any]]) -> None:
self._operations = operations
self._data = b""

@property
def has_isolated_graphics_state(self) -> bool:
    """
    Tell whether the content is already wrapped in a ``q``/``Q`` operator pair.

    The answer is computed on first access and cached in
    ``_has_isolated_graphics_state``; later accesses return the cached value.

    Returns:
        ``True`` if the first parsed operation is ``q`` and the last one is
        ``Q``, if the raw data starts with ``q`` + linebreak and ends with
        ``Q`` + linebreak, or if the stream is empty. ``False`` otherwise.
    """
    if self._has_isolated_graphics_state is None:
        if self._operations:
            # Each operation is an ``(operands, operator)`` tuple, so the
            # operator element has to be compared, not the whole tuple.
            # `isolate_graphics_state` inserts str operators; operators coming
            # from a parsed stream are presumably bytes (TODO confirm), so
            # both spellings are normalized before the comparison.
            boundary_operators = [
                operator.decode("latin-1") if isinstance(operator, bytes) else operator
                for operator in (self._operations[0][1], self._operations[-1][1])
            ]
            self._has_isolated_graphics_state = boundary_operators == ["q", "Q"]
        elif self._data:
            # Check for the character with the linebreak as inserted by `isolate_graphics_state`.
            self._has_isolated_graphics_state = self._data[:2] == b"q\n" and self._data[-2:] == b"Q\n"
        else:
            # Empty stream.
            self._has_isolated_graphics_state = True

    return self._has_isolated_graphics_state

def isolate_graphics_state(self) -> None:
    """
    Wrap the content in a ``q``/``Q`` operator pair unless it already is.

    If `has_isolated_graphics_state` is true, nothing is changed. Otherwise
    the parsed operations are wrapped if there are any; if not, the raw data
    is wrapped. Afterwards the cached isolation flag is set to ``True``.
    """
    if self.has_isolated_graphics_state:
        # No need to isolate again.
        return

    if self._operations:
        self._operations.insert(0, ([], "q"))
        self._operations.append(([], "Q"))
    elif self._data:
        raw_data = b_(self._data)
        # Tokens in a content stream must be separated by white-space. If the
        # existing data does not end with a PDF white-space character, the
        # trailing ``Q`` would be glued to the last token (e.g. ``DoQ``), so a
        # linebreak is inserted in that case only. Data which already ends
        # with white-space produces exactly the same output as before.
        pdf_whitespace = (b"\x00", b"\t", b"\n", b"\x0c", b"\r", b" ")
        separator = b"" if raw_data[-1:] in pdf_whitespace else b"\n"
        self._data = b"q\n" + raw_data + separator + b"Q\n"
    self._has_isolated_graphics_state = True

# This overrides the parent method:
def write_to_stream(
Expand Down
51 changes: 51 additions & 0 deletions tests/test_page.py
Original file line number Diff line number Diff line change
Expand Up @@ -1288,3 +1288,54 @@ def test_get_contents_from_nullobject():
p = writer.add_blank_page(100, 100)
p[NameObject("/Contents")] = writer._add_object(NullObject())
p.get_contents()


@pytest.mark.enable_socket()
def test_has_isolated_graphics_state():
    """Exercise `has_isolated_graphics_state` on a real file, raw data and the cache."""
    # Real example.
    source_url = "https://github.com/py-pdf/pypdf/files/12428859/out1.pdf"
    cache_name = "isolate-graphics-state.pdf"
    pdf_bytes = get_data_from_url(source_url, name=cache_name)
    first_page = PdfReader(BytesIO(pdf_bytes)).pages[0]
    stream = first_page.get_contents()
    assert stream is not None

    # Not isolated before the call, isolated afterwards.
    assert stream.has_isolated_graphics_state is False
    stream.isolate_graphics_state()
    assert stream.has_isolated_graphics_state is True

    # Empty stream handling.
    empty_stream = ContentStream(stream=None, pdf="dummy.pdf")
    assert empty_stream.has_isolated_graphics_state is True

    # Handling of string-based checks: trailing characters after the final
    # `Q` + linebreak mean the data is not considered isolated.
    raw_cases = (
        (b"q\n 841.680 0 0 595.200 0.000 0.000 cm\n/Im0 Do\nQ\n\n \n", False),
        (b"q\n 841.680 0 0 595.200 0.000 0.000 cm\n/Im0 Do\nQ\n", True),
    )
    for raw_data, expected in raw_cases:
        data_stream = ContentStream(stream=None, pdf="dummy.pdf")
        data_stream.set_data(raw_data)
        assert data_stream.has_isolated_graphics_state is expected

    # Dummy example to test caching: the property returns whatever is cached.
    cached_stream = ContentStream(stream=None, pdf="dummy.pdf")
    assert cached_stream._has_isolated_graphics_state is None
    for cached_value in (True, False):
        cached_stream._has_isolated_graphics_state = cached_value
        assert cached_stream.has_isolated_graphics_state is cached_value


@pytest.mark.enable_socket()
def test_isolate_graphics_state():
    """Check that isolating wraps the raw data once and a second call changes nothing."""
    pdf_bytes = get_data_from_url(
        "https://github.com/py-pdf/pypdf/files/12428859/out1.pdf",
        name="isolate-graphics-state.pdf",
    )
    first_page = PdfReader(BytesIO(pdf_bytes)).pages[0]
    stream = first_page.get_contents()
    assert stream is not None

    # This page is not considered isolated at the beginning due to the final characters.
    assert stream._data == b"q\n 841.680 0 0 595.200 0.000 0.000 cm\n/Im0 Do\nQ\n\n \n"

    wrapped_data = b"q\nq\n 841.680 0 0 595.200 0.000 0.000 cm\n/Im0 Do\nQ\n\n \nQ\n"

    # The first call wraps the data.
    stream.isolate_graphics_state()
    assert stream._data == wrapped_data

    # The second call leaves the data unchanged.
    stream.isolate_graphics_state()
    assert stream._data == wrapped_data

0 comments on commit 89debe6

Please sign in to comment.