From 8417a8358b541f10658b5776e795c66acc7792fc Mon Sep 17 00:00:00 2001
From: Ronuk Raval <ronuk.raval@gmail.com>
Date: Thu, 29 Dec 2022 11:20:43 -0500
Subject: [PATCH] ROB: ignore_eof everywhere for read_until_regex

This was initially motivated by `NumberObject.read_from_stream`, which
was calling `read_until_regex` with the default value of
`ignore_eof=False` and thus raising exceptions like:

```
PyPDF2.errors.PdfStreamError: Stream has ended unexpectedly
```

https://github.com/py-pdf/PyPDF2/commit/431ba7092037af7d1c296f8f280aca167859ce61
demonstrates a similar fix for `NameObject.read_from_stream`.

During discussion in https://github.com/py-pdf/pypdf/pull/1505, it was
realized that, after the change to `NumberObject.read_from_stream`, ALL
callers of `read_until_regex` pass `ignore_eof=True`. It's cleaner to
remove the parameter entirely and make treating EOF as the end of the
token the only behaviour.
---
 pypdf/_utils.py                   | 15 +++------------
 pypdf/generic/_base.py            |  2 +-
 pypdf/generic/_data_structures.py |  2 +-
 tests/test_utils.py               | 11 +----------
 4 files changed, 6 insertions(+), 24 deletions(-)

diff --git a/pypdf/_utils.py b/pypdf/_utils.py
index 4da2663fc..fdc52b126 100644
--- a/pypdf/_utils.py
+++ b/pypdf/_utils.py
@@ -163,31 +163,22 @@ def skip_over_comment(stream: StreamType) -> None:
             tok = stream.read(1)
 
 
-def read_until_regex(
-    stream: StreamType, regex: Pattern[bytes], ignore_eof: bool = False
-) -> bytes:
+def read_until_regex(stream: StreamType, regex: Pattern[bytes]) -> bytes:
     """
     Read until the regular expression pattern matched (ignore the match).
+    Treats EOF on the underlying stream as the end of the token to be matched.
 
     Args:
-      ignore_eof: If true, ignore end-of-line and return immediately
       regex: re.Pattern
-      ignore_eof:  (Default value = False)
 
     Returns:
       The read bytes.
-
-    Raises:
-      PdfStreamError: on premature end-of-file
-
     """
     name = b""
     while True:
         tok = stream.read(16)
         if not tok:
-            if ignore_eof:
-                return name
-            raise PdfStreamError(STREAM_TRUNCATED_PREMATURELY)
+            return name
         m = regex.search(tok)
         if m is not None:
             name += tok[: m.start()]
diff --git a/pypdf/generic/_base.py b/pypdf/generic/_base.py
index b1adcc557..d973515a5 100644
--- a/pypdf/generic/_base.py
+++ b/pypdf/generic/_base.py
@@ -620,7 +620,7 @@ def read_from_stream(stream: StreamType, pdf: Any) -> "NameObject":  # PdfReader
         name = stream.read(1)
         if name != NameObject.surfix:
             raise PdfReadError("name read error")
-        name += read_until_regex(stream, NameObject.delimiter_pattern, ignore_eof=True)
+        name += read_until_regex(stream, NameObject.delimiter_pattern)
         try:
             # Name objects should represent irregular characters
             # with a '#' followed by the symbol's hex number
diff --git a/pypdf/generic/_data_structures.py b/pypdf/generic/_data_structures.py
index 2e472f51c..27160b18b 100644
--- a/pypdf/generic/_data_structures.py
+++ b/pypdf/generic/_data_structures.py
@@ -969,7 +969,7 @@ def __parse_content_stream(self, stream: StreamType) -> None:
                 break
             stream.seek(-1, 1)
             if peek.isalpha() or peek in (b"'", b'"'):
-                operator = read_until_regex(stream, NameObject.delimiter_pattern, True)
+                operator = read_until_regex(stream, NameObject.delimiter_pattern)
                 if operator == b"BI":
                     # begin inline image - a completely different parsing
                     # mechanism is required, of course... thanks buddy...
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 841c8d712..d05127c5d 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -62,20 +62,11 @@ def test_skip_over_comment(stream, remainder):
     assert stream.read() == remainder
 
 
-def test_read_until_regex_premature_ending_raise():
-    import re
-
-    stream = io.BytesIO(b"")
-    with pytest.raises(PdfStreamError) as exc:
-        read_until_regex(stream, re.compile(b"."))
-    assert exc.value.args[0] == "Stream has ended unexpectedly"
-
-
 def test_read_until_regex_premature_ending_name():
     import re
 
     stream = io.BytesIO(b"")
-    assert read_until_regex(stream, re.compile(b"."), ignore_eof=True) == b""
+    assert read_until_regex(stream, re.compile(b".")) == b""
 
 
 @pytest.mark.parametrize(