From b2a662fc6b391f176bbf9da45657d21ef5b363ac Mon Sep 17 00:00:00 2001 From: "Miss Islington (bot)" <31488909+miss-islington@users.noreply.github.com> Date: Fri, 20 Jan 2023 00:31:15 -0800 Subject: [PATCH] [3.10] gh-101144: Allow zipfile.Path .open & .read_text encoding to be positional (GH-101179) (GH-101182) The zipfile.Path open() and read_text() encoding parameter can be supplied as a positional argument without causing a TypeError again. 3.10.0b1 included a regression that made it keyword only. Documentation update included as users writing code to be compatible with a wide range of versions will need to consider this for some time.. (cherry picked from commit 5927013e47a8c63b70e104152351f3447baa819c) (cherry picked from commit efe3a389cabd7295e6e0938767cdc4055c871e3c) Co-authored-by: Gregory P. Smith [Google] Automerge-Triggered-By: GH:gpshead --- Doc/library/zipfile.rst | 12 ++++ Lib/test/test_zipfile.py | 66 ++++++++++++++++++- Lib/zipfile.py | 15 +++-- ...-01-18-17-58-50.gh-issue-101144.FHd8Un.rst | 4 ++ 4 files changed, 91 insertions(+), 6 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2023-01-18-17-58-50.gh-issue-101144.FHd8Un.rst diff --git a/Doc/library/zipfile.rst b/Doc/library/zipfile.rst index 10fffb70dbdb63..eab1eaf4312d29 100644 --- a/Doc/library/zipfile.rst +++ b/Doc/library/zipfile.rst @@ -509,6 +509,12 @@ Path objects are traversable using the ``/`` operator or ``joinpath``. Added support for text and binary modes for open. Default mode is now text. + .. versionchanged:: 3.10.10 + The ``encoding`` parameter can be supplied as a positional argument + without causing a :exc:`TypeError`. As it could in 3.9. Code needing to + be compatible with unpatched 3.10 and 3.11 versions must pass all + :class:`io.TextIOWrapper` arguments, ``encoding`` included, as keywords. + .. method:: Path.iterdir() Enumerate the children of the current directory. @@ -533,6 +539,12 @@ Path objects are traversable using the ``/`` operator or ``joinpath``. :class:`io.TextIOWrapper` (except ``buffer``, which is implied by the context). + .. versionchanged:: 3.10.10 + The ``encoding`` parameter can be supplied as a positional argument + without causing a :exc:`TypeError`. As it could in 3.9. Code needing to + be compatible with unpatched 3.10 and 3.11 versions must pass all + :class:`io.TextIOWrapper` arguments, ``encoding`` included, as keywords. + .. method:: Path.read_bytes() Read the current file as bytes. diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py index e557d569a119ce..cf40412f85266d 100644 --- a/Lib/test/test_zipfile.py +++ b/Lib/test/test_zipfile.py @@ -10,6 +10,7 @@ import struct import subprocess import sys +from test.support.script_helper import assert_python_ok import time import unittest import unittest.mock as mock @@ -2933,7 +2934,69 @@ def test_open(self, alpharep): a, b, g = root.iterdir() with a.open(encoding="utf-8") as strm: data = strm.read() - assert data == "content of a" + self.assertEqual(data, "content of a") + with a.open('r', "utf-8") as strm: # not a kw, no gh-101144 TypeError + data = strm.read() + self.assertEqual(data, "content of a") + + def test_open_encoding_utf16(self): + in_memory_file = io.BytesIO() + zf = zipfile.ZipFile(in_memory_file, "w") + zf.writestr("path/16.txt", "This was utf-16".encode("utf-16")) + zf.filename = "test_open_utf16.zip" + root = zipfile.Path(zf) + (path,) = root.iterdir() + u16 = path.joinpath("16.txt") + with u16.open('r', "utf-16") as strm: + data = strm.read() + self.assertEqual(data, "This was utf-16") + with u16.open(encoding="utf-16") as strm: + data = strm.read() + self.assertEqual(data, "This was utf-16") + + def test_open_encoding_errors(self): + in_memory_file = io.BytesIO() + zf = zipfile.ZipFile(in_memory_file, "w") + zf.writestr("path/bad-utf8.bin", b"invalid utf-8: \xff\xff.") + zf.filename = "test_read_text_encoding_errors.zip" + root = zipfile.Path(zf) + (path,) = root.iterdir() + u16 = path.joinpath("bad-utf8.bin") + + # encoding= as a positional argument for gh-101144. + data = u16.read_text("utf-8", errors="ignore") + self.assertEqual(data, "invalid utf-8: .") + with u16.open("r", "utf-8", errors="surrogateescape") as f: + self.assertEqual(f.read(), "invalid utf-8: \udcff\udcff.") + + # encoding= both positional and keyword is an error; gh-101144. + with self.assertRaisesRegex(TypeError, "encoding"): + data = u16.read_text("utf-8", encoding="utf-8") + + # both keyword arguments work. + with u16.open("r", encoding="utf-8", errors="strict") as f: + # error during decoding with wrong codec. + with self.assertRaises(UnicodeDecodeError): + f.read() + + def test_encoding_warnings(self): + """EncodingWarning must blame the read_text and open calls.""" + code = '''\ +import io, zipfile +with zipfile.ZipFile(io.BytesIO(), "w") as zf: + zf.filename = '' + zf.writestr("path/file.txt", b"Spanish Inquisition") + root = zipfile.Path(zf) + (path,) = root.iterdir() + file_path = path.joinpath("file.txt") + unused = file_path.read_text() # should warn + file_path.open("r").close() # should warn +''' + proc = assert_python_ok('-X', 'warn_default_encoding', '-c', code) + warnings = proc.err.splitlines() + self.assertEqual(len(warnings), 2, proc.err) + self.assertRegex(warnings[0], rb"^:8: EncodingWarning:") + self.assertRegex(warnings[1], rb"^:9: EncodingWarning:") def test_open_write(self): """ @@ -2975,6 +3038,7 @@ def test_read(self, alpharep): root = zipfile.Path(alpharep) a, b, g = root.iterdir() assert a.read_text(encoding="utf-8") == "content of a" + a.read_text("utf-8") # No positional arg TypeError per gh-101144. assert a.read_bytes() == b"content of a" @pass_alpharep diff --git a/Lib/zipfile.py b/Lib/zipfile.py index eee1f47ed097a5..dc4eb38e3a9025 100644 --- a/Lib/zipfile.py +++ b/Lib/zipfile.py @@ -2236,6 +2236,11 @@ def _name_set(self): return self.__lookup +def _extract_text_encoding(encoding=None, *args, **kwargs): + # stacklevel=3 so that the caller of the caller see any warning. + return io.text_encoding(encoding, 3), args, kwargs + + class Path: """ A pathlib-compatible interface for zip files. @@ -2345,9 +2350,9 @@ def open(self, mode='r', *args, pwd=None, **kwargs): if args or kwargs: raise ValueError("encoding args invalid for binary operation") return stream - else: - kwargs["encoding"] = io.text_encoding(kwargs.get("encoding")) - return io.TextIOWrapper(stream, *args, **kwargs) + # Text mode: + encoding, args, kwargs = _extract_text_encoding(*args, **kwargs) + return io.TextIOWrapper(stream, encoding, *args, **kwargs) @property def name(self): @@ -2358,8 +2363,8 @@ def filename(self): return pathlib.Path(self.root.filename).joinpath(self.at) def read_text(self, *args, **kwargs): - kwargs["encoding"] = io.text_encoding(kwargs.get("encoding")) - with self.open('r', *args, **kwargs) as strm: + encoding, args, kwargs = _extract_text_encoding(*args, **kwargs) + with self.open('r', encoding, *args, **kwargs) as strm: return strm.read() def read_bytes(self): diff --git a/Misc/NEWS.d/next/Library/2023-01-18-17-58-50.gh-issue-101144.FHd8Un.rst b/Misc/NEWS.d/next/Library/2023-01-18-17-58-50.gh-issue-101144.FHd8Un.rst new file mode 100644 index 00000000000000..2454ea83ef00af --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-01-18-17-58-50.gh-issue-101144.FHd8Un.rst @@ -0,0 +1,4 @@ +Make :func:`zipfile.Path.open` and :func:`zipfile.Path.read_text` also accept +``encoding`` as a positional argument. This was the behavior in Python 3.9 and +earlier. Earlier 3.10 versions had a regression where supplying it as a positional +argument would lead to a :exc:`TypeError`.