Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix encoding positional parameter #89

Merged
merged 7 commits into from
Jan 27, 2023
Merged
6 changes: 6 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
v3.12.0
=======

* gh-101144: Honor ``encoding`` as positional parameter
to ``Path.open()`` and ``Path.read_text()``.

v3.11.0
=======

Expand Down
13 changes: 13 additions & 0 deletions conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import builtins
import sys


def pytest_configure():
add_future_flags()


def add_future_flags():
if sys.version_info > (3, 10):
return

builtins.EncodingWarning = type('EncodingWarning', (Warning,), {})
8 changes: 6 additions & 2 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,12 @@
url='https://peps.python.org/pep-{pep_number:0>4}/',
),
dict(
pattern=r'(Python #|bpo-)(?P<python>\d+)',
url='http://bugs.python.org/issue{python}',
pattern=r'(bpo-)(?P<bpo>\d+)',
url='http://bugs.python.org/issue{bpo}',
),
dict(
pattern=r'(gh-)(?P<python_gh>\d+)',
url='http://bugs.python.org/issue{python_gh}',
),
],
)
Expand Down
72 changes: 67 additions & 5 deletions tests/test_zipp.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
import io
import zipfile
import itertools
import contextlib
import pathlib
import unittest
import tempfile
import shutil
import string
import pickle
import itertools
import string
import sys
import unittest
import zipfile

import jaraco.itertools
import func_timeout
Expand Down Expand Up @@ -140,7 +141,66 @@ def test_open(self, alpharep):
a, b, g = root.iterdir()
with a.open(encoding="utf-8") as strm:
data = strm.read()
assert data == "content of a"
self.assertEqual(data, "content of a")
with a.open('r', "utf-8") as strm: # not a kw, no gh-101144 TypeError
data = strm.read()
self.assertEqual(data, "content of a")

def test_open_encoding_utf16(self):
in_memory_file = io.BytesIO()
zf = zipfile.ZipFile(in_memory_file, "w")
zf.writestr("path/16.txt", "This was utf-16".encode("utf-16"))
zf.filename = "test_open_utf16.zip"
root = zipp.Path(zf)
(path,) = root.iterdir()
u16 = path.joinpath("16.txt")
with u16.open('r', "utf-16") as strm:
data = strm.read()
assert data == "This was utf-16"
with u16.open(encoding="utf-16") as strm:
data = strm.read()
assert data == "This was utf-16"

def test_open_encoding_errors(self):
in_memory_file = io.BytesIO()
zf = zipfile.ZipFile(in_memory_file, "w")
zf.writestr("path/bad-utf8.bin", b"invalid utf-8: \xff\xff.")
zf.filename = "test_read_text_encoding_errors.zip"
root = zipp.Path(zf)
(path,) = root.iterdir()
u16 = path.joinpath("bad-utf8.bin")

# encoding= as a positional argument for gh-101144.
data = u16.read_text("utf-8", errors="ignore")
assert data == "invalid utf-8: ."
with u16.open("r", "utf-8", errors="surrogateescape") as f:
assert f.read() == "invalid utf-8: \udcff\udcff."

# encoding= both positional and keyword is an error; gh-101144.
with self.assertRaisesRegex(TypeError, "encoding"):
data = u16.read_text("utf-8", encoding="utf-8")

# both keyword arguments work.
with u16.open("r", encoding="utf-8", errors="strict") as f:
# error during decoding with wrong codec.
with self.assertRaises(UnicodeDecodeError):
f.read()

@unittest.skipIf(
not getattr(sys.flags, 'warn_default_encoding', 0),
"Requires warn_default_encoding",
)
@pass_alpharep
def test_encoding_warnings(self, alpharep):
"""EncodingWarning must blame the read_text and open calls."""
assert sys.flags.warn_default_encoding
root = zipp.Path(alpharep)
with self.assertWarns(EncodingWarning) as wc:
root.joinpath("a.txt").read_text()
assert __file__ == wc.filename
with self.assertWarns(EncodingWarning) as wc:
root.joinpath("a.txt").open("r").close()
assert __file__ == wc.filename

def test_open_write(self):
"""
Expand Down Expand Up @@ -182,6 +242,8 @@ def test_read(self, alpharep):
root = zipp.Path(alpharep)
a, b, g = root.iterdir()
assert a.read_text(encoding="utf-8") == "content of a"
# Also check positional encoding arg (gh-101144).
assert a.read_text("utf-8") == "content of a"
assert a.read_bytes() == b"content of a"

@pass_alpharep
Expand Down
15 changes: 10 additions & 5 deletions zipp/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,11 @@ def _name_set(self):
return self.__lookup


def _extract_text_encoding(encoding=None, *args, **kwargs):
# stacklevel=3 so that the caller of the caller see any warning.
return text_encoding(encoding, 3), args, kwargs


class Path:
"""
A pathlib-compatible interface for zip files.
Expand Down Expand Up @@ -273,9 +278,9 @@ def open(self, mode='r', *args, pwd=None, **kwargs):
if args or kwargs:
raise ValueError("encoding args invalid for binary operation")
return stream
else:
kwargs["encoding"] = text_encoding(kwargs.get("encoding"))
return io.TextIOWrapper(stream, *args, **kwargs)
# Text mode:
encoding, args, kwargs = _extract_text_encoding(*args, **kwargs)
return io.TextIOWrapper(stream, encoding, *args, **kwargs)

@property
def name(self):
Expand All @@ -298,8 +303,8 @@ def filename(self):
return pathlib.Path(self.root.filename).joinpath(self.at)

def read_text(self, *args, **kwargs):
kwargs["encoding"] = text_encoding(kwargs.get("encoding"))
with self.open('r', *args, **kwargs) as strm:
encoding, args, kwargs = _extract_text_encoding(*args, **kwargs)
with self.open('r', encoding, *args, **kwargs) as strm:
return strm.read()

def read_bytes(self):
Expand Down