Skip to content

Commit 72a1090

Browse files
authored
CLN: let codecs validate the possible values of encoding and encoding errors (#43616)
1 parent 6791678 commit 72a1090

File tree

3 files changed

+6
-24
lines changed

3 files changed

+6
-24
lines changed

pandas/io/common.py

+4-22
Original file line number | Diff line number | Diff line change
@@ -276,10 +276,6 @@ def _get_filepath_or_buffer(
276276

277277
compression = dict(compression, method=compression_method)
278278

279-
# uniform encoding names
280-
if encoding is not None:
281-
encoding = encoding.replace("_", "-").lower()
282-
283279
# bz2 and xz do not write the byte order mark for utf-16 and utf-32
284280
# print a warning when writing such files
285281
if (
@@ -602,25 +598,11 @@ def get_handle(
602598
if _is_binary_mode(path_or_buf, mode) and "b" not in mode:
603599
mode += "b"
604600

605-
# valdiate errors
601+
# validate encoding and errors
602+
if isinstance(encoding, str):
603+
codecs.lookup(encoding)
606604
if isinstance(errors, str):
607-
errors = errors.lower()
608-
if errors not in (
609-
None,
610-
"strict",
611-
"ignore",
612-
"replace",
613-
"xmlcharrefreplace",
614-
"backslashreplace",
615-
"namereplace",
616-
"surrogateescape",
617-
"surrogatepass",
618-
):
619-
raise ValueError(
620-
f"Invalid value for `encoding_errors` ({errors}). Please see "
621-
+ "https://docs.python.org/3/library/codecs.html#error-handlers "
622-
+ "for valid values."
623-
)
605+
codecs.lookup_error(errors)
624606

625607
# open URLs
626608
ioargs = _get_filepath_or_buffer(

pandas/tests/io/test_common.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -593,7 +593,7 @@ def test_encoding_errors(encoding_errors, format):
593593
def test_bad_encdoing_errors():
594594
# GH 39777
595595
with tm.ensure_clean() as path:
596-
with pytest.raises(ValueError, match="Invalid value for `encoding_errors`"):
596+
with pytest.raises(LookupError, match="unknown error handler name"):
597597
icom.get_handle(path, "w", errors="bad")
598598

599599

pandas/tests/io/xml/test_xml.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -709,7 +709,7 @@ def test_utf16_encoding(datapath, parser):
709709

710710
def test_unknown_encoding(datapath, parser):
711711
filename = datapath("io", "data", "xml", "baby_names.xml")
712-
with pytest.raises(LookupError, match=("unknown encoding: uft-8")):
712+
with pytest.raises(LookupError, match=("unknown encoding: UFT-8")):
713713
read_xml(filename, encoding="UFT-8", parser=parser)
714714

715715

0 commit comments

Comments
 (0)