From c4fc137da38356c5cac06029b8d5c842e0998cf7 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 14 Nov 2023 17:37:56 +0200 Subject: [PATCH 1/2] gh-111942: Fix crashes in TextIOWrapper.reconfigure() (GH-111976) * Fix crash when encoding is not string or None. * Fix crash when both line_buffering and write_through raise exception when converted ti int. * Add a number of tests for constructor and reconfigure() method with invalid arguments. (cherry picked from commit ee06fffd38cb51ce1c045da9d8336d9ce13c318a) Co-authored-by: Serhiy Storchaka --- Lib/test/test_io.py | 86 ++++++++++++++++++- ...-11-10-22-08-28.gh-issue-111942.MDFm6v.rst | 2 + Modules/_io/textio.c | 37 +++++++- 3 files changed, 121 insertions(+), 4 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2023-11-10-22-08-28.gh-issue-111942.MDFm6v.rst diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py index 0f1d526b0e7b3b..136dc6897cba45 100644 --- a/Lib/test/test_io.py +++ b/Lib/test/test_io.py @@ -81,6 +81,10 @@ def _default_chunk_size(): ) +class BadIndex: + def __index__(self): + 1/0 + class MockRawIOWithoutRead: """A RawIO implementation without read(), so as to exercise the default RawIO.read() which calls readinto().""" @@ -2613,8 +2617,31 @@ def test_constructor(self): self.assertEqual(t.encoding, "utf-8") self.assertEqual(t.line_buffering, True) self.assertEqual("\xe9\n", t.readline()) - self.assertRaises(TypeError, t.__init__, b, encoding="utf-8", newline=42) - self.assertRaises(ValueError, t.__init__, b, encoding="utf-8", newline='xyzzy') + invalid_type = TypeError if self.is_C else ValueError + with self.assertRaises(invalid_type): + t.__init__(b, encoding=42) + with self.assertRaises(UnicodeEncodeError): + t.__init__(b, encoding='\udcfe') + with self.assertRaises(ValueError): + t.__init__(b, encoding='utf-8\0') + with self.assertRaises(invalid_type): + t.__init__(b, encoding="utf-8", errors=42) + if support.Py_DEBUG or sys.flags.dev_mode or self.is_C: + with self.assertRaises(UnicodeEncodeError): + t.__init__(b, encoding="utf-8", errors='\udcfe') + if support.Py_DEBUG or sys.flags.dev_mode: + # TODO: If encoded to UTF-8, should also be checked for + # embedded null characters. + with self.assertRaises(ValueError): + t.__init__(b, encoding="utf-8", errors='replace\0') + with self.assertRaises(TypeError): + t.__init__(b, encoding="utf-8", newline=42) + with self.assertRaises(ValueError): + t.__init__(b, encoding="utf-8", newline='\udcfe') + with self.assertRaises(ValueError): + t.__init__(b, encoding="utf-8", newline='\n\0') + with self.assertRaises(ValueError): + t.__init__(b, encoding="utf-8", newline='xyzzy') def test_uninitialized(self): t = self.TextIOWrapper.__new__(self.TextIOWrapper) @@ -3663,6 +3690,59 @@ def test_reconfigure_defaults(self): self.assertEqual(txt.detach().getvalue(), b'LF\nCRLF\r\n') + def test_reconfigure_errors(self): + txt = self.TextIOWrapper(self.BytesIO(), 'ascii', 'replace', '\r') + with self.assertRaises(TypeError): # there was a crash + txt.reconfigure(encoding=42) + if self.is_C: + with self.assertRaises(UnicodeEncodeError): + txt.reconfigure(encoding='\udcfe') + with self.assertRaises(LookupError): + txt.reconfigure(encoding='locale\0') + # TODO: txt.reconfigure(encoding='utf-8\0') + # TODO: txt.reconfigure(encoding='nonexisting') + with self.assertRaises(TypeError): + txt.reconfigure(errors=42) + if self.is_C: + with self.assertRaises(UnicodeEncodeError): + txt.reconfigure(errors='\udcfe') + # TODO: txt.reconfigure(errors='ignore\0') + # TODO: txt.reconfigure(errors='nonexisting') + with self.assertRaises(TypeError): + txt.reconfigure(newline=42) + with self.assertRaises(ValueError): + txt.reconfigure(newline='\udcfe') + with self.assertRaises(ValueError): + txt.reconfigure(newline='xyz') + if not self.is_C: + # TODO: Should fail in C too. + with self.assertRaises(ValueError): + txt.reconfigure(newline='\n\0') + if self.is_C: + # TODO: Use __bool__(), not __index__(). + with self.assertRaises(ZeroDivisionError): + txt.reconfigure(line_buffering=BadIndex()) + with self.assertRaises(OverflowError): + txt.reconfigure(line_buffering=2**1000) + with self.assertRaises(ZeroDivisionError): + txt.reconfigure(write_through=BadIndex()) + with self.assertRaises(OverflowError): + txt.reconfigure(write_through=2**1000) + with self.assertRaises(ZeroDivisionError): # there was a crash + txt.reconfigure(line_buffering=BadIndex(), + write_through=BadIndex()) + self.assertEqual(txt.encoding, 'ascii') + self.assertEqual(txt.errors, 'replace') + self.assertIs(txt.line_buffering, False) + self.assertIs(txt.write_through, False) + + txt.reconfigure(encoding='latin1', errors='ignore', newline='\r\n', + line_buffering=True, write_through=True) + self.assertEqual(txt.encoding, 'latin1') + self.assertEqual(txt.errors, 'ignore') + self.assertIs(txt.line_buffering, True) + self.assertIs(txt.write_through, True) + def test_reconfigure_newline(self): raw = self.BytesIO(b'CR\rEOF') txt = self.TextIOWrapper(raw, 'ascii', newline='\n') @@ -4693,9 +4773,11 @@ def load_tests(loader, tests, pattern): if test.__name__.startswith("C"): for name, obj in c_io_ns.items(): setattr(test, name, obj) + test.is_C = True elif test.__name__.startswith("Py"): for name, obj in py_io_ns.items(): setattr(test, name, obj) + test.is_C = False suite = loader.suiteClass() for test in tests: diff --git a/Misc/NEWS.d/next/Library/2023-11-10-22-08-28.gh-issue-111942.MDFm6v.rst b/Misc/NEWS.d/next/Library/2023-11-10-22-08-28.gh-issue-111942.MDFm6v.rst new file mode 100644 index 00000000000000..4fc505c8f257a6 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-11-10-22-08-28.gh-issue-111942.MDFm6v.rst @@ -0,0 +1,2 @@ +Fix crashes in :meth:`io.TextIOWrapper.reconfigure` when pass invalid +arguments, e.g. non-string encoding. diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c index 7d5afb4c4441d6..5096cfe988705a 100644 --- a/Modules/_io/textio.c +++ b/Modules/_io/textio.c @@ -1272,24 +1272,34 @@ textiowrapper_change_encoding(textio *self, PyObject *encoding, errors = &_Py_ID(strict); } } + Py_INCREF(errors); + const char *c_encoding = PyUnicode_AsUTF8(encoding); + if (c_encoding == NULL) { + Py_DECREF(encoding); + Py_DECREF(errors); + return -1; + } const char *c_errors = PyUnicode_AsUTF8(errors); if (c_errors == NULL) { Py_DECREF(encoding); + Py_DECREF(errors); return -1; } // Create new encoder & decoder PyObject *codec_info = _PyCodec_LookupTextEncoding( - PyUnicode_AsUTF8(encoding), "codecs.open()"); + c_encoding, "codecs.open()"); if (codec_info == NULL) { Py_DECREF(encoding); + Py_DECREF(errors); return -1; } if (_textiowrapper_set_decoder(self, codec_info, c_errors) != 0 || _textiowrapper_set_encoder(self, codec_info, c_errors) != 0) { Py_DECREF(codec_info); Py_DECREF(encoding); + Py_DECREF(errors); return -1; } Py_DECREF(codec_info); @@ -1327,6 +1337,26 @@ _io_TextIOWrapper_reconfigure_impl(textio *self, PyObject *encoding, int write_through; const char *newline = NULL; + if (encoding != Py_None && !PyUnicode_Check(encoding)) { + PyErr_Format(PyExc_TypeError, + "reconfigure() argument 'encoding' must be str or None, not %s", + Py_TYPE(encoding)->tp_name); + return NULL; + } + if (errors != Py_None && !PyUnicode_Check(errors)) { + PyErr_Format(PyExc_TypeError, + "reconfigure() argument 'errors' must be str or None, not %s", + Py_TYPE(errors)->tp_name); + return NULL; + } + if (newline_obj != NULL && newline_obj != Py_None && + !PyUnicode_Check(newline_obj)) + { + PyErr_Format(PyExc_TypeError, + "reconfigure() argument 'newline' must be str or None, not %s", + Py_TYPE(newline_obj)->tp_name); + return NULL; + } /* Check if something is in the read buffer */ if (self->decoded_chars != NULL) { if (encoding != Py_None || errors != Py_None || newline_obj != NULL) { @@ -1345,9 +1375,12 @@ _io_TextIOWrapper_reconfigure_impl(textio *self, PyObject *encoding, line_buffering = convert_optional_bool(line_buffering_obj, self->line_buffering); + if (line_buffering < 0) { + return NULL; + } write_through = convert_optional_bool(write_through_obj, self->write_through); - if (line_buffering < 0 || write_through < 0) { + if (write_through < 0) { return NULL; } From 52cafc893d5abda6baad20c67368a1ad14bff961 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 15 Nov 2023 14:55:46 +0100 Subject: [PATCH 2/2] [3.12] gh-111942: Fix SystemError in the TextIOWrapper constructor (GH-112061) (GH-112089) In non-debug more the check for the "errors" argument is skipped, and then PyUnicode_AsUTF8() can fail, but its result was not checked. Co-authored-by: Victor Stinner (cherry picked from commit 9302f05f9af07332c414b3c19003efd1b1763cf3) Co-authored-by: Serhiy Storchaka --- Lib/test/test_io.py | 4 +--- .../2023-11-14-18-43-55.gh-issue-111942.x1pnrj.rst | 2 ++ Modules/_io/textio.c | 13 +++++++++++-- 3 files changed, 14 insertions(+), 5 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2023-11-14-18-43-55.gh-issue-111942.x1pnrj.rst diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py index 136dc6897cba45..78537057f32666 100644 --- a/Lib/test/test_io.py +++ b/Lib/test/test_io.py @@ -2629,9 +2629,7 @@ def test_constructor(self): if support.Py_DEBUG or sys.flags.dev_mode or self.is_C: with self.assertRaises(UnicodeEncodeError): t.__init__(b, encoding="utf-8", errors='\udcfe') - if support.Py_DEBUG or sys.flags.dev_mode: - # TODO: If encoded to UTF-8, should also be checked for - # embedded null characters. + if support.Py_DEBUG or sys.flags.dev_mode or self.is_C: with self.assertRaises(ValueError): t.__init__(b, encoding="utf-8", errors='replace\0') with self.assertRaises(TypeError): diff --git a/Misc/NEWS.d/next/Library/2023-11-14-18-43-55.gh-issue-111942.x1pnrj.rst b/Misc/NEWS.d/next/Library/2023-11-14-18-43-55.gh-issue-111942.x1pnrj.rst new file mode 100644 index 00000000000000..ca58a6fa5d6ae1 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-11-14-18-43-55.gh-issue-111942.x1pnrj.rst @@ -0,0 +1,2 @@ +Fix SystemError in the TextIOWrapper constructor with non-encodable "errors" +argument in non-debug mode. diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c index 5096cfe988705a..b994dc3d7c45b1 100644 --- a/Modules/_io/textio.c +++ b/Modules/_io/textio.c @@ -1099,6 +1099,15 @@ _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer, else if (io_check_errors(errors)) { return -1; } + Py_ssize_t errors_len; + const char *errors_str = PyUnicode_AsUTF8AndSize(errors, &errors_len); + if (errors_str == NULL) { + return -1; + } + if (strlen(errors_str) != (size_t)errors_len) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + return -1; + } if (validate_newline(newline) < 0) { return -1; @@ -1171,11 +1180,11 @@ _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer, Py_INCREF(buffer); /* Build the decoder object */ - if (_textiowrapper_set_decoder(self, codec_info, PyUnicode_AsUTF8(errors)) != 0) + if (_textiowrapper_set_decoder(self, codec_info, errors_str) != 0) goto error; /* Build the encoder object */ - if (_textiowrapper_set_encoder(self, codec_info, PyUnicode_AsUTF8(errors)) != 0) + if (_textiowrapper_set_encoder(self, codec_info, errors_str) != 0) goto error; /* Finished sorting out the codec details */