bpo-40596: Fix str.isidentifier() for non-canonicalized strings.

PyUnicode_IsIdentifier() has no way to report an error, so its callers
shown here -- valid_identifier() in Objects/typeobject.c and
unicode_isidentifier_impl() in Objects/unicodeobject.c -- now call
PyUnicode_READY() first and report a failure themselves (returning 0
and NULL respectively).  A CPython-only regression test built on
_testcapi.unicode_legacy_string() and a NEWS entry are included.

diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
index 28398896467898..2ee4e64d635303 100644
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -720,6 +720,13 @@ def test_isidentifier(self):
         self.assertFalse("©".isidentifier())
         self.assertFalse("0".isidentifier())
 
+    @support.cpython_only
+    def test_isidentifier_legacy(self):
+        import _testcapi
+        u = '𝖀𝖓𝖎𝖈𝖔𝖉𝖊'
+        self.assertTrue(u.isidentifier())
+        self.assertTrue(_testcapi.unicode_legacy_string(u).isidentifier())
+
     def test_isprintable(self):
         self.assertTrue("".isprintable())
         self.assertTrue(" ".isprintable())
diff --git a/Misc/NEWS.d/next/Core and Builtins/2020-05-11-20-53-52.bpo-40596.dwOH_X.rst b/Misc/NEWS.d/next/Core and Builtins/2020-05-11-20-53-52.bpo-40596.dwOH_X.rst
new file mode 100644
index 00000000000000..1252db4dc9848d
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2020-05-11-20-53-52.bpo-40596.dwOH_X.rst
@@ -0,0 +1,2 @@
+Fixed :meth:`str.isidentifier` for non-canonicalized strings containing
+non-BMP characters on Windows.
diff --git a/Objects/typeobject.c b/Objects/typeobject.c
index 525f5ac5d5775a..5a9f5c56c5f6b8 100644
--- a/Objects/typeobject.c
+++ b/Objects/typeobject.c
@@ -2319,6 +2319,11 @@ valid_identifier(PyObject *s)
                      Py_TYPE(s)->tp_name);
         return 0;
     }
+    /* Since there is no way to return an error from PyUnicode_IsIdentifier()
+       we have to call explicitly PyUnicode_READY(). */
+    if (PyUnicode_READY(s) < 0) {
+        return 0;
+    }
     if (!PyUnicode_IsIdentifier(s)) {
         PyErr_SetString(PyExc_TypeError,
                         "__slots__ must be identifiers");
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 18b9458721de18..ea85e4cca03f2f 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -12373,6 +12373,9 @@ static PyObject *
 unicode_isidentifier_impl(PyObject *self)
 /*[clinic end generated code: output=fe585a9666572905 input=2d807a104f21c0c5]*/
 {
+    if (PyUnicode_READY(self) < 0) {
+        return NULL;
+    }
     return PyBool_FromLong(PyUnicode_IsIdentifier(self));
 }
 