From 2d7b8ce89b4284b829fddede7030e0fff5d83f4b Mon Sep 17 00:00:00 2001
From: Christian Heimes
Date: Thu, 31 Mar 2022 18:14:50 +0300
Subject: [PATCH] [3.9] bpo-47182: Fix crash by named unicode characters after interpreter reinitialization (GH-32212)

Automerge-Triggered-By: GH:tiran.
(cherry picked from commit 44e915028d75f7cef141aa1aada962465a5907d6)

Co-authored-by: Christian Heimes
---
NOTE(review): the Objects/unicodeobject.c hunk in this backport adds
unresolved merge-conflict markers (the HEAD, "parent of 44e915028d7" and
44e915028d7 sections) to the file as '+' lines. The conflict has to be
resolved by hand before this patch is applied; as written, the patched
file still contains the markers.

 .../2022-03-31-15-37-02.bpo-47182.e_4SsC.rst  |   2 +
 Objects/unicodeobject.c                       | 133 ++++++++++++++++++
 2 files changed, 135 insertions(+)
 create mode 100644 Misc/NEWS.d/next/Core and Builtins/2022-03-31-15-37-02.bpo-47182.e_4SsC.rst

diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-03-31-15-37-02.bpo-47182.e_4SsC.rst b/Misc/NEWS.d/next/Core and Builtins/2022-03-31-15-37-02.bpo-47182.e_4SsC.rst
new file mode 100644
index 00000000000000..08036bc680933b
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2022-03-31-15-37-02.bpo-47182.e_4SsC.rst
@@ -0,0 +1,2 @@
+Fix a crash when using a named unicode character like ``"\N{digit nine}"``
+after the main interpreter has been initialized a second time.
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 7767d140e6c395..fe9da0389b016c 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -16316,7 +16316,140 @@ _PyUnicode_Fini(PyThreadState *tstate) unicode_clear_static_strings(); } +<<<<<<< HEAD _PyUnicode_FiniEncodings(&tstate->interp->unicode.fs_codec); +||||||| parent of 44e915028d7 (bpo-47182: Fix crash by named unicode characters after interpreter reinitialization (GH-32212)) + _PyStaticType_Dealloc(&EncodingMapType); + _PyStaticType_Dealloc(&PyFieldNameIter_Type); + _PyStaticType_Dealloc(&PyFormatterIter_Type); +} + + +static void unicode_static_dealloc(PyObject *op) +{ + PyASCIIObject *ascii = _PyASCIIObject_CAST(op); + + assert(ascii->state.compact); + + if (ascii->state.ascii) { + if (ascii->wstr) { + PyObject_Free(ascii->wstr); + ascii->wstr = NULL; + } + } + else { + PyCompactUnicodeObject* compact = (PyCompactUnicodeObject*)op; + void* data = (void*)(compact + 1); + if (ascii->wstr && ascii->wstr != data) { + PyObject_Free(ascii->wstr); + ascii->wstr = NULL; + compact->wstr_length = 0; + } + if (compact->utf8) { + PyObject_Free(compact->utf8); + compact->utf8 = NULL; + compact->utf8_length = 0; + } + } +} + + +void +_PyUnicode_Fini(PyInterpreterState *interp) +{ + struct _Py_unicode_state *state = &interp->unicode; + + if (_Py_IsMainInterpreter(interp)) { + // _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini() + assert(interned == NULL); + } + + _PyUnicode_FiniEncodings(&state->fs_codec); + + unicode_clear_identifiers(state); + + // Clear the single character singletons + for (int i = 0; i < 128; i++) { + unicode_static_dealloc((PyObject*)&_Py_SINGLETON(strings).ascii[i]); + } + for (int i = 0; i < 128; i++) { + unicode_static_dealloc((PyObject*)&_Py_SINGLETON(strings).latin1[i]); + } +} + + +void +_PyStaticUnicode_Dealloc(PyObject *op) +{ + unicode_static_dealloc(op); +======= + _PyStaticType_Dealloc(&EncodingMapType); + 
_PyStaticType_Dealloc(&PyFieldNameIter_Type); + _PyStaticType_Dealloc(&PyFormatterIter_Type); +} + + +static void unicode_static_dealloc(PyObject *op) +{ + PyASCIIObject *ascii = _PyASCIIObject_CAST(op); + + assert(ascii->state.compact); + + if (ascii->state.ascii) { + if (ascii->wstr) { + PyObject_Free(ascii->wstr); + ascii->wstr = NULL; + } + } + else { + PyCompactUnicodeObject* compact = (PyCompactUnicodeObject*)op; + void* data = (void*)(compact + 1); + if (ascii->wstr && ascii->wstr != data) { + PyObject_Free(ascii->wstr); + ascii->wstr = NULL; + compact->wstr_length = 0; + } + if (compact->utf8) { + PyObject_Free(compact->utf8); + compact->utf8 = NULL; + compact->utf8_length = 0; + } + } +} + + +void +_PyUnicode_Fini(PyInterpreterState *interp) +{ + struct _Py_unicode_state *state = &interp->unicode; + + if (_Py_IsMainInterpreter(interp)) { + // _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini() + assert(interned == NULL); + // bpo-47182: force a unicodedata CAPI capsule re-import on + // subsequent initialization of main interpreter. + ucnhash_capi = NULL; + } + + _PyUnicode_FiniEncodings(&state->fs_codec); + + unicode_clear_identifiers(state); + + // Clear the single character singletons + for (int i = 0; i < 128; i++) { + unicode_static_dealloc((PyObject*)&_Py_SINGLETON(strings).ascii[i]); + } + for (int i = 0; i < 128; i++) { + unicode_static_dealloc((PyObject*)&_Py_SINGLETON(strings).latin1[i]); + } +} + + +void +_PyStaticUnicode_Dealloc(PyObject *op) +{ + unicode_static_dealloc(op); +>>>>>>> 44e915028d7 (bpo-47182: Fix crash by named unicode characters after interpreter reinitialization (GH-32212)) }