From 3aa5e2acf8fe1ee225d2abbbac499f176311652a Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Wed, 25 Sep 2024 12:39:08 -0700 Subject: [PATCH 1/2] gh-116510: Fix crash due to shared immortal interned strings. Fix a crash caused by immortal interned strings being shared between sub-interpreters that use basic single-phase init. In that case, the string can be used by an interpreter that outlives the interpeter that created and interned it. For interpreters that share obmalloc state, also share the interned dict with the main interpreter. --- ...-09-25-12-37-58.gh-issue-116510.WeBAx1.rst | 5 +++ Objects/unicodeobject.c | 42 ++++++++++++++++--- 2 files changed, 42 insertions(+), 5 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2024-09-25-12-37-58.gh-issue-116510.WeBAx1.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-09-25-12-37-58.gh-issue-116510.WeBAx1.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-09-25-12-37-58.gh-issue-116510.WeBAx1.rst new file mode 100644 index 00000000000000..e1e0408f548498 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-09-25-12-37-58.gh-issue-116510.WeBAx1.rst @@ -0,0 +1,5 @@ +Fix a crash caused by immortal interned strings being shared between +sub-interpreters that use basic single-phase init. In that case, the string +can be used by an interpreter that outlives the interpeter that created and +interned it. For interpreters that share obmalloc state, also share the +interned dict with the main interpreter. diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index fc9379be02f4dc..2b58a1538296be 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -266,6 +266,23 @@ hashtable_unicode_compare(const void *key1, const void *key2) } } +/* Return true if this interpreter should share the main interpreter's + intern_dict. That's important for interpreters which load basic + single-phase init extension modules (m_size == -1). There could be interned + immortal strings that are shared between interpreters, due to the + PyDict_Update(mdict, m_copy) call in import_find_extension(). + + It's not safe to deallocate those strings until all interpreters that + potentially use them are freed. By storing them in the main interpreter, we + ensure they get freed after all other interpreters are freed. +*/ +static bool +has_shared_intern_dict(PyInterpreterState *interp) +{ + PyInterpreterState *main_interp = _PyInterpreterState_Main(); + return interp != main_interp && interp->feature_flags & Py_RTFLAGS_USE_MAIN_OBMALLOC; +} + static int init_interned_dict(PyInterpreterState *interp) { @@ -284,9 +301,16 @@ init_interned_dict(PyInterpreterState *interp) } } assert(get_interned_dict(interp) == NULL); - PyObject *interned = interned = PyDict_New(); - if (interned == NULL) { - return -1; + PyObject *interned; + if (has_shared_intern_dict(interp)) { + interned = get_interned_dict(_PyInterpreterState_Main()); + Py_INCREF(interned); + } + else { + interned = PyDict_New(); + if (interned == NULL) { + return -1; + } } _Py_INTERP_CACHED_OBJECT(interp, interned_strings) = interned; return 0; @@ -295,6 +319,9 @@ init_interned_dict(PyInterpreterState *interp) static void clear_interned_dict(PyInterpreterState *interp) { + if (has_shared_intern_dict(interp)) { + return; // the dict doesn't belong to this interpreter + } PyObject *interned = get_interned_dict(interp); if (interned != NULL) { PyDict_Clear(interned); @@ -14855,6 +14882,9 @@ PyUnicode_InternFromString(const char *cp) void _PyUnicode_ClearInterned(PyInterpreterState *interp) { + if (has_shared_intern_dict(interp)) { + return; // the dict doesn't belong to this interpreter + } PyObject *interned = get_interned_dict(interp); if (interned == NULL) { return; @@ -15364,8 +15394,10 @@ _PyUnicode_Fini(PyInterpreterState *interp) { struct _Py_unicode_state *state = &interp->unicode; - // _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini() - assert(get_interned_dict(interp) == NULL); + if (!has_shared_intern_dict(interp)) { + // _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini() + assert(get_interned_dict(interp) == NULL); + } _PyUnicode_FiniEncodings(&state->fs_codec); From 82cfa9675a7632a8b940fd9de8d18d904745cfa5 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Thu, 26 Sep 2024 17:09:59 -0700 Subject: [PATCH 2/2] Small code cleanup as suggested by review. --- Objects/unicodeobject.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 2b58a1538296be..ac52b560bc8c6e 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -319,12 +319,12 @@ init_interned_dict(PyInterpreterState *interp) static void clear_interned_dict(PyInterpreterState *interp) { - if (has_shared_intern_dict(interp)) { - return; // the dict doesn't belong to this interpreter - } PyObject *interned = get_interned_dict(interp); if (interned != NULL) { - PyDict_Clear(interned); + if (!has_shared_intern_dict(interp)) { + // only clear if the dict belongs to this interpreter + PyDict_Clear(interned); + } Py_DECREF(interned); _Py_INTERP_CACHED_OBJECT(interp, interned_strings) = NULL; } @@ -14882,15 +14882,19 @@ PyUnicode_InternFromString(const char *cp) void _PyUnicode_ClearInterned(PyInterpreterState *interp) { - if (has_shared_intern_dict(interp)) { - return; // the dict doesn't belong to this interpreter - } PyObject *interned = get_interned_dict(interp); if (interned == NULL) { return; } assert(PyDict_CheckExact(interned)); + if (has_shared_intern_dict(interp)) { + // the dict doesn't belong to this interpreter, skip the debug + // checks on it and just clear the pointer to it + clear_interned_dict(interp); + return; + } + /* TODO: * Currently, the runtime is not able to guarantee that it can exit without * allocations that carry over to a future initialization of Python within