From 082aae0e198d6a06236778d72c17440ede48004b Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Mon, 30 Sep 2024 10:47:28 -0700 Subject: [PATCH 1/2] Revert "[3.13] gh-116510: Fix crash due to shared immortal interned strings. (gh-124646)" This reverts commit dc09a0c67fd888ba86673eeecc345d070360a146. --- ...-09-26-18-21-06.gh-issue-116510.FacUWO.rst | 5 -- Objects/unicodeobject.c | 48 +++---------------- 2 files changed, 6 insertions(+), 47 deletions(-) delete mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2024-09-26-18-21-06.gh-issue-116510.FacUWO.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-09-26-18-21-06.gh-issue-116510.FacUWO.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-09-26-18-21-06.gh-issue-116510.FacUWO.rst deleted file mode 100644 index e3741321006548..00000000000000 --- a/Misc/NEWS.d/next/Core_and_Builtins/2024-09-26-18-21-06.gh-issue-116510.FacUWO.rst +++ /dev/null @@ -1,5 +0,0 @@ -Fix a crash caused by immortal interned strings being shared between -sub-interpreters that use basic single-phase init. In that case, the string -can be used by an interpreter that outlives the interpreter that created and -interned it. For interpreters that share obmalloc state, also share the -interned dict with the main interpreter. diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index c69a64de062baa..5a6ae78fe23bae 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -277,37 +277,13 @@ hashtable_unicode_compare(const void *key1, const void *key2) } } -/* Return true if this interpreter should share the main interpreter's - intern_dict. That's important for interpreters which load basic - single-phase init extension modules (m_size == -1). There could be interned - immortal strings that are shared between interpreters, due to the - PyDict_Update(mdict, m_copy) call in import_find_extension(). - - It's not safe to deallocate those strings until all interpreters that - potentially use them are freed. By storing them in the main interpreter, we - ensure they get freed after all other interpreters are freed. -*/ -static bool -has_shared_intern_dict(PyInterpreterState *interp) -{ - PyInterpreterState *main_interp = _PyInterpreterState_Main(); - return interp != main_interp && interp->feature_flags & Py_RTFLAGS_USE_MAIN_OBMALLOC; -} - static int init_interned_dict(PyInterpreterState *interp) { assert(get_interned_dict(interp) == NULL); - PyObject *interned; - if (has_shared_intern_dict(interp)) { - interned = get_interned_dict(_PyInterpreterState_Main()); - Py_INCREF(interned); - } - else { - interned = PyDict_New(); - if (interned == NULL) { - return -1; - } + PyObject *interned = interned = PyDict_New(); + if (interned == NULL) { + return -1; } _Py_INTERP_CACHED_OBJECT(interp, interned_strings) = interned; return 0; @@ -318,10 +294,7 @@ clear_interned_dict(PyInterpreterState *interp) { PyObject *interned = get_interned_dict(interp); if (interned != NULL) { - if (!has_shared_intern_dict(interp)) { - // only clear if the dict belongs to this interpreter - PyDict_Clear(interned); - } + PyDict_Clear(interned); Py_DECREF(interned); _Py_INTERP_CACHED_OBJECT(interp, interned_strings) = NULL; } @@ -15333,13 +15306,6 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp) } assert(PyDict_CheckExact(interned)); - if (has_shared_intern_dict(interp)) { - // the dict doesn't belong to this interpreter, skip the debug - // checks on it and just clear the pointer to it - clear_interned_dict(interp); - return; - } - #ifdef INTERNED_STATS fprintf(stderr, "releasing %zd interned strings\n", PyDict_GET_SIZE(interned)); @@ -15861,10 +15827,8 @@ _PyUnicode_Fini(PyInterpreterState *interp) { struct _Py_unicode_state *state = &interp->unicode; - if (!has_shared_intern_dict(interp)) { - // _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini() - assert(get_interned_dict(interp) == NULL); - } + // _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini() + assert(get_interned_dict(interp) == NULL); _PyUnicode_FiniEncodings(&state->fs_codec); From 40c78e58eef0553d43b12658b3af8aa72681e15d Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Mon, 30 Sep 2024 11:42:56 -0700 Subject: [PATCH 2/2] gh-124785: alternative immortal string clear fix The GH-124646 fix has the issue that it breaks the trace-refs debug build. This is a simpler fix that avoids the use-after-free crashes by just leaking immortal interned strings allocated by legacy subinterpreters. --- Include/internal/pycore_interp.h | 4 ++ ...-09-30-11-49-58.gh-issue-124785.sSKaJF.rst | 2 + Objects/unicodeobject.c | 41 +++++++++++++++++++ Python/import.c | 5 +++ 4 files changed, 52 insertions(+) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2024-09-30-11-49-58.gh-issue-124785.sSKaJF.rst diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index 45d85b2baceef2..1ecf86629a0629 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -123,6 +123,10 @@ struct _is { /* Has been fully initialized via pylifecycle.c. */ int _ready; int finalizing; + /* Set if a subinterpreter imports basic single-phase extensions and + * therefore needs to leak interned strings (it's too tricky to safely + * free them). */ + int leak_interned_strings; uintptr_t last_restart_version; struct pythreads { diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-09-30-11-49-58.gh-issue-124785.sSKaJF.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-09-30-11-49-58.gh-issue-124785.sSKaJF.rst new file mode 100644 index 00000000000000..21d0d45234a2dc --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-09-30-11-49-58.gh-issue-124785.sSKaJF.rst @@ -0,0 +1,2 @@ +Use simpler fix for interned string cleanup. This avoids the potential +use-after-free crashes but doesn't break the trace-refs debug build. diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 5a6ae78fe23bae..3203f3efe80bf3 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -15296,6 +15296,42 @@ PyUnicode_InternFromString(const char *cp) return s; } +void +clear_interned_dict_legacy(PyInterpreterState *interp) +{ + // See GH-116510 for why this extra care is needed. Immortal interned + // strings can be shared between subinterpreters in this case and we + // can't safely free them without a potential use-after-free crash. + PyObject *interned = get_interned_dict(interp); + Py_ssize_t pos = 0; + PyObject *s, *ignored_value; + while (PyDict_Next(interned, &pos, &s, &ignored_value)) { + assert(PyUnicode_IS_READY(s)); + switch (PyUnicode_CHECK_INTERNED(s)) { + case SSTATE_INTERNED_IMMORTAL: + case SSTATE_INTERNED_IMMORTAL_STATIC: + // immortal strings leak since we can't safely free them + break; + case SSTATE_INTERNED_MORTAL: + // Restore 2 references held by the interned dict; these will + // be decref'd by clear_interned_dict's PyDict_Clear. + Py_SET_REFCNT(s, Py_REFCNT(s) + 2); +#ifdef Py_REF_DEBUG + /* let's be pedantic with the ref total */ + _Py_IncRefTotal(_PyThreadState_GET()); + _Py_IncRefTotal(_PyThreadState_GET()); +#endif + break; + case SSTATE_NOT_INTERNED: + /* fall through */ + default: + Py_UNREACHABLE(); + } + _PyUnicode_STATE(s).interned = SSTATE_NOT_INTERNED; + } + PyDict_Clear(interned); + _Py_INTERP_CACHED_OBJECT(interp, interned_strings) = NULL; +} void _PyUnicode_ClearInterned(PyInterpreterState *interp) @@ -15306,6 +15342,11 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp) } assert(PyDict_CheckExact(interned)); + if (interp->leak_interned_strings) { + clear_interned_dict_legacy(interp); + return; + } + #ifdef INTERNED_STATS fprintf(stderr, "releasing %zd interned strings\n", PyDict_GET_SIZE(interned)); diff --git a/Python/import.c b/Python/import.c index 2ec596828e3e6f..7b819c314cacff 100644 --- a/Python/import.c +++ b/Python/import.c @@ -1897,6 +1897,11 @@ import_find_extension(PyThreadState *tstate, return NULL; } + /* Interned strings used by this module can be shared between + * subinterpreters, due to the PyDict_Update() call in basic single-phase + * module import case. */ + tstate->interp->leak_interned_strings = 1; + PyObject *mod = reload_singlephase_extension(tstate, cached, info); if (mod == NULL) { return NULL;