From 40c78e58eef0553d43b12658b3af8aa72681e15d Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Mon, 30 Sep 2024 11:42:56 -0700 Subject: [PATCH] gh-124785: alternative immortal string clear fix The GH-124646 fix has the issue that it breaks the trace-refs debug build. This is a simpler fix that avoids the use-after-free crashes by just leaking immortal interned strings allocated by legacy subinterpreters. --- Include/internal/pycore_interp.h | 4 ++ ...-09-30-11-49-58.gh-issue-124785.sSKaJF.rst | 2 + Objects/unicodeobject.c | 41 +++++++++++++++++++ Python/import.c | 5 +++ 4 files changed, 52 insertions(+) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2024-09-30-11-49-58.gh-issue-124785.sSKaJF.rst diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index 45d85b2baceef2..1ecf86629a0629 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -123,6 +123,10 @@ struct _is { /* Has been fully initialized via pylifecycle.c. */ int _ready; int finalizing; + /* Set if a subinterpreter imports basic single-phase extensions and + * therefore needs to leak interned strings (it's too tricky to safely + * free them). */ + int leak_interned_strings; uintptr_t last_restart_version; struct pythreads { diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-09-30-11-49-58.gh-issue-124785.sSKaJF.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-09-30-11-49-58.gh-issue-124785.sSKaJF.rst new file mode 100644 index 00000000000000..21d0d45234a2dc --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-09-30-11-49-58.gh-issue-124785.sSKaJF.rst @@ -0,0 +1,2 @@ +Use simpler fix for interned string cleanup. This avoids the potential +use-after-free crashes but doesn't break the trace-refs debug build. diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 5a6ae78fe23bae..3203f3efe80bf3 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -15296,6 +15296,42 @@ PyUnicode_InternFromString(const char *cp) return s; } +void +clear_interned_dict_legacy(PyInterpreterState *interp) +{ + // See GH-116510 for why this extra care is needed. Immortal interned + // strings can be shared between subinterpreters in this case and we + // can't safely free them without a potential use-after-free crash. + PyObject *interned = get_interned_dict(interp); + Py_ssize_t pos = 0; + PyObject *s, *ignored_value; + while (PyDict_Next(interned, &pos, &s, &ignored_value)) { + assert(PyUnicode_IS_READY(s)); + switch (PyUnicode_CHECK_INTERNED(s)) { + case SSTATE_INTERNED_IMMORTAL: + case SSTATE_INTERNED_IMMORTAL_STATIC: + // immortal strings leak since we can't safely free them + break; + case SSTATE_INTERNED_MORTAL: + // Restore 2 references held by the interned dict; these will + // be decref'd by clear_interned_dict's PyDict_Clear. + Py_SET_REFCNT(s, Py_REFCNT(s) + 2); +#ifdef Py_REF_DEBUG + /* let's be pedantic with the ref total */ + _Py_IncRefTotal(_PyThreadState_GET()); + _Py_IncRefTotal(_PyThreadState_GET()); +#endif + break; + case SSTATE_NOT_INTERNED: + /* fall through */ + default: + Py_UNREACHABLE(); + } + _PyUnicode_STATE(s).interned = SSTATE_NOT_INTERNED; + } + PyDict_Clear(interned); + _Py_INTERP_CACHED_OBJECT(interp, interned_strings) = NULL; +} void _PyUnicode_ClearInterned(PyInterpreterState *interp) @@ -15306,6 +15342,11 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp) } assert(PyDict_CheckExact(interned)); + if (interp->leak_interned_strings) { + clear_interned_dict_legacy(interp); + return; + } + #ifdef INTERNED_STATS fprintf(stderr, "releasing %zd interned strings\n", PyDict_GET_SIZE(interned)); diff --git a/Python/import.c b/Python/import.c index 2ec596828e3e6f..7b819c314cacff 100644 --- a/Python/import.c +++ b/Python/import.c @@ -1897,6 +1897,11 @@ import_find_extension(PyThreadState *tstate, return NULL; } + /* Interned strings used by this module can be shared between + * subinterpreters, due to the PyDict_Update() call in basic single-phase + * module import case. */ + tstate->interp->leak_interned_strings = 1; + PyObject *mod = reload_singlephase_extension(tstate, cached, info); if (mod == NULL) { return NULL;