Skip to content

Commit

Permalink
pythongh-124785: alternative immortal string cleanup fix
Browse files Browse the repository at this point in the history
The pythongh-124646 fix has the issue that it breaks the trace-refs debug
build.  This is a simpler fix that avoids the use-after-free crashes by
just leaking immortal interned strings allocated by legacy
subinterpreters.
  • Loading branch information
nascheme committed Sep 30, 2024
1 parent 082aae0 commit c609173
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 0 deletions.
4 changes: 4 additions & 0 deletions Include/internal/pycore_interp.h
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,10 @@ struct _is {
/* Has been fully initialized via pylifecycle.c. */
int _ready;
int finalizing;
/* Set if a subinterpreter imports basic single-phase extensions and
* therefore needs to leak interned strings (it's too tricky to safely
* free them). */
int leak_interned_strings;

uintptr_t last_restart_version;
struct pythreads {
Expand Down
41 changes: 41 additions & 0 deletions Objects/unicodeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -15296,6 +15296,42 @@ PyUnicode_InternFromString(const char *cp)
return s;
}

void
clear_interned_dict_legacy(PyInterpreterState *interp)
{
// See GH-116510 for why this extra care is needed. Immortal interned
// strings can be shared between subinterpreters in this case and we
// can't safely free them without a potential use-after-free crash.
PyObject *interned = get_interned_dict(interp);
Py_ssize_t pos = 0;
PyObject *s, *ignored_value;
while (PyDict_Next(interned, &pos, &s, &ignored_value)) {
assert(PyUnicode_IS_READY(s));
switch (PyUnicode_CHECK_INTERNED(s)) {
case SSTATE_INTERNED_IMMORTAL:
case SSTATE_INTERNED_IMMORTAL_STATIC:
// immortal strings leak since we can't safely free them
break;
case SSTATE_INTERNED_MORTAL:
// Restore 2 references held by the interned dict; these will
// be decref'd by clear_interned_dict's PyDict_Clear.
Py_SET_REFCNT(s, Py_REFCNT(s) + 2);
#ifdef Py_REF_DEBUG
/* let's be pedantic with the ref total */
_Py_IncRefTotal(_PyThreadState_GET());
_Py_IncRefTotal(_PyThreadState_GET());
#endif
break;
case SSTATE_NOT_INTERNED:
/* fall through */
default:
Py_UNREACHABLE();
}
_PyUnicode_STATE(s).interned = SSTATE_NOT_INTERNED;
}
PyDict_Clear(interned);
_Py_INTERP_CACHED_OBJECT(interp, interned_strings) = NULL;
}

void
_PyUnicode_ClearInterned(PyInterpreterState *interp)
Expand All @@ -15306,6 +15342,11 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp)
}
assert(PyDict_CheckExact(interned));

if (interp->leak_interned_strings) {
clear_interned_dict_legacy(interp);
return;
}

#ifdef INTERNED_STATS
fprintf(stderr, "releasing %zd interned strings\n",
PyDict_GET_SIZE(interned));
Expand Down
5 changes: 5 additions & 0 deletions Python/import.c
Original file line number Diff line number Diff line change
Expand Up @@ -1897,6 +1897,11 @@ import_find_extension(PyThreadState *tstate,
return NULL;
}

/* Interned strings used by this module can be shared between
* subinterpreters, due to the PyDict_Update() call in basic single-phase
* module import case. */
tstate->interp->leak_interned_strings = 1;

PyObject *mod = reload_singlephase_extension(tstate, cached, info);
if (mod == NULL) {
return NULL;
Expand Down

0 comments on commit c609173

Please sign in to comment.