Skip to content

Commit

Permalink
pythongh-116510: Fix a Crash Due to Shared Immortal Interned Strings (p…
Browse files Browse the repository at this point in the history
…ythongh-124865)

Fix a crash caused by immortal interned strings being shared between
sub-interpreters that use basic single-phase init. In that case, the string
can be used by an interpreter that outlives the interpreter that created and
interned it. For interpreters that share obmalloc state, also share the
interned dict with the main interpreter.

This is an un-revert of pythongh-124646 that then addresses the Py_TRACE_REFS
failures identified by pythongh-124785.
  • Loading branch information
ericsnowcurrently authored Oct 9, 2024
1 parent d501153 commit f2cb399
Show file tree
Hide file tree
Showing 3 changed files with 91 additions and 6 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Fix a crash caused by immortal interned strings being shared between
sub-interpreters that use basic single-phase init. In that case, the string
can be used by an interpreter that outlives the interpreter that created and
interned it. For interpreters that share obmalloc state, also share the
interned dict with the main interpreter.
36 changes: 36 additions & 0 deletions Objects/object.c
Original file line number Diff line number Diff line change
Expand Up @@ -2491,6 +2491,42 @@ _Py_ResurrectReference(PyObject *op)


#ifdef Py_TRACE_REFS
/* Make sure the ref is associated with the right interpreter.
* This only needs special attention for heap-allocated objects
* that have been immortalized, and only when the object might
* outlive the interpreter where it was created. That means the
* object was necessarily created using a global allocator
* (i.e. from the main interpreter). Thus in that specific case
* we move the object over to the main interpreter's refchain.
*
* This was added for the sake of the immortal interned strings,
* where legacy subinterpreters share the main interpreter's
* interned dict (and allocator), and therefore the strings can
* outlive the subinterpreter.
*
* It may make sense to fold this into _Py_SetImmortalUntracked(),
* but that requires further investigation. In the meantime, it is
* up to the caller to know if this is needed. There should be
* very few cases.
*/
void
_Py_NormalizeImmortalReference(PyObject *op)
{
assert(_Py_IsImmortal(op));
PyInterpreterState *interp = _PyInterpreterState_GET();
if (!_PyRefchain_IsTraced(interp, op)) {
return;
}
PyInterpreterState *main_interp = _PyInterpreterState_Main();
if (interp != main_interp
&& interp->feature_flags & Py_RTFLAGS_USE_MAIN_OBMALLOC)
{
assert(!_PyRefchain_IsTraced(main_interp, op));
_PyRefchain_Remove(interp, op);
_PyRefchain_Trace(main_interp, op);
}
}

void
_Py_ForgetReference(PyObject *op)
{
Expand Down
56 changes: 50 additions & 6 deletions Objects/unicodeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -281,13 +281,37 @@ hashtable_unicode_compare(const void *key1, const void *key2)
}
}

/* Return true if this interpreter should share the main interpreter's
intern_dict. That's important for interpreters which load basic
single-phase init extension modules (m_size == -1). There could be interned
immortal strings that are shared between interpreters, due to the
PyDict_Update(mdict, m_copy) call in import_find_extension().
It's not safe to deallocate those strings until all interpreters that
potentially use them are freed. By storing them in the main interpreter, we
ensure they get freed after all other interpreters are freed.
*/
static bool
has_shared_intern_dict(PyInterpreterState *interp)
{
PyInterpreterState *main_interp = _PyInterpreterState_Main();
return interp != main_interp && interp->feature_flags & Py_RTFLAGS_USE_MAIN_OBMALLOC;
}

static int
init_interned_dict(PyInterpreterState *interp)
{
assert(get_interned_dict(interp) == NULL);
PyObject *interned = interned = PyDict_New();
if (interned == NULL) {
return -1;
PyObject *interned;
if (has_shared_intern_dict(interp)) {
interned = get_interned_dict(_PyInterpreterState_Main());
Py_INCREF(interned);
}
else {
interned = PyDict_New();
if (interned == NULL) {
return -1;
}
}
_Py_INTERP_CACHED_OBJECT(interp, interned_strings) = interned;
return 0;
Expand All @@ -298,7 +322,10 @@ clear_interned_dict(PyInterpreterState *interp)
{
PyObject *interned = get_interned_dict(interp);
if (interned != NULL) {
PyDict_Clear(interned);
if (!has_shared_intern_dict(interp)) {
// only clear if the dict belongs to this interpreter
PyDict_Clear(interned);
}
Py_DECREF(interned);
_Py_INTERP_CACHED_OBJECT(interp, interned_strings) = NULL;
}
Expand Down Expand Up @@ -15401,6 +15428,10 @@ _PyUnicode_InternStatic(PyInterpreterState *interp, PyObject **p)
assert(*p);
}

#ifdef Py_TRACE_REFS
extern void _Py_NormalizeImmortalReference(PyObject *);
#endif

static void
immortalize_interned(PyObject *s)
{
Expand All @@ -15416,6 +15447,10 @@ immortalize_interned(PyObject *s)
#endif
_PyUnicode_STATE(s).interned = SSTATE_INTERNED_IMMORTAL;
_Py_SetImmortal(s);
#ifdef Py_TRACE_REFS
/* Make sure the ref is associated with the right interpreter. */
_Py_NormalizeImmortalReference(s);
#endif
}

static /* non-null */ PyObject*
Expand Down Expand Up @@ -15609,6 +15644,13 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp)
}
assert(PyDict_CheckExact(interned));

if (has_shared_intern_dict(interp)) {
// the dict doesn't belong to this interpreter, skip the debug
// checks on it and just clear the pointer to it
clear_interned_dict(interp);
return;
}

#ifdef INTERNED_STATS
fprintf(stderr, "releasing %zd interned strings\n",
PyDict_GET_SIZE(interned));
Expand Down Expand Up @@ -16117,8 +16159,10 @@ _PyUnicode_Fini(PyInterpreterState *interp)
{
struct _Py_unicode_state *state = &interp->unicode;

// _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini()
assert(get_interned_dict(interp) == NULL);
if (!has_shared_intern_dict(interp)) {
// _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini()
assert(get_interned_dict(interp) == NULL);
}

_PyUnicode_FiniEncodings(&state->fs_codec);

Expand Down

0 comments on commit f2cb399

Please sign in to comment.