Skip to content

Commit f2cb399

Browse files
gh-116510: Fix a Crash Due to Shared Immortal Interned Strings (gh-124865)
Fix a crash caused by immortal interned strings being shared between sub-interpreters that use basic single-phase init. In that case, the string can be used by an interpreter that outlives the interpreter that created and interned it. For interpreters that share obmalloc state, also share the interned dict with the main interpreter. This is an un-revert of gh-124646 that then addresses the Py_TRACE_REFS failures identified by gh-124785.
1 parent d501153 commit f2cb399

File tree

3 files changed

+91
-6
lines changed

3 files changed

+91
-6
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
Fix a crash caused by immortal interned strings being shared between
2+
sub-interpreters that use basic single-phase init. In that case, the string
3+
can be used by an interpreter that outlives the interpreter that created and
4+
interned it. For interpreters that share obmalloc state, also share the
5+
interned dict with the main interpreter.

Objects/object.c

+36
Original file line numberDiff line numberDiff line change
@@ -2491,6 +2491,42 @@ _Py_ResurrectReference(PyObject *op)
24912491

24922492

24932493
#ifdef Py_TRACE_REFS
2494+
/* Make sure the ref is associated with the right interpreter.
2495+
* This only needs special attention for heap-allocated objects
2496+
* that have been immortalized, and only when the object might
2497+
* outlive the interpreter where it was created. That means the
2498+
* object was necessarily created using a global allocator
2499+
* (i.e. from the main interpreter). Thus in that specific case
2500+
* we move the object over to the main interpreter's refchain.
2501+
*
2502+
* This was added for the sake of the immortal interned strings,
2503+
* where legacy subinterpreters share the main interpreter's
2504+
* interned dict (and allocator), and therefore the strings can
2505+
* outlive the subinterpreter.
2506+
*
2507+
* It may make sense to fold this into _Py_SetImmortalUntracked(),
2508+
* but that requires further investigation. In the meantime, it is
2509+
* up to the caller to know if this is needed. There should be
2510+
* very few cases.
2511+
*/
2512+
void
2513+
_Py_NormalizeImmortalReference(PyObject *op)
2514+
{
2515+
assert(_Py_IsImmortal(op));
2516+
PyInterpreterState *interp = _PyInterpreterState_GET();
2517+
if (!_PyRefchain_IsTraced(interp, op)) {
2518+
return;
2519+
}
2520+
PyInterpreterState *main_interp = _PyInterpreterState_Main();
2521+
if (interp != main_interp
2522+
&& interp->feature_flags & Py_RTFLAGS_USE_MAIN_OBMALLOC)
2523+
{
2524+
assert(!_PyRefchain_IsTraced(main_interp, op));
2525+
_PyRefchain_Remove(interp, op);
2526+
_PyRefchain_Trace(main_interp, op);
2527+
}
2528+
}
2529+
24942530
void
24952531
_Py_ForgetReference(PyObject *op)
24962532
{

Objects/unicodeobject.c

+50-6
Original file line numberDiff line numberDiff line change
@@ -281,13 +281,37 @@ hashtable_unicode_compare(const void *key1, const void *key2)
281281
}
282282
}
283283

284+
/* Return true if this interpreter should share the main interpreter's
285+
intern_dict. That's important for interpreters which load basic
286+
single-phase init extension modules (m_size == -1). There could be interned
287+
immortal strings that are shared between interpreters, due to the
288+
PyDict_Update(mdict, m_copy) call in import_find_extension().
289+
290+
It's not safe to deallocate those strings until all interpreters that
291+
potentially use them are freed. By storing them in the main interpreter, we
292+
ensure they get freed after all other interpreters are freed.
293+
*/
294+
static bool
295+
has_shared_intern_dict(PyInterpreterState *interp)
296+
{
297+
PyInterpreterState *main_interp = _PyInterpreterState_Main();
298+
return interp != main_interp && interp->feature_flags & Py_RTFLAGS_USE_MAIN_OBMALLOC;
299+
}
300+
284301
static int
285302
init_interned_dict(PyInterpreterState *interp)
286303
{
287304
assert(get_interned_dict(interp) == NULL);
288-
PyObject *interned = interned = PyDict_New();
289-
if (interned == NULL) {
290-
return -1;
305+
PyObject *interned;
306+
if (has_shared_intern_dict(interp)) {
307+
interned = get_interned_dict(_PyInterpreterState_Main());
308+
Py_INCREF(interned);
309+
}
310+
else {
311+
interned = PyDict_New();
312+
if (interned == NULL) {
313+
return -1;
314+
}
291315
}
292316
_Py_INTERP_CACHED_OBJECT(interp, interned_strings) = interned;
293317
return 0;
@@ -298,7 +322,10 @@ clear_interned_dict(PyInterpreterState *interp)
298322
{
299323
PyObject *interned = get_interned_dict(interp);
300324
if (interned != NULL) {
301-
PyDict_Clear(interned);
325+
if (!has_shared_intern_dict(interp)) {
326+
// only clear if the dict belongs to this interpreter
327+
PyDict_Clear(interned);
328+
}
302329
Py_DECREF(interned);
303330
_Py_INTERP_CACHED_OBJECT(interp, interned_strings) = NULL;
304331
}
@@ -15401,6 +15428,10 @@ _PyUnicode_InternStatic(PyInterpreterState *interp, PyObject **p)
1540115428
assert(*p);
1540215429
}
1540315430

15431+
#ifdef Py_TRACE_REFS
15432+
extern void _Py_NormalizeImmortalReference(PyObject *);
15433+
#endif
15434+
1540415435
static void
1540515436
immortalize_interned(PyObject *s)
1540615437
{
@@ -15416,6 +15447,10 @@ immortalize_interned(PyObject *s)
1541615447
#endif
1541715448
_PyUnicode_STATE(s).interned = SSTATE_INTERNED_IMMORTAL;
1541815449
_Py_SetImmortal(s);
15450+
#ifdef Py_TRACE_REFS
15451+
/* Make sure the ref is associated with the right interpreter. */
15452+
_Py_NormalizeImmortalReference(s);
15453+
#endif
1541915454
}
1542015455

1542115456
static /* non-null */ PyObject*
@@ -15609,6 +15644,13 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp)
1560915644
}
1561015645
assert(PyDict_CheckExact(interned));
1561115646

15647+
if (has_shared_intern_dict(interp)) {
15648+
// the dict doesn't belong to this interpreter, skip the debug
15649+
// checks on it and just clear the pointer to it
15650+
clear_interned_dict(interp);
15651+
return;
15652+
}
15653+
1561215654
#ifdef INTERNED_STATS
1561315655
fprintf(stderr, "releasing %zd interned strings\n",
1561415656
PyDict_GET_SIZE(interned));
@@ -16117,8 +16159,10 @@ _PyUnicode_Fini(PyInterpreterState *interp)
1611716159
{
1611816160
struct _Py_unicode_state *state = &interp->unicode;
1611916161

16120-
// _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini()
16121-
assert(get_interned_dict(interp) == NULL);
16162+
if (!has_shared_intern_dict(interp)) {
16163+
// _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini()
16164+
assert(get_interned_dict(interp) == NULL);
16165+
}
1612216166

1612316167
_PyUnicode_FiniEncodings(&state->fs_codec);
1612416168

0 commit comments

Comments
 (0)