Skip to content

Commit 49da170

Browse files
[3.12] gh-116510: Fix a Crash Due to Shared Immortal Interned Strings (gh-125205)
Fix a crash caused by immortal interned strings being shared between sub-interpreters that use basic single-phase init. In that case, the string can be used by an interpreter that outlives the interpreter that created and interned it. For interpreters that share obmalloc state, also share the interned dict with the main interpreter. This is an un-revert of gh-124646 that then addresses the Py_TRACE_REFS failures identified by gh-124785 (i.e. backporting gh-125709 too). (cherry picked from commit f2cb399, AKA gh-124865) Co-authored-by: Eric Snow <ericsnowcurrently@gmail.com>
1 parent b49e902 commit 49da170

File tree

10 files changed

+124
-20
lines changed

10 files changed

+124
-20
lines changed

Doc/library/sys.rst

+29
Original file line numberDiff line numberDiff line change
@@ -907,6 +907,35 @@ always available.
907907
It is not guaranteed to exist in all implementations of Python.
908908

909909

910+
.. function:: getobjects(limit[, type])
911+
912+
This function only exists if CPython was built using the
913+
specialized configure option :option:`--with-trace-refs`.
914+
It is intended only for debugging garbage-collection issues.
915+
916+
Return a list of up to *limit* dynamically allocated Python objects.
917+
If *type* is given, only objects of that exact type (not subtypes)
918+
are included.
919+
920+
Objects from the list are not safe to use.
921+
Specifically, the result will include objects from all interpreters that
922+
share their object allocator state (that is, ones created with
923+
:c:member:`PyInterpreterConfig.use_main_obmalloc` set to 1
924+
or using :c:func:`Py_NewInterpreter`, and the
925+
:ref:`main interpreter <sub-interpreter-support>`).
926+
Mixing objects from different interpreters may lead to crashes
927+
or other unexpected behavior.
928+
929+
.. impl-detail::
930+
931+
This function should be used for specialized purposes only.
932+
It is not guaranteed to exist in all implementations of Python.
933+
934+
.. versionchanged:: next
935+
936+
The result may include objects from other interpreters.
937+
938+
910939
.. function:: getprofile()
911940

912941
.. index::

Doc/using/configure.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -459,7 +459,7 @@ Debug options
459459
Effects:
460460

461461
* Define the ``Py_TRACE_REFS`` macro.
462-
* Add :func:`!sys.getobjects` function.
462+
* Add :func:`sys.getobjects` function.
463463
* Add :envvar:`PYTHONDUMPREFS` environment variable.
464464

465465
This build is not ABI compatible with release build (default build) or debug

Doc/whatsnew/3.12.rst

+11
Original file line numberDiff line numberDiff line change
@@ -2301,3 +2301,14 @@ email
23012301
check if the *strict* paramater is available.
23022302
(Contributed by Thomas Dwyer and Victor Stinner for :gh:`102988` to improve
23032303
the CVE-2023-27043 fix.)
2304+
2305+
2306+
Notable changes in 3.12.8
2307+
=========================
2308+
2309+
sys
2310+
---
2311+
2312+
* The previously undocumented special function :func:`sys.getobjects`,
2313+
which only exists in specialized builds of Python, may now return objects
2314+
from other interpreters than the one it's called in.

Include/internal/pycore_object_state.h

+7-1
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,13 @@ struct _py_object_state {
2424
* together via the _ob_prev and _ob_next members of a PyObject, which
2525
* exist only in a Py_TRACE_REFS build.
2626
*/
27-
PyObject refchain;
27+
PyObject *refchain;
28+
/* In most cases, refchain points to _refchain_obj.
29+
* In sub-interpreters that share objmalloc state with the main interp,
30+
* refchain points to the main interpreter's _refchain_obj, and their own
31+
* _refchain_obj is unused.
32+
*/
33+
PyObject _refchain_obj;
2834
#endif
2935
int _not_used;
3036
};

Include/internal/pycore_runtime_init.h

-7
Original file line numberDiff line numberDiff line change
@@ -132,15 +132,8 @@ extern PyTypeObject _PyExc_MemoryError;
132132
.context_ver = 1, \
133133
}
134134

135-
#ifdef Py_TRACE_REFS
136-
# define _py_object_state_INIT(INTERP) \
137-
{ \
138-
.refchain = {&INTERP.object_state.refchain, &INTERP.object_state.refchain}, \
139-
}
140-
#else
141135
# define _py_object_state_INIT(INTERP) \
142136
{ 0 }
143-
#endif
144137

145138

146139
// global objects
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
Fix a crash caused by immortal interned strings being shared between
2+
sub-interpreters that use basic single-phase init. In that case, the string
3+
can be used by an interpreter that outlives the interpreter that created and
4+
interned it. For interpreters that share obmalloc state, also share the
5+
interned dict with the main interpreter.

Objects/object.c

+18-4
Original file line numberDiff line numberDiff line change
@@ -159,11 +159,27 @@ _PyDebug_PrintTotalRefs(void) {
159159

160160
#ifdef Py_TRACE_REFS
161161

162-
#define REFCHAIN(interp) &interp->object_state.refchain
162+
#define REFCHAIN(interp) interp->object_state.refchain
163+
164+
static inline int
165+
has_own_refchain(PyInterpreterState *interp)
166+
{
167+
if (interp->feature_flags & Py_RTFLAGS_USE_MAIN_OBMALLOC) {
168+
return (_Py_IsMainInterpreter(interp)
169+
|| _PyInterpreterState_Main() == NULL);
170+
}
171+
return 1;
172+
}
163173

164174
static inline void
165175
init_refchain(PyInterpreterState *interp)
166176
{
177+
if (!has_own_refchain(interp)) {
178+
// Legacy subinterpreters share a refchain with the main interpreter.
179+
REFCHAIN(interp) = REFCHAIN(_PyInterpreterState_Main());
180+
return;
181+
}
182+
REFCHAIN(interp) = &interp->object_state._refchain_obj;
167183
PyObject *refchain = REFCHAIN(interp);
168184
refchain->_ob_prev = refchain;
169185
refchain->_ob_next = refchain;
@@ -2010,9 +2026,7 @@ void
20102026
_PyObject_InitState(PyInterpreterState *interp)
20112027
{
20122028
#ifdef Py_TRACE_REFS
2013-
if (!_Py_IsMainInterpreter(interp)) {
2014-
init_refchain(interp);
2015-
}
2029+
init_refchain(interp);
20162030
#endif
20172031
}
20182032

Objects/unicodeobject.c

+42-6
Original file line numberDiff line numberDiff line change
@@ -287,13 +287,37 @@ hashtable_unicode_compare(const void *key1, const void *key2)
287287
}
288288
}
289289

290+
/* Return true if this interpreter should share the main interpreter's
291+
intern_dict. That's important for interpreters which load basic
292+
single-phase init extension modules (m_size == -1). There could be interned
293+
immortal strings that are shared between interpreters, due to the
294+
PyDict_Update(mdict, m_copy) call in import_find_extension().
295+
296+
It's not safe to deallocate those strings until all interpreters that
297+
potentially use them are freed. By storing them in the main interpreter, we
298+
ensure they get freed after all other interpreters are freed.
299+
*/
300+
static bool
301+
has_shared_intern_dict(PyInterpreterState *interp)
302+
{
303+
PyInterpreterState *main_interp = _PyInterpreterState_Main();
304+
return interp != main_interp && interp->feature_flags & Py_RTFLAGS_USE_MAIN_OBMALLOC;
305+
}
306+
290307
static int
291308
init_interned_dict(PyInterpreterState *interp)
292309
{
293310
assert(get_interned_dict(interp) == NULL);
294-
PyObject *interned = interned = PyDict_New();
295-
if (interned == NULL) {
296-
return -1;
311+
PyObject *interned;
312+
if (has_shared_intern_dict(interp)) {
313+
interned = get_interned_dict(_PyInterpreterState_Main());
314+
Py_INCREF(interned);
315+
}
316+
else {
317+
interned = PyDict_New();
318+
if (interned == NULL) {
319+
return -1;
320+
}
297321
}
298322
_Py_INTERP_CACHED_OBJECT(interp, interned_strings) = interned;
299323
return 0;
@@ -304,7 +328,10 @@ clear_interned_dict(PyInterpreterState *interp)
304328
{
305329
PyObject *interned = get_interned_dict(interp);
306330
if (interned != NULL) {
307-
PyDict_Clear(interned);
331+
if (!has_shared_intern_dict(interp)) {
332+
// only clear if the dict belongs to this interpreter
333+
PyDict_Clear(interned);
334+
}
308335
Py_DECREF(interned);
309336
_Py_INTERP_CACHED_OBJECT(interp, interned_strings) = NULL;
310337
}
@@ -15152,6 +15179,13 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp)
1515215179
}
1515315180
assert(PyDict_CheckExact(interned));
1515415181

15182+
if (has_shared_intern_dict(interp)) {
15183+
// the dict doesn't belong to this interpreter, skip the debug
15184+
// checks on it and just clear the pointer to it
15185+
clear_interned_dict(interp);
15186+
return;
15187+
}
15188+
1515515189
#ifdef INTERNED_STATS
1515615190
fprintf(stderr, "releasing %zd interned strings\n",
1515715191
PyDict_GET_SIZE(interned));
@@ -15670,8 +15704,10 @@ _PyUnicode_Fini(PyInterpreterState *interp)
1567015704
{
1567115705
struct _Py_unicode_state *state = &interp->unicode;
1567215706

15673-
// _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini()
15674-
assert(get_interned_dict(interp) == NULL);
15707+
if (!has_shared_intern_dict(interp)) {
15708+
// _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini()
15709+
assert(get_interned_dict(interp) == NULL);
15710+
}
1567515711

1567615712
_PyUnicode_FiniEncodings(&state->fs_codec);
1567715713

Python/pylifecycle.c

+8
Original file line numberDiff line numberDiff line change
@@ -650,6 +650,10 @@ pycore_create_interpreter(_PyRuntimeState *runtime,
650650
return status;
651651
}
652652

653+
// This could be done in init_interpreter() (in pystate.c) if it
654+
// didn't depend on interp->feature_flags being set already.
655+
_PyObject_InitState(interp);
656+
653657
PyThreadState *tstate = _PyThreadState_New(interp);
654658
if (tstate == NULL) {
655659
return _PyStatus_ERR("can't make first thread");
@@ -2103,6 +2107,10 @@ new_interpreter(PyThreadState **tstate_p, const PyInterpreterConfig *config)
21032107
goto error;
21042108
}
21052109

2110+
// This could be done in init_interpreter() (in pystate.c) if it
2111+
// didn't depend on interp->feature_flags being set already.
2112+
_PyObject_InitState(interp);
2113+
21062114
status = init_interp_create_gil(tstate, config->gil);
21072115
if (_PyStatus_EXCEPTION(status)) {
21082116
goto error;

Python/pystate.c

+3-1
Original file line numberDiff line numberDiff line change
@@ -686,7 +686,9 @@ init_interpreter(PyInterpreterState *interp,
686686
_obmalloc_pools_INIT(interp->obmalloc.pools);
687687
memcpy(&interp->obmalloc.pools.used, temp, sizeof(temp));
688688
}
689-
_PyObject_InitState(interp);
689+
690+
// We would call _PyObject_InitState() at this point
691+
// if interp->feature_flags were alredy set.
690692

691693
_PyEval_InitState(interp, pending_lock);
692694
_PyGC_InitState(&interp->gc);

0 commit comments

Comments
 (0)