diff --git a/Doc/library/sys.rst b/Doc/library/sys.rst index bf2ea94f23f84d..221f9bd6942207 100644 --- a/Doc/library/sys.rst +++ b/Doc/library/sys.rst @@ -907,6 +907,35 @@ always available. It is not guaranteed to exist in all implementations of Python. +.. function:: getobjects(limit[, type]) + + This function only exists if CPython was built using the + specialized configure option :option:`--with-trace-refs`. + It is intended only for debugging garbage-collection issues. + + Return a list of up to *limit* dynamically allocated Python objects. + If *type* is given, only objects of that exact type (not subtypes) + are included. + + Objects from the list are not safe to use. + Specifically, the result will include objects from all interpreters that + share their object allocator state (that is, ones created with + :c:member:`PyInterpreterConfig.use_main_obmalloc` set to 1 + or using :c:func:`Py_NewInterpreter`, and the + :ref:`main interpreter `). + Mixing objects from different interpreters may lead to crashes + or other unexpected behavior. + + .. impl-detail:: + + This function should be used for specialized purposes only. + It is not guaranteed to exist in all implementations of Python. + + .. versionchanged:: next + + The result may include objects from other interpreters. + + .. function:: getprofile() .. index:: diff --git a/Doc/using/configure.rst b/Doc/using/configure.rst index 51af4e3b7ad7a8..fed1d1e2c75940 100644 --- a/Doc/using/configure.rst +++ b/Doc/using/configure.rst @@ -459,7 +459,7 @@ Debug options Effects: * Define the ``Py_TRACE_REFS`` macro. - * Add :func:`!sys.getobjects` function. + * Add :func:`sys.getobjects` function. * Add :envvar:`PYTHONDUMPREFS` environment variable. This build is not ABI compatible with release build (default build) or debug diff --git a/Doc/whatsnew/3.12.rst b/Doc/whatsnew/3.12.rst index 5cfbe12ab1f666..ee347d2000d8dc 100644 --- a/Doc/whatsnew/3.12.rst +++ b/Doc/whatsnew/3.12.rst @@ -2301,3 +2301,14 @@ email check if the *strict* paramater is available. (Contributed by Thomas Dwyer and Victor Stinner for :gh:`102988` to improve the CVE-2023-27043 fix.) + + +Notable changes in 3.12.8 +========================= + +sys +--- + +* The previously undocumented special function :func:`sys.getobjects`, + which only exists in specialized builds of Python, may now return objects + from other interpreters than the one it's called in. diff --git a/Include/internal/pycore_object_state.h b/Include/internal/pycore_object_state.h index 65feb5af969f8b..6e07b1a01b0e34 100644 --- a/Include/internal/pycore_object_state.h +++ b/Include/internal/pycore_object_state.h @@ -24,7 +24,13 @@ struct _py_object_state { * together via the _ob_prev and _ob_next members of a PyObject, which * exist only in a Py_TRACE_REFS build. */ - PyObject refchain; + PyObject *refchain; + /* In most cases, refchain points to _refchain_obj. + * In sub-interpreters that share objmalloc state with the main interp, + * refchain points to the main interpreter's _refchain_obj, and their own + * _refchain_obj is unused. + */ + PyObject _refchain_obj; #endif int _not_used; }; diff --git a/Include/internal/pycore_runtime_init.h b/Include/internal/pycore_runtime_init.h index e5f9e17efff24b..ad90ea680a52c3 100644 --- a/Include/internal/pycore_runtime_init.h +++ b/Include/internal/pycore_runtime_init.h @@ -132,15 +132,8 @@ extern PyTypeObject _PyExc_MemoryError; .context_ver = 1, \ } -#ifdef Py_TRACE_REFS -# define _py_object_state_INIT(INTERP) \ - { \ - .refchain = {&INTERP.object_state.refchain, &INTERP.object_state.refchain}, \ - } -#else # define _py_object_state_INIT(INTERP) \ { 0 } -#endif // global objects diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-09-26-18-21-06.gh-issue-116510.FacUWO.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-09-26-18-21-06.gh-issue-116510.FacUWO.rst new file mode 100644 index 00000000000000..e3741321006548 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-09-26-18-21-06.gh-issue-116510.FacUWO.rst @@ -0,0 +1,5 @@ +Fix a crash caused by immortal interned strings being shared between +sub-interpreters that use basic single-phase init. In that case, the string +can be used by an interpreter that outlives the interpreter that created and +interned it. For interpreters that share obmalloc state, also share the +interned dict with the main interpreter. diff --git a/Objects/object.c b/Objects/object.c index 6b2e0aeaab9903..706719116da3e2 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -159,11 +159,27 @@ _PyDebug_PrintTotalRefs(void) { #ifdef Py_TRACE_REFS -#define REFCHAIN(interp) &interp->object_state.refchain +#define REFCHAIN(interp) interp->object_state.refchain + +static inline int +has_own_refchain(PyInterpreterState *interp) +{ + if (interp->feature_flags & Py_RTFLAGS_USE_MAIN_OBMALLOC) { + return (_Py_IsMainInterpreter(interp) + || _PyInterpreterState_Main() == NULL); + } + return 1; +} static inline void init_refchain(PyInterpreterState *interp) { + if (!has_own_refchain(interp)) { + // Legacy subinterpreters share a refchain with the main interpreter. + REFCHAIN(interp) = REFCHAIN(_PyInterpreterState_Main()); + return; + } + REFCHAIN(interp) = &interp->object_state._refchain_obj; PyObject *refchain = REFCHAIN(interp); refchain->_ob_prev = refchain; refchain->_ob_next = refchain; @@ -2010,9 +2026,7 @@ void _PyObject_InitState(PyInterpreterState *interp) { #ifdef Py_TRACE_REFS - if (!_Py_IsMainInterpreter(interp)) { - init_refchain(interp); - } + init_refchain(interp); #endif } diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index ba11351f003007..c885be5b6682aa 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -287,13 +287,37 @@ hashtable_unicode_compare(const void *key1, const void *key2) } } +/* Return true if this interpreter should share the main interpreter's + intern_dict. That's important for interpreters which load basic + single-phase init extension modules (m_size == -1). There could be interned + immortal strings that are shared between interpreters, due to the + PyDict_Update(mdict, m_copy) call in import_find_extension(). + + It's not safe to deallocate those strings until all interpreters that + potentially use them are freed. By storing them in the main interpreter, we + ensure they get freed after all other interpreters are freed. +*/ +static bool +has_shared_intern_dict(PyInterpreterState *interp) +{ + PyInterpreterState *main_interp = _PyInterpreterState_Main(); + return interp != main_interp && interp->feature_flags & Py_RTFLAGS_USE_MAIN_OBMALLOC; +} + static int init_interned_dict(PyInterpreterState *interp) { assert(get_interned_dict(interp) == NULL); - PyObject *interned = interned = PyDict_New(); - if (interned == NULL) { - return -1; + PyObject *interned; + if (has_shared_intern_dict(interp)) { + interned = get_interned_dict(_PyInterpreterState_Main()); + Py_INCREF(interned); + } + else { + interned = PyDict_New(); + if (interned == NULL) { + return -1; + } } _Py_INTERP_CACHED_OBJECT(interp, interned_strings) = interned; return 0; @@ -304,7 +328,10 @@ clear_interned_dict(PyInterpreterState *interp) { PyObject *interned = get_interned_dict(interp); if (interned != NULL) { - PyDict_Clear(interned); + if (!has_shared_intern_dict(interp)) { + // only clear if the dict belongs to this interpreter + PyDict_Clear(interned); + } Py_DECREF(interned); _Py_INTERP_CACHED_OBJECT(interp, interned_strings) = NULL; } @@ -15152,6 +15179,13 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp) } assert(PyDict_CheckExact(interned)); + if (has_shared_intern_dict(interp)) { + // the dict doesn't belong to this interpreter, skip the debug + // checks on it and just clear the pointer to it + clear_interned_dict(interp); + return; + } + #ifdef INTERNED_STATS fprintf(stderr, "releasing %zd interned strings\n", PyDict_GET_SIZE(interned)); @@ -15670,8 +15704,10 @@ _PyUnicode_Fini(PyInterpreterState *interp) { struct _Py_unicode_state *state = &interp->unicode; - // _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini() - assert(get_interned_dict(interp) == NULL); + if (!has_shared_intern_dict(interp)) { + // _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini() + assert(get_interned_dict(interp) == NULL); + } _PyUnicode_FiniEncodings(&state->fs_codec); diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 6d580c6d4892fd..e9c1a0d72d6d4d 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -650,6 +650,10 @@ pycore_create_interpreter(_PyRuntimeState *runtime, return status; } + // This could be done in init_interpreter() (in pystate.c) if it + // didn't depend on interp->feature_flags being set already. + _PyObject_InitState(interp); + PyThreadState *tstate = _PyThreadState_New(interp); if (tstate == NULL) { return _PyStatus_ERR("can't make first thread"); @@ -2103,6 +2107,10 @@ new_interpreter(PyThreadState **tstate_p, const PyInterpreterConfig *config) goto error; } + // This could be done in init_interpreter() (in pystate.c) if it + // didn't depend on interp->feature_flags being set already. + _PyObject_InitState(interp); + status = init_interp_create_gil(tstate, config->gil); if (_PyStatus_EXCEPTION(status)) { goto error; diff --git a/Python/pystate.c b/Python/pystate.c index f0e0d4117e4259..6aed9ac312a8a4 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -686,7 +686,9 @@ init_interpreter(PyInterpreterState *interp, _obmalloc_pools_INIT(interp->obmalloc.pools); memcpy(&interp->obmalloc.pools.used, temp, sizeof(temp)); } - _PyObject_InitState(interp); + + // We would call _PyObject_InitState() at this point + // if interp->feature_flags were alredy set. _PyEval_InitState(interp, pending_lock); _PyGC_InitState(&interp->gc);