diff --git a/Include/cpython/pystate.h b/Include/cpython/pystate.h index e0d2ac9bf93ad8..65bc11ca0f5ba9 100644 --- a/Include/cpython/pystate.h +++ b/Include/cpython/pystate.h @@ -83,8 +83,6 @@ struct _ts { unsigned int bound_gilstate:1; /* Currently in use (maybe holds the GIL). */ unsigned int active:1; - /* Currently holds the GIL. */ - unsigned int holds_gil:1; /* various stages of finalization */ unsigned int finalizing:1; @@ -92,7 +90,7 @@ struct _ts { unsigned int finalized:1; /* padding to align to 4 bytes */ - unsigned int :23; + unsigned int :24; } _status; #ifdef Py_BUILD_CORE # define _PyThreadState_WHENCE_NOTSET -1 @@ -103,6 +101,10 @@ struct _ts { # define _PyThreadState_WHENCE_GILSTATE 4 # define _PyThreadState_WHENCE_EXEC 5 #endif + + /* Currently holds the GIL. Must be its own field to avoid data races */ + int holds_gil; + int _whence; /* Thread state (_Py_THREAD_ATTACHED, _Py_THREAD_DETACHED, _Py_THREAD_SUSPENDED). diff --git a/Include/internal/pycore_pystate.h b/Include/internal/pycore_pystate.h index 9ec59e60f609ab..f3667a8aa71c27 100644 --- a/Include/internal/pycore_pystate.h +++ b/Include/internal/pycore_pystate.h @@ -27,6 +27,10 @@ extern "C" { // "suspended" state. Only the thread performing a stop-the-world pause may // transition a thread from the "suspended" state back to the "detached" state. // +// The "shutting down" state is used when the interpreter is being finalized. +// Threads in this state can't do anything other than block the OS thread. +// (See _PyThreadState_HangThread). +// // State transition diagram: // // (bound thread) (stop-the-world thread) @@ -37,9 +41,10 @@ extern "C" { // // The (bound thread) and (stop-the-world thread) labels indicate which thread // is allowed to perform the transition. -#define _Py_THREAD_DETACHED 0 -#define _Py_THREAD_ATTACHED 1 -#define _Py_THREAD_SUSPENDED 2 +#define _Py_THREAD_DETACHED 0 +#define _Py_THREAD_ATTACHED 1 +#define _Py_THREAD_SUSPENDED 2 +#define _Py_THREAD_SHUTTING_DOWN 3 /* Check if the current thread is the main thread. @@ -118,7 +123,8 @@ extern _Py_thread_local PyThreadState *_Py_tss_tstate; extern int _PyThreadState_CheckConsistency(PyThreadState *tstate); #endif -int _PyThreadState_MustExit(PyThreadState *tstate); +extern int _PyThreadState_MustExit(PyThreadState *tstate); +extern void _PyThreadState_HangThread(PyThreadState *tstate); // Export for most shared extensions, used via _PyThreadState_GET() static // inline function. @@ -169,6 +175,11 @@ extern void _PyThreadState_Detach(PyThreadState *tstate); // to the "detached" state. extern void _PyThreadState_Suspend(PyThreadState *tstate); +// Mark the thread state as "shutting down". This is used during interpreter +// and runtime finalization. The thread may no longer attach to the +// interpreter and will instead block via _PyThreadState_HangThread(). +extern void _PyThreadState_SetShuttingDown(PyThreadState *tstate); + // Perform a stop-the-world pause for all threads in the all interpreters. // // Threads in the "attached" state are paused and transitioned to the "GC" @@ -238,7 +249,7 @@ PyAPI_FUNC(PyThreadState *) _PyThreadState_NewBound( PyInterpreterState *interp, int whence); extern PyThreadState * _PyThreadState_RemoveExcept(PyThreadState *tstate); -extern void _PyThreadState_DeleteList(PyThreadState *list); +extern void _PyThreadState_DeleteList(PyThreadState *list, int is_after_fork); extern void _PyThreadState_ClearMimallocHeaps(PyThreadState *tstate); // Export for '_testinternalcapi' shared extension diff --git a/Include/internal/pycore_runtime_init.h b/Include/internal/pycore_runtime_init.h index 1260b957ce9482..2ec32b64adde0b 100644 --- a/Include/internal/pycore_runtime_init.h +++ b/Include/internal/pycore_runtime_init.h @@ -171,6 +171,8 @@ extern PyTypeObject _PyExc_MemoryError; #define _PyThreadStateImpl_INIT \ { \ .base = _PyThreadState_INIT, \ + /* The thread and the interpreter's linked list hold a reference */ \ + .refcount = 2, \ } #define _PyThreadState_INIT \ diff --git a/Include/internal/pycore_tstate.h b/Include/internal/pycore_tstate.h index 624b29e32ed463..6f50bb2f26307a 100644 --- a/Include/internal/pycore_tstate.h +++ b/Include/internal/pycore_tstate.h @@ -21,6 +21,10 @@ typedef struct _PyThreadStateImpl { // semi-public fields are in PyThreadState. PyThreadState base; + // The reference count field is used to synchronize deallocation of the + // thread state during runtime finalization. + Py_ssize_t refcount; + // These are addresses, but we need to convert to ints to avoid UB. uintptr_t c_stack_top; uintptr_t c_stack_soft_limit; diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-02-27-18-48-42.gh-issue-124878.DS0MIL.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-02-27-18-48-42.gh-issue-124878.DS0MIL.rst new file mode 100644 index 00000000000000..842566433d5597 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-02-27-18-48-42.gh-issue-124878.DS0MIL.rst @@ -0,0 +1,2 @@ +Fix race conditions during runtime finalization that could lead to accessing +freed memory. diff --git a/Modules/_threadmodule.c b/Modules/_threadmodule.c index dab6b395cb2e46..d423854b8ebff3 100644 --- a/Modules/_threadmodule.c +++ b/Modules/_threadmodule.c @@ -333,9 +333,6 @@ thread_run(void *boot_raw) // _PyRuntimeState_SetFinalizing() is called. At this point, all Python // threads must exit, except of the thread calling Py_Finalize() which // holds the GIL and must not exit. - // - // At this stage, tstate can be a dangling pointer (point to freed memory), - // it's ok to call _PyThreadState_MustExit() with a dangling pointer. if (_PyThreadState_MustExit(tstate)) { // Don't call PyThreadState_Clear() nor _PyThreadState_DeleteCurrent(). // These functions are called on tstate indirectly by Py_Finalize() diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index bf8ad19416304f..25ebc98435c5da 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -715,7 +715,7 @@ PyOS_AfterFork_Child(void) // may call destructors. PyThreadState *list = _PyThreadState_RemoveExcept(tstate); _PyEval_StartTheWorldAll(&_PyRuntime); - _PyThreadState_DeleteList(list); + _PyThreadState_DeleteList(list, /*is_after_fork=*/1); _PyImport_ReInitLock(tstate->interp); _PyImport_ReleaseLock(tstate->interp); diff --git a/Python/ceval_gil.c b/Python/ceval_gil.c index 7a3cd8d8044739..2c1cc17b2ffa13 100644 --- a/Python/ceval_gil.c +++ b/Python/ceval_gil.c @@ -6,8 +6,8 @@ #include "pycore_pyerrors.h" // _PyErr_GetRaisedException() #include "pycore_pylifecycle.h" // _PyErr_Print() #include "pycore_pymem.h" // _PyMem_IsPtrFreed() +#include "pycore_pystate.h" // PyThread_hang_thread() #include "pycore_pystats.h" // _Py_PrintSpecializationStats() -#include "pycore_pythread.h" // PyThread_hang_thread() /* Notes about the implementation: @@ -206,7 +206,7 @@ drop_gil_impl(PyThreadState *tstate, struct _gil_runtime_state *gil) _Py_ANNOTATE_RWLOCK_RELEASED(&gil->locked, /*is_write=*/1); _Py_atomic_store_int_relaxed(&gil->locked, 0); if (tstate != NULL) { - tstate->_status.holds_gil = 0; + tstate->holds_gil = 0; } COND_SIGNAL(gil->cond); MUTEX_UNLOCK(gil->mutex); @@ -231,7 +231,7 @@ drop_gil(PyInterpreterState *interp, PyThreadState *tstate, int final_release) // Check if we have the GIL before dropping it. tstate will be NULL if // take_gil() detected that this thread has been destroyed, in which case // we know we have the GIL. - if (tstate != NULL && !tstate->_status.holds_gil) { + if (tstate != NULL && !tstate->holds_gil) { return; } #endif @@ -296,15 +296,14 @@ take_gil(PyThreadState *tstate) thread which called Py_Finalize(), this thread cannot continue. This code path can be reached by a daemon thread after Py_Finalize() - completes. In this case, tstate is a dangling pointer: points to - PyThreadState freed memory. + completes. This used to call a *thread_exit API, but that was not safe as it lacks stack unwinding and local variable destruction important to C++. gh-87135: The best that can be done is to hang the thread as the public APIs calling this have no error reporting mechanism (!). */ - PyThread_hang_thread(); + _PyThreadState_HangThread(tstate); } assert(_PyThreadState_CheckConsistency(tstate)); @@ -353,7 +352,7 @@ take_gil(PyThreadState *tstate) } // gh-87135: hang the thread as *thread_exit() is not a safe // API. It lacks stack unwind and local variable destruction. - PyThread_hang_thread(); + _PyThreadState_HangThread(tstate); } assert(_PyThreadState_CheckConsistency(tstate)); @@ -404,11 +403,11 @@ take_gil(PyThreadState *tstate) /* tstate could be a dangling pointer, so don't pass it to drop_gil(). */ drop_gil(interp, NULL, 1); - PyThread_hang_thread(); + _PyThreadState_HangThread(tstate); } assert(_PyThreadState_CheckConsistency(tstate)); - tstate->_status.holds_gil = 1; + tstate->holds_gil = 1; _Py_unset_eval_breaker_bit(tstate, _PY_GIL_DROP_REQUEST_BIT); update_eval_breaker_for_thread(interp, tstate); @@ -460,7 +459,7 @@ PyEval_ThreadsInitialized(void) static inline int current_thread_holds_gil(struct _gil_runtime_state *gil, PyThreadState *tstate) { - int holds_gil = tstate->_status.holds_gil; + int holds_gil = tstate->holds_gil; // holds_gil is the source of truth; check that last_holder and gil->locked // are consistent with it. diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 1a7f312a604614..4007e43c98d640 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -2036,18 +2036,23 @@ _Py_Finalize(_PyRuntimeState *runtime) // XXX Call something like _PyImport_Disable() here? - /* Destroy the state of all threads of the interpreter, except of the + /* Remove the state of all threads of the interpreter, except for the current thread. In practice, only daemon threads should still be alive, except if wait_for_thread_shutdown() has been cancelled by CTRL+C. - Clear frames of other threads to call objects destructors. Destructors - will be called in the current Python thread. Since - _PyRuntimeState_SetFinalizing() has been called, no other Python thread - can take the GIL at this point: if they try, they will exit - immediately. We start the world once we are the only thread state left, + We start the world once we are the only thread state left, before we call destructors. */ PyThreadState *list = _PyThreadState_RemoveExcept(tstate); + for (PyThreadState *p = list; p != NULL; p = p->next) { + _PyThreadState_SetShuttingDown(p); + } _PyEval_StartTheWorldAll(runtime); - _PyThreadState_DeleteList(list); + + /* Clear frames of other threads to call objects destructors. Destructors + will be called in the current Python thread. Since + _PyRuntimeState_SetFinalizing() has been called, no other Python thread + can take the GIL at this point: if they try, they will hang in + _PyThreadState_HangThread. */ + _PyThreadState_DeleteList(list, /*is_after_fork=*/0); /* At this point no Python code should be running at all. The only thread state left should be the main thread of the main diff --git a/Python/pystate.c b/Python/pystate.c index 72e2627e95b559..bfefd1044c725b 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -1461,6 +1461,15 @@ free_threadstate(_PyThreadStateImpl *tstate) } } +static void +decref_threadstate(_PyThreadStateImpl *tstate) +{ + if (_Py_atomic_add_ssize(&tstate->refcount, -1) == 1) { + // The last reference to the thread state is gone. + free_threadstate(tstate); + } +} + /* Get the thread state to a minimal consistent state. Further init happens in pylifecycle.c before it can be used. All fields not initialized here are expected to be zeroed out, @@ -1925,8 +1934,12 @@ _PyThreadState_RemoveExcept(PyThreadState *tstate) // Deletes the thread states in the linked list `list`. // // This is intended to be used in conjunction with _PyThreadState_RemoveExcept. +// +// If `is_after_fork` is true, the thread states are immediately freed. +// Otherwise, they are decref'd because they may still be referenced by an +// OS thread. void -_PyThreadState_DeleteList(PyThreadState *list) +_PyThreadState_DeleteList(PyThreadState *list, int is_after_fork) { // The world can't be stopped because we PyThreadState_Clear() can // call destructors. @@ -1936,7 +1949,12 @@ _PyThreadState_DeleteList(PyThreadState *list) for (p = list; p; p = next) { next = p->next; PyThreadState_Clear(p); - free_threadstate((_PyThreadStateImpl *)p); + if (is_after_fork) { + free_threadstate((_PyThreadStateImpl *)p); + } + else { + decref_threadstate((_PyThreadStateImpl *)p); + } } } @@ -2069,12 +2087,19 @@ static void tstate_wait_attach(PyThreadState *tstate) { do { - int expected = _Py_THREAD_SUSPENDED; - - // Wait until we're switched out of SUSPENDED to DETACHED. - _PyParkingLot_Park(&tstate->state, &expected, sizeof(tstate->state), - /*timeout=*/-1, NULL, /*detach=*/0); - + int state = _Py_atomic_load_int_relaxed(&tstate->state); + if (state == _Py_THREAD_SUSPENDED) { + // Wait until we're switched out of SUSPENDED to DETACHED. + _PyParkingLot_Park(&tstate->state, &state, sizeof(tstate->state), + /*timeout=*/-1, NULL, /*detach=*/0); + } + else if (state == _Py_THREAD_SHUTTING_DOWN) { + // We're shutting down, so we can't attach. + _PyThreadState_HangThread(tstate); + } + else { + assert(state == _Py_THREAD_DETACHED); + } // Once we're back in DETACHED we can re-attach } while (!tstate_try_attach(tstate)); } @@ -2105,7 +2130,7 @@ _PyThreadState_Attach(PyThreadState *tstate) tstate_activate(tstate); #ifdef Py_GIL_DISABLED - if (_PyEval_IsGILEnabled(tstate) && !tstate->_status.holds_gil) { + if (_PyEval_IsGILEnabled(tstate) && !tstate->holds_gil) { // The GIL was enabled between our call to _PyEval_AcquireLock() // and when we attached (the GIL can't go from enabled to disabled // here because only a thread holding the GIL can disable @@ -2188,6 +2213,15 @@ _PyThreadState_Suspend(PyThreadState *tstate) HEAD_UNLOCK(runtime); } +void +_PyThreadState_SetShuttingDown(PyThreadState *tstate) +{ + _Py_atomic_store_int(&tstate->state, _Py_THREAD_SHUTTING_DOWN); +#ifdef Py_GIL_DISABLED + _PyParkingLot_UnparkAll(&tstate->state); +#endif +} + // Decrease stop-the-world counter of remaining number of threads that need to // pause. If we are the final thread to pause, notify the requesting thread. static void @@ -2988,43 +3022,27 @@ _PyThreadState_CheckConsistency(PyThreadState *tstate) #endif -// Check if a Python thread must exit immediately, rather than taking the GIL -// if Py_Finalize() has been called. +// Check if a Python thread must call _PyThreadState_HangThread(), rather than +// taking the GIL or attaching to the interpreter if Py_Finalize() has been +// called. // // When this function is called by a daemon thread after Py_Finalize() has been -// called, the GIL does no longer exist. -// -// tstate can be a dangling pointer (point to freed memory): only tstate value -// is used, the pointer is not deferenced. +// called, the GIL may no longer exist. // // tstate must be non-NULL. int _PyThreadState_MustExit(PyThreadState *tstate) { - /* bpo-39877: Access _PyRuntime directly rather than using - tstate->interp->runtime to support calls from Python daemon threads. - After Py_Finalize() has been called, tstate can be a dangling pointer: - point to PyThreadState freed memory. */ - unsigned long finalizing_id = _PyRuntimeState_GetFinalizingID(&_PyRuntime); - PyThreadState *finalizing = _PyRuntimeState_GetFinalizing(&_PyRuntime); - if (finalizing == NULL) { - // XXX This isn't completely safe from daemon thraeds, - // since tstate might be a dangling pointer. - finalizing = _PyInterpreterState_GetFinalizing(tstate->interp); - finalizing_id = _PyInterpreterState_GetFinalizingID(tstate->interp); - } - // XXX else check &_PyRuntime._main_interpreter._initial_thread - if (finalizing == NULL) { - return 0; - } - else if (finalizing == tstate) { - return 0; - } - else if (finalizing_id == PyThread_get_thread_ident()) { - /* gh-109793: we must have switched interpreters. */ - return 0; - } - return 1; + int state = _Py_atomic_load_int_relaxed(&tstate->state); + return state == _Py_THREAD_SHUTTING_DOWN; +} + +void +_PyThreadState_HangThread(PyThreadState *tstate) +{ + _PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)tstate; + decref_threadstate(tstate_impl); + PyThread_hang_thread(); } /********************/ diff --git a/Python/qsbr.c b/Python/qsbr.c index 0df1285cc8e063..5b689c6039c950 100644 --- a/Python/qsbr.c +++ b/Python/qsbr.c @@ -240,7 +240,7 @@ _Py_qsbr_unregister(PyThreadState *tstate) // gh-119369: GIL must be released (if held) to prevent deadlocks, because // we might not have an active tstate, which means that blocking on PyMutex // locks will not implicitly release the GIL. - assert(!tstate->_status.holds_gil); + assert(!tstate->holds_gil); PyMutex_Lock(&shared->mutex); // NOTE: we must load (or reload) the thread state's qbsr inside the mutex diff --git a/Tools/tsan/suppressions.txt b/Tools/tsan/suppressions.txt index c70b0ddca059c2..6bda5ecd570889 100644 --- a/Tools/tsan/suppressions.txt +++ b/Tools/tsan/suppressions.txt @@ -1,8 +1,5 @@ # This file contains suppressions for the default (with GIL) build. # reference: https://github.com/google/sanitizers/wiki/ThreadSanitizerSuppressions -# gh-124878: race condition when interpreter finalized while daemon thread runs -race:free_threadstate - # https://gist.github.com/mpage/daaf32b39180c1989572957b943eb665 thread:pthread_create diff --git a/Tools/tsan/suppressions_free_threading.txt b/Tools/tsan/suppressions_free_threading.txt index c32c43db19cb96..fd47c85af1adb1 100644 --- a/Tools/tsan/suppressions_free_threading.txt +++ b/Tools/tsan/suppressions_free_threading.txt @@ -10,29 +10,20 @@ # These entries are for warnings that trigger in a library function, as called # by a CPython function. -# gh-124878: race condition when interpreter finalized while daemon thread runs -race:free_threadstate - # These warnings trigger directly in a CPython function. race_top:assign_version_tag race_top:_multiprocessing_SemLock_acquire_impl race_top:_Py_slot_tp_getattr_hook -race_top:add_threadstate race_top:dump_traceback race_top:fatal_error race_top:_multiprocessing_SemLock_release_impl race_top:_PyFrame_GetCode race_top:_PyFrame_Initialize -race_top:PyInterpreterState_ThreadHead race_top:_PyObject_TryGetInstanceAttribute -race_top:PyThreadState_Next race_top:PyUnstable_InterpreterFrame_GetLine -race_top:tstate_delete_common -race_top:tstate_is_freed race_top:type_modified_unlocked race_top:write_thread_id -race_top:PyThreadState_Clear # gh-129068: race on shared range iterators (test_free_threading.test_zip.ZipThreading.test_threading) race_top:rangeiter_next