From a50f9b6d118560a8c63ddd90a85d3426c638dab6 Mon Sep 17 00:00:00 2001 From: Victor Stinner <vstinner@python.org> Date: Fri, 8 Sep 2023 12:02:19 +0200 Subject: [PATCH] gh-108987: Fix _thread.start_new_thread() race condition Fix _thread.start_new_thread() race condition. If a thread is created during Python finalization, the newly spawned thread now exits immediately instead of trying to access freed memory and lead to a crash. thread_run() calls PyEval_AcquireThread() which checks if the thread must exit. The problem was that tstate was dereferenced earlier in _PyThreadState_Bind() which leads to a crash most of the time. Move _PyThreadState_CheckConsistency() from thread_run() to _PyThreadState_Bind(). --- Include/internal/pycore_pystate.h | 2 + ...-09-08-12-09-55.gh-issue-108987.x5AIG8.rst | 4 ++ Modules/_threadmodule.c | 47 ++++++++++++------- Python/ceval_gil.c | 28 ++--------- Python/pystate.c | 29 ++++++++++++ 5 files changed, 69 insertions(+), 41 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2023-09-08-12-09-55.gh-issue-108987.x5AIG8.rst diff --git a/Include/internal/pycore_pystate.h b/Include/internal/pycore_pystate.h index 9c0e42e7bad06c..9fc8ae903b2ac0 100644 --- a/Include/internal/pycore_pystate.h +++ b/Include/internal/pycore_pystate.h @@ -71,6 +71,8 @@ extern _Py_thread_local PyThreadState *_Py_tss_tstate; extern int _PyThreadState_CheckConsistency(PyThreadState *tstate); #endif +int _PyThreadState_MustExit(PyThreadState *tstate); + // Export for most shared extensions, used via _PyThreadState_GET() static // inline function. PyAPI_FUNC(PyThreadState *) _PyThreadState_GetCurrent(void); diff --git a/Misc/NEWS.d/next/Library/2023-09-08-12-09-55.gh-issue-108987.x5AIG8.rst b/Misc/NEWS.d/next/Library/2023-09-08-12-09-55.gh-issue-108987.x5AIG8.rst new file mode 100644 index 00000000000000..16526ee748d869 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-09-08-12-09-55.gh-issue-108987.x5AIG8.rst @@ -0,0 +1,4 @@ +Fix :func:`_thread.start_new_thread` race condition. If a thread is created +during Python finalization, the newly spawned thread now exits immediately +instead of trying to access freed memory and lead to a crash. Patch by +Victor Stinner. diff --git a/Modules/_threadmodule.c b/Modules/_threadmodule.c index 05bb49756c9303..7692bacccc4909 100644 --- a/Modules/_threadmodule.c +++ b/Modules/_threadmodule.c @@ -1051,21 +1051,21 @@ _localdummy_destroyed(PyObject *localweakref, PyObject *dummyweakref) /* Module functions */ struct bootstate { - PyInterpreterState *interp; + PyThreadState *tstate; PyObject *func; PyObject *args; PyObject *kwargs; - PyThreadState *tstate; - _PyRuntimeState *runtime; }; static void -thread_bootstate_free(struct bootstate *boot) +thread_bootstate_free(struct bootstate *boot, int decref) { - Py_DECREF(boot->func); - Py_DECREF(boot->args); - Py_XDECREF(boot->kwargs); + if (decref) { + Py_DECREF(boot->func); + Py_DECREF(boot->args); + Py_XDECREF(boot->kwargs); + } PyMem_Free(boot); } @@ -1076,9 +1076,24 @@ thread_run(void *boot_raw) struct bootstate *boot = (struct bootstate *) boot_raw; PyThreadState *tstate = boot->tstate; - // gh-104690: If Python is being finalized and PyInterpreterState_Delete() - // was called, tstate becomes a dangling pointer. - assert(_PyThreadState_CheckConsistency(tstate)); + // gh-108987: If _thread.start_new_thread() is called before or while + // Python is being finalized, thread_run() can called *after*. + // _PyRuntimeState_SetFinalizing() is called. At this point, all Python + // threads must exit, except of the thread calling Py_Finalize() whch holds + // the GIL and must not exit. + // + // At this stage, tstate can be a dangling pointer (point to freed memory), + // it's ok to call _PyThreadState_MustExit() with a dangling pointer. + if (_PyThreadState_MustExit(tstate)) { + // Don't call PyThreadState_Clear() nor _PyThreadState_DeleteCurrent(). + // These functions are called on tstate indirectly by Py_Finalize() + // which calls _PyInterpreterState_Clear(). + // + // Py_DECREF() cannot be called because the GIL is not held: leak + // references on purpose. Python is being finalized anyway. + thread_bootstate_free(boot, 0); + goto exit; + } _PyThreadState_Bind(tstate); PyEval_AcquireThread(tstate); @@ -1097,14 +1112,17 @@ thread_run(void *boot_raw) Py_DECREF(res); } - thread_bootstate_free(boot); + thread_bootstate_free(boot, 1); + tstate->interp->threads.count--; PyThreadState_Clear(tstate); _PyThreadState_DeleteCurrent(tstate); +exit: // bpo-44434: Don't call explicitly PyThread_exit_thread(). On Linux with // the glibc, pthread_exit() can abort the whole process if dlopen() fails // to open the libgcc_s.so library (ex: EMFILE error). + return; } static PyObject * @@ -1128,7 +1146,6 @@ and False otherwise.\n"); static PyObject * thread_PyThread_start_new_thread(PyObject *self, PyObject *fargs) { - _PyRuntimeState *runtime = &_PyRuntime; PyObject *func, *args, *kwargs = NULL; if (!PyArg_UnpackTuple(fargs, "start_new_thread", 2, 3, @@ -1171,8 +1188,7 @@ thread_PyThread_start_new_thread(PyObject *self, PyObject *fargs) if (boot == NULL) { return PyErr_NoMemory(); } - boot->interp = _PyInterpreterState_GET(); - boot->tstate = _PyThreadState_New(boot->interp); + boot->tstate = _PyThreadState_New(interp); if (boot->tstate == NULL) { PyMem_Free(boot); if (!PyErr_Occurred()) { @@ -1180,7 +1196,6 @@ thread_PyThread_start_new_thread(PyObject *self, PyObject *fargs) } return NULL; } - boot->runtime = runtime; boot->func = Py_NewRef(func); boot->args = Py_NewRef(args); boot->kwargs = Py_XNewRef(kwargs); @@ -1189,7 +1204,7 @@ thread_PyThread_start_new_thread(PyObject *self, PyObject *fargs) if (ident == PYTHREAD_INVALID_THREAD_ID) { PyErr_SetString(ThreadError, "can't start new thread"); PyThreadState_Clear(boot->tstate); - thread_bootstate_free(boot); + thread_bootstate_free(boot, 1); return NULL; } return PyLong_FromUnsignedLong(ident); diff --git a/Python/ceval_gil.c b/Python/ceval_gil.c index cef5317b46bf8e..3b7e6cb1bda3ff 100644 --- a/Python/ceval_gil.c +++ b/Python/ceval_gil.c @@ -329,28 +329,6 @@ drop_gil(struct _ceval_state *ceval, PyThreadState *tstate) } -/* Check if a Python thread must exit immediately, rather than taking the GIL - if Py_Finalize() has been called. - - When this function is called by a daemon thread after Py_Finalize() has been - called, the GIL does no longer exist. - - tstate must be non-NULL. */ -static inline int -tstate_must_exit(PyThreadState *tstate) -{ - /* bpo-39877: Access _PyRuntime directly rather than using - tstate->interp->runtime to support calls from Python daemon threads. - After Py_Finalize() has been called, tstate can be a dangling pointer: - point to PyThreadState freed memory. */ - PyThreadState *finalizing = _PyRuntimeState_GetFinalizing(&_PyRuntime); - if (finalizing == NULL) { - finalizing = _PyInterpreterState_GetFinalizing(tstate->interp); - } - return (finalizing != NULL && finalizing != tstate); -} - - /* Take the GIL. The function saves errno at entry and restores its value at exit. @@ -366,7 +344,7 @@ take_gil(PyThreadState *tstate) // XXX It may be more correct to check tstate->_status.finalizing. // XXX assert(!tstate->_status.cleared); - if (tstate_must_exit(tstate)) { + if (_PyThreadState_MustExit(tstate)) { /* bpo-39877: If Py_Finalize() has been called and tstate is not the thread which called Py_Finalize(), exit immediately the thread. @@ -404,7 +382,7 @@ take_gil(PyThreadState *tstate) _Py_atomic_load_relaxed(&gil->locked) && gil->switch_number == saved_switchnum) { - if (tstate_must_exit(tstate)) { + if (_PyThreadState_MustExit(tstate)) { MUTEX_UNLOCK(gil->mutex); // gh-96387: If the loop requested a drop request in a previous // iteration, reset the request. Otherwise, drop_gil() can @@ -444,7 +422,7 @@ take_gil(PyThreadState *tstate) MUTEX_UNLOCK(gil->switch_mutex); #endif - if (tstate_must_exit(tstate)) { + if (_PyThreadState_MustExit(tstate)) { /* bpo-36475: If Py_Finalize() has been called and tstate is not the thread which called Py_Finalize(), exit immediately the thread. diff --git a/Python/pystate.c b/Python/pystate.c index 09c3538ad7b872..b5c4fd7fb50616 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -1907,6 +1907,10 @@ PyThreadState_Swap(PyThreadState *newts) void _PyThreadState_Bind(PyThreadState *tstate) { + // gh-104690: If Python is being finalized and PyInterpreterState_Delete() + // was called, tstate becomes a dangling pointer. + assert(_PyThreadState_CheckConsistency(tstate)); + bind_tstate(tstate); // This makes sure there's a gilstate tstate bound // as soon as possible. @@ -2908,6 +2912,31 @@ _PyThreadState_CheckConsistency(PyThreadState *tstate) #endif +// Check if a Python thread must exit immediately, rather than taking the GIL +// if Py_Finalize() has been called. +// +// When this function is called by a daemon thread after Py_Finalize() has been +// called, the GIL does no longer exist. +// +// tstate can be a dangling pointer (point to freed memory): only tstate value +// is used, the pointer is not deferenced. +// +// tstate must be non-NULL. +int +_PyThreadState_MustExit(PyThreadState *tstate) +{ + /* bpo-39877: Access _PyRuntime directly rather than using + tstate->interp->runtime to support calls from Python daemon threads. + After Py_Finalize() has been called, tstate can be a dangling pointer: + point to PyThreadState freed memory. */ + PyThreadState *finalizing = _PyRuntimeState_GetFinalizing(&_PyRuntime); + if (finalizing == NULL) { + finalizing = _PyInterpreterState_GetFinalizing(tstate->interp); + } + return (finalizing != NULL && finalizing != tstate); +} + + #ifdef __cplusplus } #endif