Skip to content

Commit 6f23410

Browse files
swtaarrs authored and diegorusso committed
pythongh-112175: Add eval_breaker to PyThreadState (python#115194)
This change adds an `eval_breaker` field to `PyThreadState`. The primary motivation is for performance in free-threaded builds: with thread-local eval breakers, we can stop a specific thread (e.g., for an async exception) without interrupting other threads. The source of truth for the global instrumentation version is stored in the `instrumentation_version` field in PyInterpreterState. Threads usually read the version from their local `eval_breaker`, where it continues to be colocated with the eval breaker bits.
1 parent 39bb8a5 commit 6f23410

19 files changed

+262
-169
lines changed

Include/cpython/pystate.h

+5
Original file line number | Diff line number | Diff line change
@@ -68,6 +68,11 @@ struct _ts {
6868
PyThreadState *next;
6969
PyInterpreterState *interp;
7070

71+
/* The global instrumentation version in high bits, plus flags indicating
72+
when to break out of the interpreter loop in lower bits. See details in
73+
pycore_ceval.h. */
74+
uintptr_t eval_breaker;
75+
7176
struct {
7277
/* Has been initialized to a safe state.
7378

Include/internal/pycore_ceval.h

+26-24
Original file line number | Diff line number | Diff line change
@@ -42,7 +42,7 @@ PyAPI_FUNC(int) _PyEval_MakePendingCalls(PyThreadState *);
4242

4343
extern void _Py_FinishPendingCalls(PyThreadState *tstate);
4444
extern void _PyEval_InitState(PyInterpreterState *);
45-
extern void _PyEval_SignalReceived(PyInterpreterState *interp);
45+
extern void _PyEval_SignalReceived(void);
4646

4747
// bitwise flags:
4848
#define _Py_PENDING_MAINTHREADONLY 1
@@ -55,7 +55,6 @@ PyAPI_FUNC(int) _PyEval_AddPendingCall(
5555
void *arg,
5656
int flags);
5757

58-
extern void _PyEval_SignalAsyncExc(PyInterpreterState *interp);
5958
#ifdef HAVE_FORK
6059
extern PyStatus _PyEval_ReInitThreads(PyThreadState *tstate);
6160
#endif
@@ -200,40 +199,43 @@ int _PyEval_UnpackIterable(PyThreadState *tstate, PyObject *v, int argcnt, int a
200199
void _PyEval_FrameClearAndPop(PyThreadState *tstate, _PyInterpreterFrame *frame);
201200

202201

203-
#define _PY_GIL_DROP_REQUEST_BIT 0
204-
#define _PY_SIGNALS_PENDING_BIT 1
205-
#define _PY_CALLS_TO_DO_BIT 2
206-
#define _PY_ASYNC_EXCEPTION_BIT 3
207-
#define _PY_GC_SCHEDULED_BIT 4
208-
#define _PY_EVAL_PLEASE_STOP_BIT 5
209-
#define _PY_EVAL_EXPLICIT_MERGE_BIT 6
202+
/* Bits that can be set in PyThreadState.eval_breaker */
203+
#define _PY_GIL_DROP_REQUEST_BIT (1U << 0)
204+
#define _PY_SIGNALS_PENDING_BIT (1U << 1)
205+
#define _PY_CALLS_TO_DO_BIT (1U << 2)
206+
#define _PY_ASYNC_EXCEPTION_BIT (1U << 3)
207+
#define _PY_GC_SCHEDULED_BIT (1U << 4)
208+
#define _PY_EVAL_PLEASE_STOP_BIT (1U << 5)
209+
#define _PY_EVAL_EXPLICIT_MERGE_BIT (1U << 6)
210210

211211
/* Reserve a few bits for future use */
212212
#define _PY_EVAL_EVENTS_BITS 8
213213
#define _PY_EVAL_EVENTS_MASK ((1 << _PY_EVAL_EVENTS_BITS)-1)
214214

215215
static inline void
216-
_Py_set_eval_breaker_bit(PyInterpreterState *interp, uint32_t bit, uint32_t set)
216+
_Py_set_eval_breaker_bit(PyThreadState *tstate, uintptr_t bit)
217217
{
218-
assert(set == 0 || set == 1);
219-
uintptr_t to_set = set << bit;
220-
uintptr_t mask = ((uintptr_t)1) << bit;
221-
uintptr_t old = _Py_atomic_load_uintptr(&interp->ceval.eval_breaker);
222-
if ((old & mask) == to_set) {
223-
return;
224-
}
225-
uintptr_t new;
226-
do {
227-
new = (old & ~mask) | to_set;
228-
} while (!_Py_atomic_compare_exchange_uintptr(&interp->ceval.eval_breaker, &old, new));
218+
_Py_atomic_or_uintptr(&tstate->eval_breaker, bit);
219+
}
220+
221+
static inline void
222+
_Py_unset_eval_breaker_bit(PyThreadState *tstate, uintptr_t bit)
223+
{
224+
_Py_atomic_and_uintptr(&tstate->eval_breaker, ~bit);
229225
}
230226

231-
static inline bool
232-
_Py_eval_breaker_bit_is_set(PyInterpreterState *interp, int32_t bit)
227+
static inline int
228+
_Py_eval_breaker_bit_is_set(PyThreadState *tstate, uintptr_t bit)
233229
{
234-
return _Py_atomic_load_uintptr_relaxed(&interp->ceval.eval_breaker) & (((uintptr_t)1) << bit);
230+
uintptr_t b = _Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker);
231+
return (b & bit) != 0;
235232
}
236233

234+
// Free-threaded builds use these functions to set or unset a bit on all
235+
// threads in the given interpreter.
236+
void _Py_set_eval_breaker_bit_all(PyInterpreterState *interp, uintptr_t bit);
237+
void _Py_unset_eval_breaker_bit_all(PyInterpreterState *interp, uintptr_t bit);
238+
237239

238240
#ifdef __cplusplus
239241
}

Include/internal/pycore_ceval_state.h

+4-7
Original file line number | Diff line number | Diff line change
@@ -78,13 +78,10 @@ struct _ceval_runtime_state {
7878

7979

8080
struct _ceval_state {
81-
/* This single variable consolidates all requests to break out of
82-
* the fast path in the eval loop.
83-
* It is by far the hottest field in this struct and
84-
* should be placed at the beginning. */
85-
uintptr_t eval_breaker;
86-
/* Avoid false sharing */
87-
int64_t padding[7];
81+
/* This variable holds the global instrumentation version. When a thread is
82+
running, this value is overlaid onto PyThreadState.eval_breaker so that
83+
changes in the instrumentation version will trigger the eval breaker. */
84+
uintptr_t instrumentation_version;
8885
int recursion_limit;
8986
struct _gil_runtime_state *gil;
9087
int own_gil;

Include/internal/pycore_gc.h

+1-1
Original file line number | Diff line number | Diff line change
@@ -286,7 +286,7 @@ extern PyObject *_PyGC_GetReferrers(PyInterpreterState *interp, PyObject *objs);
286286

287287
// Functions to clear types free lists
288288
extern void _PyGC_ClearAllFreeLists(PyInterpreterState *interp);
289-
extern void _Py_ScheduleGC(PyInterpreterState *interp);
289+
extern void _Py_ScheduleGC(PyThreadState *tstate);
290290
extern void _Py_RunGC(PyThreadState *tstate);
291291

292292
#ifdef __cplusplus

Include/internal/pycore_runtime.h

+3
Original file line number | Diff line number | Diff line change
@@ -191,7 +191,10 @@ typedef struct pyruntimestate {
191191
int64_t next_id;
192192
} interpreters;
193193

194+
/* Platform-specific identifier and PyThreadState, respectively, for the
195+
main thread in the main interpreter. */
194196
unsigned long main_thread;
197+
PyThreadState *main_tstate;
195198

196199
/* ---------- IMPORTANT ---------------------------
197200
The fields above this line are declared as early as
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
Every ``PyThreadState`` now has its own ``eval_breaker``, allowing specific threads to be interrupted.

Modules/signalmodule.c

+4-7
Original file line number | Diff line number | Diff line change
@@ -276,11 +276,7 @@ trip_signal(int sig_num)
276276
cleared in PyErr_CheckSignals() before .tripped. */
277277
_Py_atomic_store_int(&is_tripped, 1);
278278

279-
/* Signals are always handled by the main interpreter */
280-
PyInterpreterState *interp = _PyInterpreterState_Main();
281-
282-
/* Notify ceval.c */
283-
_PyEval_SignalReceived(interp);
279+
_PyEval_SignalReceived();
284280

285281
/* And then write to the wakeup fd *after* setting all the globals and
286282
doing the _PyEval_SignalReceived. We used to write to the wakeup fd
@@ -303,6 +299,7 @@ trip_signal(int sig_num)
303299

304300
int fd = wakeup.fd;
305301
if (fd != INVALID_FD) {
302+
PyInterpreterState *interp = _PyInterpreterState_Main();
306303
unsigned char byte = (unsigned char)sig_num;
307304
#ifdef MS_WINDOWS
308305
if (wakeup.use_send) {
@@ -1770,8 +1767,8 @@ PyErr_CheckSignals(void)
17701767
Python code to ensure signals are handled. Checking for the GC here
17711768
allows long running native code to clean cycles created using the C-API
17721769
even if it doesn't run the evaluation loop */
1773-
if (_Py_eval_breaker_bit_is_set(tstate->interp, _PY_GC_SCHEDULED_BIT)) {
1774-
_Py_set_eval_breaker_bit(tstate->interp, _PY_GC_SCHEDULED_BIT, 0);
1770+
if (_Py_eval_breaker_bit_is_set(tstate, _PY_GC_SCHEDULED_BIT)) {
1771+
_Py_unset_eval_breaker_bit(tstate, _PY_GC_SCHEDULED_BIT);
17751772
_Py_RunGC(tstate);
17761773
}
17771774

Python/brc.c

+1-1
Original file line number | Diff line number | Diff line change
@@ -94,7 +94,7 @@ _Py_brc_queue_object(PyObject *ob)
9494
}
9595

9696
// Notify owning thread
97-
_Py_set_eval_breaker_bit(interp, _PY_EVAL_EXPLICIT_MERGE_BIT, 1);
97+
_Py_set_eval_breaker_bit(&tstate->base, _PY_EVAL_EXPLICIT_MERGE_BIT);
9898

9999
PyMutex_Unlock(&bucket->mutex);
100100
}

Python/bytecodes.c

+3-4
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,6 @@
88

99
#include "Python.h"
1010
#include "pycore_abstract.h" // _PyIndex_Check()
11-
#include "pycore_ceval.h" // _PyEval_SignalAsyncExc()
1211
#include "pycore_code.h"
1312
#include "pycore_emscripten_signal.h" // _Py_CHECK_EMSCRIPTEN_SIGNALS
1413
#include "pycore_function.h"
@@ -146,7 +145,7 @@ dummy_func(
146145
TIER_ONE_ONLY
147146
assert(frame == tstate->current_frame);
148147
uintptr_t global_version =
149-
_Py_atomic_load_uintptr_relaxed(&tstate->interp->ceval.eval_breaker) &
148+
_Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker) &
150149
~_PY_EVAL_EVENTS_MASK;
151150
uintptr_t code_version = _PyFrame_GetCode(frame)->_co_instrumentation_version;
152151
assert((code_version & 255) == 0);
@@ -168,14 +167,14 @@ dummy_func(
168167
DEOPT_IF(_Py_emscripten_signal_clock == 0);
169168
_Py_emscripten_signal_clock -= Py_EMSCRIPTEN_SIGNAL_HANDLING;
170169
#endif
171-
uintptr_t eval_breaker = _Py_atomic_load_uintptr_relaxed(&tstate->interp->ceval.eval_breaker);
170+
uintptr_t eval_breaker = _Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker);
172171
uintptr_t version = _PyFrame_GetCode(frame)->_co_instrumentation_version;
173172
assert((version & _PY_EVAL_EVENTS_MASK) == 0);
174173
DEOPT_IF(eval_breaker != version);
175174
}
176175

177176
inst(INSTRUMENTED_RESUME, (--)) {
178-
uintptr_t global_version = _Py_atomic_load_uintptr_relaxed(&tstate->interp->ceval.eval_breaker) & ~_PY_EVAL_EVENTS_MASK;
177+
uintptr_t global_version = _Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker) & ~_PY_EVAL_EVENTS_MASK;
179178
uintptr_t code_version = _PyFrame_GetCode(frame)->_co_instrumentation_version;
180179
if (code_version != global_version) {
181180
if (_Py_Instrument(_PyFrame_GetCode(frame), tstate->interp)) {

Python/ceval.c

+1-1
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55
#include "Python.h"
66
#include "pycore_abstract.h" // _PyIndex_Check()
77
#include "pycore_call.h" // _PyObject_CallNoArgs()
8-
#include "pycore_ceval.h" // _PyEval_SignalAsyncExc()
8+
#include "pycore_ceval.h"
99
#include "pycore_code.h"
1010
#include "pycore_emscripten_signal.h" // _Py_CHECK_EMSCRIPTEN_SIGNALS
1111
#include "pycore_function.h"

0 commit comments

Comments
 (0)