Skip to content

Commit b24c916

Browse files
authored
gh-112529: Make the GC scheduling thread-safe (#114880)
The GC keeps track of the number of allocations (minus deallocations) since the last GC. This buffers the count in thread-local state and uses atomic operations to modify the per-interpreter count. The thread-local buffering avoids contention on shared state. A consequence is that the GC scheduling is not as precise, so "test_sneaky_frame_object" is skipped because it requires that the GC be run exactly after allocating a frame object.
1 parent f92857a commit b24c916

File tree

7 files changed

+71
-16
lines changed

7 files changed

+71
-16
lines changed

Include/internal/pycore_gc.h

+7
Original file line numberDiff line numberDiff line change
@@ -260,6 +260,13 @@ struct _gc_runtime_state {
260260
Py_ssize_t long_lived_pending;
261261
};
262262

263+
#ifdef Py_GIL_DISABLED
264+
struct _gc_thread_state {
265+
/* Thread-local allocation count. */
266+
Py_ssize_t alloc_count;
267+
};
268+
#endif
269+
263270

264271
extern void _PyGC_InitState(struct _gc_runtime_state *);
265272

Include/internal/pycore_tstate.h

+1
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ typedef struct _PyThreadStateImpl {
2828
PyThreadState base;
2929

3030
#ifdef Py_GIL_DISABLED
31+
struct _gc_thread_state gc;
3132
struct _mimalloc_thread_state mimalloc;
3233
struct _Py_object_freelists freelists;
3334
struct _brc_thread_state brc;

Lib/test/test_frame.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
_testcapi = None
1414

1515
from test import support
16-
from test.support import threading_helper
16+
from test.support import threading_helper, Py_GIL_DISABLED
1717
from test.support.script_helper import assert_python_ok
1818

1919

@@ -294,6 +294,7 @@ def gen():
294294
assert_python_ok("-c", code)
295295

296296
@support.cpython_only
297+
@unittest.skipIf(Py_GIL_DISABLED, "test requires precise GC scheduling")
297298
def test_sneaky_frame_object(self):
298299

299300
def trace(frame, event, arg):

Lib/test/test_gc.py

+1
Original file line numberDiff line numberDiff line change
@@ -363,6 +363,7 @@ def __del__(self):
363363
# To minimize variations, though, we first store the get_count() results
364364
# and check them at the end.
365365
@refcount_test
366+
@unittest.skipIf(Py_GIL_DISABLED, 'needs precise allocation counts')
366367
def test_get_count(self):
367368
gc.collect()
368369
a, b, c = gc.get_count()

Modules/gcmodule.c

+10
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,16 @@ gc_get_count_impl(PyObject *module)
201201
/*[clinic end generated code: output=354012e67b16398f input=a392794a08251751]*/
202202
{
203203
GCState *gcstate = get_gc_state();
204+
205+
#ifdef Py_GIL_DISABLED
206+
_PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
207+
struct _gc_thread_state *gc = &tstate->gc;
208+
209+
// Flush the local allocation count to the global count
210+
_Py_atomic_add_int(&gcstate->generations[0].count, (int)gc->alloc_count);
211+
gc->alloc_count = 0;
212+
#endif
213+
204214
return Py_BuildValue("(iii)",
205215
gcstate->generations[0].count,
206216
gcstate->generations[1].count,

Objects/typeobject.c

+2
Original file line numberDiff line numberDiff line change
@@ -1835,6 +1835,8 @@ _PyType_AllocNoTrack(PyTypeObject *type, Py_ssize_t nitems)
18351835
if (presize) {
18361836
((PyObject **)alloc)[0] = NULL;
18371837
((PyObject **)alloc)[1] = NULL;
1838+
}
1839+
if (PyType_IS_GC(type)) {
18381840
_PyObject_GC_Link(obj);
18391841
}
18401842
memset(obj, '\0', size);

Python/gc_free_threading.c

+48-15
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,11 @@ typedef struct _gc_runtime_state GCState;
2323
# define GC_DEBUG
2424
#endif
2525

26+
// Each thread buffers the count of allocated objects in a thread-local
27+
// variable up to +/- this amount to reduce the overhead of updating
28+
// the global count.
29+
#define LOCAL_ALLOC_COUNT_THRESHOLD 512
30+
2631
// Automatically choose the generation that needs collecting.
2732
#define GENERATION_AUTO (-1)
2833

@@ -959,6 +964,41 @@ gc_should_collect(GCState *gcstate)
959964
gcstate->generations[1].threshold == 0);
960965
}
961966

967+
static void
968+
record_allocation(PyThreadState *tstate)
969+
{
970+
struct _gc_thread_state *gc = &((_PyThreadStateImpl *)tstate)->gc;
971+
972+
// We buffer the allocation count to avoid the overhead of atomic
973+
// operations for every allocation.
974+
gc->alloc_count++;
975+
if (gc->alloc_count >= LOCAL_ALLOC_COUNT_THRESHOLD) {
976+
// TODO: Use Py_ssize_t for the generation count.
977+
GCState *gcstate = &tstate->interp->gc;
978+
_Py_atomic_add_int(&gcstate->generations[0].count, (int)gc->alloc_count);
979+
gc->alloc_count = 0;
980+
981+
if (gc_should_collect(gcstate) &&
982+
!_Py_atomic_load_int_relaxed(&gcstate->collecting))
983+
{
984+
_Py_ScheduleGC(tstate->interp);
985+
}
986+
}
987+
}
988+
989+
static void
990+
record_deallocation(PyThreadState *tstate)
991+
{
992+
struct _gc_thread_state *gc = &((_PyThreadStateImpl *)tstate)->gc;
993+
994+
gc->alloc_count--;
995+
if (gc->alloc_count <= -LOCAL_ALLOC_COUNT_THRESHOLD) {
996+
GCState *gcstate = &tstate->interp->gc;
997+
_Py_atomic_add_int(&gcstate->generations[0].count, (int)gc->alloc_count);
998+
gc->alloc_count = 0;
999+
}
1000+
}
1001+
9621002
static void
9631003
gc_collect_internal(PyInterpreterState *interp, struct collection_state *state)
9641004
{
@@ -981,6 +1021,9 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state)
9811021
}
9821022
}
9831023

1024+
// Record the number of live GC objects
1025+
interp->gc.long_lived_total = state->long_lived_total;
1026+
9841027
// Clear weakrefs and enqueue callbacks (but do not call them).
9851028
clear_weakrefs(state);
9861029
_PyEval_StartTheWorld(interp);
@@ -1090,7 +1133,6 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason)
10901133

10911134
m = state.collected;
10921135
n = state.uncollectable;
1093-
gcstate->long_lived_total = state.long_lived_total;
10941136

10951137
if (gcstate->debug & _PyGC_DEBUG_STATS) {
10961138
double d = _PyTime_AsSecondsDouble(_PyTime_GetPerfCounter() - t1);
@@ -1530,15 +1572,7 @@ _Py_ScheduleGC(PyInterpreterState *interp)
15301572
void
15311573
_PyObject_GC_Link(PyObject *op)
15321574
{
1533-
PyThreadState *tstate = _PyThreadState_GET();
1534-
GCState *gcstate = &tstate->interp->gc;
1535-
gcstate->generations[0].count++;
1536-
1537-
if (gc_should_collect(gcstate) &&
1538-
!_Py_atomic_load_int_relaxed(&gcstate->collecting))
1539-
{
1540-
_Py_ScheduleGC(tstate->interp);
1541-
}
1575+
record_allocation(_PyThreadState_GET());
15421576
}
15431577

15441578
void
@@ -1564,7 +1598,7 @@ gc_alloc(PyTypeObject *tp, size_t basicsize, size_t presize)
15641598
((PyObject **)mem)[1] = NULL;
15651599
}
15661600
PyObject *op = (PyObject *)(mem + presize);
1567-
_PyObject_GC_Link(op);
1601+
record_allocation(tstate);
15681602
return op;
15691603
}
15701604

@@ -1646,10 +1680,9 @@ PyObject_GC_Del(void *op)
16461680
PyErr_SetRaisedException(exc);
16471681
#endif
16481682
}
1649-
GCState *gcstate = get_gc_state();
1650-
if (gcstate->generations[0].count > 0) {
1651-
gcstate->generations[0].count--;
1652-
}
1683+
1684+
record_deallocation(_PyThreadState_GET());
1685+
16531686
PyObject_Free(((char *)op)-presize);
16541687
}
16551688

0 commit comments

Comments
 (0)