Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit cbfc9a5

Browse files
committedFeb 7, 2024
pythongh-115103: Implement delayed memory reclamation (QSBR)
1 parent fedbf77 commit cbfc9a5

13 files changed

+485
-0
lines changed
 

‎Doc/license.rst

+32
Original file line numberDiff line numberDiff line change
@@ -1095,3 +1095,35 @@ which is distributed under the MIT license::
10951095
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
10961096
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
10971097
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
1098+
1099+
1100+
Global Unbounded Sequences (GUS)
1101+
--------------------------------
1102+
1103+
The file :file:`Python/qsbr.c` is adapted from FreeBSD's "Global Unbounded
1104+
Sequences" safe memory reclamation scheme in
1105+
`subr_smr.c <https://github.com/freebsd/freebsd-src/blob/main/sys/kern/subr_smr.c>`_.
1106+
The file is distributed under the 2-Clause BSD License::
1107+
1108+
Copyright (c) 2019,2020 Jeffrey Roberson <jeff@FreeBSD.org>
1109+
1110+
Redistribution and use in source and binary forms, with or without
1111+
modification, are permitted provided that the following conditions
1112+
are met:
1113+
1. Redistributions of source code must retain the above copyright
1114+
notice unmodified, this list of conditions, and the following
1115+
disclaimer.
1116+
2. Redistributions in binary form must reproduce the above copyright
1117+
notice, this list of conditions and the following disclaimer in the
1118+
documentation and/or other materials provided with the distribution.
1119+
1120+
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
1121+
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
1122+
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
1123+
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
1124+
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
1125+
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
1126+
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
1127+
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
1128+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
1129+
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

‎Include/internal/pycore_interp.h

+2
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ extern "C" {
3131
#include "pycore_mimalloc.h" // struct _mimalloc_interp_state
3232
#include "pycore_object_state.h" // struct _py_object_state
3333
#include "pycore_obmalloc.h" // struct _obmalloc_state
34+
#include "pycore_qsbr.h" // struct _qsbr_state
3435
#include "pycore_tstate.h" // _PyThreadStateImpl
3536
#include "pycore_tuple.h" // struct _Py_tuple_state
3637
#include "pycore_typeobject.h" // struct types_state
@@ -198,6 +199,7 @@ struct _is {
198199
struct _warnings_runtime_state warnings;
199200
struct atexit_state atexit;
200201
struct _stoptheworld_state stoptheworld;
202+
struct _qsbr_shared qsbr;
201203

202204
#if defined(Py_GIL_DISABLED)
203205
struct _mimalloc_interp_state mimalloc;

‎Include/internal/pycore_qsbr.h

+117
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
#ifndef Py_INTERNAL_QSBR_H
2+
#define Py_INTERNAL_QSBR_H
3+
4+
#include <stdbool.h>
5+
#include <stdint.h>
6+
#include "pycore_lock.h" // PyMutex
7+
8+
#ifdef __cplusplus
9+
extern "C" {
10+
#endif
11+
12+
#ifndef Py_BUILD_CORE
13+
# error "this header requires Py_BUILD_CORE define"
14+
#endif
15+
16+
struct _qsbr_shared;
17+
struct _PyThreadStateImpl; // forward declare to avoid circular dependency
18+
19+
// Per-thread state
20+
struct _qsbr_thread_state {
21+
// Last observed write sequence (or 0 if detached)
22+
uint64_t seq;
23+
24+
// Shared (per-interpreter) QSBR state
25+
struct _qsbr_shared *shared;
26+
27+
// Thread state (or NULL)
28+
PyThreadState *tstate;
29+
30+
// Used to defer advancing write sequence a fixed number of times
31+
int deferrals;
32+
33+
// Is this thread state allocated?
34+
bool allocated;
35+
struct _qsbr_thread_state *freelist_next;
36+
};
37+
38+
// Padding to avoid false sharing
39+
struct _qsbr_pad {
40+
struct _qsbr_thread_state qsbr;
41+
char __padding[64 - sizeof(struct _qsbr_thread_state)];
42+
};
43+
44+
// Per-interpreter state
45+
struct _qsbr_shared {
46+
// Always odd, incremented by two
47+
uint64_t wr_seq;
48+
49+
// Minimum observed read sequence
50+
uint64_t rd_seq;
51+
52+
// Array of QSBR thread states.
53+
struct _qsbr_pad *array;
54+
Py_ssize_t size;
55+
56+
// Freelist of unused _qsbr_thread_states (protected by mutex)
57+
PyMutex mutex;
58+
struct _qsbr_thread_state *freelist;
59+
};
60+
61+
static inline uint64_t
62+
_Py_qsbr_shared_current(struct _qsbr_shared *shared)
63+
{
64+
return _Py_atomic_load_uint64(&shared->wr_seq); // at least acquire
65+
}
66+
67+
static inline void
68+
_Py_qsbr_quiescent_state(struct _qsbr_thread_state *qsbr)
69+
{
70+
uint64_t seq = _Py_qsbr_shared_current(qsbr->shared);
71+
_Py_atomic_store_uint64_relaxed(&qsbr->seq, seq); // probably release
72+
}
73+
74+
// Advance the write sequence and return the new goal.
75+
extern uint64_t
76+
_Py_qsbr_advance(struct _qsbr_shared *shared);
77+
78+
// Batches requests to advance the write sequence. This advances the write
79+
// sequence every N calls. Returns the new goal.
80+
extern uint64_t
81+
_Py_qsbr_deferred_advance(struct _qsbr_thread_state *qsbr);
82+
83+
// Have the read sequences advanced to the given goal?
84+
extern bool
85+
_Py_qsbr_poll(struct _qsbr_thread_state *qsbr, uint64_t goal);
86+
87+
// Called when thread attaches to interpreter
88+
extern void
89+
_Py_qsbr_attach(struct _qsbr_thread_state *qsbr);
90+
91+
// Called when thread detaches from interpreter
92+
extern void
93+
_Py_qsbr_detach(struct _qsbr_thread_state *qsbr);
94+
95+
// Reserves (allocates) a QSBR state and returns its index
96+
extern Py_ssize_t
97+
_Py_qsbr_reserve(PyInterpreterState *interp);
98+
99+
// Associates a PyThreadState with the QSBR state at the given index
100+
extern void
101+
_Py_qsbr_register(struct _PyThreadStateImpl *tstate,
102+
PyInterpreterState *interp, Py_ssize_t index);
103+
104+
// Disassociates a PyThreadState from the QSBR state and frees the QSBR state.
105+
extern void
106+
_Py_qsbr_unregister(struct _PyThreadStateImpl *tstate);
107+
108+
extern void
109+
_Py_qsbr_fini(PyInterpreterState *interp);
110+
111+
extern void
112+
_Py_qsbr_after_fork(struct _qsbr_shared *shared, struct _qsbr_thread_state *qsbr);
113+
114+
#ifdef __cplusplus
115+
}
116+
#endif
117+
#endif /* !Py_INTERNAL_QSBR_H */

‎Include/internal/pycore_runtime_init.h

+4
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,10 @@ extern PyTypeObject _PyExc_MemoryError;
169169
{ .threshold = 10, }, \
170170
}, \
171171
}, \
172+
.qsbr = { \
173+
.wr_seq = 1, \
174+
.rd_seq = 1, \
175+
}, \
172176
.object_state = _py_object_state_INIT(INTERP), \
173177
.dtoa = _dtoa_state_INIT(&(INTERP)), \
174178
.dict_state = _dict_state_INIT, \

‎Include/internal/pycore_tstate.h

+2
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ extern "C" {
1010

1111
#include "pycore_freelist.h" // struct _Py_freelist_state
1212
#include "pycore_mimalloc.h" // struct _mimalloc_thread_state
13+
#include "pycore_qsbr.h" // struct qsbr
1314

1415

1516
// Every PyThreadState is actually allocated as a _PyThreadStateImpl. The
@@ -20,6 +21,7 @@ typedef struct _PyThreadStateImpl {
2021
PyThreadState base;
2122

2223
#ifdef Py_GIL_DISABLED
24+
struct _qsbr_thread_state *qsbr;
2325
struct _mimalloc_thread_state mimalloc;
2426
struct _Py_freelist_state freelist_state;
2527
#endif

‎Makefile.pre.in

+2
Original file line numberDiff line numberDiff line change
@@ -455,6 +455,7 @@ PYTHON_OBJS= \
455455
Python/pystate.o \
456456
Python/pythonrun.o \
457457
Python/pytime.o \
458+
Python/qsbr.o \
458459
Python/bootstrap_hash.o \
459460
Python/specialize.o \
460461
Python/structmember.o \
@@ -1158,6 +1159,7 @@ PYTHON_HEADERS= \
11581159
$(srcdir)/Include/internal/pycore_pystats.h \
11591160
$(srcdir)/Include/internal/pycore_pythonrun.h \
11601161
$(srcdir)/Include/internal/pycore_pythread.h \
1162+
$(srcdir)/Include/internal/pycore_qsbr.h \
11611163
$(srcdir)/Include/internal/pycore_range.h \
11621164
$(srcdir)/Include/internal/pycore_runtime.h \
11631165
$(srcdir)/Include/internal/pycore_runtime_init.h \

‎PCbuild/_freeze_module.vcxproj

+1
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,7 @@
252252
<ClCompile Include="..\Python\pythonrun.c" />
253253
<ClCompile Include="..\Python\Python-tokenize.c" />
254254
<ClCompile Include="..\Python\pytime.c" />
255+
<ClCompile Include="..\Python\qsbr.c" />
255256
<ClCompile Include="..\Python\specialize.c" />
256257
<ClCompile Include="..\Python\structmember.c" />
257258
<ClCompile Include="..\Python\suggestions.c" />

‎PCbuild/_freeze_module.vcxproj.filters

+3
Original file line numberDiff line numberDiff line change
@@ -373,6 +373,9 @@
373373
<ClCompile Include="..\Python\pytime.c">
374374
<Filter>Source Files</Filter>
375375
</ClCompile>
376+
<ClCompile Include="..\Python\qsbr.c">
377+
<Filter>Source Files</Filter>
378+
</ClCompile>
376379
<ClCompile Include="..\Objects\rangeobject.c">
377380
<Filter>Source Files</Filter>
378381
</ClCompile>

‎PCbuild/pythoncore.vcxproj

+2
Original file line numberDiff line numberDiff line change
@@ -274,6 +274,7 @@
274274
<ClInclude Include="..\Include\internal\pycore_pystats.h" />
275275
<ClInclude Include="..\Include\internal\pycore_pythonrun.h" />
276276
<ClInclude Include="..\Include\internal\pycore_pythread.h" />
277+
<ClInclude Include="..\Include\internal\pycore_qsbr.h" />
277278
<ClInclude Include="..\Include\internal\pycore_range.h" />
278279
<ClInclude Include="..\Include\internal\pycore_runtime.h" />
279280
<ClInclude Include="..\Include\internal\pycore_runtime_init.h" />
@@ -611,6 +612,7 @@
611612
<ClCompile Include="..\Python\pystrcmp.c" />
612613
<ClCompile Include="..\Python\pystrhex.c" />
613614
<ClCompile Include="..\Python\pystrtod.c" />
615+
<ClCompile Include="..\Python\qsbr.c" />
614616
<ClCompile Include="..\Python\dtoa.c" />
615617
<ClCompile Include="..\Python\Python-ast.c" />
616618
<ClCompile Include="..\Python\Python-tokenize.c" />

‎PCbuild/pythoncore.vcxproj.filters

+6
Original file line numberDiff line numberDiff line change
@@ -747,6 +747,9 @@
747747
<ClInclude Include="..\Include\internal\pycore_pythread.h">
748748
<Filter>Include\internal</Filter>
749749
</ClInclude>
750+
<ClInclude Include="..\Include\internal\pycore_qsbr.h">
751+
<Filter>Include\internal</Filter>
752+
</ClInclude>
750753
<ClInclude Include="..\Include\internal\pycore_range.h">
751754
<Filter>Include\internal</Filter>
752755
</ClInclude>
@@ -1412,6 +1415,9 @@
14121415
<ClCompile Include="..\Python\pystrtod.c">
14131416
<Filter>Python</Filter>
14141417
</ClCompile>
1418+
<ClCompile Include="..\Python\qsbr.c">
1419+
<Filter>Python</Filter>
1420+
</ClCompile>
14151421
<ClCompile Include="..\Python\dtoa.c">
14161422
<Filter>Python</Filter>
14171423
</ClCompile>

‎Python/ceval_macros.h

+7
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,12 @@
8686
#define PRE_DISPATCH_GOTO() ((void)0)
8787
#endif
8888

89+
#ifdef Py_GIL_DISABLED
90+
#define QSBR_QUIESCENT_STATE(tstate) _Py_qsbr_quiescent_state(((_PyThreadStateImpl *)tstate)->qsbr)
91+
#else
92+
#define QSBR_QUIESCENT_STATE(tstate)
93+
#endif
94+
8995

9096
/* Do interpreter dispatch accounting for tracing and instrumentation */
9197
#define DISPATCH() \
@@ -117,6 +123,7 @@
117123

118124
#define CHECK_EVAL_BREAKER() \
119125
_Py_CHECK_EMSCRIPTEN_SIGNALS_PERIODICALLY(); \
126+
QSBR_QUIESCENT_STATE(tstate); \
120127
if (_Py_atomic_load_uintptr_relaxed(&tstate->interp->ceval.eval_breaker) & _PY_EVAL_EVENTS_MASK) { \
121128
if (_Py_HandlePending(tstate) != 0) { \
122129
GOTO_ERROR(error); \

‎Python/pystate.c

+27
Original file line numberDiff line numberDiff line change
@@ -951,6 +951,8 @@ PyInterpreterState_Delete(PyInterpreterState *interp)
951951
PyThread_free_lock(interp->id_mutex);
952952
}
953953

954+
_Py_qsbr_fini(interp);
955+
954956
_PyObject_FiniState(interp);
955957

956958
free_interpreter(interp);
@@ -1372,6 +1374,14 @@ new_threadstate(PyInterpreterState *interp, int whence)
13721374
if (new_tstate == NULL) {
13731375
return NULL;
13741376
}
1377+
#ifdef Py_GIL_DISABLED
1378+
Py_ssize_t qsbr_idx = _Py_qsbr_reserve(interp);
1379+
if (qsbr_idx < 0) {
1380+
PyMem_RawFree(new_tstate);
1381+
return NULL;
1382+
}
1383+
#endif
1384+
13751385
/* We serialize concurrent creation to protect global state. */
13761386
HEAD_LOCK(runtime);
13771387

@@ -1398,6 +1408,9 @@ new_threadstate(PyInterpreterState *interp, int whence)
13981408
sizeof(*tstate));
13991409
}
14001410

1411+
#ifdef Py_GIL_DISABLED
1412+
_Py_qsbr_register(tstate, interp, qsbr_idx);
1413+
#endif
14011414
init_threadstate(tstate, interp, id, whence);
14021415
add_threadstate(interp, (PyThreadState *)tstate, old_head);
14031416

@@ -1609,6 +1622,10 @@ tstate_delete_common(PyThreadState *tstate)
16091622
}
16101623
HEAD_UNLOCK(runtime);
16111624

1625+
#ifdef Py_GIL_DISABLED
1626+
_Py_qsbr_unregister((_PyThreadStateImpl *)tstate);
1627+
#endif
1628+
16121629
// XXX Unbind in PyThreadState_Clear(), or earlier
16131630
// (and assert not-equal here)?
16141631
if (tstate->_status.bound_gilstate) {
@@ -1650,6 +1667,9 @@ void
16501667
_PyThreadState_DeleteCurrent(PyThreadState *tstate)
16511668
{
16521669
_Py_EnsureTstateNotNULL(tstate);
1670+
#ifdef Py_GIL_DISABLED
1671+
_Py_qsbr_detach(((_PyThreadStateImpl *)tstate)->qsbr);
1672+
#endif
16531673
tstate_set_detached(tstate);
16541674
tstate_delete_common(tstate);
16551675
current_fast_clear(tstate->interp->runtime);
@@ -1871,6 +1891,10 @@ _PyThreadState_Attach(PyThreadState *tstate)
18711891
tstate_wait_attach(tstate);
18721892
}
18731893

1894+
#ifdef Py_GIL_DISABLED
1895+
_Py_qsbr_attach(((_PyThreadStateImpl *)tstate)->qsbr);
1896+
#endif
1897+
18741898
// Resume previous critical section. This acquires the lock(s) from the
18751899
// top-most critical section.
18761900
if (tstate->critical_section != 0) {
@@ -1891,6 +1915,9 @@ detach_thread(PyThreadState *tstate, int detached_state)
18911915
if (tstate->critical_section != 0) {
18921916
_PyCriticalSection_SuspendAll(tstate);
18931917
}
1918+
#ifdef Py_GIL_DISABLED
1919+
_Py_qsbr_detach(((_PyThreadStateImpl *)tstate)->qsbr);
1920+
#endif
18941921
tstate_deactivate(tstate);
18951922
tstate_set_detached(tstate);
18961923
current_fast_clear(&_PyRuntime);

‎Python/qsbr.c

+280
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,280 @@
1+
/*
2+
* Implementation of safe memory reclamation scheme using
3+
* quiescent states.
4+
*
5+
* This is dervied from the "GUS" safe memory reclamation technique
6+
* in FreeBSD written by Jeffrey Roberson. It is heavily modified. Any bugs
7+
* in this code are likely due to the modifications.
8+
*
9+
* The original copyright is preserved below.
10+
*
11+
* Copyright (c) 2019,2020 Jeffrey Roberson <jeff@FreeBSD.org>
12+
*
13+
* Redistribution and use in source and binary forms, with or without
14+
* modification, are permitted provided that the following conditions
15+
* are met:
16+
* 1. Redistributions of source code must retain the above copyright
17+
* notice unmodified, this list of conditions, and the following
18+
* disclaimer.
19+
* 2. Redistributions in binary form must reproduce the above copyright
20+
* notice, this list of conditions and the following disclaimer in the
21+
* documentation and/or other materials provided with the distribution.
22+
*
23+
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
24+
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
25+
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
26+
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
27+
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
28+
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29+
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30+
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31+
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
32+
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33+
*/
34+
#include "Python.h"
35+
#include "pycore_initconfig.h" // _PyStatus_NO_MEMORY()
36+
#include "pycore_lock.h" // PyMutex_Lock()
37+
#include "pycore_qsbr.h"
38+
#include "pycore_pystate.h" // _PyThreadState_GET()
39+
40+
41+
// Wrap-around safe comparison
42+
#define QSBR_LT(a, b) ((int64_t)((a)-(b)) < 0)
43+
#define QSBR_LEQ(a, b) ((int64_t)((a)-(b)) <= 0)
44+
45+
// Starting size of the array of qsbr thread states
46+
#define MIN_ARRAY_SIZE 8
47+
48+
// The shared write sequence is always odd and incremented by two. Detached
49+
// threads are indicated by a read sequence of zero.
50+
#define QSBR_OFFLINE 0
51+
#define QSBR_INITIAL 1
52+
#define QSBR_INCR 2
53+
54+
// For _Py_qsbr_deferred_advance(): the number of deferrals before advancing
55+
// the write sequence.
56+
#define QSBR_DEFERRED_LIMIT 10
57+
58+
// Allocate a QSBR thread state from the freelist
59+
struct _qsbr_thread_state *
60+
qsbr_allocate(struct _qsbr_shared *shared)
61+
{
62+
struct _qsbr_thread_state *qsbr = shared->freelist;
63+
if (qsbr == NULL) {
64+
return NULL;
65+
}
66+
shared->freelist = qsbr->freelist_next;
67+
qsbr->freelist_next = NULL;
68+
qsbr->shared = shared;
69+
qsbr->allocated = true;
70+
return qsbr;
71+
}
72+
73+
// Initialize (or reintialize) the freelist of QSBR thread states
74+
static void
75+
initialize_freelist(struct _qsbr_shared *shared)
76+
{
77+
for (Py_ssize_t i = 0; i != shared->size; i++) {
78+
struct _qsbr_thread_state *qsbr = &shared->array[i].qsbr;
79+
if (qsbr->tstate != NULL) {
80+
// Update the thread state pointer to its QSBR state
81+
_PyThreadStateImpl *tstate = (_PyThreadStateImpl *)qsbr->tstate;
82+
tstate->qsbr = qsbr;
83+
}
84+
if (!qsbr->allocated) {
85+
// Push to freelist
86+
qsbr->freelist_next = shared->freelist;
87+
shared->freelist = qsbr;
88+
}
89+
}
90+
}
91+
92+
// Grow the array of QSBR thread states. Returns 0 on success, -1 on failure.
93+
static int
94+
grow_thread_array(struct _qsbr_shared *shared)
95+
{
96+
Py_ssize_t new_size = shared->size * 2;
97+
if (new_size < MIN_ARRAY_SIZE) {
98+
new_size = MIN_ARRAY_SIZE;
99+
}
100+
101+
struct _qsbr_pad *array = PyMem_RawCalloc(new_size, sizeof(*array));
102+
if (array == NULL) {
103+
return -1;
104+
}
105+
106+
struct _qsbr_pad *old = shared->array;
107+
if (old != NULL) {
108+
memcpy(array, shared->array, shared->size * sizeof(*array));
109+
}
110+
111+
shared->array = array;
112+
shared->size = new_size;
113+
shared->freelist = NULL;
114+
initialize_freelist(shared);
115+
116+
PyMem_RawFree(old);
117+
return 0;
118+
}
119+
120+
uint64_t
121+
_Py_qsbr_advance(struct _qsbr_shared *shared)
122+
{
123+
return _Py_atomic_add_uint64(&shared->wr_seq, QSBR_INCR) + QSBR_INCR;
124+
}
125+
126+
uint64_t
127+
_Py_qsbr_deferred_advance(struct _qsbr_thread_state *qsbr)
128+
{
129+
if (++qsbr->deferrals < QSBR_DEFERRED_LIMIT) {
130+
return _Py_qsbr_shared_current(qsbr->shared) + QSBR_INCR;
131+
}
132+
qsbr->deferrals = 0;
133+
return _Py_qsbr_advance(qsbr->shared);
134+
}
135+
136+
static uint64_t
137+
qsbr_poll_scan(struct _qsbr_shared *shared)
138+
{
139+
// Compute the minimum sequence number of all attached threads
140+
uint64_t min_seq = _Py_atomic_load_uint64(&shared->wr_seq);
141+
struct _qsbr_pad *array = shared->array;
142+
for (Py_ssize_t i = 0, size = shared->size; i != size; i++) {
143+
struct _qsbr_thread_state *qsbr = &array[i].qsbr;
144+
145+
uint64_t seq = _Py_atomic_load_uint64(&qsbr->seq);
146+
if (seq != QSBR_OFFLINE && QSBR_LT(seq, min_seq)) {
147+
min_seq = seq;
148+
}
149+
}
150+
151+
// Update the shared read sequence
152+
uint64_t rd_seq = _Py_atomic_load_uint64(&shared->rd_seq);
153+
if (QSBR_LT(rd_seq, min_seq)) {
154+
// It's okay if the compare-exchange failed: another thread updated it
155+
(void)_Py_atomic_compare_exchange_uint64(&shared->rd_seq, &rd_seq, min_seq);
156+
rd_seq = min_seq;
157+
}
158+
159+
return rd_seq;
160+
}
161+
162+
bool
163+
_Py_qsbr_poll(struct _qsbr_thread_state *qsbr, uint64_t goal)
164+
{
165+
assert(_PyThreadState_GET()->state == _Py_THREAD_ATTACHED);
166+
167+
uint64_t rd_seq = _Py_atomic_load_uint64(&qsbr->shared->rd_seq);
168+
if (QSBR_LEQ(goal, rd_seq)) {
169+
return true;
170+
}
171+
172+
rd_seq = qsbr_poll_scan(qsbr->shared);
173+
return QSBR_LEQ(goal, rd_seq);
174+
}
175+
176+
void
177+
_Py_qsbr_attach(struct _qsbr_thread_state *qsbr)
178+
{
179+
assert(qsbr->seq == 0 && "already attached");
180+
181+
uint64_t seq = _Py_qsbr_shared_current(qsbr->shared);
182+
_Py_atomic_store_uint64_relaxed(&qsbr->seq, seq);
183+
184+
// ensure update to local counter is visible
185+
_Py_atomic_fence_seq_cst();
186+
}
187+
188+
void
189+
_Py_qsbr_detach(struct _qsbr_thread_state *qsbr)
190+
{
191+
assert(qsbr->seq != 0 && "already detached");
192+
193+
_Py_atomic_fence_release();
194+
_Py_atomic_store_uint64_relaxed(&qsbr->seq, QSBR_OFFLINE);
195+
}
196+
197+
Py_ssize_t
198+
_Py_qsbr_reserve(PyInterpreterState *interp)
199+
{
200+
struct _qsbr_shared *shared = &interp->qsbr;
201+
202+
PyMutex_LockFlags(&shared->mutex, _Py_LOCK_DONT_DETACH);
203+
struct _qsbr_thread_state *qsbr = qsbr_allocate(shared);
204+
205+
if (qsbr == NULL) {
206+
_PyEval_StopTheWorld(interp);
207+
if (grow_thread_array(shared) == 0) {
208+
qsbr = qsbr_allocate(shared);
209+
}
210+
_PyEval_StartTheWorld(interp);
211+
}
212+
PyMutex_Unlock(&shared->mutex);
213+
214+
if (qsbr == NULL) {
215+
return -1;
216+
}
217+
218+
// Compute index in the shared array from the pointer
219+
return (struct _qsbr_pad *)qsbr - shared->array;
220+
}
221+
222+
void
223+
_Py_qsbr_register(_PyThreadStateImpl *tstate, PyInterpreterState *interp,
224+
Py_ssize_t index)
225+
{
226+
struct _qsbr_shared *shared = &interp->qsbr;
227+
228+
// NOTE: this function is called with runtime locked, so we don't detach
229+
// while waiting for the lock. This prevents a stop-the-world pause
230+
// while the runtime lock is held, which could lead to deadlock.
231+
PyMutex_LockFlags(&shared->mutex, _Py_LOCK_DONT_DETACH);
232+
struct _qsbr_thread_state *qsbr = &interp->qsbr.array[index].qsbr;
233+
assert(qsbr->allocated);
234+
assert(qsbr->tstate == NULL);
235+
qsbr->tstate = (PyThreadState *)tstate;
236+
tstate->qsbr = qsbr;
237+
PyMutex_Unlock(&shared->mutex);
238+
}
239+
240+
void
241+
_Py_qsbr_unregister(_PyThreadStateImpl *tstate)
242+
{
243+
struct _qsbr_thread_state *qsbr = tstate->qsbr;
244+
struct _qsbr_shared *shared = qsbr->shared;
245+
246+
assert(qsbr->seq == 0 && "thread state must be detached");
247+
248+
PyMutex_LockFlags(&shared->mutex, _Py_LOCK_DONT_DETACH);
249+
qsbr->tstate = NULL;
250+
qsbr->allocated = false;
251+
qsbr->freelist_next = shared->freelist;
252+
shared->freelist = qsbr;
253+
PyMutex_Unlock(&shared->mutex);
254+
}
255+
256+
void
257+
_Py_qsbr_fini(PyInterpreterState *interp)
258+
{
259+
struct _qsbr_shared *shared = &interp->qsbr;
260+
PyMem_RawFree(shared->array);
261+
shared->array = NULL;
262+
shared->size = 0;
263+
shared->freelist = NULL;
264+
}
265+
266+
void
267+
_Py_qsbr_after_fork(struct _qsbr_shared *shared, struct _qsbr_thread_state *this_qsbr)
268+
{
269+
_PyMutex_at_fork_reinit(&shared->mutex);
270+
271+
for (Py_ssize_t i = 0; i != shared->size; i++) {
272+
struct _qsbr_thread_state *qsbr = &shared->array[i].qsbr;
273+
if (qsbr != this_qsbr && qsbr->tstate != NULL) {
274+
qsbr->tstate = NULL;
275+
qsbr->allocated = false;
276+
qsbr->freelist_next = shared->freelist;
277+
shared->freelist = qsbr;
278+
}
279+
}
280+
}

0 commit comments

Comments
 (0)
Please sign in to comment.