From 05af6b2df99b787c9dd04bda233cb94de95fe727 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Wed, 25 Sep 2024 11:33:28 -0700 Subject: [PATCH 01/16] Make specialization of LOAD_GLOBAL thread-safe Thread-safety of specialization in free-threaded builds: - A critical section is held on both the globals and builtins objects during specialization. This ensures we get an atomic view of both builtins and globals during specialization. - Generation of new keys versions is made atomic in free-threaded builds. - We use helpers safely modify the bytecode. Thread-safety of specialized instructions in free-threaded builds: - Dicts keys objects are passed from keys version guards to the downstream uops. This ensures that we are loading from the correct offset in the keys object. Once a unicode key has been stored in a keys object for a combined dictionary in free-threaded builds, the offset that it is stored in will never be reaused for a different key. - The dictionary read fast-path is used to read values from the dictionary. - Relaxed atomics are used when loading and storing dict keys versions. This avoids potential data races as the dict keys versions may be read without holding the dictionary's per-object lock while the instructions are executing. --- Include/internal/pycore_opcode_metadata.h | 4 +- Include/internal/pycore_uop_metadata.h | 4 +- Lib/test/test_opcache.py | 19 ++++- Objects/dictobject.c | 25 +++++-- Python/bytecodes.c | 35 ++++++--- Python/executor_cases.c.h | 41 ++++++++--- Python/generated_cases.c.h | 39 +++++++--- Python/specialize.c | 87 +++++++++++------------ 8 files changed, 171 insertions(+), 83 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 58e583eabbcc46..353e1c65a5e336 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1158,8 +1158,8 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[266] = { [LOAD_FROM_DICT_OR_DEREF] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_FREE_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [LOAD_FROM_DICT_OR_GLOBALS] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [LOAD_GLOBAL] = { true, INSTR_FMT_IBC000, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [LOAD_GLOBAL_BUILTIN] = { true, INSTR_FMT_IBC000, HAS_ARG_FLAG | HAS_DEOPT_FLAG }, - [LOAD_GLOBAL_MODULE] = { true, INSTR_FMT_IBC000, HAS_ARG_FLAG | HAS_DEOPT_FLAG }, + [LOAD_GLOBAL_BUILTIN] = { true, INSTR_FMT_IBC000, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG }, + [LOAD_GLOBAL_MODULE] = { true, INSTR_FMT_IBC000, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG }, [LOAD_LOCALS] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [LOAD_NAME] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [LOAD_SMALL_INT] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 98a41d1f23f569..5f097c4686103a 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -124,8 +124,8 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_GUARD_GLOBALS_VERSION] = HAS_DEOPT_FLAG, [_GUARD_GLOBALS_VERSION_PUSH_KEYS] = HAS_DEOPT_FLAG, [_GUARD_BUILTINS_VERSION_PUSH_KEYS] = HAS_DEOPT_FLAG, - [_LOAD_GLOBAL_MODULE_FROM_KEYS] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, - [_LOAD_GLOBAL_BUILTINS_FROM_KEYS] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, + [_LOAD_GLOBAL_MODULE_FROM_KEYS] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, + [_LOAD_GLOBAL_BUILTINS_FROM_KEYS] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, [_DELETE_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_MAKE_CELL] = HAS_ARG_FLAG | HAS_FREE_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG, [_DELETE_DEREF] = HAS_ARG_FLAG | HAS_FREE_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, diff --git a/Lib/test/test_opcache.py b/Lib/test/test_opcache.py index 78e4bf44f7ea0c..ee7fd178b1c02e 100644 --- a/Lib/test/test_opcache.py +++ b/Lib/test/test_opcache.py @@ -546,7 +546,6 @@ def count_args(self, *args): @threading_helper.requires_working_threading() -@requires_specialization class TestRacesDoNotCrash(TestBase): # Careful with these. Bigger numbers have a higher chance of catching bugs, # but you can also burn through a *ton* of type/dict/function versions: @@ -588,6 +587,7 @@ def assert_races_do_not_crash( for writer in writers: writer.join() + @requires_specialization def test_binary_subscr_getitem(self): def get_items(): class C: @@ -617,6 +617,7 @@ def write(items): opname = "BINARY_SUBSCR_GETITEM" self.assert_races_do_not_crash(opname, get_items, read, write) + @requires_specialization def test_binary_subscr_list_int(self): def get_items(): items = [] @@ -640,6 +641,7 @@ def write(items): opname = "BINARY_SUBSCR_LIST_INT" self.assert_races_do_not_crash(opname, get_items, read, write) + @requires_specialization def test_for_iter_gen(self): def get_items(): def g(): @@ -671,6 +673,7 @@ def write(items): opname = "FOR_ITER_GEN" self.assert_races_do_not_crash(opname, get_items, read, write) + @requires_specialization def test_for_iter_list(self): def get_items(): items = [] @@ -692,6 +695,7 @@ def write(items): opname = "FOR_ITER_LIST" self.assert_races_do_not_crash(opname, get_items, read, write) + @requires_specialization def test_load_attr_class(self): def get_items(): class C: @@ -721,6 +725,7 @@ def write(items): opname = "LOAD_ATTR_CLASS" self.assert_races_do_not_crash(opname, get_items, read, write) + @requires_specialization def test_load_attr_getattribute_overridden(self): def get_items(): class C: @@ -750,6 +755,7 @@ def write(items): opname = "LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN" self.assert_races_do_not_crash(opname, get_items, read, write) + @requires_specialization def test_load_attr_instance_value(self): def get_items(): class C: @@ -773,6 +779,7 @@ def write(items): opname = "LOAD_ATTR_INSTANCE_VALUE" self.assert_races_do_not_crash(opname, get_items, read, write) + @requires_specialization def test_load_attr_method_lazy_dict(self): def get_items(): class C(Exception): @@ -802,6 +809,7 @@ def write(items): opname = "LOAD_ATTR_METHOD_LAZY_DICT" self.assert_races_do_not_crash(opname, get_items, read, write) + @requires_specialization def test_load_attr_method_no_dict(self): def get_items(): class C: @@ -832,6 +840,7 @@ def write(items): opname = "LOAD_ATTR_METHOD_NO_DICT" self.assert_races_do_not_crash(opname, get_items, read, write) + @requires_specialization def test_load_attr_method_with_values(self): def get_items(): class C: @@ -861,6 +870,7 @@ def write(items): opname = "LOAD_ATTR_METHOD_WITH_VALUES" self.assert_races_do_not_crash(opname, get_items, read, write) + @requires_specialization def test_load_attr_module(self): def get_items(): items = [] @@ -885,6 +895,7 @@ def write(items): opname = "LOAD_ATTR_MODULE" self.assert_races_do_not_crash(opname, get_items, read, write) + @requires_specialization def test_load_attr_property(self): def get_items(): class C: @@ -914,6 +925,7 @@ def write(items): opname = "LOAD_ATTR_PROPERTY" self.assert_races_do_not_crash(opname, get_items, read, write) + @requires_specialization def test_load_attr_with_hint(self): def get_items(): class C: @@ -940,6 +952,7 @@ def write(items): opname = "LOAD_ATTR_WITH_HINT" self.assert_races_do_not_crash(opname, get_items, read, write) + @requires_specialization_ft def test_load_global_module(self): def get_items(): items = [] @@ -961,6 +974,7 @@ def write(items): opname, get_items, read, write, check_items=True ) + @requires_specialization def test_store_attr_instance_value(self): def get_items(): class C: @@ -983,6 +997,7 @@ def write(items): opname = "STORE_ATTR_INSTANCE_VALUE" self.assert_races_do_not_crash(opname, get_items, read, write) + @requires_specialization def test_store_attr_with_hint(self): def get_items(): class C: @@ -1008,6 +1023,7 @@ def write(items): opname = "STORE_ATTR_WITH_HINT" self.assert_races_do_not_crash(opname, get_items, read, write) + @requires_specialization def test_store_subscr_list_int(self): def get_items(): items = [] @@ -1031,6 +1047,7 @@ def write(items): opname = "STORE_SUBSCR_LIST_INT" self.assert_races_do_not_crash(opname, get_items, read, write) + @requires_specialization def test_unpack_sequence_list(self): def get_items(): items = [] diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 2090008055b7c0..55f34ce546b1a4 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -1701,7 +1701,7 @@ insert_combined_dict(PyInterpreterState *interp, PyDictObject *mp, } _PyDict_NotifyEvent(interp, PyDict_EVENT_ADDED, mp, key, value); - mp->ma_keys->dk_version = 0; + FT_ATOMIC_STORE_UINT32_RELAXED(mp->ma_keys->dk_version, 0); Py_ssize_t hashpos = find_empty_slot(mp->ma_keys, hash); dictkeys_set_index(mp->ma_keys, hashpos, mp->ma_keys->dk_nentries); @@ -2679,7 +2679,7 @@ delitem_common(PyDictObject *mp, Py_hash_t hash, Py_ssize_t ix, ASSERT_CONSISTENT(mp); } else { - mp->ma_keys->dk_version = 0; + FT_ATOMIC_STORE_UINT32_RELAXED(mp->ma_keys->dk_version, 0); dictkeys_set_index(mp->ma_keys, hashpos, DKIX_DUMMY); if (DK_IS_UNICODE(mp->ma_keys)) { PyDictUnicodeEntry *ep = &DK_UNICODE_ENTRIES(mp->ma_keys)[ix]; @@ -4498,7 +4498,7 @@ dict_popitem_impl(PyDictObject *self) return NULL; } } - self->ma_keys->dk_version = 0; + FT_ATOMIC_STORE_UINT32_RELAXED(self->ma_keys->dk_version, 0); /* Pop last item */ PyObject *key, *value; @@ -7390,17 +7390,30 @@ _PyDictKeys_DecRef(PyDictKeysObject *keys) dictkeys_decref(interp, keys, false); } +// In free-threaded builds the caller must ensure that the keys object is not +// being mutated concurrently by another thread. uint32_t _PyDictKeys_GetVersionForCurrentState(PyInterpreterState *interp, PyDictKeysObject *dictkeys) { - if (dictkeys->dk_version != 0) { - return dictkeys->dk_version; + uint32_t dk_version = FT_ATOMIC_LOAD_UINT32_RELAXED(dictkeys->dk_version); + if (dk_version != 0) { + return dk_version; } +#ifdef Py_GIL_DISABLED + uint32_t v; + do { + v = _Py_atomic_load_uint32_relaxed(&interp->dict_state.next_keys_version); + if (v == 0) { + return 0; + } + } while(!_Py_atomic_compare_exchange_uint32(&interp->dict_state.next_keys_version, &v, v + 1)); +#else if (interp->dict_state.next_keys_version == 0) { return 0; } uint32_t v = interp->dict_state.next_keys_version++; - dictkeys->dk_version = v; +#endif + FT_ATOMIC_STORE_UINT32_RELAXED(dictkeys->dk_version, v); return v; } diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 7ae0f20369641a..0a8e7bf44bb9f4 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -1569,7 +1569,7 @@ dummy_func( }; specializing op(_SPECIALIZE_LOAD_GLOBAL, (counter/1 -- )) { - #if ENABLE_SPECIALIZATION + #if ENABLE_SPECIALIZATION_FT if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { PyObject *name = GETITEM(FRAME_CO_NAMES, oparg>>1); next_instr = this_instr; @@ -1578,7 +1578,7 @@ dummy_func( } OPCODE_DEFERRED_INC(LOAD_GLOBAL); ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); - #endif /* ENABLE_SPECIALIZATION */ + #endif /* ENABLE_SPECIALIZATION_FT */ } // res[1] because we need a pointer to res to pass it to _PyEval_LoadGlobalStackRef @@ -1599,16 +1599,18 @@ dummy_func( op(_GUARD_GLOBALS_VERSION, (version/1 --)) { PyDictObject *dict = (PyDictObject *)GLOBALS(); DEOPT_IF(!PyDict_CheckExact(dict)); - DEOPT_IF(dict->ma_keys->dk_version != version); - assert(DK_IS_UNICODE(dict->ma_keys)); + PyDictKeysObject *keys = FT_ATOMIC_LOAD_PTR_ACQUIRE(dict->ma_keys); + DEOPT_IF(FT_ATOMIC_LOAD_UINT32_RELAXED(keys->dk_version) != version); + assert(DK_IS_UNICODE(keys)); } op(_GUARD_GLOBALS_VERSION_PUSH_KEYS, (version / 1 -- globals_keys: PyDictKeysObject *)) { PyDictObject *dict = (PyDictObject *)GLOBALS(); DEOPT_IF(!PyDict_CheckExact(dict)); - DEOPT_IF(dict->ma_keys->dk_version != version); - globals_keys = dict->ma_keys; + PyDictKeysObject *keys = FT_ATOMIC_LOAD_PTR_ACQUIRE(dict->ma_keys); + DEOPT_IF(FT_ATOMIC_LOAD_UINT32_RELAXED(keys->dk_version) != version); + globals_keys = keys; assert(DK_IS_UNICODE(globals_keys)); } @@ -1616,18 +1618,25 @@ dummy_func( { PyDictObject *dict = (PyDictObject *)BUILTINS(); DEOPT_IF(!PyDict_CheckExact(dict)); - DEOPT_IF(dict->ma_keys->dk_version != version); - builtins_keys = dict->ma_keys; + PyDictKeysObject *keys = FT_ATOMIC_LOAD_PTR_ACQUIRE(dict->ma_keys); + DEOPT_IF(FT_ATOMIC_LOAD_UINT32_RELAXED(keys->dk_version) != version); + builtins_keys = keys; assert(DK_IS_UNICODE(builtins_keys)); } op(_LOAD_GLOBAL_MODULE_FROM_KEYS, (index/1, globals_keys: PyDictKeysObject* -- res, null if (oparg & 1))) { PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(globals_keys); - PyObject *res_o = entries[index].me_value; + PyObject *res_o = FT_ATOMIC_LOAD_PTR_RELAXED(entries[index].me_value); DEAD(globals_keys); SYNC_SP(); DEOPT_IF(res_o == NULL); + #if Py_GIL_DISABLED + int increfed; + increfed = _Py_TryIncrefCompare(&entries[index].me_value, res_o); + DEOPT_IF(!increfed); + #else Py_INCREF(res_o); + #endif STAT_INC(LOAD_GLOBAL, hit); null = PyStackRef_NULL; res = PyStackRef_FromPyObjectSteal(res_o); @@ -1635,11 +1644,17 @@ dummy_func( op(_LOAD_GLOBAL_BUILTINS_FROM_KEYS, (index/1, builtins_keys: PyDictKeysObject* -- res, null if (oparg & 1))) { PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(builtins_keys); - PyObject *res_o = entries[index].me_value; + PyObject *res_o = FT_ATOMIC_LOAD_PTR_RELAXED(entries[index].me_value); DEAD(builtins_keys); SYNC_SP(); DEOPT_IF(res_o == NULL); + #if Py_GIL_DISABLED + int increfed; + increfed = _Py_TryIncrefCompare(&entries[index].me_value, res_o); + DEOPT_IF(!increfed); + #else Py_INCREF(res_o); + #endif STAT_INC(LOAD_GLOBAL, hit); null = PyStackRef_NULL; res = PyStackRef_FromPyObjectSteal(res_o); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 1d63402214db5d..290fd1ba2bb04c 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -1870,11 +1870,12 @@ UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } - if (dict->ma_keys->dk_version != version) { + PyDictKeysObject *keys = FT_ATOMIC_LOAD_PTR_ACQUIRE(dict->ma_keys); + if (FT_ATOMIC_LOAD_UINT32_RELAXED(keys->dk_version) != version) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } - assert(DK_IS_UNICODE(dict->ma_keys)); + assert(DK_IS_UNICODE(keys)); break; } @@ -1886,11 +1887,12 @@ UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } - if (dict->ma_keys->dk_version != version) { + PyDictKeysObject *keys = FT_ATOMIC_LOAD_PTR_ACQUIRE(dict->ma_keys); + if (FT_ATOMIC_LOAD_UINT32_RELAXED(keys->dk_version) != version) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } - globals_keys = dict->ma_keys; + globals_keys = keys; assert(DK_IS_UNICODE(globals_keys)); stack_pointer[0].bits = (uintptr_t)globals_keys; stack_pointer += 1; @@ -1906,11 +1908,12 @@ UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } - if (dict->ma_keys->dk_version != version) { + PyDictKeysObject *keys = FT_ATOMIC_LOAD_PTR_ACQUIRE(dict->ma_keys); + if (FT_ATOMIC_LOAD_UINT32_RELAXED(keys->dk_version) != version) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } - builtins_keys = dict->ma_keys; + builtins_keys = keys; assert(DK_IS_UNICODE(builtins_keys)); stack_pointer[0].bits = (uintptr_t)builtins_keys; stack_pointer += 1; @@ -1926,14 +1929,25 @@ globals_keys = (PyDictKeysObject *)stack_pointer[-1].bits; uint16_t index = (uint16_t)CURRENT_OPERAND(); PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(globals_keys); - PyObject *res_o = entries[index].me_value; + PyObject *res_o = FT_ATOMIC_LOAD_PTR_RELAXED(entries[index].me_value); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); if (res_o == NULL) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } + #if Py_GIL_DISABLED + int increfed; + _PyFrame_SetStackPointer(frame, stack_pointer); + increfed = _Py_TryIncrefCompare(&entries[index].me_value, res_o); + stack_pointer = _PyFrame_GetStackPointer(frame); + if (!increfed) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + #else Py_INCREF(res_o); + #endif STAT_INC(LOAD_GLOBAL, hit); null = PyStackRef_NULL; res = PyStackRef_FromPyObjectSteal(res_o); @@ -1952,14 +1966,25 @@ builtins_keys = (PyDictKeysObject *)stack_pointer[-1].bits; uint16_t index = (uint16_t)CURRENT_OPERAND(); PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(builtins_keys); - PyObject *res_o = entries[index].me_value; + PyObject *res_o = FT_ATOMIC_LOAD_PTR_RELAXED(entries[index].me_value); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); if (res_o == NULL) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } + #if Py_GIL_DISABLED + int increfed; + _PyFrame_SetStackPointer(frame, stack_pointer); + increfed = _Py_TryIncrefCompare(&entries[index].me_value, res_o); + stack_pointer = _PyFrame_GetStackPointer(frame); + if (!increfed) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + #else Py_INCREF(res_o); + #endif STAT_INC(LOAD_GLOBAL, hit); null = PyStackRef_NULL; res = PyStackRef_FromPyObjectSteal(res_o); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 03b4d2224922f0..2b6f48447b5dfa 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -6095,7 +6095,7 @@ { uint16_t counter = read_u16(&this_instr[1].cache); (void)counter; - #if ENABLE_SPECIALIZATION + #if ENABLE_SPECIALIZATION_FT if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { PyObject *name = GETITEM(FRAME_CO_NAMES, oparg>>1); next_instr = this_instr; @@ -6106,7 +6106,7 @@ } OPCODE_DEFERRED_INC(LOAD_GLOBAL); ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); - #endif /* ENABLE_SPECIALIZATION */ + #endif /* ENABLE_SPECIALIZATION_FT */ } /* Skip 1 cache entry */ /* Skip 1 cache entry */ @@ -6141,25 +6141,35 @@ uint16_t version = read_u16(&this_instr[2].cache); PyDictObject *dict = (PyDictObject *)GLOBALS(); DEOPT_IF(!PyDict_CheckExact(dict), LOAD_GLOBAL); - DEOPT_IF(dict->ma_keys->dk_version != version, LOAD_GLOBAL); - assert(DK_IS_UNICODE(dict->ma_keys)); + PyDictKeysObject *keys = FT_ATOMIC_LOAD_PTR_ACQUIRE(dict->ma_keys); + DEOPT_IF(FT_ATOMIC_LOAD_UINT32_RELAXED(keys->dk_version) != version, LOAD_GLOBAL); + assert(DK_IS_UNICODE(keys)); } // _GUARD_BUILTINS_VERSION_PUSH_KEYS { uint16_t version = read_u16(&this_instr[3].cache); PyDictObject *dict = (PyDictObject *)BUILTINS(); DEOPT_IF(!PyDict_CheckExact(dict), LOAD_GLOBAL); - DEOPT_IF(dict->ma_keys->dk_version != version, LOAD_GLOBAL); - builtins_keys = dict->ma_keys; + PyDictKeysObject *keys = FT_ATOMIC_LOAD_PTR_ACQUIRE(dict->ma_keys); + DEOPT_IF(FT_ATOMIC_LOAD_UINT32_RELAXED(keys->dk_version) != version, LOAD_GLOBAL); + builtins_keys = keys; assert(DK_IS_UNICODE(builtins_keys)); } // _LOAD_GLOBAL_BUILTINS_FROM_KEYS { uint16_t index = read_u16(&this_instr[4].cache); PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(builtins_keys); - PyObject *res_o = entries[index].me_value; + PyObject *res_o = FT_ATOMIC_LOAD_PTR_RELAXED(entries[index].me_value); DEOPT_IF(res_o == NULL, LOAD_GLOBAL); + #if Py_GIL_DISABLED + int increfed; + _PyFrame_SetStackPointer(frame, stack_pointer); + increfed = _Py_TryIncrefCompare(&entries[index].me_value, res_o); + stack_pointer = _PyFrame_GetStackPointer(frame); + DEOPT_IF(!increfed, LOAD_GLOBAL); + #else Py_INCREF(res_o); + #endif STAT_INC(LOAD_GLOBAL, hit); null = PyStackRef_NULL; res = PyStackRef_FromPyObjectSteal(res_o); @@ -6185,8 +6195,9 @@ uint16_t version = read_u16(&this_instr[2].cache); PyDictObject *dict = (PyDictObject *)GLOBALS(); DEOPT_IF(!PyDict_CheckExact(dict), LOAD_GLOBAL); - DEOPT_IF(dict->ma_keys->dk_version != version, LOAD_GLOBAL); - globals_keys = dict->ma_keys; + PyDictKeysObject *keys = FT_ATOMIC_LOAD_PTR_ACQUIRE(dict->ma_keys); + DEOPT_IF(FT_ATOMIC_LOAD_UINT32_RELAXED(keys->dk_version) != version, LOAD_GLOBAL); + globals_keys = keys; assert(DK_IS_UNICODE(globals_keys)); } /* Skip 1 cache entry */ @@ -6194,9 +6205,17 @@ { uint16_t index = read_u16(&this_instr[4].cache); PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(globals_keys); - PyObject *res_o = entries[index].me_value; + PyObject *res_o = FT_ATOMIC_LOAD_PTR_RELAXED(entries[index].me_value); DEOPT_IF(res_o == NULL, LOAD_GLOBAL); + #if Py_GIL_DISABLED + int increfed; + _PyFrame_SetStackPointer(frame, stack_pointer); + increfed = _Py_TryIncrefCompare(&entries[index].me_value, res_o); + stack_pointer = _PyFrame_GetStackPointer(frame); + DEOPT_IF(!increfed, LOAD_GLOBAL); + #else Py_INCREF(res_o); + #endif STAT_INC(LOAD_GLOBAL, hit); null = PyStackRef_NULL; res = PyStackRef_FromPyObjectSteal(res_o); diff --git a/Python/specialize.c b/Python/specialize.c index 0699e7be5e6b9c..ce2ac3ef8ab230 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -1521,103 +1521,102 @@ PyObject *descr, DescriptorClassification kind, bool is_method) } void -_Py_Specialize_LoadGlobal( +specialize_load_global_lock_held( PyObject *globals, PyObject *builtins, _Py_CODEUNIT *instr, PyObject *name) { - assert(ENABLE_SPECIALIZATION); + assert(ENABLE_SPECIALIZATION_FT); assert(_PyOpcode_Caches[LOAD_GLOBAL] == INLINE_CACHE_ENTRIES_LOAD_GLOBAL); /* Use inline cache */ _PyLoadGlobalCache *cache = (_PyLoadGlobalCache *)(instr + 1); assert(PyUnicode_CheckExact(name)); if (!PyDict_CheckExact(globals)) { - SPECIALIZATION_FAIL(LOAD_GLOBAL, SPEC_FAIL_LOAD_GLOBAL_NON_DICT); - goto fail; + unspecialize(instr, SPEC_FAIL_LOAD_GLOBAL_NON_DICT); + return; } PyDictKeysObject * globals_keys = ((PyDictObject *)globals)->ma_keys; if (!DK_IS_UNICODE(globals_keys)) { - SPECIALIZATION_FAIL(LOAD_GLOBAL, SPEC_FAIL_LOAD_GLOBAL_NON_STRING_OR_SPLIT); - goto fail; + unspecialize(instr, SPEC_FAIL_LOAD_GLOBAL_NON_STRING_OR_SPLIT); + return; } Py_ssize_t index = _PyDictKeys_StringLookup(globals_keys, name); if (index == DKIX_ERROR) { - SPECIALIZATION_FAIL(LOAD_GLOBAL, SPEC_FAIL_EXPECTED_ERROR); - goto fail; + unspecialize(instr, SPEC_FAIL_EXPECTED_ERROR); + return; } PyInterpreterState *interp = _PyInterpreterState_GET(); if (index != DKIX_EMPTY) { if (index != (uint16_t)index) { - SPECIALIZATION_FAIL(LOAD_GLOBAL, SPEC_FAIL_OUT_OF_RANGE); - goto fail; + unspecialize(instr, SPEC_FAIL_OUT_OF_RANGE); + return; } uint32_t keys_version = _PyDictKeys_GetVersionForCurrentState( interp, globals_keys); if (keys_version == 0) { - SPECIALIZATION_FAIL(LOAD_GLOBAL, SPEC_FAIL_OUT_OF_VERSIONS); - goto fail; + unspecialize(instr, SPEC_FAIL_OUT_OF_VERSIONS); + return; } if (keys_version != (uint16_t)keys_version) { - SPECIALIZATION_FAIL(LOAD_GLOBAL, SPEC_FAIL_OUT_OF_RANGE); - goto fail; + unspecialize(instr, SPEC_FAIL_OUT_OF_RANGE); + return; } cache->index = (uint16_t)index; cache->module_keys_version = (uint16_t)keys_version; - instr->op.code = LOAD_GLOBAL_MODULE; - goto success; + specialize(instr, LOAD_GLOBAL_MODULE); + return; } if (!PyDict_CheckExact(builtins)) { - SPECIALIZATION_FAIL(LOAD_GLOBAL, SPEC_FAIL_LOAD_GLOBAL_NON_DICT); - goto fail; + unspecialize(instr, SPEC_FAIL_LOAD_GLOBAL_NON_DICT); + return; } PyDictKeysObject * builtin_keys = ((PyDictObject *)builtins)->ma_keys; if (!DK_IS_UNICODE(builtin_keys)) { - SPECIALIZATION_FAIL(LOAD_GLOBAL, SPEC_FAIL_LOAD_GLOBAL_NON_STRING_OR_SPLIT); - goto fail; + unspecialize(instr, SPEC_FAIL_LOAD_GLOBAL_NON_STRING_OR_SPLIT); + return; } index = _PyDictKeys_StringLookup(builtin_keys, name); if (index == DKIX_ERROR) { - SPECIALIZATION_FAIL(LOAD_GLOBAL, SPEC_FAIL_EXPECTED_ERROR); - goto fail; + unspecialize(instr, SPEC_FAIL_EXPECTED_ERROR); + return; } if (index != (uint16_t)index) { - SPECIALIZATION_FAIL(LOAD_GLOBAL, SPEC_FAIL_OUT_OF_RANGE); - goto fail; + unspecialize(instr, SPEC_FAIL_OUT_OF_RANGE); + return; } uint32_t globals_version = _PyDictKeys_GetVersionForCurrentState( interp, globals_keys); if (globals_version == 0) { - SPECIALIZATION_FAIL(LOAD_GLOBAL, SPEC_FAIL_OUT_OF_VERSIONS); - goto fail; + unspecialize(instr, SPEC_FAIL_OUT_OF_VERSIONS); + return; } if (globals_version != (uint16_t)globals_version) { - SPECIALIZATION_FAIL(LOAD_GLOBAL, SPEC_FAIL_OUT_OF_RANGE); - goto fail; + unspecialize(instr, SPEC_FAIL_OUT_OF_RANGE); + return; } uint32_t builtins_version = _PyDictKeys_GetVersionForCurrentState( interp, builtin_keys); if (builtins_version == 0) { - SPECIALIZATION_FAIL(LOAD_GLOBAL, SPEC_FAIL_OUT_OF_VERSIONS); - goto fail; + unspecialize(instr, SPEC_FAIL_OUT_OF_VERSIONS); + return; } if (builtins_version > UINT16_MAX) { - SPECIALIZATION_FAIL(LOAD_GLOBAL, SPEC_FAIL_OUT_OF_RANGE); - goto fail; + unspecialize(instr, SPEC_FAIL_OUT_OF_RANGE); + return; } cache->index = (uint16_t)index; cache->module_keys_version = (uint16_t)globals_version; cache->builtin_keys_version = (uint16_t)builtins_version; - instr->op.code = LOAD_GLOBAL_BUILTIN; - goto success; -fail: - STAT_INC(LOAD_GLOBAL, failure); - assert(!PyErr_Occurred()); - instr->op.code = LOAD_GLOBAL; - cache->counter = adaptive_counter_backoff(cache->counter); - return; -success: - STAT_INC(LOAD_GLOBAL, success); - assert(!PyErr_Occurred()); - cache->counter = adaptive_counter_cooldown(); + specialize(instr, LOAD_GLOBAL_BUILTIN); +} + +void +_Py_Specialize_LoadGlobal( + PyObject *globals, PyObject *builtins, + _Py_CODEUNIT *instr, PyObject *name) +{ + Py_BEGIN_CRITICAL_SECTION2(globals, builtins); + specialize_load_global_lock_held(globals, builtins, instr, name); + Py_END_CRITICAL_SECTION2(); } #ifdef Py_STATS From 5af633c7a74f791a1a373b3d78be7d2d5e602f1b Mon Sep 17 00:00:00 2001 From: Matt Page Date: Mon, 28 Oct 2024 15:32:43 -0700 Subject: [PATCH 02/16] _Py_GetExecutor needs a CS in free-threaded builds This handles the case where another thread is instrumenting the bytecode. --- Python/optimizer.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/Python/optimizer.c b/Python/optimizer.c index b876b6c2bd72fd..f63d08f13eceef 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -205,8 +205,8 @@ _PyOptimizer_Optimize( return 1; } -_PyExecutorObject * -_Py_GetExecutor(PyCodeObject *code, int offset) +static _PyExecutorObject * +get_executor_lock_held(PyCodeObject *code, int offset) { int code_len = (int)Py_SIZE(code); for (int i = 0 ; i < code_len;) { @@ -222,6 +222,16 @@ _Py_GetExecutor(PyCodeObject *code, int offset) return NULL; } +_PyExecutorObject * +_Py_GetExecutor(PyCodeObject *code, int offset) +{ + _PyExecutorObject *executor; + Py_BEGIN_CRITICAL_SECTION(code); + executor = get_executor_lock_held(code, offset); + Py_END_CRITICAL_SECTION(); + return executor; +} + static PyObject * is_valid(PyObject *self, PyObject *Py_UNUSED(ignored)) { From 7b3df6f883e2f77bd14170cbf055fa222109e8ff Mon Sep 17 00:00:00 2001 From: Matt Page Date: Fri, 8 Nov 2024 10:21:02 -0800 Subject: [PATCH 03/16] Undo workaround for cases generator bug --- Python/bytecodes.c | 6 ++---- Python/executor_cases.c.h | 6 ++---- Python/generated_cases.c.h | 6 ++---- 3 files changed, 6 insertions(+), 12 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 0a8e7bf44bb9f4..64ffa539899499 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -1631,8 +1631,7 @@ dummy_func( SYNC_SP(); DEOPT_IF(res_o == NULL); #if Py_GIL_DISABLED - int increfed; - increfed = _Py_TryIncrefCompare(&entries[index].me_value, res_o); + int increfed = _Py_TryIncrefCompare(&entries[index].me_value, res_o); DEOPT_IF(!increfed); #else Py_INCREF(res_o); @@ -1649,8 +1648,7 @@ dummy_func( SYNC_SP(); DEOPT_IF(res_o == NULL); #if Py_GIL_DISABLED - int increfed; - increfed = _Py_TryIncrefCompare(&entries[index].me_value, res_o); + int increfed = _Py_TryIncrefCompare(&entries[index].me_value, res_o); DEOPT_IF(!increfed); #else Py_INCREF(res_o); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 290fd1ba2bb04c..5dd8d0bb78a1b4 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -1937,9 +1937,8 @@ JUMP_TO_JUMP_TARGET(); } #if Py_GIL_DISABLED - int increfed; _PyFrame_SetStackPointer(frame, stack_pointer); - increfed = _Py_TryIncrefCompare(&entries[index].me_value, res_o); + int increfed = _Py_TryIncrefCompare(&entries[index].me_value, res_o); stack_pointer = _PyFrame_GetStackPointer(frame); if (!increfed) { UOP_STAT_INC(uopcode, miss); @@ -1974,9 +1973,8 @@ JUMP_TO_JUMP_TARGET(); } #if Py_GIL_DISABLED - int increfed; _PyFrame_SetStackPointer(frame, stack_pointer); - increfed = _Py_TryIncrefCompare(&entries[index].me_value, res_o); + int increfed = _Py_TryIncrefCompare(&entries[index].me_value, res_o); stack_pointer = _PyFrame_GetStackPointer(frame); if (!increfed) { UOP_STAT_INC(uopcode, miss); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 2b6f48447b5dfa..d6076f130e0bf6 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -6162,9 +6162,8 @@ PyObject *res_o = FT_ATOMIC_LOAD_PTR_RELAXED(entries[index].me_value); DEOPT_IF(res_o == NULL, LOAD_GLOBAL); #if Py_GIL_DISABLED - int increfed; _PyFrame_SetStackPointer(frame, stack_pointer); - increfed = _Py_TryIncrefCompare(&entries[index].me_value, res_o); + int increfed = _Py_TryIncrefCompare(&entries[index].me_value, res_o); stack_pointer = _PyFrame_GetStackPointer(frame); DEOPT_IF(!increfed, LOAD_GLOBAL); #else @@ -6208,9 +6207,8 @@ PyObject *res_o = FT_ATOMIC_LOAD_PTR_RELAXED(entries[index].me_value); DEOPT_IF(res_o == NULL, LOAD_GLOBAL); #if Py_GIL_DISABLED - int increfed; _PyFrame_SetStackPointer(frame, stack_pointer); - increfed = _Py_TryIncrefCompare(&entries[index].me_value, res_o); + int increfed = _Py_TryIncrefCompare(&entries[index].me_value, res_o); stack_pointer = _PyFrame_GetStackPointer(frame); DEOPT_IF(!increfed, LOAD_GLOBAL); #else From 8f6239d2eebcd6cfc65a49153b62ea26bfff2cb2 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Fri, 8 Nov 2024 12:35:52 -0800 Subject: [PATCH 04/16] Fix unused function warning --- Objects/funcobject.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Objects/funcobject.c b/Objects/funcobject.c index 1f2387f68440aa..4ba47285f7152f 100644 --- a/Objects/funcobject.c +++ b/Objects/funcobject.c @@ -289,12 +289,14 @@ functions is running. */ +#ifndef Py_GIL_DISABLED static inline struct _func_version_cache_item * get_cache_item(PyInterpreterState *interp, uint32_t version) { return interp->func_state.func_version_cache + (version % FUNC_VERSION_CACHE_SIZE); } +#endif void _PyFunction_SetVersion(PyFunctionObject *func, uint32_t version) From 49ec70a6ffe358fce6c8725298ee6a99c30ad46b Mon Sep 17 00:00:00 2001 From: Matt Page Date: Fri, 8 Nov 2024 12:36:42 -0800 Subject: [PATCH 05/16] Double check that keys are still valid --- Python/bytecodes.c | 10 ++++++++++ Python/executor_cases.c.h | 16 ++++++++++++++++ Python/generated_cases.c.h | 10 ++++++++++ Tools/cases_generator/analyzer.py | 1 + 4 files changed, 37 insertions(+) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 64ffa539899499..c3cea0f67b24e4 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -1633,6 +1633,11 @@ dummy_func( #if Py_GIL_DISABLED int increfed = _Py_TryIncrefCompare(&entries[index].me_value, res_o); DEOPT_IF(!increfed); + PyDictObject *dict = (PyDictObject*) GLOBALS(); + if (globals_keys != _Py_atomic_load_ptr_acquire(&dict->ma_keys)) { + Py_DECREF(res_o); + DEOPT_IF(true); + } #else Py_INCREF(res_o); #endif @@ -1650,6 +1655,11 @@ dummy_func( #if Py_GIL_DISABLED int increfed = _Py_TryIncrefCompare(&entries[index].me_value, res_o); DEOPT_IF(!increfed); + PyDictObject *dict = (PyDictObject*) BUILTINS(); + if (builtins_keys != _Py_atomic_load_ptr_acquire(&dict->ma_keys)) { + Py_DECREF(res_o); + DEOPT_IF(true); + } #else Py_INCREF(res_o); #endif diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 5dd8d0bb78a1b4..5afa9263fb8dfb 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -1944,6 +1944,14 @@ UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } + PyDictObject *dict = (PyDictObject*) GLOBALS(); + if (globals_keys != _Py_atomic_load_ptr_acquire(&dict->ma_keys)) { + Py_DECREF(res_o); + if (true) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + } #else Py_INCREF(res_o); #endif @@ -1980,6 +1988,14 @@ UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } + PyDictObject *dict = (PyDictObject*) BUILTINS(); + if (builtins_keys != _Py_atomic_load_ptr_acquire(&dict->ma_keys)) { + Py_DECREF(res_o); + if (true) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + } #else Py_INCREF(res_o); #endif diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index d6076f130e0bf6..c872b4c68f7ef0 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -6166,6 +6166,11 @@ int increfed = _Py_TryIncrefCompare(&entries[index].me_value, res_o); stack_pointer = _PyFrame_GetStackPointer(frame); DEOPT_IF(!increfed, LOAD_GLOBAL); + PyDictObject *dict = (PyDictObject*) BUILTINS(); + if (builtins_keys != _Py_atomic_load_ptr_acquire(&dict->ma_keys)) { + Py_DECREF(res_o); + DEOPT_IF(true, LOAD_GLOBAL); + } #else Py_INCREF(res_o); #endif @@ -6211,6 +6216,11 @@ int increfed = _Py_TryIncrefCompare(&entries[index].me_value, res_o); stack_pointer = _PyFrame_GetStackPointer(frame); DEOPT_IF(!increfed, LOAD_GLOBAL); + PyDictObject *dict = (PyDictObject*) GLOBALS(); + if (globals_keys != _Py_atomic_load_ptr_acquire(&dict->ma_keys)) { + Py_DECREF(res_o); + DEOPT_IF(true, LOAD_GLOBAL); + } #else Py_INCREF(res_o); #endif diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index a725ec10d4e52a..bb92ece54fbe83 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -623,6 +623,7 @@ def has_error_without_pop(op: parser.InstDef) -> bool: "_Py_NewRef", "_Py_SINGLETON", "_Py_STR", + "_Py_atomic_load_ptr_acquire", "_Py_atomic_load_uintptr_relaxed", "_Py_set_eval_breaker_bit", "advance_backoff_counter", From 0e5045149ce1d061e5c78bce892a6851de954163 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Fri, 8 Nov 2024 12:48:46 -0800 Subject: [PATCH 06/16] Fix formatting --- Objects/dictobject.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 55f34ce546b1a4..3dbac488b4f2d4 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -7402,11 +7402,13 @@ uint32_t _PyDictKeys_GetVersionForCurrentState(PyInterpreterState *interp, #ifdef Py_GIL_DISABLED uint32_t v; do { - v = _Py_atomic_load_uint32_relaxed(&interp->dict_state.next_keys_version); + v = _Py_atomic_load_uint32_relaxed( + &interp->dict_state.next_keys_version); if (v == 0) { return 0; } - } while(!_Py_atomic_compare_exchange_uint32(&interp->dict_state.next_keys_version, &v, v + 1)); + } while (!_Py_atomic_compare_exchange_uint32( + &interp->dict_state.next_keys_version, &v, v + 1)); #else if (interp->dict_state.next_keys_version == 0) { return 0; From 72519ca3ac77dbcb2cc7b4a92c943fbcdc2ee7de Mon Sep 17 00:00:00 2001 From: Matt Page Date: Fri, 8 Nov 2024 17:07:31 -0800 Subject: [PATCH 07/16] Fix linkage --- Python/specialize.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/specialize.c b/Python/specialize.c index ce2ac3ef8ab230..817c7dfdf4d54e 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -1520,7 +1520,7 @@ PyObject *descr, DescriptorClassification kind, bool is_method) return 1; } -void +static void specialize_load_global_lock_held( PyObject *globals, PyObject *builtins, _Py_CODEUNIT *instr, PyObject *name) From d55397b22c34b430a7d342cbaeaa55b8216fbc45 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Fri, 8 Nov 2024 17:21:31 -0800 Subject: [PATCH 08/16] `_Py_TryIncrefCompare` can't escape --- Include/internal/pycore_opcode_metadata.h | 4 ++-- Include/internal/pycore_uop_metadata.h | 4 ++-- Python/executor_cases.c.h | 4 ---- Python/generated_cases.c.h | 4 ---- Tools/cases_generator/analyzer.py | 1 + 5 files changed, 5 insertions(+), 12 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 353e1c65a5e336..58e583eabbcc46 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1158,8 +1158,8 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[266] = { [LOAD_FROM_DICT_OR_DEREF] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_FREE_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [LOAD_FROM_DICT_OR_GLOBALS] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [LOAD_GLOBAL] = { true, INSTR_FMT_IBC000, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [LOAD_GLOBAL_BUILTIN] = { true, INSTR_FMT_IBC000, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG }, - [LOAD_GLOBAL_MODULE] = { true, INSTR_FMT_IBC000, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG }, + [LOAD_GLOBAL_BUILTIN] = { true, INSTR_FMT_IBC000, HAS_ARG_FLAG | HAS_DEOPT_FLAG }, + [LOAD_GLOBAL_MODULE] = { true, INSTR_FMT_IBC000, HAS_ARG_FLAG | HAS_DEOPT_FLAG }, [LOAD_LOCALS] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [LOAD_NAME] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [LOAD_SMALL_INT] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 5f097c4686103a..98a41d1f23f569 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -124,8 +124,8 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_GUARD_GLOBALS_VERSION] = HAS_DEOPT_FLAG, [_GUARD_GLOBALS_VERSION_PUSH_KEYS] = HAS_DEOPT_FLAG, [_GUARD_BUILTINS_VERSION_PUSH_KEYS] = HAS_DEOPT_FLAG, - [_LOAD_GLOBAL_MODULE_FROM_KEYS] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, - [_LOAD_GLOBAL_BUILTINS_FROM_KEYS] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, + [_LOAD_GLOBAL_MODULE_FROM_KEYS] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, + [_LOAD_GLOBAL_BUILTINS_FROM_KEYS] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, [_DELETE_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_MAKE_CELL] = HAS_ARG_FLAG | HAS_FREE_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG, [_DELETE_DEREF] = HAS_ARG_FLAG | HAS_FREE_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 5afa9263fb8dfb..c457562533bd42 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -1937,9 +1937,7 @@ JUMP_TO_JUMP_TARGET(); } #if Py_GIL_DISABLED - _PyFrame_SetStackPointer(frame, stack_pointer); int increfed = _Py_TryIncrefCompare(&entries[index].me_value, res_o); - stack_pointer = _PyFrame_GetStackPointer(frame); if (!increfed) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); @@ -1981,9 +1979,7 @@ JUMP_TO_JUMP_TARGET(); } #if Py_GIL_DISABLED - _PyFrame_SetStackPointer(frame, stack_pointer); int increfed = _Py_TryIncrefCompare(&entries[index].me_value, res_o); - stack_pointer = _PyFrame_GetStackPointer(frame); if (!increfed) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index c872b4c68f7ef0..d7cf72c609d20e 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -6162,9 +6162,7 @@ PyObject *res_o = FT_ATOMIC_LOAD_PTR_RELAXED(entries[index].me_value); DEOPT_IF(res_o == NULL, LOAD_GLOBAL); #if Py_GIL_DISABLED - _PyFrame_SetStackPointer(frame, stack_pointer); int increfed = _Py_TryIncrefCompare(&entries[index].me_value, res_o); - stack_pointer = _PyFrame_GetStackPointer(frame); DEOPT_IF(!increfed, LOAD_GLOBAL); PyDictObject *dict = (PyDictObject*) BUILTINS(); if (builtins_keys != _Py_atomic_load_ptr_acquire(&dict->ma_keys)) { @@ -6212,9 +6210,7 @@ PyObject *res_o = FT_ATOMIC_LOAD_PTR_RELAXED(entries[index].me_value); DEOPT_IF(res_o == NULL, LOAD_GLOBAL); #if Py_GIL_DISABLED - _PyFrame_SetStackPointer(frame, stack_pointer); int increfed = _Py_TryIncrefCompare(&entries[index].me_value, res_o); - stack_pointer = _PyFrame_GetStackPointer(frame); DEOPT_IF(!increfed, LOAD_GLOBAL); PyDictObject *dict = (PyDictObject*) GLOBALS(); if (globals_keys != _Py_atomic_load_ptr_acquire(&dict->ma_keys)) { diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index bb92ece54fbe83..8a68de2633c614 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -623,6 +623,7 @@ def has_error_without_pop(op: parser.InstDef) -> bool: "_Py_NewRef", "_Py_SINGLETON", "_Py_STR", + "_Py_TryIncrefCompare", "_Py_atomic_load_ptr_acquire", "_Py_atomic_load_uintptr_relaxed", "_Py_set_eval_breaker_bit", From ad9a9d025a8232487586b222cb17b804e713bb3b Mon Sep 17 00:00:00 2001 From: Matt Page Date: Sat, 16 Nov 2024 10:36:35 -0800 Subject: [PATCH 09/16] Use atomics for setting keys version for split keys --- Objects/dictobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 3dbac488b4f2d4..9d7ad0f36599d8 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -1743,7 +1743,7 @@ insert_split_key(PyDictKeysObject *keys, PyObject *key, Py_hash_t hash) ix = unicodekeys_lookup_unicode(keys, key, hash); if (ix == DKIX_EMPTY && keys->dk_usable > 0) { // Insert into new slot - keys->dk_version = 0; + FT_ATOMIC_STORE_UINT32_RELAXED(keys->dk_version, 0); Py_ssize_t hashpos = find_empty_slot(keys, hash); ix = keys->dk_nentries; dictkeys_set_index(keys, hashpos, ix); From 5b7e65c2a237d2ddc4d3c89887608de27636070d Mon Sep 17 00:00:00 2001 From: Matt Page Date: Sat, 16 Nov 2024 12:04:07 -0800 Subject: [PATCH 10/16] Ensure dict is shared when assigning keys version --- Include/internal/pycore_dict.h | 9 +++++ Objects/dictobject.c | 64 +++++++++++++++++++++++++++++----- Python/specialize.c | 12 +++---- 3 files changed, 70 insertions(+), 15 deletions(-) diff --git a/Include/internal/pycore_dict.h b/Include/internal/pycore_dict.h index c5399ad8e0497f..a16fa910c05b82 100644 --- a/Include/internal/pycore_dict.h +++ b/Include/internal/pycore_dict.h @@ -92,6 +92,15 @@ extern PyObject *_PyDict_FromKeys(PyObject *, PyObject *, PyObject *); extern uint32_t _PyDictKeys_GetVersionForCurrentState( PyInterpreterState *interp, PyDictKeysObject *dictkeys); +/* Gets a version number unique to the current state of the keys of dict, if possible. + * + * In free-threaded builds ensures that the dict can be used for lock-free + * reads if a version was assigned. + * + * Returns the version number, or zero if it was not possible to get a version number. */ +extern uint32_t _PyDict_GetKeysVersionForCurrentState( + PyInterpreterState *interp, PyDictObject *dictkeys); + extern size_t _PyDict_KeysSize(PyDictKeysObject *keys); extern void _PyDictKeys_DecRef(PyDictKeysObject *keys); diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 9d7ad0f36599d8..6580709febb9d9 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -1284,6 +1284,20 @@ ensure_shared_on_resize(PyDictObject *mp) #endif } +static inline void +ensure_shared_on_keys_version_assignment(PyDictObject *mp) +{ + ASSERT_DICT_LOCKED((PyObject *) mp); + #ifdef Py_GIL_DISABLED + if (!_Py_IsOwnedByCurrentThread((PyObject *)mp) && !IS_DICT_SHARED(mp)) { + // This ensures that a concurrent resize operation will delay + // freeing the old keys or values using QSBR, which is necessary to + // safely allow concurrent reads without locking. + SET_DICT_SHARED(mp); + } + #endif +} + #ifdef Py_GIL_DISABLED static inline Py_ALWAYS_INLINE int @@ -7390,15 +7404,9 @@ _PyDictKeys_DecRef(PyDictKeysObject *keys) dictkeys_decref(interp, keys, false); } -// In free-threaded builds the caller must ensure that the keys object is not -// being mutated concurrently by another thread. -uint32_t _PyDictKeys_GetVersionForCurrentState(PyInterpreterState *interp, - PyDictKeysObject *dictkeys) +static inline uint32_t +get_next_dict_keys_version(PyInterpreterState *interp) { - uint32_t dk_version = FT_ATOMIC_LOAD_UINT32_RELAXED(dictkeys->dk_version); - if (dk_version != 0) { - return dk_version; - } #ifdef Py_GIL_DISABLED uint32_t v; do { @@ -7415,10 +7423,48 @@ uint32_t _PyDictKeys_GetVersionForCurrentState(PyInterpreterState *interp, } uint32_t v = interp->dict_state.next_keys_version++; #endif - FT_ATOMIC_STORE_UINT32_RELAXED(dictkeys->dk_version, v); return v; } +typedef struct { + uint32_t version; + int assigned; +} assign_keys_version_result; + +static inline assign_keys_version_result +assign_keys_version(PyInterpreterState *interp, PyDictKeysObject *dictkeys) +{ + uint32_t dk_version = FT_ATOMIC_LOAD_UINT32_RELAXED(dictkeys->dk_version); + if (dk_version != 0) { + return (assign_keys_version_result){dk_version, 0}; + } + dk_version = get_next_dict_keys_version(interp); + FT_ATOMIC_STORE_UINT32_RELAXED(dictkeys->dk_version, dk_version); + return (assign_keys_version_result){dk_version, 1}; +} + +// In free-threaded builds the caller must ensure that the keys object is not +// being mutated concurrently by another thread. +uint32_t +_PyDictKeys_GetVersionForCurrentState(PyInterpreterState *interp, + PyDictKeysObject *dictkeys) +{ + assign_keys_version_result res = assign_keys_version(interp, dictkeys); + return res.version; +} + +uint32_t +_PyDict_GetKeysVersionForCurrentState(PyInterpreterState *interp, + PyDictObject *dict) +{ + ASSERT_DICT_LOCKED((PyObject *) dict); + assign_keys_version_result res = assign_keys_version(interp, dict->ma_keys); + if (res.assigned) { + ensure_shared_on_keys_version_assignment(dict); + } + return res.version; +} + static inline int validate_watcher_id(PyInterpreterState *interp, int watcher_id) { diff --git a/Python/specialize.c b/Python/specialize.c index 817c7dfdf4d54e..09f93ec1a1fab1 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -1550,8 +1550,8 @@ specialize_load_global_lock_held( unspecialize(instr, SPEC_FAIL_OUT_OF_RANGE); return; } - uint32_t keys_version = _PyDictKeys_GetVersionForCurrentState( - interp, globals_keys); + uint32_t keys_version = _PyDict_GetKeysVersionForCurrentState( + interp, (PyDictObject*) globals); if (keys_version == 0) { unspecialize(instr, SPEC_FAIL_OUT_OF_VERSIONS); return; @@ -1583,8 +1583,8 @@ specialize_load_global_lock_held( unspecialize(instr, SPEC_FAIL_OUT_OF_RANGE); return; } - uint32_t globals_version = _PyDictKeys_GetVersionForCurrentState( - interp, globals_keys); + uint32_t globals_version = _PyDict_GetKeysVersionForCurrentState( + interp, (PyDictObject*) globals); if (globals_version == 0) { unspecialize(instr, SPEC_FAIL_OUT_OF_VERSIONS); return; @@ -1593,8 +1593,8 @@ specialize_load_global_lock_held( unspecialize(instr, SPEC_FAIL_OUT_OF_RANGE); return; } - uint32_t builtins_version = _PyDictKeys_GetVersionForCurrentState( - interp, builtin_keys); + uint32_t builtins_version = _PyDict_GetKeysVersionForCurrentState( + interp, (PyDictObject*) builtins); if (builtins_version == 0) { unspecialize(instr, SPEC_FAIL_OUT_OF_VERSIONS); return; From 129300b45cdf52a5f1e024fcfbecc5a5a5d9c8f5 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Sat, 16 Nov 2024 17:15:07 -0800 Subject: [PATCH 11/16] Don't check keys after loading them Keys are freed using QSBR. Nothing occurs in between loading the keys and loading the value that would cause the QSBR version to advance, ensuring that the keys object cannot be freed and reused. --- Python/bytecodes.c | 10 ---------- Python/executor_cases.c.h | 16 ---------------- Python/generated_cases.c.h | 10 ---------- 3 files changed, 36 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index c3cea0f67b24e4..64ffa539899499 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -1633,11 +1633,6 @@ dummy_func( #if Py_GIL_DISABLED int increfed = _Py_TryIncrefCompare(&entries[index].me_value, res_o); DEOPT_IF(!increfed); - PyDictObject *dict = (PyDictObject*) GLOBALS(); - if (globals_keys != _Py_atomic_load_ptr_acquire(&dict->ma_keys)) { - Py_DECREF(res_o); - DEOPT_IF(true); - } #else Py_INCREF(res_o); #endif @@ -1655,11 +1650,6 @@ dummy_func( #if Py_GIL_DISABLED int increfed = _Py_TryIncrefCompare(&entries[index].me_value, res_o); DEOPT_IF(!increfed); - PyDictObject *dict = (PyDictObject*) BUILTINS(); - if (builtins_keys != _Py_atomic_load_ptr_acquire(&dict->ma_keys)) { - Py_DECREF(res_o); - DEOPT_IF(true); - } #else Py_INCREF(res_o); #endif diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index c457562533bd42..11d16f5dd4f1f2 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -1942,14 +1942,6 @@ UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } - PyDictObject *dict = (PyDictObject*) GLOBALS(); - if (globals_keys != _Py_atomic_load_ptr_acquire(&dict->ma_keys)) { - Py_DECREF(res_o); - if (true) { - UOP_STAT_INC(uopcode, miss); - JUMP_TO_JUMP_TARGET(); - } - } #else Py_INCREF(res_o); #endif @@ -1984,14 +1976,6 @@ UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } - PyDictObject *dict = (PyDictObject*) BUILTINS(); - if (builtins_keys != _Py_atomic_load_ptr_acquire(&dict->ma_keys)) { - Py_DECREF(res_o); - if (true) { - UOP_STAT_INC(uopcode, miss); - JUMP_TO_JUMP_TARGET(); - } - } #else Py_INCREF(res_o); #endif diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index d7cf72c609d20e..86425b88ac9abf 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -6164,11 +6164,6 @@ #if Py_GIL_DISABLED int increfed = _Py_TryIncrefCompare(&entries[index].me_value, res_o); DEOPT_IF(!increfed, LOAD_GLOBAL); - PyDictObject *dict = (PyDictObject*) BUILTINS(); - if (builtins_keys != _Py_atomic_load_ptr_acquire(&dict->ma_keys)) { - Py_DECREF(res_o); - DEOPT_IF(true, LOAD_GLOBAL); - } #else Py_INCREF(res_o); #endif @@ -6212,11 +6207,6 @@ #if Py_GIL_DISABLED int increfed = _Py_TryIncrefCompare(&entries[index].me_value, res_o); DEOPT_IF(!increfed, LOAD_GLOBAL); - PyDictObject *dict = (PyDictObject*) GLOBALS(); - if (globals_keys != _Py_atomic_load_ptr_acquire(&dict->ma_keys)) { - Py_DECREF(res_o); - DEOPT_IF(true, LOAD_GLOBAL); - } #else Py_INCREF(res_o); #endif From 34404291a63239661d98bce768ca7f35764d6427 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Sat, 16 Nov 2024 19:55:57 -0800 Subject: [PATCH 12/16] Support deferred refcounting --- Include/internal/pycore_object.h | 15 +++++++++++++++ Python/bytecodes.c | 8 ++++---- Python/executor_cases.c.h | 8 ++++---- Python/generated_cases.c.h | 8 ++++---- Tools/cases_generator/analyzer.py | 1 + 5 files changed, 28 insertions(+), 12 deletions(-) diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index c7af720b1ce43d..740056e33ea300 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -14,6 +14,7 @@ extern "C" { #include "pycore_interp.h" // PyInterpreterState.gc #include "pycore_pyatomic_ft_wrappers.h" // FT_ATOMIC_STORE_PTR_RELAXED #include "pycore_pystate.h" // _PyInterpreterState_GET() +#include "pycore_stackref.h" #include "pycore_uniqueid.h" // _PyObject_ThreadIncrefSlow() // This value is added to `ob_ref_shared` for objects that use deferred @@ -591,6 +592,20 @@ _Py_TryIncrefCompare(PyObject **src, PyObject *op) return 1; } +static inline int +_Py_TryIncrefCompareStackRef(PyObject **src, PyObject *op, _PyStackRef *out) +{ + if (_Py_IsImmortal(op) || _PyObject_HasDeferredRefcount(op)) { + *out = (_PyStackRef){ .bits = (intptr_t)op | Py_TAG_DEFERRED }; + return 1; + } + if (_Py_TryIncrefCompare(src, op)) { + *out = PyStackRef_FromPyObjectSteal(op); + return 1; + } + return 0; +} + /* Loads and increfs an object from ptr, which may contain a NULL value. Safe with concurrent (atomic) updates to ptr. NOTE: The writer must set maybe-weakref on the stored object! */ diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 64ffa539899499..918877bd1403f0 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -1631,14 +1631,14 @@ dummy_func( SYNC_SP(); DEOPT_IF(res_o == NULL); #if Py_GIL_DISABLED - int increfed = _Py_TryIncrefCompare(&entries[index].me_value, res_o); + int increfed = _Py_TryIncrefCompareStackRef(&entries[index].me_value, res_o, &res); DEOPT_IF(!increfed); #else Py_INCREF(res_o); + res = PyStackRef_FromPyObjectSteal(res_o); #endif STAT_INC(LOAD_GLOBAL, hit); null = PyStackRef_NULL; - res = PyStackRef_FromPyObjectSteal(res_o); } op(_LOAD_GLOBAL_BUILTINS_FROM_KEYS, (index/1, builtins_keys: PyDictKeysObject* -- res, null if (oparg & 1))) { @@ -1648,14 +1648,14 @@ dummy_func( SYNC_SP(); DEOPT_IF(res_o == NULL); #if Py_GIL_DISABLED - int increfed = _Py_TryIncrefCompare(&entries[index].me_value, res_o); + int increfed = _Py_TryIncrefCompareStackRef(&entries[index].me_value, res_o, &res); DEOPT_IF(!increfed); #else Py_INCREF(res_o); + res = PyStackRef_FromPyObjectSteal(res_o); #endif STAT_INC(LOAD_GLOBAL, hit); null = PyStackRef_NULL; - res = PyStackRef_FromPyObjectSteal(res_o); } macro(LOAD_GLOBAL_MODULE) = diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 11d16f5dd4f1f2..a59383eec22087 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -1937,17 +1937,17 @@ JUMP_TO_JUMP_TARGET(); } #if Py_GIL_DISABLED - int increfed = _Py_TryIncrefCompare(&entries[index].me_value, res_o); + int increfed = _Py_TryIncrefCompareStackRef(&entries[index].me_value, res_o, &res); if (!increfed) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } #else Py_INCREF(res_o); + res = PyStackRef_FromPyObjectSteal(res_o); #endif STAT_INC(LOAD_GLOBAL, hit); null = PyStackRef_NULL; - res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[0] = res; if (oparg & 1) stack_pointer[1] = null; stack_pointer += 1 + (oparg & 1); @@ -1971,17 +1971,17 @@ JUMP_TO_JUMP_TARGET(); } #if Py_GIL_DISABLED - int increfed = _Py_TryIncrefCompare(&entries[index].me_value, res_o); + int increfed = _Py_TryIncrefCompareStackRef(&entries[index].me_value, res_o, &res); if (!increfed) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } #else Py_INCREF(res_o); + res = PyStackRef_FromPyObjectSteal(res_o); #endif STAT_INC(LOAD_GLOBAL, hit); null = PyStackRef_NULL; - res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[0] = res; if (oparg & 1) stack_pointer[1] = null; stack_pointer += 1 + (oparg & 1); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 86425b88ac9abf..52ecac7b5f8e68 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -6162,14 +6162,14 @@ PyObject *res_o = FT_ATOMIC_LOAD_PTR_RELAXED(entries[index].me_value); DEOPT_IF(res_o == NULL, LOAD_GLOBAL); #if Py_GIL_DISABLED - int increfed = _Py_TryIncrefCompare(&entries[index].me_value, res_o); + int increfed = _Py_TryIncrefCompareStackRef(&entries[index].me_value, res_o, &res); DEOPT_IF(!increfed, LOAD_GLOBAL); #else Py_INCREF(res_o); + res = PyStackRef_FromPyObjectSteal(res_o); #endif STAT_INC(LOAD_GLOBAL, hit); null = PyStackRef_NULL; - res = PyStackRef_FromPyObjectSteal(res_o); } stack_pointer[0] = res; if (oparg & 1) stack_pointer[1] = null; @@ -6205,14 +6205,14 @@ PyObject *res_o = FT_ATOMIC_LOAD_PTR_RELAXED(entries[index].me_value); DEOPT_IF(res_o == NULL, LOAD_GLOBAL); #if Py_GIL_DISABLED - int increfed = _Py_TryIncrefCompare(&entries[index].me_value, res_o); + int increfed = _Py_TryIncrefCompareStackRef(&entries[index].me_value, res_o, &res); DEOPT_IF(!increfed, LOAD_GLOBAL); #else Py_INCREF(res_o); + res = PyStackRef_FromPyObjectSteal(res_o); #endif STAT_INC(LOAD_GLOBAL, hit); null = PyStackRef_NULL; - res = PyStackRef_FromPyObjectSteal(res_o); } stack_pointer[0] = res; if (oparg & 1) stack_pointer[1] = null; diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 8a68de2633c614..acc38a69c9aa7c 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -624,6 +624,7 @@ def has_error_without_pop(op: parser.InstDef) -> bool: "_Py_SINGLETON", "_Py_STR", "_Py_TryIncrefCompare", + "_Py_TryIncrefCompareStackRef", "_Py_atomic_load_ptr_acquire", "_Py_atomic_load_uintptr_relaxed", "_Py_set_eval_breaker_bit", From bfe1c93285ca290e2f2eef49ae3cf3f013b94e3c Mon Sep 17 00:00:00 2001 From: Matt Page Date: Mon, 18 Nov 2024 21:19:07 -0800 Subject: [PATCH 13/16] Fix param name --- Include/internal/pycore_dict.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Include/internal/pycore_dict.h b/Include/internal/pycore_dict.h index be869983057a83..4f53ce6855120a 100644 --- a/Include/internal/pycore_dict.h +++ b/Include/internal/pycore_dict.h @@ -97,7 +97,7 @@ extern uint32_t _PyDictKeys_GetVersionForCurrentState( * * Returns the version number, or zero if it was not possible to get a version number. */ extern uint32_t _PyDict_GetKeysVersionForCurrentState( - PyInterpreterState *interp, PyDictObject *dictkeys); + PyInterpreterState *interp, PyDictObject *dict); extern size_t _PyDict_KeysSize(PyDictKeysObject *keys); From d5258e05b4153112209a76e39e1acf28ec2f58c5 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Mon, 18 Nov 2024 21:34:25 -0800 Subject: [PATCH 14/16] Always mark keys shared --- Objects/dictobject.c | 37 ++++++++++++------------------------- 1 file changed, 12 insertions(+), 25 deletions(-) diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 3bd895bf34e90f..e1636f50833e33 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -1290,7 +1290,7 @@ ensure_shared_on_keys_version_assignment(PyDictObject *mp) { ASSERT_DICT_LOCKED((PyObject *) mp); #ifdef Py_GIL_DISABLED - if (!_Py_IsOwnedByCurrentThread((PyObject *)mp) && !IS_DICT_SHARED(mp)) { + if (!IS_DICT_SHARED(mp)) { // This ensures that a concurrent resize operation will delay // freeing the old keys or values using QSBR, which is necessary to // safely allow concurrent reads without locking. @@ -7349,31 +7349,19 @@ get_next_dict_keys_version(PyInterpreterState *interp) return v; } -typedef struct { - uint32_t version; - int assigned; -} assign_keys_version_result; - -static inline assign_keys_version_result -assign_keys_version(PyInterpreterState *interp, PyDictKeysObject *dictkeys) -{ - uint32_t dk_version = FT_ATOMIC_LOAD_UINT32_RELAXED(dictkeys->dk_version); - if (dk_version != 0) { - return (assign_keys_version_result){dk_version, 0}; - } - dk_version = get_next_dict_keys_version(interp); - FT_ATOMIC_STORE_UINT32_RELAXED(dictkeys->dk_version, dk_version); - return (assign_keys_version_result){dk_version, 1}; -} - // In free-threaded builds the caller must ensure that the keys object is not // being mutated concurrently by another thread. uint32_t _PyDictKeys_GetVersionForCurrentState(PyInterpreterState *interp, PyDictKeysObject *dictkeys) { - assign_keys_version_result res = assign_keys_version(interp, dictkeys); - return res.version; + uint32_t dk_version = FT_ATOMIC_LOAD_UINT32_RELAXED(dictkeys->dk_version); + if (dk_version != 0) { + return dk_version; + } + dk_version = get_next_dict_keys_version(interp); + FT_ATOMIC_STORE_UINT32_RELAXED(dictkeys->dk_version, dk_version); + return dk_version; } uint32_t @@ -7381,11 +7369,10 @@ _PyDict_GetKeysVersionForCurrentState(PyInterpreterState *interp, PyDictObject *dict) { ASSERT_DICT_LOCKED((PyObject *) dict); - assign_keys_version_result res = assign_keys_version(interp, dict->ma_keys); - if (res.assigned) { - ensure_shared_on_keys_version_assignment(dict); - } - return res.version; + uint32_t dk_version = + _PyDictKeys_GetVersionForCurrentState(interp, dict->ma_keys); + ensure_shared_on_keys_version_assignment(dict); + return dk_version; } static inline int From 852be3891639e1b817da9794debf75f81fd2339d Mon Sep 17 00:00:00 2001 From: Matt Page Date: Mon, 18 Nov 2024 21:34:35 -0800 Subject: [PATCH 15/16] Update comment --- Include/internal/pycore_dict.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Include/internal/pycore_dict.h b/Include/internal/pycore_dict.h index 4f53ce6855120a..6e4a308226f3fe 100644 --- a/Include/internal/pycore_dict.h +++ b/Include/internal/pycore_dict.h @@ -95,6 +95,8 @@ extern uint32_t _PyDictKeys_GetVersionForCurrentState( * In free-threaded builds ensures that the dict can be used for lock-free * reads if a version was assigned. * + * The caller must hold the per-object lock on dict. + * * Returns the version number, or zero if it was not possible to get a version number. */ extern uint32_t _PyDict_GetKeysVersionForCurrentState( PyInterpreterState *interp, PyDictObject *dict); From 01f41432db65b4b157c7f8979328d02ea8c967db Mon Sep 17 00:00:00 2001 From: Matt Page Date: Wed, 20 Nov 2024 15:06:39 -0800 Subject: [PATCH 16/16] Don't pass reason to unspecialize --- Python/specialize.c | 59 ++++++++++++++++++++++++--------------------- 1 file changed, 31 insertions(+), 28 deletions(-) diff --git a/Python/specialize.c b/Python/specialize.c index fcde6a354caa3d..af37e241965b48 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -1530,34 +1530,34 @@ specialize_load_global_lock_held( _PyLoadGlobalCache *cache = (_PyLoadGlobalCache *)(instr + 1); assert(PyUnicode_CheckExact(name)); if (!PyDict_CheckExact(globals)) { - unspecialize(instr, SPEC_FAIL_LOAD_GLOBAL_NON_DICT); - return; + SPECIALIZATION_FAIL(LOAD_GLOBAL, SPEC_FAIL_LOAD_GLOBAL_NON_DICT); + goto fail; } PyDictKeysObject * globals_keys = ((PyDictObject *)globals)->ma_keys; if (!DK_IS_UNICODE(globals_keys)) { - unspecialize(instr, SPEC_FAIL_LOAD_GLOBAL_NON_STRING_OR_SPLIT); - return; + SPECIALIZATION_FAIL(LOAD_GLOBAL, SPEC_FAIL_LOAD_GLOBAL_NON_STRING_OR_SPLIT); + goto fail; } Py_ssize_t index = _PyDictKeys_StringLookup(globals_keys, name); if (index == DKIX_ERROR) { - unspecialize(instr, SPEC_FAIL_EXPECTED_ERROR); - return; + SPECIALIZATION_FAIL(LOAD_GLOBAL, SPEC_FAIL_EXPECTED_ERROR); + goto fail; } PyInterpreterState *interp = _PyInterpreterState_GET(); if (index != DKIX_EMPTY) { if (index != (uint16_t)index) { - unspecialize(instr, SPEC_FAIL_OUT_OF_RANGE); - return; + SPECIALIZATION_FAIL(LOAD_GLOBAL, SPEC_FAIL_OUT_OF_RANGE); + goto fail; } uint32_t keys_version = _PyDict_GetKeysVersionForCurrentState( interp, (PyDictObject*) globals); if (keys_version == 0) { - unspecialize(instr, SPEC_FAIL_OUT_OF_VERSIONS); - return; + SPECIALIZATION_FAIL(LOAD_GLOBAL, SPEC_FAIL_OUT_OF_VERSIONS); + goto fail; } if (keys_version != (uint16_t)keys_version) { - unspecialize(instr, SPEC_FAIL_OUT_OF_RANGE); - return; + SPECIALIZATION_FAIL(LOAD_GLOBAL, SPEC_FAIL_OUT_OF_RANGE); + goto fail; } cache->index = (uint16_t)index; cache->module_keys_version = (uint16_t)keys_version; @@ -1565,47 +1565,50 @@ specialize_load_global_lock_held( return; } if (!PyDict_CheckExact(builtins)) { - unspecialize(instr, SPEC_FAIL_LOAD_GLOBAL_NON_DICT); - return; + SPECIALIZATION_FAIL(LOAD_GLOBAL, SPEC_FAIL_LOAD_GLOBAL_NON_DICT); + goto fail; } PyDictKeysObject * builtin_keys = ((PyDictObject *)builtins)->ma_keys; if (!DK_IS_UNICODE(builtin_keys)) { - unspecialize(instr, SPEC_FAIL_LOAD_GLOBAL_NON_STRING_OR_SPLIT); - return; + SPECIALIZATION_FAIL(LOAD_GLOBAL, SPEC_FAIL_LOAD_GLOBAL_NON_STRING_OR_SPLIT); + goto fail; } index = _PyDictKeys_StringLookup(builtin_keys, name); if (index == DKIX_ERROR) { - unspecialize(instr, SPEC_FAIL_EXPECTED_ERROR); - return; + SPECIALIZATION_FAIL(LOAD_GLOBAL, SPEC_FAIL_EXPECTED_ERROR); + goto fail; } if (index != (uint16_t)index) { - unspecialize(instr, SPEC_FAIL_OUT_OF_RANGE); - return; + SPECIALIZATION_FAIL(LOAD_GLOBAL, SPEC_FAIL_OUT_OF_RANGE); + goto fail; } uint32_t globals_version = _PyDict_GetKeysVersionForCurrentState( interp, (PyDictObject*) globals); if (globals_version == 0) { - unspecialize(instr, SPEC_FAIL_OUT_OF_VERSIONS); - return; + SPECIALIZATION_FAIL(LOAD_GLOBAL, SPEC_FAIL_OUT_OF_VERSIONS); + goto fail; } if (globals_version != (uint16_t)globals_version) { - unspecialize(instr, SPEC_FAIL_OUT_OF_RANGE); - return; + SPECIALIZATION_FAIL(LOAD_GLOBAL, SPEC_FAIL_OUT_OF_RANGE); + goto fail; } uint32_t builtins_version = _PyDict_GetKeysVersionForCurrentState( interp, (PyDictObject*) builtins); if (builtins_version == 0) { - unspecialize(instr, SPEC_FAIL_OUT_OF_VERSIONS); - return; + SPECIALIZATION_FAIL(LOAD_GLOBAL, SPEC_FAIL_OUT_OF_VERSIONS); + goto fail; } if (builtins_version > UINT16_MAX) { - unspecialize(instr, SPEC_FAIL_OUT_OF_RANGE); - return; + SPECIALIZATION_FAIL(LOAD_GLOBAL, SPEC_FAIL_OUT_OF_RANGE); + goto fail; } cache->index = (uint16_t)index; cache->module_keys_version = (uint16_t)globals_version; cache->builtin_keys_version = (uint16_t)builtins_version; specialize(instr, LOAD_GLOBAL_BUILTIN); + return; +fail: + unspecialize(instr); } void