From c2b071381d87060dc5b154fdde07ac915e84b819 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Tue, 1 Mar 2022 17:33:38 -0800 Subject: [PATCH 01/11] Use inline caching for attribute accesses --- Include/internal/pycore_code.h | 93 ++++++++++++--- Include/opcode.h | 3 + Lib/opcode.py | 6 +- Lib/test/test_dis.py | 2 +- Programs/test_frozenmain.h | 16 +-- Python/ceval.c | 211 +++++++++++++++++---------------- Python/specialize.c | 110 ++++++++--------- 7 files changed, 256 insertions(+), 185 deletions(-) diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index b9671d0ec32bb8..56af0310c2a421 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -20,14 +20,7 @@ typedef struct { uint32_t version; } _PyAdaptiveEntry; - -typedef struct { - uint32_t tp_version; - uint32_t dk_version; -} _PyAttrCache; - typedef struct { - /* Borrowed ref in LOAD_METHOD */ PyObject *obj; } _PyObjectCache; @@ -51,7 +44,6 @@ typedef struct { typedef union { _PyEntryZero zero; _PyAdaptiveEntry adaptive; - _PyAttrCache attr; _PyObjectCache obj; _PyCallCache call; } SpecializedCacheEntry; @@ -65,8 +57,7 @@ typedef union { typedef struct { _Py_CODEUNIT counter; _Py_CODEUNIT index; - _Py_CODEUNIT module_keys_version; - _Py_CODEUNIT _m1; + _Py_CODEUNIT module_keys_version[2]; _Py_CODEUNIT builtin_keys_version; } _PyLoadGlobalCache; @@ -94,13 +85,32 @@ typedef struct { typedef struct { _Py_CODEUNIT counter; - _Py_CODEUNIT type_version; - _Py_CODEUNIT _t1; + _Py_CODEUNIT type_version[2]; _Py_CODEUNIT func_version; } _PyBinarySubscrCache; #define INLINE_CACHE_ENTRIES_BINARY_SUBSCR CACHE_ENTRIES(_PyBinarySubscrCache) +typedef struct { + _Py_CODEUNIT counter; + _Py_CODEUNIT version[2]; + _Py_CODEUNIT index; +} _PyAttrCache; + +#define INLINE_CACHE_ENTRIES_LOAD_ATTR CACHE_ENTRIES(_PyAttrCache) + +#define INLINE_CACHE_ENTRIES_STORE_ATTR CACHE_ENTRIES(_PyAttrCache) + +typedef struct { + _Py_CODEUNIT counter; + _Py_CODEUNIT type_version[2]; + _Py_CODEUNIT dict_offset; + _Py_CODEUNIT keys_version[2]; + _Py_CODEUNIT descr[4]; +} _PyLoadMethodCache; + +#define INLINE_CACHE_ENTRIES_LOAD_METHOD CACHE_ENTRIES(_PyLoadMethodCache) + /* Maximum size of code to quicken, in code units. */ #define MAX_SIZE_TO_QUICKEN 5000 @@ -328,10 +338,13 @@ cache_backoff(_PyAdaptiveEntry *entry) { /* Specialization functions */ -extern int _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache); -extern int _Py_Specialize_StoreAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache); +extern int _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, + PyObject *name); +extern int _Py_Specialize_StoreAttr(PyObject *owner, _Py_CODEUNIT *instr, + PyObject *name); extern int _Py_Specialize_LoadGlobal(PyObject *globals, PyObject *builtins, _Py_CODEUNIT *instr, PyObject *name); -extern int _Py_Specialize_LoadMethod(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache); +extern int _Py_Specialize_LoadMethod(PyObject *owner, _Py_CODEUNIT *instr, + PyObject *name); extern int _Py_Specialize_BinarySubscr(PyObject *sub, PyObject *container, _Py_CODEUNIT *instr); extern int _Py_Specialize_StoreSubscr(PyObject *container, PyObject *sub, _Py_CODEUNIT *instr); extern int _Py_Specialize_Call(PyObject *callable, _Py_CODEUNIT *instr, int nargs, @@ -416,33 +429,75 @@ extern PyObject* _Py_GetSpecializationStats(void); #ifdef WORDS_BIGENDIAN static inline void -write32(uint16_t *p, uint32_t val) +write_u32(uint16_t p[2], uint32_t val) { p[0] = val >> 16; p[1] = (uint16_t)val; } static inline uint32_t -read32(uint16_t *p) +read_u32(uint16_t p[2]) { return (p[0] << 16) | p[1]; } +static inline void +write_obj(uint16_t p[4], PyObject *obj) +{ + uint64_t val = (uintptr_t)obj; + p[0] = (uint16_t)(val >> 48); + p[1] = (uint16_t)(val >> 32); + p[2] = (uint16_t)(val >> 16); + p[3] = (uint16_t)(val >> 0); +} + +static inline PyObject * +read_obj(uint16_t p[4]) +{ + uintptr_t val = 0; + val |= (uint64_t)p[0] << 48; + val |= (uint64_t)p[1] << 32; + val |= (uint64_t)p[2] << 16; + val |= (uint64_t)p[3] << 0; + return (PyObject *)val; +} + #else static inline void -write32(uint16_t *p, uint32_t val) +write_u32(uint16_t p[2], uint32_t val) { p[0] = (uint16_t)val; p[1] = val >> 16; } static inline uint32_t -read32(uint16_t *p) +read_u32(uint16_t p[2]) { return p[0] | (p[1] << 16); } +static inline void +write_obj(uint16_t p[4], PyObject *obj) +{ + uint64_t val = (uintptr_t)obj; + p[0] = (uint16_t)(val >> 0); + p[1] = (uint16_t)(val >> 16); + p[2] = (uint16_t)(val >> 32); + p[3] = (uint16_t)(val >> 48); +} + +static inline PyObject * +read_obj(uint16_t p[4]) +{ + uintptr_t val = 0; + val |= (uint64_t)p[0] << 0; + val |= (uint64_t)p[1] << 16; + val |= (uint64_t)p[2] << 32; + val |= (uint64_t)p[3] << 48; + return (PyObject *)val; +} + #endif #ifdef __cplusplus diff --git a/Include/opcode.h b/Include/opcode.h index f6330d9056aa12..656b06fea6ada4 100644 --- a/Include/opcode.h +++ b/Include/opcode.h @@ -213,9 +213,12 @@ static const uint32_t _PyOpcode_Jump[8] = { const uint8_t _PyOpcode_InlineCacheEntries[256] = { [BINARY_SUBSCR] = 4, [UNPACK_SEQUENCE] = 1, + [STORE_ATTR] = 4, + [LOAD_ATTR] = 4, [COMPARE_OP] = 2, [LOAD_GLOBAL] = 5, [BINARY_OP] = 1, + [LOAD_METHOD] = 10, }; #endif /* OPCODE_TABLES */ diff --git a/Lib/opcode.py b/Lib/opcode.py index 9b08562cd04f69..4d53e2e29a1306 100644 --- a/Lib/opcode.py +++ b/Lib/opcode.py @@ -112,7 +112,7 @@ def jabs_op(name, op, entries=0): def_op('UNPACK_SEQUENCE', 92, 1) # Number of tuple items jrel_op('FOR_ITER', 93) def_op('UNPACK_EX', 94) -name_op('STORE_ATTR', 95) # Index in name list +name_op('STORE_ATTR', 95, 4) # Index in name list name_op('DELETE_ATTR', 96) # "" name_op('STORE_GLOBAL', 97) # "" name_op('DELETE_GLOBAL', 98) # "" @@ -124,7 +124,7 @@ def jabs_op(name, op, entries=0): def_op('BUILD_LIST', 103) # Number of list items def_op('BUILD_SET', 104) # Number of set items def_op('BUILD_MAP', 105) # Number of dict entries -name_op('LOAD_ATTR', 106) # Index in name list +name_op('LOAD_ATTR', 106, 4) # Index in name list def_op('COMPARE_OP', 107, 2) # Comparison operator hascompare.append(107) name_op('IMPORT_NAME', 108) # Index in name list @@ -186,7 +186,7 @@ def jabs_op(name, op, entries=0): def_op('BUILD_CONST_KEY_MAP', 156) def_op('BUILD_STRING', 157) -name_op('LOAD_METHOD', 160) +name_op('LOAD_METHOD', 160, 10) def_op('LIST_EXTEND', 162) def_op('SET_UPDATE', 163) diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py index 8de2ed09e83528..7e0542ae0ae9e3 100644 --- a/Lib/test/test_dis.py +++ b/Lib/test/test_dis.py @@ -375,7 +375,7 @@ def bug42562(): >> PUSH_EXC_INFO %3d LOAD_GLOBAL 0 (Exception) - JUMP_IF_NOT_EXC_MATCH 31 (to 62) + JUMP_IF_NOT_EXC_MATCH 35 (to 70) STORE_FAST 0 (e) %3d LOAD_FAST 0 (e) diff --git a/Programs/test_frozenmain.h b/Programs/test_frozenmain.h index 3fef981e42ff96..4ebab4f7544bbc 100644 --- a/Programs/test_frozenmain.h +++ b/Programs/test_frozenmain.h @@ -1,14 +1,15 @@ // Auto-generated by Programs/freeze_test_frozenmain.py unsigned char M_test_frozenmain[] = { 227,0,0,0,0,0,0,0,0,0,0,0,0,8,0,0, - 0,0,0,0,0,115,120,0,0,0,151,0,100,0,100,1, + 0,0,0,0,0,115,136,0,0,0,151,0,100,0,100,1, 108,0,90,0,100,0,100,1,108,1,90,1,2,0,101,2, 100,2,166,1,171,1,1,0,2,0,101,2,100,3,101,0, - 106,3,166,2,171,2,1,0,2,0,101,1,106,4,166,0, + 106,3,3,0,3,0,3,0,3,0,166,2,171,2,1,0, + 2,0,101,1,106,4,3,0,3,0,3,0,3,0,166,0, 171,0,100,4,25,0,3,0,3,0,3,0,3,0,90,5, 100,5,68,0,93,20,90,6,2,0,101,2,100,6,101,6, 155,0,100,7,101,5,101,6,25,0,3,0,3,0,3,0, - 3,0,155,0,157,4,166,1,171,1,1,0,113,37,100,1, + 3,0,155,0,157,4,166,1,171,1,1,0,113,45,100,1, 83,0,41,8,233,0,0,0,0,78,122,18,70,114,111,122, 101,110,32,72,101,108,108,111,32,87,111,114,108,100,122,8, 115,121,115,46,97,114,103,118,218,6,99,111,110,102,105,103, @@ -25,12 +26,13 @@ unsigned char M_test_frozenmain[] = { 0,0,250,18,116,101,115,116,95,102,114,111,122,101,110,109, 97,105,110,46,112,121,250,8,60,109,111,100,117,108,101,62, 114,11,0,0,0,1,0,0,0,115,18,0,0,0,2,128, - 8,3,8,1,12,2,16,1,24,1,8,1,38,7,4,249, - 115,20,0,0,0,2,128,8,3,8,1,12,2,16,1,24, - 1,2,7,4,1,2,249,42,7,115,120,0,0,0,0,0, + 8,3,8,1,12,2,24,1,32,1,8,1,38,7,4,249, + 115,20,0,0,0,2,128,8,3,8,1,12,2,24,1,32, + 1,2,7,4,1,2,249,42,7,115,136,0,0,0,0,0, 1,11,1,11,1,11,1,11,1,25,1,25,1,25,1,25, 1,6,1,6,7,27,1,28,1,28,1,28,1,6,1,6, - 7,17,19,22,19,27,1,28,1,28,1,28,10,39,10,27, + 7,17,19,22,19,27,19,27,19,27,19,27,19,27,1,28, + 1,28,1,28,10,39,10,27,10,39,10,39,10,39,10,39, 10,39,10,41,10,41,42,50,10,51,10,51,10,51,10,51, 10,51,1,7,12,2,1,42,1,42,5,8,5,10,5,10, 11,41,21,24,11,41,11,41,28,34,35,38,28,39,28,39, diff --git a/Python/ceval.c b/Python/ceval.c index b3673d7d04ab2d..cc6db7a1ee78b4 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1448,18 +1448,19 @@ eval_frame_handle_pending(PyThreadState *tstate) // shared by LOAD_ATTR_MODULE and LOAD_METHOD_MODULE #define LOAD_MODULE_ATTR_OR_METHOD(attr_or_method) \ - SpecializedCacheEntry *caches = GET_CACHE(); \ - _PyAdaptiveEntry *cache0 = &caches[0].adaptive; \ + _PyAttrCache *cache = (_PyAttrCache *)next_instr; \ DEOPT_IF(!PyModule_CheckExact(owner), LOAD_##attr_or_method); \ PyDictObject *dict = (PyDictObject *)((PyModuleObject *)owner)->md_dict; \ assert(dict != NULL); \ - DEOPT_IF(dict->ma_keys->dk_version != cache0->version, \ - LOAD_##attr_or_method); \ + DEOPT_IF(dict->ma_keys->dk_version != read_u32(cache->version), \ + LOAD_##attr_or_method); \ assert(dict->ma_keys->dk_kind == DICT_KEYS_UNICODE); \ - assert(cache0->index < dict->ma_keys->dk_nentries); \ - PyDictKeyEntry *ep = DK_ENTRIES(dict->ma_keys) + cache0->index; \ + assert(cache->index < dict->ma_keys->dk_nentries); \ + PyDictKeyEntry *ep = DK_ENTRIES(dict->ma_keys) + cache->index; \ res = ep->me_value; \ DEOPT_IF(res == NULL, LOAD_##attr_or_method); \ + /* XXX: Uncomment this next line to make test_asyncio very angry! */ \ + DEOPT_IF(LOAD_##attr_or_method == LOAD_ATTR, LOAD_##attr_or_method); \ STAT_INC(LOAD_##attr_or_method, hit); \ Py_INCREF(res); @@ -2197,7 +2198,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int PyObject *sub = TOP(); PyObject *container = SECOND(); _PyBinarySubscrCache *cache = (_PyBinarySubscrCache *)next_instr; - uint32_t type_version = read32(&cache->type_version); + uint32_t type_version = read_u32(cache->type_version); PyTypeObject *tp = Py_TYPE(container); DEOPT_IF(tp->tp_version_tag != type_version, BINARY_SUBSCR); assert(tp->tp_flags & Py_TPFLAGS_HEAPTYPE); @@ -2849,8 +2850,10 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int err = PyObject_SetAttr(owner, name, v); Py_DECREF(v); Py_DECREF(owner); - if (err != 0) + if (err != 0) { goto error; + } + JUMPBY(INLINE_CACHE_ENTRIES_STORE_ATTR); DISPATCH(); } @@ -3028,7 +3031,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int DEOPT_IF(!PyDict_CheckExact(GLOBALS()), LOAD_GLOBAL); PyDictObject *dict = (PyDictObject *)GLOBALS(); _PyLoadGlobalCache *cache = (_PyLoadGlobalCache *)next_instr; - uint32_t version = read32(&cache->module_keys_version); + uint32_t version = read_u32(cache->module_keys_version); DEOPT_IF(dict->ma_keys->dk_version != version, LOAD_GLOBAL); PyDictKeyEntry *ep = DK_ENTRIES(dict->ma_keys) + cache->index; PyObject *res = ep->me_value; @@ -3047,7 +3050,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int PyDictObject *mdict = (PyDictObject *)GLOBALS(); PyDictObject *bdict = (PyDictObject *)BUILTINS(); _PyLoadGlobalCache *cache = (_PyLoadGlobalCache *)next_instr; - uint32_t mod_version = read32(&cache->module_keys_version); + uint32_t mod_version = read_u32(cache->module_keys_version); uint16_t bltn_version = cache->builtin_keys_version; DEOPT_IF(mdict->ma_keys->dk_version != mod_version, LOAD_GLOBAL); DEOPT_IF(bdict->ma_keys->dk_version != bltn_version, LOAD_GLOBAL); @@ -3438,25 +3441,25 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int } Py_DECREF(owner); SET_TOP(res); + JUMPBY(INLINE_CACHE_ENTRIES_LOAD_ATTR); DISPATCH(); } TARGET(LOAD_ATTR_ADAPTIVE) { assert(cframe.use_tracing == 0); - SpecializedCacheEntry *cache = GET_CACHE(); - if (cache->adaptive.counter == 0) { + _PyAttrCache *cache = (_PyAttrCache *)next_instr; + if (cache->counter == 0) { PyObject *owner = TOP(); - PyObject *name = GETITEM(names, cache->adaptive.original_oparg); + PyObject *name = GETITEM(names, oparg); next_instr--; - if (_Py_Specialize_LoadAttr(owner, next_instr, name, cache) < 0) { + if (_Py_Specialize_LoadAttr(owner, next_instr, name) < 0) { goto error; } DISPATCH(); } else { STAT_INC(LOAD_ATTR, deferred); - cache->adaptive.counter--; - oparg = cache->adaptive.original_oparg; + cache->counter--; JUMP_TO_INSTRUCTION(LOAD_ATTR); } } @@ -3469,23 +3472,22 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int } // GET_CACHE(), but for the following opcode assert(_Py_OPCODE(*next_instr) == LOAD_ATTR_INSTANCE_VALUE); - SpecializedCacheEntry *caches = _GetSpecializedCacheEntryForInstruction( - first_instr, INSTR_OFFSET() + 1, _Py_OPARG(*next_instr)); - _PyAdaptiveEntry *cache0 = &caches[0].adaptive; - assert(cache0->version != 0); + _PyAttrCache *cache = (_PyAttrCache *)(next_instr + 1); + uint32_t type_version = read_u32(cache->version); + assert(type_version != 0); PyTypeObject *tp = Py_TYPE(owner); // These DEOPT_IF miss branches do PUSH(Py_NewRef(owner)). - DEOPT_IF(tp->tp_version_tag != cache0->version, + DEOPT_IF(tp->tp_version_tag != type_version, LOAD_FAST__LOAD_ATTR_INSTANCE_VALUE); assert(tp->tp_dictoffset < 0); assert(tp->tp_flags & Py_TPFLAGS_MANAGED_DICT); PyDictValues *values = *_PyObject_ValuesPointer(owner); DEOPT_IF(values == NULL, LOAD_FAST__LOAD_ATTR_INSTANCE_VALUE); - PyObject *res = values->values[cache0->index]; + PyObject *res = values->values[cache->index]; DEOPT_IF(res == NULL, LOAD_FAST__LOAD_ATTR_INSTANCE_VALUE); STAT_INC(LOAD_ATTR, hit); PUSH(Py_NewRef(res)); - next_instr++; + JUMPBY(INLINE_CACHE_ENTRIES_LOAD_ATTR + 1); NOTRACE_DISPATCH(); } @@ -3494,20 +3496,22 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int PyObject *owner = TOP(); PyObject *res; PyTypeObject *tp = Py_TYPE(owner); - SpecializedCacheEntry *caches = GET_CACHE(); - _PyAdaptiveEntry *cache0 = &caches[0].adaptive; - assert(cache0->version != 0); - DEOPT_IF(tp->tp_version_tag != cache0->version, LOAD_ATTR_INSTANCE_VALUE); + _PyAttrCache *cache = (_PyAttrCache *)next_instr; + uint32_t type_version = read_u32(cache->version); + assert(type_version != 0); + DEOPT_IF(tp->tp_version_tag != type_version, + LOAD_ATTR_INSTANCE_VALUE); assert(tp->tp_dictoffset < 0); assert(tp->tp_flags & Py_TPFLAGS_MANAGED_DICT); PyDictValues *values = *_PyObject_ValuesPointer(owner); DEOPT_IF(values == NULL, LOAD_ATTR_INSTANCE_VALUE); - res = values->values[cache0->index]; + res = values->values[cache->index]; DEOPT_IF(res == NULL, LOAD_ATTR_INSTANCE_VALUE); STAT_INC(LOAD_ATTR, hit); Py_INCREF(res); SET_TOP(res); Py_DECREF(owner); + JUMPBY(INLINE_CACHE_ENTRIES_LOAD_ATTR); NOTRACE_DISPATCH(); } @@ -3519,6 +3523,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int LOAD_MODULE_ATTR_OR_METHOD(ATTR); SET_TOP(res); Py_DECREF(owner); + JUMPBY(INLINE_CACHE_ENTRIES_LOAD_ATTR); NOTRACE_DISPATCH(); } @@ -3527,16 +3532,16 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int PyObject *owner = TOP(); PyObject *res; PyTypeObject *tp = Py_TYPE(owner); - SpecializedCacheEntry *caches = GET_CACHE(); - _PyAdaptiveEntry *cache0 = &caches[0].adaptive; - assert(cache0->version != 0); - DEOPT_IF(tp->tp_version_tag != cache0->version, LOAD_ATTR); + _PyAttrCache *cache = (_PyAttrCache *)next_instr; + uint32_t type_version = read_u32(cache->version); + assert(type_version != 0); + DEOPT_IF(tp->tp_version_tag != type_version, LOAD_ATTR); assert(tp->tp_flags & Py_TPFLAGS_MANAGED_DICT); PyDictObject *dict = *(PyDictObject **)_PyObject_ManagedDictPointer(owner); DEOPT_IF(dict == NULL, LOAD_ATTR); assert(PyDict_CheckExact((PyObject *)dict)); - PyObject *name = GETITEM(names, cache0->original_oparg); - uint16_t hint = cache0->index; + PyObject *name = GETITEM(names, oparg); + uint16_t hint = cache->index; DEOPT_IF(hint >= (size_t)dict->ma_keys->dk_nentries, LOAD_ATTR); PyDictKeyEntry *ep = DK_ENTRIES(dict->ma_keys) + hint; DEOPT_IF(ep->me_key != name, LOAD_ATTR); @@ -3546,6 +3551,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int Py_INCREF(res); SET_TOP(res); Py_DECREF(owner); + JUMPBY(INLINE_CACHE_ENTRIES_LOAD_ATTR); NOTRACE_DISPATCH(); } @@ -3554,36 +3560,36 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int PyObject *owner = TOP(); PyObject *res; PyTypeObject *tp = Py_TYPE(owner); - SpecializedCacheEntry *caches = GET_CACHE(); - _PyAdaptiveEntry *cache0 = &caches[0].adaptive; - assert(cache0->version != 0); - DEOPT_IF(tp->tp_version_tag != cache0->version, LOAD_ATTR); - char *addr = (char *)owner + cache0->index; + _PyAttrCache *cache = (_PyAttrCache *)next_instr; + uint32_t type_version = read_u32(cache->version); + assert(type_version != 0); + DEOPT_IF(tp->tp_version_tag != type_version, LOAD_ATTR); + char *addr = (char *)owner + cache->index; res = *(PyObject **)addr; DEOPT_IF(res == NULL, LOAD_ATTR); STAT_INC(LOAD_ATTR, hit); Py_INCREF(res); SET_TOP(res); Py_DECREF(owner); + JUMPBY(INLINE_CACHE_ENTRIES_LOAD_ATTR); NOTRACE_DISPATCH(); } TARGET(STORE_ATTR_ADAPTIVE) { assert(cframe.use_tracing == 0); - SpecializedCacheEntry *cache = GET_CACHE(); - if (cache->adaptive.counter == 0) { + _PyAttrCache *cache = (_PyAttrCache *)next_instr; + if (cache->counter == 0) { PyObject *owner = TOP(); - PyObject *name = GETITEM(names, cache->adaptive.original_oparg); + PyObject *name = GETITEM(names, oparg); next_instr--; - if (_Py_Specialize_StoreAttr(owner, next_instr, name, cache) < 0) { + if (_Py_Specialize_StoreAttr(owner, next_instr, name) < 0) { goto error; } DISPATCH(); } else { STAT_INC(STORE_ATTR, deferred); - cache->adaptive.counter--; - oparg = cache->adaptive.original_oparg; + cache->counter--; JUMP_TO_INSTRUCTION(STORE_ATTR); } } @@ -3592,15 +3598,15 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int assert(cframe.use_tracing == 0); PyObject *owner = TOP(); PyTypeObject *tp = Py_TYPE(owner); - SpecializedCacheEntry *caches = GET_CACHE(); - _PyAdaptiveEntry *cache0 = &caches[0].adaptive; - assert(cache0->version != 0); - DEOPT_IF(tp->tp_version_tag != cache0->version, STORE_ATTR); + _PyAttrCache *cache = (_PyAttrCache *)next_instr; + uint32_t type_version = read_u32(cache->version); + assert(type_version != 0); + DEOPT_IF(tp->tp_version_tag != type_version, STORE_ATTR); assert(tp->tp_flags & Py_TPFLAGS_MANAGED_DICT); PyDictValues *values = *_PyObject_ValuesPointer(owner); DEOPT_IF(values == NULL, STORE_ATTR); STAT_INC(STORE_ATTR, hit); - Py_ssize_t index = cache0->index; + Py_ssize_t index = cache->index; STACK_SHRINK(1); PyObject *value = POP(); PyObject *old_value = values->values[index]; @@ -3612,6 +3618,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int Py_DECREF(old_value); } Py_DECREF(owner); + JUMPBY(INLINE_CACHE_ENTRIES_STORE_ATTR); NOTRACE_DISPATCH(); } @@ -3619,16 +3626,16 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int assert(cframe.use_tracing == 0); PyObject *owner = TOP(); PyTypeObject *tp = Py_TYPE(owner); - SpecializedCacheEntry *caches = GET_CACHE(); - _PyAdaptiveEntry *cache0 = &caches[0].adaptive; - assert(cache0->version != 0); - DEOPT_IF(tp->tp_version_tag != cache0->version, STORE_ATTR); + _PyAttrCache *cache = (_PyAttrCache *)next_instr; + uint32_t type_version = read_u32(cache->version); + assert(type_version != 0); + DEOPT_IF(tp->tp_version_tag != type_version, STORE_ATTR); assert(tp->tp_flags & Py_TPFLAGS_MANAGED_DICT); PyDictObject *dict = *(PyDictObject **)_PyObject_ManagedDictPointer(owner); DEOPT_IF(dict == NULL, STORE_ATTR); assert(PyDict_CheckExact((PyObject *)dict)); - PyObject *name = GETITEM(names, cache0->original_oparg); - uint16_t hint = cache0->index; + PyObject *name = GETITEM(names, oparg); + uint16_t hint = cache->index; DEOPT_IF(hint >= (size_t)dict->ma_keys->dk_nentries, STORE_ATTR); PyDictKeyEntry *ep = DK_ENTRIES(dict->ma_keys) + hint; DEOPT_IF(ep->me_key != name, STORE_ATTR); @@ -3646,6 +3653,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int /* PEP 509 */ dict->ma_version_tag = DICT_NEXT_VERSION(); Py_DECREF(owner); + JUMPBY(INLINE_CACHE_ENTRIES_STORE_ATTR); NOTRACE_DISPATCH(); } @@ -3653,11 +3661,11 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int assert(cframe.use_tracing == 0); PyObject *owner = TOP(); PyTypeObject *tp = Py_TYPE(owner); - SpecializedCacheEntry *caches = GET_CACHE(); - _PyAdaptiveEntry *cache0 = &caches[0].adaptive; - assert(cache0->version != 0); - DEOPT_IF(tp->tp_version_tag != cache0->version, STORE_ATTR); - char *addr = (char *)owner + cache0->index; + _PyAttrCache *cache = (_PyAttrCache *)next_instr; + uint32_t type_version = read_u32(cache->version); + assert(type_version != 0); + DEOPT_IF(tp->tp_version_tag != type_version, STORE_ATTR); + char *addr = (char *)owner + cache->index; STAT_INC(STORE_ATTR, hit); STACK_SHRINK(1); PyObject *value = POP(); @@ -3665,6 +3673,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int *(PyObject **)addr = value; Py_XDECREF(old_value); Py_DECREF(owner); + JUMPBY(INLINE_CACHE_ENTRIES_STORE_ATTR); NOTRACE_DISPATCH(); } @@ -4421,25 +4430,25 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int Py_DECREF(obj); PUSH(meth); } + JUMPBY(INLINE_CACHE_ENTRIES_LOAD_METHOD); DISPATCH(); } TARGET(LOAD_METHOD_ADAPTIVE) { assert(cframe.use_tracing == 0); - SpecializedCacheEntry *cache = GET_CACHE(); - if (cache->adaptive.counter == 0) { + _PyLoadMethodCache *cache = (_PyLoadMethodCache *)next_instr; + if (cache->counter == 0) { PyObject *owner = TOP(); - PyObject *name = GETITEM(names, cache->adaptive.original_oparg); + PyObject *name = GETITEM(names, oparg); next_instr--; - if (_Py_Specialize_LoadMethod(owner, next_instr, name, cache) < 0) { + if (_Py_Specialize_LoadMethod(owner, next_instr, name) < 0) { goto error; } DISPATCH(); } else { STAT_INC(LOAD_METHOD, deferred); - cache->adaptive.counter--; - oparg = cache->adaptive.original_oparg; + cache->counter--; JUMP_TO_INSTRUCTION(LOAD_METHOD); } } @@ -4449,22 +4458,22 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int assert(cframe.use_tracing == 0); PyObject *self = TOP(); PyTypeObject *self_cls = Py_TYPE(self); - SpecializedCacheEntry *caches = GET_CACHE(); - _PyAttrCache *cache1 = &caches[-1].attr; - _PyObjectCache *cache2 = &caches[-2].obj; - assert(cache1->tp_version != 0); - DEOPT_IF(self_cls->tp_version_tag != cache1->tp_version, LOAD_METHOD); + _PyLoadMethodCache *cache = (_PyLoadMethodCache *)next_instr; + uint32_t type_version = read_u32(cache->type_version); + assert(type_version != 0); + DEOPT_IF(self_cls->tp_version_tag != type_version, LOAD_METHOD); assert(self_cls->tp_flags & Py_TPFLAGS_MANAGED_DICT); PyDictObject *dict = *(PyDictObject**)_PyObject_ManagedDictPointer(self); DEOPT_IF(dict != NULL, LOAD_METHOD); - DEOPT_IF(((PyHeapTypeObject *)self_cls)->ht_cached_keys->dk_version != cache1->dk_version, LOAD_METHOD); + DEOPT_IF(((PyHeapTypeObject *)self_cls)->ht_cached_keys->dk_version != read_u32(cache->keys_version), LOAD_METHOD); STAT_INC(LOAD_METHOD, hit); - PyObject *res = cache2->obj; + PyObject *res = read_obj(cache->descr); assert(res != NULL); assert(_PyType_HasFeature(Py_TYPE(res), Py_TPFLAGS_METHOD_DESCRIPTOR)); Py_INCREF(res); SET_TOP(res); PUSH(self); + JUMPBY(INLINE_CACHE_ENTRIES_LOAD_METHOD); NOTRACE_DISPATCH(); } @@ -4474,14 +4483,12 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int assert(cframe.use_tracing == 0); PyObject *self = TOP(); PyTypeObject *self_cls = Py_TYPE(self); - SpecializedCacheEntry *caches = GET_CACHE(); - _PyAdaptiveEntry *cache0 = &caches[0].adaptive; - _PyAttrCache *cache1 = &caches[-1].attr; - _PyObjectCache *cache2 = &caches[-2].obj; + _PyLoadMethodCache *cache = (_PyLoadMethodCache *)next_instr; - DEOPT_IF(self_cls->tp_version_tag != cache1->tp_version, LOAD_METHOD); + DEOPT_IF(self_cls->tp_version_tag != read_u32(cache->type_version), + LOAD_METHOD); /* Treat index as a signed 16 bit value */ - int dictoffset = *(int16_t *)&cache0->index; + int dictoffset = *(int16_t *)&cache->dict_offset; PyDictObject **dictptr = (PyDictObject**)(((char *)self)+dictoffset); assert( dictoffset == MANAGED_DICT_OFFSET || @@ -4489,14 +4496,16 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int ); PyDictObject *dict = *dictptr; DEOPT_IF(dict == NULL, LOAD_METHOD); - DEOPT_IF(dict->ma_keys->dk_version != cache1->dk_version, LOAD_METHOD); + DEOPT_IF(dict->ma_keys->dk_version != read_u32(cache->keys_version), + LOAD_METHOD); STAT_INC(LOAD_METHOD, hit); - PyObject *res = cache2->obj; + PyObject *res = read_obj(cache->descr); assert(res != NULL); assert(_PyType_HasFeature(Py_TYPE(res), Py_TPFLAGS_METHOD_DESCRIPTOR)); Py_INCREF(res); SET_TOP(res); PUSH(self); + JUMPBY(INLINE_CACHE_ENTRIES_LOAD_METHOD); NOTRACE_DISPATCH(); } @@ -4504,18 +4513,18 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int assert(cframe.use_tracing == 0); PyObject *self = TOP(); PyTypeObject *self_cls = Py_TYPE(self); - SpecializedCacheEntry *caches = GET_CACHE(); - _PyAttrCache *cache1 = &caches[-1].attr; - _PyObjectCache *cache2 = &caches[-2].obj; - DEOPT_IF(self_cls->tp_version_tag != cache1->tp_version, LOAD_METHOD); + _PyLoadMethodCache *cache = (_PyLoadMethodCache *)next_instr; + uint32_t type_version = read_u32(cache->type_version); + DEOPT_IF(self_cls->tp_version_tag != type_version, LOAD_METHOD); assert(self_cls->tp_dictoffset == 0); STAT_INC(LOAD_METHOD, hit); - PyObject *res = cache2->obj; + PyObject *res = read_obj(cache->descr); assert(res != NULL); assert(_PyType_HasFeature(Py_TYPE(res), Py_TPFLAGS_METHOD_DESCRIPTOR)); Py_INCREF(res); SET_TOP(res); PUSH(self); + JUMPBY(INLINE_CACHE_ENTRIES_LOAD_METHOD); NOTRACE_DISPATCH(); } @@ -4528,29 +4537,30 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int SET_TOP(NULL); Py_DECREF(owner); PUSH(res); + JUMPBY(INLINE_CACHE_ENTRIES_LOAD_METHOD); NOTRACE_DISPATCH(); } TARGET(LOAD_METHOD_CLASS) { /* LOAD_METHOD, for class methods */ assert(cframe.use_tracing == 0); - SpecializedCacheEntry *caches = GET_CACHE(); - _PyAttrCache *cache1 = &caches[-1].attr; - _PyObjectCache *cache2 = &caches[-2].obj; + _PyLoadMethodCache *cache = (_PyLoadMethodCache *)next_instr; PyObject *cls = TOP(); DEOPT_IF(!PyType_Check(cls), LOAD_METHOD); - DEOPT_IF(((PyTypeObject *)cls)->tp_version_tag != cache1->tp_version, - LOAD_METHOD); - assert(cache1->tp_version != 0); + uint32_t type_version = read_u32(cache->type_version); + DEOPT_IF(((PyTypeObject *)cls)->tp_version_tag != type_version, + LOAD_METHOD); + assert(type_version != 0); STAT_INC(LOAD_METHOD, hit); - PyObject *res = cache2->obj; + PyObject *res = read_obj(cache->descr); assert(res != NULL); Py_INCREF(res); SET_TOP(NULL); Py_DECREF(cls); PUSH(res); + JUMPBY(INLINE_CACHE_ENTRIES_LOAD_METHOD); NOTRACE_DISPATCH(); } @@ -5603,10 +5613,10 @@ opname ## _miss: \ JUMP_TO_INSTRUCTION(opname); \ } -MISS_WITH_CACHE(LOAD_ATTR) -MISS_WITH_CACHE(STORE_ATTR) +MISS_WITH_INLINE_CACHE(LOAD_ATTR) +MISS_WITH_INLINE_CACHE(STORE_ATTR) MISS_WITH_INLINE_CACHE(LOAD_GLOBAL) -MISS_WITH_CACHE(LOAD_METHOD) +MISS_WITH_INLINE_CACHE(LOAD_METHOD) MISS_WITH_CACHE(PRECALL) MISS_WITH_CACHE(CALL) MISS_WITH_INLINE_CACHE(BINARY_OP) @@ -5622,10 +5632,10 @@ MISS_WITH_OPARG_COUNTER(STORE_SUBSCR) // LOAD_FAST__LOAD_ATTR_INSTANCE_VALUE gets replaced as well STAT_INC(LOAD_ATTR_INSTANCE_VALUE, miss); STAT_INC(LOAD_ATTR, miss); - _PyAdaptiveEntry *cache = &GET_CACHE()->adaptive; + _PyAttrCache *cache = (_PyAttrCache *)next_instr; cache->counter--; if (cache->counter == 0) { - next_instr[-1] = _Py_MAKECODEUNIT(LOAD_ATTR_ADAPTIVE, _Py_OPARG(next_instr[-1])); + next_instr[-1] = _Py_MAKECODEUNIT(LOAD_ATTR_ADAPTIVE, oparg); if (_Py_OPCODE(next_instr[-2]) == LOAD_FAST__LOAD_ATTR_INSTANCE_VALUE) { next_instr[-2] = _Py_MAKECODEUNIT(LOAD_FAST, _Py_OPARG(next_instr[-2])); if (_Py_OPCODE(next_instr[-3]) == LOAD_FAST) { @@ -5633,9 +5643,8 @@ MISS_WITH_OPARG_COUNTER(STORE_SUBSCR) } } STAT_INC(LOAD_ATTR, deopt); - cache_backoff(cache); + cache->counter = ADAPTIVE_CACHE_BACKOFF; } - oparg = cache->original_oparg; JUMP_TO_INSTRUCTION(LOAD_ATTR); } diff --git a/Python/specialize.c b/Python/specialize.c index 5486b5b1f65dc9..91473149e2e405 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -58,12 +58,9 @@ static uint8_t adaptive_opcodes[256] = { /* The number of cache entries required for a "family" of instructions. */ static uint8_t cache_requirements[256] = { - [LOAD_ATTR] = 1, // _PyAdaptiveEntry - [LOAD_METHOD] = 3, /* _PyAdaptiveEntry, _PyAttrCache and _PyObjectCache */ [STORE_SUBSCR] = 0, [CALL] = 2, /* _PyAdaptiveEntry and _PyObjectCache/_PyCallCache */ [PRECALL] = 2, /* _PyAdaptiveEntry and _PyObjectCache/_PyCallCache */ - [STORE_ATTR] = 1, // _PyAdaptiveEntry }; Py_ssize_t _Py_QuickenedCount = 0; @@ -640,11 +637,10 @@ initial_counter_value(void) { static int -specialize_module_load_attr( - PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, - _PyAdaptiveEntry *cache0, int opcode, - int opcode_module) +specialize_module_load_attr(PyObject *owner, _Py_CODEUNIT *instr, + PyObject *name, int opcode, int opcode_module) { + _PyAttrCache *cache = (_PyAttrCache *)(instr + 1); PyModuleObject *m = (PyModuleObject *)owner; PyObject *value = NULL; assert((owner->ob_type->tp_flags & Py_TPFLAGS_MANAGED_DICT) == 0); @@ -675,8 +671,8 @@ specialize_module_load_attr( SPECIALIZATION_FAIL(opcode, SPEC_FAIL_OUT_OF_VERSIONS); return -1; } - cache0->version = keys_version; - cache0->index = (uint16_t)index; + write_u32(cache->version, keys_version); + cache->index = (uint16_t)index; *instr = _Py_MAKECODEUNIT(opcode_module, _Py_OPARG(*instr)); return 0; } @@ -764,7 +760,6 @@ static int specialize_dict_access( PyObject *owner, _Py_CODEUNIT *instr, PyTypeObject *type, DescriptorClassification kind, PyObject *name, - _PyAdaptiveEntry *cache0, int base_op, int values_op, int hint_op) { assert(kind == NON_OVERRIDING || kind == NON_DESCRIPTOR || kind == ABSENT || @@ -774,6 +769,7 @@ specialize_dict_access( SPECIALIZATION_FAIL(base_op, SPEC_FAIL_ATTR_NOT_MANAGED_DICT); return 0; } + _PyAttrCache *cache = (_PyAttrCache *)(instr + 1); PyObject **dictptr = _PyObject_ManagedDictPointer(owner); PyDictObject *dict = (PyDictObject *)*dictptr; if (dict == NULL) { @@ -786,8 +782,8 @@ specialize_dict_access( SPECIALIZATION_FAIL(base_op, SPEC_FAIL_OUT_OF_RANGE); return 0; } - cache0->version = type->tp_version_tag; - cache0->index = (uint16_t)index; + write_u32(cache->version, type->tp_version_tag); + cache->index = (uint16_t)index; *instr = _Py_MAKECODEUNIT(values_op, _Py_OPARG(*instr)); } else { @@ -803,20 +799,22 @@ specialize_dict_access( SPECIALIZATION_FAIL(base_op, SPEC_FAIL_OUT_OF_RANGE); return 0; } - cache0->index = (uint16_t)hint; - cache0->version = type->tp_version_tag; + cache->index = (uint16_t)hint; + write_u32(cache->version, type->tp_version_tag); *instr = _Py_MAKECODEUNIT(hint_op, _Py_OPARG(*instr)); } return 1; } int -_Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache) +_Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name) { - _PyAdaptiveEntry *cache0 = &cache->adaptive; + assert(_PyOpcode_InlineCacheEntries[LOAD_ATTR] == + INLINE_CACHE_ENTRIES_LOAD_ATTR); + _PyAttrCache *cache = (_PyAttrCache *)(instr + 1); if (PyModule_CheckExact(owner)) { - int err = specialize_module_load_attr(owner, instr, name, cache0, - LOAD_ATTR, LOAD_ATTR_MODULE); + int err = specialize_module_load_attr(owner, instr, name, LOAD_ATTR, + LOAD_ATTR_MODULE); if (err) { goto fail; } @@ -855,8 +853,8 @@ _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, Sp } assert(dmem->type == T_OBJECT_EX); assert(offset > 0); - cache0->index = (uint16_t)offset; - cache0->version = type->tp_version_tag; + cache->index = (uint16_t)offset; + write_u32(cache->version, type->tp_version_tag); *instr = _Py_MAKECODEUNIT(LOAD_ATTR_SLOT, _Py_OPARG(*instr)); goto success; } @@ -864,8 +862,8 @@ _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, Sp { Py_ssize_t offset = offsetof(PyObject, ob_type); assert(offset == (uint16_t)offset); - cache0->index = (uint16_t)offset; - cache0->version = type->tp_version_tag; + cache->index = (uint16_t)offset; + write_u32(cache->version, type->tp_version_tag); *instr = _Py_MAKECODEUNIT(LOAD_ATTR_SLOT, _Py_OPARG(*instr)); goto success; } @@ -886,7 +884,7 @@ _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, Sp break; } int err = specialize_dict_access( - owner, instr, type, kind, name, cache0, + owner, instr, type, kind, name, LOAD_ATTR, LOAD_ATTR_INSTANCE_VALUE, LOAD_ATTR_WITH_HINT ); if (err < 0) { @@ -908,19 +906,21 @@ _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, Sp fail: STAT_INC(LOAD_ATTR, failure); assert(!PyErr_Occurred()); - cache_backoff(cache0); + cache->counter = ADAPTIVE_CACHE_BACKOFF; return 0; success: STAT_INC(LOAD_ATTR, success); assert(!PyErr_Occurred()); - cache0->counter = initial_counter_value(); + cache->counter = initial_counter_value(); return 0; } int -_Py_Specialize_StoreAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache) +_Py_Specialize_StoreAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name) { - _PyAdaptiveEntry *cache0 = &cache->adaptive; + assert(_PyOpcode_InlineCacheEntries[LOAD_ATTR] == + INLINE_CACHE_ENTRIES_LOAD_ATTR); + _PyAttrCache *cache = (_PyAttrCache *)(instr + 1); PyTypeObject *type = Py_TYPE(owner); if (PyModule_CheckExact(owner)) { SPECIALIZATION_FAIL(STORE_ATTR, SPEC_FAIL_OVERRIDDEN); @@ -953,8 +953,8 @@ _Py_Specialize_StoreAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, S } assert(dmem->type == T_OBJECT_EX); assert(offset > 0); - cache0->index = (uint16_t)offset; - cache0->version = type->tp_version_tag; + cache->index = (uint16_t)offset; + write_u32(cache->version, type->tp_version_tag); *instr = _Py_MAKECODEUNIT(STORE_ATTR_SLOT, _Py_OPARG(*instr)); goto success; } @@ -977,7 +977,7 @@ _Py_Specialize_StoreAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, S } int err = specialize_dict_access( - owner, instr, type, kind, name, cache0, + owner, instr, type, kind, name, STORE_ATTR, STORE_ATTR_INSTANCE_VALUE, STORE_ATTR_WITH_HINT ); if (err < 0) { @@ -989,12 +989,12 @@ _Py_Specialize_StoreAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, S fail: STAT_INC(STORE_ATTR, failure); assert(!PyErr_Occurred()); - cache_backoff(cache0); + cache->counter = ADAPTIVE_CACHE_BACKOFF; return 0; success: STAT_INC(STORE_ATTR, success); assert(!PyErr_Occurred()); - cache0->counter = initial_counter_value(); + cache->counter = initial_counter_value(); return 0; } @@ -1036,18 +1036,18 @@ load_method_fail_kind(DescriptorClassification kind) #endif static int -specialize_class_load_method(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, - _PyAttrCache *cache1, _PyObjectCache *cache2) +specialize_class_load_method(PyObject *owner, _Py_CODEUNIT *instr, + PyObject *name) { - + _PyLoadMethodCache *cache = (_PyLoadMethodCache *)(instr + 1); PyObject *descr = NULL; DescriptorClassification kind = 0; kind = analyze_descriptor((PyTypeObject *)owner, name, &descr, 0); switch (kind) { case METHOD: case NON_DESCRIPTOR: - cache1->tp_version = ((PyTypeObject *)owner)->tp_version_tag; - cache2->obj = descr; + write_u32(cache->type_version, ((PyTypeObject *)owner)->tp_version_tag); + write_obj(cache->descr, descr); *instr = _Py_MAKECODEUNIT(LOAD_METHOD_CLASS, _Py_OPARG(*instr)); return 0; #ifdef Py_STATS @@ -1077,16 +1077,18 @@ typedef enum { // can cause a significant drop in cache hits. A possible test is // python.exe -m test_typing test_re test_dis test_zlib. int -_Py_Specialize_LoadMethod(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache) +_Py_Specialize_LoadMethod(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name) { - _PyAdaptiveEntry *cache0 = &cache->adaptive; - _PyAttrCache *cache1 = &cache[-1].attr; - _PyObjectCache *cache2 = &cache[-2].obj; + assert(_PyOpcode_InlineCacheEntries[LOAD_METHOD] == + INLINE_CACHE_ENTRIES_LOAD_METHOD); + _PyLoadMethodCache *cache = (_PyLoadMethodCache *)(instr + 1); PyTypeObject *owner_cls = Py_TYPE(owner); if (PyModule_CheckExact(owner)) { - int err = specialize_module_load_attr(owner, instr, name, cache0, - LOAD_METHOD, LOAD_METHOD_MODULE); + assert(INLINE_CACHE_ENTRIES_LOAD_ATTR <= + INLINE_CACHE_ENTRIES_LOAD_METHOD); + int err = specialize_module_load_attr(owner, instr, name, LOAD_METHOD, + LOAD_METHOD_MODULE); if (err) { goto fail; } @@ -1098,7 +1100,7 @@ _Py_Specialize_LoadMethod(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, } } if (PyType_Check(owner)) { - int err = specialize_class_load_method(owner, instr, name, cache1, cache2); + int err = specialize_class_load_method(owner, instr, name); if (err) { goto fail; } @@ -1156,7 +1158,7 @@ _Py_Specialize_LoadMethod(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SPECIALIZATION_FAIL(LOAD_METHOD, SPEC_FAIL_OUT_OF_VERSIONS); goto fail; } - cache1->dk_version = keys_version; + write_u32(cache->keys_version, keys_version); } switch(dictkind) { case NO_DICT: @@ -1166,12 +1168,12 @@ _Py_Specialize_LoadMethod(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, *instr = _Py_MAKECODEUNIT(LOAD_METHOD_WITH_VALUES, _Py_OPARG(*instr)); break; case MANAGED_DICT: - *(int16_t *)&cache0->index = (int16_t)MANAGED_DICT_OFFSET; + *(int16_t *)&cache->dict_offset = (int16_t)MANAGED_DICT_OFFSET; *instr = _Py_MAKECODEUNIT(LOAD_METHOD_WITH_DICT, _Py_OPARG(*instr)); break; case OFFSET_DICT: assert(owner_cls->tp_dictoffset > 0 && owner_cls->tp_dictoffset <= INT16_MAX); - cache0->index = (uint16_t)owner_cls->tp_dictoffset; + cache->dict_offset = (uint16_t)owner_cls->tp_dictoffset; *instr = _Py_MAKECODEUNIT(LOAD_METHOD_WITH_DICT, _Py_OPARG(*instr)); break; } @@ -1189,18 +1191,18 @@ _Py_Specialize_LoadMethod(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, * PyType_Modified usages in typeobject.c). The MCACHE has been * working since Python 2.6 and it's battle-tested. */ - cache1->tp_version = owner_cls->tp_version_tag; - cache2->obj = descr; + write_u32(cache->type_version, owner_cls->tp_version_tag); + write_obj(cache->descr, descr); // Fall through. success: STAT_INC(LOAD_METHOD, success); assert(!PyErr_Occurred()); - cache0->counter = initial_counter_value(); + cache->counter = initial_counter_value(); return 0; fail: STAT_INC(LOAD_METHOD, failure); assert(!PyErr_Occurred()); - cache_backoff(cache0); + cache->counter = ADAPTIVE_CACHE_BACKOFF; return 0; } @@ -1233,7 +1235,7 @@ _Py_Specialize_LoadGlobal( goto fail; } cache->index = (uint16_t)index; - write32(&cache->module_keys_version, keys_version); + write_u32(cache->module_keys_version, keys_version); *instr = _Py_MAKECODEUNIT(LOAD_GLOBAL_MODULE, _Py_OPARG(*instr)); goto success; } @@ -1264,7 +1266,7 @@ _Py_Specialize_LoadGlobal( goto fail; } cache->index = (uint16_t)index; - write32(&cache->module_keys_version, globals_version); + write_u32(cache->module_keys_version, globals_version); cache->builtin_keys_version = (uint16_t)builtins_version; *instr = _Py_MAKECODEUNIT(LOAD_GLOBAL_BUILTIN, _Py_OPARG(*instr)); goto success; @@ -1384,7 +1386,7 @@ _Py_Specialize_BinarySubscr( goto fail; } assert(cls->tp_version_tag != 0); - write32(&cache->type_version, cls->tp_version_tag); + write_u32(cache->type_version, cls->tp_version_tag); int version = _PyFunction_GetVersionForCurrentState(func); if (version == 0 || version != (uint16_t)version) { SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_OUT_OF_VERSIONS); From 2d5e15a02baa868cddc8d85232f4efe833687dd5 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Tue, 1 Mar 2022 17:53:59 -0800 Subject: [PATCH 02/11] Bump magic number --- Lib/importlib/_bootstrap_external.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Lib/importlib/_bootstrap_external.py b/Lib/importlib/_bootstrap_external.py index dd1f6ffd64cee5..55366b11130d09 100644 --- a/Lib/importlib/_bootstrap_external.py +++ b/Lib/importlib/_bootstrap_external.py @@ -390,6 +390,8 @@ def _write_atomic(path, data, mode=0o666): # Python 3.11a5 3481 (Use inline cache for BINARY_OP) # Python 3.11a5 3482 (Use inline caching for UNPACK_SEQUENCE and LOAD_GLOBAL) # Python 3.11a5 3483 (Use inline caching for COMPARE_OP and BINARY_SUBSCR) +# Python 3.11a6 3484 (Use inline caching for LOAD_ATTR, LOAD_METHOD, and +# STORE_ATTR) # Python 3.12 will start with magic number 3500 @@ -404,7 +406,7 @@ def _write_atomic(path, data, mode=0o666): # Whenever MAGIC_NUMBER is changed, the ranges in the magic_values array # in PC/launcher.c must also be updated. -MAGIC_NUMBER = (3483).to_bytes(2, 'little') + b'\r\n' +MAGIC_NUMBER = (3484).to_bytes(2, 'little') + b'\r\n' _RAW_MAGIC_NUMBER = int.from_bytes(MAGIC_NUMBER, 'little') # For import.c _PYCACHE = '__pycache__' From 5b06d9755d9e49abc8fbecfafc107f80209aee78 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Tue, 1 Mar 2022 17:54:06 -0800 Subject: [PATCH 03/11] blurb add --- .../Core and Builtins/2022-03-01-17-47-58.bpo-46841.inYQlU.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2022-03-01-17-47-58.bpo-46841.inYQlU.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-03-01-17-47-58.bpo-46841.inYQlU.rst b/Misc/NEWS.d/next/Core and Builtins/2022-03-01-17-47-58.bpo-46841.inYQlU.rst new file mode 100644 index 00000000000000..0e7beb019f46a2 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2022-03-01-17-47-58.bpo-46841.inYQlU.rst @@ -0,0 +1,2 @@ +Use inline caching for :opcode:`LOAD_ATTR`, :opcode:`LOAD_METHOD`, and +:opcode:`STORE_ATTR`. From 828540af36587c39071e56f54b174ef7819e1754 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Tue, 1 Mar 2022 17:54:16 -0800 Subject: [PATCH 04/11] Clean things up a bit --- Python/ceval.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Python/ceval.c b/Python/ceval.c index cc6db7a1ee78b4..665d0f91445151 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1459,7 +1459,7 @@ eval_frame_handle_pending(PyThreadState *tstate) PyDictKeyEntry *ep = DK_ENTRIES(dict->ma_keys) + cache->index; \ res = ep->me_value; \ DEOPT_IF(res == NULL, LOAD_##attr_or_method); \ - /* XXX: Uncomment this next line to make test_asyncio very angry! */ \ + /* XXX: Remove this next line to make test_asyncio very angry! */ \ DEOPT_IF(LOAD_##attr_or_method == LOAD_ATTR, LOAD_##attr_or_method); \ STAT_INC(LOAD_##attr_or_method, hit); \ Py_INCREF(res); @@ -4465,7 +4465,9 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int assert(self_cls->tp_flags & Py_TPFLAGS_MANAGED_DICT); PyDictObject *dict = *(PyDictObject**)_PyObject_ManagedDictPointer(self); DEOPT_IF(dict != NULL, LOAD_METHOD); - DEOPT_IF(((PyHeapTypeObject *)self_cls)->ht_cached_keys->dk_version != read_u32(cache->keys_version), LOAD_METHOD); + PyHeapTypeObject *self_heap_type = (PyHeapTypeObject *)self_cls; + DEOPT_IF(self_heap_type->ht_cached_keys->dk_version != + read_u32(cache->keys_version), LOAD_METHOD); STAT_INC(LOAD_METHOD, hit); PyObject *res = read_obj(cache->descr); assert(res != NULL); From 2b876f14ff326eeda667bd015f6753be298efcfb Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Wed, 2 Mar 2022 15:44:29 -0800 Subject: [PATCH 05/11] Fix typo --- Python/specialize.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Python/specialize.c b/Python/specialize.c index 91473149e2e405..a715cb7825a66d 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -918,8 +918,8 @@ _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name) int _Py_Specialize_StoreAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name) { - assert(_PyOpcode_InlineCacheEntries[LOAD_ATTR] == - INLINE_CACHE_ENTRIES_LOAD_ATTR); + assert(_PyOpcode_InlineCacheEntries[STORE_ATTR] == + INLINE_CACHE_ENTRIES_STORE_ATTR); _PyAttrCache *cache = (_PyAttrCache *)(instr + 1); PyTypeObject *type = Py_TYPE(owner); if (PyModule_CheckExact(owner)) { From 097fdaf622de4ece84bb3b2f9b9a5988f3a9b9d2 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Wed, 2 Mar 2022 16:44:46 -0800 Subject: [PATCH 06/11] Apply feedback from code review --- Include/internal/pycore_code.h | 92 +++++++++++++++++++++++----------- 1 file changed, 62 insertions(+), 30 deletions(-) diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index 56af0310c2a421..662f7447634d3a 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -21,6 +21,7 @@ typedef struct { } _PyAdaptiveEntry; typedef struct { + /* Borrowed ref */ PyObject *obj; } _PyObjectCache; @@ -429,77 +430,108 @@ extern PyObject* _Py_GetSpecializationStats(void); #ifdef WORDS_BIGENDIAN static inline void -write_u32(uint16_t p[2], uint32_t val) +write_u32(uint16_t *p, uint32_t val) { - p[0] = val >> 16; - p[1] = (uint16_t)val; -} - -static inline uint32_t -read_u32(uint16_t p[2]) -{ - return (p[0] << 16) | p[1]; + p[0] = (uint16_t)(val >> 16); + p[1] = (uint16_t)(val >> 0); } static inline void -write_obj(uint16_t p[4], PyObject *obj) +write_u64(uint16_t *p, uint64_t val) { - uint64_t val = (uintptr_t)obj; p[0] = (uint16_t)(val >> 48); p[1] = (uint16_t)(val >> 32); p[2] = (uint16_t)(val >> 16); p[3] = (uint16_t)(val >> 0); } -static inline PyObject * -read_obj(uint16_t p[4]) +static inline uint32_t +read_u32(uint16_t *p) +{ + uint32_t val = 0; + val |= (uint32_t)p[0] << 16; + val |= (uint32_t)p[1] << 0; + return val; +} + +static inline uint64_t +read_u64(uint16_t *p) { - uintptr_t val = 0; + uint64_t val = 0; val |= (uint64_t)p[0] << 48; val |= (uint64_t)p[1] << 32; val |= (uint64_t)p[2] << 16; val |= (uint64_t)p[3] << 0; - return (PyObject *)val; + return val; } #else static inline void -write_u32(uint16_t p[2], uint32_t val) -{ - p[0] = (uint16_t)val; - p[1] = val >> 16; -} - -static inline uint32_t -read_u32(uint16_t p[2]) +write_u32(uint16_t *p, uint32_t val) { - return p[0] | (p[1] << 16); + p[0] = (uint16_t)(val >> 0); + p[1] = (uint16_t)(val >> 16); } static inline void -write_obj(uint16_t p[4], PyObject *obj) +write_u64(uint16_t *p, uint64_t val) { - uint64_t val = (uintptr_t)obj; p[0] = (uint16_t)(val >> 0); p[1] = (uint16_t)(val >> 16); p[2] = (uint16_t)(val >> 32); p[3] = (uint16_t)(val >> 48); } -static inline PyObject * -read_obj(uint16_t p[4]) +static inline uint32_t +read_u32(uint16_t *p) { - uintptr_t val = 0; + uint32_t val = 0; + val |= (uint32_t)p[0] << 0; + val |= (uint32_t)p[1] << 16; + return val; +} + +static inline uint64_t +read_u64(uint16_t *p) +{ + uint64_t val = 0; val |= (uint64_t)p[0] << 0; val |= (uint64_t)p[1] << 16; val |= (uint64_t)p[2] << 32; val |= (uint64_t)p[3] << 48; - return (PyObject *)val; + return val; } #endif +static inline void +write_obj(uint16_t *p, PyObject *obj) +{ + uintptr_t val = (uintptr_t)obj; +#if SIZEOF_UINTPTR_T == 8 + write_u64(p, val); +#elif SIZEOF_UINTPTR_T == 4 + write_u32(p, val); +#else + #error "sizeof(uintptr_t) must be 4 or 8" +#endif +} + +static inline PyObject * +read_obj(uint16_t *p) +{ + uintptr_t val; +#if SIZEOF_UINTPTR_T == 8 + val = read_u64(p); +#elif SIZE_OF_UINTPTR_T == 4 + val = read_u32(p); +#else + #error "sizeof(uintptr_t) must be 4 or 8" +#endif + return (PyObject *)val; +} + #ifdef __cplusplus } #endif From 4c71c121d332bb00c41714dde8d619865e7b3124 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Wed, 2 Mar 2022 17:21:14 -0800 Subject: [PATCH 07/11] Get rid of LOAD_FAST__LOAD_ATTR_INSTANCE_VALUE --- Include/opcode.h | 1 - Lib/opcode.py | 1 - Python/ceval.c | 79 ++--------------------------------------- Python/opcode_targets.h | 2 +- Python/specialize.c | 10 ------ 5 files changed, 4 insertions(+), 89 deletions(-) diff --git a/Include/opcode.h b/Include/opcode.h index 656b06fea6ada4..110f8c36171404 100644 --- a/Include/opcode.h +++ b/Include/opcode.h @@ -183,7 +183,6 @@ extern "C" { #define LOAD_FAST__LOAD_CONST 173 #define LOAD_CONST__LOAD_FAST 174 #define STORE_FAST__STORE_FAST 175 -#define LOAD_FAST__LOAD_ATTR_INSTANCE_VALUE 176 #define DO_TRACING 255 extern const uint8_t _PyOpcode_InlineCacheEntries[256]; diff --git a/Lib/opcode.py b/Lib/opcode.py index 4d53e2e29a1306..f6e2dec32e0f57 100644 --- a/Lib/opcode.py +++ b/Lib/opcode.py @@ -301,7 +301,6 @@ def jabs_op(name, op, entries=0): "LOAD_FAST__LOAD_CONST", "LOAD_CONST__LOAD_FAST", "STORE_FAST__STORE_FAST", - "LOAD_FAST__LOAD_ATTR_INSTANCE_VALUE", ] _specialization_stats = [ "success", diff --git a/Python/ceval.c b/Python/ceval.c index c8ab44e49d8856..fafc7564b3e448 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -3458,33 +3458,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int } } - TARGET(LOAD_FAST__LOAD_ATTR_INSTANCE_VALUE) { - assert(cframe.use_tracing == 0); - PyObject *owner = GETLOCAL(oparg); // borrowed - if (owner == NULL) { - goto unbound_local_error; - } - // GET_CACHE(), but for the following opcode - assert(_Py_OPCODE(*next_instr) == LOAD_ATTR_INSTANCE_VALUE); - _PyAttrCache *cache = (_PyAttrCache *)(next_instr + 1); - uint32_t type_version = read_u32(cache->version); - assert(type_version != 0); - PyTypeObject *tp = Py_TYPE(owner); - // These DEOPT_IF miss branches do PUSH(Py_NewRef(owner)). - DEOPT_IF(tp->tp_version_tag != type_version, - LOAD_FAST__LOAD_ATTR_INSTANCE_VALUE); - assert(tp->tp_dictoffset < 0); - assert(tp->tp_flags & Py_TPFLAGS_MANAGED_DICT); - PyDictValues *values = *_PyObject_ValuesPointer(owner); - DEOPT_IF(values == NULL, LOAD_FAST__LOAD_ATTR_INSTANCE_VALUE); - PyObject *res = values->values[cache->index]; - DEOPT_IF(res == NULL, LOAD_FAST__LOAD_ATTR_INSTANCE_VALUE); - STAT_INC(LOAD_ATTR, hit); - PUSH(Py_NewRef(res)); - JUMPBY(INLINE_CACHE_ENTRIES_LOAD_ATTR + 1); - NOTRACE_DISPATCH(); - } - TARGET(LOAD_ATTR_INSTANCE_VALUE) { assert(cframe.use_tracing == 0); PyObject *owner = TOP(); @@ -3493,14 +3466,13 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int _PyAttrCache *cache = (_PyAttrCache *)next_instr; uint32_t type_version = read_u32(cache->version); assert(type_version != 0); - DEOPT_IF(tp->tp_version_tag != type_version, - LOAD_ATTR_INSTANCE_VALUE); + DEOPT_IF(tp->tp_version_tag != type_version, LOAD_ATTR); assert(tp->tp_dictoffset < 0); assert(tp->tp_flags & Py_TPFLAGS_MANAGED_DICT); PyDictValues *values = *_PyObject_ValuesPointer(owner); - DEOPT_IF(values == NULL, LOAD_ATTR_INSTANCE_VALUE); + DEOPT_IF(values == NULL, LOAD_ATTR); res = values->values[cache->index]; - DEOPT_IF(res == NULL, LOAD_ATTR_INSTANCE_VALUE); + DEOPT_IF(res == NULL, LOAD_ATTR); STAT_INC(LOAD_ATTR, hit); Py_INCREF(res); SET_TOP(res); @@ -5640,51 +5612,6 @@ MISS_WITH_INLINE_CACHE(BINARY_SUBSCR) MISS_WITH_INLINE_CACHE(UNPACK_SEQUENCE) MISS_WITH_OPARG_COUNTER(STORE_SUBSCR) -LOAD_ATTR_INSTANCE_VALUE_miss: - { - // Special-cased so that if LOAD_ATTR_INSTANCE_VALUE - // gets replaced, then any preceeding - // LOAD_FAST__LOAD_ATTR_INSTANCE_VALUE gets replaced as well - STAT_INC(LOAD_ATTR_INSTANCE_VALUE, miss); - STAT_INC(LOAD_ATTR, miss); - _PyAttrCache *cache = (_PyAttrCache *)next_instr; - cache->counter--; - if (cache->counter == 0) { - next_instr[-1] = _Py_MAKECODEUNIT(LOAD_ATTR_ADAPTIVE, oparg); - if (_Py_OPCODE(next_instr[-2]) == LOAD_FAST__LOAD_ATTR_INSTANCE_VALUE) { - next_instr[-2] = _Py_MAKECODEUNIT(LOAD_FAST, _Py_OPARG(next_instr[-2])); - if (_Py_OPCODE(next_instr[-3]) == LOAD_FAST) { - next_instr[-3] = _Py_MAKECODEUNIT(LOAD_FAST__LOAD_FAST, _Py_OPARG(next_instr[-3])); - } - } - STAT_INC(LOAD_ATTR, deopt); - cache->counter = ADAPTIVE_CACHE_BACKOFF; - } - JUMP_TO_INSTRUCTION(LOAD_ATTR); - } - -LOAD_FAST__LOAD_ATTR_INSTANCE_VALUE_miss: - { - // This is special-cased because we have a superinstruction - // that includes a specialized instruction. - // If the specialized portion misses, carry out - // the first instruction, then perform a miss - // for the second instruction as usual. - - // Do LOAD_FAST - { - PyObject *value = GETLOCAL(oparg); - assert(value != NULL); // Already checked if unbound - Py_INCREF(value); - PUSH(value); - NEXTOPARG(); - next_instr++; - } - - // Now we are in the correct state for LOAD_ATTR - goto LOAD_ATTR_INSTANCE_VALUE_miss; - } - binary_subscr_dict_error: { PyObject *sub = POP(); diff --git a/Python/opcode_targets.h b/Python/opcode_targets.h index d463e303e27ac7..7be7b168a755d8 100644 --- a/Python/opcode_targets.h +++ b/Python/opcode_targets.h @@ -175,7 +175,7 @@ static void *opcode_targets[256] = { &&TARGET_LOAD_FAST__LOAD_CONST, &&TARGET_LOAD_CONST__LOAD_FAST, &&TARGET_STORE_FAST__STORE_FAST, - &&TARGET_LOAD_FAST__LOAD_ATTR_INSTANCE_VALUE, + &&_unknown_opcode, &&_unknown_opcode, &&_unknown_opcode, &&_unknown_opcode, diff --git a/Python/specialize.c b/Python/specialize.c index a715cb7825a66d..436aa1dd6f3c02 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -891,16 +891,6 @@ _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name) return -1; } if (err) { - if (_Py_OPCODE(instr[0]) == LOAD_ATTR_INSTANCE_VALUE) { - // Note: instr[-1] exists because there's something on the stack, - // and instr[-2] exists because there's at least a RESUME as well. - if (_Py_OPCODE(instr[-1]) == LOAD_FAST) { - instr[-1] = _Py_MAKECODEUNIT(LOAD_FAST__LOAD_ATTR_INSTANCE_VALUE, _Py_OPARG(instr[-1])); - if (_Py_OPCODE(instr[-2]) == LOAD_FAST__LOAD_FAST) { - instr[-2] = _Py_MAKECODEUNIT(LOAD_FAST, _Py_OPARG(instr[-2])); - } - } - } goto success; } fail: From 09fbac5b127c1869834ae72284792cc7c7d102a1 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Wed, 2 Mar 2022 17:21:22 -0800 Subject: [PATCH 08/11] Remove workaround --- Python/ceval.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/Python/ceval.c b/Python/ceval.c index fafc7564b3e448..5d9b6423b9a648 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1459,8 +1459,6 @@ eval_frame_handle_pending(PyThreadState *tstate) PyDictUnicodeEntry *ep = DK_UNICODE_ENTRIES(dict->ma_keys) + cache->index; \ res = ep->me_value; \ DEOPT_IF(res == NULL, LOAD_##attr_or_method); \ - /* XXX: Remove this next line to make test_asyncio very angry! */ \ - DEOPT_IF(LOAD_##attr_or_method == LOAD_ATTR, LOAD_##attr_or_method); \ STAT_INC(LOAD_##attr_or_method, hit); \ Py_INCREF(res); From e5229ce1c02103d01d4eed29556e7edb5b6c9616 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Wed, 2 Mar 2022 17:37:53 -0800 Subject: [PATCH 09/11] Fix typo --- Include/internal/pycore_code.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index 662f7447634d3a..5d48c225666355 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -524,7 +524,7 @@ read_obj(uint16_t *p) uintptr_t val; #if SIZEOF_UINTPTR_T == 8 val = read_u64(p); -#elif SIZE_OF_UINTPTR_T == 4 +#elif SIZEOF_UINTPTR_T == 4 val = read_u32(p); #else #error "sizeof(uintptr_t) must be 4 or 8" From 452c78ccd2a41d76039eadf51e1f164b0e14ed62 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Wed, 2 Mar 2022 22:45:36 -0800 Subject: [PATCH 10/11] SIZEOF_UINTPTR_T isn't defined on Windows --- Include/internal/pycore_code.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index 5d48c225666355..25c31a1fca7a62 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -509,12 +509,12 @@ static inline void write_obj(uint16_t *p, PyObject *obj) { uintptr_t val = (uintptr_t)obj; -#if SIZEOF_UINTPTR_T == 8 +#if SIZEOF_VOID_P == 8 write_u64(p, val); -#elif SIZEOF_UINTPTR_T == 4 +#elif SIZEOF_VOID_P == 4 write_u32(p, val); #else - #error "sizeof(uintptr_t) must be 4 or 8" + #error "SIZEOF_VOID_P must be 4 or 8" #endif } @@ -522,12 +522,12 @@ static inline PyObject * read_obj(uint16_t *p) { uintptr_t val; -#if SIZEOF_UINTPTR_T == 8 +#if SIZEOF_VOID_P == 8 val = read_u64(p); -#elif SIZEOF_UINTPTR_T == 4 +#elif SIZEOF_VOID_P == 4 val = read_u32(p); #else - #error "sizeof(uintptr_t) must be 4 or 8" + #error "SIZEOF_VOID_P must be 4 or 8" #endif return (PyObject *)val; } From 948a206fe3076272ea6a5adfad229059c053d8df Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Thu, 3 Mar 2022 14:56:52 -0800 Subject: [PATCH 11/11] Fix version number --- Lib/importlib/_bootstrap_external.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/importlib/_bootstrap_external.py b/Lib/importlib/_bootstrap_external.py index 55366b11130d09..9d36bc27c44d4c 100644 --- a/Lib/importlib/_bootstrap_external.py +++ b/Lib/importlib/_bootstrap_external.py @@ -390,7 +390,7 @@ def _write_atomic(path, data, mode=0o666): # Python 3.11a5 3481 (Use inline cache for BINARY_OP) # Python 3.11a5 3482 (Use inline caching for UNPACK_SEQUENCE and LOAD_GLOBAL) # Python 3.11a5 3483 (Use inline caching for COMPARE_OP and BINARY_SUBSCR) -# Python 3.11a6 3484 (Use inline caching for LOAD_ATTR, LOAD_METHOD, and +# Python 3.11a5 3484 (Use inline caching for LOAD_ATTR, LOAD_METHOD, and # STORE_ATTR) # Python 3.12 will start with magic number 3500