From 8db0653950309a2b911972acd4f025c0d5e90a78 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 26 May 2021 16:28:54 +0100 Subject: [PATCH 01/15] Specialize LOAD_ATTR with LOAD_ATTR_SLOT and LOAD_ATTR_SPLIT_KEYS --- Include/internal/pycore_code.h | 65 +++++++ Include/opcode.h | 3 + Lib/opcode.py | 6 + Python/ceval.c | 270 +++++++++-------------------- Python/makeopcodetargets.py | 5 + Python/opcode_targets.h | 6 +- Python/specialize.c | 94 +++++++++- Tools/scripts/generate_opcode_h.py | 9 + 8 files changed, 269 insertions(+), 189 deletions(-) diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index 2709e082b05b17..bd2c9a1ec1804a 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -42,6 +42,12 @@ typedef struct { uint16_t index; } _PyAdaptiveEntry; + +typedef struct { + uint32_t tp_version; + uint32_t dk_version; +} _PyLoadAttrCache; + /* Add specialized versions of entries to this union. * * Do not break the invariant: sizeof(SpecializedCacheEntry) == 8 @@ -55,6 +61,7 @@ typedef struct { typedef union { _PyEntryZero zero; _PyAdaptiveEntry adaptive; + _PyLoadAttrCache load_attr; } SpecializedCacheEntry; #define INSTRUCTIONS_PER_ENTRY (sizeof(SpecializedCacheEntry)/sizeof(_Py_CODEUNIT)) @@ -255,6 +262,64 @@ PyAPI_FUNC(PyObject *) _PyCode_GetCellvars(PyCodeObject *); PyAPI_FUNC(PyObject *) _PyCode_GetFreevars(PyCodeObject *); +/* Cache hits and misses */ + +static inline uint8_t +saturating_increment(uint8_t c) +{ + return c<<1; +} + +static inline uint8_t +saturating_decrement(uint8_t c) +{ + return (c>>1) + 128; +} + +static inline uint8_t +saturating_zero(void) +{ + return 255; +} + +/* Starting value for saturating counter. + * Technically this should be 1, but that is likely to + * cause a bit of thrashing when we optimize then get an immediate miss. + * We want to give the counter a change to stabilize, so we start at 3. 
+ */ +static inline uint8_t +saturating_start(void) +{ + return saturating_zero()<<3; +} + +static inline void +record_cache_hit(_PyAdaptiveEntry *entry) { + entry->counter = saturating_increment(entry->counter); +} + +static inline void +record_cache_miss(_PyAdaptiveEntry *entry) { + entry->counter = saturating_decrement(entry->counter); +} + +static inline int +too_many_cache_misses(_PyAdaptiveEntry *entry) { + return entry->counter == saturating_zero(); +} + +#define BACKOFF 64 + +static inline void +cache_backoff(_PyAdaptiveEntry *entry) { + entry->counter = BACKOFF; +} + +/* Specialization functions */ + +int _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache); + + #ifdef __cplusplus } #endif diff --git a/Include/opcode.h b/Include/opcode.h index c65e2f41133fc6..d8cba7ba09616a 100644 --- a/Include/opcode.h +++ b/Include/opcode.h @@ -136,6 +136,9 @@ extern "C" { #define DICT_MERGE 164 #define DICT_UPDATE 165 #define CALL_METHOD_KW 166 +#define LOAD_ATTR_ADAPTIVE 7 +#define LOAD_ATTR_SPLIT_KEYS 8 +#define LOAD_ATTR_SLOT 13 #ifdef NEED_OPCODE_JUMP_TABLES static uint32_t _PyOpcode_RelativeJump[8] = { 0U, diff --git a/Lib/opcode.py b/Lib/opcode.py index 4d5343179e5932..224344f69efdf5 100644 --- a/Lib/opcode.py +++ b/Lib/opcode.py @@ -218,3 +218,9 @@ def jabs_op(name, op): def_op('CALL_METHOD_KW', 166) del def_op, name_op, jrel_op, jabs_op + +_specialized_instructions = [ + "LOAD_ATTR_ADAPTIVE", + "LOAD_ATTR_SPLIT_KEYS", + "LOAD_ATTR_SLOT", +] diff --git a/Python/ceval.c b/Python/ceval.c index a8abead23038ce..bd7e5066ca5469 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -24,6 +24,7 @@ #include "pycore_tuple.h" // _PyTuple_ITEMS() #include "code.h" +#include "Objects/dict-common.h" #include "dictobject.h" #include "frameobject.h" #include "pycore_frame.h" @@ -1554,8 +1555,15 @@ eval_frame_handle_pending(PyThreadState *tstate) #define OPCACHE_STAT_ATTR_DEOPT() #define OPCACHE_STAT_ATTR_TOTAL() +#define 
JUMP_TO_INSTRUCTION(op) goto PREDICT_ID(op) + +#define GET_CACHE() \ + _GetSpecializedCacheEntryForInstruction(first_instr, INSTR_OFFSET(), oparg) + #endif +#define DEOPT_IF(cond, instname) if (cond) { goto instname ## _miss; } + #define GLOBALS() specials[FRAME_SPECIALS_GLOBALS_OFFSET] #define BUILTINS() specials[FRAME_SPECIALS_BUILTINS_OFFSET] #define LOCALS() specials[FRAME_SPECIALS_LOCALS_OFFSET] @@ -1574,7 +1582,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) int lastopcode = 0; #endif PyObject **stack_pointer; /* Next free slot in value stack */ - const _Py_CODEUNIT *next_instr; + _Py_CODEUNIT *next_instr; int opcode; /* Current opcode */ int oparg; /* Current opcode argument, if any */ PyObject **localsplus, **specials; @@ -1582,7 +1590,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) _Py_atomic_int * const eval_breaker = &tstate->interp->ceval.eval_breaker; PyCodeObject *co; - const _Py_CODEUNIT *first_instr; + _Py_CODEUNIT *first_instr; PyObject *names; PyObject *consts; _PyOpcache *co_opcache; @@ -3443,196 +3451,75 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) } case TARGET(LOAD_ATTR): { + PREDICTED(LOAD_ATTR); PyObject *name = GETITEM(names, oparg); PyObject *owner = TOP(); + PyObject *res = PyObject_GetAttr(owner, name); + if (res == NULL) { + goto error; + } + Py_DECREF(owner); + SET_TOP(res); + DISPATCH(); + } - PyTypeObject *type = Py_TYPE(owner); - PyObject *res; - PyObject **dictptr; - PyObject *dict; - _PyOpCodeOpt_LoadAttr *la; - - OPCACHE_STAT_ATTR_TOTAL(); - - OPCACHE_CHECK(); - if (co_opcache != NULL && PyType_HasFeature(type, Py_TPFLAGS_VALID_VERSION_TAG)) - { - if (co_opcache->optimized > 0) { - // Fast path -- cache hit makes LOAD_ATTR ~30% faster. - la = &co_opcache->u.la; - if (la->type == type && la->tp_version_tag == type->tp_version_tag) - { - // Hint >= 0 is a dict index; hint == -1 is a dict miss. 
- // Hint < -1 is an inverted slot offset: offset is strictly > 0, - // so ~offset is strictly < -1 (assuming 2's complement). - if (la->hint < -1) { - // Even faster path -- slot hint. - Py_ssize_t offset = ~la->hint; - // fprintf(stderr, "Using hint for offset %zd\n", offset); - char *addr = (char *)owner + offset; - res = *(PyObject **)addr; - if (res != NULL) { - Py_INCREF(res); - SET_TOP(res); - Py_DECREF(owner); - DISPATCH(); - } - // Else slot is NULL. Fall through to slow path to raise AttributeError(name). - // Don't DEOPT, since the slot is still there. - } else { - // Fast path for dict. - assert(type->tp_dict != NULL); - assert(type->tp_dictoffset > 0); - - dictptr = (PyObject **) ((char *)owner + type->tp_dictoffset); - dict = *dictptr; - if (dict != NULL && PyDict_CheckExact(dict)) { - Py_ssize_t hint = la->hint; - Py_INCREF(dict); - res = NULL; - assert(!_PyErr_Occurred(tstate)); - la->hint = _PyDict_GetItemHint((PyDictObject*)dict, name, hint, &res); - if (res != NULL) { - assert(la->hint >= 0); - if (la->hint == hint && hint >= 0) { - // Our hint has helped -- cache hit. - OPCACHE_STAT_ATTR_HIT(); - } else { - // The hint we provided didn't work. - // Maybe next time? - OPCACHE_MAYBE_DEOPT_LOAD_ATTR(); - } - - Py_INCREF(res); - SET_TOP(res); - Py_DECREF(owner); - Py_DECREF(dict); - DISPATCH(); - } - else { - _PyErr_Clear(tstate); - // This attribute can be missing sometimes; - // we don't want to optimize this lookup. - OPCACHE_DEOPT_LOAD_ATTR(); - Py_DECREF(dict); - } - } - else { - // There is no dict, or __dict__ doesn't satisfy PyDict_CheckExact. - OPCACHE_DEOPT_LOAD_ATTR(); - } - } - } - else { - // The type of the object has either been updated, - // or is different. Maybe it will stabilize? - OPCACHE_MAYBE_DEOPT_LOAD_ATTR(); - } - OPCACHE_STAT_ATTR_MISS(); - } - - if (co_opcache != NULL && // co_opcache can be NULL after a DEOPT() call. 
- type->tp_getattro == PyObject_GenericGetAttr) - { - if (type->tp_dict == NULL) { - if (PyType_Ready(type) < 0) { - Py_DECREF(owner); - SET_TOP(NULL); - goto error; - } - } - PyObject *descr = _PyType_Lookup(type, name); - if (descr != NULL) { - // We found an attribute with a data-like descriptor. - PyTypeObject *dtype = Py_TYPE(descr); - if (dtype == &PyMemberDescr_Type) { // It's a slot - PyMemberDescrObject *member = (PyMemberDescrObject *)descr; - struct PyMemberDef *dmem = member->d_member; - if (dmem->type == T_OBJECT_EX) { - Py_ssize_t offset = dmem->offset; - assert(offset > 0); // 0 would be confused with dict hint == -1 (miss). - - if (co_opcache->optimized == 0) { - // First time we optimize this opcode. - OPCACHE_STAT_ATTR_OPT(); - co_opcache->optimized = OPCODE_CACHE_MAX_TRIES; - // fprintf(stderr, "Setting hint for %s, offset %zd\n", dmem->name, offset); - } - - la = &co_opcache->u.la; - la->type = type; - la->tp_version_tag = type->tp_version_tag; - la->hint = ~offset; - - char *addr = (char *)owner + offset; - res = *(PyObject **)addr; - if (res != NULL) { - Py_INCREF(res); - Py_DECREF(owner); - SET_TOP(res); - - DISPATCH(); - } - // Else slot is NULL. Fall through to slow path to raise AttributeError(name). - } - // Else it's a slot of a different type. We don't handle those. - } - // Else it's some other kind of descriptor that we don't handle. - OPCACHE_DEOPT_LOAD_ATTR(); - } - else if (type->tp_dictoffset > 0) { - // We found an instance with a __dict__. - dictptr = (PyObject **) ((char *)owner + type->tp_dictoffset); - dict = *dictptr; - - if (dict != NULL && PyDict_CheckExact(dict)) { - Py_INCREF(dict); - res = NULL; - assert(!_PyErr_Occurred(tstate)); - Py_ssize_t hint = _PyDict_GetItemHint((PyDictObject*)dict, name, -1, &res); - if (res != NULL) { - Py_INCREF(res); - Py_DECREF(dict); - Py_DECREF(owner); - SET_TOP(res); - - if (co_opcache->optimized == 0) { - // First time we optimize this opcode. 
- OPCACHE_STAT_ATTR_OPT(); - co_opcache->optimized = OPCODE_CACHE_MAX_TRIES; - } - - la = &co_opcache->u.la; - la->type = type; - la->tp_version_tag = type->tp_version_tag; - assert(hint >= 0); - la->hint = hint; - - DISPATCH(); - } - else { - _PyErr_Clear(tstate); - } - Py_DECREF(dict); - } else { - // There is no dict, or __dict__ doesn't satisfy PyDict_CheckExact. - OPCACHE_DEOPT_LOAD_ATTR(); - } - } else { - // The object's class does not have a tp_dictoffset we can use. - OPCACHE_DEOPT_LOAD_ATTR(); - } - } else if (type->tp_getattro != PyObject_GenericGetAttr) { - OPCACHE_DEOPT_LOAD_ATTR(); + case TARGET(LOAD_ATTR_ADAPTIVE): { + SpecializedCacheEntry *cache = GET_CACHE(); + if (cache->adaptive.counter == 0) { + PyObject *owner = TOP(); + PyObject *name = GETITEM(names, cache->adaptive.original_oparg); + next_instr--; + if (_Py_Specialize_LoadAttr(owner, next_instr, name, cache) < 0) { + goto error; } + DISPATCH(); } + else { + cache->adaptive.counter--; + oparg = cache->adaptive.original_oparg; + JUMP_TO_INSTRUCTION(LOAD_ATTR); + } + } - // Slow path. 
- res = PyObject_GetAttr(owner, name); + case TARGET(LOAD_ATTR_SPLIT_KEYS): { + PyObject *owner = TOP(); + PyObject *res; + PyTypeObject *tp = Py_TYPE(owner); + SpecializedCacheEntry *caches = GET_CACHE(); + _PyAdaptiveEntry *cache0 = &caches[0].adaptive; + _PyLoadAttrCache *cache1 = &caches[-1].load_attr; + assert(cache1->tp_version != 0); + DEOPT_IF(tp->tp_version_tag != cache1->tp_version, LOAD_ATTR); + assert(tp->tp_dictoffset > 0); + PyDictObject *dict = *(PyDictObject **)(((char *)owner) + tp->tp_dictoffset); + DEOPT_IF(dict == NULL, LOAD_ATTR); + assert(PyDict_CheckExact((PyObject *)dict)); + DEOPT_IF(dict->ma_keys->dk_version != cache1->dk_version, LOAD_ATTR); + res = dict->ma_values[cache0->index]; + DEOPT_IF(res == NULL, LOAD_ATTR); + record_cache_hit(cache0); + Py_INCREF(res); + SET_TOP(res); Py_DECREF(owner); + DISPATCH(); + } + + case TARGET(LOAD_ATTR_SLOT): { + PyObject *owner = TOP(); + PyObject *res; + PyTypeObject *tp = Py_TYPE(owner); + SpecializedCacheEntry *caches = GET_CACHE(); + _PyAdaptiveEntry *cache0 = &caches[0].adaptive; + _PyLoadAttrCache *cache1 = &caches[-1].load_attr; + assert(cache1->tp_version != 0); + DEOPT_IF(tp->tp_version_tag != cache1->tp_version, LOAD_ATTR); + char *addr = (char *)owner + cache0->index; + res = *(PyObject **)addr; + DEOPT_IF(res == NULL, LOAD_ATTR); + record_cache_hit(cache0); + Py_INCREF(res); SET_TOP(res); - if (res == NULL) - goto error; + Py_DECREF(owner); DISPATCH(); } @@ -4494,6 +4381,20 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) or goto error. */ Py_UNREACHABLE(); +/* Cache misses */ + +LOAD_ATTR_miss: + { + _PyAdaptiveEntry *cache = &GET_CACHE()->adaptive; + record_cache_miss(cache); + if (too_many_cache_misses(cache)) { + next_instr[-1] = _Py_MAKECODEUNIT(LOAD_ATTR_ADAPTIVE, _Py_OPARG(next_instr[-1])); + cache_backoff(cache); + } + oparg = cache->original_oparg; + JUMP_TO_INSTRUCTION(LOAD_ATTR); + } + error: /* Double-check exception status. 
*/ #ifdef NDEBUG @@ -4515,6 +4416,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) call_exc_trace(tstate->c_tracefunc, tstate->c_traceobj, tstate, f, &trace_info); } + exception_unwind: f->f_state = FRAME_UNWINDING; /* We can't use f->f_lasti here, as RERAISE may have set it */ diff --git a/Python/makeopcodetargets.py b/Python/makeopcodetargets.py index 023c9e6c9f1adc..189d72a8c84af3 100755 --- a/Python/makeopcodetargets.py +++ b/Python/makeopcodetargets.py @@ -34,6 +34,11 @@ def write_contents(f): targets = ['_unknown_opcode'] * 256 for opname, op in opcode.opmap.items(): targets[op] = "TARGET_%s" % opname + next_op = 1 + for opname in opcode._specialized_instructions: + while targets[next_op] != '_unknown_opcode': + next_op += 1 + targets[next_op] = "TARGET_%s" % opname f.write("static void *opcode_targets[256] = {\n") f.write(",\n".join([" &&%s" % s for s in targets])) f.write("\n};\n") diff --git a/Python/opcode_targets.h b/Python/opcode_targets.h index 94b2a7c9b6e930..efcf6d613e9713 100644 --- a/Python/opcode_targets.h +++ b/Python/opcode_targets.h @@ -6,13 +6,13 @@ static void *opcode_targets[256] = { &&TARGET_DUP_TOP, &&TARGET_DUP_TOP_TWO, &&TARGET_ROT_FOUR, - &&_unknown_opcode, - &&_unknown_opcode, + &&TARGET_LOAD_ATTR_ADAPTIVE, + &&TARGET_LOAD_ATTR_SPLIT_KEYS, &&TARGET_NOP, &&TARGET_UNARY_POSITIVE, &&TARGET_UNARY_NEGATIVE, &&TARGET_UNARY_NOT, - &&_unknown_opcode, + &&TARGET_LOAD_ATTR_SLOT, &&_unknown_opcode, &&TARGET_UNARY_INVERT, &&TARGET_BINARY_MATRIX_MULTIPLY, diff --git a/Python/specialize.c b/Python/specialize.c index 07152d80538307..edf1b775d27f3c 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -1,7 +1,9 @@ #include "Python.h" #include "pycore_code.h" +#include "Objects/dict-common.h" #include "opcode.h" +#include "structmember.h" // struct PyMemberDef, T_OFFSET_EX /* We layout the quickened data as a bi-directional array: @@ -56,10 +58,14 @@ get_cache_count(SpecializedCacheOrInstruction *quickened) 
{ /* Map from opcode to adaptive opcode. Values of zero are ignored. */ -static uint8_t adaptive_opcodes[256] = { 0 }; +static uint8_t adaptive_opcodes[256] = { + [LOAD_ATTR] = LOAD_ATTR_ADAPTIVE, +}; /* The number of cache entries required for a "family" of instructions. */ -static uint8_t cache_requirements[256] = { 0 }; +static uint8_t cache_requirements[256] = { + [LOAD_ATTR] = 2, +}; /* Return the oparg for the cache_offset and instruction index. * @@ -195,3 +201,87 @@ _Py_Quicken(PyCodeObject *code) { return 0; } +int +_Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache) +{ + _PyAdaptiveEntry *cache0 = &cache->adaptive; + _PyLoadAttrCache *cache1 = &cache[-1].load_attr; + PyTypeObject *type = Py_TYPE(owner); + if (type->tp_getattro != PyObject_GenericGetAttr) { + goto fail; + } + if (type->tp_dict == NULL) { + if (PyType_Ready(type) < 0) { + return -1; + } + } + PyObject *descr = _PyType_Lookup(type, name); + if (descr != NULL) { + // We found an attribute with a data-like descriptor. + PyTypeObject *dtype = Py_TYPE(descr); + if (dtype != &PyMemberDescr_Type) { + goto fail; + } + // It's a slot + PyMemberDescrObject *member = (PyMemberDescrObject *)descr; + struct PyMemberDef *dmem = member->d_member; + if (dmem->type != T_OBJECT_EX) { + // It's a slot of a different type. We don't handle those. + goto fail; + } + Py_ssize_t offset = dmem->offset; + assert(offset > 0); + cache0->index = offset; + cache1->tp_version = type->tp_version_tag; + *instr = _Py_MAKECODEUNIT(LOAD_ATTR_SLOT, _Py_OPARG(*instr)); + goto success; + } + // No desciptor + if (type->tp_dictoffset <= 0) { + // No dictionary, or computed offset dictionary + goto fail; + } + PyObject **dictptr = (PyObject **) ((char *)owner + type->tp_dictoffset); + if (*dictptr == NULL || !PyDict_CheckExact(*dictptr)) { + goto fail; + } + // We found an instance with a __dict__. 
+ PyDictObject *dict = (PyDictObject *)*dictptr; + if ((type->tp_flags & Py_TPFLAGS_HEAPTYPE) + && dict->ma_keys == ((PyHeapTypeObject*)type)->ht_cached_keys + ) { + // Keys are shared + assert(PyUnicode_CheckExact(name)); + Py_hash_t hash = PyObject_Hash(name); + if (hash == -1) { + return -1; + } + PyObject *value; + Py_ssize_t index = _Py_dict_lookup(dict, name, hash, &value); + assert (index != DKIX_ERROR); + if (index != (uint16_t)index) { + goto fail; + } + uint32_t keys_version = _PyDictKeys_GetVersionForCurrentState(dict); + if (keys_version == 0) { + goto fail; + } + cache1->dk_version = keys_version; + cache1->tp_version = type->tp_version_tag; + cache0->index = index; + goto success; + } + else { + // Combined table + goto fail; + } +fail: + assert(!PyErr_Occurred()); + cache_backoff(cache0); + return 0; +success: + assert(!PyErr_Occurred()); + cache0->counter = saturating_start(); + return 0; +} + diff --git a/Tools/scripts/generate_opcode_h.py b/Tools/scripts/generate_opcode_h.py index 290f6251af1745..41ae3fe6e53686 100644 --- a/Tools/scripts/generate_opcode_h.py +++ b/Tools/scripts/generate_opcode_h.py @@ -53,6 +53,10 @@ def main(opcode_py, outfile='Include/opcode.h'): opmap = opcode['opmap'] hasjrel = opcode['hasjrel'] hasjabs = opcode['hasjabs'] + used = [ False ] * 256 + next_op = 1 + for name, op in opmap.items(): + used[op] = True with open(outfile, 'w') as fobj: fobj.write(header) for name in opcode['opname']: @@ -61,6 +65,11 @@ def main(opcode_py, outfile='Include/opcode.h'): if name == 'POP_EXCEPT': # Special entry for HAVE_ARGUMENT fobj.write("#define %-23s %3d\n" % ('HAVE_ARGUMENT', opcode['HAVE_ARGUMENT'])) + for name in opcode['_specialized_instructions']: + while used[next_op]: + next_op += 1 + fobj.write("#define %-23s %3s\n" % (name, next_op)) + used[next_op] = True fobj.write("#ifdef NEED_OPCODE_JUMP_TABLES\n") write_int_array_from_ops("_PyOpcode_RelativeJump", opcode['hasjrel'], fobj) write_int_array_from_ops("_PyOpcode_Jump", 
opcode['hasjrel'] + opcode['hasjabs'], fobj) From 35522bcf8c4f443373df34a2038c32142ec80daf Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 1 Jun 2021 14:17:54 +0100 Subject: [PATCH 02/15] Move dict-common.h to internal/pycore_dict.h --- .../internal/pycore_dict.h | 19 +++++++++++++++++-- Makefile.pre.in | 4 ++-- Objects/dictobject.c | 2 +- Objects/odictobject.c | 2 +- Python/ceval.c | 2 +- Python/specialize.c | 2 +- 6 files changed, 23 insertions(+), 8 deletions(-) rename Objects/dict-common.h => Include/internal/pycore_dict.h (86%) diff --git a/Objects/dict-common.h b/Include/internal/pycore_dict.h similarity index 86% rename from Objects/dict-common.h rename to Include/internal/pycore_dict.h index a6f518f301885a..a60560f21cf470 100644 --- a/Objects/dict-common.h +++ b/Include/internal/pycore_dict.h @@ -1,5 +1,14 @@ -#ifndef Py_DICT_COMMON_H -#define Py_DICT_COMMON_H + +#ifndef Py_INTERNAL_DICT_H +#define Py_INTERNAL_DICT_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + typedef struct { /* Cached hash code of me_key. 
*/ @@ -62,4 +71,10 @@ struct _dictkeysobject { see the DK_ENTRIES() macro */ }; +#define _PyList_ITEMS(op) (_PyList_CAST(op)->ob_item) + + +#ifdef __cplusplus +} #endif +#endif /* !Py_INTERNAL_DICT_H */ diff --git a/Makefile.pre.in b/Makefile.pre.in index 859b53947cab1a..97f21d454464bd 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -979,8 +979,7 @@ Objects/bytearrayobject.o: $(srcdir)/Objects/bytearrayobject.c $(BYTESTR_DEPS) Objects/unicodeobject.o: $(srcdir)/Objects/unicodeobject.c $(UNICODE_DEPS) -Objects/odictobject.o: $(srcdir)/Objects/dict-common.h -Objects/dictobject.o: $(srcdir)/Objects/stringlib/eq.h $(srcdir)/Objects/dict-common.h +Objects/dictobject.o: $(srcdir)/Objects/stringlib/eq.h Objects/setobject.o: $(srcdir)/Objects/stringlib/eq.h .PHONY: regen-opcode-targets @@ -1156,6 +1155,7 @@ PYTHON_HEADERS= \ $(srcdir)/Include/internal/pycore_compile.h \ $(srcdir)/Include/internal/pycore_condvar.h \ $(srcdir)/Include/internal/pycore_context.h \ + $(srcdir)/Include/internal/pycore_dict.h \ $(srcdir)/Include/internal/pycore_dtoa.h \ $(srcdir)/Include/internal/pycore_fileutils.h \ $(srcdir)/Include/internal/pycore_format.h \ diff --git a/Objects/dictobject.c b/Objects/dictobject.c index d97f9e2120d3fa..f50b588dbe2697 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -117,7 +117,7 @@ converting the dict to the combined table. 
#include "pycore_object.h" // _PyObject_GC_TRACK() #include "pycore_pyerrors.h" // _PyErr_Fetch() #include "pycore_pystate.h" // _PyThreadState_GET() -#include "dict-common.h" +#include "pycore_dict.h" #include "stringlib/eq.h" // unicode_eq() /*[clinic input] diff --git a/Objects/odictobject.c b/Objects/odictobject.c index 6a33910d9a89de..fb1ac0ce48dcfc 100644 --- a/Objects/odictobject.c +++ b/Objects/odictobject.c @@ -467,7 +467,7 @@ Potential Optimizations #include "Python.h" #include "pycore_object.h" #include // offsetof() -#include "dict-common.h" +#include "pycore_dict.h" #include #include "clinic/odictobject.c.h" diff --git a/Python/ceval.c b/Python/ceval.c index bd7e5066ca5469..6d7dc1f57bcc3a 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -24,7 +24,7 @@ #include "pycore_tuple.h" // _PyTuple_ITEMS() #include "code.h" -#include "Objects/dict-common.h" +#include "pycore_dict.h" #include "dictobject.h" #include "frameobject.h" #include "pycore_frame.h" diff --git a/Python/specialize.c b/Python/specialize.c index edf1b775d27f3c..deb42c077e140f 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -1,7 +1,7 @@ #include "Python.h" #include "pycore_code.h" -#include "Objects/dict-common.h" +#include "pycore_dict.h" #include "opcode.h" #include "structmember.h" // struct PyMemberDef, T_OFFSET_EX From 3ca2c7c6c65c01451170a5c79a8c49d9d5f42b9e Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 1 Jun 2021 17:17:36 +0100 Subject: [PATCH 03/15] Add LOAD_ATTR_COMBINED_KEYS specialized opcode. 
--- Include/internal/pycore_code.h | 2 +- Include/internal/pycore_dict.h | 18 ++++++++++++++++++ Include/opcode.h | 3 ++- Lib/opcode.py | 1 + Objects/dictobject.c | 18 ------------------ Python/ceval.c | 29 ++++++++++++++++++++++++++++- Python/opcode_targets.h | 2 +- Python/specialize.c | 18 ++++++++++++++---- 8 files changed, 65 insertions(+), 26 deletions(-) diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index bd2c9a1ec1804a..2df8abc6af10ba 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -45,7 +45,7 @@ typedef struct { typedef struct { uint32_t tp_version; - uint32_t dk_version; + uint32_t dk_version_or_hint; } _PyLoadAttrCache; /* Add specialized versions of entries to this union. diff --git a/Include/internal/pycore_dict.h b/Include/internal/pycore_dict.h index a60560f21cf470..ceb7a5642c8f79 100644 --- a/Include/internal/pycore_dict.h +++ b/Include/internal/pycore_dict.h @@ -73,6 +73,24 @@ struct _dictkeysobject { #define _PyList_ITEMS(op) (_PyList_CAST(op)->ob_item) +#define DK_LOG_SIZE(dk) ((dk)->dk_log2_size) +#if SIZEOF_VOID_P > 4 +#define DK_SIZE(dk) (((int64_t)1)<dk_indices))[DK_SIZE(dk) * DK_IXSIZE(dk)])) + #ifdef __cplusplus } diff --git a/Include/opcode.h b/Include/opcode.h index d8cba7ba09616a..11d15030c63c5d 100644 --- a/Include/opcode.h +++ b/Include/opcode.h @@ -138,7 +138,8 @@ extern "C" { #define CALL_METHOD_KW 166 #define LOAD_ATTR_ADAPTIVE 7 #define LOAD_ATTR_SPLIT_KEYS 8 -#define LOAD_ATTR_SLOT 13 +#define LOAD_ATTR_COMBINED_KEYS 13 +#define LOAD_ATTR_SLOT 14 #ifdef NEED_OPCODE_JUMP_TABLES static uint32_t _PyOpcode_RelativeJump[8] = { 0U, diff --git a/Lib/opcode.py b/Lib/opcode.py index 224344f69efdf5..d8e5825e72c385 100644 --- a/Lib/opcode.py +++ b/Lib/opcode.py @@ -222,5 +222,6 @@ def jabs_op(name, op): _specialized_instructions = [ "LOAD_ATTR_ADAPTIVE", "LOAD_ATTR_SPLIT_KEYS", + "LOAD_ATTR_COMBINED_KEYS", "LOAD_ATTR_SLOT", ] diff --git a/Objects/dictobject.c 
b/Objects/dictobject.c index f50b588dbe2697..b0e9eea35336ad 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -285,24 +285,6 @@ _PyDict_DebugMallocStats(FILE *out) state->numfree, sizeof(PyDictObject)); } -#define DK_LOG_SIZE(dk) ((dk)->dk_log2_size) -#if SIZEOF_VOID_P > 4 -#define DK_SIZE(dk) (((int64_t)1)<dk_indices))[DK_SIZE(dk) * DK_IXSIZE(dk)])) - #define DK_MASK(dk) (DK_SIZE(dk)-1) #define IS_POWER_OF_2(x) (((x) & (x-1)) == 0) diff --git a/Python/ceval.c b/Python/ceval.c index 6d7dc1f57bcc3a..9ccb5effb888cd 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -3494,7 +3494,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) PyDictObject *dict = *(PyDictObject **)(((char *)owner) + tp->tp_dictoffset); DEOPT_IF(dict == NULL, LOAD_ATTR); assert(PyDict_CheckExact((PyObject *)dict)); - DEOPT_IF(dict->ma_keys->dk_version != cache1->dk_version, LOAD_ATTR); + DEOPT_IF(dict->ma_keys->dk_version != cache1->dk_version_or_hint, LOAD_ATTR); res = dict->ma_values[cache0->index]; DEOPT_IF(res == NULL, LOAD_ATTR); record_cache_hit(cache0); @@ -3504,6 +3504,33 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) DISPATCH(); } + case TARGET(LOAD_ATTR_COMBINED_KEYS): { + PyObject *owner = TOP(); + PyObject *res; + PyTypeObject *tp = Py_TYPE(owner); + SpecializedCacheEntry *caches = GET_CACHE(); + _PyAdaptiveEntry *cache0 = &caches[0].adaptive; + _PyLoadAttrCache *cache1 = &caches[-1].load_attr; + assert(cache1->tp_version != 0); + DEOPT_IF(tp->tp_version_tag != cache1->tp_version, LOAD_ATTR); + assert(tp->tp_dictoffset > 0); + PyDictObject *dict = *(PyDictObject **)(((char *)owner) + tp->tp_dictoffset); + DEOPT_IF(dict == NULL, LOAD_ATTR); + assert(PyDict_CheckExact((PyObject *)dict)); + PyObject *name = GETITEM(names, cache0->original_oparg); + uint16_t hint = cache0->index; + DEOPT_IF(hint > dict->ma_keys->dk_nentries, LOAD_ATTR); + PyDictKeyEntry *ep = DK_ENTRIES(dict->ma_keys) + (size_t)hint; + 
DEOPT_IF(ep->me_key != name, LOAD_ATTR); + res = ep->me_value; + DEOPT_IF(res == NULL, LOAD_ATTR); + record_cache_hit(cache0); + Py_INCREF(res); + SET_TOP(res); + Py_DECREF(owner); + DISPATCH(); + } + case TARGET(LOAD_ATTR_SLOT): { PyObject *owner = TOP(); PyObject *res; diff --git a/Python/opcode_targets.h b/Python/opcode_targets.h index efcf6d613e9713..b1f0018acde3d2 100644 --- a/Python/opcode_targets.h +++ b/Python/opcode_targets.h @@ -12,8 +12,8 @@ static void *opcode_targets[256] = { &&TARGET_UNARY_POSITIVE, &&TARGET_UNARY_NEGATIVE, &&TARGET_UNARY_NOT, + &&TARGET_LOAD_ATTR_COMBINED_KEYS, &&TARGET_LOAD_ATTR_SLOT, - &&_unknown_opcode, &&TARGET_UNARY_INVERT, &&TARGET_BINARY_MATRIX_MULTIPLY, &&TARGET_INPLACE_MATRIX_MULTIPLY, diff --git a/Python/specialize.c b/Python/specialize.c index deb42c077e140f..8436056af871cb 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -251,7 +251,7 @@ _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, Sp && dict->ma_keys == ((PyHeapTypeObject*)type)->ht_cached_keys ) { // Keys are shared - assert(PyUnicode_CheckExact(name)); + assert(PyUnicode_CheckExact()); Py_hash_t hash = PyObject_Hash(name); if (hash == -1) { return -1; @@ -266,14 +266,24 @@ _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, Sp if (keys_version == 0) { goto fail; } - cache1->dk_version = keys_version; + cache1->dk_version_or_hint = keys_version; cache1->tp_version = type->tp_version_tag; cache0->index = index; + *instr = _Py_MAKECODEUNIT(LOAD_ATTR_SPLIT_KEYS, _Py_OPARG(*instr)); goto success; } else { - // Combined table - goto fail; + PyObject *value; + Py_ssize_t hint = + _PyDict_GetItemHint(dict, name, -1, &value); + if (hint != (uint32_t)hint) { + goto fail; + } + cache0->index = (uint16_t)hint; + cache1->tp_version = type->tp_version_tag; + *instr = _Py_MAKECODEUNIT(LOAD_ATTR_COMBINED_KEYS, _Py_OPARG(*instr)); + goto success; + } fail: assert(!PyErr_Occurred()); From 
3d50df3cf0d16291b370afd38b99e70899e15870 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 2 Jun 2021 11:40:11 +0100 Subject: [PATCH 04/15] Quicken in function if loopy --- Include/opcode.h | 9 +++++---- Lib/opcode.py | 1 + Lib/test/test_capi.py | 2 +- Python/ceval.c | 21 +++++++++++++++++++++ Python/opcode_targets.h | 6 +++--- Python/specialize.c | 3 +++ 6 files changed, 34 insertions(+), 8 deletions(-) diff --git a/Include/opcode.h b/Include/opcode.h index 11d15030c63c5d..d6df1ad77f1f6f 100644 --- a/Include/opcode.h +++ b/Include/opcode.h @@ -136,10 +136,11 @@ extern "C" { #define DICT_MERGE 164 #define DICT_UPDATE 165 #define CALL_METHOD_KW 166 -#define LOAD_ATTR_ADAPTIVE 7 -#define LOAD_ATTR_SPLIT_KEYS 8 -#define LOAD_ATTR_COMBINED_KEYS 13 -#define LOAD_ATTR_SLOT 14 +#define JUMP_ABSOLUTE_QUICK 7 +#define LOAD_ATTR_ADAPTIVE 8 +#define LOAD_ATTR_SPLIT_KEYS 13 +#define LOAD_ATTR_COMBINED_KEYS 14 +#define LOAD_ATTR_SLOT 18 #ifdef NEED_OPCODE_JUMP_TABLES static uint32_t _PyOpcode_RelativeJump[8] = { 0U, diff --git a/Lib/opcode.py b/Lib/opcode.py index d8e5825e72c385..d77649164259a6 100644 --- a/Lib/opcode.py +++ b/Lib/opcode.py @@ -220,6 +220,7 @@ def jabs_op(name, op): del def_op, name_op, jrel_op, jabs_op _specialized_instructions = [ + "JUMP_ABSOLUTE_QUICK", "LOAD_ATTR_ADAPTIVE", "LOAD_ATTR_SPLIT_KEYS", "LOAD_ATTR_COMBINED_KEYS", diff --git a/Lib/test/test_capi.py b/Lib/test/test_capi.py index 1b18bfad553007..f4b7b8c13b7d38 100644 --- a/Lib/test/test_capi.py +++ b/Lib/test/test_capi.py @@ -323,7 +323,7 @@ class C(): pass break """ rc, out, err = assert_python_ok('-c', code) - self.assertIn(b'MemoryError 1 10', out) + self.assertIn(b'MemoryError 1', out) self.assertIn(b'MemoryError 2 20', out) self.assertIn(b'MemoryError 3 30', out) diff --git a/Python/ceval.c b/Python/ceval.c index 9ccb5effb888cd..df45dbf91aeb3b 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -3793,6 +3793,27 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int 
throwflag) case TARGET(JUMP_ABSOLUTE): { PREDICTED(JUMP_ABSOLUTE); + if (oparg < INSTR_OFFSET()) { + /* Increment the warmup counter and quicken if warm enough + * _Py_Quicken is idempotent so we don't worry about overflow */ + if (!PyCodeObject_IsWarmedUp(co)) { + PyCodeObject_IncrementWarmup(co); + if (PyCodeObject_IsWarmedUp(co)) { + if (_Py_Quicken(co)) { + goto error; + } + int nexti = INSTR_OFFSET(); + first_instr = co->co_firstinstr; + next_instr = first_instr + nexti; + } + } + } + JUMPTO(oparg); + CHECK_EVAL_BREAKER(); + DISPATCH(); + } + + case TARGET(JUMP_ABSOLUTE_QUICK): { JUMPTO(oparg); CHECK_EVAL_BREAKER(); DISPATCH(); diff --git a/Python/opcode_targets.h b/Python/opcode_targets.h index b1f0018acde3d2..b52adce7ca6727 100644 --- a/Python/opcode_targets.h +++ b/Python/opcode_targets.h @@ -6,18 +6,18 @@ static void *opcode_targets[256] = { &&TARGET_DUP_TOP, &&TARGET_DUP_TOP_TWO, &&TARGET_ROT_FOUR, + &&TARGET_JUMP_ABSOLUTE_QUICK, &&TARGET_LOAD_ATTR_ADAPTIVE, - &&TARGET_LOAD_ATTR_SPLIT_KEYS, &&TARGET_NOP, &&TARGET_UNARY_POSITIVE, &&TARGET_UNARY_NEGATIVE, &&TARGET_UNARY_NOT, + &&TARGET_LOAD_ATTR_SPLIT_KEYS, &&TARGET_LOAD_ATTR_COMBINED_KEYS, - &&TARGET_LOAD_ATTR_SLOT, &&TARGET_UNARY_INVERT, &&TARGET_BINARY_MATRIX_MULTIPLY, &&TARGET_INPLACE_MATRIX_MULTIPLY, - &&_unknown_opcode, + &&TARGET_LOAD_ATTR_SLOT, &&TARGET_BINARY_POWER, &&TARGET_BINARY_MULTIPLY, &&_unknown_opcode, diff --git a/Python/specialize.c b/Python/specialize.c index 8436056af871cb..8befa55f09bde2 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -164,6 +164,9 @@ optimize(SpecializedCacheOrInstruction *quickened, int len) /* Super instructions don't use the cache, * so no need to update the offset. */ switch (opcode) { + case JUMP_ABSOLUTE: + instructions[i] = _Py_MAKECODEUNIT(JUMP_ABSOLUTE_QUICK, oparg); + break; /* Insert superinstructions here E.g. 
case LOAD_FAST: From 85dd177788d96dc28a229d28695f79e9626c2a3c Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 3 Jun 2021 18:19:21 +0100 Subject: [PATCH 05/15] Specialize LOAD_ATTR for module attributes. --- Include/internal/pycore_code.h | 19 ++++++++++ Include/opcode.h | 1 + Lib/opcode.py | 1 + Python/ceval.c | 22 ++++++++++++ Python/opcode_targets.h | 2 +- Python/specialize.c | 65 ++++++++++++++++++++++++++++++++-- 6 files changed, 107 insertions(+), 3 deletions(-) diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index 2df8abc6af10ba..05ba522969a3d0 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -319,6 +319,25 @@ cache_backoff(_PyAdaptiveEntry *entry) { int _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache); +#define SPECIALIZATION_STATS 0 +#if SPECIALIZATION_STATS + +typedef struct _specialization_stats { + uint64_t specialization_success; + uint64_t specialization_failure; + uint64_t loadattr_hit; + uint64_t loadattr_deferred; + uint64_t loadattr_miss; + uint64_t loadattr_deopt; +} SpecializationStats; + +extern SpecializationStats _specialization_stats; +#define STAT_INC(name) _specialization_stats.name++ +void _Py_PrintSpecializationStats(void); +#else +#define STAT_INC(name) ((void)0) +#endif + #ifdef __cplusplus } diff --git a/Include/opcode.h b/Include/opcode.h index d6df1ad77f1f6f..d58f1688bc6d2a 100644 --- a/Include/opcode.h +++ b/Include/opcode.h @@ -141,6 +141,7 @@ extern "C" { #define LOAD_ATTR_SPLIT_KEYS 13 #define LOAD_ATTR_COMBINED_KEYS 14 #define LOAD_ATTR_SLOT 18 +#define LOAD_ATTR_MODULE 21 #ifdef NEED_OPCODE_JUMP_TABLES static uint32_t _PyOpcode_RelativeJump[8] = { 0U, diff --git a/Lib/opcode.py b/Lib/opcode.py index d77649164259a6..936ced6eef2c68 100644 --- a/Lib/opcode.py +++ b/Lib/opcode.py @@ -225,4 +225,5 @@ def jabs_op(name, op): "LOAD_ATTR_SPLIT_KEYS", "LOAD_ATTR_COMBINED_KEYS", "LOAD_ATTR_SLOT", + 
"LOAD_ATTR_MODULE", ] diff --git a/Python/ceval.c b/Python/ceval.c index df45dbf91aeb3b..22141b9cf99422 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -16,6 +16,7 @@ #include "pycore_code.h" // _PyCode_InitOpcache() #include "pycore_initconfig.h" // _PyStatus_OK() #include "pycore_object.h" // _PyObject_GC_TRACK() +#include "pycore_moduleobject.h" #include "pycore_pyerrors.h" // _PyErr_Fetch() #include "pycore_pylifecycle.h" // _PyErr_Print() #include "pycore_pymem.h" // _PyMem_IsPtrFreed() @@ -3475,6 +3476,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) DISPATCH(); } else { + STAT_INC(loadattr_deferred); cache->adaptive.counter--; oparg = cache->adaptive.original_oparg; JUMP_TO_INSTRUCTION(LOAD_ATTR); @@ -3498,6 +3500,26 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) res = dict->ma_values[cache0->index]; DEOPT_IF(res == NULL, LOAD_ATTR); record_cache_hit(cache0); + STAT_INC(loadattr_hit); + Py_INCREF(res); + SET_TOP(res); + Py_DECREF(owner); + DISPATCH(); + } + + case TARGET(LOAD_ATTR_MODULE): { + PyObject *owner = TOP(); + PyObject *res; + SpecializedCacheEntry *caches = GET_CACHE(); + _PyAdaptiveEntry *cache0 = &caches[0].adaptive; + _PyLoadAttrCache *cache1 = &caches[-1].load_attr; + DEOPT_IF(!PyModule_CheckExact(owner), LOAD_ATTR); + PyDictObject *dict = (PyDictObject *)((PyModuleObject *)owner)->md_dict; + DEOPT_IF(dict->ma_keys->dk_version != cache1->dk_version_or_hint, LOAD_ATTR); + PyDictKeyEntry *ep = DK_ENTRIES(dict->ma_keys) + cache0->index; + res = ep->me_value; + DEOPT_IF(res == NULL, LOAD_ATTR); + record_cache_hit(cache0); Py_INCREF(res); SET_TOP(res); Py_DECREF(owner); diff --git a/Python/opcode_targets.h b/Python/opcode_targets.h index b52adce7ca6727..efdb138a95ee76 100644 --- a/Python/opcode_targets.h +++ b/Python/opcode_targets.h @@ -20,7 +20,7 @@ static void *opcode_targets[256] = { &&TARGET_LOAD_ATTR_SLOT, &&TARGET_BINARY_POWER, &&TARGET_BINARY_MULTIPLY, - 
&&_unknown_opcode, + &&TARGET_LOAD_ATTR_MODULE, &&TARGET_BINARY_MODULO, &&TARGET_BINARY_ADD, &&TARGET_BINARY_SUBTRACT, diff --git a/Python/specialize.c b/Python/specialize.c index 8befa55f09bde2..7b7890b5c3a8ec 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -2,6 +2,7 @@ #include "Python.h" #include "pycore_code.h" #include "pycore_dict.h" +#include "pycore_moduleobject.h" #include "opcode.h" #include "structmember.h" // struct PyMemberDef, T_OFFSET_EX @@ -31,6 +32,22 @@ */ Py_ssize_t _Py_QuickenedCount = 0; +#if SPECIALIZATION_STATS +SpecializationStats _specialization_stats = { 0 }; + +#define PRINT_STAT(name) fprintf(stderr, #name " : %" PRIu64" \n", _specialization_stats.name); +void +_Py_PrintSpecializationStats(void) +{ + PRINT_STAT(specialization_success); + PRINT_STAT(specialization_failure); + PRINT_STAT(loadattr_hit); + PRINT_STAT(loadattr_deferred); + PRINT_STAT(loadattr_miss); + PRINT_STAT(loadattr_deopt); +} + +#endif static SpecializedCacheOrInstruction * allocate(int cache_count, int instruction_count) @@ -204,11 +221,55 @@ _Py_Quicken(PyCodeObject *code) { return 0; } +int +special_module_load_attr( + PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, + _PyAdaptiveEntry *cache0, _PyLoadAttrCache *cache1) +{ + PyModuleObject *m = (PyModuleObject *)owner; + PyObject *attr, *getattr; + _Py_IDENTIFIER(__getattr__); + PyDictObject *dict = (PyDictObject *)m->md_dict; + getattr = _PyDict_GetItemIdWithError(m->md_dict, &PyId___getattr__); + if (PyErr_Occurred()) { + PyErr_Clear(); + return -1; + } + if (getattr != NULL) { + return -1; + } + Py_hash_t hash = PyObject_Hash(name); + if (hash == -1) { + PyErr_Clear(); + return -1; + } + Py_ssize_t index = _Py_dict_lookup(dict, name, hash, &attr); + assert (index != DKIX_ERROR); + if (index != (uint16_t)index) { + return -1; + } + uint32_t keys_version = _PyDictKeys_GetVersionForCurrentState(dict); + if (keys_version == 0) { + return -1; + } + cache1->dk_version_or_hint = keys_version; + 
cache0->index = index; + *instr = _Py_MAKECODEUNIT(LOAD_ATTR_MODULE, _Py_OPARG(*instr)); + return 0; +} + int _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache) { _PyAdaptiveEntry *cache0 = &cache->adaptive; _PyLoadAttrCache *cache1 = &cache[-1].load_attr; + if (PyModule_CheckExact(owner)) { + int err = special_module_load_attr(owner, instr, name, cache0, cache1); + if (err) { + goto fail; + } + goto success; + } PyTypeObject *type = Py_TYPE(owner); if (type->tp_getattro != PyObject_GenericGetAttr) { goto fail; @@ -254,7 +315,7 @@ _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, Sp && dict->ma_keys == ((PyHeapTypeObject*)type)->ht_cached_keys ) { // Keys are shared - assert(PyUnicode_CheckExact()); + assert(PyUnicode_CheckExact(name)); Py_hash_t hash = PyObject_Hash(name); if (hash == -1) { return -1; @@ -276,7 +337,7 @@ _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, Sp goto success; } else { - PyObject *value; + PyObject *value = NULL; Py_ssize_t hint = _PyDict_GetItemHint(dict, name, -1, &value); if (hint != (uint32_t)hint) { From e54f76bf31dff759dbcd3b785477c41408676907 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 7 Jun 2021 14:15:12 +0100 Subject: [PATCH 06/15] Set dict version to zero before setting index --- Objects/dictobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Objects/dictobject.c b/Objects/dictobject.c index b0e9eea35336ad..3a1dbc994b44b0 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -1526,10 +1526,10 @@ delitem_common(PyDictObject *mp, Py_hash_t hash, Py_ssize_t ix, assert(hashpos >= 0); mp->ma_used--; + mp->ma_keys->dk_version = 0; mp->ma_version_tag = DICT_NEXT_VERSION(); ep = &DK_ENTRIES(mp->ma_keys)[ix]; dictkeys_set_index(mp->ma_keys, hashpos, DKIX_DUMMY); - mp->ma_keys->dk_version = 0; old_key = ep->me_key; ep->me_key = NULL; ep->me_value = NULL; From 
eb08f919074726b565d9bc1a190442a8c3588750 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 7 Jun 2021 22:45:17 +0100 Subject: [PATCH 07/15] Don't crash if module has no dictionary --- Python/specialize.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Python/specialize.c b/Python/specialize.c index 7b7890b5c3a8ec..89e0747681af3d 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -230,6 +230,9 @@ special_module_load_attr( PyObject *attr, *getattr; _Py_IDENTIFIER(__getattr__); PyDictObject *dict = (PyDictObject *)m->md_dict; + if (dict == NULL) { + return -1; + } getattr = _PyDict_GetItemIdWithError(m->md_dict, &PyId___getattr__); if (PyErr_Occurred()) { PyErr_Clear(); From a03abb353ed2b03c65e287b662fb526a8e433a0f Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 8 Jun 2021 10:26:13 +0100 Subject: [PATCH 08/15] Rename LOAD_ATTR_COMBINED_KEYS to LOAD_ATTR_WITH_HINT. Add NEWS item. --- Include/opcode.h | 2 +- Lib/opcode.py | 2 +- .../2021-06-08-10-22-46.bpo-44337.RTjmIt.rst | 11 +++++++++++ Python/ceval.c | 2 +- Python/opcode_targets.h | 2 +- Python/specialize.c | 2 +- 6 files changed, 16 insertions(+), 5 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2021-06-08-10-22-46.bpo-44337.RTjmIt.rst diff --git a/Include/opcode.h b/Include/opcode.h index d58f1688bc6d2a..8f5be99cae0c1b 100644 --- a/Include/opcode.h +++ b/Include/opcode.h @@ -139,7 +139,7 @@ extern "C" { #define JUMP_ABSOLUTE_QUICK 7 #define LOAD_ATTR_ADAPTIVE 8 #define LOAD_ATTR_SPLIT_KEYS 13 -#define LOAD_ATTR_COMBINED_KEYS 14 +#define LOAD_ATTR_WITH_HINT 14 #define LOAD_ATTR_SLOT 18 #define LOAD_ATTR_MODULE 21 #ifdef NEED_OPCODE_JUMP_TABLES diff --git a/Lib/opcode.py b/Lib/opcode.py index 936ced6eef2c68..265759e60071ce 100644 --- a/Lib/opcode.py +++ b/Lib/opcode.py @@ -223,7 +223,7 @@ def jabs_op(name, op): "JUMP_ABSOLUTE_QUICK", "LOAD_ATTR_ADAPTIVE", "LOAD_ATTR_SPLIT_KEYS", - "LOAD_ATTR_COMBINED_KEYS", + "LOAD_ATTR_WITH_HINT", "LOAD_ATTR_SLOT", "LOAD_ATTR_MODULE", 
] diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-06-08-10-22-46.bpo-44337.RTjmIt.rst b/Misc/NEWS.d/next/Core and Builtins/2021-06-08-10-22-46.bpo-44337.RTjmIt.rst new file mode 100644 index 00000000000000..2df082a078e309 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2021-06-08-10-22-46.bpo-44337.RTjmIt.rst @@ -0,0 +1,11 @@ +Initial implementation of adaptive specialization of LOAD_ATTR + +Four specialized forms of LOAD_ATTR are added: + +* LOAD_ATTR_SLOT + +* LOAD_ATTR_SPLIT_KEYS + +* LOAD_ATTR_WITH_HINT + +* LOAD_ATTR_MODULE diff --git a/Python/ceval.c b/Python/ceval.c index 22141b9cf99422..84a6cc41f58f50 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -3526,7 +3526,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) DISPATCH(); } - case TARGET(LOAD_ATTR_COMBINED_KEYS): { + case TARGET(LOAD_ATTR_WITH_HINT): { PyObject *owner = TOP(); PyObject *res; PyTypeObject *tp = Py_TYPE(owner); diff --git a/Python/opcode_targets.h b/Python/opcode_targets.h index efdb138a95ee76..47beee7d59dbcc 100644 --- a/Python/opcode_targets.h +++ b/Python/opcode_targets.h @@ -13,7 +13,7 @@ static void *opcode_targets[256] = { &&TARGET_UNARY_NEGATIVE, &&TARGET_UNARY_NOT, &&TARGET_LOAD_ATTR_SPLIT_KEYS, - &&TARGET_LOAD_ATTR_COMBINED_KEYS, + &&TARGET_LOAD_ATTR_WITH_HINT, &&TARGET_UNARY_INVERT, &&TARGET_BINARY_MATRIX_MULTIPLY, &&TARGET_INPLACE_MATRIX_MULTIPLY, diff --git a/Python/specialize.c b/Python/specialize.c index 89e0747681af3d..e7c0ba955cad5b 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -348,7 +348,7 @@ _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, Sp } cache0->index = (uint16_t)hint; cache1->tp_version = type->tp_version_tag; - *instr = _Py_MAKECODEUNIT(LOAD_ATTR_COMBINED_KEYS, _Py_OPARG(*instr)); + *instr = _Py_MAKECODEUNIT(LOAD_ATTR_WITH_HINT, _Py_OPARG(*instr)); goto success; } From 10799646c86f9cd5014056d9cb7f3fc437fc7843 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 8 Jun 
2021 10:48:24 +0100 Subject: [PATCH 09/15] Fix name and use wider cache slot. --- Python/ceval.c | 4 ++-- Python/specialize.c | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/Python/ceval.c b/Python/ceval.c index 84a6cc41f58f50..d31159b8152625 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -3540,9 +3540,9 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) DEOPT_IF(dict == NULL, LOAD_ATTR); assert(PyDict_CheckExact((PyObject *)dict)); PyObject *name = GETITEM(names, cache0->original_oparg); - uint16_t hint = cache0->index; + uint32_t hint = cache1->dk_version_or_hint; DEOPT_IF(hint > dict->ma_keys->dk_nentries, LOAD_ATTR); - PyDictKeyEntry *ep = DK_ENTRIES(dict->ma_keys) + (size_t)hint; + PyDictKeyEntry *ep = DK_ENTRIES(dict->ma_keys) + hint; DEOPT_IF(ep->me_key != name, LOAD_ATTR); res = ep->me_value; DEOPT_IF(res == NULL, LOAD_ATTR); diff --git a/Python/specialize.c b/Python/specialize.c index e7c0ba955cad5b..2df58fd45ff342 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -221,8 +221,8 @@ _Py_Quicken(PyCodeObject *code) { return 0; } -int -special_module_load_attr( +static int +specialize_module_load_attr( PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, _PyAdaptiveEntry *cache0, _PyLoadAttrCache *cache1) { @@ -267,7 +267,7 @@ _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, Sp _PyAdaptiveEntry *cache0 = &cache->adaptive; _PyLoadAttrCache *cache1 = &cache[-1].load_attr; if (PyModule_CheckExact(owner)) { - int err = special_module_load_attr(owner, instr, name, cache0, cache1); + int err = specialize_module_load_attr(owner, instr, name, cache0, cache1); if (err) { goto fail; } @@ -346,12 +346,12 @@ _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, Sp if (hint != (uint32_t)hint) { goto fail; } - cache0->index = (uint16_t)hint; + cache1->dk_version_or_hint = (uint32_t)hint; cache1->tp_version = type->tp_version_tag; *instr = 
_Py_MAKECODEUNIT(LOAD_ATTR_WITH_HINT, _Py_OPARG(*instr)); goto success; - } + fail: assert(!PyErr_Occurred()); cache_backoff(cache0); From d2e09408620c77e8afa063fcfe2914ed064d961a Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 8 Jun 2021 11:13:00 +0100 Subject: [PATCH 10/15] Remove rebase artifact --- Include/internal/pycore_dict.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/Include/internal/pycore_dict.h b/Include/internal/pycore_dict.h index ceb7a5642c8f79..b2c64b2168cdcf 100644 --- a/Include/internal/pycore_dict.h +++ b/Include/internal/pycore_dict.h @@ -71,8 +71,6 @@ struct _dictkeysobject { see the DK_ENTRIES() macro */ }; -#define _PyList_ITEMS(op) (_PyList_CAST(op)->ob_item) - #define DK_LOG_SIZE(dk) ((dk)->dk_log2_size) #if SIZEOF_VOID_P > 4 #define DK_SIZE(dk) (((int64_t)1)<<DK_LOG_SIZE(dk)) From: Mark Shannon Date: Tue, 8 Jun 2021 11:19:12 +0100 Subject: [PATCH 11/15] Use specialization stats --- Python/ceval.c | 6 ++++++ Python/specialize.c | 2 ++ 2 files changed, 8 insertions(+) diff --git a/Python/ceval.c b/Python/ceval.c index d31159b8152625..06a02b40f9f153 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -3499,6 +3499,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) DEOPT_IF(dict->ma_keys->dk_version != cache1->dk_version_or_hint, LOAD_ATTR); res = dict->ma_values[cache0->index]; DEOPT_IF(res == NULL, LOAD_ATTR); + STAT_INC(loadattr_hit); record_cache_hit(cache0); STAT_INC(loadattr_hit); Py_INCREF(res); @@ -3519,6 +3520,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) PyDictKeyEntry *ep = DK_ENTRIES(dict->ma_keys) + cache0->index; res = ep->me_value; DEOPT_IF(res == NULL, LOAD_ATTR); + STAT_INC(loadattr_hit); record_cache_hit(cache0); Py_INCREF(res); SET_TOP(res); @@ -3546,6 +3548,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) DEOPT_IF(ep->me_key != name, LOAD_ATTR); res = ep->me_value; DEOPT_IF(res == NULL, LOAD_ATTR); + STAT_INC(loadattr_hit); 
record_cache_hit(cache0); Py_INCREF(res); SET_TOP(res); @@ -3565,6 +3568,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) char *addr = (char *)owner + cache0->index; res = *(PyObject **)addr; DEOPT_IF(res == NULL, LOAD_ATTR); + STAT_INC(loadattr_hit); record_cache_hit(cache0); Py_INCREF(res); SET_TOP(res); @@ -4455,10 +4459,12 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) LOAD_ATTR_miss: { + STAT_INC(loadattr_miss); _PyAdaptiveEntry *cache = &GET_CACHE()->adaptive; record_cache_miss(cache); if (too_many_cache_misses(cache)) { next_instr[-1] = _Py_MAKECODEUNIT(LOAD_ATTR_ADAPTIVE, _Py_OPARG(next_instr[-1])); + STAT_INC(loadattr_deopt); cache_backoff(cache); } oparg = cache->original_oparg; diff --git a/Python/specialize.c b/Python/specialize.c index 2df58fd45ff342..1e3e1bbeec13b1 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -353,10 +353,12 @@ _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, Sp } fail: + STAT_INC(specialization_failure); assert(!PyErr_Occurred()); cache_backoff(cache0); return 0; success: + STAT_INC(specialization_success); assert(!PyErr_Occurred()); cache0->counter = saturating_start(); return 0; From f9e999ccbba871a5998fcb3c9fea4a5c0b679ba5 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 8 Jun 2021 11:39:48 +0100 Subject: [PATCH 12/15] Use _PyDict_GetItemHint instead of _Py_dict_lookup to determine index in dictionary when specializing. 
--- Python/specialize.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/Python/specialize.c b/Python/specialize.c index 1e3e1bbeec13b1..1f309a3179fc16 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -227,26 +227,27 @@ specialize_module_load_attr( _PyAdaptiveEntry *cache0, _PyLoadAttrCache *cache1) { PyModuleObject *m = (PyModuleObject *)owner; - PyObject *attr, *getattr; + PyObject *value = NULL; + PyObject *getattr; _Py_IDENTIFIER(__getattr__); PyDictObject *dict = (PyDictObject *)m->md_dict; if (dict == NULL) { return -1; } - getattr = _PyDict_GetItemIdWithError(m->md_dict, &PyId___getattr__); - if (PyErr_Occurred()) { - PyErr_Clear(); + if (dict->ma_keys->dk_kind != DICT_KEYS_UNICODE) { return -1; } - if (getattr != NULL) { + getattr = _PyUnicode_FromId(&PyId___getattr__); /* borrowed */ + if (getattr == NULL) { + PyErr_Clear(); return -1; } - Py_hash_t hash = PyObject_Hash(name); - if (hash == -1) { - PyErr_Clear(); + Py_ssize_t index = _PyDict_GetItemHint(dict, getattr, -1, &value); + assert(index != DKIX_ERROR); + if (index != DKIX_EMPTY) { return -1; } - Py_ssize_t index = _Py_dict_lookup(dict, name, hash, &attr); + index = _PyDict_GetItemHint(dict, name, -1, &value); assert (index != DKIX_ERROR); if (index != (uint16_t)index) { return -1; From e858ea15718709bc8ec3c13bd8451ff7d62cbe80 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 9 Jun 2021 09:33:13 +0100 Subject: [PATCH 13/15] Assert that address is in bounds (ASAN thinks it might not be). 
--- Python/ceval.c | 1 + 1 file changed, 1 insertion(+) diff --git a/Python/ceval.c b/Python/ceval.c index 06a02b40f9f153..784d0244e8c852 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -3517,6 +3517,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) DEOPT_IF(!PyModule_CheckExact(owner), LOAD_ATTR); PyDictObject *dict = (PyDictObject *)((PyModuleObject *)owner)->md_dict; DEOPT_IF(dict->ma_keys->dk_version != cache1->dk_version_or_hint, LOAD_ATTR); + assert(cache0->index < dict->ma_keys->dk_nentries); PyDictKeyEntry *ep = DK_ENTRIES(dict->ma_keys) + cache0->index; res = ep->me_value; DEOPT_IF(res == NULL, LOAD_ATTR); From a0ed9e2b0387137bd8d124f7d7cd83e34dad2606 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 9 Jun 2021 13:16:53 +0100 Subject: [PATCH 14/15] Fix off by one error --- Python/ceval.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Python/ceval.c b/Python/ceval.c index 784d0244e8c852..46133c9fe28dca 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -3517,6 +3517,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) DEOPT_IF(!PyModule_CheckExact(owner), LOAD_ATTR); PyDictObject *dict = (PyDictObject *)((PyModuleObject *)owner)->md_dict; DEOPT_IF(dict->ma_keys->dk_version != cache1->dk_version_or_hint, LOAD_ATTR); + assert(dict->ma_keys->dk_kind == DICT_KEYS_UNICODE); assert(cache0->index < dict->ma_keys->dk_nentries); PyDictKeyEntry *ep = DK_ENTRIES(dict->ma_keys) + cache0->index; res = ep->me_value; @@ -3544,7 +3545,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) assert(PyDict_CheckExact((PyObject *)dict)); PyObject *name = GETITEM(names, cache0->original_oparg); uint32_t hint = cache1->dk_version_or_hint; - DEOPT_IF(hint > dict->ma_keys->dk_nentries, LOAD_ATTR); + DEOPT_IF(hint >= dict->ma_keys->dk_nentries, LOAD_ATTR); PyDictKeyEntry *ep = DK_ENTRIES(dict->ma_keys) + hint; DEOPT_IF(ep->me_key != name, LOAD_ATTR); res 
= ep->me_value; From b07f219a01c1a37e122af997bfed27d355d8442d Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 9 Jun 2021 13:23:52 +0100 Subject: [PATCH 15/15] Fix compiler warnings and check slot offset can be stored in index field --- Python/specialize.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/Python/specialize.c b/Python/specialize.c index 1f309a3179fc16..1801e6620f1e3c 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -257,7 +257,7 @@ specialize_module_load_attr( return -1; } cache1->dk_version_or_hint = keys_version; - cache0->index = index; + cache0->index = (uint16_t)index; *instr = _Py_MAKECODEUNIT(LOAD_ATTR_MODULE, _Py_OPARG(*instr)); return 0; } @@ -298,8 +298,11 @@ _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, Sp goto fail; } Py_ssize_t offset = dmem->offset; + if (offset != (uint16_t)offset) { + goto fail; + } assert(offset > 0); - cache0->index = offset; + cache0->index = (uint16_t)offset; cache1->tp_version = type->tp_version_tag; *instr = _Py_MAKECODEUNIT(LOAD_ATTR_SLOT, _Py_OPARG(*instr)); goto success; @@ -336,7 +339,7 @@ _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, Sp } cache1->dk_version_or_hint = keys_version; cache1->tp_version = type->tp_version_tag; - cache0->index = index; + cache0->index = (uint16_t)index; *instr = _Py_MAKECODEUNIT(LOAD_ATTR_SPLIT_KEYS, _Py_OPARG(*instr)); goto success; }