From 9c1f7ba1b458f2aafeb16dffa83ea1a143cf008a Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Wed, 8 Mar 2023 11:47:18 -0800 Subject: [PATCH] mro: thread-safe MRO cache --- Include/cpython/object.h | 11 + Include/internal/pycore_interp.h | 6 +- Include/internal/pycore_mrocache.h | 110 +++++++++ Include/internal/pycore_pymem.h | 1 + Include/internal/pycore_pyqueue.h | 2 + Include/internal/pycore_typeobject.h | 18 -- Makefile.pre.in | 2 + Modules/_testbuffer.c | 3 + Modules/gcmodule.c | 5 + Objects/obmalloc.c | 80 +++++-- Objects/typeobject.c | 139 +++++------ PCbuild/_freeze_module.vcxproj | 1 + PCbuild/_freeze_module.vcxproj.filters | 3 + PCbuild/pythoncore.vcxproj | 2 + PCbuild/pythoncore.vcxproj.filters | 6 + Python/mrocache.c | 315 +++++++++++++++++++++++++ Python/pylifecycle.c | 7 + Python/pystate.c | 1 + 18 files changed, 597 insertions(+), 115 deletions(-) create mode 100644 Include/internal/pycore_mrocache.h create mode 100644 Python/mrocache.c diff --git a/Include/cpython/object.h b/Include/cpython/object.h index 2ba20c4a389..0861c3523a6 100644 --- a/Include/cpython/object.h +++ b/Include/cpython/object.h @@ -136,6 +136,14 @@ typedef struct { * backwards-compatibility */ typedef Py_ssize_t printfunc; +struct _Py_mro_cache_entry; + +typedef struct { + struct _Py_mro_cache_entry *buckets; + uint32_t mask; +} _Py_mro_cache; + + // If this structure is modified, Doc/includes/typestruct.h should be updated // as well. struct _typeobject { @@ -221,6 +229,9 @@ struct _typeobject { destructor tp_finalize; vectorcallfunc tp_vectorcall; + /* Added in version 3.13 */ + _Py_mro_cache tp_mro_cache; + /* bitset of which type-watchers care about this type */ char tp_watched; }; diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index 9843f02fc5e..ed4af6de607 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -23,6 +23,7 @@ extern "C" { #include "pycore_gc.h" // struct _gc_runtime_state #include "pycore_list.h" // struct _Py_list_state #include "pycore_llist.h" // struct llist_node +#include "pycore_mrocache.h" // struct _mro_cache_state #include "pycore_global_objects.h" // struct _Py_interp_static_objects #include "pycore_pymem.h" // struct _mem_work #include "pycore_tuple.h" // struct _Py_tuple_state @@ -78,8 +79,6 @@ typedef struct PyThreadStateImpl { struct brc_state brc; struct qsbr *qsbr; - - struct type_cache type_cache; } PyThreadStateImpl; @@ -127,6 +126,7 @@ struct _is { struct _ceval_state ceval; struct _gc_runtime_state gc; struct _mem_state mem; + struct _mro_cache_state mro_cache; // sys.modules dictionary PyObject *modules; @@ -211,6 +211,8 @@ struct _is { struct callable_cache callable_cache; PyCodeObject *interpreter_trampoline; + struct _Py_queue_head mro_buckets_to_free; + struct _Py_interp_cached_objects cached_objects; struct _Py_interp_static_objects static_objects; diff --git a/Include/internal/pycore_mrocache.h b/Include/internal/pycore_mrocache.h new file mode 100644 index 00000000000..f63462f3539 --- /dev/null +++ b/Include/internal/pycore_mrocache.h @@ -0,0 +1,110 @@ +#ifndef Py_INTERNAL_TYPECACHE_H +#define Py_INTERNAL_TYPECACHE_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +// TODO(sgross): MRO cache or type cache? + +typedef struct _Py_mro_cache_entry { + PyObject *name; /* name (interned unicode; immortal) */ + uintptr_t value; /* resolved function (owned ref), or 0=not cached 1=not present */ +} _Py_mro_cache_entry; + +typedef struct _Py_mro_cache_buckets { + struct _Py_queue_node node; + union { + Py_ssize_t refcount; + Py_ssize_t capacity; + } u; + uint32_t available; /* number of unused buckets */ + uint32_t used; /* number of used buckets */ + _Py_mro_cache_entry array[]; +} _Py_mro_cache_buckets; + +/* Per-interpreter state */ +struct _mro_cache_state { + _Py_mro_cache_buckets *empty_buckets; + Py_ssize_t empty_buckets_capacity; +}; + +typedef struct _Py_mro_cache_result { + int hit; + PyObject *value; +} _Py_mro_cache_result; + +extern PyStatus _Py_mro_cache_init(PyInterpreterState *interp); +extern void _Py_mro_cache_fini(PyInterpreterState *interp); +extern void _Py_mro_cache_init_type(PyTypeObject *type); +extern void _Py_mro_cache_fini_type(PyTypeObject *type); +extern int _Py_mro_cache_visit(_Py_mro_cache *cache, visitproc visit, void *arg); + +extern void _Py_mro_cache_erase(_Py_mro_cache *cache); +extern void _Py_mro_cache_insert(_Py_mro_cache *cache, PyObject *name, PyObject *value); +extern void _Py_mro_process_freed_buckets(PyInterpreterState *interp); + +extern PyObject *_Py_mro_cache_as_dict(_Py_mro_cache *cache); + +static inline _Py_mro_cache_result +_Py_mro_cache_make_result(uintptr_t *ptr) +{ + uintptr_t value = _Py_atomic_load_uintptr_relaxed(ptr); + return (_Py_mro_cache_result) { + .hit = value != 0, + .value = (PyObject *)(value & ~1), + }; +} + +static inline struct _Py_mro_cache_result +_Py_mro_cache_lookup(_Py_mro_cache *cache, PyObject *name) +{ + Py_hash_t hash = ((PyASCIIObject *)name)->hash; + uint32_t mask = _Py_atomic_load_uint32(&cache->mask); + _Py_mro_cache_entry *first = _Py_atomic_load_ptr_relaxed(&cache->buckets); + + Py_ssize_t offset = hash & mask; + _Py_mro_cache_entry *bucket = (_Py_mro_cache_entry *)((char *)first + offset); + + PyObject *entry_name = _Py_atomic_load_ptr_relaxed(&bucket->name); + if (_PY_LIKELY(entry_name == name)) { + return _Py_mro_cache_make_result(&bucket->value); + } + + /* First loop */ + while (1) { + if (entry_name == NULL) { + return (_Py_mro_cache_result){0, NULL}; + } + if (bucket == first) { + break; + } + bucket--; + entry_name = _Py_atomic_load_ptr_relaxed(&bucket->name); + if (entry_name == name) { + return _Py_mro_cache_make_result(&bucket->value); + } + } + + /* Second loop. Start at the last bucket. */ + bucket = (_Py_mro_cache_entry *)((char *)first + mask); + while (1) { + entry_name = _Py_atomic_load_ptr_relaxed(&bucket->name); + if (entry_name == name) { + return _Py_mro_cache_make_result(&bucket->value); + } + if (entry_name == NULL || bucket == first) { + return (_Py_mro_cache_result){0, NULL}; + } + bucket--; + } +} + + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_TYPECACHE_H */ diff --git a/Include/internal/pycore_pymem.h b/Include/internal/pycore_pymem.h index e5fb53e3287..ad983b82f11 100644 --- a/Include/internal/pycore_pymem.h +++ b/Include/internal/pycore_pymem.h @@ -101,6 +101,7 @@ PyAPI_FUNC(int) _PyMem_SetupAllocators(PyMemAllocatorName allocator); /* Free the pointer after all threads are quiescent. */ extern void _PyMem_FreeQsbr(void *ptr); +extern void _PyQsbr_Free(void *ptr, freefunc func); extern void _PyMem_QsbrPoll(PyThreadState *tstate); extern void _PyMem_AbandonQsbr(PyThreadState *tstate); extern void _PyMem_QsbrFini(PyInterpreterState *interp); diff --git a/Include/internal/pycore_pyqueue.h b/Include/internal/pycore_pyqueue.h index 2a56497b00e..ce01f64026d 100644 --- a/Include/internal/pycore_pyqueue.h +++ b/Include/internal/pycore_pyqueue.h @@ -13,6 +13,8 @@ extern "C" { // struct _Py_queue_head which contains pointers to the first and // last node in the queue. +#define _Py_QUEUE_INIT(name) { { NULL }, &name.first } + static inline void _Py_queue_init(struct _Py_queue_head *head) { diff --git a/Include/internal/pycore_typeobject.h b/Include/internal/pycore_typeobject.h index 0b56699a33c..bccb8bb6b86 100644 --- a/Include/internal/pycore_typeobject.h +++ b/Include/internal/pycore_typeobject.h @@ -26,21 +26,6 @@ extern void _PyTypes_Fini(PyInterpreterState *); typedef struct wrapperbase pytype_slotdef; - -// Type attribute lookup cache: speed up attribute and method lookups, -// see _PyType_Lookup(). -struct type_cache_entry { - unsigned int version; // initialized from type->tp_version_tag - PyObject *name; // reference to exactly a str or None - PyObject *value; // borrowed reference or NULL -}; - -#define MCACHE_SIZE_EXP 12 - -struct type_cache { - struct type_cache_entry hashtable[1 << MCACHE_SIZE_EXP]; -}; - /* For now we hard-code this to a value for which we are confident all the static builtin types will fit (for all builds). */ #define _Py_MAX_STATIC_BUILTIN_TYPES 200 @@ -63,9 +48,6 @@ _PyStaticType_GET_WEAKREFS_LISTPTR(static_builtin_state *state) } struct types_state { -#ifndef Py_NOGIL - struct type_cache type_cache; -#endif size_t num_builtins_initialized; static_builtin_state builtins[_Py_MAX_STATIC_BUILTIN_TYPES]; }; diff --git a/Makefile.pre.in b/Makefile.pre.in index 6424ce18dea..1e8f5f774f4 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -400,6 +400,7 @@ PYTHON_OBJS= \ Python/lock.o \ Python/marshal.o \ Python/modsupport.o \ + Python/mrocache.o \ Python/mysnprintf.o \ Python/mystrtoul.o \ Python/parking_lot.o \ @@ -1693,6 +1694,7 @@ PYTHON_HEADERS= \ $(srcdir)/Include/internal/pycore_list.h \ $(srcdir)/Include/internal/pycore_long.h \ $(srcdir)/Include/internal/pycore_moduleobject.h \ + $(srcdir)/Include/internal/pycore_mrocache.h \ $(srcdir)/Include/internal/pycore_namespace.h \ $(srcdir)/Include/internal/pycore_object.h \ $(srcdir)/Include/internal/pycore_obmalloc.h \ diff --git a/Modules/_testbuffer.c b/Modules/_testbuffer.c index 63ed4dc6ca8..6ab9100d465 100644 --- a/Modules/_testbuffer.c +++ b/Modules/_testbuffer.c @@ -2825,6 +2825,9 @@ PyInit__testbuffer(void) { PyObject *m; + if (PyType_Ready(&NDArray_Type) < 0) + return NULL; + m = PyModule_Create(&_testbuffermodule); if (m == NULL) return NULL; diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c index 79ae87ddec1..4c7934115d6 100644 --- a/Modules/gcmodule.c +++ b/Modules/gcmodule.c @@ -33,6 +33,7 @@ #include "pycore_pymem.h" #include "pycore_pystate.h" #include "pycore_refcnt.h" +#include "pycore_qsbr.h" #include "pycore_gc.h" #include "frameobject.h" /* for PyFrame_ClearFreeList */ #include "pydtrace.h" @@ -1697,6 +1698,10 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason) */ handle_legacy_finalizers(tstate, gcstate, &finalizers); + _Py_qsbr_advance(&_PyRuntime.qsbr_shared); + _Py_qsbr_quiescent_state(tstate); + _PyMem_QsbrPoll(tstate); + if (_PyErr_Occurred(tstate)) { if (reason == GC_REASON_SHUTDOWN) { _PyErr_Clear(tstate); diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index 3b3bcf93d1b..c553071f6b4 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -675,31 +675,46 @@ PyMem_Free(void *ptr) _PyMem.free(_PyMem.ctx, ptr); } - -typedef struct { +typedef union { void *ptr; - uint64_t seq; -} _PyMem_WorkItem; + void (*func)(void *); + uint64_t tagged_seq; +} workitem; -#define PY_MEM_WORK_ITEMS 127 +#define PY_MEM_WORK_ITEMS 254 typedef struct _PyMemWork { struct _Py_queue_node node; unsigned int first; unsigned int size; - _PyMem_WorkItem items[PY_MEM_WORK_ITEMS]; + workitem items[PY_MEM_WORK_ITEMS]; } _PyMem_WorkBuf; + void -_PyMem_FreeQsbr(void *ptr) +_PyQsbr_Free(void *ptr, freefunc func) { + int nitems = (func == NULL ? 2 : 3); + + if (_PyRuntime.stop_the_world) { + // Free immediately if the world is stopped, including during + // interpreter shutdown. + if (func == NULL) { + PyMem_Free(ptr); + } + else { + func(ptr); + } + return; + } + PyThreadState *tstate = _PyThreadState_GET(); // Try to get an non-full workbuf _PyMem_WorkBuf *work = NULL; if (!_Py_queue_is_empty(&tstate->mem_work)) { work = _Py_queue_last(&tstate->mem_work, _PyMem_WorkBuf, node); - if (work->size == PY_MEM_WORK_ITEMS) { + if (work->size + nitems >= PY_MEM_WORK_ITEMS) { work = NULL; } } @@ -714,16 +729,42 @@ _PyMem_FreeQsbr(void *ptr) } PyThreadStateImpl *tstate_impl = (PyThreadStateImpl *)tstate; - work->items[work->size].ptr = ptr; - work->items[work->size].seq = _Py_qsbr_deferred_advance(tstate_impl->qsbr); - work->size++; + uint64_t seq = _Py_qsbr_deferred_advance(tstate_impl->qsbr); + assert(seq % 2 == 1); + work->items[work->size++].tagged_seq = seq - (func == NULL ? 1 : 0); + work->items[work->size++].ptr = ptr; + if (func != NULL) { + work->items[work->size++].func = func; + } - if (work->size == PY_MEM_WORK_ITEMS) { + if (work->size + 3 >= PY_MEM_WORK_ITEMS) { // Now seems like a good time to check for any memory that can be freed. _PyMem_QsbrPoll(tstate); } } +void +_PyMem_FreeQsbr(void *ptr) +{ + _PyQsbr_Free(ptr, NULL); +} + +static void +free_next_workitem(_PyMem_WorkBuf *work) +{ + int tag = work->items[work->first].tagged_seq & 1; + void *ptr = work->items[work->first + 1].ptr; + if (tag) { + void (*func)(void *) = work->items[work->first + 2].func; + work->first += 3; + func(ptr); + } + else { + work->first += 2; + PyMem_Free(ptr); + } +} + static int _PyMem_ProcessQueue(struct _Py_queue_head *queue, struct qsbr *qsbr, bool keep_empty) { @@ -732,13 +773,14 @@ _PyMem_ProcessQueue(struct _Py_queue_head *queue, struct qsbr *qsbr, bool keep_e if (work->size == 0 && keep_empty) { return 0; } + while (work->first < work->size) { - _PyMem_WorkItem *item = &work->items[work->first]; - if (!_Py_qsbr_poll(qsbr, item->seq)) { + uint64_t tagged_seq = work->items[work->first].tagged_seq; + uint64_t seq = tagged_seq | 1; /* seq numbers are always odd */ + if (!_Py_qsbr_poll(qsbr, seq)) { return 1; } - PyMem_Free(item->ptr); - work->first++; + free_next_workitem(work); } // Remove the empty work buffer @@ -765,6 +807,8 @@ _PyMem_ProcessQueue(struct _Py_queue_head *queue, struct qsbr *qsbr, bool keep_e void _PyMem_QsbrPoll(PyThreadState *tstate) { + // FIXME(sgross): avoid re-entrancy + struct qsbr *qsbr = ((PyThreadStateImpl *)tstate)->qsbr; // Process any work on the thread-local queue. @@ -787,9 +831,7 @@ _PyMem_QsbrFini(PyInterpreterState *interp) while (!_Py_queue_is_empty(queue)) { _PyMem_WorkBuf *work = _Py_queue_first(queue, _PyMem_WorkBuf, node); while (work->first < work->size) { - _PyMem_WorkItem *item = &work->items[work->first]; - PyMem_Free(item->ptr); - work->first++; + free_next_workitem(work); } _Py_queue_dequeue(queue); PyMem_RawFree(work); diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 5936748ee66..458e43f7467 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -7,6 +7,7 @@ #include "pycore_dict.h" // _PyDict_KeysSize() #include "pycore_initconfig.h" // _PyStatus_OK() #include "pycore_moduleobject.h" // _PyModule_GetDef() +#include "pycore_mrocache.h" // _Py_mro_cache_lookup #include "pycore_object.h" // _PyType_HasFeature() #include "pycore_pyerrors.h" // _PyErr_Occurred() #include "pycore_pystate.h" // _PyThreadState_GET() @@ -28,27 +29,10 @@ class object "PyObject *" "&PyBaseObject_Type" /* Support type attribute lookup cache */ -/* The cache can keep references to the names alive for longer than - they normally would. This is why the maximum size is limited to - MCACHE_MAX_ATTR_SIZE, since it might be a problem if very large - strings are used as attribute names. */ -#define MCACHE_MAX_ATTR_SIZE 100 -#define MCACHE_HASH(version, name_hash) \ - (((unsigned int)(version) ^ (unsigned int)(name_hash)) \ - & ((1 << MCACHE_SIZE_EXP) - 1)) - -static inline unsigned int -MCACHE_HASH_METHOD(PyTypeObject *type, PyObject *name) -{ - unsigned int version = _Py_atomic_load_uint32_relaxed(&type->tp_version_tag); - return MCACHE_HASH(version, ((Py_ssize_t)(name)) >> 3); -} - #define MCACHE_CACHEABLE_NAME(name) \ PyUnicode_CheckExact(name) && \ PyUnicode_IS_READY(name) && \ - PyUnicode_CHECK_INTERNED(name) && \ - (PyUnicode_GET_LENGTH(name) <= MCACHE_MAX_ATTR_SIZE) + PyUnicode_CHECK_INTERNED(name) #define next_version_tag (_PyRuntime.types.next_version_tag) @@ -292,45 +276,22 @@ _PyType_GetTextSignatureFromInternalDoc(const char *name, const char *internal_d } -static struct type_cache* -get_type_cache(void) -{ - PyThreadState *tstate = _PyThreadState_GET(); -#ifdef Py_NOGIL - return &((PyThreadStateImpl *)tstate)->type_cache; -#else - return &tstate->interp->types.type_cache; -#endif -} - - void _PyType_InitCache(PyInterpreterState *interp) { } - -static unsigned int -_PyType_ClearCache(PyThreadStateImpl *tstate) -{ - memset(&tstate->type_cache, 0, sizeof(tstate->type_cache)); - return next_version_tag - 1; -} - - unsigned int PyType_ClearCache(void) { // TODO: clear all threads type caches or merge type caches - PyThreadState *tstate = _PyThreadState_GET(); - return _PyType_ClearCache((PyThreadStateImpl *)tstate); + return next_version_tag - 1; } void _PyTypes_Fini(PyInterpreterState *interp) { - _PyType_ClearCache(&interp->_initial_thread); assert(interp->types.num_builtins_initialized == 0); // All the static builtin types should have been finalized already. for (size_t i = 0; i < _Py_MAX_STATIC_BUILTIN_TYPES; i++) { @@ -474,6 +435,7 @@ _PyType_ModifiedEx(PyTypeObject *type) } } + _Py_mro_cache_erase(&type->tp_mro_cache); type->tp_flags &= ~Py_TPFLAGS_VALID_VERSION_TAG; type->tp_version_tag = 0; /* 0 is not a valid version tag */ } @@ -484,6 +446,7 @@ PyType_Modified(PyTypeObject *type) _PyMutex_lock(&_PyRuntime.mutex); _PyType_ModifiedEx(type); _PyMutex_unlock(&_PyRuntime.mutex); + _Py_mro_process_freed_buckets(_PyInterpreterState_GET()); } static void @@ -535,8 +498,12 @@ type_mro_modified(PyTypeObject *type, PyObject *bases) { } return; clear: + _PyMutex_lock(&_PyRuntime.mutex); + _Py_mro_cache_erase(&type->tp_mro_cache); type->tp_flags &= ~Py_TPFLAGS_VALID_VERSION_TAG; type->tp_version_tag = 0; /* 0 is not a valid version tag */ + _PyMutex_unlock(&_PyRuntime.mutex); + _Py_mro_process_freed_buckets(_PyInterpreterState_GET()); } static unsigned int @@ -1150,6 +1117,16 @@ type_set_annotations(PyTypeObject *type, PyObject *value, void *context) return result; } +static PyObject * +type_get_mro_cache(PyTypeObject *type, void *context) +{ + PyObject *res; + _PyMutex_lock(&_PyRuntime.mutex); + res = _Py_mro_cache_as_dict(&type->tp_mro_cache); + _PyMutex_unlock(&_PyRuntime.mutex); + return res; +} + /*[clinic input] type.__instancecheck__ -> bool @@ -1195,6 +1172,7 @@ static PyGetSetDef type_getsets[] = { {"__doc__", (getter)type_get_doc, (setter)type_set_doc, NULL}, {"__text_signature__", (getter)type_get_text_signature, NULL, NULL}, {"__annotations__", (getter)type_get_annotations, (setter)type_set_annotations, NULL}, + {"__mro_cache__", (getter)type_get_mro_cache, NULL, NULL}, {0} }; @@ -4160,31 +4138,12 @@ is_dunder_name(PyObject *name) return 0; } -/* Internal API to look for a name through the MRO. - This returns a borrowed reference, and doesn't set an exception! */ -PyObject * -_PyType_Lookup(PyTypeObject *type, PyObject *name) -{ - PyObject *res; - int error; +Py_NO_INLINE static PyObject * +_PyType_LookupSlow(PyTypeObject *type, PyObject *name) { + // TODO(sgross): perform lookup and insert under lock - unsigned int h = MCACHE_HASH_METHOD(type, name); - struct type_cache *cache = get_type_cache(); - struct type_cache_entry *entry = &cache->hashtable[h]; - if (entry->version == type->tp_version_tag && - entry->name == name) { - assert(_PyType_HasFeature(type, Py_TPFLAGS_VALID_VERSION_TAG)); - OBJECT_STAT_INC_COND(type_cache_hits, !is_dunder_name(name)); - OBJECT_STAT_INC_COND(type_cache_dunder_hits, is_dunder_name(name)); - return entry->value; - } - OBJECT_STAT_INC_COND(type_cache_misses, !is_dunder_name(name)); - OBJECT_STAT_INC_COND(type_cache_dunder_misses, is_dunder_name(name)); - - /* We may end up clearing live exceptions below, so make sure it's ours. */ - assert(!PyErr_Occurred()); - - res = find_name_in_mro(type, name, &error); + int error; + PyObject *res = find_name_in_mro(type, name, &error); /* Only put NULL results into cache if there was no error. */ if (error) { /* It's not ideal to clear the error condition, @@ -4201,20 +4160,38 @@ _PyType_Lookup(PyTypeObject *type, PyObject *name) return NULL; } - if (MCACHE_CACHEABLE_NAME(name) && assign_version_tag(type)) { - h = MCACHE_HASH_METHOD(type, name); - struct type_cache_entry *entry = &cache->hashtable[h]; - entry->version = type->tp_version_tag; - entry->value = res; /* borrowed */ - assert(_PyASCIIObject_CAST(name)->hash != -1); - OBJECT_STAT_INC_COND(type_cache_collisions, entry->name != NULL && entry->name != name); - assert(_PyType_HasFeature(type, Py_TPFLAGS_VALID_VERSION_TAG)); - assert(_PyObject_IS_IMMORTAL(name)); - entry->name = name; + /* We may end up clearing live exceptions below, so make sure it's ours. */ + assert(!PyErr_Occurred()); + + if (MCACHE_CACHEABLE_NAME(name)) { + // TODO(sgross): want consistency with find_name_in_mros + _PyMutex_lock(&_PyRuntime.mutex); + if (assign_version_tag(type)) { + _Py_mro_cache_insert(&type->tp_mro_cache, name, res); + } + _PyMutex_unlock(&_PyRuntime.mutex); + _Py_mro_process_freed_buckets(_PyInterpreterState_GET()); } + return res; } +/* Internal API to look for a name through the MRO. + This returns a borrowed reference, and doesn't set an exception! */ +PyObject * +_PyType_Lookup(PyTypeObject *type, PyObject *name) +{ + _Py_mro_cache_result r = _Py_mro_cache_lookup(&type->tp_mro_cache, name); + if (r.hit) { + OBJECT_STAT_INC_COND(type_cache_hits, !is_dunder_name(name)); + OBJECT_STAT_INC_COND(type_cache_dunder_hits, is_dunder_name(name)); + return r.value; + } + OBJECT_STAT_INC_COND(type_cache_misses, !is_dunder_name(name)); + OBJECT_STAT_INC_COND(type_cache_dunder_misses, is_dunder_name(name)); + return _PyType_LookupSlow(type, name); +} + PyObject * _PyType_LookupId(PyTypeObject *type, _Py_Identifier *name) { @@ -4407,6 +4384,8 @@ type_dealloc_common(PyTypeObject *type) remove_all_subclasses(type, type->tp_bases); PyErr_Restore(tp, val, tb); } + _Py_mro_cache_fini_type(type); + _Py_mro_process_freed_buckets(_PyInterpreterState_GET()); } @@ -4744,6 +4723,10 @@ type_traverse(PyTypeObject *type, visitproc visit, void *arg) Py_VISIT(type->tp_bases); Py_VISIT(type->tp_base); Py_VISIT(((PyHeapTypeObject *)type)->ht_module); + int err =_Py_mro_cache_visit(&type->tp_mro_cache, visit, arg); + if (err != 0) { + return err; + } /* There's no need to visit others because they can't be involved in cycles: @@ -6980,6 +6963,10 @@ PyType_Ready(PyTypeObject *type) type->tp_flags |= Py_TPFLAGS_IMMUTABLETYPE; } + _PyMutex_lock(&_PyRuntime.mutex); + _Py_mro_cache_init_type(type); + _PyMutex_unlock(&_PyRuntime.mutex); + if (type_ready(type) < 0) { type->tp_flags &= ~Py_TPFLAGS_READYING; return -1; diff --git a/PCbuild/_freeze_module.vcxproj b/PCbuild/_freeze_module.vcxproj index 716adaec48c..5de2513d5bf 100644 --- a/PCbuild/_freeze_module.vcxproj +++ b/PCbuild/_freeze_module.vcxproj @@ -223,6 +223,7 @@ + diff --git a/PCbuild/_freeze_module.vcxproj.filters b/PCbuild/_freeze_module.vcxproj.filters index 580e05d424a..8f68e71d6aa 100644 --- a/PCbuild/_freeze_module.vcxproj.filters +++ b/PCbuild/_freeze_module.vcxproj.filters @@ -286,6 +286,9 @@ Source Files + + Source Files + Source Files diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj index 597fa646a4f..4031511c73f 100644 --- a/PCbuild/pythoncore.vcxproj +++ b/PCbuild/pythoncore.vcxproj @@ -240,6 +240,7 @@ + @@ -554,6 +555,7 @@ + diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters index aebc069dcfe..15f100bc138 100644 --- a/PCbuild/pythoncore.vcxproj.filters +++ b/PCbuild/pythoncore.vcxproj.filters @@ -630,6 +630,9 @@ Include\internal + + Include\internal + Include @@ -1259,6 +1262,9 @@ Python + + Python + Python diff --git a/Python/mrocache.c b/Python/mrocache.c new file mode 100644 index 00000000000..10513376c50 --- /dev/null +++ b/Python/mrocache.c @@ -0,0 +1,315 @@ +#include "Python.h" + +#include "pycore_initconfig.h" +#include "pycore_interp.h" +#include "pycore_mrocache.h" +#include "pycore_pymem.h" +#include "pycore_pyqueue.h" +#include "pycore_pystate.h" + +#include +#include + +#define _Py_MRO_CACHE_MIN_SIZE 8 +#define _Py_MRO_CACHE_MAX_SIZE 65536 + +/* NOTE: mask is used to index array in bytes */ +static uint32_t +mask_from_capacity(size_t capacity) +{ + assert((capacity & (capacity - 1)) == 0); + assert(capacity >= _Py_MRO_CACHE_MIN_SIZE); + + return (uint32_t)((capacity - 1) * sizeof(_Py_mro_cache_entry)); +} + +static size_t +capacity_from_mask(Py_ssize_t mask) +{ + return (mask / sizeof(_Py_mro_cache_entry)) + 1; +} + +static void +decref_empty_bucket(_Py_mro_cache_buckets *buckets) +{ + assert(buckets->u.refcount > 0); + buckets->u.refcount--; + if (buckets->u.refcount == 0) { + PyInterpreterState *interp = _PyInterpreterState_GET(); + _Py_queue_enqeue(&interp->mro_buckets_to_free, &buckets->node); + } +} + +static void +clear_buckets(_Py_mro_cache_buckets *buckets) +{ + if (buckets->used == 0 && buckets->available == 0) { + decref_empty_bucket(buckets); + } + else { + PyInterpreterState *interp = _PyInterpreterState_GET(); + _Py_queue_enqeue(&interp->mro_buckets_to_free, &buckets->node); + } +} + +static void +buckets_free(void *ptr) +{ + _Py_mro_cache_buckets *buckets = (_Py_mro_cache_buckets *)ptr; + Py_ssize_t capacity = buckets->u.capacity; + for (Py_ssize_t i = 0; i < capacity; i++) { + PyObject *value = (PyObject *)(buckets->array[i].value & ~1); + Py_XDECREF(value); + } + PyMem_Free(buckets); +} + +void +_Py_mro_process_freed_buckets(PyInterpreterState *interp) +{ + struct _Py_queue_node *node; + while ((node = _Py_queue_dequeue(&interp->mro_buckets_to_free)) != NULL) { + _Py_mro_cache_buckets *buckets = _Py_queue_data(node, _Py_mro_cache_buckets, node); + if (buckets->used == 0 && buckets->available == 0) { + // empty bucket; no contents to decref + _PyMem_FreeQsbr(buckets); + } + else { + _PyQsbr_Free(buckets, &buckets_free); + } + } +} + +static _Py_mro_cache_buckets * +allocate_empty_buckets(Py_ssize_t capacity) +{ + Py_ssize_t size = sizeof(_Py_mro_cache_buckets) + capacity * sizeof(_Py_mro_cache_entry); + _Py_mro_cache_buckets *buckets = PyMem_Calloc(1, size); + buckets->u.refcount = 1; + return buckets; +} + +static _Py_mro_cache_buckets * +get_buckets(_Py_mro_cache *cache) +{ + char *mem = (char *)cache->buckets; + mem -= offsetof(_Py_mro_cache_buckets, array); + return (_Py_mro_cache_buckets *)mem; +} + +static _Py_mro_cache_buckets * +allocate_buckets(Py_ssize_t capacity) +{ + if (capacity > _Py_MRO_CACHE_MAX_SIZE) { + return NULL; + } + + /* Ensure that there is an empty buckets array of at least the same capacity. */ + PyInterpreterState *interp = _PyInterpreterState_GET(); + if (capacity > (Py_ssize_t)interp->mro_cache.empty_buckets_capacity) { + _Py_mro_cache_buckets *old = interp->mro_cache.empty_buckets; + _Py_mro_cache_buckets *new = allocate_empty_buckets(capacity); + if (new == NULL) { + return NULL; + } + interp->mro_cache.empty_buckets = new; + interp->mro_cache.empty_buckets_capacity = capacity; + decref_empty_bucket(old); + } + + Py_ssize_t size = sizeof(_Py_mro_cache_buckets) + capacity * sizeof(_Py_mro_cache_entry); + _Py_mro_cache_buckets *buckets = PyMem_Calloc(1, size); + if (buckets == NULL) { + return NULL; + } + buckets->u.capacity = capacity; + buckets->available = (capacity + 1) * 7 / 8; + buckets->used = 0; + return buckets; +} + +void +_Py_mro_cache_erase(_Py_mro_cache *cache) +{ + assert(_PyMutex_is_locked(&_PyRuntime.mutex)); + _Py_mro_cache_buckets *old = get_buckets(cache); + if (old->available == 0 && old->used == 0) { + return; + } + + PyInterpreterState *interp = _PyInterpreterState_GET(); + struct _mro_cache_state *mro_cache = &interp->mro_cache; + assert(capacity_from_mask(cache->mask) <= (size_t)mro_cache->empty_buckets_capacity); + + _Py_mro_cache_buckets *empty_buckets = mro_cache->empty_buckets; + empty_buckets->u.refcount++; + _Py_atomic_store_ptr_release(&cache->buckets, empty_buckets->array); + + _Py_queue_enqeue(&interp->mro_buckets_to_free, &old->node); +} + +static int +resize(_Py_mro_cache *cache, _Py_mro_cache_buckets *buckets) +{ + size_t old_capacity = capacity_from_mask(cache->mask); + size_t new_capacity; + if (buckets->used == 0) { + /* empty bucket */ + new_capacity = old_capacity; + } + else { + new_capacity = old_capacity * 2; + } + uint32_t new_mask = mask_from_capacity(new_capacity); + + _Py_mro_cache_buckets *new_buckets = allocate_buckets(new_capacity); + if (new_buckets == NULL) { + return -1; + } + + // First store the new buckets. + _Py_atomic_store_ptr_release(&cache->buckets, new_buckets->array); + + // Then update the mask (with at least release semantics) so that + // the buckets is visible first. + _Py_atomic_store_uint32(&cache->mask, new_mask); + + clear_buckets(buckets); + return 0; +} + +void +_Py_mro_cache_insert(_Py_mro_cache *cache, PyObject *name, PyObject *value) +{ + assert(PyUnicode_CheckExact(name) && PyUnicode_CHECK_INTERNED(name)); + // FIXME(sgross): need to lock runtime mutex + assert(_PyMutex_is_locked(&_PyRuntime.mutex)); + + _Py_mro_cache_buckets *buckets = get_buckets(cache); + if (buckets->available == 0) { + if (resize(cache, buckets) < 0) { + // allocation failure: don't cache the value + return; + } + buckets = get_buckets(cache); + assert(buckets->available > 0); + } + + assert(buckets->available < UINT32_MAX/10); + + Py_hash_t hash = ((PyASCIIObject *)name)->hash; + Py_ssize_t capacity = capacity_from_mask(cache->mask); + Py_ssize_t ix = (hash & cache->mask) / sizeof(_Py_mro_cache_entry); + for (;;) { + if (buckets->array[ix].name == NULL) { + uintptr_t v = value ? (uintptr_t)Py_NewRef(value) : 1; + _Py_atomic_store_ptr_relaxed(&buckets->array[ix].name, name); + _Py_atomic_store_uintptr_relaxed(&buckets->array[ix].value, v); + assert(buckets->available > 0); + buckets->available--; + buckets->used++; + return; + } + else if (buckets->array[ix].name == name) { + /* someone else added the entry before us. */ + return; + } + ix = (ix == 0) ? capacity - 1 : ix - 1; + } +} + +PyObject * +_Py_mro_cache_as_dict(_Py_mro_cache *cache) +{ + PyObject *dict = PyDict_New(); + if (dict == NULL) { + return NULL; + } + + assert(_PyMutex_is_locked(&_PyRuntime.mutex)); + _Py_mro_cache_entry *entry = cache->buckets; + Py_ssize_t capacity = capacity_from_mask(cache->mask); + for (Py_ssize_t i = 0; i < capacity; i++, entry++) { + if (entry->name) { + PyObject *value = (PyObject *)(entry->value & ~1); + if (value == NULL) { + value = Py_None; + } + int err = PyDict_SetItem(dict, entry->name, value); + if (err < 0) { + Py_CLEAR(dict); + return NULL; + } + } + } + + return dict; +} + +void +_Py_mro_cache_init_type(PyTypeObject *type) +{ + assert(_PyMutex_is_locked(&_PyRuntime.mutex)); + PyInterpreterState *interp = _PyInterpreterState_GET(); + if (type->tp_mro_cache.buckets == NULL) { + struct _Py_mro_cache_buckets *empty_buckets = interp->mro_cache.empty_buckets; + empty_buckets->u.refcount++; + type->tp_mro_cache.buckets = empty_buckets->array; + type->tp_mro_cache.mask = mask_from_capacity(_Py_MRO_CACHE_MIN_SIZE); + } +} + +void +_Py_mro_cache_fini_type(PyTypeObject *type) +{ + if (type->tp_mro_cache.buckets != NULL) { + _Py_mro_cache_buckets *buckets = get_buckets(&type->tp_mro_cache); + type->tp_mro_cache.buckets = NULL; + type->tp_mro_cache.mask = 0; + clear_buckets(buckets); + } +} + +int +_Py_mro_cache_visit(_Py_mro_cache *cache, visitproc visit, void *arg) +{ + _Py_mro_cache_entry *entry = cache->buckets; + if (entry == NULL) { + return 0; + } + Py_ssize_t capacity = capacity_from_mask(cache->mask); + for (Py_ssize_t i = 0; i < capacity; i++, entry++) { + PyObject *value = (PyObject *)(entry->value & ~1); + if (value) { + int err = visit(value, arg); + if (err != 0) { + return err; + } + } + } + return 0; +} + +PyStatus +_Py_mro_cache_init(PyInterpreterState *interp) +{ + _Py_mro_cache_buckets *b = allocate_empty_buckets(_Py_MRO_CACHE_MIN_SIZE); + if (b == NULL) { + return _PyStatus_NO_MEMORY(); + } + interp->mro_cache.empty_buckets = b; + interp->mro_cache.empty_buckets_capacity = _Py_MRO_CACHE_MIN_SIZE; + return _PyStatus_OK(); +} + +void +_Py_mro_cache_fini(PyInterpreterState *interp) +{ + _Py_mro_cache_buckets *b = interp->mro_cache.empty_buckets; + if (b != NULL) { + interp->mro_cache.empty_buckets = NULL; + interp->mro_cache.empty_buckets_capacity = 0; + decref_empty_bucket(b); + _Py_mro_process_freed_buckets(interp); + } +} \ No newline at end of file diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index a912cb1b0dc..5d7eb289a09 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -15,6 +15,7 @@ #include "pycore_initconfig.h" // _PyStatus_OK() #include "pycore_list.h" // _PyList_Fini() #include "pycore_long.h" // _PyLong_InitTypes() +#include "pycore_mrocache.h" // _Py_mro_cache_init() #include "pycore_object.h" // _PyDebug_PrintTotalRefs() #include "pycore_pathconfig.h" // _PyConfig_WritePathConfig() #include "pycore_pyerrors.h" // _PyErr_Occurred() @@ -902,6 +903,11 @@ pycore_interp_init(PyThreadState *tstate) return _PyStatus_ERR("failed to initialize deep-frozen modules"); } + status = _Py_mro_cache_init(interp); + if (_PyStatus_EXCEPTION(status)) { + goto done; + } + status = pycore_init_types(interp); if (_PyStatus_EXCEPTION(status)) { goto done; @@ -1797,6 +1803,7 @@ finalize_interp_clear(PyThreadState *tstate) } finalize_interp_types(tstate); + _Py_mro_cache_fini(tstate->interp); } diff --git a/Python/pystate.c b/Python/pystate.c index dc529cd096a..860cdb56392 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -466,6 +466,7 @@ init_interpreter(PyInterpreterState *interp, PyConfig_InitPythonConfig(&interp->config); _PyType_InitCache(interp); _Py_queue_init(&interp->mem.work); + _Py_queue_init(&interp->mro_buckets_to_free); interp->_initialized = 1; }