From e8acadbe89aca1ce38dcf94f72b38468b836510b Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Thu, 18 Jan 2024 00:01:33 +0900 Subject: [PATCH 1/3] gh-111968: Use per-thread freelists for generator in free-threading --- Include/internal/pycore_freelist.h | 17 ++++++++ Include/internal/pycore_gc.h | 2 +- Include/internal/pycore_genobject.h | 31 ++------------ Include/internal/pycore_interp.h | 1 - Objects/genobject.c | 64 +++++++++++------------------ Python/gc_gil.c | 1 - Python/pylifecycle.c | 2 +- Python/pystate.c | 1 + 8 files changed, 47 insertions(+), 72 deletions(-) diff --git a/Include/internal/pycore_freelist.h b/Include/internal/pycore_freelist.h index 566d47dbea11af..4ab93ee2bf6c32 100644 --- a/Include/internal/pycore_freelist.h +++ b/Include/internal/pycore_freelist.h @@ -19,12 +19,14 @@ extern "C" { # define PyList_MAXFREELIST 80 # define PyFloat_MAXFREELIST 100 # define PyContext_MAXFREELIST 255 +# define _PyAsyncGen_MAXFREELIST 80 #else # define PyTuple_NFREELISTS 0 # define PyTuple_MAXFREELIST 0 # define PyList_MAXFREELIST 0 # define PyFloat_MAXFREELIST 0 # define PyContext_MAXFREELIST 0 +# define _PyAsyncGen_MAXFREELIST 0 #endif struct _Py_list_state { @@ -77,12 +79,27 @@ struct _Py_context_state { #endif }; +struct _Py_async_gen_state { +#ifdef WITH_FREELISTS + /* Freelists boost performance 6-10%; they also reduce memory + fragmentation, as _PyAsyncGenWrappedValue and PyAsyncGenASend + are short-living objects that are instantiated for every + __anext__() call. */ + struct _PyAsyncGenWrappedValue* value_freelist[_PyAsyncGen_MAXFREELIST]; + int value_numfree; + + struct PyAsyncGenASend* asend_freelist[_PyAsyncGen_MAXFREELIST]; + int asend_numfree; +#endif +}; + typedef struct _Py_freelist_state { struct _Py_float_state float_state; struct _Py_tuple_state tuple_state; struct _Py_list_state list_state; struct _Py_slice_state slice_state; struct _Py_context_state context_state; + struct _Py_async_gen_state async_gen_state; } _PyFreeListState; #ifdef __cplusplus diff --git a/Include/internal/pycore_gc.h b/Include/internal/pycore_gc.h index 52e8b39ed0485d..d53de97709a782 100644 --- a/Include/internal/pycore_gc.h +++ b/Include/internal/pycore_gc.h @@ -251,7 +251,7 @@ extern void _PyFloat_ClearFreeList(_PyFreeListState *state, int is_finalization) extern void _PyList_ClearFreeList(_PyFreeListState *state, int is_finalization); extern void _PySlice_ClearCache(_PyFreeListState *state); extern void _PyDict_ClearFreeList(PyInterpreterState *interp); -extern void _PyAsyncGen_ClearFreeLists(PyInterpreterState *interp); +extern void _PyAsyncGen_ClearFreeLists(_PyFreeListState *state, int is_finalization); extern void _PyContext_ClearFreeList(_PyFreeListState *state, int is_finalization); extern void _Py_ScheduleGC(PyInterpreterState *interp); extern void _Py_RunGC(PyThreadState *tstate); diff --git a/Include/internal/pycore_genobject.h b/Include/internal/pycore_genobject.h index cf58a2750a31f9..5ad63658051e86 100644 --- a/Include/internal/pycore_genobject.h +++ b/Include/internal/pycore_genobject.h @@ -8,6 +8,8 @@ extern "C" { # error "this header requires Py_BUILD_CORE define" #endif +#include "pycore_freelist.h" + extern PyObject *_PyGen_yf(PyGenObject *); extern void _PyGen_Finalize(PyObject *self); @@ -26,34 +28,7 @@ extern PyTypeObject _PyAsyncGenAThrow_Type; /* runtime lifecycle */ -extern void _PyAsyncGen_Fini(PyInterpreterState *); - - -/* other API */ - -#ifndef WITH_FREELISTS -// without freelists -# define _PyAsyncGen_MAXFREELIST 0 -#endif - -#ifndef _PyAsyncGen_MAXFREELIST -# define _PyAsyncGen_MAXFREELIST 80 -#endif - -struct _Py_async_gen_state { -#if _PyAsyncGen_MAXFREELIST > 0 - /* Freelists boost performance 6-10%; they also reduce memory - fragmentation, as _PyAsyncGenWrappedValue and PyAsyncGenASend - are short-living objects that are instantiated for every - __anext__() call. */ - struct _PyAsyncGenWrappedValue* value_freelist[_PyAsyncGen_MAXFREELIST]; - int value_numfree; - - struct PyAsyncGenASend* asend_freelist[_PyAsyncGen_MAXFREELIST]; - int asend_numfree; -#endif -}; - +extern void _PyAsyncGen_Fini(_PyFreeListState *); #ifdef __cplusplus } diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index 7ec963005aba7e..922c84543a1393 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -190,7 +190,6 @@ struct _is { struct _Py_tuple_state tuple; struct _Py_dict_state dict_state; - struct _Py_async_gen_state async_gen; struct _Py_exc_state exc_state; struct ast_state ast; diff --git a/Objects/genobject.c b/Objects/genobject.c index f03919c75d70a5..e9aeb7ab9a9fa8 100644 --- a/Objects/genobject.c +++ b/Objects/genobject.c @@ -1628,12 +1628,12 @@ PyTypeObject PyAsyncGen_Type = { }; -#if _PyAsyncGen_MAXFREELIST > 0 +#ifdef WITH_FREELISTS static struct _Py_async_gen_state * get_async_gen_state(void) { - PyInterpreterState *interp = _PyInterpreterState_GET(); - return &interp->async_gen; + _PyFreeListState *state = _PyFreeListState_GET(); + return &state->async_gen_state; } #endif @@ -1656,36 +1656,36 @@ PyAsyncGen_New(PyFrameObject *f, PyObject *name, PyObject *qualname) void -_PyAsyncGen_ClearFreeLists(PyInterpreterState *interp) +_PyAsyncGen_ClearFreeLists(_PyFreeListState *freelist_state, int is_finalization) { -#if _PyAsyncGen_MAXFREELIST > 0 - struct _Py_async_gen_state *state = &interp->async_gen; +#ifdef WITH_FREELISTS + struct _Py_async_gen_state *state = &freelist_state->async_gen_state; - while (state->value_numfree) { + while (state->value_numfree > 0) { _PyAsyncGenWrappedValue *o; o = state->value_freelist[--state->value_numfree]; assert(_PyAsyncGenWrappedValue_CheckExact(o)); PyObject_GC_Del(o); } - while (state->asend_numfree) { + while (state->asend_numfree > 0) { PyAsyncGenASend *o; o = state->asend_freelist[--state->asend_numfree]; assert(Py_IS_TYPE(o, &_PyAsyncGenASend_Type)); PyObject_GC_Del(o); } + + if (is_finalization) { + state->value_numfree = -1; + state->asend_numfree = -1; + } #endif } void -_PyAsyncGen_Fini(PyInterpreterState *interp) +_PyAsyncGen_Fini(_PyFreeListState *state) { - _PyAsyncGen_ClearFreeLists(interp); -#if defined(Py_DEBUG) && _PyAsyncGen_MAXFREELIST > 0 - struct _Py_async_gen_state *state = &interp->async_gen; - state->value_numfree = -1; - state->asend_numfree = -1; -#endif + _PyAsyncGen_ClearFreeLists(state, 1); } @@ -1732,13 +1732,9 @@ async_gen_asend_dealloc(PyAsyncGenASend *o) _PyObject_GC_UNTRACK((PyObject *)o); Py_CLEAR(o->ags_gen); Py_CLEAR(o->ags_sendval); -#if _PyAsyncGen_MAXFREELIST > 0 +#ifdef WITH_FREELISTS struct _Py_async_gen_state *state = get_async_gen_state(); -#ifdef Py_DEBUG - // async_gen_asend_dealloc() must not be called after _PyAsyncGen_Fini() - assert(state->asend_numfree != -1); -#endif - if (state->asend_numfree < _PyAsyncGen_MAXFREELIST) { + if (state->asend_numfree >= 0 && state->asend_numfree < _PyAsyncGen_MAXFREELIST) { assert(PyAsyncGenASend_CheckExact(o)); _PyGC_CLEAR_FINALIZED((PyObject *)o); state->asend_freelist[state->asend_numfree++] = o; @@ -1906,13 +1902,9 @@ static PyObject * async_gen_asend_new(PyAsyncGenObject *gen, PyObject *sendval) { PyAsyncGenASend *o; -#if _PyAsyncGen_MAXFREELIST > 0 +#ifdef WITH_FREELISTS struct _Py_async_gen_state *state = get_async_gen_state(); -#ifdef Py_DEBUG - // async_gen_asend_new() must not be called after _PyAsyncGen_Fini() - assert(state->asend_numfree != -1); -#endif - if (state->asend_numfree) { + if (state->asend_numfree > 0) { state->asend_numfree--; o = state->asend_freelist[state->asend_numfree]; _Py_NewReference((PyObject *)o); @@ -1945,13 +1937,9 @@ async_gen_wrapped_val_dealloc(_PyAsyncGenWrappedValue *o) { _PyObject_GC_UNTRACK((PyObject *)o); Py_CLEAR(o->agw_val); -#if _PyAsyncGen_MAXFREELIST > 0 +#ifdef WITH_FREELISTS struct _Py_async_gen_state *state = get_async_gen_state(); -#ifdef Py_DEBUG - // async_gen_wrapped_val_dealloc() must not be called after _PyAsyncGen_Fini() - assert(state->value_numfree != -1); -#endif - if (state->value_numfree < _PyAsyncGen_MAXFREELIST) { + if (state->value_numfree >= 0 && state->value_numfree < _PyAsyncGen_MAXFREELIST) { assert(_PyAsyncGenWrappedValue_CheckExact(o)); state->value_freelist[state->value_numfree++] = o; OBJECT_STAT_INC(to_freelist); @@ -2022,13 +2010,9 @@ _PyAsyncGenValueWrapperNew(PyThreadState *tstate, PyObject *val) _PyAsyncGenWrappedValue *o; assert(val); -#if _PyAsyncGen_MAXFREELIST > 0 - struct _Py_async_gen_state *state = &tstate->interp->async_gen; -#ifdef Py_DEBUG - // _PyAsyncGenValueWrapperNew() must not be called after _PyAsyncGen_Fini() - assert(state->value_numfree != -1); -#endif - if (state->value_numfree) { +#ifdef WITH_FREELISTS + struct _Py_async_gen_state *state = get_async_gen_state(); + if (state->value_numfree > 0) { state->value_numfree--; o = state->value_freelist[state->value_numfree]; OBJECT_STAT_INC(from_freelist); diff --git a/Python/gc_gil.c b/Python/gc_gil.c index edf84176f79e0d..04c1c184250c60 100644 --- a/Python/gc_gil.c +++ b/Python/gc_gil.c @@ -12,7 +12,6 @@ void _PyGC_ClearAllFreeLists(PyInterpreterState *interp) { _PyDict_ClearFreeList(interp); - _PyAsyncGen_ClearFreeLists(interp); _Py_ClearFreeLists(&interp->freelist_state, 0); } diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 598cd68806e9be..0d5eec06e9b458 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -1735,7 +1735,6 @@ finalize_interp_types(PyInterpreterState *interp) _PySys_FiniTypes(interp); _PyXI_FiniTypes(interp); _PyExc_Fini(interp); - _PyAsyncGen_Fini(interp); _PyFloat_FiniType(interp); _PyLong_FiniTypes(interp); _PyThread_FiniType(interp); @@ -1759,6 +1758,7 @@ finalize_interp_types(PyInterpreterState *interp) _PyFloat_Fini(state); _PySlice_Fini(state); _PyContext_Fini(state); + _PyAsyncGen_Fini(state); #ifdef Py_DEBUG _PyStaticObjects_CheckRefcnt(interp); diff --git a/Python/pystate.c b/Python/pystate.c index 8374223fbf3b19..afb516f3838bb7 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -1462,6 +1462,7 @@ _Py_ClearFreeLists(_PyFreeListState *state, int is_finalization) _PyTuple_ClearFreeList(state, is_finalization); _PyList_ClearFreeList(state, is_finalization); _PyContext_ClearFreeList(state, is_finalization); + _PyAsyncGen_ClearFreeLists(state, is_finalization); } void From 62e7a42712c6cc8eac76c9b3db1cb73cd8dbd1d5 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Thu, 18 Jan 2024 00:06:35 +0900 Subject: [PATCH 2/3] Fix --- Python/gc_free_threading.c | 1 - 1 file changed, 1 deletion(-) diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index b1511eb5a70e7e..207a43b68d21f5 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -15,7 +15,6 @@ void _PyGC_ClearAllFreeLists(PyInterpreterState *interp) { _PyDict_ClearFreeList(interp); - _PyAsyncGen_ClearFreeLists(interp); HEAD_LOCK(&_PyRuntime); _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)interp->threads.head; From fe7f1dc734b038bcdd8b5816ca7ace0400164843 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Fri, 19 Jan 2024 02:14:05 +0900 Subject: [PATCH 3/3] Address code review --- Python/pystate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/pystate.c b/Python/pystate.c index afb516f3838bb7..999976283da675 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -1550,7 +1550,7 @@ PyThreadState_Clear(PyThreadState *tstate) #ifdef Py_GIL_DISABLED // Each thread should clear own freelists in free-threading builds. _PyFreeListState *freelist_state = &((_PyThreadStateImpl*)tstate)->freelist_state; - _Py_ClearFreeLists(freelist_state, 0); + _Py_ClearFreeLists(freelist_state, 1); _PySlice_ClearCache(freelist_state); #endif