Skip to content

gh-111968: Use per-thread freelists for generator in free-threading #114189

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jan 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions Include/internal/pycore_freelist.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,14 @@ extern "C" {
# define PyList_MAXFREELIST 80
# define PyFloat_MAXFREELIST 100
# define PyContext_MAXFREELIST 255
# define _PyAsyncGen_MAXFREELIST 80
#else
# define PyTuple_NFREELISTS 0
# define PyTuple_MAXFREELIST 0
# define PyList_MAXFREELIST 0
# define PyFloat_MAXFREELIST 0
# define PyContext_MAXFREELIST 0
# define _PyAsyncGen_MAXFREELIST 0
#endif

struct _Py_list_state {
Expand Down Expand Up @@ -77,12 +79,27 @@ struct _Py_context_state {
#endif
};

/* Freelist state for the async-generator helper objects.
   The fields exist only when WITH_FREELISTS is defined; otherwise the
   struct has no members.  Each array holds at most
   _PyAsyncGen_MAXFREELIST cached objects. */
struct _Py_async_gen_state {
#ifdef WITH_FREELISTS
/* Freelists boost performance 6-10%; they also reduce memory
fragmentation, as _PyAsyncGenWrappedValue and PyAsyncGenASend
are short-lived objects that are instantiated for every
__anext__() call. */
struct _PyAsyncGenWrappedValue* value_freelist[_PyAsyncGen_MAXFREELIST];
/* Number of objects currently cached in value_freelist.
   _PyAsyncGen_ClearFreeLists() sets it to -1 at finalization, after
   which the dealloc path no longer caches objects. */
int value_numfree;

struct PyAsyncGenASend* asend_freelist[_PyAsyncGen_MAXFREELIST];
/* Number of objects currently cached in asend_freelist; uses the same
   -1 "finalized" sentinel as value_numfree. */
int asend_numfree;
#endif
};

/* Aggregate of the per-type freelist states.
   Code elsewhere in this change reaches it as `interp->freelist_state`
   in the GIL build and as the `freelist_state` member of
   _PyThreadStateImpl when Py_GIL_DISABLED is defined, so in the latter
   case each thread has its own copy. */
typedef struct _Py_freelist_state {
struct _Py_float_state float_state;
struct _Py_tuple_state tuple_state;
struct _Py_list_state list_state;
struct _Py_slice_state slice_state;
struct _Py_context_state context_state;
/* Freelists for _PyAsyncGenWrappedValue and PyAsyncGenASend objects. */
struct _Py_async_gen_state async_gen_state;
} _PyFreeListState;

#ifdef __cplusplus
Expand Down
2 changes: 1 addition & 1 deletion Include/internal/pycore_gc.h
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,7 @@ extern void _PyFloat_ClearFreeList(_PyFreeListState *state, int is_finalization)
extern void _PyList_ClearFreeList(_PyFreeListState *state, int is_finalization);
extern void _PySlice_ClearCache(_PyFreeListState *state);
extern void _PyDict_ClearFreeList(PyInterpreterState *interp);
extern void _PyAsyncGen_ClearFreeLists(PyInterpreterState *interp);
extern void _PyAsyncGen_ClearFreeLists(_PyFreeListState *state, int is_finalization);
extern void _PyContext_ClearFreeList(_PyFreeListState *state, int is_finalization);
extern void _Py_ScheduleGC(PyInterpreterState *interp);
extern void _Py_RunGC(PyThreadState *tstate);
Expand Down
31 changes: 3 additions & 28 deletions Include/internal/pycore_genobject.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ extern "C" {
# error "this header requires Py_BUILD_CORE define"
#endif

#include "pycore_freelist.h"

extern PyObject *_PyGen_yf(PyGenObject *);
extern void _PyGen_Finalize(PyObject *self);

Expand All @@ -26,34 +28,7 @@ extern PyTypeObject _PyAsyncGenAThrow_Type;

/* runtime lifecycle */

extern void _PyAsyncGen_Fini(PyInterpreterState *);


/* other API */

#ifndef WITH_FREELISTS
// without freelists
# define _PyAsyncGen_MAXFREELIST 0
#endif

#ifndef _PyAsyncGen_MAXFREELIST
# define _PyAsyncGen_MAXFREELIST 80
#endif

struct _Py_async_gen_state {
#if _PyAsyncGen_MAXFREELIST > 0
/* Freelists boost performance 6-10%; they also reduce memory
fragmentation, as _PyAsyncGenWrappedValue and PyAsyncGenASend
are short-living objects that are instantiated for every
__anext__() call. */
struct _PyAsyncGenWrappedValue* value_freelist[_PyAsyncGen_MAXFREELIST];
int value_numfree;

struct PyAsyncGenASend* asend_freelist[_PyAsyncGen_MAXFREELIST];
int asend_numfree;
#endif
};

extern void _PyAsyncGen_Fini(_PyFreeListState *);

#ifdef __cplusplus
}
Expand Down
1 change: 0 additions & 1 deletion Include/internal/pycore_interp.h
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,6 @@ struct _is {

struct _Py_tuple_state tuple;
struct _Py_dict_state dict_state;
struct _Py_async_gen_state async_gen;
struct _Py_exc_state exc_state;

struct ast_state ast;
Expand Down
64 changes: 24 additions & 40 deletions Objects/genobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -1628,12 +1628,12 @@ PyTypeObject PyAsyncGen_Type = {
};


/* Return a pointer to the async-generator freelist state used by the
   allocation and deallocation paths in this file.
   NOTE(review): this span is a diff hunk shown without +/- markers, so
   each changed line appears twice.  The `#if _PyAsyncGen_MAXFREELIST > 0`
   guard and the `interp->async_gen` lookup look like the earlier form;
   `#ifdef WITH_FREELISTS` and the `_PyFreeListState_GET()` lookup look
   like their replacements -- confirm against the merged file. */
#if _PyAsyncGen_MAXFREELIST > 0
#ifdef WITH_FREELISTS
static struct _Py_async_gen_state *
get_async_gen_state(void)
{
PyInterpreterState *interp = _PyInterpreterState_GET();
return &interp->async_gen;
_PyFreeListState *state = _PyFreeListState_GET();
return &state->async_gen_state;
}
#endif

Expand All @@ -1656,36 +1656,36 @@ PyAsyncGen_New(PyFrameObject *f, PyObject *name, PyObject *qualname)


/* Release every object cached in the two async-generator freelists.
   With the two-argument signature, a non-zero `is_finalization` also
   sets both counters to -1; the dealloc paths in this file only cache
   an object when the counter is >= 0, so the lists stay disabled after
   finalization.
   NOTE(review): this span is a diff hunk shown without +/- markers, so
   each changed line appears twice (earlier form first, then what looks
   like its replacement) -- confirm against the merged file. */
void
_PyAsyncGen_ClearFreeLists(PyInterpreterState *interp)
_PyAsyncGen_ClearFreeLists(_PyFreeListState *freelist_state, int is_finalization)
{
#if _PyAsyncGen_MAXFREELIST > 0
struct _Py_async_gen_state *state = &interp->async_gen;
#ifdef WITH_FREELISTS
struct _Py_async_gen_state *state = &freelist_state->async_gen_state;

/* Drain the wrapped-value freelist.  The `> 0` form of the condition
   also terminates immediately when the counter already holds the -1
   sentinel, whereas a plain truthiness test would not. */
while (state->value_numfree) {
while (state->value_numfree > 0) {
_PyAsyncGenWrappedValue *o;
o = state->value_freelist[--state->value_numfree];
assert(_PyAsyncGenWrappedValue_CheckExact(o));
PyObject_GC_Del(o);
}

/* Drain the asend freelist in the same way. */
while (state->asend_numfree) {
while (state->asend_numfree > 0) {
PyAsyncGenASend *o;
o = state->asend_freelist[--state->asend_numfree];
assert(Py_IS_TYPE(o, &_PyAsyncGenASend_Type));
PyObject_GC_Del(o);
}

/* Mark both lists as finalized so nothing is cached afterwards. */
if (is_finalization) {
state->value_numfree = -1;
state->asend_numfree = -1;
}
#endif
}

/* Finalization entry point for the async-generator freelists.
   In the `_PyFreeListState *` form it only forwards to
   _PyAsyncGen_ClearFreeLists() with is_finalization set to 1.
   NOTE(review): this span is a diff hunk shown without +/- markers.
   The `PyInterpreterState *` signature, the one-argument call and the
   Py_DEBUG-only block that stores -1 into the counters look like the
   earlier form that the final call replaces -- confirm against the
   merged file. */
void
_PyAsyncGen_Fini(PyInterpreterState *interp)
_PyAsyncGen_Fini(_PyFreeListState *state)
{
_PyAsyncGen_ClearFreeLists(interp);
#if defined(Py_DEBUG) && _PyAsyncGen_MAXFREELIST > 0
struct _Py_async_gen_state *state = &interp->async_gen;
state->value_numfree = -1;
state->asend_numfree = -1;
#endif
_PyAsyncGen_ClearFreeLists(state, 1);
}


Expand Down Expand Up @@ -1732,13 +1732,9 @@ async_gen_asend_dealloc(PyAsyncGenASend *o)
_PyObject_GC_UNTRACK((PyObject *)o);
Py_CLEAR(o->ags_gen);
Py_CLEAR(o->ags_sendval);
#if _PyAsyncGen_MAXFREELIST > 0
#ifdef WITH_FREELISTS
struct _Py_async_gen_state *state = get_async_gen_state();
#ifdef Py_DEBUG
// async_gen_asend_dealloc() must not be called after _PyAsyncGen_Fini()
assert(state->asend_numfree != -1);
#endif
if (state->asend_numfree < _PyAsyncGen_MAXFREELIST) {
if (state->asend_numfree >= 0 && state->asend_numfree < _PyAsyncGen_MAXFREELIST) {
assert(PyAsyncGenASend_CheckExact(o));
_PyGC_CLEAR_FINALIZED((PyObject *)o);
state->asend_freelist[state->asend_numfree++] = o;
Expand Down Expand Up @@ -1906,13 +1902,9 @@ static PyObject *
async_gen_asend_new(PyAsyncGenObject *gen, PyObject *sendval)
{
PyAsyncGenASend *o;
#if _PyAsyncGen_MAXFREELIST > 0
#ifdef WITH_FREELISTS
struct _Py_async_gen_state *state = get_async_gen_state();
#ifdef Py_DEBUG
// async_gen_asend_new() must not be called after _PyAsyncGen_Fini()
assert(state->asend_numfree != -1);
#endif
if (state->asend_numfree) {
if (state->asend_numfree > 0) {
state->asend_numfree--;
o = state->asend_freelist[state->asend_numfree];
_Py_NewReference((PyObject *)o);
Expand Down Expand Up @@ -1945,13 +1937,9 @@ async_gen_wrapped_val_dealloc(_PyAsyncGenWrappedValue *o)
{
_PyObject_GC_UNTRACK((PyObject *)o);
Py_CLEAR(o->agw_val);
#if _PyAsyncGen_MAXFREELIST > 0
#ifdef WITH_FREELISTS
struct _Py_async_gen_state *state = get_async_gen_state();
#ifdef Py_DEBUG
// async_gen_wrapped_val_dealloc() must not be called after _PyAsyncGen_Fini()
assert(state->value_numfree != -1);
#endif
if (state->value_numfree < _PyAsyncGen_MAXFREELIST) {
if (state->value_numfree >= 0 && state->value_numfree < _PyAsyncGen_MAXFREELIST) {
assert(_PyAsyncGenWrappedValue_CheckExact(o));
state->value_freelist[state->value_numfree++] = o;
OBJECT_STAT_INC(to_freelist);
Expand Down Expand Up @@ -2022,13 +2010,9 @@ _PyAsyncGenValueWrapperNew(PyThreadState *tstate, PyObject *val)
_PyAsyncGenWrappedValue *o;
assert(val);

#if _PyAsyncGen_MAXFREELIST > 0
struct _Py_async_gen_state *state = &tstate->interp->async_gen;
#ifdef Py_DEBUG
// _PyAsyncGenValueWrapperNew() must not be called after _PyAsyncGen_Fini()
assert(state->value_numfree != -1);
#endif
if (state->value_numfree) {
#ifdef WITH_FREELISTS
struct _Py_async_gen_state *state = get_async_gen_state();
if (state->value_numfree > 0) {
state->value_numfree--;
o = state->value_freelist[state->value_numfree];
OBJECT_STAT_INC(from_freelist);
Expand Down
1 change: 0 additions & 1 deletion Python/gc_free_threading.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ void
_PyGC_ClearAllFreeLists(PyInterpreterState *interp)
{
_PyDict_ClearFreeList(interp);
_PyAsyncGen_ClearFreeLists(interp);

HEAD_LOCK(&_PyRuntime);
_PyThreadStateImpl *tstate = (_PyThreadStateImpl *)interp->threads.head;
Expand Down
1 change: 0 additions & 1 deletion Python/gc_gil.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ void
_PyGC_ClearAllFreeLists(PyInterpreterState *interp)
{
_PyDict_ClearFreeList(interp);
_PyAsyncGen_ClearFreeLists(interp);

_Py_ClearFreeLists(&interp->freelist_state, 0);
}
Expand Down
2 changes: 1 addition & 1 deletion Python/pylifecycle.c
Original file line number Diff line number Diff line change
Expand Up @@ -1735,7 +1735,6 @@ finalize_interp_types(PyInterpreterState *interp)
_PySys_FiniTypes(interp);
_PyXI_FiniTypes(interp);
_PyExc_Fini(interp);
_PyAsyncGen_Fini(interp);
_PyFloat_FiniType(interp);
_PyLong_FiniTypes(interp);
_PyThread_FiniType(interp);
Expand All @@ -1759,6 +1758,7 @@ finalize_interp_types(PyInterpreterState *interp)
_PyFloat_Fini(state);
_PySlice_Fini(state);
_PyContext_Fini(state);
_PyAsyncGen_Fini(state);

#ifdef Py_DEBUG
_PyStaticObjects_CheckRefcnt(interp);
Expand Down
3 changes: 2 additions & 1 deletion Python/pystate.c
Original file line number Diff line number Diff line change
Expand Up @@ -1462,6 +1462,7 @@ _Py_ClearFreeLists(_PyFreeListState *state, int is_finalization)
_PyTuple_ClearFreeList(state, is_finalization);
_PyList_ClearFreeList(state, is_finalization);
_PyContext_ClearFreeList(state, is_finalization);
_PyAsyncGen_ClearFreeLists(state, is_finalization);
}

void
Expand Down Expand Up @@ -1549,7 +1550,7 @@ PyThreadState_Clear(PyThreadState *tstate)
#ifdef Py_GIL_DISABLED
// Each thread should clear own freelists in free-threading builds.
_PyFreeListState *freelist_state = &((_PyThreadStateImpl*)tstate)->freelist_state;
_Py_ClearFreeLists(freelist_state, 0);
_Py_ClearFreeLists(freelist_state, 1);
_PySlice_ClearCache(freelist_state);
#endif

Expand Down