Skip to content

Commit

Permalink
pymem: add _PyMem_FreeQsbr
Browse files Browse the repository at this point in the history
  • Loading branch information
colesbury committed Apr 23, 2023
1 parent 74df778 commit 4450445
Show file tree
Hide file tree
Showing 8 changed files with 189 additions and 1 deletion.
1 change: 1 addition & 0 deletions Include/Python.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@
#include "cpython/cellobject.h"
#include "iterobject.h"
#include "cpython/initconfig.h"
#include "cpython/pyqueue.h"
#include "pystate.h"
#include "cpython/genobject.h"
#include "descrobject.h"
Expand Down
4 changes: 4 additions & 0 deletions Include/cpython/pystate.h
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,10 @@ struct _ts {
_PyStackChunk *datastack_chunk;
PyObject **datastack_top;
PyObject **datastack_limit;

/* Queue of data pointers to be freed */
struct _Py_queue_head/*<_PyMemWork>*/ mem_work;

/* XXX signal handlers should also be here */

/* The following fields are here to avoid allocation during init.
Expand Down
2 changes: 2 additions & 0 deletions Include/internal/pycore_interp.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ extern "C" {
#include "pycore_list.h" // struct _Py_list_state
#include "pycore_llist.h" // struct llist_node
#include "pycore_global_objects.h" // struct _Py_interp_static_objects
#include "pycore_pymem.h" // struct _mem_state
#include "pycore_tuple.h" // struct _Py_tuple_state
#include "pycore_typeobject.h" // struct type_cache
#include "pycore_unicodeobject.h" // struct _Py_unicode_state
Expand Down Expand Up @@ -118,6 +119,7 @@ struct _is {

struct _ceval_state ceval;
struct _gc_runtime_state gc;
struct _mem_state mem;

// sys.modules dictionary
PyObject *modules;
Expand Down
13 changes: 13 additions & 0 deletions Include/internal/pycore_pymem.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,13 @@ struct _pymem_allocators {
PyObjectArenaAllocator obj_arena;
};

/* Per-interpreter bookkeeping for deferred frees (see _PyMem_FreeQsbr).
 * Holds the work buffers that _PyMem_AbandonQsbr() hands over from a
 * thread state that is being cleared. */
struct _mem_state {
/* Taken by _PyMem_QsbrPoll() (try-lock) and _PyMem_AbandonQsbr() around
 * their accesses to `work`. */
_PyMutex mutex;
/* Queue of data pointers to be freed from dead threads */
struct _Py_queue_head/*<_PyMemWork>*/ work;
/* Hint read and written with relaxed atomics: set to 1 when a buffer is
 * added to `work`, and set to the result of draining `work` afterwards, so
 * pollers can skip the mutex when there is nothing to do. */
int nonempty;
};


/* Set the memory allocator of the specified domain to the default.
Save the old allocator into *old_alloc if it's non-NULL.
Expand Down Expand Up @@ -92,6 +99,12 @@ PyAPI_FUNC(int) _PyMem_GetAllocatorName(
PYMEM_ALLOCATOR_NOT_SET does nothing. */
PyAPI_FUNC(int) _PyMem_SetupAllocators(PyMemAllocatorName allocator);

/* Free the pointer after all threads are quiescent.
 * The pointer is queued on the calling thread's state and later released
 * with PyMem_Free(). */
extern void _PyMem_FreeQsbr(void *ptr);
/* Free whatever queued pointers are ready: first from tstate's own queue,
 * then from the interpreter-wide queue if its mutex can be acquired without
 * blocking. */
extern void _PyMem_QsbrPoll(PyThreadState *tstate);
/* Empty tstate's queue: buffers with nothing pending are freed, the rest
 * are moved to the interpreter-wide queue. */
extern void _PyMem_AbandonQsbr(PyThreadState *tstate);
/* Unconditionally free every pointer and buffer left on the interpreter-wide
 * queue (no quiescence check). */
extern void _PyMem_QsbrFini(PyInterpreterState *interp);

extern void * _PyMem_DefaultRawMalloc(size_t);
extern void * _PyMem_DefaultRawCalloc(size_t, size_t);
extern void * _PyMem_DefaultRawRealloc(void *, size_t);
Expand Down
5 changes: 5 additions & 0 deletions Include/internal/pycore_qsbr.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ struct qsbr {
uint64_t t_seq;
struct qsbr_shared *t_shared;
struct qsbr *t_next;
int t_deferred;
int t_limit;
PyThreadState *tstate;
};

Expand Down Expand Up @@ -46,6 +48,9 @@ _Py_qsbr_init(struct qsbr_shared *shared);
/* Advance the shared write sequence by QSBR_INCR and return the resulting
 * sequence number. */
uint64_t
_Py_qsbr_advance(struct qsbr_shared *shared);

/* Like _Py_qsbr_advance(), but only touches the shared write sequence on
 * every `t_limit`-th call for this thread; the other calls return the
 * current shared sequence plus QSBR_INCR without modifying it. */
uint64_t
_Py_qsbr_deferred_advance(struct qsbr *qsbr);

/* NOTE(review): definition not visible here. Callers in obmalloc.c free a
 * queued pointer only when this returns true for the sequence number that
 * was recorded when the pointer was queued. */
bool
_Py_qsbr_poll(struct qsbr *qsbr, uint64_t goal);

Expand Down
147 changes: 146 additions & 1 deletion Objects/obmalloc.c
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
#include "Python.h"
#include "pycore_code.h" // stats
#include "pycore_pystate.h" // _PyInterpreterState_GET

#include "pycore_obmalloc.h"
#include "pycore_pymem.h"
#include "pycore_pymem_init.h"
#include "pycore_pyqueue.h"
#include "pycore_qsbr.h"

#include <stdlib.h> // malloc()
#include <stdbool.h>
Expand Down Expand Up @@ -675,6 +676,150 @@ PyMem_Free(void *ptr)
}


/* One deferred free request. */
typedef struct {
/* Pointer that will eventually be passed to PyMem_Free(). */
void *ptr;
/* Sequence number returned by _Py_qsbr_deferred_advance() when the pointer
 * was queued; handed to _Py_qsbr_poll() before the pointer is freed. */
uint64_t seq;
} _PyMem_WorkItem;

/* Capacity (in items) of a single work buffer. */
#define PY_MEM_WORK_ITEMS 127

/* Fixed-capacity chunk of deferred free requests, linked into a
 * _Py_queue_head through `node`. Entries in items[first .. size) are still
 * pending; entries below `first` have already been freed. */
typedef struct _PyMemWork {
struct _Py_queue_node node;
/* Index of the next item to free. */
unsigned int first;
/* Number of items written so far (at most PY_MEM_WORK_ITEMS). */
unsigned int size;
_PyMem_WorkItem items[PY_MEM_WORK_ITEMS];
} _PyMem_WorkBuf;

void
_PyMem_FreeQsbr(void *ptr)
{
PyThreadState *tstate = _PyThreadState_GET();

// Try to get an non-full workbuf
_PyMem_WorkBuf *work = NULL;
if (!_Py_queue_is_empty(&tstate->mem_work)) {
work = _Py_queue_last(&tstate->mem_work, _PyMem_WorkBuf, node);
if (work->size == PY_MEM_WORK_ITEMS) {
work = NULL;
}
}

if (work == NULL) {
work = PyMem_RawMalloc(sizeof(_PyMem_WorkBuf));
if (work == NULL) {
Py_FatalError("out of memory (in _PyMem_FreeQsbr)");
}
work->first = work->size = 0;
_Py_queue_enqeue(&tstate->mem_work, &work->node);
}

PyThreadStateImpl *tstate_impl = (PyThreadStateImpl *)tstate;
work->items[work->size].ptr = ptr;
work->items[work->size].seq = _Py_qsbr_deferred_advance(tstate_impl->qsbr);
work->size++;

if (work->size == PY_MEM_WORK_ITEMS) {
// Now seems like a good time to check for any memory that can be freed.
_PyMem_QsbrPoll(tstate);
}
}

static int
_PyMem_ProcessQueue(struct _Py_queue_head *queue, struct qsbr *qsbr, bool keep_empty)
{
while (!_Py_queue_is_empty(queue)) {
_PyMem_WorkBuf *work = _Py_queue_first(queue, _PyMem_WorkBuf, node);
if (work->size == 0 && keep_empty) {
return 0;
}
while (work->first < work->size) {
_PyMem_WorkItem *item = &work->items[work->first];
if (!_Py_qsbr_poll(qsbr, item->seq)) {
return 1;
}
PyMem_Free(item->ptr);
work->first++;
}

// Remove the empty work buffer
_Py_queue_dequeue(queue);

// If the queue doesn't have an empty work buffer, stick this
// one at the end of the queue. Otherwise, free it.
if (keep_empty && _Py_queue_is_empty(queue)) {
work->first = work->size = 0;
_Py_queue_enqeue(queue, &work->node);
return 0;
}
else if (keep_empty && _Py_queue_last(queue, _PyMem_WorkBuf, node)->size == 0) {
work->first = work->size = 0;
_Py_queue_enqeue(queue, &work->node);
}
else {
PyMem_RawFree(work);
}
}
return 0;
}

/* Release any deferred frees that are ready, on behalf of `tstate`.
 *
 * The thread's own queue is always processed (keeping one spare buffer).
 * The interpreter-wide queue is processed only when its `nonempty` hint is
 * set and its mutex can be taken without blocking.
 */
void
_PyMem_QsbrPoll(PyThreadState *tstate)
{
    PyThreadStateImpl *impl = (PyThreadStateImpl *)tstate;
    struct qsbr *qsbr = impl->qsbr;

    // Thread-local work first.
    _PyMem_ProcessQueue(&tstate->mem_work, qsbr, true);

    PyInterpreterState *interp = tstate->interp;
    if (!_Py_atomic_load_int_relaxed(&interp->mem.nonempty)) {
        // Nothing was handed over to the interpreter queue.
        return;
    }
    if (!_PyMutex_TryLock(&interp->mem.mutex)) {
        // Someone else holds the lock; do not wait for it.
        return;
    }

    // Drain the shared queue and record whether anything is left in it.
    int remaining = _PyMem_ProcessQueue(&interp->mem.work, qsbr, false);
    _Py_atomic_store_int_relaxed(&interp->mem.nonempty, remaining);
    _PyMutex_unlock(&interp->mem.mutex);
}

/* Drain the interpreter-wide queue unconditionally.
 *
 * Every still-pending pointer is passed to PyMem_Free() without consulting
 * _Py_qsbr_poll(), every work buffer is released, and the `nonempty` hint is
 * cleared.
 */
void
_PyMem_QsbrFini(PyInterpreterState *interp)
{
    struct _Py_queue_head *pending = &interp->mem.work;

    for (;;) {
        if (_Py_queue_is_empty(pending)) {
            break;
        }
        _PyMem_WorkBuf *buf = _Py_queue_first(pending, _PyMem_WorkBuf, node);

        // Free whatever this buffer still holds.
        for (; buf->first < buf->size; buf->first++) {
            PyMem_Free(buf->items[buf->first].ptr);
        }

        _Py_queue_dequeue(pending);
        PyMem_RawFree(buf);
    }

    interp->mem.nonempty = 0;
}

/* Empty the deferred-free queue of `tstate`.
 *
 * Buffers with no pending items are freed right away. Buffers that still
 * hold pending items are appended to the interpreter-wide queue under its
 * mutex, and the `nonempty` hint is raised so a later _PyMem_QsbrPoll() will
 * look at them.
 */
void
_PyMem_AbandonQsbr(PyThreadState *tstate)
{
    PyInterpreterState *interp = tstate->interp;
    struct _Py_queue_head *local = &tstate->mem_work;

    while (!_Py_queue_is_empty(local)) {
        struct _Py_queue_node *entry = _Py_queue_dequeue(local);
        if (entry == NULL) {
            break;
        }

        _PyMem_WorkBuf *buf = _Py_queue_data(entry, _PyMem_WorkBuf, node);
        if (buf->first != buf->size) {
            // Still has items waiting: hand the buffer to the interpreter.
            _PyMutex_lock(&interp->mem.mutex);
            _Py_queue_enqeue(&interp->mem.work, entry);
            _Py_atomic_store_int_relaxed(&interp->mem.nonempty, 1);
            _PyMutex_unlock(&interp->mem.mutex);
            continue;
        }

        // Fully drained buffer: nothing to hand over.
        PyMem_RawFree(buf);
    }
}

wchar_t*
_PyMem_RawWcsdup(const wchar_t *str)
{
Expand Down
7 changes: 7 additions & 0 deletions Python/pystate.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include "pycore_pyerrors.h"
#include "pycore_pylifecycle.h"
#include "pycore_pymem.h" // _PyMem_DefaultRawFree()
#include "pycore_pyqueue.h" // _Py_queue_init
#include "pycore_pystate.h" // _PyThreadState_GET()
#include "pycore_qsbr.h"
#include "pycore_runtime_init.h" // _PyRuntimeState_INIT
Expand Down Expand Up @@ -464,6 +465,7 @@ init_interpreter(PyInterpreterState *interp,
_PyGC_InitState(&interp->gc);
PyConfig_InitPythonConfig(&interp->config);
_PyType_InitCache(interp);
_Py_queue_init(&interp->mem.work);

interp->_initialized = 1;
}
Expand Down Expand Up @@ -598,6 +600,9 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate)
_PyGC_CollectNoFail(tstate);
_PyGC_Fini(interp);

/* Perform any delayed PyMem_Free calls */
_PyMem_QsbrFini(interp);

/* We don't clear sysdict and builtins until the end of this function.
Because clearing other attributes can execute arbitrary Python code
which requires sysdict and builtins. */
Expand Down Expand Up @@ -1051,6 +1056,7 @@ init_threadstate(PyThreadState *tstate,
tstate->daemon = (id > 1);
tstate->done_event = done_event;
_PyEventRc_Incref(done_event);
_Py_queue_init(&tstate->mem_work);

if (_PyRuntime.stop_the_world_requested) {
tstate->status = _Py_THREAD_GC;
Expand Down Expand Up @@ -1339,6 +1345,7 @@ PyThreadState_Clear(PyThreadState *tstate)
}

_Py_queue_destroy(tstate);
_PyMem_AbandonQsbr(tstate);

/* Don't clear tstate->pyframe: it is a borrowed reference */

Expand Down
11 changes: 11 additions & 0 deletions Python/qsbr.c
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ _Py_qsbr_alloc(struct qsbr_shared *shared)
}
memset(qsbr, 0, sizeof(*qsbr));
qsbr->t_shared = shared;
qsbr->t_limit = 32;
return qsbr;
}

Expand Down Expand Up @@ -95,6 +96,16 @@ _Py_qsbr_advance(struct qsbr_shared *shared)
return _Py_atomic_add_uint64(&shared->s_wr, QSBR_INCR) + QSBR_INCR;
}

/* Return a sequence number for a deferred operation while touching the
 * shared write sequence only occasionally.
 *
 * Each call bumps the per-thread `t_deferred` counter. Once it reaches
 * `t_limit` the counter is reset and the shared sequence is really advanced
 * through _Py_qsbr_advance(). Until then the function returns the current
 * shared sequence plus QSBR_INCR without modifying shared state.
 */
uint64_t
_Py_qsbr_deferred_advance(struct qsbr *qsbr)
{
    qsbr->t_deferred++;
    if (qsbr->t_deferred >= qsbr->t_limit) {
        qsbr->t_deferred = 0;
        return _Py_qsbr_advance(qsbr->t_shared);
    }
    return _Py_qsbr_shared_current(qsbr->t_shared) + QSBR_INCR;
}

uint64_t
_Py_qsbr_poll_scan(struct qsbr_shared *shared)
{
Expand Down

0 comments on commit 4450445

Please sign in to comment.