Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[3.12]: gh-113055: Use pointer for interp->obmalloc state #118618

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3,897 changes: 1,962 additions & 1,935 deletions Doc/data/python3.12.abi

Large diffs are not rendered by default.

12 changes: 11 additions & 1 deletion Include/internal/pycore_interp.h
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,17 @@ struct _is {
struct _warnings_runtime_state warnings;
struct atexit_state atexit;

struct _obmalloc_state obmalloc;
// Per-interpreter state for the obmalloc allocator. For the main
// interpreter and for all interpreters that don't have their
// own obmalloc state, this points to the static structure in
// obmalloc.c obmalloc_state_main. For other interpreters, it is
// heap allocated by _PyMem_init_obmalloc() and freed when the
// interpreter structure is freed. In the case of a heap allocated
// obmalloc state, it is not safe to hold on to or use memory after
// the interpreter is freed. The obmalloc state corresponding to
// that allocated memory is gone. See free_obmalloc_arenas() for
// more comments.
struct _obmalloc_state *obmalloc;

PyObject *audit_hooks;
PyType_WatchCallback type_watchers[TYPE_MAX_WATCHERS];
Expand Down
2 changes: 2 additions & 0 deletions Include/internal/pycore_obmalloc.h
Original file line number Diff line number Diff line change
Expand Up @@ -686,6 +686,8 @@ extern Py_ssize_t _Py_GetGlobalAllocatedBlocks(void);
_Py_GetGlobalAllocatedBlocks()
extern Py_ssize_t _PyInterpreterState_GetAllocatedBlocks(PyInterpreterState *);
extern void _PyInterpreterState_FinalizeAllocatedBlocks(PyInterpreterState *);
extern int _PyMem_init_obmalloc(PyInterpreterState *interp);
extern bool _PyMem_obmalloc_state_on_heap(PyInterpreterState *interp);


#ifdef WITH_PYMALLOC
Expand Down
7 changes: 0 additions & 7 deletions Include/internal/pycore_obmalloc_init.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,13 +59,6 @@ extern "C" {
.dump_debug_stats = -1, \
}

#define _obmalloc_state_INIT(obmalloc) \
{ \
.pools = { \
.used = _obmalloc_pools_INIT(obmalloc.pools), \
}, \
}


#ifdef __cplusplus
}
Expand Down
1 change: 0 additions & 1 deletion Include/internal/pycore_runtime_init.h
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,6 @@ extern PyTypeObject _PyExc_MemoryError;
{ \
.id_refcount = -1, \
.imports = IMPORTS_INIT, \
.obmalloc = _obmalloc_state_INIT(INTERP.obmalloc), \
.ceval = { \
.recursion_limit = Py_DEFAULT_RECURSION_LIMIT, \
}, \
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Make interp->obmalloc a pointer. For interpreters that share state with the
main interpreter, this points to the same static memory structure. For
interpreters with their own obmalloc state, it is heap allocated. Add
free_obmalloc_arenas() which will free the obmalloc arenas and radix tree
structures for interpreters with their own obmalloc state.
121 changes: 115 additions & 6 deletions Objects/obmalloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include "Python.h"
#include "pycore_code.h" // stats
#include "pycore_pystate.h" // _PyInterpreterState_GET
#include "pycore_obmalloc_init.h"

#include "pycore_obmalloc.h"
#include "pycore_pymem.h"
Expand Down Expand Up @@ -852,6 +853,13 @@ static int running_on_valgrind = -1;

typedef struct _obmalloc_state OMState;

/* obmalloc state for main interpreter and shared by all interpreters without
* their own obmalloc state. By not explicitly initalizing this structure, it
* will be allocated in the BSS which is a small performance win. The radix
* tree arrays are fairly large but are sparsely used. */
static struct _obmalloc_state obmalloc_state_main;
static bool obmalloc_state_initialized;

static inline int
has_own_state(PyInterpreterState *interp)
{
Expand All @@ -864,10 +872,8 @@ static inline OMState *
get_state(void)
{
PyInterpreterState *interp = _PyInterpreterState_GET();
if (!has_own_state(interp)) {
interp = _PyInterpreterState_Main();
}
return &interp->obmalloc;
assert(interp->obmalloc != NULL); // otherwise not initialized or freed
return interp->obmalloc;
}

// These macros all rely on a local "state" variable.
Expand All @@ -893,7 +899,11 @@ _PyInterpreterState_GetAllocatedBlocks(PyInterpreterState *interp)
"the interpreter doesn't have its own allocator");
}
#endif
OMState *state = &interp->obmalloc;
OMState *state = interp->obmalloc;

if (state == NULL) {
return 0;
}

Py_ssize_t n = raw_allocated_blocks;
/* add up allocated blocks for used pools */
Expand All @@ -915,13 +925,25 @@ _PyInterpreterState_GetAllocatedBlocks(PyInterpreterState *interp)
return n;
}

static void free_obmalloc_arenas(PyInterpreterState *interp);

void
_PyInterpreterState_FinalizeAllocatedBlocks(PyInterpreterState *interp)
{
if (has_own_state(interp)) {
if (has_own_state(interp) && interp->obmalloc != NULL) {
Py_ssize_t leaked = _PyInterpreterState_GetAllocatedBlocks(interp);
assert(has_own_state(interp) || leaked == 0);
interp->runtime->obmalloc.interpreter_leaks += leaked;
if (_PyMem_obmalloc_state_on_heap(interp) && leaked == 0) {
// free the obmalloc arenas and radix tree nodes. If leaked > 0
// then some of the memory allocated by obmalloc has not been
// freed. It might be safe to free the arenas in that case but
// it's possible that extension modules are still using that
// memory. So, it is safer to not free and to leak. Perhaps there
// should be warning when this happens. It should be possible to
// use a tool like "-fsanitize=address" to track down these leaks.
free_obmalloc_arenas(interp);
}
}
}

Expand Down Expand Up @@ -2511,9 +2533,96 @@ _PyDebugAllocatorStats(FILE *out,
(void)printone(out, buf2, num_blocks * sizeof_block);
}

// Return true if the obmalloc state structure is heap allocated,
// by PyMem_RawCalloc(). For the main interpreter, this structure
// allocated in the BSS. Allocating that way gives some memory savings
// and a small performance win (at least on a demand paged OS). On
// 64-bit platforms, the obmalloc structure is 256 kB. Most of that
// memory is for the arena_map_top array. Since normally only one entry
// of that array is used, only one page of resident memory is actually
// used, rather than the full 256 kB.
bool _PyMem_obmalloc_state_on_heap(PyInterpreterState *interp)
{
#if WITH_PYMALLOC
return interp->obmalloc && interp->obmalloc != &obmalloc_state_main;
#else
return false;
#endif
}

#ifdef WITH_PYMALLOC
static void
init_obmalloc_pools(PyInterpreterState *interp)
{
// initialize the obmalloc->pools structure. This must be done
// before the obmalloc alloc/free functions can be called.
poolp temp[OBMALLOC_USED_POOLS_SIZE] =
_obmalloc_pools_INIT(interp->obmalloc->pools);
memcpy(&interp->obmalloc->pools.used, temp, sizeof(temp));
}
#endif /* WITH_PYMALLOC */

int _PyMem_init_obmalloc(PyInterpreterState *interp)
{
#ifdef WITH_PYMALLOC
/* Initialize obmalloc, but only for subinterpreters,
since the main interpreter is initialized statically. */
if (_Py_IsMainInterpreter(interp)
|| _PyInterpreterState_HasFeature(interp,
Py_RTFLAGS_USE_MAIN_OBMALLOC)) {
interp->obmalloc = &obmalloc_state_main;
if (!obmalloc_state_initialized) {
init_obmalloc_pools(interp);
obmalloc_state_initialized = true;
}
} else {
interp->obmalloc = PyMem_RawCalloc(1, sizeof(struct _obmalloc_state));
if (interp->obmalloc == NULL) {
return -1;
}
init_obmalloc_pools(interp);
}
#endif /* WITH_PYMALLOC */
return 0; // success
}


#ifdef WITH_PYMALLOC

static void
free_obmalloc_arenas(PyInterpreterState *interp)
{
OMState *state = interp->obmalloc;
for (uint i = 0; i < maxarenas; ++i) {
// free each obmalloc memory arena
struct arena_object *ao = &allarenas[i];
_PyObject_Arena.free(_PyObject_Arena.ctx,
(void *)ao->address, ARENA_SIZE);
}
// free the array containing pointers to all arenas
PyMem_RawFree(allarenas);
#if WITH_PYMALLOC_RADIX_TREE
#ifdef USE_INTERIOR_NODES
// Free the middle and bottom nodes of the radix tree. These are allocated
// by arena_map_mark_used() but not freed when arenas are freed.
for (int i1 = 0; i1 < MAP_TOP_LENGTH; i1++) {
arena_map_mid_t *mid = arena_map_root.ptrs[i1];
if (mid == NULL) {
continue;
}
for (int i2 = 0; i2 < MAP_MID_LENGTH; i2++) {
arena_map_bot_t *bot = arena_map_root.ptrs[i1]->ptrs[i2];
if (bot == NULL) {
continue;
}
PyMem_RawFree(bot);
}
PyMem_RawFree(mid);
}
#endif
#endif
}

#ifdef Py_DEBUG
/* Is target in the list? The list is traversed via the nextpool pointers.
* The list may be NULL-terminated, or circular. Return 1 if target is in
Expand Down
16 changes: 16 additions & 0 deletions Python/pylifecycle.c
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include "pycore_typeobject.h" // _PyTypes_InitTypes()
#include "pycore_typevarobject.h" // _Py_clear_generic_types()
#include "pycore_unicodeobject.h" // _PyUnicode_InitTypes()
#include "pycore_obmalloc.h" // _PyMem_init_obmalloc()
#include "opcode.h"

#include <locale.h> // setlocale()
Expand Down Expand Up @@ -650,6 +651,13 @@ pycore_create_interpreter(_PyRuntimeState *runtime,
return status;
}

// initialize the interp->obmalloc state. This must be done after
// the settings are loaded (so that feature_flags are set) but before
// any calls are made to obmalloc functions.
if (_PyMem_init_obmalloc(interp) < 0) {
return _PyStatus_NO_MEMORY();
}

PyThreadState *tstate = _PyThreadState_New(interp);
if (tstate == NULL) {
return _PyStatus_ERR("can't make first thread");
Expand Down Expand Up @@ -2094,6 +2102,14 @@ new_interpreter(PyThreadState **tstate_p, const PyInterpreterConfig *config)
goto error;
}

// initialize the interp->obmalloc state. This must be done after
// the settings are loaded (so that feature_flags are set) but before
// any calls are made to obmalloc functions.
if (_PyMem_init_obmalloc(interp) < 0) {
status = _PyStatus_NO_MEMORY();
goto error;
}

status = init_interp_create_gil(tstate, config->gil);
if (_PyStatus_EXCEPTION(status)) {
goto error;
Expand Down
13 changes: 6 additions & 7 deletions Python/pystate.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include "pycore_pystate.h"
#include "pycore_runtime_init.h" // _PyRuntimeState_INIT
#include "pycore_sysmodule.h"
#include "pycore_obmalloc.h" // _PyMem_obmalloc_state_on_heap()

/* --------------------------------------------------------------------------
CAUTION
Expand Down Expand Up @@ -636,6 +637,11 @@ free_interpreter(PyInterpreterState *interp)
// The main interpreter is statically allocated so
// should not be freed.
if (interp != &_PyRuntime._main_interpreter) {
if (_PyMem_obmalloc_state_on_heap(interp)) {
// interpreter has its own obmalloc state, free it
PyMem_RawFree(interp->obmalloc);
interp->obmalloc = NULL;
}
PyMem_RawFree(interp);
}
}
Expand Down Expand Up @@ -679,13 +685,6 @@ init_interpreter(PyInterpreterState *interp,
assert(next != NULL || (interp == runtime->interpreters.main));
interp->next = next;

/* Initialize obmalloc, but only for subinterpreters,
since the main interpreter is initialized statically. */
if (interp != &runtime->_main_interpreter) {
poolp temp[OBMALLOC_USED_POOLS_SIZE] = \
_obmalloc_pools_INIT(interp->obmalloc.pools);
memcpy(&interp->obmalloc.pools.used, temp, sizeof(temp));
}
_PyObject_InitState(interp);

_PyEval_InitState(interp, pending_lock);
Expand Down
3 changes: 2 additions & 1 deletion Tools/c-analyzer/cpython/ignored.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,8 @@ Objects/obmalloc.c - _PyMem_Debug -
Objects/obmalloc.c - _PyMem_Raw -
Objects/obmalloc.c - _PyObject -
Objects/obmalloc.c - last_final_leaks -
Objects/obmalloc.c - usedpools -
Objects/obmalloc.c - obmalloc_state_main -
Objects/obmalloc.c - obmalloc_state_initialized -
Objects/typeobject.c - name_op -
Objects/typeobject.c - slotdefs -
Objects/unicodeobject.c - stripfuncnames -
Expand Down
Loading