Skip to content

Commit 7a7bce5

Browse files
gh-113055: Use pointer for interp->obmalloc state (gh-113412)
For interpreters that share state with the main interpreter, this points to the same static memory structure. For interpreters with their own obmalloc state, it is heap allocated. Add free_obmalloc_arenas() which will free the obmalloc arenas and radix tree structures for interpreters with their own obmalloc state. Co-authored-by: Eric Snow <ericsnowcurrently@gmail.com>
1 parent 2d08af3 commit 7a7bce5

File tree

9 files changed

+157
-24
lines changed

9 files changed

+157
-24
lines changed

Include/internal/pycore_interp.h

+11-1
Original file line numberDiff line numberDiff line change
@@ -203,7 +203,17 @@ struct _is {
203203
struct _mimalloc_interp_state mimalloc;
204204
#endif
205205

206-
struct _obmalloc_state obmalloc;
206+
// Per-interpreter state for the obmalloc allocator. For the main
207+
// interpreter and for all interpreters that don't have their
208+
// own obmalloc state, this points to the static structure in
209+
// obmalloc.c obmalloc_state_main. For other interpreters, it is
210+
// heap allocated by _PyMem_init_obmalloc() and freed when the
211+
// interpreter structure is freed. In the case of a heap allocated
212+
// obmalloc state, it is not safe to hold on to or use memory after
213+
// the interpreter is freed. The obmalloc state corresponding to
214+
// that allocated memory is gone. See free_obmalloc_arenas() for
215+
// more comments.
216+
struct _obmalloc_state *obmalloc;
207217

208218
PyObject *audit_hooks;
209219
PyType_WatchCallback type_watchers[TYPE_MAX_WATCHERS];

Include/internal/pycore_obmalloc.h

+2
Original file line numberDiff line numberDiff line change
@@ -686,6 +686,8 @@ extern Py_ssize_t _Py_GetGlobalAllocatedBlocks(void);
686686
_Py_GetGlobalAllocatedBlocks()
687687
extern Py_ssize_t _PyInterpreterState_GetAllocatedBlocks(PyInterpreterState *);
688688
extern void _PyInterpreterState_FinalizeAllocatedBlocks(PyInterpreterState *);
689+
extern int _PyMem_init_obmalloc(PyInterpreterState *interp);
690+
extern bool _PyMem_obmalloc_state_on_heap(PyInterpreterState *interp);
689691

690692

691693
#ifdef WITH_PYMALLOC

Include/internal/pycore_obmalloc_init.h

-7
Original file line numberDiff line numberDiff line change
@@ -59,13 +59,6 @@ extern "C" {
5959
.dump_debug_stats = -1, \
6060
}
6161

62-
#define _obmalloc_state_INIT(obmalloc) \
63-
{ \
64-
.pools = { \
65-
.used = _obmalloc_pools_INIT(obmalloc.pools), \
66-
}, \
67-
}
68-
6962

7063
#ifdef __cplusplus
7164
}

Include/internal/pycore_runtime_init.h

-1
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,6 @@ extern PyTypeObject _PyExc_MemoryError;
155155
{ \
156156
.id_refcount = -1, \
157157
.imports = IMPORTS_INIT, \
158-
.obmalloc = _obmalloc_state_INIT(INTERP.obmalloc), \
159158
.ceval = { \
160159
.recursion_limit = Py_DEFAULT_RECURSION_LIMIT, \
161160
}, \
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
Make interp->obmalloc a pointer. For interpreters that share state with the
2+
main interpreter, this points to the same static memory structure. For
3+
interpreters with their own obmalloc state, it is heap allocated. Add
4+
free_obmalloc_arenas() which will free the obmalloc arenas and radix tree
5+
structures for interpreters with their own obmalloc state.

Objects/obmalloc.c

+115-6
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
#include "pycore_pyerrors.h" // _Py_FatalErrorFormat()
88
#include "pycore_pymem.h"
99
#include "pycore_pystate.h" // _PyInterpreterState_GET
10+
#include "pycore_obmalloc_init.h"
1011

1112
#include <stdlib.h> // malloc()
1213
#include <stdbool.h>
@@ -1016,6 +1017,13 @@ static int running_on_valgrind = -1;
10161017

10171018
typedef struct _obmalloc_state OMState;
10181019

1020+
/* obmalloc state for main interpreter and shared by all interpreters without
1021+
* their own obmalloc state. By not explicitly initalizing this structure, it
1022+
* will be allocated in the BSS which is a small performance win. The radix
1023+
* tree arrays are fairly large but are sparsely used. */
1024+
static struct _obmalloc_state obmalloc_state_main;
1025+
static bool obmalloc_state_initialized;
1026+
10191027
static inline int
10201028
has_own_state(PyInterpreterState *interp)
10211029
{
@@ -1028,10 +1036,8 @@ static inline OMState *
10281036
get_state(void)
10291037
{
10301038
PyInterpreterState *interp = _PyInterpreterState_GET();
1031-
if (!has_own_state(interp)) {
1032-
interp = _PyInterpreterState_Main();
1033-
}
1034-
return &interp->obmalloc;
1039+
assert(interp->obmalloc != NULL); // otherwise not initialized or freed
1040+
return interp->obmalloc;
10351041
}
10361042

10371043
// These macros all rely on a local "state" variable.
@@ -1094,7 +1100,11 @@ _PyInterpreterState_GetAllocatedBlocks(PyInterpreterState *interp)
10941100
"the interpreter doesn't have its own allocator");
10951101
}
10961102
#endif
1097-
OMState *state = &interp->obmalloc;
1103+
OMState *state = interp->obmalloc;
1104+
1105+
if (state == NULL) {
1106+
return 0;
1107+
}
10981108

10991109
Py_ssize_t n = raw_allocated_blocks;
11001110
/* add up allocated blocks for used pools */
@@ -1116,6 +1126,8 @@ _PyInterpreterState_GetAllocatedBlocks(PyInterpreterState *interp)
11161126
return n;
11171127
}
11181128

1129+
static void free_obmalloc_arenas(PyInterpreterState *interp);
1130+
11191131
void
11201132
_PyInterpreterState_FinalizeAllocatedBlocks(PyInterpreterState *interp)
11211133
{
@@ -1124,10 +1136,20 @@ _PyInterpreterState_FinalizeAllocatedBlocks(PyInterpreterState *interp)
11241136
return;
11251137
}
11261138
#endif
1127-
if (has_own_state(interp)) {
1139+
if (has_own_state(interp) && interp->obmalloc != NULL) {
11281140
Py_ssize_t leaked = _PyInterpreterState_GetAllocatedBlocks(interp);
11291141
assert(has_own_state(interp) || leaked == 0);
11301142
interp->runtime->obmalloc.interpreter_leaks += leaked;
1143+
if (_PyMem_obmalloc_state_on_heap(interp) && leaked == 0) {
1144+
// free the obmalloc arenas and radix tree nodes. If leaked > 0
1145+
// then some of the memory allocated by obmalloc has not been
1146+
// freed. It might be safe to free the arenas in that case but
1147+
// it's possible that extension modules are still using that
1148+
// memory. So, it is safer to not free and to leak. Perhaps there
1149+
// should be warning when this happens. It should be possible to
1150+
// use a tool like "-fsanitize=address" to track down these leaks.
1151+
free_obmalloc_arenas(interp);
1152+
}
11311153
}
11321154
}
11331155

@@ -2717,9 +2739,96 @@ _PyDebugAllocatorStats(FILE *out,
27172739
(void)printone(out, buf2, num_blocks * sizeof_block);
27182740
}
27192741

2742+
// Return true if the obmalloc state structure is heap allocated,
2743+
// by PyMem_RawCalloc(). For the main interpreter, this structure
2744+
// allocated in the BSS. Allocating that way gives some memory savings
2745+
// and a small performance win (at least on a demand paged OS). On
2746+
// 64-bit platforms, the obmalloc structure is 256 kB. Most of that
2747+
// memory is for the arena_map_top array. Since normally only one entry
2748+
// of that array is used, only one page of resident memory is actually
2749+
// used, rather than the full 256 kB.
2750+
bool _PyMem_obmalloc_state_on_heap(PyInterpreterState *interp)
2751+
{
2752+
#if WITH_PYMALLOC
2753+
return interp->obmalloc && interp->obmalloc != &obmalloc_state_main;
2754+
#else
2755+
return false;
2756+
#endif
2757+
}
2758+
2759+
#ifdef WITH_PYMALLOC
2760+
static void
2761+
init_obmalloc_pools(PyInterpreterState *interp)
2762+
{
2763+
// initialize the obmalloc->pools structure. This must be done
2764+
// before the obmalloc alloc/free functions can be called.
2765+
poolp temp[OBMALLOC_USED_POOLS_SIZE] =
2766+
_obmalloc_pools_INIT(interp->obmalloc->pools);
2767+
memcpy(&interp->obmalloc->pools.used, temp, sizeof(temp));
2768+
}
2769+
#endif /* WITH_PYMALLOC */
2770+
2771+
int _PyMem_init_obmalloc(PyInterpreterState *interp)
2772+
{
2773+
#ifdef WITH_PYMALLOC
2774+
/* Initialize obmalloc, but only for subinterpreters,
2775+
since the main interpreter is initialized statically. */
2776+
if (_Py_IsMainInterpreter(interp)
2777+
|| _PyInterpreterState_HasFeature(interp,
2778+
Py_RTFLAGS_USE_MAIN_OBMALLOC)) {
2779+
interp->obmalloc = &obmalloc_state_main;
2780+
if (!obmalloc_state_initialized) {
2781+
init_obmalloc_pools(interp);
2782+
obmalloc_state_initialized = true;
2783+
}
2784+
} else {
2785+
interp->obmalloc = PyMem_RawCalloc(1, sizeof(struct _obmalloc_state));
2786+
if (interp->obmalloc == NULL) {
2787+
return -1;
2788+
}
2789+
init_obmalloc_pools(interp);
2790+
}
2791+
#endif /* WITH_PYMALLOC */
2792+
return 0; // success
2793+
}
2794+
27202795

27212796
#ifdef WITH_PYMALLOC
27222797

2798+
static void
2799+
free_obmalloc_arenas(PyInterpreterState *interp)
2800+
{
2801+
OMState *state = interp->obmalloc;
2802+
for (uint i = 0; i < maxarenas; ++i) {
2803+
// free each obmalloc memory arena
2804+
struct arena_object *ao = &allarenas[i];
2805+
_PyObject_Arena.free(_PyObject_Arena.ctx,
2806+
(void *)ao->address, ARENA_SIZE);
2807+
}
2808+
// free the array containing pointers to all arenas
2809+
PyMem_RawFree(allarenas);
2810+
#if WITH_PYMALLOC_RADIX_TREE
2811+
#ifdef USE_INTERIOR_NODES
2812+
// Free the middle and bottom nodes of the radix tree. These are allocated
2813+
// by arena_map_mark_used() but not freed when arenas are freed.
2814+
for (int i1 = 0; i1 < MAP_TOP_LENGTH; i1++) {
2815+
arena_map_mid_t *mid = arena_map_root.ptrs[i1];
2816+
if (mid == NULL) {
2817+
continue;
2818+
}
2819+
for (int i2 = 0; i2 < MAP_MID_LENGTH; i2++) {
2820+
arena_map_bot_t *bot = arena_map_root.ptrs[i1]->ptrs[i2];
2821+
if (bot == NULL) {
2822+
continue;
2823+
}
2824+
PyMem_RawFree(bot);
2825+
}
2826+
PyMem_RawFree(mid);
2827+
}
2828+
#endif
2829+
#endif
2830+
}
2831+
27232832
#ifdef Py_DEBUG
27242833
/* Is target in the list? The list is traversed via the nextpool pointers.
27252834
* The list may be NULL-terminated, or circular. Return 1 if target is in

Python/pylifecycle.c

+16
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
#include "pycore_typevarobject.h" // _Py_clear_generic_types()
3333
#include "pycore_unicodeobject.h" // _PyUnicode_InitTypes()
3434
#include "pycore_weakref.h" // _PyWeakref_GET_REF()
35+
#include "pycore_obmalloc.h" // _PyMem_init_obmalloc()
3536

3637
#include "opcode.h"
3738

@@ -645,6 +646,13 @@ pycore_create_interpreter(_PyRuntimeState *runtime,
645646
return status;
646647
}
647648

649+
// initialize the interp->obmalloc state. This must be done after
650+
// the settings are loaded (so that feature_flags are set) but before
651+
// any calls are made to obmalloc functions.
652+
if (_PyMem_init_obmalloc(interp) < 0) {
653+
return _PyStatus_NO_MEMORY();
654+
}
655+
648656
PyThreadState *tstate = _PyThreadState_New(interp,
649657
_PyThreadState_WHENCE_INTERP);
650658
if (tstate == NULL) {
@@ -2144,6 +2152,14 @@ new_interpreter(PyThreadState **tstate_p, const PyInterpreterConfig *config)
21442152
goto error;
21452153
}
21462154

2155+
// initialize the interp->obmalloc state. This must be done after
2156+
// the settings are loaded (so that feature_flags are set) but before
2157+
// any calls are made to obmalloc functions.
2158+
if (_PyMem_init_obmalloc(interp) < 0) {
2159+
status = _PyStatus_NO_MEMORY();
2160+
goto error;
2161+
}
2162+
21472163
tstate = _PyThreadState_New(interp, _PyThreadState_WHENCE_INTERP);
21482164
if (tstate == NULL) {
21492165
status = _PyStatus_NO_MEMORY();

Python/pystate.c

+6-8
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include "pycore_pystate.h"
1919
#include "pycore_runtime_init.h" // _PyRuntimeState_INIT
2020
#include "pycore_sysmodule.h" // _PySys_Audit()
21+
#include "pycore_obmalloc.h" // _PyMem_obmalloc_state_on_heap()
2122

2223
/* --------------------------------------------------------------------------
2324
CAUTION
@@ -553,6 +554,11 @@ free_interpreter(PyInterpreterState *interp)
553554
// The main interpreter is statically allocated so
554555
// should not be freed.
555556
if (interp != &_PyRuntime._main_interpreter) {
557+
if (_PyMem_obmalloc_state_on_heap(interp)) {
558+
// interpreter has its own obmalloc state, free it
559+
PyMem_RawFree(interp->obmalloc);
560+
interp->obmalloc = NULL;
561+
}
556562
PyMem_RawFree(interp);
557563
}
558564
}
@@ -595,14 +601,6 @@ init_interpreter(PyInterpreterState *interp,
595601
assert(next != NULL || (interp == runtime->interpreters.main));
596602
interp->next = next;
597603

598-
/* Initialize obmalloc, but only for subinterpreters,
599-
since the main interpreter is initialized statically. */
600-
if (interp != &runtime->_main_interpreter) {
601-
poolp temp[OBMALLOC_USED_POOLS_SIZE] = \
602-
_obmalloc_pools_INIT(interp->obmalloc.pools);
603-
memcpy(&interp->obmalloc.pools.used, temp, sizeof(temp));
604-
}
605-
606604
PyStatus status = _PyObject_InitState(interp);
607605
if (_PyStatus_EXCEPTION(status)) {
608606
return status;

Tools/c-analyzer/cpython/ignored.tsv

+2-1
Original file line numberDiff line numberDiff line change
@@ -325,7 +325,8 @@ Objects/obmalloc.c - _PyMem_Debug -
325325
Objects/obmalloc.c - _PyMem_Raw -
326326
Objects/obmalloc.c - _PyObject -
327327
Objects/obmalloc.c - last_final_leaks -
328-
Objects/obmalloc.c - usedpools -
328+
Objects/obmalloc.c - obmalloc_state_main -
329+
Objects/obmalloc.c - obmalloc_state_initialized -
329330
Objects/typeobject.c - name_op -
330331
Objects/typeobject.c - slotdefs -
331332
Objects/unicodeobject.c - stripfuncnames -

0 commit comments

Comments
 (0)