#include "pycore_pyerrors.h"      // _Py_FatalErrorFormat()
#include "pycore_pymem.h"
#include "pycore_pystate.h"       // _PyInterpreterState_GET
+#include "pycore_obmalloc_init.h"

#include <stdlib.h>               // malloc()
#include <stdbool.h>
@@ -1016,6 +1017,13 @@ static int running_on_valgrind = -1;
typedef struct _obmalloc_state OMState;

+/* obmalloc state for the main interpreter, shared by all interpreters that
+ * don't have their own obmalloc state.  By not explicitly initializing this
+ * structure, it is allocated in the BSS, which is a small performance win.
+ * The radix tree arrays are fairly large but sparsely used.  */
+static struct _obmalloc_state obmalloc_state_main;
+static bool obmalloc_state_initialized;
+
static inline int
has_own_state(PyInterpreterState *interp)
{
@@ -1028,10 +1036,8 @@ static inline OMState *
get_state(void)
{
    PyInterpreterState *interp = _PyInterpreterState_GET();
-    if (!has_own_state(interp)) {
-        interp = _PyInterpreterState_Main();
-    }
-    return &interp->obmalloc;
+    assert(interp->obmalloc != NULL);  // otherwise not initialized or freed
+    return interp->obmalloc;
}

// These macros all rely on a local "state" variable.
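
The BSS comment in the hunk above leans on demand paging: a zero-initialized static object is placed in the BSS, so the OS maps its pages lazily and only pages that are actually touched become resident. A minimal standalone sketch of that effect (hypothetical names, not part of this commit):

    /* A large zero-initialized static lands in the BSS.  Its pages are
     * mapped lazily, so touching one byte faults in only one page; the
     * rest of the 256 kB stays non-resident, just like the sparsely
     * used radix-tree arrays in _obmalloc_state. */
    #include <stdio.h>

    static unsigned char big_state[256 * 1024];   /* BSS, no explicit init */

    int main(void)
    {
        big_state[0] = 1;               /* one page becomes resident */
        printf("%d\n", big_state[0]);
        return 0;
    }
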
@@ -1094,7 +1100,11 @@ _PyInterpreterState_GetAllocatedBlocks(PyInterpreterState *interp)
                      "the interpreter doesn't have its own allocator");
    }
#endif
-    OMState *state = &interp->obmalloc;
+    OMState *state = interp->obmalloc;
+
+    if (state == NULL) {
+        return 0;
+    }

    Py_ssize_t n = raw_allocated_blocks;
    /* add up allocated blocks for used pools */
@@ -1116,6 +1126,8 @@ _PyInterpreterState_GetAllocatedBlocks(PyInterpreterState *interp)
    return n;
}

+static void free_obmalloc_arenas(PyInterpreterState *interp);
+
void
_PyInterpreterState_FinalizeAllocatedBlocks(PyInterpreterState *interp)
{
@@ -1124,10 +1136,20 @@ _PyInterpreterState_FinalizeAllocatedBlocks(PyInterpreterState *interp)
        return;
    }
#endif
-    if (has_own_state(interp)) {
+    if (has_own_state(interp) && interp->obmalloc != NULL) {
        Py_ssize_t leaked = _PyInterpreterState_GetAllocatedBlocks(interp);
        assert(has_own_state(interp) || leaked == 0);
        interp->runtime->obmalloc.interpreter_leaks += leaked;
+        if (_PyMem_obmalloc_state_on_heap(interp) && leaked == 0) {
+            // Free the obmalloc arenas and radix tree nodes.  If leaked > 0,
+            // some of the memory allocated by obmalloc has not been freed.
+            // It might be safe to free the arenas in that case, but extension
+            // modules could still be using that memory, so it is safer to
+            // leak than to free.  Perhaps there should be a warning when this
+            // happens.  A tool like "-fsanitize=address" can be used to track
+            // down such leaks.
+            free_obmalloc_arenas(interp);
+        }
    }
}
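
The hunk above applies a "leak rather than dangle" rule: backing memory is released only when no blocks remain live. The same pattern as a standalone sketch (toy types, not part of this commit):

    /* Release an arena's backing store only if nothing still points into
     * it; otherwise leak it deliberately, as finalization does above. */
    #include <stdlib.h>

    struct toy_arena {
        void *base;           /* backing memory */
        size_t live_blocks;   /* outstanding allocations */
    };

    static void toy_arena_finalize(struct toy_arena *a)
    {
        if (a->live_blocks == 0) {
            free(a->base);    /* safe: no dangling pointers possible */
            a->base = NULL;
        }
        /* else: leak on purpose; callers may still hold pointers */
    }
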
1133
1155
@@ -2717,9 +2739,96 @@ _PyDebugAllocatorStats(FILE *out,
    (void)printone(out, buf2, num_blocks * sizeof_block);
}

+// Return true if the obmalloc state structure is heap allocated
+// (by PyMem_RawCalloc()).  For the main interpreter, this structure
+// is allocated in the BSS.  Allocating it that way gives some memory
+// savings and a small performance win (at least on a demand-paged OS).
+// On 64-bit platforms, the obmalloc structure is 256 kB.  Most of that
+// memory is for the arena_map_top array.  Since normally only one entry
+// of that array is used, only one page of resident memory is actually
+// used, rather than the full 256 kB.
+bool _PyMem_obmalloc_state_on_heap(PyInterpreterState *interp)
+{
+#if WITH_PYMALLOC
+    return interp->obmalloc && interp->obmalloc != &obmalloc_state_main;
+#else
+    return false;
+#endif
+}
+
+#ifdef WITH_PYMALLOC
+static void
+init_obmalloc_pools(PyInterpreterState *interp)
+{
+    // Initialize the obmalloc->pools structure.  This must be done
+    // before the obmalloc alloc/free functions can be called.
+    poolp temp[OBMALLOC_USED_POOLS_SIZE] =
+        _obmalloc_pools_INIT(interp->obmalloc->pools);
+    memcpy(&interp->obmalloc->pools.used, temp, sizeof(temp));
+}
+#endif /* WITH_PYMALLOC */
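
init_obmalloc_pools() relies on a common C idiom: an initializer list may only appear in a declaration and arrays cannot be assigned, so the table is built in a temporary array and then memcpy'd into the already-allocated struct. A standalone sketch of the idiom (hypothetical names, not part of this commit):

    /* Populate an array member of an existing struct from an
     * initializer-list macro via a temporary plus memcpy. */
    #include <string.h>

    #define TABLE_INIT(cells) { (cells) + 0, (cells) + 1, (cells) + 2, (cells) + 3 }

    struct state {
        int cells[4];
        int *table[4];
    };

    static void init_table(struct state *s)
    {
        int *temp[4] = TABLE_INIT(s->cells);   /* initializer allowed here */
        memcpy(s->table, temp, sizeof(temp));  /* copy into the member */
    }
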
+
+int _PyMem_init_obmalloc(PyInterpreterState *interp)
+{
+#ifdef WITH_PYMALLOC
+    /* Initialize obmalloc, but only heap-allocate it for subinterpreters,
+       since the main interpreter's state is allocated statically. */
+    if (_Py_IsMainInterpreter(interp)
+            || _PyInterpreterState_HasFeature(interp,
+                                              Py_RTFLAGS_USE_MAIN_OBMALLOC)) {
+        interp->obmalloc = &obmalloc_state_main;
+        if (!obmalloc_state_initialized) {
+            init_obmalloc_pools(interp);
+            obmalloc_state_initialized = true;
+        }
+    } else {
+        interp->obmalloc = PyMem_RawCalloc(1, sizeof(struct _obmalloc_state));
+        if (interp->obmalloc == NULL) {
+            return -1;
+        }
+        init_obmalloc_pools(interp);
+    }
+#endif /* WITH_PYMALLOC */
+    return 0;  // success
+}
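
The control flow above picks between one statically allocated state shared by cooperating interpreters (initialized exactly once) and a private heap copy per isolated interpreter. Reduced to its shape as a standalone sketch (toy types, not part of this commit):

    /* Share one static state, init-once, or hand out a private heap copy. */
    #include <stdbool.h>
    #include <stdlib.h>

    struct om_state { int pools_ready; };

    static struct om_state shared_state;   /* like obmalloc_state_main */
    static bool shared_initialized;

    static struct om_state *acquire_state(bool share_with_main)
    {
        if (share_with_main) {
            if (!shared_initialized) {
                shared_state.pools_ready = 1;   /* stand-in for pool init */
                shared_initialized = true;
            }
            return &shared_state;
        }
        struct om_state *s = calloc(1, sizeof(*s));
        if (s != NULL) {
            s->pools_ready = 1;
        }
        return s;   /* NULL signals allocation failure, like returning -1 */
    }
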
+

#ifdef WITH_PYMALLOC

+static void
+free_obmalloc_arenas(PyInterpreterState *interp)
+{
+    OMState *state = interp->obmalloc;
+    for (uint i = 0; i < maxarenas; ++i) {
+        // free each obmalloc memory arena
+        struct arena_object *ao = &allarenas[i];
+        _PyObject_Arena.free(_PyObject_Arena.ctx,
+                             (void *)ao->address, ARENA_SIZE);
+    }
+    // free the array containing pointers to all arenas
+    PyMem_RawFree(allarenas);
+#if WITH_PYMALLOC_RADIX_TREE
+#ifdef USE_INTERIOR_NODES
+    // Free the middle and bottom nodes of the radix tree.  These are
+    // allocated by arena_map_mark_used() but not freed when arenas are
+    // freed.
+    for (int i1 = 0; i1 < MAP_TOP_LENGTH; i1++) {
+        arena_map_mid_t *mid = arena_map_root.ptrs[i1];
+        if (mid == NULL) {
+            continue;
+        }
+        for (int i2 = 0; i2 < MAP_MID_LENGTH; i2++) {
+            arena_map_bot_t *bot = arena_map_root.ptrs[i1]->ptrs[i2];
+            if (bot == NULL) {
+                continue;
+            }
+            PyMem_RawFree(bot);
+        }
+        PyMem_RawFree(mid);
+    }
+#endif
+#endif
+}
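
The radix-tree teardown frees children before the interior node that holds them, skipping NULL slots in a sparse table. The same walk as a standalone sketch (toy sizes, not part of this commit):

    /* Free a sparse two-level pointer table: bottom nodes first,
     * then each middle node, mirroring the loop above. */
    #include <stdlib.h>

    enum { TOP_LEN = 8, MID_LEN = 8 };

    struct bot { int leaves[16]; };
    struct mid { struct bot *ptrs[MID_LEN]; };

    static struct mid *root[TOP_LEN];

    static void free_tree(void)
    {
        for (int i1 = 0; i1 < TOP_LEN; i1++) {
            struct mid *m = root[i1];
            if (m == NULL) {
                continue;            /* sparse: most slots stay empty */
            }
            for (int i2 = 0; i2 < MID_LEN; i2++) {
                free(m->ptrs[i2]);   /* free(NULL) is a no-op */
            }
            free(m);
            root[i1] = NULL;
        }
    }
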
+
#ifdef Py_DEBUG
/* Is target in the list?  The list is traversed via the nextpool pointers.
 * The list may be NULL-terminated, or circular.  Return 1 if target is in