Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add a compile-time option to enable 4k page sizes (#52229) #111

Merged
merged 1 commit into from
Dec 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 23 additions & 1 deletion src/gc.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,12 @@
extern "C" {
#endif

#ifdef GC_SMALL_PAGE
#define GC_PAGE_LG2 12 // log2(size of a page)
#else
#define GC_PAGE_LG2 14 // log2(size of a page)
#define GC_PAGE_SZ (1 << GC_PAGE_LG2) // 16k
#endif
#define GC_PAGE_SZ (1 << GC_PAGE_LG2)
#define GC_PAGE_OFFSET (JL_HEAP_ALIGNMENT - (sizeof(jl_taggedvalue_t) % JL_HEAP_ALIGNMENT))

#define jl_malloc_tag ((void*)0xdeadaa01)
Expand Down Expand Up @@ -241,6 +245,23 @@ typedef struct {
_Atomic(size_t) n_pages_allocd;
} gc_fragmentation_stat_t;

#ifdef GC_SMALL_PAGE
#ifdef _P64
#define REGION0_PG_COUNT (1 << 16)
#define REGION1_PG_COUNT (1 << 18)
#define REGION2_PG_COUNT (1 << 18)
#define REGION0_INDEX(p) (((uintptr_t)(p) >> 12) & 0xFFFF) // shift by GC_PAGE_LG2
#define REGION1_INDEX(p) (((uintptr_t)(p) >> 28) & 0x3FFFF)
#define REGION_INDEX(p) (((uintptr_t)(p) >> 46) & 0x3FFFF)
#else
#define REGION0_PG_COUNT (1 << 10)
#define REGION1_PG_COUNT (1 << 10)
#define REGION2_PG_COUNT (1 << 0)
#define REGION0_INDEX(p) (((uintptr_t)(p) >> 12) & 0x3FF) // shift by GC_PAGE_LG2
#define REGION1_INDEX(p) (((uintptr_t)(p) >> 22) & 0x3FF)
#define REGION_INDEX(p) (0)
#endif
#else
#ifdef _P64
#define REGION0_PG_COUNT (1 << 16)
#define REGION1_PG_COUNT (1 << 16)
Expand All @@ -256,6 +277,7 @@ typedef struct {
#define REGION1_INDEX(p) (((uintptr_t)(p) >> 22) & 0x3FF)
#define REGION_INDEX(p) (0)
#endif
#endif

// define the representation of the levels of the page-table (0 to 2)
typedef struct {
Expand Down
45 changes: 37 additions & 8 deletions src/julia_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -359,24 +359,48 @@ static const int jl_gc_sizeclasses[] = {
144, 160, 176, 192, 208, 224, 240, 256,

// the following tables are computed for maximum packing efficiency via the formula:
// pg = 2^14
// pg = GC_SMALL_PAGE ? 2^12 : 2^14
// sz = (div.(pg-8, rng).÷16)*16; hcat(sz, (pg-8).÷sz, pg .- (pg-8).÷sz.*sz)'

#ifdef GC_SMALL_PAGE
// rng = 15:-1:2 (14 pools)
272, 288, 304, 336, 368, 400, 448, 496, 576, 672, 816, 1008, 1360, 2032
// 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, /pool
// 16, 64, 144, 64, 48, 96, 64, 128, 64, 64, 16, 64, 16, 32, bytes lost
#else
// rng = 60:-4:32 (8 pools)
272, 288, 304, 336, 368, 400, 448, 496,
// 60, 56, 53, 48, 44, 40, 36, 33, /pool
// 64, 256, 272, 256, 192, 384, 256, 16, bytes lost
// 60, 56, 53, 48, 44, 40, 36, 33, /pool
// 64, 256, 272, 256, 192, 384, 256, 16, bytes lost

// rng = 30:-2:16 (8 pools)
544, 576, 624, 672, 736, 816, 896, 1008,
// 30, 28, 26, 24, 22, 20, 18, 16, /pool
// 64, 256, 160, 256, 192, 64, 256, 256, bytes lost
// 30, 28, 26, 24, 22, 20, 18, 16, /pool
// 64, 256, 160, 256, 192, 64, 256, 256, bytes lost

// rng = 15:-1:8 (8 pools)
1088, 1168, 1248, 1360, 1488, 1632, 1808, 2032
// 15, 14, 13, 12, 11, 10, 9, 8, /pool
// 64, 32, 160, 64, 16, 64, 112, 128, bytes lost
// 15, 14, 13, 12, 11, 10, 9, 8, /pool
// 64, 32, 160, 64, 16, 64, 112, 128, bytes lost
#endif
};
#ifdef GC_SMALL_PAGE
#ifdef _P64
# define JL_GC_N_POOLS 39
#elif MAX_ALIGN == 8
# define JL_GC_N_POOLS 40
#else
# define JL_GC_N_POOLS 41
#endif
#else
#ifdef _P64
# define JL_GC_N_POOLS 49
#elif MAX_ALIGN == 8
# define JL_GC_N_POOLS 50
#else
# define JL_GC_N_POOLS 51
#endif
#endif
static_assert(sizeof(jl_gc_sizeclasses) / sizeof(jl_gc_sizeclasses[0]) == JL_GC_N_POOLS, "");

STATIC_INLINE int jl_gc_alignment(size_t sz)
Expand All @@ -403,7 +427,12 @@ JL_DLLEXPORT int jl_alignment(size_t sz);

// the following table is computed as:
// [searchsortedfirst(jl_gc_sizeclasses, i) - 1 for i = 0:16:jl_gc_sizeclasses[end]]
static const uint8_t szclass_table[] = {0, 1, 3, 5, 7, 9, 11, 13, 15, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 28, 29, 29, 30, 30, 31, 31, 31, 32, 32, 32, 33, 33, 33, 34, 34, 35, 35, 35, 36, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 40, 40, 40, 40, 40, 40, 40, 41, 41, 41, 41, 41, 42, 42, 42, 42, 42, 43, 43, 43, 43, 43, 44, 44, 44, 44, 44, 44, 44, 45, 45, 45, 45, 45, 45, 45, 45, 46, 46, 46, 46, 46, 46, 46, 46, 46, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48};
static const uint8_t szclass_table[] =
#ifdef GC_SMALL_PAGE
{0,1,3,5,7,9,11,13,15,17,18,19,20,21,22,23,24,25,26,27,28,28,29,29,30,30,31,31,31,32,32,32,33,33,33,33,33,34,34,34,34,34,34,35,35,35,35,35,35,35,35,35,36,36,36,36,36,36,36,36,36,36,36,36,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38};
#else
{0,1,3,5,7,9,11,13,15,17,18,19,20,21,22,23,24,25,26,27,28,28,29,29,30,30,31,31,31,32,32,32,33,33,33,34,34,35,35,35,36,36,36,37,37,37,37,38,38,38,38,38,39,39,39,39,39,40,40,40,40,40,40,40,41,41,41,41,41,42,42,42,42,42,43,43,43,43,43,44,44,44,44,44,44,44,45,45,45,45,45,45,45,45,46,46,46,46,46,46,46,46,46,47,47,47,47,47,47,47,47,47,47,47,48,48,48,48,48,48,48,48,48,48,48,48,48,48};
#endif
static_assert(sizeof(szclass_table) == 128, "");

STATIC_INLINE uint8_t JL_CONST_FUNC jl_gc_szclass(unsigned sz)
Expand Down
12 changes: 3 additions & 9 deletions src/julia_threads.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
#ifndef JL_THREADS_H
#define JL_THREADS_H

#include "work-stealing-queue.h"
#include "julia_atomics.h"
#include "work-stealing-queue.h"
#ifndef _OS_WINDOWS_
#include "pthread.h"
#endif
Expand Down Expand Up @@ -160,14 +160,8 @@ typedef struct {
arraylist_t *last_remset;

// variables for allocating objects from pools
#ifdef _P64
# define JL_GC_N_POOLS 49
#elif MAX_ALIGN == 8
# define JL_GC_N_POOLS 50
#else
# define JL_GC_N_POOLS 51
#endif
jl_gc_pool_t norm_pools[JL_GC_N_POOLS];
#define JL_GC_N_MAX_POOLS 51 // conservative. must be kept in sync with `src/julia_internal.h`
kpamnany marked this conversation as resolved.
Show resolved Hide resolved
jl_gc_pool_t norm_pools[JL_GC_N_MAX_POOLS];

#define JL_N_STACK_POOLS 16
small_arraylist_t free_stacks[JL_N_STACK_POOLS];
Expand Down
5 changes: 5 additions & 0 deletions src/options.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,11 @@
// Automatic Instrumenting Profiler
//#define ENABLE_TIMINGS

// pool allocator configuration options

// GC_SMALL_PAGE allocates objects in 4k pages
// #define GC_SMALL_PAGE


// method dispatch profiling --------------------------------------------------

Expand Down