Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move heap_size batching code into pair of functions #51611

Merged
merged 8 commits into from
Nov 17, 2023
126 changes: 42 additions & 84 deletions src/gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -700,6 +700,7 @@ static uint64_t gc_end_time = 0;
static int thrash_counter = 0;
static int thrashing = 0;
// global variables for GC stats
static uint64_t freed_in_runtime = 0;

// Resetting the object to a young object, this is used when marking the
// finalizer list to collect them the next time because the object is very
Expand Down Expand Up @@ -1005,6 +1006,22 @@ static void sweep_weak_refs(void)
}


STATIC_INLINE void jl_batch_accum_heap_size(jl_ptls_t ptls, uint64_t sz) JL_NOTSAFEPOINT
{
uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_num.alloc_acc) + sz;
if (alloc_acc < 16*1024)
jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, alloc_acc);
else {
jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, alloc_acc);
jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, 0);
}
}

STATIC_INLINE void jl_batch_accum_free_size(jl_ptls_t ptls, uint64_t sz) JL_NOTSAFEPOINT
{
jl_atomic_store_relaxed(&ptls->gc_num.free_acc, jl_atomic_load_relaxed(&ptls->gc_num.free_acc) + sz);
}

// big value list

// Size includes the tag and the tag is not cleared!!
Expand All @@ -1027,13 +1044,7 @@ STATIC_INLINE jl_value_t *jl_gc_big_alloc_inner(jl_ptls_t ptls, size_t sz)
jl_atomic_load_relaxed(&ptls->gc_num.allocd) + allocsz);
jl_atomic_store_relaxed(&ptls->gc_num.bigalloc,
jl_atomic_load_relaxed(&ptls->gc_num.bigalloc) + 1);
uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_num.alloc_acc);
if (alloc_acc + allocsz < 16*1024)
jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, alloc_acc + allocsz);
else {
jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, alloc_acc + allocsz);
jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, 0);
}
jl_batch_accum_heap_size(ptls, allocsz);
#ifdef MEMDEBUG
memset(v, 0xee, allocsz);
#endif
Expand Down Expand Up @@ -1147,16 +1158,10 @@ void jl_gc_count_allocd(size_t sz) JL_NOTSAFEPOINT
jl_ptls_t ptls = jl_current_task->ptls;
jl_atomic_store_relaxed(&ptls->gc_num.allocd,
jl_atomic_load_relaxed(&ptls->gc_num.allocd) + sz);
uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_num.alloc_acc);
if (alloc_acc + sz < 16*1024)
jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, alloc_acc + sz);
else {
jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, alloc_acc + sz);
jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, 0);
}
jl_batch_accum_heap_size(ptls, sz);
}

static void combine_thread_gc_counts(jl_gc_num_t *dest) JL_NOTSAFEPOINT
// Only safe to update the heap inside the GC
static void combine_thread_gc_counts(jl_gc_num_t *dest, int update_heap) JL_NOTSAFEPOINT
{
int gc_n_threads;
jl_ptls_t* gc_all_tls_states;
Expand All @@ -1170,12 +1175,14 @@ static void combine_thread_gc_counts(jl_gc_num_t *dest) JL_NOTSAFEPOINT
dest->realloc += jl_atomic_load_relaxed(&ptls->gc_num.realloc);
dest->poolalloc += jl_atomic_load_relaxed(&ptls->gc_num.poolalloc);
dest->bigalloc += jl_atomic_load_relaxed(&ptls->gc_num.bigalloc);
uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_num.alloc_acc);
uint64_t free_acc = jl_atomic_load_relaxed(&ptls->gc_num.free_acc);
dest->freed += jl_atomic_load_relaxed(&ptls->gc_num.free_acc);
jl_atomic_store_relaxed(&gc_heap_stats.heap_size, alloc_acc - free_acc + jl_atomic_load_relaxed(&gc_heap_stats.heap_size));
jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, 0);
jl_atomic_store_relaxed(&ptls->gc_num.free_acc, 0);
if (update_heap) {
uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_num.alloc_acc);
freed_in_runtime += jl_atomic_load_relaxed(&ptls->gc_num.free_acc);
jl_atomic_store_relaxed(&gc_heap_stats.heap_size, alloc_acc + jl_atomic_load_relaxed(&gc_heap_stats.heap_size));
jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, 0);
jl_atomic_store_relaxed(&ptls->gc_num.free_acc, 0);
}
}
}
}
Expand Down Expand Up @@ -1209,7 +1216,7 @@ static int64_t inc_live_bytes(int64_t inc) JL_NOTSAFEPOINT

void jl_gc_reset_alloc_count(void) JL_NOTSAFEPOINT
{
combine_thread_gc_counts(&gc_num);
combine_thread_gc_counts(&gc_num, 0);
inc_live_bytes(gc_num.deferred_alloc + gc_num.allocd);
gc_num.allocd = 0;
gc_num.deferred_alloc = 0;
Expand Down Expand Up @@ -3176,7 +3183,7 @@ JL_DLLEXPORT int jl_gc_is_enabled(void)
JL_DLLEXPORT void jl_gc_get_total_bytes(int64_t *bytes) JL_NOTSAFEPOINT
{
jl_gc_num_t num = gc_num;
combine_thread_gc_counts(&num);
combine_thread_gc_counts(&num, 0);
// Sync this logic with `base/util.jl:GC_Diff`
*bytes = (num.total_allocd + num.deferred_alloc + num.allocd);
}
Expand All @@ -3189,7 +3196,7 @@ JL_DLLEXPORT uint64_t jl_gc_total_hrtime(void)
JL_DLLEXPORT jl_gc_num_t jl_gc_num(void)
{
jl_gc_num_t num = gc_num;
combine_thread_gc_counts(&num);
combine_thread_gc_counts(&num, 0);
return num;
}

Expand Down Expand Up @@ -3248,7 +3255,7 @@ size_t jl_maxrss(void);
// Only one thread should be running in this function
static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
{
combine_thread_gc_counts(&gc_num);
combine_thread_gc_counts(&gc_num, 1);

// We separate the update of the graph from the update of live_bytes here
// so that the sweep shows a downward trend in memory usage.
Expand Down Expand Up @@ -3432,6 +3439,8 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
gc_num.last_incremental_sweep = gc_end_time;
}

jl_atomic_store_relaxed(&gc_heap_stats.heap_size, jl_atomic_load_relaxed(&gc_heap_stats.heap_size) - freed_in_runtime);
d-netto marked this conversation as resolved.
Show resolved Hide resolved
freed_in_runtime = 0;
size_t heap_size = jl_atomic_load_relaxed(&gc_heap_stats.heap_size);
double target_allocs = 0.0;
double min_interval = default_collect_interval;
Expand Down Expand Up @@ -3780,13 +3789,7 @@ JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz)
jl_atomic_load_relaxed(&ptls->gc_num.allocd) + sz);
jl_atomic_store_relaxed(&ptls->gc_num.malloc,
jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1);
uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_num.alloc_acc);
if (alloc_acc + sz < 16*1024)
jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, alloc_acc + sz);
else {
jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, alloc_acc + sz);
jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, 0);
}
jl_batch_accum_heap_size(ptls, sz);
}
return data;
}
Expand All @@ -3803,13 +3806,7 @@ JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz)
jl_atomic_load_relaxed(&ptls->gc_num.allocd) + nm*sz);
jl_atomic_store_relaxed(&ptls->gc_num.malloc,
jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1);
uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_num.alloc_acc);
if (alloc_acc + sz < 16*1024)
jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, alloc_acc + sz * nm);
else {
jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, alloc_acc + sz * nm);
jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, 0);
}
jl_batch_accum_heap_size(ptls, sz * nm);
}
return data;
}
Expand All @@ -3820,14 +3817,7 @@ JL_DLLEXPORT void jl_gc_counted_free_with_size(void *p, size_t sz)
jl_task_t *ct = jl_current_task;
free(p);
if (pgcstack != NULL && ct->world_age) {
jl_ptls_t ptls = ct->ptls;
uint64_t free_acc = jl_atomic_load_relaxed(&ptls->gc_num.free_acc);
if (free_acc + sz < 16*1024)
jl_atomic_store_relaxed(&ptls->gc_num.free_acc, free_acc + sz);
else {
jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, -(free_acc + sz));
jl_atomic_store_relaxed(&ptls->gc_num.free_acc, 0);
}
jl_batch_accum_free_size(ct->ptls, sz);
}
}

Expand All @@ -3847,23 +3837,10 @@ JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size

int64_t diff = sz - old;
if (diff < 0) {
diff = -diff;
uint64_t free_acc = jl_atomic_load_relaxed(&ptls->gc_num.free_acc);
if (free_acc + diff < 16*1024)
jl_atomic_store_relaxed(&ptls->gc_num.free_acc, free_acc + diff);
else {
jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, -(free_acc + diff));
jl_atomic_store_relaxed(&ptls->gc_num.free_acc, 0);
}
jl_batch_accum_free_size(ptls, -diff);
}
else {
uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_num.alloc_acc);
if (alloc_acc + diff < 16*1024)
jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, alloc_acc + diff);
else {
jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, alloc_acc + diff);
jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, 0);
}
jl_batch_accum_heap_size(ptls, diff);
}
}
return data;
Expand Down Expand Up @@ -3948,13 +3925,7 @@ JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz)
jl_atomic_load_relaxed(&ptls->gc_num.allocd) + allocsz);
jl_atomic_store_relaxed(&ptls->gc_num.malloc,
jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1);
uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_num.alloc_acc);
if (alloc_acc + allocsz < 16*1024)
jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, alloc_acc + allocsz);
else {
jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, alloc_acc + allocsz);
jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, 0);
}
jl_batch_accum_heap_size(ptls, allocsz);
#ifdef _OS_WINDOWS_
SetLastError(last_error);
#endif
Expand Down Expand Up @@ -4002,23 +3973,10 @@ static void *gc_managed_realloc_(jl_ptls_t ptls, void *d, size_t sz, size_t olds

int64_t diff = allocsz - oldsz;
if (diff < 0) {
diff = -diff;
uint64_t free_acc = jl_atomic_load_relaxed(&ptls->gc_num.free_acc);
if (free_acc + diff < 16*1024)
jl_atomic_store_relaxed(&ptls->gc_num.free_acc, free_acc + diff);
else {
jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, -(free_acc + diff));
jl_atomic_store_relaxed(&ptls->gc_num.free_acc, 0);
}
jl_batch_accum_free_size(ptls, -diff);
}
else {
uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_num.alloc_acc);
if (alloc_acc + diff < 16*1024)
jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, alloc_acc + diff);
else {
jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, alloc_acc + diff);
jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, 0);
}
jl_batch_accum_heap_size(ptls, diff);
}
if (allocsz > oldsz) {
maybe_record_alloc_to_profile((jl_value_t*)b, allocsz - oldsz, (jl_datatype_t*)jl_buff_tag);
Expand Down
11 changes: 5 additions & 6 deletions src/staticdata.c
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,6 @@ External links:
*/
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#include <stdio.h> // printf
#include <inttypes.h> // PRIxPTR

Expand Down Expand Up @@ -3522,7 +3521,7 @@ static jl_value_t *jl_validate_cache_file(ios_t *f, jl_array_t *depmods, uint64_
}

// TODO?: refactor to make it easier to create the "package inspector"
static jl_value_t *jl_restore_package_image_from_stream(void* pkgimage_handle, ios_t *f, jl_image_t *image, jl_array_t *depmods, int completeinfo, const char *pkgname, bool needs_permalloc)
static jl_value_t *jl_restore_package_image_from_stream(void* pkgimage_handle, ios_t *f, jl_image_t *image, jl_array_t *depmods, int completeinfo, const char *pkgname, int needs_permalloc)
{
JL_TIMING(LOAD_IMAGE, LOAD_Pkgimg);
jl_timing_printf(JL_TIMING_DEFAULT_BLOCK, pkgname);
Expand All @@ -3548,7 +3547,7 @@ static jl_value_t *jl_restore_package_image_from_stream(void* pkgimage_handle, i
JL_SIGATOMIC_BEGIN();
size_t len = dataendpos - datastartpos;
char *sysimg;
bool success = !needs_permalloc;
int success = !needs_permalloc;
ios_seek(f, datastartpos);
if (needs_permalloc)
sysimg = (char*)jl_gc_perm_alloc(len, 0, 64, 0);
Expand Down Expand Up @@ -3608,7 +3607,7 @@ static void jl_restore_system_image_from_stream(ios_t *f, jl_image_t *image, uin
jl_restore_system_image_from_stream_(f, image, NULL, checksum | ((uint64_t)0xfdfcfbfa << 32), NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
}

JL_DLLEXPORT jl_value_t *jl_restore_incremental_from_buf(void* pkgimage_handle, const char *buf, jl_image_t *image, size_t sz, jl_array_t *depmods, int completeinfo, const char *pkgname, bool needs_permalloc)
JL_DLLEXPORT jl_value_t *jl_restore_incremental_from_buf(void* pkgimage_handle, const char *buf, jl_image_t *image, size_t sz, jl_array_t *depmods, int completeinfo, const char *pkgname, int needs_permalloc)
{
ios_t f;
ios_static_buffer(&f, (char*)buf, sz);
Expand All @@ -3625,7 +3624,7 @@ JL_DLLEXPORT jl_value_t *jl_restore_incremental(const char *fname, jl_array_t *d
"Cache file \"%s\" not found.\n", fname);
}
jl_image_t pkgimage = {};
jl_value_t *ret = jl_restore_package_image_from_stream(NULL, &f, &pkgimage, depmods, completeinfo, pkgname, true);
jl_value_t *ret = jl_restore_package_image_from_stream(NULL, &f, &pkgimage, depmods, completeinfo, pkgname, 1);
ios_close(&f);
return ret;
}
Expand Down Expand Up @@ -3700,7 +3699,7 @@ JL_DLLEXPORT jl_value_t *jl_restore_package_image_from_file(const char *fname, j
memset(&pkgimage.fptrs, 0, sizeof(pkgimage.fptrs));
}

jl_value_t* mod = jl_restore_incremental_from_buf(pkgimg_handle, pkgimg_data, &pkgimage, *plen, depmods, completeinfo, pkgname, false);
jl_value_t* mod = jl_restore_incremental_from_buf(pkgimg_handle, pkgimg_data, &pkgimage, *plen, depmods, completeinfo, pkgname, 0);

return mod;
}
Expand Down