Add the batch functions
+ adjust the target heap if too much gets freed out of GC
+ add the counted realloc size to the old gen
gbaraldi committed Oct 6, 2023
1 parent d8d4945 commit fe17541
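The new jl_batch_accum_* helpers introduced in the first hunk follow a simple per-thread batching pattern: small allocation and free sizes accumulate in a per-thread counter, and only when a batch crosses 16 KiB is it folded into the shared heap counter with a single atomic update. The free path also implements the first bullet above: if flushing a batch of freed bytes leaves the heap well under 90% of the current target, jl_update_heap_size lowers the target immediately instead of waiting for the next collection. The second bullet is the change in jl_gc_counted_realloc_with_old_size that credits the grown size to perm_scanned_bytes, treating the reallocated buffer as old because its age is unknown. What follows is a minimal standalone sketch of the batching pattern, assuming C11 atomics and thread-locals; the 16 KiB threshold, the (alpha + 1) growth factor, and the 0.9 shrink threshold mirror the diff, while the names, the alpha value, and the use of plain thread-locals are illustrative only (in the diff the counters live in ptls->gc_num and gc_heap_stats and are themselves read and written with relaxed atomics).

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define BATCH_LIMIT (16 * 1024)            /* flush threshold, as in the diff */

static _Atomic uint64_t heap_size;         /* stands in for gc_heap_stats.heap_size */
static _Atomic uint64_t heap_target;       /* stands in for gc_heap_stats.heap_target */
static _Thread_local uint64_t alloc_acc;   /* stands in for ptls->gc_num.alloc_acc */
static _Thread_local uint64_t free_acc;    /* stands in for ptls->gc_num.free_acc */
static const double alpha = 2.0;           /* illustrative growth factor; gc.c defines its own */

/* Allocation side: stay thread-local until the batch crosses 16 KiB,
 * then fold it into the shared counter with one atomic add. */
static void batch_accum_heap_size(uint64_t sz)
{
    uint64_t acc = alloc_acc + sz;
    if (acc < BATCH_LIMIT) {
        alloc_acc = acc;
    }
    else {
        atomic_fetch_add_explicit(&heap_size, acc, memory_order_relaxed);
        alloc_acc = 0;
    }
}

/* Free side: same batching, plus the target adjustment from this commit:
 * if a flushed batch of frees leaves heap_size * (alpha + 1) below 90% of
 * the current target, shrink the target right away. */
static void batch_accum_free_size(uint64_t sz)
{
    uint64_t acc = free_acc + sz;
    if (acc < BATCH_LIMIT) {
        free_acc = acc;
    }
    else {
        atomic_fetch_sub_explicit(&heap_size, acc, memory_order_relaxed);
        free_acc = 0;
        uint64_t new_target = (uint64_t)(atomic_load_explicit(&heap_size, memory_order_relaxed) * (alpha + 1));
        if ((double)new_target < 0.9 * atomic_load_explicit(&heap_target, memory_order_relaxed))
            atomic_store_explicit(&heap_target, new_target, memory_order_relaxed);
    }
}

int main(void)
{
    /* 1000 small "allocations" of 100 bytes: the shared counter is touched
     * only on roughly every 164th call; the rest stay in the thread-local batch. */
    for (int i = 0; i < 1000; i++)
        batch_accum_heap_size(100);
    printf("heap_size after batching: %llu bytes\n",
           (unsigned long long)atomic_load_explicit(&heap_size, memory_order_relaxed));
    return 0;
}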
Showing 1 changed file with 49 additions and 82 deletions.
131 changes: 49 additions & 82 deletions src/gc.c
@@ -1013,6 +1013,39 @@ static void sweep_weak_refs(void)
}
}

STATIC_INLINE void jl_update_heap_size(uint64_t target_heap) JL_NOTSAFEPOINT
{
if (target_heap > max_total_memory && !thrashing) // Allow it to go over if we are thrashing; if we die, we die
target_heap = max_total_memory;
else if (target_heap < default_collect_interval)
target_heap = default_collect_interval;
jl_atomic_store_release(&gc_heap_stats.heap_target, target_heap);
}
STATIC_INLINE void jl_batch_accum_heap_size(jl_ptls_t ptls, uint64_t sz) JL_NOTSAFEPOINT
{
uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_num.alloc_acc) + sz;
if (alloc_acc < 16*1024)
jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, alloc_acc);
else {
jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, alloc_acc);
jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, 0);
}
}

STATIC_INLINE void jl_batch_accum_free_size(jl_ptls_t ptls, uint64_t sz) JL_NOTSAFEPOINT
{
uint64_t free_acc = jl_atomic_load_relaxed(&ptls->gc_num.free_acc) + sz;
if (free_acc < 16*1024)
jl_atomic_store_relaxed(&ptls->gc_num.free_acc, free_acc);
else {
jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, -free_acc);
jl_atomic_store_relaxed(&ptls->gc_num.free_acc, 0);
size_t heap_target = jl_atomic_load_relaxed(&gc_heap_stats.heap_size) * (alpha+1);
if (heap_target < 0.9 * jl_atomic_load_relaxed(&gc_heap_stats.heap_target)) {
jl_update_heap_size(heap_target); // This is racy but it's ok
}
}
}

// big value list

@@ -1036,13 +1069,7 @@ STATIC_INLINE jl_value_t *jl_gc_big_alloc_inner(jl_ptls_t ptls, size_t sz)
jl_atomic_load_relaxed(&ptls->gc_num.allocd) + allocsz);
jl_atomic_store_relaxed(&ptls->gc_num.bigalloc,
jl_atomic_load_relaxed(&ptls->gc_num.bigalloc) + 1);
uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_num.alloc_acc);
if (alloc_acc + allocsz < 16*1024)
jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, alloc_acc + allocsz);
else {
jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, alloc_acc + allocsz);
jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, 0);
}
jl_batch_accum_heap_size(ptls, allocsz);
#ifdef MEMDEBUG
memset(v, 0xee, allocsz);
#endif
@@ -1156,13 +1183,7 @@ void jl_gc_count_allocd(size_t sz) JL_NOTSAFEPOINT
jl_ptls_t ptls = jl_current_task->ptls;
jl_atomic_store_relaxed(&ptls->gc_num.allocd,
jl_atomic_load_relaxed(&ptls->gc_num.allocd) + sz);
uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_num.alloc_acc);
if (alloc_acc + sz < 16*1024)
jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, alloc_acc + sz);
else {
jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, alloc_acc + sz);
jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, 0);
}
jl_batch_accum_heap_size(ptls, sz);
}

static void combine_thread_gc_counts(jl_gc_num_t *dest) JL_NOTSAFEPOINT
@@ -1182,7 +1203,8 @@ static void combine_thread_gc_counts(jl_gc_num_t *dest) JL_NOTSAFEPOINT
uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_num.alloc_acc);
uint64_t free_acc = jl_atomic_load_relaxed(&ptls->gc_num.free_acc);
dest->freed += jl_atomic_load_relaxed(&ptls->gc_num.free_acc);
jl_atomic_store_relaxed(&gc_heap_stats.heap_size, alloc_acc - free_acc + jl_atomic_load_relaxed(&gc_heap_stats.heap_size));
int64_t diff = alloc_acc - free_acc;
jl_atomic_store_relaxed(&gc_heap_stats.heap_size, diff + jl_atomic_load_relaxed(&gc_heap_stats.heap_size));
jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, 0);
jl_atomic_store_relaxed(&ptls->gc_num.free_acc, 0);
}
@@ -3417,8 +3439,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
old_alloc_diff = alloc_diff;
old_mut_time = mutator_time;
old_freed_diff = freed_diff;
old_pause_time = pause;
old_heap_size = heap_size; // TODO: Update these values dynamically instead of just during the GC
old_pause_time = pause; // TODO: Update these values dynamically instead of just during the GC
if (gc_time > alloc_time * 95 && !(thrash_counter < 4))
thrash_counter += 1;
else if (thrash_counter > 0)
@@ -3446,12 +3467,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)

target_allocs = alpha * heap_size;
#endif
uint64_t target_heap = (uint64_t)target_allocs + heap_size;
if (target_heap > max_total_memory && !thrashing) // Allow it to go over if we are thrashing if we die we die
target_heap = max_total_memory;
else if (target_heap < default_collect_interval)
target_heap = default_collect_interval;
jl_atomic_store_relaxed(&gc_heap_stats.heap_target, target_heap);
jl_update_heap_size(target_allocs + heap_size);

double old_ratio = (double)promoted_bytes/(double)heap_size;
if (heap_size > max_total_memory * 0.8 || old_ratio > 0.15)
@@ -3767,13 +3783,7 @@ JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz)
jl_atomic_load_relaxed(&ptls->gc_num.allocd) + sz);
jl_atomic_store_relaxed(&ptls->gc_num.malloc,
jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1);
uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_num.alloc_acc);
if (alloc_acc + sz < 16*1024)
jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, alloc_acc + sz);
else {
jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, alloc_acc + sz);
jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, 0);
}
jl_batch_accum_heap_size(ptls, sz);
}
return data;
}
@@ -3790,13 +3800,7 @@ JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz)
jl_atomic_load_relaxed(&ptls->gc_num.allocd) + nm*sz);
jl_atomic_store_relaxed(&ptls->gc_num.malloc,
jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1);
uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_num.alloc_acc);
if (alloc_acc + sz < 16*1024)
jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, alloc_acc + sz * nm);
else {
jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, alloc_acc + sz * nm);
jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, 0);
}
jl_batch_accum_heap_size(ptls, sz * nm);
}
return data;
}
@@ -3807,14 +3811,7 @@ JL_DLLEXPORT void jl_gc_counted_free_with_size(void *p, size_t sz)
jl_task_t *ct = jl_current_task;
free(p);
if (pgcstack != NULL && ct->world_age) {
jl_ptls_t ptls = ct->ptls;
uint64_t free_acc = jl_atomic_load_relaxed(&ptls->gc_num.free_acc);
if (free_acc + sz < 16*1024)
jl_atomic_store_relaxed(&ptls->gc_num.free_acc, free_acc + sz);
else {
jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, -(free_acc + sz));
jl_atomic_store_relaxed(&ptls->gc_num.free_acc, 0);
}
jl_batch_accum_free_size(ct->ptls, sz);
}
}

@@ -3834,23 +3831,12 @@ JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size

int64_t diff = sz - old;
if (diff < 0) {
diff = -diff;
uint64_t free_acc = jl_atomic_load_relaxed(&ptls->gc_num.free_acc);
if (free_acc + diff < 16*1024)
jl_atomic_store_relaxed(&ptls->gc_num.free_acc, free_acc + diff);
else {
jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, -(free_acc + diff));
jl_atomic_store_relaxed(&ptls->gc_num.free_acc, 0);
}
jl_batch_accum_free_size(ptls, -diff);
}
else {
uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_num.alloc_acc);
if (alloc_acc + diff < 16*1024)
jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, alloc_acc + diff);
else {
jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, alloc_acc + diff);
jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, 0);
}
jl_batch_accum_heap_size(ptls, diff);
ptls->gc_cache.perm_scanned_bytes += diff; // We can't be sure of the age of this object,
// so assume old because in the worst case we run more full GCs
}
}
return data;
@@ -3935,13 +3921,7 @@ JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz)
jl_atomic_load_relaxed(&ptls->gc_num.allocd) + allocsz);
jl_atomic_store_relaxed(&ptls->gc_num.malloc,
jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1);
uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_num.alloc_acc);
if (alloc_acc + allocsz < 16*1024)
jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, alloc_acc + allocsz);
else {
jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, alloc_acc + allocsz);
jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, 0);
}
jl_batch_accum_heap_size(ptls, allocsz);
#ifdef _OS_WINDOWS_
SetLastError(last_error);
#endif
@@ -3989,23 +3969,10 @@ static void *gc_managed_realloc_(jl_ptls_t ptls, void *d, size_t sz, size_t olds

int64_t diff = allocsz - oldsz;
if (diff < 0) {
diff = -diff;
uint64_t free_acc = jl_atomic_load_relaxed(&ptls->gc_num.free_acc);
if (free_acc + diff < 16*1024)
jl_atomic_store_relaxed(&ptls->gc_num.free_acc, free_acc + diff);
else {
jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, -(free_acc + diff));
jl_atomic_store_relaxed(&ptls->gc_num.free_acc, 0);
}
jl_batch_accum_free_size(ptls, -diff);
}
else {
uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_num.alloc_acc);
if (alloc_acc + diff < 16*1024)
jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, alloc_acc + diff);
else {
jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, alloc_acc + diff);
jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, 0);
}
jl_batch_accum_heap_size(ptls, diff);
}
if (allocsz > oldsz) {
maybe_record_alloc_to_profile((jl_value_t*)b, allocsz - oldsz, (jl_datatype_t*)jl_buff_tag);
