diff --git a/src/gc.c b/src/gc.c
index c390619ce4695..7958a2a951955 100644
--- a/src/gc.c
+++ b/src/gc.c
@@ -1013,6 +1013,39 @@ static void sweep_weak_refs(void)
     }
 }
 
+STATIC_INLINE void jl_update_heap_size(uint64_t target_heap) JL_NOTSAFEPOINT
+{
+    if (target_heap > max_total_memory && !thrashing) // Allow it to go over if we are thrashing if we die we die
+        target_heap = max_total_memory;
+    else if (target_heap < default_collect_interval)
+        target_heap = default_collect_interval;
+    jl_atomic_store_release(&gc_heap_stats.heap_target, target_heap);
+}
+STATIC_INLINE void jl_batch_accum_heap_size(jl_ptls_t ptls, uint64_t sz) JL_NOTSAFEPOINT
+{
+    uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_num.alloc_acc) + sz;
+    if (alloc_acc < 16*1024)
+        jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, alloc_acc);
+    else {
+        jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, alloc_acc);
+        jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, 0);
+    }
+}
+
+STATIC_INLINE void jl_batch_accum_free_size(jl_ptls_t ptls, uint64_t sz) JL_NOTSAFEPOINT
+{
+    uint64_t free_acc = jl_atomic_load_relaxed(&ptls->gc_num.free_acc) + sz;
+    if (free_acc < 16*1024)
+        jl_atomic_store_relaxed(&ptls->gc_num.free_acc, free_acc);
+    else {
+        jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, -free_acc);
+        jl_atomic_store_relaxed(&ptls->gc_num.free_acc, 0);
+        size_t heap_target = jl_atomic_load_relaxed(&gc_heap_stats.heap_size) * (alpha+1);
+        if (heap_target < 0.9 * jl_atomic_load_relaxed(&gc_heap_stats.heap_target)) {
+            jl_update_heap_size(heap_target); // This is racy but it's ok
+        }
+    }
+}
 
 // big value list
 
@@ -1036,13 +1069,7 @@ STATIC_INLINE jl_value_t *jl_gc_big_alloc_inner(jl_ptls_t ptls, size_t sz)
         jl_atomic_load_relaxed(&ptls->gc_num.allocd) + allocsz);
     jl_atomic_store_relaxed(&ptls->gc_num.bigalloc,
         jl_atomic_load_relaxed(&ptls->gc_num.bigalloc) + 1);
-    uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_num.alloc_acc);
-    if (alloc_acc + allocsz < 16*1024)
-        jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, alloc_acc + allocsz);
-    else {
-        jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, alloc_acc + allocsz);
-        jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, 0);
-    }
+    jl_batch_accum_heap_size(ptls, allocsz);
 #ifdef MEMDEBUG
     memset(v, 0xee, allocsz);
 #endif
@@ -1156,13 +1183,7 @@ void jl_gc_count_allocd(size_t sz) JL_NOTSAFEPOINT
     jl_ptls_t ptls = jl_current_task->ptls;
     jl_atomic_store_relaxed(&ptls->gc_num.allocd,
         jl_atomic_load_relaxed(&ptls->gc_num.allocd) + sz);
-    uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_num.alloc_acc);
-    if (alloc_acc + sz < 16*1024)
-        jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, alloc_acc + sz);
-    else {
-        jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, alloc_acc + sz);
-        jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, 0);
-    }
+    jl_batch_accum_heap_size(ptls, sz);
 }
 
 static void combine_thread_gc_counts(jl_gc_num_t *dest) JL_NOTSAFEPOINT
@@ -1182,7 +1203,8 @@ static void combine_thread_gc_counts(jl_gc_num_t *dest) JL_NOTSAFEPOINT
             uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_num.alloc_acc);
             uint64_t free_acc = jl_atomic_load_relaxed(&ptls->gc_num.free_acc);
             dest->freed += jl_atomic_load_relaxed(&ptls->gc_num.free_acc);
-            jl_atomic_store_relaxed(&gc_heap_stats.heap_size, alloc_acc - free_acc + jl_atomic_load_relaxed(&gc_heap_stats.heap_size));
+            int64_t diff = alloc_acc - free_acc;
+            jl_atomic_store_relaxed(&gc_heap_stats.heap_size, diff + jl_atomic_load_relaxed(&gc_heap_stats.heap_size));
             jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, 0);
             jl_atomic_store_relaxed(&ptls->gc_num.free_acc, 0);
         }
@@ -3417,8 +3439,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
         old_alloc_diff = alloc_diff;
         old_mut_time = mutator_time;
         old_freed_diff = freed_diff;
-        old_pause_time = pause;
-        old_heap_size = heap_size; // TODO: Update these values dynamically instead of just during the GC
+        old_pause_time = pause;// TODO: Update these values dynamically instead of just during the GC
         if (gc_time > alloc_time * 95 && !(thrash_counter < 4))
             thrash_counter += 1;
         else if (thrash_counter > 0)
@@ -3446,12 +3467,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
            target_allocs = alpha * heap_size;
 #endif
 
-        uint64_t target_heap = (uint64_t)target_allocs + heap_size;
-        if (target_heap > max_total_memory && !thrashing) // Allow it to go over if we are thrashing if we die we die
-            target_heap = max_total_memory;
-        else if (target_heap < default_collect_interval)
-            target_heap = default_collect_interval;
-        jl_atomic_store_relaxed(&gc_heap_stats.heap_target, target_heap);
+        jl_update_heap_size(target_allocs + heap_size);
 
         double old_ratio = (double)promoted_bytes/(double)heap_size;
         if (heap_size > max_total_memory * 0.8 || old_ratio > 0.15)
@@ -3767,13 +3783,7 @@ JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz)
             jl_atomic_load_relaxed(&ptls->gc_num.allocd) + sz);
         jl_atomic_store_relaxed(&ptls->gc_num.malloc,
             jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1);
-        uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_num.alloc_acc);
-        if (alloc_acc + sz < 16*1024)
-            jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, alloc_acc + sz);
-        else {
-            jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, alloc_acc + sz);
-            jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, 0);
-        }
+        jl_batch_accum_heap_size(ptls, sz);
     }
     return data;
 }
@@ -3790,13 +3800,7 @@ JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz)
             jl_atomic_load_relaxed(&ptls->gc_num.allocd) + nm*sz);
         jl_atomic_store_relaxed(&ptls->gc_num.malloc,
             jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1);
-        uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_num.alloc_acc);
-        if (alloc_acc + sz < 16*1024)
-            jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, alloc_acc + sz * nm);
-        else {
-            jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, alloc_acc + sz * nm);
-            jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, 0);
-        }
+        jl_batch_accum_heap_size(ptls, sz * nm);
     }
     return data;
 }
@@ -3807,14 +3811,7 @@ JL_DLLEXPORT void jl_gc_counted_free_with_size(void *p, size_t sz)
     jl_task_t *ct = jl_current_task;
     free(p);
     if (pgcstack != NULL && ct->world_age) {
-        jl_ptls_t ptls = ct->ptls;
-        uint64_t free_acc = jl_atomic_load_relaxed(&ptls->gc_num.free_acc);
-        if (free_acc + sz < 16*1024)
-            jl_atomic_store_relaxed(&ptls->gc_num.free_acc, free_acc + sz);
-        else {
-            jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, -(free_acc + sz));
-            jl_atomic_store_relaxed(&ptls->gc_num.free_acc, 0);
-        }
+        jl_batch_accum_free_size(ct->ptls, sz);
     }
 }
 
@@ -3834,23 +3831,12 @@ JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size
 
         int64_t diff = sz - old;
         if (diff < 0) {
-            diff = -diff;
-            uint64_t free_acc = jl_atomic_load_relaxed(&ptls->gc_num.free_acc);
-            if (free_acc + diff < 16*1024)
-                jl_atomic_store_relaxed(&ptls->gc_num.free_acc, free_acc + diff);
-            else {
-                jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, -(free_acc + diff));
-                jl_atomic_store_relaxed(&ptls->gc_num.free_acc, 0);
-            }
+            jl_batch_accum_free_size(ptls, -diff);
         }
         else {
-            uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_num.alloc_acc);
-            if (alloc_acc + diff < 16*1024)
-                jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, alloc_acc + diff);
-            else {
-                jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, alloc_acc + diff);
-                jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, 0);
-            }
+            jl_batch_accum_heap_size(ptls, diff);
+            ptls->gc_cache.perm_scanned_bytes += diff; // We can't be sure of the age of this object,
+                                                       // so assume old because in the worst case we run more full GCs
         }
     }
     return data;
@@ -3935,13 +3921,7 @@ JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz)
         jl_atomic_load_relaxed(&ptls->gc_num.allocd) + allocsz);
     jl_atomic_store_relaxed(&ptls->gc_num.malloc,
         jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1);
-    uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_num.alloc_acc);
-    if (alloc_acc + allocsz < 16*1024)
-        jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, alloc_acc + allocsz);
-    else {
-        jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, alloc_acc + allocsz);
-        jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, 0);
-    }
+    jl_batch_accum_heap_size(ptls, allocsz);
 #ifdef _OS_WINDOWS_
     SetLastError(last_error);
 #endif
@@ -3989,23 +3969,10 @@ static void *gc_managed_realloc_(jl_ptls_t ptls, void *d, size_t sz, size_t olds
 
     int64_t diff = allocsz - oldsz;
     if (diff < 0) {
-        diff = -diff;
-        uint64_t free_acc = jl_atomic_load_relaxed(&ptls->gc_num.free_acc);
-        if (free_acc + diff < 16*1024)
-            jl_atomic_store_relaxed(&ptls->gc_num.free_acc, free_acc + diff);
-        else {
-            jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, -(free_acc + diff));
-            jl_atomic_store_relaxed(&ptls->gc_num.free_acc, 0);
-        }
+        jl_batch_accum_free_size(ptls, -diff);
    }
    else {
-        uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_num.alloc_acc);
-        if (alloc_acc + diff < 16*1024)
-            jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, alloc_acc + diff);
-        else {
-            jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, alloc_acc + diff);
-            jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, 0);
-        }
+        jl_batch_accum_heap_size(ptls, diff);
    }
    if (allocsz > oldsz) {
        maybe_record_alloc_to_profile((jl_value_t*)b, allocsz - oldsz, (jl_datatype_t*)jl_buff_tag);