From 629eaee971f26b1761608e487099c61659db9747 Mon Sep 17 00:00:00 2001 From: Yi Lin Date: Thu, 1 Jun 2023 04:48:45 +0000 Subject: [PATCH 1/6] WIP: Try embed mmtk mutator in TLS --- src/julia.h | 4 ++-- src/julia_internal.h | 6 +++++- src/julia_threads.h | 4 +--- src/llvm-final-gc-lowering.cpp | 21 ++++++++++++++++++--- src/mmtk-gc.c | 10 +++++----- src/symbol.c | 2 +- 6 files changed, 32 insertions(+), 15 deletions(-) diff --git a/src/julia.h b/src/julia.h index 7950eca3e0f1d..253105ef94386 100644 --- a/src/julia.h +++ b/src/julia.h @@ -2391,7 +2391,7 @@ STATIC_INLINE void mmtk_gc_wb_full(const void *parent, const void *ptr) JL_NOTSA { jl_task_t *ct = jl_current_task; jl_ptls_t ptls = ct->ptls; - mmtk_object_reference_write_post(ptls->mmtk_mutator_ptr, parent, ptr); + mmtk_object_reference_write_post(&ptls->mmtk_mutator, parent, ptr); } // Inlined fastpath @@ -2405,7 +2405,7 @@ STATIC_INLINE void mmtk_gc_wb_fast(const void *parent, const void *ptr) JL_NOTSA if (((byte_val >> shift) & 1) == 1) { jl_task_t *ct = jl_current_task; jl_ptls_t ptls = ct->ptls; - mmtk_object_reference_write_slow(ptls->mmtk_mutator_ptr, parent, ptr); + mmtk_object_reference_write_slow(&ptls->mmtk_mutator, parent, ptr); } } } diff --git a/src/julia_internal.h b/src/julia_internal.h index 5e5b0ebb76e41..02d0af1ad64b7 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -25,6 +25,10 @@ #include #endif +#ifdef MMTK_GC +#include "mmtk.h" +#endif + // pragma visibility is more useful than -fvisibility #pragma GCC visibility push(hidden) @@ -550,7 +554,7 @@ STATIC_INLINE jl_value_t *jl_gc_permobj(size_t sz, void *ty) JL_NOTSAFEPOINT o->header = tag | GC_OLD_MARKED; #ifdef MMTK_GC jl_ptls_t ptls = jl_current_task->ptls; - mmtk_post_alloc(ptls->mmtk_mutator_ptr, jl_valueof(o), allocsz, 1); + mmtk_post_alloc(&ptls->mmtk_mutator, jl_valueof(o), allocsz, 1); #endif return jl_valueof(o); } diff --git a/src/julia_threads.h b/src/julia_threads.h index 46ad724b71aa0..f79d17d35cb64 100644 --- a/src/julia_threads.h +++ b/src/julia_threads.h @@ -282,9 +282,7 @@ typedef struct _jl_tls_states_t { ) #ifdef MMTK_GC - MMTkMutatorContext* mmtk_mutator_ptr; - void* cursor; - void* limit; + MMTkMutatorContext mmtk_mutator; #endif // some hidden state (usually just because we don't have the type's size declaration) diff --git a/src/llvm-final-gc-lowering.cpp b/src/llvm-final-gc-lowering.cpp index 6ad46f1eb01d4..edea67d2ffef0 100644 --- a/src/llvm-final-gc-lowering.cpp +++ b/src/llvm-final-gc-lowering.cpp @@ -280,17 +280,32 @@ Value *FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F) #else // MMTK_GC auto pool_osize_i32 = ConstantInt::get(Type::getInt32Ty(F.getContext()), osize); auto pool_osize = ConstantInt::get(Type::getInt64Ty(F.getContext()), osize); - auto cursor_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), offsetof(jl_tls_states_t, cursor)); - auto limit_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), offsetof(jl_tls_states_t, limit)); + + // Assuming we use the first immix allocator. + // FIXME: We should get the allocator index and type from MMTk. We have only implemented immix fastpath, for other allocators, we should panic + // printf("cursor offsets: %ld, %ld, %ld, %ld, %ld\n", + // offsetof(jl_tls_states_t, mmtk_mutator), + // offsetof(MMTkMutatorContext, allocators), + // offsetof(Allocators, immix), + // offsetof(ImmixAllocator, cursor), + // offsetof(ImmixAllocator, limit) + // ); + // fflush(stdout); + // printf("size: %ld\n", sizeof(MMTkMutatorContext)); fflush(stdout); + auto allocator_offset = offsetof(jl_tls_states_t, mmtk_mutator) + offsetof(MMTkMutatorContext, allocators) + offsetof(Allocators, immix); + + auto cursor_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), allocator_offset + offsetof(ImmixAllocator, cursor)); + auto limit_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), allocator_offset + offsetof(ImmixAllocator, limit)); auto cursor_tls_i8 = builder.CreateGEP(Type::getInt8Ty(target->getContext()), ptls, cursor_pos); auto cursor_ptr = builder.CreateBitCast(cursor_tls_i8, PointerType::get(Type::getInt64Ty(target->getContext()), 0), "cursor_ptr"); auto cursor = builder.CreateLoad(Type::getInt64Ty(target->getContext()), cursor_ptr, "cursor"); - + // TODO: offset = 8? In jl_mmtk_gc_alloc_default, if the type is jl_buff_tag, the offset is 0. auto delta_offset = builder.CreateNSWSub(ConstantInt::get(Type::getInt64Ty(target->getContext()), 0), ConstantInt::get(Type::getInt64Ty(target->getContext()), 8)); auto delta_cursor = builder.CreateNSWSub(ConstantInt::get(Type::getInt64Ty(target->getContext()), 0), cursor); auto delta_op = builder.CreateNSWAdd(delta_offset, delta_cursor); + // alignment 16 (15 = 16 - 1) auto delta = builder.CreateAnd(delta_op, ConstantInt::get(Type::getInt64Ty(target->getContext()), 15), "delta"); auto result = builder.CreateNSWAdd(cursor, delta, "result"); diff --git a/src/mmtk-gc.c b/src/mmtk-gc.c index 5e868ef11c1d2..c81f630a907a6 100644 --- a/src/mmtk-gc.c +++ b/src/mmtk-gc.c @@ -267,7 +267,7 @@ void jl_init_thread_heap(jl_ptls_t ptls) jl_atomic_store_relaxed(&ptls->gc_num.allocd, -(int64_t)gc_num.interval); MMTk_Mutator mmtk_mutator = mmtk_bind_mutator((void *)ptls, ptls->tid); - ptls->mmtk_mutator_ptr = ((MMTkMutatorContext*)mmtk_mutator); + memcpy(&ptls->mmtk_mutator, mmtk_mutator, sizeof(MMTkMutatorContext)); } // System-wide initialization @@ -506,7 +506,7 @@ void disable_collection(void) JL_DLLEXPORT void jl_gc_array_ptr_copy(jl_array_t *dest, void **dest_p, jl_array_t *src, void **src_p, ssize_t n) JL_NOTSAFEPOINT { jl_ptls_t ptls = jl_current_task->ptls; - mmtk_memory_region_copy(ptls->mmtk_mutator_ptr, jl_array_owner(src), src_p, jl_array_owner(dest), dest_p, n); + mmtk_memory_region_copy(&ptls->mmtk_mutator, jl_array_owner(src), src_p, jl_array_owner(dest), dest_p, n); } // No inline write barrier -- only used for debugging @@ -524,20 +524,20 @@ JL_DLLEXPORT void jl_gc_wb1_slow(const void *parent) JL_NOTSAFEPOINT { jl_task_t *ct = jl_current_task; jl_ptls_t ptls = ct->ptls; - mmtk_object_reference_write_slow(ptls->mmtk_mutator_ptr, parent, (const void*) 0); + mmtk_object_reference_write_slow(&ptls->mmtk_mutator, parent, (const void*) 0); } JL_DLLEXPORT void jl_gc_wb2_slow(const void *parent, const void* ptr) JL_NOTSAFEPOINT { jl_task_t *ct = jl_current_task; jl_ptls_t ptls = ct->ptls; - mmtk_object_reference_write_slow(ptls->mmtk_mutator_ptr, parent, ptr); + mmtk_object_reference_write_slow(&ptls->mmtk_mutator, parent, ptr); } void *jl_gc_perm_alloc_nolock(size_t sz, int zero, unsigned align, unsigned offset) { jl_ptls_t ptls = jl_current_task->ptls; - void* addr = mmtk_alloc(ptls->mmtk_mutator_ptr, sz, align, offset, 1); + void* addr = mmtk_alloc(&ptls->mmtk_mutator, sz, align, offset, 1); return addr; } diff --git a/src/symbol.c b/src/symbol.c index dcfa0b6086846..f1cd18cfb84cc 100644 --- a/src/symbol.c +++ b/src/symbol.c @@ -41,7 +41,7 @@ static jl_sym_t *mk_symbol(const char *str, size_t len) JL_NOTSAFEPOINT jl_set_typetagof(sym, jl_symbol_tag, GC_OLD_MARKED); #ifdef MMTK_GC jl_ptls_t ptls = jl_current_task->ptls; - mmtk_post_alloc(ptls->mmtk_mutator_ptr, jl_valueof(tag), nb, 1); + mmtk_post_alloc(&ptls->mmtk_mutator, jl_valueof(tag), nb, 1); #endif jl_atomic_store_relaxed(&sym->left, NULL); jl_atomic_store_relaxed(&sym->right, NULL); From 85a9a14b3f3cdabe45d95d77bbd75c408067a7a1 Mon Sep 17 00:00:00 2001 From: Yi Lin Date: Wed, 14 Jun 2023 05:02:08 +0000 Subject: [PATCH 2/6] Call post_bind_mutator after memcopy mutator --- src/mmtk-gc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/mmtk-gc.c b/src/mmtk-gc.c index c81f630a907a6..66552650c2e53 100644 --- a/src/mmtk-gc.c +++ b/src/mmtk-gc.c @@ -268,6 +268,9 @@ void jl_init_thread_heap(jl_ptls_t ptls) MMTk_Mutator mmtk_mutator = mmtk_bind_mutator((void *)ptls, ptls->tid); memcpy(&ptls->mmtk_mutator, mmtk_mutator, sizeof(MMTkMutatorContext)); + printf("Memcopy from %p to %p\n", mmtk_mutator, &ptls->mmtk_mutator); fflush(stdout); + + mmtk_post_bind_mutator(&ptls->mmtk_mutator, mmtk_mutator); } // System-wide initialization From 3145df8085427371d7f38b17be736daaaf434cb2 Mon Sep 17 00:00:00 2001 From: Yi Lin Date: Wed, 14 Jun 2023 05:08:41 +0000 Subject: [PATCH 3/6] Minor changes --- src/llvm-final-gc-lowering.cpp | 13 ++----------- src/mmtk-gc.c | 5 +++-- 2 files changed, 5 insertions(+), 13 deletions(-) diff --git a/src/llvm-final-gc-lowering.cpp b/src/llvm-final-gc-lowering.cpp index edea67d2ffef0..e2e51f1737114 100644 --- a/src/llvm-final-gc-lowering.cpp +++ b/src/llvm-final-gc-lowering.cpp @@ -282,16 +282,7 @@ Value *FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F) auto pool_osize = ConstantInt::get(Type::getInt64Ty(F.getContext()), osize); // Assuming we use the first immix allocator. - // FIXME: We should get the allocator index and type from MMTk. We have only implemented immix fastpath, for other allocators, we should panic - // printf("cursor offsets: %ld, %ld, %ld, %ld, %ld\n", - // offsetof(jl_tls_states_t, mmtk_mutator), - // offsetof(MMTkMutatorContext, allocators), - // offsetof(Allocators, immix), - // offsetof(ImmixAllocator, cursor), - // offsetof(ImmixAllocator, limit) - // ); - // fflush(stdout); - // printf("size: %ld\n", sizeof(MMTkMutatorContext)); fflush(stdout); + // FIXME: We should get the allocator index and type from MMTk. auto allocator_offset = offsetof(jl_tls_states_t, mmtk_mutator) + offsetof(MMTkMutatorContext, allocators) + offsetof(Allocators, immix); auto cursor_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), allocator_offset + offsetof(ImmixAllocator, cursor)); @@ -301,7 +292,7 @@ Value *FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F) auto cursor_ptr = builder.CreateBitCast(cursor_tls_i8, PointerType::get(Type::getInt64Ty(target->getContext()), 0), "cursor_ptr"); auto cursor = builder.CreateLoad(Type::getInt64Ty(target->getContext()), cursor_ptr, "cursor"); - // TODO: offset = 8? In jl_mmtk_gc_alloc_default, if the type is jl_buff_tag, the offset is 0. + // offset = 8 auto delta_offset = builder.CreateNSWSub(ConstantInt::get(Type::getInt64Ty(target->getContext()), 0), ConstantInt::get(Type::getInt64Ty(target->getContext()), 8)); auto delta_cursor = builder.CreateNSWSub(ConstantInt::get(Type::getInt64Ty(target->getContext()), 0), cursor); auto delta_op = builder.CreateNSWAdd(delta_offset, delta_cursor); diff --git a/src/mmtk-gc.c b/src/mmtk-gc.c index 66552650c2e53..f2bcb897b4baa 100644 --- a/src/mmtk-gc.c +++ b/src/mmtk-gc.c @@ -266,10 +266,11 @@ void jl_init_thread_heap(jl_ptls_t ptls) memset(&ptls->gc_num, 0, sizeof(ptls->gc_num)); jl_atomic_store_relaxed(&ptls->gc_num.allocd, -(int64_t)gc_num.interval); + // Create mutator MMTk_Mutator mmtk_mutator = mmtk_bind_mutator((void *)ptls, ptls->tid); + // Copy the mutator to the thread local storage memcpy(&ptls->mmtk_mutator, mmtk_mutator, sizeof(MMTkMutatorContext)); - printf("Memcopy from %p to %p\n", mmtk_mutator, &ptls->mmtk_mutator); fflush(stdout); - + // Call post_bind to maintain a list of active mutators and to reclaim the old mutator (which is no longer needed) mmtk_post_bind_mutator(&ptls->mmtk_mutator, mmtk_mutator); } From 94118df6a3a6a675931a20ff63bda0d47674215e Mon Sep 17 00:00:00 2001 From: Yi Lin Date: Wed, 14 Jun 2023 05:57:42 +0000 Subject: [PATCH 4/6] Remove the include mmtk.h in julia_internal.h --- src/julia_internal.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/julia_internal.h b/src/julia_internal.h index 02d0af1ad64b7..a6bcdb2f0df3e 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -25,10 +25,6 @@ #include #endif -#ifdef MMTK_GC -#include "mmtk.h" -#endif - // pragma visibility is more useful than -fvisibility #pragma GCC visibility push(hidden) From e0ff499fed9797529a8efedcc3799a7f9cfb771b Mon Sep 17 00:00:00 2001 From: Yi Lin Date: Wed, 14 Jun 2023 07:30:48 +0000 Subject: [PATCH 5/6] Remove trailing space --- src/llvm-final-gc-lowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llvm-final-gc-lowering.cpp b/src/llvm-final-gc-lowering.cpp index e2e51f1737114..48eb584b81893 100644 --- a/src/llvm-final-gc-lowering.cpp +++ b/src/llvm-final-gc-lowering.cpp @@ -280,7 +280,7 @@ Value *FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F) #else // MMTK_GC auto pool_osize_i32 = ConstantInt::get(Type::getInt32Ty(F.getContext()), osize); auto pool_osize = ConstantInt::get(Type::getInt64Ty(F.getContext()), osize); - + // Assuming we use the first immix allocator. // FIXME: We should get the allocator index and type from MMTk. auto allocator_offset = offsetof(jl_tls_states_t, mmtk_mutator) + offsetof(MMTkMutatorContext, allocators) + offsetof(Allocators, immix); From 953583c78ca16383cc5de43e9b342cdcc6696823 Mon Sep 17 00:00:00 2001 From: Yi Lin Date: Thu, 15 Jun 2023 00:53:46 +0000 Subject: [PATCH 6/6] Add jl_deinit_thread_heap --- src/gc.c | 5 +++++ src/julia_internal.h | 1 + src/mmtk-gc.c | 5 +++++ src/threading.c | 3 +++ 4 files changed, 14 insertions(+) diff --git a/src/gc.c b/src/gc.c index ce80597a937f1..90eae32f0affc 100644 --- a/src/gc.c +++ b/src/gc.c @@ -3501,6 +3501,11 @@ void jl_init_thread_heap(jl_ptls_t ptls) jl_atomic_store_relaxed(&ptls->gc_num.allocd, -(int64_t)gc_num.interval); } +void jl_deinit_thread_heap(jl_ptls_t ptls) +{ + // Do nothing +} + // System-wide initializations void jl_gc_init(void) { diff --git a/src/julia_internal.h b/src/julia_internal.h index a6bcdb2f0df3e..76ed8f977dc7a 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -918,6 +918,7 @@ void jl_init_serializer(void); void jl_gc_init(void); void jl_init_uv(void); void jl_init_thread_heap(jl_ptls_t ptls) JL_NOTSAFEPOINT; +void jl_deinit_thread_heap(jl_ptls_t ptls) JL_NOTSAFEPOINT; void jl_init_int32_int64_cache(void); JL_DLLEXPORT void jl_init_options(void); diff --git a/src/mmtk-gc.c b/src/mmtk-gc.c index f2bcb897b4baa..db3affd603cb2 100644 --- a/src/mmtk-gc.c +++ b/src/mmtk-gc.c @@ -274,6 +274,11 @@ void jl_init_thread_heap(jl_ptls_t ptls) mmtk_post_bind_mutator(&ptls->mmtk_mutator, mmtk_mutator); } +void jl_deinit_thread_heap(jl_ptls_t ptls) +{ + mmtk_destroy_mutator(&ptls->mmtk_mutator); +} + // System-wide initialization // TODO: remove locks? remove anything else? void jl_gc_init(void) diff --git a/src/threading.c b/src/threading.c index 51bdd6e8107da..d58528fa183be 100644 --- a/src/threading.c +++ b/src/threading.c @@ -478,6 +478,9 @@ static void jl_delete_thread(void *value) JL_NOTSAFEPOINT_ENTER #else pthread_mutex_unlock(&in_signal_lock); #endif + + jl_deinit_thread_heap(ptls); + // then park in safe-region (void)jl_gc_safe_enter(ptls); }