diff --git a/src/array.c b/src/array.c index 8a064583bbc9e..2311b2e352d89 100644 --- a/src/array.c +++ b/src/array.c @@ -496,9 +496,8 @@ JL_DLLEXPORT jl_value_t *jl_alloc_string(size_t len) // the Allocations Profiler. (See https://github.com/JuliaLang/julia/pull/43868 for more details.) s = jl_gc_pool_alloc_noinline(ptls, (char*)p - (char*)ptls, osize); #else - int pool_id = jl_gc_szclass_align8(allocsz); - int osize = jl_gc_sizeclasses[pool_id]; - s = jl_mmtk_gc_alloc_default(ptls, pool_id, osize, jl_string_type); + size_t osize = mmtk_align_alloc_size_8(allocsz); + s = jl_mmtk_gc_alloc_default(ptls, osize, jl_string_type); #endif } else { diff --git a/src/julia.h b/src/julia.h index 253105ef94386..2ad7105310eab 100644 --- a/src/julia.h +++ b/src/julia.h @@ -2414,6 +2414,27 @@ STATIC_INLINE void mmtk_gc_wb(const void *parent, const void *ptr) JL_NOTSAFEPOI { mmtk_gc_wb_fast(parent, ptr); } + +#define MMTK_MIN_ALIGNMENT 4 +STATIC_INLINE size_t mmtk_align_alloc_size(size_t sz) JL_NOTSAFEPOINT +{ + size_t ret = (sz + MMTK_MIN_ALIGNMENT - 1) & ~(MMTK_MIN_ALIGNMENT -1); + if (ret < sz) { + printf("wrong!!\n"); + exit(1); + } + return ret; +} +STATIC_INLINE size_t mmtk_align_alloc_size_8(size_t sz) JL_NOTSAFEPOINT +{ + size_t ret = (sz + 8 - 1) & ~(8 -1); + if (ret < sz) { + printf("wrong!!\n"); + exit(1); + } + return ret; +} + #endif #ifdef __cplusplus diff --git a/src/julia_internal.h b/src/julia_internal.h index 76ed8f977dc7a..1cab2855cb1ee 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -339,7 +339,7 @@ jl_value_t *jl_gc_pool_alloc_noinline(jl_ptls_t ptls, int pool_offset, int osize); jl_value_t *jl_gc_big_alloc_noinline(jl_ptls_t ptls, size_t allocsz); #ifdef MMTK_GC -JL_DLLIMPORT jl_value_t *jl_mmtk_gc_alloc_default(jl_ptls_t ptls, int pool_offset, int osize, void* ty); +JL_DLLIMPORT jl_value_t *jl_mmtk_gc_alloc_default(jl_ptls_t ptls, size_t osize, void* ty); JL_DLLIMPORT jl_value_t *jl_mmtk_gc_alloc_big(jl_ptls_t ptls, size_t allocsz); JL_DLLIMPORT extern void mmtk_post_alloc(void* mutator, void* obj, size_t bytes, int allocator); JL_DLLIMPORT extern void mmtk_initialize_collection(void* tls); @@ -494,9 +494,8 @@ STATIC_INLINE jl_value_t *jl_gc_alloc_(jl_ptls_t ptls, size_t sz, void *ty) jl_value_t *v; const size_t allocsz = sz + sizeof(jl_taggedvalue_t); if (sz <= GC_MAX_SZCLASS) { - int pool_id = jl_gc_szclass(allocsz); - int osize = jl_gc_sizeclasses[pool_id]; - v = jl_mmtk_gc_alloc_default(ptls, pool_id, osize, ty); + int osize = mmtk_align_alloc_size(allocsz); + v = jl_mmtk_gc_alloc_default(ptls, osize, ty); } else { if (allocsz < sz) // overflow in adding offs, size was "negative" diff --git a/src/llvm-final-gc-lowering.cpp b/src/llvm-final-gc-lowering.cpp index 48eb584b81893..dbc6f4396142f 100644 --- a/src/llvm-final-gc-lowering.cpp +++ b/src/llvm-final-gc-lowering.cpp @@ -278,77 +278,86 @@ Value *FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F) newI = builder.CreateCall(poolAllocFunc, { ptls, pool_offs, pool_osize }); derefAttr = Attribute::getWithDereferenceableBytes(F.getContext(), osize); #else // MMTK_GC + osize = mmtk_align_alloc_size(sz + sizeof(jl_taggedvalue_t)); auto pool_osize_i32 = ConstantInt::get(Type::getInt32Ty(F.getContext()), osize); auto pool_osize = ConstantInt::get(Type::getInt64Ty(F.getContext()), osize); - // Assuming we use the first immix allocator. - // FIXME: We should get the allocator index and type from MMTk. - auto allocator_offset = offsetof(jl_tls_states_t, mmtk_mutator) + offsetof(MMTkMutatorContext, allocators) + offsetof(Allocators, immix); - - auto cursor_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), allocator_offset + offsetof(ImmixAllocator, cursor)); - auto limit_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), allocator_offset + offsetof(ImmixAllocator, limit)); - - auto cursor_tls_i8 = builder.CreateGEP(Type::getInt8Ty(target->getContext()), ptls, cursor_pos); - auto cursor_ptr = builder.CreateBitCast(cursor_tls_i8, PointerType::get(Type::getInt64Ty(target->getContext()), 0), "cursor_ptr"); - auto cursor = builder.CreateLoad(Type::getInt64Ty(target->getContext()), cursor_ptr, "cursor"); - - // offset = 8 - auto delta_offset = builder.CreateNSWSub(ConstantInt::get(Type::getInt64Ty(target->getContext()), 0), ConstantInt::get(Type::getInt64Ty(target->getContext()), 8)); - auto delta_cursor = builder.CreateNSWSub(ConstantInt::get(Type::getInt64Ty(target->getContext()), 0), cursor); - auto delta_op = builder.CreateNSWAdd(delta_offset, delta_cursor); - // alignment 16 (15 = 16 - 1) - auto delta = builder.CreateAnd(delta_op, ConstantInt::get(Type::getInt64Ty(target->getContext()), 15), "delta"); - auto result = builder.CreateNSWAdd(cursor, delta, "result"); - - auto new_cursor = builder.CreateNSWAdd(result, pool_osize); - - auto limit_tls_i8 = builder.CreateGEP(Type::getInt8Ty(target->getContext()), ptls, limit_pos); - auto limit_ptr = builder.CreateBitCast(limit_tls_i8, PointerType::get(Type::getInt64Ty(target->getContext()), 0), "limit_ptr"); - auto limit = builder.CreateLoad(Type::getInt64Ty(target->getContext()), limit_ptr, "limit"); - - auto gt_limit = builder.CreateICmpSGT(new_cursor, limit); - - auto current_block = target->getParent(); - builder.SetInsertPoint(target->getNextNode()); - auto phiNode = builder.CreatePHI(poolAllocFunc->getReturnType(), 2, "phi_fast_slow"); - auto top_cont = current_block->splitBasicBlock(target->getNextNode(), "top_cont"); - - auto slowpath = BasicBlock::Create(target->getContext(), "slowpath", target->getFunction()); - auto fastpath = BasicBlock::Create(target->getContext(), "fastpath", target->getFunction(), top_cont); - - auto next_br = current_block->getTerminator(); - next_br->eraseFromParent(); - builder.SetInsertPoint(current_block); - builder.CreateCondBr(gt_limit, slowpath, fastpath); - - // slowpath - builder.SetInsertPoint(slowpath); - auto pool_offs = ConstantInt::get(Type::getInt32Ty(F.getContext()), 1); - auto new_call = builder.CreateCall(poolAllocFunc, { ptls, pool_offs, pool_osize_i32 }); - new_call->setAttributes(new_call->getCalledFunction()->getAttributes()); - builder.CreateBr(top_cont); - - // // fastpath - builder.SetInsertPoint(fastpath); - builder.CreateStore(new_cursor, cursor_ptr); - - // ptls->gc_num.allocd += osize; - auto pool_alloc_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), offsetof(jl_tls_states_t, gc_num)); - auto pool_alloc_i8 = builder.CreateGEP(Type::getInt8Ty(target->getContext()), ptls, pool_alloc_pos); - auto pool_alloc_tls = builder.CreateBitCast(pool_alloc_i8, PointerType::get(Type::getInt64Ty(target->getContext()), 0), "pool_alloc"); - auto pool_allocd = builder.CreateLoad(Type::getInt64Ty(target->getContext()), pool_alloc_tls); - auto pool_allocd_total = builder.CreateAdd(pool_allocd, pool_osize); - builder.CreateStore(pool_allocd_total, pool_alloc_tls); - - auto v_raw = builder.CreateNSWAdd(result, ConstantInt::get(Type::getInt64Ty(target->getContext()), sizeof(jl_taggedvalue_t))); - auto v_as_ptr = builder.CreateIntToPtr(v_raw, poolAllocFunc->getReturnType()); - builder.CreateBr(top_cont); - - phiNode->addIncoming(new_call, slowpath); - phiNode->addIncoming(v_as_ptr, fastpath); - phiNode->takeName(target); - - return phiNode; + const bool INLINE_FASTPATH_ALLOCATION = true; + + if (INLINE_FASTPATH_ALLOCATION) { + // Assuming we use the first immix allocator. + // FIXME: We should get the allocator index and type from MMTk. + auto allocator_offset = offsetof(jl_tls_states_t, mmtk_mutator) + offsetof(MMTkMutatorContext, allocators) + offsetof(Allocators, immix); + + auto cursor_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), allocator_offset + offsetof(ImmixAllocator, cursor)); + auto limit_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), allocator_offset + offsetof(ImmixAllocator, limit)); + + auto cursor_tls_i8 = builder.CreateGEP(Type::getInt8Ty(target->getContext()), ptls, cursor_pos); + auto cursor_ptr = builder.CreateBitCast(cursor_tls_i8, PointerType::get(Type::getInt64Ty(target->getContext()), 0), "cursor_ptr"); + auto cursor = builder.CreateLoad(Type::getInt64Ty(target->getContext()), cursor_ptr, "cursor"); + + // offset = 8 + auto delta_offset = builder.CreateNSWSub(ConstantInt::get(Type::getInt64Ty(target->getContext()), 0), ConstantInt::get(Type::getInt64Ty(target->getContext()), 8)); + auto delta_cursor = builder.CreateNSWSub(ConstantInt::get(Type::getInt64Ty(target->getContext()), 0), cursor); + auto delta_op = builder.CreateNSWAdd(delta_offset, delta_cursor); + // alignment 16 (15 = 16 - 1) + auto delta = builder.CreateAnd(delta_op, ConstantInt::get(Type::getInt64Ty(target->getContext()), 15), "delta"); + auto result = builder.CreateNSWAdd(cursor, delta, "result"); + + auto new_cursor = builder.CreateNSWAdd(result, pool_osize); + + auto limit_tls_i8 = builder.CreateGEP(Type::getInt8Ty(target->getContext()), ptls, limit_pos); + auto limit_ptr = builder.CreateBitCast(limit_tls_i8, PointerType::get(Type::getInt64Ty(target->getContext()), 0), "limit_ptr"); + auto limit = builder.CreateLoad(Type::getInt64Ty(target->getContext()), limit_ptr, "limit"); + + auto gt_limit = builder.CreateICmpSGT(new_cursor, limit); + + auto current_block = target->getParent(); + builder.SetInsertPoint(target->getNextNode()); + auto phiNode = builder.CreatePHI(poolAllocFunc->getReturnType(), 2, "phi_fast_slow"); + auto top_cont = current_block->splitBasicBlock(target->getNextNode(), "top_cont"); + + auto slowpath = BasicBlock::Create(target->getContext(), "slowpath", target->getFunction()); + auto fastpath = BasicBlock::Create(target->getContext(), "fastpath", target->getFunction(), top_cont); + + auto next_br = current_block->getTerminator(); + next_br->eraseFromParent(); + builder.SetInsertPoint(current_block); + builder.CreateCondBr(gt_limit, slowpath, fastpath); + + // slowpath + builder.SetInsertPoint(slowpath); + auto pool_offs = ConstantInt::get(Type::getInt32Ty(F.getContext()), 1); + auto new_call = builder.CreateCall(poolAllocFunc, { ptls, pool_offs, pool_osize_i32 }); + new_call->setAttributes(new_call->getCalledFunction()->getAttributes()); + builder.CreateBr(top_cont); + + // // fastpath + builder.SetInsertPoint(fastpath); + builder.CreateStore(new_cursor, cursor_ptr); + + // ptls->gc_num.allocd += osize; + auto pool_alloc_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), offsetof(jl_tls_states_t, gc_num)); + auto pool_alloc_i8 = builder.CreateGEP(Type::getInt8Ty(target->getContext()), ptls, pool_alloc_pos); + auto pool_alloc_tls = builder.CreateBitCast(pool_alloc_i8, PointerType::get(Type::getInt64Ty(target->getContext()), 0), "pool_alloc"); + auto pool_allocd = builder.CreateLoad(Type::getInt64Ty(target->getContext()), pool_alloc_tls); + auto pool_allocd_total = builder.CreateAdd(pool_allocd, pool_osize); + builder.CreateStore(pool_allocd_total, pool_alloc_tls); + + auto v_raw = builder.CreateNSWAdd(result, ConstantInt::get(Type::getInt64Ty(target->getContext()), sizeof(jl_taggedvalue_t))); + auto v_as_ptr = builder.CreateIntToPtr(v_raw, poolAllocFunc->getReturnType()); + builder.CreateBr(top_cont); + + phiNode->addIncoming(new_call, slowpath); + phiNode->addIncoming(v_as_ptr, fastpath); + phiNode->takeName(target); + + return phiNode; + } else { + auto pool_offs = ConstantInt::get(Type::getInt32Ty(F.getContext()), 1); + newI = builder.CreateCall(poolAllocFunc, { ptls, pool_offs, pool_osize_i32 }); + derefAttr = Attribute::getWithDereferenceableBytes(F.getContext(), osize); + } #endif // MMTK_GC } } else { diff --git a/src/mmtk-gc.c b/src/mmtk-gc.c index db3affd603cb2..5ab173ab4e232 100644 --- a/src/mmtk-gc.c +++ b/src/mmtk-gc.c @@ -161,7 +161,7 @@ inline jl_value_t *jl_gc_pool_alloc_inner(jl_ptls_t ptls, int pool_offset, int o // TODO: drop this okay? // maybe_collect(ptls); - jl_value_t *v = jl_mmtk_gc_alloc_default(ptls, pool_offset, osize, NULL); + jl_value_t *v = jl_mmtk_gc_alloc_default(ptls, (size_t) osize, NULL); // TODO: this is done (without atomic operations) in jl_mmtk_gc_alloc_default; enable // here when that's edited? /* @@ -546,7 +546,8 @@ JL_DLLEXPORT void jl_gc_wb2_slow(const void *parent, const void* ptr) JL_NOTSAFE void *jl_gc_perm_alloc_nolock(size_t sz, int zero, unsigned align, unsigned offset) { jl_ptls_t ptls = jl_current_task->ptls; - void* addr = mmtk_alloc(&ptls->mmtk_mutator, sz, align, offset, 1); + size_t allocsz = mmtk_align_alloc_size(sz); + void* addr = mmtk_alloc(&ptls->mmtk_mutator, allocsz, align, offset, 1); return addr; }