diff --git a/src/Makefile b/src/Makefile index 2e73065d7b24e..8236d28439c4a 100644 --- a/src/Makefile +++ b/src/Makefile @@ -50,7 +50,9 @@ endif LLVMLINK := ifeq ($(JULIACODEGEN),LLVM) -SRCS += codegen jitlayers disasm debuginfo llvm-simdloop llvm-ptls llvm-muladd llvm-late-gc-lowering llvm-lower-handlers llvm-gc-invariant-verifier llvm-propagate-addrspaces cgmemmgr +SRCS += codegen jitlayers disasm debuginfo llvm-simdloop llvm-ptls llvm-muladd \ + llvm-late-gc-lowering llvm-lower-handlers llvm-gc-invariant-verifier \ + llvm-propagate-addrspaces llvm-alloc-opt cgmemmgr FLAGS += -I$(shell $(LLVM_CONFIG_HOST) --includedir) LLVM_LIBS := all ifeq ($(USE_POLLY),1) diff --git a/src/ccall.cpp b/src/ccall.cpp index e6614387d7627..818989cefc24e 100644 --- a/src/ccall.cpp +++ b/src/ccall.cpp @@ -2106,7 +2106,7 @@ jl_cgval_t function_sig_t::emit_a_ccall( size_t rtsz = jl_datatype_size(rt); assert(rtsz > 0); Value *strct = emit_allocobj(ctx, rtsz, runtime_bt); - int boxalign = jl_gc_alignment(rtsz); + int boxalign = jl_datatype_align(rt); #ifndef JL_NDEBUG #if JL_LLVM_VERSION >= 40000 const DataLayout &DL = jl_data_layout; diff --git a/src/cgutils.cpp b/src/cgutils.cpp index 1ec9e1fa88e28..430a33b105e22 100644 --- a/src/cgutils.cpp +++ b/src/cgutils.cpp @@ -2097,25 +2097,12 @@ static Value *emit_allocobj(jl_codectx_t &ctx, size_t static_size, Value *jt) { JL_FEAT_REQUIRE(ctx, dynamic_alloc); JL_FEAT_REQUIRE(ctx, runtime); - - int osize; - int offset = jl_gc_classify_pools(static_size, &osize); Value *ptls_ptr = emit_bitcast(ctx, ctx.ptlsStates, T_pint8); - Value *v; - if (offset < 0) { - Value *args[] = {ptls_ptr, - ConstantInt::get(T_size, static_size + sizeof(void*))}; - v = ctx.builder.CreateCall(prepare_call(jlalloc_big_func), - ArrayRef(args, 2)); - } - else { - Value *pool_offs = ConstantInt::get(T_int32, offset); - Value *args[] = {ptls_ptr, pool_offs, ConstantInt::get(T_int32, osize)}; - v = ctx.builder.CreateCall(prepare_call(jlalloc_pool_func), - ArrayRef(args, 3)); - } - tbaa_decorate(tbaa_tag, ctx.builder.CreateStore(maybe_decay_untracked(jt), emit_typeptr_addr(ctx, v))); - return v; + auto call = ctx.builder.CreateCall(prepare_call(jl_alloc_obj_func), + {ptls_ptr, ConstantInt::get(T_size, static_size), + maybe_decay_untracked(jt)}); + call->setAttributes(jl_alloc_obj_func->getAttributes()); + return call; } // if ptr is NULL this emits a write barrier _back_ diff --git a/src/codegen.cpp b/src/codegen.cpp index d72a5a801dc5a..857a05112dc31 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -314,8 +314,7 @@ static Function *jlgenericfunction_func; static Function *jlenter_func; static Function *jlleave_func; static Function *jlegal_func; -static Function *jlalloc_pool_func; -static Function *jlalloc_big_func; +static Function *jl_alloc_obj_func; static Function *jlisa_func; static Function *jlsubtype_func; static Function *jlapplytype_func; @@ -6517,24 +6516,19 @@ static void init_julia_llvm_env(Module *m) "jl_instantiate_type_in_env", m); add_named_global(jlapplytype_func, &jl_instantiate_type_in_env); - std::vector alloc_pool_args(0); - alloc_pool_args.push_back(T_pint8); - alloc_pool_args.push_back(T_int32); - alloc_pool_args.push_back(T_int32); - jlalloc_pool_func = - Function::Create(FunctionType::get(T_prjlvalue, alloc_pool_args, false), - Function::ExternalLinkage, - "jl_gc_pool_alloc", m); - add_named_global(jlalloc_pool_func, &jl_gc_pool_alloc); - - std::vector alloc_big_args(0); - alloc_big_args.push_back(T_pint8); - alloc_big_args.push_back(T_size); - jlalloc_big_func = - 
Function::Create(FunctionType::get(T_prjlvalue, alloc_big_args, false), - Function::ExternalLinkage, - "jl_gc_big_alloc", m); - add_named_global(jlalloc_big_func, &jl_gc_big_alloc); + std::vector gc_alloc_args(0); + gc_alloc_args.push_back(T_pint8); + gc_alloc_args.push_back(T_size); + gc_alloc_args.push_back(T_prjlvalue); + jl_alloc_obj_func = Function::Create(FunctionType::get(T_prjlvalue, gc_alloc_args, false), + Function::ExternalLinkage, + "julia.gc_alloc_obj"); +#if JL_LLVM_VERSION >= 50000 + jl_alloc_obj_func->addAttribute(AttributeList::ReturnIndex, Attribute::NoAlias); +#else + jl_alloc_obj_func->addAttribute(AttributeSet::ReturnIndex, Attribute::NoAlias); +#endif + add_named_global(jl_alloc_obj_func, (void*)NULL, /*dllimport*/false); std::vector dlsym_args(0); dlsym_args.push_back(T_pint8); diff --git a/src/intrinsics.cpp b/src/intrinsics.cpp index 7c9ef29df2f9e..5b7397ddc80d0 100644 --- a/src/intrinsics.cpp +++ b/src/intrinsics.cpp @@ -325,11 +325,7 @@ static Value *emit_unbox(jl_codectx_t &ctx, Type *to, const jl_cgval_t &x, jl_va } int alignment; - if (x.isboxed) { - // julia's gc gives 16-byte aligned addresses - alignment = 16; - } - else if (jt) { + if (jt) { alignment = julia_alignment(p, jt, 0); } else { diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp index 00e5f84643461..5e4cc93ca5f7f 100644 --- a/src/jitlayers.cpp +++ b/src/jitlayers.cpp @@ -147,6 +147,7 @@ void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level) // effectiveness of the optimization, but should retain correctness. #if JL_LLVM_VERSION < 50000 PM->add(createLowerExcHandlersPass()); + PM->add(createAllocOptPass()); PM->add(createLateLowerGCFramePass()); // Remove dead use of ptls PM->add(createDeadCodeEliminationPass()); @@ -161,6 +162,12 @@ void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level) PM->add(createAlwaysInlinerPass()); // Respect always_inline #endif +#if JL_LLVM_VERSION >= 50000 + // Running `memcpyopt` between this and `sroa` seems to give `sroa` a hard time + // merging the `alloca` for the unboxed data and the `alloca` created by the `alloc_opt` + // pass. + PM->add(createAllocOptPass()); +#endif PM->add(createInstructionCombiningPass()); // Cleanup for scalarrepl. PM->add(createSROAPass()); // Break up aggregate allocas PM->add(createInstructionCombiningPass()); // Cleanup for scalarrepl. diff --git a/src/jitlayers.h b/src/jitlayers.h index 5703a35efd29d..ee13e49f2cb6e 100644 --- a/src/jitlayers.h +++ b/src/jitlayers.h @@ -175,6 +175,7 @@ Pass *createLateLowerGCFramePass(); Pass *createLowerExcHandlersPass(); Pass *createGCInvariantVerifierPass(bool Strong); Pass *createPropagateJuliaAddrspaces(); +Pass *createAllocOptPass(); // Whether the Function is an llvm or julia intrinsic. static inline bool isIntrinsicFunction(Function *F) { diff --git a/src/llvm-alloc-opt.cpp b/src/llvm-alloc-opt.cpp new file mode 100644 index 0000000000000..9130f04f2249f --- /dev/null +++ b/src/llvm-alloc-opt.cpp @@ -0,0 +1,667 @@ +// This file is a part of Julia. 
License is MIT: https://julialang.org/license + +#define DEBUG_TYPE "alloc_opt" +#undef DEBUG +#include "llvm-version.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "fix_llvm_assert.h" + +#include "codegen_shared.h" +#include "julia.h" +#include "julia_internal.h" + +#include +#include + +using namespace llvm; + +extern std::pair tbaa_make_child(const char *name, MDNode *parent=nullptr, bool isConstant=false); + +namespace { + +static void copyMetadata(Instruction *dest, const Instruction *src) +{ +#if JL_LLVM_VERSION < 40000 + if (!src->hasMetadata()) + return; + SmallVector,4> TheMDs; + src->getAllMetadataOtherThanDebugLoc(TheMDs); + for (const auto &MD : TheMDs) + dest->setMetadata(MD.first, MD.second); + dest->setDebugLoc(src->getDebugLoc()); +#else + dest->copyMetadata(*src); +#endif +} + +static bool isBundleOperand(CallInst *call, unsigned idx) +{ +#if JL_LLVM_VERSION < 40000 + return call->hasOperandBundles() && idx >= call->getBundleOperandsStartIndex() && + idx < call->getBundleOperandsEndIndex(); +#else + return call->isBundleOperand(idx); +#endif +} + +/** + * Promote `julia.gc_alloc_obj` which do not have escaping root to a alloca. + * Uses that are not considered to escape the object (i.e. heap address) includes, + * + * * load + * * `pointer_from_objref` + * * `ccall` gcroot array (`jl_roots` operand bundle) + * * store (as address) + * * addrspacecast, bitcast, getelementptr + * + * The results of these cast instructions will be scanned recursively. + * + * All other uses are considered to escape conservatively. + */ + +struct AllocOpt : public FunctionPass { + static char ID; + AllocOpt() + : FunctionPass(ID) + { + llvm::initializeDominatorTreeWrapperPassPass(*PassRegistry::getPassRegistry()); + } + +private: + LLVMContext *ctx; + + const DataLayout *DL; + + Function *alloc_obj; + Function *ptr_from_objref; + Function *lifetime_start; + Function *lifetime_end; + + Type *T_int8; + Type *T_int32; + Type *T_int64; + Type *T_size; + Type *T_pint8; + Type *T_prjlvalue; + Type *T_pjlvalue; + Type *T_pprjlvalue; + + MDNode *tbaa_tag; + + struct CheckInstFrame { + Instruction *parent; + uint64_t offset; + Instruction::use_iterator use_it; + Instruction::use_iterator use_end; + }; + typedef SmallVector CheckInstStack; + struct ReplaceUsesFrame { + Instruction *orig_i; + Instruction *new_i; + ReplaceUsesFrame(Instruction *orig_i, Instruction *new_i) + : orig_i(orig_i), + new_i(new_i) + {} + }; + typedef SmallVector ReplaceUsesStack; + + struct LifetimeMarker { + LifetimeMarker(AllocOpt &pass) + : pass(pass), + first_safepoint{}, + stack{} + {} + // insert llvm.lifetime.* calls for `ptr` with size `sz` + // based on the use of `orig` given in `alloc_uses`. 
+ void insert(Instruction *ptr, Constant *sz, Instruction *orig, + const std::set &alloc_uses); + private: + Instruction *getFirstSafepoint(BasicBlock *bb); + void insertEnd(Instruction *ptr, Constant *sz, Instruction *insert); + struct Frame { + BasicBlock *bb; + pred_iterator p_cur; + pred_iterator p_end; + Frame(BasicBlock *bb) + : bb(bb), + p_cur(pred_begin(bb)), + p_end(pred_end(bb)) + {} + }; + AllocOpt &pass; + std::map first_safepoint; + SmallVector stack; + }; + + bool doInitialization(Module &m) override; + bool runOnFunction(Function &F) override; + bool checkInst(Instruction *I, CheckInstStack &stack, std::set &uses, + bool &ignore_tag); + void replaceUsesWith(Instruction *orig_i, Instruction *new_i, ReplaceUsesStack &stack); + bool isSafepoint(Instruction *inst); + void getAnalysisUsage(AnalysisUsage &AU) const override + { + FunctionPass::getAnalysisUsage(AU); + AU.addRequired(); + AU.addPreserved(); + AU.setPreservesCFG(); + } +}; + +Instruction *AllocOpt::LifetimeMarker::getFirstSafepoint(BasicBlock *bb) +{ + auto it = first_safepoint.find(bb); + if (it != first_safepoint.end()) + return it->second; + Instruction *first = nullptr; + for (auto &I: *bb) { + if (pass.isSafepoint(&I)) { + first = &I; + break; + } + } + first_safepoint[bb] = first; + return first; +} + +void AllocOpt::LifetimeMarker::insertEnd(Instruction *ptr, Constant *sz, Instruction *insert) +{ + BasicBlock::iterator it(insert); + BasicBlock::iterator begin(insert->getParent()->begin()); + // Makes sure that the end is inserted before nearby start. + // We insert start before the allocation call, if it is the first safepoint we find for + // another instruction, it's better if we insert the end before the start instead of the + // allocation so that the two allocations do not have overlapping lifetime. + while (it != begin) { + --it; + if (auto II = dyn_cast(&*it)) { + if (II->getIntrinsicID() == Intrinsic::lifetime_start || + II->getIntrinsicID() == Intrinsic::lifetime_end) { + insert = II; + continue; + } + } + break; + } + CallInst::Create(pass.lifetime_end, {sz, ptr}, "", insert); +} + +void AllocOpt::LifetimeMarker::insert(Instruction *ptr, Constant *sz, Instruction *orig, + const std::set &alloc_uses) +{ + CallInst::Create(pass.lifetime_start, {sz, ptr}, "", orig); + BasicBlock *def_bb = orig->getParent(); + std::set bbs{def_bb}; + auto &DT = pass.getAnalysis().getDomTree(); + // Collect all BB where the allocation is live + for (auto use: alloc_uses) { + auto bb = use->getParent(); + if (!bbs.insert(bb).second) + continue; + assert(stack.empty()); + Frame cur{bb}; + while (true) { + assert(cur.p_cur != cur.p_end); + auto pred = *cur.p_cur; + ++cur.p_cur; + if (bbs.insert(pred).second) { + if (cur.p_cur != cur.p_end) + stack.push_back(cur); + cur = Frame(pred); + } + if (cur.p_cur == cur.p_end) { + if (stack.empty()) + break; + cur = stack.back(); + stack.pop_back(); + } + } + } +#ifndef JL_NDEBUG + for (auto bb: bbs) { + if (bb == def_bb) + continue; + if (DT.dominates(orig, bb)) + continue; + auto F = bb->getParent(); +#if JL_LLVM_VERSION >= 50000 + F->print(llvm::dbgs(), nullptr, false, true); + orig->print(llvm::dbgs(), true); + jl_safe_printf("Does not dominate BB:\n"); + bb->print(llvm::dbgs(), true); +#else + F->dump(); + orig->dump(); + jl_safe_printf("Does not dominate BB:\n"); + bb->dump(); +#endif + abort(); + } +#endif + // For each BB, find the first instruction(s) where the allocation is possibly dead. + // If all successors are live, then there isn't one. 
+ // If all successors are dead, then it's the first instruction after the last use + // within the BB. + // If some successors are live and others are dead, it's the first instruction in + // the successors that are dead. + std::vector first_dead; + for (auto bb: bbs) { + bool has_use = false; + for (auto succ: successors(bb)) { + // def_bb is the only bb in bbs that's not dominated by orig + if (succ != def_bb && bbs.count(succ)) { + has_use = true; + break; + } + } + if (has_use) { + for (auto succ: successors(bb)) { + if (!bbs.count(succ)) { + first_dead.push_back(&*succ->begin()); + } + } + } + else { + for (auto it = bb->rbegin(), end = bb->rend(); it != end; ++it) { + if (alloc_uses.count(&*it)) { + --it; + first_dead.push_back(&*it); + break; + } + } + } + } + bbs.clear(); + // There can/need only be one lifetime.end for each allocation in each bb, use bbs + // to record that. + // Iterate through the first dead and find the first safepoint following each of them. + while (!first_dead.empty()) { + auto I = first_dead.back(); + first_dead.pop_back(); + auto bb = I->getParent(); + if (!bbs.insert(bb).second) + continue; + if (I == &*bb->begin()) { + // There's no use in or after this bb. If this bb is not dominated by + // the def then it has to be dead on entering this bb. + // Otherwise, there could be use that we don't track + // before hitting the next safepoint. + if (!DT.dominates(orig, bb)) { + insertEnd(ptr, sz, &*bb->getFirstInsertionPt()); + continue; + } + else if (auto insert = getFirstSafepoint(bb)) { + insertEnd(ptr, sz, insert); + } + } + else { + assert(bb == def_bb || DT.dominates(orig, I)); + BasicBlock::iterator it(I); + BasicBlock::iterator end = bb->end(); + bool safepoint_found = false; + for (; it != end; ++it) { + auto insert = &*it; + if (pass.isSafepoint(insert)) { + insertEnd(ptr, sz, insert); + safepoint_found = true; + break; + } + } + if (safepoint_found) { + continue; + } + } + for (auto succ: successors(bb)) { + first_dead.push_back(&*succ->begin()); + } + } +} + +bool AllocOpt::doInitialization(Module &M) +{ + ctx = &M.getContext(); + DL = &M.getDataLayout(); + + alloc_obj = M.getFunction("julia.gc_alloc_obj"); + if (!alloc_obj) + return false; + + ptr_from_objref = M.getFunction("julia.pointer_from_objref"); + + T_prjlvalue = alloc_obj->getReturnType(); + T_pjlvalue = PointerType::get(cast(T_prjlvalue)->getElementType(), 0); + T_pprjlvalue = PointerType::get(T_prjlvalue, 0); + + T_int8 = Type::getInt8Ty(*ctx); + T_int32 = Type::getInt32Ty(*ctx); + T_int64 = Type::getInt64Ty(*ctx); + T_size = sizeof(void*) == 8 ? 
T_int64 : T_int32; + T_pint8 = PointerType::get(T_int8, 0); + +#if JL_LLVM_VERSION >= 50000 + lifetime_start = Intrinsic::getDeclaration(&M, Intrinsic::lifetime_start, { T_pint8 }); + lifetime_end = Intrinsic::getDeclaration(&M, Intrinsic::lifetime_end, { T_pint8 }); +#else + lifetime_start = Intrinsic::getDeclaration(&M, Intrinsic::lifetime_start); + lifetime_end = Intrinsic::getDeclaration(&M, Intrinsic::lifetime_end); +#endif + + MDNode *tbaa_data; + MDNode *tbaa_data_scalar; + std::tie(tbaa_data, tbaa_data_scalar) = tbaa_make_child("jtbaa_data"); + tbaa_tag = tbaa_make_child("jtbaa_tag", tbaa_data_scalar).first; + + return true; +} + +bool AllocOpt::checkInst(Instruction *I, CheckInstStack &stack, std::set &uses, + bool &ignore_tag) +{ + uses.clear(); + if (I->use_empty()) + return true; + CheckInstFrame cur{I, 0, I->use_begin(), I->use_end()}; + stack.clear(); + + // Recursion + auto push_inst = [&] (Instruction *inst) { + if (cur.use_it != cur.use_end) + stack.push_back(cur); + cur.parent = inst; + cur.use_it = inst->use_begin(); + cur.use_end = inst->use_end(); + }; + + auto check_inst = [&] (Instruction *inst, Use *use) { + if (isa(inst)) + return true; + if (auto call = dyn_cast(inst)) { + // TODO: on LLVM 5.0 we may need to handle certain llvm intrinsics + // including `memcpy`, `memset` etc. We might also need to handle + // `memcmp` by coverting to our own intrinsic and lower it after the gc root pass. + if (ptr_from_objref && ptr_from_objref == call->getCalledFunction()) + return true; + auto opno = use->getOperandNo(); + // Uses in `jl_roots` operand bundle are not counted as escaping, everything else is. + if (!isBundleOperand(call, opno)) + return false; + return call->getOperandBundleForOperand(opno).getTagName() == "jl_roots"; + } + if (auto store = dyn_cast(inst)) { + // Only store value count + if (use->getOperandNo() != StoreInst::getPointerOperandIndex()) + return false; + auto storev = store->getValueOperand(); + // There's GC root in this object. + if (auto ptrtype = dyn_cast(storev->getType())) { + if (ptrtype->getAddressSpace() == AddressSpace::Tracked) { + return false; + } + } + return true; + } + if (isa(inst) || isa(inst)) { + push_inst(inst); + return true; + } + if (auto gep = dyn_cast(inst)) { + APInt apoffset(sizeof(void*) * 8, cur.offset, true); + if (ignore_tag && (!gep->accumulateConstantOffset(*DL, apoffset) || + apoffset.isNegative())) + ignore_tag = false; + push_inst(inst); + cur.offset = apoffset.getLimitedValue(); + // Check overflow + if (cur.offset == UINT64_MAX) + ignore_tag = false; + return true; + } + return false; + }; + + while (true) { + assert(cur.use_it != cur.use_end); + auto use = &*cur.use_it; + auto inst = dyn_cast(use->getUser()); + ++cur.use_it; + if (!inst) + return false; + if (!check_inst(inst, use)) + return false; + uses.insert(inst); + if (cur.use_it == cur.use_end) { + if (stack.empty()) + return true; + cur = stack.back(); + stack.pop_back(); + } + } +} + +// This function needs to handle all cases `AllocOpt::checkInst` can handle. +// This function should not erase any safepoint so that the lifetime marker can find and cache +// all the original safepoints. 
+void AllocOpt::replaceUsesWith(Instruction *orig_inst, Instruction *new_inst, + ReplaceUsesStack &stack) +{ + auto simple_replace = [&] (Instruction *orig_i, Instruction *new_i) { + if (orig_i->user_empty()) { + if (orig_i != orig_inst) + orig_i->eraseFromParent(); + return true; + } + Type *orig_t = orig_i->getType(); + Type *new_t = new_i->getType(); + if (orig_t == new_t) { + orig_i->replaceAllUsesWith(new_i); + if (orig_i != orig_inst) + orig_i->eraseFromParent(); + return true; + } + return false; + }; + if (simple_replace(orig_inst, new_inst)) + return; + assert(stack.empty()); + ReplaceUsesFrame cur{orig_inst, new_inst}; + auto finish_cur = [&] () { + assert(cur.orig_i->user_empty()); + if (cur.orig_i != orig_inst) { + cur.orig_i->eraseFromParent(); + } + }; + auto push_frame = [&] (Instruction *orig_i, Instruction *new_i) { + if (simple_replace(orig_i, new_i)) + return; + stack.push_back(cur); + cur = {orig_i, new_i}; + }; + // Both `orig_i` and `new_i` should be pointer of the same type + // but possibly different address spaces. `new_i` is always in addrspace 0. + auto replace_inst = [&] (Instruction *user) { + Instruction *orig_i = cur.orig_i; + Instruction *new_i = cur.new_i; + if (isa(user) || isa(user)) { + user->replaceUsesOfWith(orig_i, new_i); + } + else if (auto call = dyn_cast(user)) { + if (ptr_from_objref && ptr_from_objref == call->getCalledFunction()) { + call->replaceAllUsesWith(new_i); + call->eraseFromParent(); + return; + } + // remove from operand bundle + Type *new_t = new_i->getType(); + user->replaceUsesOfWith(orig_i, ConstantPointerNull::get(cast(new_t))); + } + else if (isa(user) || isa(user)) { + auto cast_t = PointerType::get(cast(user->getType())->getElementType(), + 0); + auto replace_i = new_i; + Type *new_t = new_i->getType(); + if (cast_t != new_t) { + replace_i = new BitCastInst(replace_i, cast_t, "", user); + replace_i->setDebugLoc(user->getDebugLoc()); + replace_i->takeName(user); + } + push_frame(user, replace_i); + } + else if (auto gep = dyn_cast(user)) { + SmallVector IdxOperands(gep->idx_begin(), gep->idx_end()); + auto new_gep = GetElementPtrInst::Create(gep->getSourceElementType(), + new_i, IdxOperands, + gep->getName(), gep); + new_gep->setIsInBounds(gep->isInBounds()); + new_gep->takeName(gep); + copyMetadata(new_gep, gep); + push_frame(gep, new_gep); + } + else { + abort(); + } + }; + + while (true) { + replace_inst(cast(*cur.orig_i->user_begin())); + while (cur.orig_i->use_empty()) { + finish_cur(); + if (stack.empty()) + return; + cur = stack.back(); + stack.pop_back(); + } + } +} + +bool AllocOpt::isSafepoint(Instruction *inst) +{ + auto call = dyn_cast(inst); + if (!call) + return false; + if (isa(call)) + return false; + if (auto callee = call->getCalledFunction()) { + // Known functions emitted in codegen that are not safepoints + if (callee == ptr_from_objref || callee->getName() == "memcmp") { + return false; + } + } + return true; +} + +bool AllocOpt::runOnFunction(Function &F) +{ + if (!alloc_obj) + return false; + SmallVector,6> allocs; + for (auto &bb: F) { + for (auto &I: bb) { + auto call = dyn_cast(&I); + if (!call) + continue; + auto callee = call->getCalledFunction(); + if (!callee) + continue; + size_t sz; + if (callee == alloc_obj) { + assert(call->getNumArgOperands() == 3); + sz = (size_t)cast(call->getArgOperand(1))->getZExtValue(); + } + else { + continue; + } + if (sz < IntegerType::MAX_INT_BITS / 8 && sz < INT32_MAX) { + allocs.push_back(std::make_pair(call, sz)); + } + } + } + + auto &entry = F.getEntryBlock(); 
+ CheckInstStack check_stack; + ReplaceUsesStack replace_stack; + std::set alloc_uses; + LifetimeMarker lifetime(*this); + for (auto &it: allocs) { + bool ignore_tag = true; + auto orig = it.first; + size_t &sz = it.second; + if (!checkInst(orig, check_stack, alloc_uses, ignore_tag)) { + sz = UINT32_MAX; + continue; + } + // The allocation does not escape or get used in a phi node so none of the derived + // SSA from it are live when we run the allocation again. + // It is now safe to promote the allocation to an entry block alloca. + size_t align = 1; + // TODO make codegen handling of alignment consistent and pass that as a parameter + // to the allocation function directly. + if (!ignore_tag) { + align = sz <= 8 ? 8 : JL_SMALL_BYTE_ALIGNMENT; + sz += align; + } + else if (sz > 1) { + align = JL_SMALL_BYTE_ALIGNMENT; + while (sz < align) { + align = align / 2; + } + } + // No debug info for prolog instructions + IRBuilder<> prolog_builder(&entry.front()); + AllocaInst *buff; + Instruction *ptr; + if (sz == 0) { + buff = prolog_builder.CreateAlloca(T_int8, 0); + ptr = buff; + } + else { + buff = prolog_builder.CreateAlloca(Type::getIntNTy(*ctx, sz * 8)); + buff->setAlignment(align); + ptr = cast(prolog_builder.CreateBitCast(buff, T_pint8)); + } + lifetime.insert(ptr, ConstantInt::get(T_int64, sz), orig, alloc_uses); + // Someone might be reading the tag, initialize it. + if (!ignore_tag) { + ptr = cast(prolog_builder.CreateConstGEP1_32(T_int8, ptr, align)); + auto casti = prolog_builder.CreateBitCast(ptr, T_pprjlvalue); + auto tagaddr = prolog_builder.CreateGEP(T_prjlvalue, casti, + {ConstantInt::get(T_size, -1)}); + // Store should be created at the callsite and not in the prolog + auto store = new StoreInst(orig->getArgOperand(2), tagaddr, orig); + store->setMetadata(LLVMContext::MD_tbaa, tbaa_tag); + store->setDebugLoc(orig->getDebugLoc()); + } + auto casti = cast(prolog_builder.CreateBitCast(ptr, T_pjlvalue)); + casti->takeName(orig); + replaceUsesWith(orig, cast(casti), replace_stack); + } + for (auto it: allocs) { + if (it.second == UINT32_MAX) + continue; + it.first->eraseFromParent(); + } + return true; +} + +char AllocOpt::ID = 0; +static RegisterPass X("AllocOpt", "Promote heap allocation to stack", + false /* Only looks at CFG */, + false /* Analysis Pass */); + +} + +Pass *createAllocOptPass() +{ + return new AllocOpt(); +} diff --git a/src/llvm-late-gc-lowering.cpp b/src/llvm-late-gc-lowering.cpp index 80ba32a495332..07db6fabe7c67 100644 --- a/src/llvm-late-gc-lowering.cpp +++ b/src/llvm-late-gc-lowering.cpp @@ -20,6 +20,7 @@ #include "llvm-version.h" #include "codegen_shared.h" #include "julia.h" +#include "julia_internal.h" #define DEBUG_TYPE "late_lower_gcroot" @@ -292,6 +293,10 @@ struct LateLowerGCFrame: public FunctionPass { { llvm::initializeDominatorTreeWrapperPassPass(*PassRegistry::getPassRegistry()); tbaa_gcframe = tbaa_make_child("jtbaa_gcframe").first; + MDNode *tbaa_data; + MDNode *tbaa_data_scalar; + std::tie(tbaa_data, tbaa_data_scalar) = tbaa_make_child("jtbaa_data"); + tbaa_tag = tbaa_make_child("jtbaa_tag", tbaa_data_scalar).first; } protected: @@ -306,11 +311,19 @@ struct LateLowerGCFrame: public FunctionPass { Type *T_prjlvalue; Type *T_ppjlvalue; Type *T_size; + Type *T_int8; Type *T_int32; + Type *T_pint8; + Type *T_pjlvalue_der; + Type *T_ppjlvalue_der; MDNode *tbaa_gcframe; + MDNode *tbaa_tag; Function *ptls_getter; Function *gc_flush_func; Function *pointer_from_objref_func; + Function *alloc_obj_func; + Function *pool_alloc_func; + Function 
*big_alloc_func; CallInst *ptlsStates; void MaybeNoteDef(State &S, BBState &BBS, Value *Def, const std::vector &SafepointsSoFar, int RefinedPtr = -2); @@ -1083,19 +1096,38 @@ void LateLowerGCFrame::PopGCFrame(AllocaInst *gcframe, Instruction *InsertBefore inst->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_gcframe); } +static void copyMetadata(Instruction *dest, const Instruction *src) +{ +#if JL_LLVM_VERSION < 40000 + if (!src->hasMetadata()) + return; + SmallVector,4> TheMDs; + src->getAllMetadataOtherThanDebugLoc(TheMDs); + for (const auto &MD : TheMDs) + dest->setMetadata(MD.first, MD.second); + dest->setDebugLoc(src->getDebugLoc()); +#else + dest->copyMetadata(*src); +#endif +} + bool LateLowerGCFrame::CleanupIR(Function &F) { bool ChangesMade = false; // We create one alloca for all the jlcall frames that haven't been processed // yet. LLVM would merge them anyway later, so might as well save it a bit // of work size_t maxframeargs = 0; - PointerType *T_pprjlvalue = T_prjlvalue->getPointerTo(); Instruction *StartOff = &*(F.getEntryBlock().begin()); - AllocaInst *Frame = new AllocaInst(T_prjlvalue, ConstantInt::get(T_int32, maxframeargs), + PointerType *T_pprjlvalue = nullptr; + AllocaInst *Frame = nullptr; + if (T_prjlvalue) { + T_pprjlvalue = T_prjlvalue->getPointerTo(); + Frame = new AllocaInst(T_prjlvalue, ConstantInt::get(T_int32, maxframeargs), #if JL_LLVM_VERSION >= 50000 0, #endif "", StartOff); + } for (BasicBlock &BB : F) { for (auto it = BB.begin(); it != BB.end();) { auto *CI = dyn_cast(&*it); @@ -1104,16 +1136,47 @@ bool LateLowerGCFrame::CleanupIR(Function &F) { continue; } CallingConv::ID CC = CI->getCallingConv(); - if (gc_flush_func != nullptr && CI->getCalledFunction() == gc_flush_func) { + auto callee = CI->getCalledValue(); + if (gc_flush_func != nullptr && callee == gc_flush_func) { /* No replacement */ - } else if (pointer_from_objref_func != nullptr && - CI->getCalledFunction() == pointer_from_objref_func) { + } else if (pointer_from_objref_func != nullptr && callee == pointer_from_objref_func) { auto *ASCI = new AddrSpaceCastInst(CI->getOperand(0), CI->getType(), "", CI); ASCI->takeName(CI); CI->replaceAllUsesWith(ASCI); + } else if (alloc_obj_func && callee == alloc_obj_func) { + assert(CI->getNumArgOperands() == 3); + auto sz = (size_t)cast(CI->getArgOperand(1))->getZExtValue(); + // This is strongly architecture and OS dependent + int osize; + int offset = jl_gc_classify_pools(sz, &osize); + IRBuilder<> builder(CI); + builder.SetCurrentDebugLocation(CI->getDebugLoc()); + auto ptls = CI->getArgOperand(0); + CallInst *newI; + if (offset < 0) { + newI = builder.CreateCall(big_alloc_func, + {ptls, ConstantInt::get(T_size, + sz + sizeof(void*))}); + } + else { + auto pool_offs = ConstantInt::get(T_int32, offset); + auto pool_osize = ConstantInt::get(T_int32, osize); + newI = builder.CreateCall(pool_alloc_func, {ptls, pool_offs, pool_osize}); + } + newI->setAttributes(CI->getAttributes()); + newI->takeName(CI); + copyMetadata(newI, CI); + auto derived = builder.CreateAddrSpaceCast(newI, T_pjlvalue_der); + auto cast = builder.CreateBitCast(derived, T_ppjlvalue_der); + auto tagaddr = builder.CreateGEP(T_prjlvalue, cast, + {ConstantInt::get(T_size, -1)}); + auto store = builder.CreateStore(CI->getArgOperand(2), tagaddr); + store->setMetadata(LLVMContext::MD_tbaa, tbaa_tag); + CI->replaceAllUsesWith(newI); } else if (CC == JLCALL_CC || CC == JLCALL_F_CC) { + assert(T_prjlvalue); size_t nframeargs = CI->getNumArgOperands() - (CC == JLCALL_F_CC); SmallVector 
ReplacementArgs; auto it = CI->arg_begin(); @@ -1135,8 +1198,7 @@ bool LateLowerGCFrame::CleanupIR(Function &F) { T_pprjlvalue, T_int32}, false) : FunctionType::get(T_prjlvalue, {T_pprjlvalue, T_int32}, false); - Value *newFptr = Builder.CreateBitCast(CI->getCalledValue(), - FTy->getPointerTo()); + Value *newFptr = Builder.CreateBitCast(callee, FTy->getPointerTo()); CallInst *NewCall = CallInst::Create(newFptr, ReplacementArgs, "", CI); NewCall->setTailCallKind(CI->getTailCallKind()); NewCall->setAttributes(CI->getAttributes()); @@ -1155,10 +1217,12 @@ bool LateLowerGCFrame::CleanupIR(Function &F) { ChangesMade = true; } } - if (maxframeargs == 0) + if (maxframeargs == 0 && Frame) { Frame->eraseFromParent(); - else + } + else if (Frame) { Frame->setOperand(0, ConstantInt::get(T_int32, maxframeargs)); + } return ChangesMade; } @@ -1316,26 +1380,71 @@ void LateLowerGCFrame::PlaceRootsAndUpdateCalls(Function &F, std::vector &C } } +static void addRetNoAlias(Function *F) +{ +#if JL_LLVM_VERSION >= 50000 + F->addAttribute(AttributeList::ReturnIndex, Attribute::NoAlias); +#else + F->addAttribute(AttributeSet::ReturnIndex, Attribute::NoAlias); +#endif +} + bool LateLowerGCFrame::doInitialization(Module &M) { ptls_getter = M.getFunction("jl_get_ptls_states"); gc_flush_func = M.getFunction("julia.gcroot_flush"); pointer_from_objref_func = M.getFunction("julia.pointer_from_objref"); + auto &ctx = M.getContext(); + T_size = M.getDataLayout().getIntPtrType(ctx); + T_int8 = Type::getInt8Ty(ctx); + T_pint8 = PointerType::get(T_int8, 0); + T_int32 = Type::getInt32Ty(ctx); + if ((alloc_obj_func = M.getFunction("julia.gc_alloc_obj"))) { + T_prjlvalue = alloc_obj_func->getReturnType(); + if (!(pool_alloc_func = M.getFunction("jl_gc_pool_alloc"))) { + std::vector args(0); + args.push_back(T_pint8); + args.push_back(T_int32); + args.push_back(T_int32); + pool_alloc_func = Function::Create(FunctionType::get(T_prjlvalue, args, false), + Function::ExternalLinkage, "jl_gc_pool_alloc", &M); + addRetNoAlias(pool_alloc_func); + } + if (!(big_alloc_func = M.getFunction("jl_gc_big_alloc"))) { + std::vector args(0); + args.push_back(T_pint8); + args.push_back(T_size); + big_alloc_func = Function::Create(FunctionType::get(T_prjlvalue, args, false), + Function::ExternalLinkage, "jl_gc_big_alloc", &M); + addRetNoAlias(big_alloc_func); + } + auto T_jlvalue = cast(T_prjlvalue)->getElementType(); + auto T_pjlvalue = PointerType::get(T_jlvalue, 0); + T_ppjlvalue = PointerType::get(T_pjlvalue, 0); + T_pjlvalue_der = PointerType::get(T_jlvalue, AddressSpace::Derived); + T_ppjlvalue_der = PointerType::get(T_prjlvalue, AddressSpace::Derived); + } + else if (ptls_getter) { + auto functype = ptls_getter->getFunctionType(); + T_ppjlvalue = cast(functype->getReturnType())->getElementType(); + auto T_pjlvalue = cast(T_ppjlvalue)->getElementType(); + auto T_jlvalue = cast(T_pjlvalue)->getElementType(); + T_prjlvalue = PointerType::get(T_jlvalue, AddressSpace::Tracked); + T_pjlvalue_der = PointerType::get(T_jlvalue, AddressSpace::Derived); + T_ppjlvalue_der = PointerType::get(T_prjlvalue, AddressSpace::Derived); + } + else { + T_ppjlvalue = nullptr; + T_prjlvalue = nullptr; + T_pjlvalue_der = nullptr; + T_ppjlvalue_der = nullptr; + } return false; } bool LateLowerGCFrame::runOnFunction(Function &F) { DEBUG(dbgs() << "GC ROOT PLACEMENT: Processing function " << F.getName() << "\n"); - if (ptls_getter) { - auto functype = ptls_getter->getFunctionType(); - T_ppjlvalue = - cast(functype->getReturnType())->getElementType(); - auto T_pjlvalue 
= cast(T_ppjlvalue)->getElementType(); - T_prjlvalue = PointerType::get(cast(T_pjlvalue)->getElementType(), AddressSpace::Tracked); - } else { + if (!ptls_getter) return CleanupIR(F); - } - T_size = F.getParent()->getDataLayout().getIntPtrType(F.getContext()); - T_int32 = Type::getInt32Ty(F.getContext()); ptlsStates = nullptr; for (auto I = F.getEntryBlock().begin(), E = F.getEntryBlock().end(); ptls_getter && I != E; ++I) { diff --git a/test/codegen.jl b/test/codegen.jl index e03086e5e3ed5..14b8b0a77f4b6 100644 --- a/test/codegen.jl +++ b/test/codegen.jl @@ -146,6 +146,25 @@ Base.unsafe_convert(::Type{Ptr{BadRef}}, ar::BadRef) = Ptr{BadRef}(pointer_from_ breakpoint_badref(a::MutableStruct) = ccall(:jl_breakpoint, Void, (Ptr{BadRef},), a) +struct PtrStruct + a::Ptr{Void} + b::Int +end + +mutable struct RealStruct + a::Float64 + b::Int +end + +function Base.cconvert(::Type{Ref{PtrStruct}}, a::RealStruct) + (a, Ref(PtrStruct(pointer_from_objref(a), a.b))) +end +Base.unsafe_convert(::Type{Ref{PtrStruct}}, at::Tuple) = + Base.unsafe_convert(Ref{PtrStruct}, at[2]) + +breakpoint_ptrstruct(a::RealStruct) = + ccall(:jl_breakpoint, Void, (Ref{PtrStruct},), a) + if opt_level > 0 @test !contains(get_llvm(isequal, Tuple{Nullable{BigFloat}, Nullable{BigFloat}}), "%gcframe") @test !contains(get_llvm(pointer_not_safepoint, Tuple{}), "%gcframe") @@ -161,6 +180,27 @@ if opt_level > 0 breakpoint_badref_ir = get_llvm(breakpoint_badref, Tuple{MutableStruct}) @test !contains(breakpoint_badref_ir, "%gcframe") @test !contains(breakpoint_badref_ir, "jl_gc_pool_alloc") + + breakpoint_ptrstruct_ir = get_llvm(breakpoint_ptrstruct, Tuple{RealStruct}) + @test !contains(breakpoint_ptrstruct_ir, "%gcframe") + @test !contains(breakpoint_ptrstruct_ir, "jl_gc_pool_alloc") +end + +function two_breakpoint(a::Float64) + ccall(:jl_breakpoint, Void, (Ref{Float64},), a) + ccall(:jl_breakpoint, Void, (Ref{Float64},), a) +end + +if opt_level > 0 + breakpoint_f64_ir = get_llvm((a)->ccall(:jl_breakpoint, Void, (Ref{Float64},), a), + Tuple{Float64}) + @test !contains(breakpoint_f64_ir, "jl_gc_pool_alloc") + breakpoint_any_ir = get_llvm((a)->ccall(:jl_breakpoint, Void, (Ref{Any},), a), + Tuple{Float64}) + @test contains(breakpoint_any_ir, "jl_gc_pool_alloc") + two_breakpoint_ir = get_llvm(two_breakpoint, Tuple{Float64}) + @test !contains(two_breakpoint_ir, "jl_gc_pool_alloc") + @test contains(two_breakpoint_ir, "llvm.lifetime.end") end # Issue 22770 diff --git a/test/llvmpasses/.gitignore b/test/llvmpasses/.gitignore new file mode 100644 index 0000000000000..0e62bc85774ee --- /dev/null +++ b/test/llvmpasses/.gitignore @@ -0,0 +1 @@ +/Output/ \ No newline at end of file diff --git a/test/llvmpasses/alloc-opt.jl b/test/llvmpasses/alloc-opt.jl new file mode 100644 index 0000000000000..60f03088e483d --- /dev/null +++ b/test/llvmpasses/alloc-opt.jl @@ -0,0 +1,203 @@ +# RUN: julia --startup-file=no %s | opt -load libjulia.so -AllocOpt -LateLowerGCFrame -S - | FileCheck %s + +isz = sizeof(UInt) == 8 ? 
"i64" : "i32" + +println(""" +%jl_value_t = type opaque +@tag = external addrspace(10) global %jl_value_t +""") + +# CHECK-LABEL: @return_obj +# CHECK-NOT: @julia.gc_alloc_obj +# CHECK: %v = call noalias %jl_value_t addrspace(10)* @jl_gc_pool_alloc +# CHECK: store %jl_value_t addrspace(10)* @tag, %jl_value_t addrspace(10)* addrspace(11)* {{.*}}, !tbaa !0 +println(""" +define %jl_value_t addrspace(10)* @return_obj() { + %ptls = call %jl_value_t*** @jl_get_ptls_states() + %ptls_i8 = bitcast %jl_value_t*** %ptls to i8* + %v = call noalias %jl_value_t addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, $isz 8, %jl_value_t addrspace(10)* @tag) + ret %jl_value_t addrspace(10)* %v +} +""") +# CHECK-LABEL: } + +# CHECK-LABEL: @return_load +# CHECK: alloca i64, align 8 +# CHECK-NOT: @julia.gc_alloc_obj +# CHECK-NOT: @jl_gc_pool_alloc +# CHECK: call void @llvm.lifetime.start(i64 8, i8* +# CHECK-NEXT: %v64 = bitcast %jl_value_t* %v to i64* +# CHECK-NOT: @tag +# CHECK-NOT: @llvm.lifetime.end +println(""" +define i64 @return_load(i64 %i) { + %ptls = call %jl_value_t*** @jl_get_ptls_states() + %ptls_i8 = bitcast %jl_value_t*** %ptls to i8* + %v = call noalias %jl_value_t addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, $isz 8, %jl_value_t addrspace(10)* @tag) + %v64 = bitcast %jl_value_t addrspace(10)* %v to i64 addrspace(10)* + %v64a11 = addrspacecast i64 addrspace(10)* %v64 to i64 addrspace(11)* + store i64 %i, i64 addrspace(11)* %v64a11, align 16, !tbaa !4 + call void @external_function() + %l = load i64, i64 addrspace(11)* %v64a11, align 16, !tbaa !4 + ret i64 %l +} +""") +# CHECK-LABEL: } + +# CHECK-LABEL: @return_tag +# CHECK: alloca i128, align 8 +# CHECK: call %jl_value_t*** @jl_get_ptls_states() +# CHECK-NOT: @julia.gc_alloc_obj +# CHECK-NOT: @jl_gc_pool_alloc +# CHECK: call void @llvm.lifetime.start(i64 16, i8* +# CHECK: store %jl_value_t addrspace(10)* @tag, %jl_value_t addrspace(10)** {{.*}}, !tbaa !0 +# CHECK-NOT: @llvm.lifetime.end +println(""" +define %jl_value_t addrspace(10)* @return_tag() { + %ptls = call %jl_value_t*** @jl_get_ptls_states() + %ptls_i8 = bitcast %jl_value_t*** %ptls to i8* + %v = call noalias %jl_value_t addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, $isz 8, %jl_value_t addrspace(10)* @tag) + %va = addrspacecast %jl_value_t addrspace(10)* %v to %jl_value_t addrspace(11)* + %vab = bitcast %jl_value_t addrspace(11)* %va to %jl_value_t addrspace(10)* addrspace(11)* + %tagaddr = getelementptr %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)* addrspace(11)* %vab, i64 -1 + %tag = load %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)* addrspace(11)* %tagaddr, align 8, !tbaa !0 + ret %jl_value_t addrspace(10)* %tag +} +""") +# CHECK-LABEL: } + +# CHECK-LABEL: @ccall_obj +# CHECK: call %jl_value_t*** @jl_get_ptls_states() +# CHECK-NOT: @julia.gc_alloc_obj +# CHECK: @jl_gc_pool_alloc +# CHECK: store %jl_value_t addrspace(10)* @tag, %jl_value_t addrspace(10)* addrspace(11)* {{.*}}, !tbaa !0 +println(""" +define void @ccall_obj(i8* %fptr) { + %ptls = call %jl_value_t*** @jl_get_ptls_states() + %ptls_i8 = bitcast %jl_value_t*** %ptls to i8* + %v = call noalias %jl_value_t addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, $isz 8, %jl_value_t addrspace(10)* @tag) + %f = bitcast i8* %fptr to void (%jl_value_t addrspace(10)*)* + call void %f(%jl_value_t addrspace(10)* %v) + ret void +} +""") +# CHECK-LABEL: } + +# CHECK-LABEL: @ccall_ptr +# CHECK: alloca i64 +# CHECK: call %jl_value_t*** @jl_get_ptls_states() +# CHECK-NOT: @julia.gc_alloc_obj +# CHECK-NOT: @jl_gc_pool_alloc +# 
CHECK: call void @llvm.lifetime.start(i64 8, i8* +# CHECK-NEXT: %f = bitcast i8* %fptr to void (%jl_value_t*)* +# Currently the GC frame lowering pass strips away all operand bundles +# CHECK-NEXT: call void %f(%jl_value_t* %v) +# CHECK-NEXT: ret void +println(""" +define void @ccall_ptr(i8* %fptr) { + %ptls = call %jl_value_t*** @jl_get_ptls_states() + %ptls_i8 = bitcast %jl_value_t*** %ptls to i8* + %v = call noalias %jl_value_t addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, $isz 8, %jl_value_t addrspace(10)* @tag) + %va = addrspacecast %jl_value_t addrspace(10)* %v to %jl_value_t addrspace(11)* + %ptr = call %jl_value_t* @julia.pointer_from_objref(%jl_value_t addrspace(11)* %va) + %f = bitcast i8* %fptr to void (%jl_value_t*)* + call void %f(%jl_value_t* %ptr) [ "jl_roots"(%jl_value_t addrspace(10)* %v), "unknown_bundle"(%jl_value_t* %ptr) ] + ret void +} +""") +# CHECK-LABEL: } + +# CHECK-LABEL: @ccall_unknown_bundle +# CHECK: call %jl_value_t*** @jl_get_ptls_states() +# CHECK-NOT: @julia.gc_alloc_obj +# CHECK: @jl_gc_pool_alloc +# CHECK: store %jl_value_t addrspace(10)* @tag, %jl_value_t addrspace(10)* addrspace(11)* {{.*}}, !tbaa !0 +println(""" +define void @ccall_unknown_bundle(i8* %fptr) { + %ptls = call %jl_value_t*** @jl_get_ptls_states() + %ptls_i8 = bitcast %jl_value_t*** %ptls to i8* + %v = call noalias %jl_value_t addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, $isz 8, %jl_value_t addrspace(10)* @tag) + %va = addrspacecast %jl_value_t addrspace(10)* %v to %jl_value_t addrspace(11)* + %ptr = call %jl_value_t* @julia.pointer_from_objref(%jl_value_t addrspace(11)* %va) + %f = bitcast i8* %fptr to void (%jl_value_t*)* + call void %f(%jl_value_t* %ptr) [ "jl_not_jl_roots"(%jl_value_t addrspace(10)* %v) ] + ret void +} +""") +# CHECK-LABEL: } + +# CHECK-LABEL: @lifetime_branches +# CHECK: alloca i64 +# CHECK: call %jl_value_t*** @jl_get_ptls_states() +# CHECK: L1: +# CHECK-NEXT: call void @llvm.lifetime.start(i64 8, +# CHECK-NEXT: %f = bitcast i8* %fptr to void (%jl_value_t*)* +# CHECK-NEXT: call void %f(%jl_value_t* %v) +# CHECK-NEXT: br i1 %b2, label %L2, label %L3 + +# CHECK: L2: +# CHECK-NEXT: %f2 = bitcast i8* %fptr to void (%jl_value_t*)* +# CHECK-NEXT: call void @llvm.lifetime.end(i64 8, +# CHECK-NEXT: call void %f2(%jl_value_t* null) + +# CHECK: L3: +# CHECK-NEXT: call void @llvm.lifetime.end(i64 8, +println(""" +define void @lifetime_branches(i8* %fptr, i1 %b, i1 %b2) { + %ptls = call %jl_value_t*** @jl_get_ptls_states() + %ptls_i8 = bitcast %jl_value_t*** %ptls to i8* + br i1 %b, label %L1, label %L3 + +L1: + %v = call noalias %jl_value_t addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, $isz 8, %jl_value_t addrspace(10)* @tag) + %va = addrspacecast %jl_value_t addrspace(10)* %v to %jl_value_t addrspace(11)* + %ptr = call %jl_value_t* @julia.pointer_from_objref(%jl_value_t addrspace(11)* %va) + %f = bitcast i8* %fptr to void (%jl_value_t*)* + call void %f(%jl_value_t* %ptr) [ "jl_roots"(%jl_value_t addrspace(10)* %v) ] + br i1 %b2, label %L2, label %L3 + +L2: + %f2 = bitcast i8* %fptr to void (%jl_value_t*)* + call void %f2(%jl_value_t* null) + br label %L3 + +L3: + ret void +} +""") +# CHECK-LABEL: } + +# CHECK-LABEL: @object_field +# CHECK: call %jl_value_t*** @jl_get_ptls_states() +# CHECK-NOT: @julia.gc_alloc_obj +# CHECK: @jl_gc_pool_alloc +# CHECK: store %jl_value_t addrspace(10)* @tag, %jl_value_t addrspace(10)* addrspace(11)* {{.*}}, !tbaa !0 +println(""" +define void @object_field(%jl_value_t addrspace(10)* %field) { + %ptls = call %jl_value_t*** 
@jl_get_ptls_states() + %ptls_i8 = bitcast %jl_value_t*** %ptls to i8* + %v = call noalias %jl_value_t addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, $isz 8, %jl_value_t addrspace(10)* @tag) + %va = addrspacecast %jl_value_t addrspace(10)* %v to %jl_value_t addrspace(11)* + %vab = bitcast %jl_value_t addrspace(11)* %va to %jl_value_t addrspace(10)* addrspace(11)* + store %jl_value_t addrspace(10)* %field, %jl_value_t addrspace(10)* addrspace(11)* %vab + ret void +} +""") +# CHECK-LABEL: } + +# CHECK: declare noalias %jl_value_t addrspace(10)* @jl_gc_pool_alloc(i8*, +# CHECK: declare noalias %jl_value_t addrspace(10)* @jl_gc_big_alloc(i8*, +println(""" +declare void @external_function() +declare %jl_value_t*** @jl_get_ptls_states() +declare noalias %jl_value_t addrspace(10)* @julia.gc_alloc_obj(i8*, $isz, %jl_value_t addrspace(10)*) +declare %jl_value_t* @julia.pointer_from_objref(%jl_value_t addrspace(11)*) + +!0 = !{!1, !1, i64 0} +!1 = !{!"jtbaa_tag", !2, i64 0} +!2 = !{!"jtbaa_data", !3, i64 0} +!3 = !{!"jtbaa"} +!4 = !{!5, !5, i64 0} +!5 = !{!"jtbaa_mutab", !6, i64 0} +!6 = !{!"jtbaa_value", !2, i64 0} +""")
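
The stack-promotion path in `AllocOpt::runOnFunction` above decides how large and how aligned the replacement `alloca` has to be, depending on whether anything may still read the type tag. The standalone sketch below restates that decision outside of LLVM; it is illustrative only and assumes `JL_SMALL_BYTE_ALIGNMENT` is 16 here (the real constant comes from `julia_internal.h`).

// Standalone restatement of the alloca sizing in AllocOpt::runOnFunction.
// Assumption: JL_SMALL_BYTE_ALIGNMENT is taken to be 16 for this example.
#include <cstddef>
#include <cstdio>

struct StackSlot {
    size_t size;  // bytes to reserve with the alloca
    size_t align; // alignment of the alloca
};

static StackSlot plan_alloca(size_t sz, bool tag_may_be_read)
{
    const size_t small_align = 16; // assumed JL_SMALL_BYTE_ALIGNMENT
    size_t align = 1;
    if (tag_may_be_read) {
        // Reserve one aligned slot in front of the payload so the type tag
        // can still be stored at (object pointer - 1), just like on the heap.
        align = sz <= 8 ? 8 : small_align;
        sz += align;
    }
    else if (sz > 1) {
        // Nothing reads the tag: only keep as much alignment as the size needs.
        align = small_align;
        while (sz < align)
            align /= 2;
    }
    return {sz, align};
}

int main()
{
    const StackSlot a = plan_alloca(8, false);
    const StackSlot b = plan_alloca(8, true);
    std::printf("8 bytes, tag unused: alloca %zu bytes, align %zu\n", a.size, a.align);
    std::printf("8 bytes, tag read:   alloca %zu bytes, align %zu\n", b.size, b.align);
    return 0;
}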
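
For allocations that AllocOpt cannot promote, the new branch in `LateLowerGCFrame::CleanupIR` lowers `julia.gc_alloc_obj(ptls, size, tag)` to either `jl_gc_pool_alloc` or `jl_gc_big_alloc` and then stores the tag one pointer-width in front of the returned object. The sketch below restates that choice in plain C++; `classify_pools_stub`, its 2032-byte cutoff, and the `Lowered` struct are hypothetical stand-ins for illustration, not the real `jl_gc_classify_pools` interface. Keeping the pool/big split in this late pass, rather than in `emit_allocobj`, is what lets `julia.gc_alloc_obj` stay abstract long enough for AllocOpt to elide it entirely.

// Illustrative sketch of the pool-vs-big decision added to
// LateLowerGCFrame::CleanupIR. classify_pools_stub and its cutoff are
// hypothetical stand-ins for jl_gc_classify_pools, which is platform dependent.
#include <cstddef>
#include <cstdio>

struct Lowered {
    const char *callee; // "jl_gc_pool_alloc" or "jl_gc_big_alloc"
    int pool_offset;    // pool table offset, -1 for the big path
    size_t size_arg;    // osize for pools, object size + tag word for big
};

// Stand-in for jl_gc_classify_pools: small sizes map to a 16-byte-granular
// size class, anything larger is sent to the big-object allocator.
static int classify_pools_stub(size_t sz, int *osize)
{
    const size_t max_pool_obj = 2032; // assumed cutoff, illustration only
    if (sz == 0 || sz > max_pool_obj)
        return -1;
    *osize = (int)((sz + 15) / 16 * 16);
    return *osize / 16; // hypothetical offset into the per-thread pool array
}

// Mirrors the lowering branch: pick the callee and size argument; the tag is
// then stored one pointer-width before the returned object (not shown here).
static Lowered lower_gc_alloc_obj(size_t sz)
{
    int osize = 0;
    int offset = classify_pools_stub(sz, &osize);
    if (offset < 0)
        return {"jl_gc_big_alloc", -1, sz + sizeof(void*)};
    return {"jl_gc_pool_alloc", offset, (size_t)osize};
}

int main()
{
    const size_t sizes[] = {8, 64, 4096};
    for (size_t sz : sizes) {
        Lowered l = lower_gc_alloc_obj(sz);
        std::printf("gc_alloc_obj(%zu) -> %s(size %zu)\n", sz, l.callee, l.size_arg);
    }
    return 0;
}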