Skip to content

Commit

Permalink
codegen: cleanup gcstack call frames somewhat earlier
Browse files Browse the repository at this point in the history
Slightly reduces the amount of work these optimization passes need to do
later, in most typical cases, and avoids putting an unknown call at the
top of all of our functions, which can inhibit some optimizations of
otherwise trivial functions.

Fixes #57400
  • Loading branch information
vtjnash committed Feb 19, 2025
1 parent a65c2cf commit 7ff7964
Show file tree
Hide file tree
Showing 8 changed files with 110 additions and 105 deletions.
4 changes: 2 additions & 2 deletions src/ccall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2162,12 +2162,12 @@ jl_cgval_t function_sig_t::emit_a_ccall(
}
}

// Potentially we could drop `jl_roots(gc_uses)` in the presence of `gc-transition(gc_uses)`
// Potentially we could add gc_uses to `gc-transition`, instead of emitting them separately as jl_roots
SmallVector<OperandBundleDef, 2> bundles;
if (!gc_uses.empty())
bundles.push_back(OperandBundleDef("jl_roots", gc_uses));
if (gc_safe)
bundles.push_back(OperandBundleDef("gc-transition", ArrayRef<Value*> {}));
bundles.push_back(OperandBundleDef("gc-transition", get_current_ptls(ctx)));
// the actual call
CallInst *ret = ctx.builder.CreateCall(functype, llvmf,
argvals,
Expand Down
110 changes: 61 additions & 49 deletions src/codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7002,8 +7002,9 @@ static void allocate_gc_frame(jl_codectx_t &ctx, BasicBlock *b0, bool or_new=fal
// allocate a placeholder gc instruction
// this will require the runtime, but it gets deleted later if unused
ctx.topalloca = ctx.builder.CreateCall(prepare_call(or_new ? jladoptthread_func : jlpgcstack_func));
ctx.pgcstack = ctx.topalloca;
ctx.pgcstack->setName("pgcstack");
ctx.topalloca->setName("pgcstack");
if (ctx.pgcstack == nullptr)
ctx.pgcstack = ctx.topalloca;
}

static Value *get_current_task(jl_codectx_t &ctx)
Expand Down Expand Up @@ -7150,16 +7151,17 @@ static void emit_specsig_to_specsig(
ctx.builder.SetInsertPoint(b0);
DebugLoc noDbg;
ctx.builder.SetCurrentDebugLocation(noDbg);
allocate_gc_frame(ctx, b0);
Function::arg_iterator AI = gf_thunk->arg_begin();
SmallVector<jl_cgval_t, 0> myargs(nargs);
if (cc == jl_returninfo_t::SRet || cc == jl_returninfo_t::Union)
++AI;
if (return_roots)
++AI;
if (JL_FEAT_TEST(ctx,gcstack_arg)) {
ctx.pgcstack = AI;
++AI; // gcstack_arg
}
allocate_gc_frame(ctx, b0);
for (size_t i = 0; i < nargs; i++) {
if (i == 0 && is_for_opaque_closure) {
// `jt` would be wrong here (it is the captures type), so is not used used for
Expand Down Expand Up @@ -7266,6 +7268,10 @@ static void emit_specsig_to_specsig(
break;
}
}
if (ctx.topalloca != ctx.pgcstack && ctx.topalloca->use_empty()) {
ctx.topalloca->eraseFromParent();
ctx.topalloca = nullptr;
}
}

void emit_specsig_to_fptr1(
Expand Down Expand Up @@ -8122,6 +8128,10 @@ static void gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *abi, jl_va
CreateTrap(ctx.builder, false);
else
ctx.builder.CreateRet(boxed(ctx, retval));
if (ctx.topalloca != ctx.pgcstack && ctx.topalloca->use_empty()) {
ctx.topalloca->eraseFromParent();
ctx.topalloca = nullptr;
}
}

static jl_returninfo_t get_specsig_function(jl_codegen_params_t &params, Module *M, Value *fval, StringRef name, jl_value_t *sig, jl_value_t *jlrettype, bool is_opaque_closure,
Expand Down Expand Up @@ -8778,7 +8788,53 @@ static jl_llvm_functions_t
ctx.spvals_ptr = &*AI++;
}
}
// step 6. set up GC frame
// step 6. set up GC frame and special arguments
Function::arg_iterator AI = f->arg_begin();
SmallVector<AttributeSet, 0> attrs(f->arg_size()); // function declaration attributes

if (has_sret) {
Argument *Arg = &*AI;
++AI;
AttrBuilder param(ctx.builder.getContext(), f->getAttributes().getParamAttrs(Arg->getArgNo()));
if (returninfo.cc == jl_returninfo_t::Union) {
param.addAttribute(Attribute::NonNull);
// The `dereferenceable` below does not imply `nonnull` for non addrspace(0) pointers.
param.addDereferenceableAttr(returninfo.union_bytes);
param.addAlignmentAttr(returninfo.union_align);
}
else {
const DataLayout &DL = jl_Module->getDataLayout();
Type *RT = Arg->getParamStructRetType();
TypeSize sz = DL.getTypeAllocSize(RT);
Align al = DL.getPrefTypeAlign(RT);
if (al > MAX_ALIGN)
al = Align(MAX_ALIGN);
param.addAttribute(Attribute::NonNull);
// The `dereferenceable` below does not imply `nonnull` for non addrspace(0) pointers.
param.addDereferenceableAttr(sz);
param.addAlignmentAttr(al);
}
attrs[Arg->getArgNo()] = AttributeSet::get(Arg->getContext(), param); // function declaration attributes
}
if (returninfo.return_roots) {
Argument *Arg = &*AI;
++AI;
AttrBuilder param(ctx.builder.getContext(), f->getAttributes().getParamAttrs(Arg->getArgNo()));
param.addAttribute(Attribute::NonNull);
// The `dereferenceable` below does not imply `nonnull` for non addrspace(0) pointers.
size_t size = returninfo.return_roots * sizeof(jl_value_t*);
param.addDereferenceableAttr(size);
param.addAlignmentAttr(Align(sizeof(jl_value_t*)));
attrs[Arg->getArgNo()] = AttributeSet::get(Arg->getContext(), param); // function declaration attributes
}
if (specsig && JL_FEAT_TEST(ctx, gcstack_arg)) {
Argument *Arg = &*AI;
ctx.pgcstack = Arg;
++AI;
AttrBuilder param(ctx.builder.getContext());
attrs[Arg->getArgNo()] = AttributeSet::get(Arg->getContext(), param);
}

allocate_gc_frame(ctx, b0);
Value *last_age = NULL;
Value *world_age_field = NULL;
Expand Down Expand Up @@ -8921,9 +8977,6 @@ static jl_llvm_functions_t
}

// step 8. move args into local variables
Function::arg_iterator AI = f->arg_begin();
SmallVector<AttributeSet, 0> attrs(f->arg_size()); // function declaration attributes

auto get_specsig_arg = [&](jl_value_t *argType, Type *llvmArgType, bool isboxed) {
if (type_is_ghost(llvmArgType)) { // this argument is not actually passed
return ghostValue(ctx, argType);
Expand Down Expand Up @@ -8956,47 +9009,6 @@ static jl_llvm_functions_t
return theArg;
};

if (has_sret) {
Argument *Arg = &*AI;
++AI;
AttrBuilder param(ctx.builder.getContext(), f->getAttributes().getParamAttrs(Arg->getArgNo()));
if (returninfo.cc == jl_returninfo_t::Union) {
param.addAttribute(Attribute::NonNull);
// The `dereferenceable` below does not imply `nonnull` for non addrspace(0) pointers.
param.addDereferenceableAttr(returninfo.union_bytes);
param.addAlignmentAttr(returninfo.union_align);
}
else {
const DataLayout &DL = jl_Module->getDataLayout();
Type *RT = Arg->getParamStructRetType();
TypeSize sz = DL.getTypeAllocSize(RT);
Align al = DL.getPrefTypeAlign(RT);
if (al > MAX_ALIGN)
al = Align(MAX_ALIGN);
param.addAttribute(Attribute::NonNull);
// The `dereferenceable` below does not imply `nonnull` for non addrspace(0) pointers.
param.addDereferenceableAttr(sz);
param.addAlignmentAttr(al);
}
attrs[Arg->getArgNo()] = AttributeSet::get(Arg->getContext(), param); // function declaration attributes
}
if (returninfo.return_roots) {
Argument *Arg = &*AI;
++AI;
AttrBuilder param(ctx.builder.getContext(), f->getAttributes().getParamAttrs(Arg->getArgNo()));
param.addAttribute(Attribute::NonNull);
// The `dereferenceable` below does not imply `nonnull` for non addrspace(0) pointers.
size_t size = returninfo.return_roots * sizeof(jl_value_t*);
param.addDereferenceableAttr(size);
param.addAlignmentAttr(Align(sizeof(jl_value_t*)));
attrs[Arg->getArgNo()] = AttributeSet::get(Arg->getContext(), param); // function declaration attributes
}
if (specsig && JL_FEAT_TEST(ctx, gcstack_arg)){
Argument *Arg = &*AI;
++AI;
AttrBuilder param(ctx.builder.getContext());
attrs[Arg->getArgNo()] = AttributeSet::get(Arg->getContext(), param);
}
for (i = 0; i < nreq && i < vinfoslen; i++) {
jl_sym_t *s = slot_symbol(ctx, i);
jl_varinfo_t &vi = ctx.slots[i];
Expand Down Expand Up @@ -9964,7 +9976,7 @@ static jl_llvm_functions_t
}
}

if (ctx.topalloca->use_empty()) {
if (ctx.topalloca != ctx.pgcstack && ctx.topalloca->use_empty()) {
ctx.topalloca->eraseFromParent();
ctx.topalloca = nullptr;
}
Expand Down
27 changes: 17 additions & 10 deletions src/llvm-final-gc-lowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -161,24 +161,31 @@ void FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F)
target->replaceAllUsesWith(newI);
target->eraseFromParent();
}
bool FinalLowerGC::shouldRunFinalGC(Function &F)

static bool hasUse(const JuliaPassContext &ctx, const jl_intrinsics::IntrinsicDescription &v)
{
auto Intr = ctx.getOrNull(v);
return Intr && !Intr->use_empty();
}

bool FinalLowerGC::shouldRunFinalGC()
{
bool should_run = 0;
should_run |= getOrNull(jl_intrinsics ::newGCFrame) != nullptr;
should_run |= getOrNull(jl_intrinsics ::getGCFrameSlot) != nullptr;
should_run |= getOrNull(jl_intrinsics ::pushGCFrame) != nullptr;
should_run |= getOrNull(jl_intrinsics ::popGCFrame) != nullptr;
should_run |= getOrNull(jl_intrinsics ::GCAllocBytes) != nullptr;
should_run |= getOrNull(jl_intrinsics ::queueGCRoot) != nullptr;
should_run |= getOrNull(jl_intrinsics ::safepoint) != nullptr;
should_run |= hasUse(*this, jl_intrinsics::newGCFrame);
should_run |= hasUse(*this, jl_intrinsics::getGCFrameSlot);
should_run |= hasUse(*this, jl_intrinsics::pushGCFrame);
should_run |= hasUse(*this, jl_intrinsics::popGCFrame);
should_run |= hasUse(*this, jl_intrinsics::GCAllocBytes);
should_run |= hasUse(*this, jl_intrinsics::queueGCRoot);
should_run |= hasUse(*this, jl_intrinsics::safepoint);
return should_run;
}

bool FinalLowerGC::runOnFunction(Function &F)
{
initAll(*F.getParent());
pgcstack = getPGCstack(F);
if (!shouldRunFinalGC(F))
if (!pgcstack || !shouldRunFinalGC())
goto verify_skip;

LLVM_DEBUG(dbgs() << "FINAL GC LOWERING: Processing function " << F.getName() << "\n");
Expand Down Expand Up @@ -232,7 +239,7 @@ bool FinalLowerGC::runOnFunction(Function &F)
auto IS_INTRINSIC = [&](auto intrinsic) {
auto intrinsic2 = getOrNull(intrinsic);
if (intrinsic2 == callee) {
errs() << "Final-GC-lowering didn't eliminate all intrinsics'" << F.getName() << "', dumping entire module!\n\n";
errs() << "Final-GC-lowering didn't eliminate all intrinsics from '" << F.getName() << "', dumping entire module!\n\n";
errs() << *F.getParent() << "\n";
abort();
}
Expand Down
7 changes: 4 additions & 3 deletions src/llvm-gc-interface-passes.h
Original file line number Diff line number Diff line change
Expand Up @@ -328,7 +328,7 @@ struct LateLowerGCFrame: private JuliaPassContext {
bool runOnFunction(Function &F, bool *CFGModified = nullptr);

private:
CallInst *pgcstack;
Value *pgcstack;
Function *smallAllocFunc;

void MaybeNoteDef(State &S, BBState &BBS, Value *Def, const ArrayRef<int> &SafepointsSoFar,
Expand Down Expand Up @@ -388,7 +388,7 @@ struct FinalLowerGC: private JuliaPassContext {
Function *smallAllocFunc;
Function *bigAllocFunc;
Function *allocTypedFunc;
Instruction *pgcstack;
Value *pgcstack;
Type *T_size;

// Lowers a `julia.new_gc_frame` intrinsic.
Expand All @@ -411,8 +411,9 @@ struct FinalLowerGC: private JuliaPassContext {

// Lowers a `julia.safepoint` intrinsic.
void lowerSafepoint(CallInst *target, Function &F);

// Check if the pass should be run
bool shouldRunFinalGC(Function &F);
bool shouldRunFinalGC();
};

#endif // LLVM_GC_PASSES_H
18 changes: 10 additions & 8 deletions src/llvm-late-gc-lowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2219,20 +2219,21 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) {
SmallVector<OperandBundleDef,2> bundles;
CI->getOperandBundlesAsDefs(bundles);
bool gc_transition = false;
Value *ptls;
for (auto &bundle: bundles)
if (bundle.getTag() == "gc-transition")
if (bundle.getTag() == "gc-transition") {
gc_transition = true;
ptls = bundle.inputs()[0];
}

// In theory LLVM wants us to lower this using RewriteStatepointsForGC
if (gc_transition) {
// Insert the operations to switch to gc_safe if necessary.
IRBuilder<> builder(CI);
Value *pgcstack = getOrAddPGCstack(F);
assert(pgcstack);
assert(ptls);
// We dont use emit_state_set here because safepoints are unconditional for any code that reaches this
// We are basically guaranteed to go from gc_unsafe to gc_safe and back, and both transitions need a safepoint
// We also can't add any BBs here, so just avoiding the branches is good
Value *ptls = get_current_ptls_from_task(builder, get_current_task_from_pgcstack(builder, pgcstack), tbaa_gcframe);
unsigned offset = offsetof(jl_tls_states_t, gc_state);
Value *gc_state = builder.CreateConstInBoundsGEP1_32(Type::getInt8Ty(builder.getContext()), ptls, offset, "gc_state");
LoadInst *last_gc_state = builder.CreateAlignedLoad(Type::getInt8Ty(builder.getContext()), gc_state, Align(sizeof(void*)));
Expand All @@ -2249,7 +2250,7 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) {
++it;
continue;
} else {
// remove operand bundle
// remove all operand bundles
CallInst *NewCall = CallInst::Create(CI, None, CI);
NewCall->takeName(CI);
NewCall->copyMetadata(*CI);
Expand Down Expand Up @@ -2395,7 +2396,10 @@ void LateLowerGCFrame::PlaceRootsAndUpdateCalls(ArrayRef<int> Colors, int PreAss
auto pushGcframe = CallInst::Create(
getOrDeclare(jl_intrinsics::pushGCFrame),
{gcframe, ConstantInt::get(T_int32, 0)});
pushGcframe->insertAfter(pgcstack);
if (isa<Argument>(pgcstack))
pushGcframe->insertAfter(gcframe);
else
pushGcframe->insertAfter(cast<Instruction>(pgcstack));

// we don't run memsetopt after this, so run a basic approximation of it
// that removes any redundant memset calls in the prologue since getGCFrameSlot already includes the null store
Expand Down Expand Up @@ -2513,8 +2517,6 @@ bool LateLowerGCFrame::runOnFunction(Function &F, bool *CFGModified) {
initAll(*F.getParent());
smallAllocFunc = getOrDeclare(jl_well_known::GCSmallAlloc);
LLVM_DEBUG(dbgs() << "GC ROOT PLACEMENT: Processing function " << F.getName() << "\n");
if (!pgcstack_getter && !adoptthread_func)
return CleanupIR(F, nullptr, CFGModified);

pgcstack = getPGCstack(F);
if (!pgcstack)
Expand Down
35 changes: 10 additions & 25 deletions src/llvm-pass-helpers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,25 +72,9 @@ void JuliaPassContext::initAll(Module &M)
T_prjlvalue = JuliaType::get_prjlvalue_ty(ctx);
}

llvm::CallInst *JuliaPassContext::getPGCstack(llvm::Function &F) const
llvm::Value *JuliaPassContext::getPGCstack(llvm::Function &F) const
{
if (!pgcstack_getter && !adoptthread_func)
return nullptr;
for (auto &I : F.getEntryBlock()) {
if (CallInst *callInst = dyn_cast<CallInst>(&I)) {
Value *callee = callInst->getCalledOperand();
if ((pgcstack_getter && callee == pgcstack_getter) ||
(adoptthread_func && callee == adoptthread_func)) {
return callInst;
}
}
}
return nullptr;
}

llvm::CallInst *JuliaPassContext::getOrAddPGCstack(llvm::Function &F)
{
if (pgcstack_getter || adoptthread_func)
if (pgcstack_getter || adoptthread_func) {
for (auto &I : F.getEntryBlock()) {
if (CallInst *callInst = dyn_cast<CallInst>(&I)) {
Value *callee = callInst->getCalledOperand();
Expand All @@ -100,13 +84,14 @@ llvm::CallInst *JuliaPassContext::getOrAddPGCstack(llvm::Function &F)
}
}
}
IRBuilder<> builder(&F.getEntryBlock().front());
if (pgcstack_getter)
return builder.CreateCall(pgcstack_getter);
auto FT = FunctionType::get(PointerType::get(F.getContext(), 0), false);
auto F2 = Function::Create(FT, Function::ExternalLinkage, "julia.get_pgcstack", F.getParent());
pgcstack_getter = F2;
return builder.CreateCall( F2);
}
if (F.getCallingConv() == CallingConv::Swift) {
for (auto &arg : F.args()) {
if (arg.hasSwiftSelfAttr())
return &arg;
}
}
return nullptr;
}

llvm::Function *JuliaPassContext::getOrNull(
Expand Down
8 changes: 3 additions & 5 deletions src/llvm-pass-helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,12 +85,10 @@ struct JuliaPassContext {

// Gets a call to the `julia.get_pgcstack' intrinsic in the entry
// point of the given function, if there exists such a call.
// Otherwise, gets a swiftself argument, if there exists such an argument.
// Otherwise, `nullptr` is returned.
llvm::CallInst *getPGCstack(llvm::Function &F) const;
// Gets a call to the `julia.get_pgcstack' intrinsic in the entry
// point of the given function, if there exists such a call.
// Otherwise, creates a new call to the intrinsic
llvm::CallInst *getOrAddPGCstack(llvm::Function &F);
llvm::Value *getPGCstack(llvm::Function &F) const;

// Gets the intrinsic or well-known function that conforms to
// the given description if it exists in the module. If not,
// `nullptr` is returned.
Expand Down
Loading

0 comments on commit 7ff7964

Please sign in to comment.