- Sponsor
-
Notifications
You must be signed in to change notification settings - Fork 5.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Hoist object allocation before inner field initialization #45153
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
Consider the following pattern for building up nested objects ``` %obj = Expr(:new, obj, ...) %obj_wrapper = Expr(:new, obj_wrapper, ..., %obj) %obj_wrapper2 = Expr(:new, obj_wrapper2, ..., %obj_wrapper) %outer = Expr(:new, outer, %obj_wrapper2) ``` Asssuming everything except `struct outer` is struct-inlineable, the LLVM IR we emit looks something like the following: ``` %obj = alloca %obj_wrapper = alloca %obj_wrapper_wrapper = alloca %obj = alloca init(%obj, <obj>) init(%obj_wrapper, <obj_wrapper>); memcpy(%obj_wrapper, %obj) init(%obj_wrapper2, <obj_wrapper2>); memcpy(%obj_wrapper2, %obj_wrapper) init(%outer, <outer>); memcpy(%outer, %obj_wrapper2) %outer_boxed = julia.gc_alloc memcpy(%outer_boxed, %outer) ``` While LLVM is capable of removing all the allocas and memcpys, it's taking an unreasonable amount of time to do so. This PR introduces a small optimization into the frontend lowering for `:new`: If all the :new calls are in the same LLVM basic block, then we delete the alloca, and hoist the allocation of the object to the earliest point before the initialization of the fields. This gives essentially the same result as LLVM would have given us post-optimization, but is much cheaper to do because we don't have to perform any analysis to tell us that it is a legal optimization. In the above example, we would end up with something like: ``` %outer_boxed = julia.gc_alloc init(%outer_boxed, <obj>) init(%outer_boxed, <obj_wrapper>); init(%outer_boxed, <obj_wrapper2>); init(%outer_boxed, <outer>); ``` Of course this does extend the liftime of the outer object, but I don't think that's a particular problem as long as we're careful not to hoist any boxings out of error paths. In the current implementation, I only allow this optimization to occur in the same LLVM basic block, but I think it should be fine to extend it to allow the same julia basic block or more generally, any allocation that post-dominates the relevant promotion points.
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||
---|---|---|---|---|---|---|---|---|
|
@@ -3164,6 +3164,33 @@ static Value *box_union(jl_codectx_t &ctx, const jl_cgval_t &vinfo, const SmallB | |||||||
return box_merge; | ||||||||
} | ||||||||
|
||||||||
static void recursively_adjust_ptr_type(llvm::Value *Val, unsigned FromAS, unsigned ToAS) | ||||||||
{ | ||||||||
for (auto *User : Val->users()) { | ||||||||
if (isa<GetElementPtrInst>(User)) { | ||||||||
GetElementPtrInst *Inst = cast<GetElementPtrInst>(User); | ||||||||
Inst->mutateType(PointerType::getWithSamePointeeType(cast<PointerType>(Inst->getType()), ToAS)); | ||||||||
recursively_adjust_ptr_type(Inst, FromAS, ToAS); | ||||||||
} | ||||||||
else if (isa<IntrinsicInst>(User)) { | ||||||||
IntrinsicInst *II = cast<IntrinsicInst>(User); | ||||||||
SmallVector<Type*, 3> ArgTys; | ||||||||
Intrinsic::getIntrinsicSignature(II->getCalledFunction(), ArgTys); | ||||||||
assert(ArgTys.size() <= II->arg_size()); | ||||||||
for (size_t i = 0; i < ArgTys.size(); ++i) | ||||||||
ArgTys[i] = II->getArgOperand(i)->getType(); | ||||||||
II->setCalledFunction(Intrinsic::getDeclaration(II->getModule(), II->getIntrinsicID(), ArgTys)); | ||||||||
} | ||||||||
#ifndef JL_LLVM_OPAQUE_POINTERS | ||||||||
else if (isa<BitCastInst>(User)) { | ||||||||
BitCastInst *Inst = cast<BitCastInst>(User); | ||||||||
Inst->mutateType(PointerType::getWithSamePointeeType(cast<PointerType>(Inst->getType()), ToAS)); | ||||||||
recursively_adjust_ptr_type(Inst, FromAS, ToAS); | ||||||||
} | ||||||||
#endif | ||||||||
} | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This may also be required to handle ICmpEQ and AddrspaceCast, but I have the impression we just care about things that could appear in the store code-path? It would be good to document in a comment here that |
||||||||
} | ||||||||
|
||||||||
// this is used to wrap values for generic contexts, where a | ||||||||
// dynamically-typed value is required (e.g. argument to unknown function). | ||||||||
// if it's already a pointer it's left alone. | ||||||||
|
@@ -3197,8 +3224,27 @@ static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &vinfo) | |||||||
assert(!type_is_ghost(t)); // ghost values should have been handled by vinfo.constant above! | ||||||||
box = _boxed_special(ctx, vinfo, t); | ||||||||
if (!box) { | ||||||||
box = emit_allocobj(ctx, jl_datatype_size(jt), literal_pointer_val(ctx, (jl_value_t*)jt)); | ||||||||
init_bits_cgval(ctx, box, vinfo, jl_is_mutable(jt) ? ctx.tbaa().tbaa_mutab : ctx.tbaa().tbaa_immut); | ||||||||
bool do_promote = vinfo.promotion_point; | ||||||||
if (do_promote) { | ||||||||
auto IP = ctx.builder.saveIP(); | ||||||||
ctx.builder.SetInsertPoint(vinfo.promotion_point); | ||||||||
box = emit_allocobj(ctx, jl_datatype_size(jt), literal_pointer_val(ctx, (jl_value_t*)jt)); | ||||||||
Value *decayed = decay_derived(ctx, box); | ||||||||
AllocaInst *originalAlloca = cast<AllocaInst>(vinfo.V); | ||||||||
#ifndef JL_LLVM_OPAQUE_POINTERS | ||||||||
decayed = maybe_bitcast(ctx, decayed, PointerType::get(originalAlloca->getType()->getPointerElementType(), AddressSpace::Derived)); | ||||||||
#endif | ||||||||
// Warning: Very illegal IR here temporarily | ||||||||
originalAlloca->mutateType(decayed->getType()); | ||||||||
recursively_adjust_ptr_type(originalAlloca, 0, AddressSpace::Derived); | ||||||||
originalAlloca->replaceAllUsesWith(decayed); | ||||||||
// end illegal IR | ||||||||
Comment on lines
+3237
to
+3241
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would it be better if we emitted the original alloca with an addrspacecast to the derived space, so that we don't require any illegal operations to convert it later? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You mean when the nested struct is first emitted already cast the alloca to the derived space? |
||||||||
cast<Instruction>(vinfo.V)->eraseFromParent(); | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Cast-away-const should not be done. Don't mark the argument There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is there any risk that the numerous invalid pointers this leaves behind will be re-examined later? In particular, we wouldn't previously have assumed that In particular, There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I thought about this for a bit, we could use |
||||||||
ctx.builder.restoreIP(IP); | ||||||||
} else { | ||||||||
box = emit_allocobj(ctx, jl_datatype_size(jt), literal_pointer_val(ctx, (jl_value_t*)jt)); | ||||||||
init_bits_cgval(ctx, box, vinfo, jl_is_mutable(jt) ? ctx.tbaa().tbaa_mutab : ctx.tbaa().tbaa_immut); | ||||||||
} | ||||||||
} | ||||||||
} | ||||||||
return box; | ||||||||
|
@@ -3505,7 +3551,7 @@ static jl_cgval_t emit_setfield(jl_codectx_t &ctx, | |||||||
} | ||||||||
} | ||||||||
|
||||||||
static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t nargs, const jl_cgval_t *argv) | ||||||||
static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t nargs, const jl_cgval_t *argv, bool is_promotable) | ||||||||
{ | ||||||||
++EmittedNewStructs; | ||||||||
assert(jl_is_datatype(ty)); | ||||||||
|
@@ -3528,6 +3574,8 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg | |||||||
init_as_value = true; | ||||||||
} | ||||||||
|
||||||||
Instruction *promotion_point = nullptr; | ||||||||
ssize_t promotion_ssa = -1; | ||||||||
Value *strct; | ||||||||
if (type_is_ghost(lt)) { | ||||||||
strct = NULL; | ||||||||
|
@@ -3547,8 +3595,17 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg | |||||||
for (unsigned i = 0; i < na; i++) { | ||||||||
jl_value_t *jtype = jl_svecref(sty->types, i); // n.b. ty argument must be concrete | ||||||||
jl_cgval_t fval_info = argv[i]; | ||||||||
|
||||||||
IRBuilderBase::InsertPoint savedIP; | ||||||||
emit_typecheck(ctx, fval_info, jtype, "new"); | ||||||||
fval_info = update_julia_type(ctx, fval_info, jtype); | ||||||||
// TODO: Use (post-)domination instead. | ||||||||
bool field_promotable = !init_as_value && fval_info.promotion_ssa != -1 && | ||||||||
fval_info.promotion_point && fval_info.promotion_point->getParent() == ctx.builder.GetInsertBlock(); | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There may also need to be a check here that the type being stored here is compatible, since we might not post-dominate in execution (e.g. via a typeassert call), even though we observe post-domination in IR order (yes, this is also a problem with our current errors violating dominance)? The There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, I'm struggling with this one too :/ so I defer to @Keno. But what I got is that a typeassert might make us do an incompatible store but we don't see that because it doesn't dominate the IR we are locally looking at? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hypothetically, the store might be post-dominated in the normal flow of the IR, but be removed by an exception in the runtime flow |
||||||||
if (field_promotable) { | ||||||||
savedIP = ctx.builder.saveIP(); | ||||||||
ctx.builder.SetInsertPoint(fval_info.promotion_point); | ||||||||
} | ||||||||
if (type_is_ghost(lt)) | ||||||||
continue; | ||||||||
Type *fty = julia_type_to_llvm(ctx, jtype); | ||||||||
|
@@ -3560,7 +3617,26 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg | |||||||
if (!init_as_value) { | ||||||||
// avoid unboxing the argument explicitly | ||||||||
// and use memcpy instead | ||||||||
dest = ctx.builder.CreateConstInBoundsGEP2_32(lt, strct, 0, llvm_idx); | ||||||||
Instruction *inst; | ||||||||
#ifndef JL_LLVM_OPAQUE_POINTERS | ||||||||
dest = inst = cast<Instruction>(ctx.builder.CreateConstInBoundsGEP2_32(lt, strct, 0, llvm_idx)); | ||||||||
#else | ||||||||
dest = inst = cast<Instruction>(ctx.builder.CreateConstInBoundsGEP1_32(getInt8Ty(ctx.builder.getContext()), strct, offs)); | ||||||||
#endif | ||||||||
// Our promotion point needs to come before | ||||||||
// A) All of our arguments' promotion points | ||||||||
// B) Any instructions we insert at any of our arguments' promotion points | ||||||||
// N.B.: Do not use Instruction::comesBefore here. LLVM invalidates its instruction numbering after | ||||||||
// every insert, so querying it here makes code generation accidentally quadartic. | ||||||||
if (!promotion_point) { | ||||||||
promotion_point = inst; | ||||||||
promotion_ssa = fval_info.promotion_ssa; | ||||||||
} else if (field_promotable) { | ||||||||
if (promotion_ssa == -1 || fval_info.promotion_ssa < promotion_ssa) { | ||||||||
promotion_point = inst; | ||||||||
promotion_ssa = fval_info.promotion_ssa; | ||||||||
} | ||||||||
} | ||||||||
} | ||||||||
Value *fval = NULL; | ||||||||
if (jl_field_isptr(sty, i)) { | ||||||||
|
@@ -3571,6 +3647,7 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg | |||||||
->setOrdering(AtomicOrdering::Unordered); | ||||||||
} | ||||||||
else if (jl_is_uniontype(jtype)) { | ||||||||
assert(!field_promotable); | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This doesn't seem to be ensured anywhere, but just arbitrarily asserted here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I haven't seen that assertion being hit but I will have to defer to @Keno on this one. If we aren't checking anywhere we could just it to Lines 3609 to 3610 in 6d9ae9d
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Functions that return union cgvals don't give it a promotion point (or at least they shouldn't at the moment, which is what the assert is for). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This branch seemed to be about fieldtypes, not return types? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Here is an example of a failure this caused, for reference: https://s3.amazonaws.com/julialang-reports/nanosoldier/pkgeval/by_hash/1c5858b_vs_1c5858b/SumTypes.primary.log There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I can add a explicit check here but it might just be a bandaid |
||||||||
// compute tindex from rhs | ||||||||
jl_cgval_t rhs_union = convert_julia_type(ctx, fval_info, jtype); | ||||||||
if (rhs_union.typ == jl_bottom_type) | ||||||||
|
@@ -3623,7 +3700,12 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg | |||||||
} | ||||||||
} | ||||||||
else { | ||||||||
fval = emit_unbox(ctx, fty, fval_info, jtype, dest, ctx.tbaa().tbaa_stack); | ||||||||
if (field_promotable) { | ||||||||
fval_info.V->replaceAllUsesWith(dest); | ||||||||
cast<Instruction>(fval_info.V)->eraseFromParent(); | ||||||||
} else { | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||
fval = emit_unbox(ctx, fty, fval_info, jtype, dest, ctx.tbaa().tbaa_stack); | ||||||||
} | ||||||||
} | ||||||||
if (init_as_value) { | ||||||||
assert(fval); | ||||||||
|
@@ -3636,6 +3718,9 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg | |||||||
else | ||||||||
assert(false); | ||||||||
} | ||||||||
if (field_promotable) { | ||||||||
ctx.builder.restoreIP(savedIP); | ||||||||
} | ||||||||
} | ||||||||
for (size_t i = nargs; i < nf; i++) { | ||||||||
if (!jl_field_isptr(sty, i) && jl_is_uniontype(jl_field_type(sty, i))) { | ||||||||
|
@@ -3655,8 +3740,14 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg | |||||||
return mark_julia_const(ctx, sty->instance); | ||||||||
else if (init_as_value) | ||||||||
return mark_julia_type(ctx, strct, false, ty); | ||||||||
else | ||||||||
return mark_julia_slot(strct, ty, NULL, ctx.tbaa(), ctx.tbaa().tbaa_stack); | ||||||||
else { | ||||||||
jl_cgval_t ret = mark_julia_slot(strct, ty, NULL, ctx.tbaa(), ctx.tbaa().tbaa_stack); | ||||||||
if (is_promotable && promotion_point) { | ||||||||
ret.promotion_point = promotion_point; | ||||||||
ret.promotion_ssa = promotion_ssa; | ||||||||
} | ||||||||
return ret; | ||||||||
} | ||||||||
} | ||||||||
Value *strct = emit_allocobj(ctx, jl_datatype_size(sty), | ||||||||
literal_pointer_val(ctx, (jl_value_t*)ty)); | ||||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
See the
function_sig_t::emit_a_ccall
function for how to minimally remangle a function name correctly (this is not correct). Or see https://llvm.org/doxygen/namespacellvm_1_1Intrinsic.html#a008c247b3c1a9df8b3103a897a967078 for a complicated version (I don't know why LLVM doesn't expose a more convenient wrapper of this, as it is a useful operation we have needed many times)