Skip to content

Commit

Permalink
Hoist object allocation before inner field initialization (#45153)
Browse files Browse the repository at this point in the history
* Hoist object allocation before inner field initialization

Consider the following pattern for building up nested objects
```
%obj = Expr(:new, obj, ...)
%obj_wrapper = Expr(:new, obj_wrapper, ..., %obj)
%obj_wrapper2 = Expr(:new, obj_wrapper2, ..., %obj_wrapper)
%outer = Expr(:new, outer, %obj_wrapper2)
```

Assuming everything except `struct outer` is struct-inlineable,
the LLVM IR we emit looks something like the following:

```
%obj = alloca
%obj_wrapper = alloca
%obj_wrapper2 = alloca
%outer = alloca

init(%obj, <obj>)
init(%obj_wrapper, <obj_wrapper>); memcpy(%obj_wrapper, %obj)
init(%obj_wrapper2, <obj_wrapper2>); memcpy(%obj_wrapper2, %obj_wrapper)
init(%outer, <outer>); memcpy(%outer, %obj_wrapper2)

%outer_boxed = julia.gc_alloc
memcpy(%outer_boxed, %outer)
```

While LLVM is capable of removing all the allocas and memcpys, it's
taking an unreasonable amount of time to do so.

This PR introduces a small optimization into the frontend lowering
for `:new`: If all the :new calls are in the same LLVM basic block,
then we delete the alloca, and hoist the allocation of the object
to the earliest point before the initialization of the fields.

This gives essentially the same result as LLVM would have given us
post-optimization, but is much cheaper to do because we don't have
to perform any analysis to tell us that it is a legal optimization.

In the above example, we would end up with something like:
```
%outer_boxed = julia.gc_alloc
init(%outer_boxed, <obj>)
init(%outer_boxed, <obj_wrapper>);
init(%outer_boxed, <obj_wrapper2>);
init(%outer_boxed, <outer>);
```

Of course this does extend the lifetime of the outer object, but I
don't think that's a particular problem as long as we're careful
not to hoist any boxings out of error paths. In the current
implementation, I only allow this optimization to occur in the
same LLVM basic block, but I think it should be fine to extend
it to allow the same julia basic block or more generally, any
allocation that post-dominates the relevant promotion points.

* fix codegen bug

Co-authored-by: Keno Fischer <keno@juliacomputing.com>
gbaraldi and Keno authored May 12, 2022
1 parent bf2c2e8 commit 93ac4a3
Showing 2 changed files with 197 additions and 51 deletions.
104 changes: 97 additions & 7 deletions src/cgutils.cpp
Original file line number Diff line number Diff line change
@@ -3163,6 +3163,33 @@ static Value *box_union(jl_codectx_t &ctx, const jl_cgval_t &vinfo, const SmallB
return box_merge;
}

// Rewrite the address space of every pointer-typed value derived from `Val`
// (via GEP chains and, in typed-pointer builds, bitcasts) from `FromAS` to
// `ToAS`, and re-resolve any overloaded intrinsic calls whose signatures
// depend on those operand types. Used after an alloca has been mutated to a
// Derived-addrspace pointer so its transitive users become type-consistent
// again. N.B.: the IR is transiently invalid while this walk is in progress;
// the caller is responsible for completing the repair (see the
// promotion-point handling in `boxed`).
static void recursively_adjust_ptr_type(llvm::Value *Val, unsigned FromAS, unsigned ToAS)
{
for (auto *User : Val->users()) {
if (isa<GetElementPtrInst>(User)) {
GetElementPtrInst *Inst = cast<GetElementPtrInst>(User);
// A GEP result inherits the address space of its pointer operand, so
// mutate the result type in place, then fix up the GEP's own users.
Inst->mutateType(PointerType::getWithSamePointeeType(cast<PointerType>(Inst->getType()), ToAS));
recursively_adjust_ptr_type(Inst, FromAS, ToAS);
}
else if (isa<IntrinsicInst>(User)) {
IntrinsicInst *II = cast<IntrinsicInst>(User);
// Overloaded intrinsics (e.g. memcpy, lifetime markers) encode operand
// address spaces in their mangled name, so re-derive the declaration
// from the (already-mutated) argument types.
SmallVector<Type*, 3> ArgTys;
Intrinsic::getIntrinsicSignature(II->getCalledFunction(), ArgTys);
// getIntrinsicSignature yields only the overloadable parameter slots,
// which is a prefix of (or equal to) the call's actual argument list.
assert(ArgTys.size() <= II->arg_size());
for (size_t i = 0; i < ArgTys.size(); ++i)
ArgTys[i] = II->getArgOperand(i)->getType();
II->setCalledFunction(Intrinsic::getDeclaration(II->getModule(), II->getIntrinsicID(), ArgTys));
// Intrinsic results here are not pointers into the object, so no
// recursion is needed on this branch.
}
#ifndef JL_LLVM_OPAQUE_POINTERS
// With typed pointers, bitcasts may sit between the alloca and its real
// users; propagate the address-space change through them as well. Under
// opaque pointers these casts do not exist, and any other user kind is
// intentionally left untouched.
else if (isa<BitCastInst>(User)) {
BitCastInst *Inst = cast<BitCastInst>(User);
Inst->mutateType(PointerType::getWithSamePointeeType(cast<PointerType>(Inst->getType()), ToAS));
recursively_adjust_ptr_type(Inst, FromAS, ToAS);
}
#endif
}
}

// this is used to wrap values for generic contexts, where a
// dynamically-typed value is required (e.g. argument to unknown function).
// if it's already a pointer it's left alone.
@@ -3196,8 +3223,27 @@ static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &vinfo)
assert(!type_is_ghost(t)); // ghost values should have been handled by vinfo.constant above!
box = _boxed_special(ctx, vinfo, t);
if (!box) {
box = emit_allocobj(ctx, jl_datatype_size(jt), literal_pointer_val(ctx, (jl_value_t*)jt));
init_bits_cgval(ctx, box, vinfo, jl_is_mutable(jt) ? ctx.tbaa().tbaa_mutab : ctx.tbaa().tbaa_immut);
bool do_promote = vinfo.promotion_point;
if (do_promote) {
auto IP = ctx.builder.saveIP();
ctx.builder.SetInsertPoint(vinfo.promotion_point);
box = emit_allocobj(ctx, jl_datatype_size(jt), literal_pointer_val(ctx, (jl_value_t*)jt));
Value *decayed = decay_derived(ctx, box);
AllocaInst *originalAlloca = cast<AllocaInst>(vinfo.V);
#ifndef JL_LLVM_OPAQUE_POINTERS
decayed = maybe_bitcast(ctx, decayed, PointerType::get(originalAlloca->getType()->getPointerElementType(), AddressSpace::Derived));
#endif
// Warning: Very illegal IR here temporarily
originalAlloca->mutateType(decayed->getType());
recursively_adjust_ptr_type(originalAlloca, 0, AddressSpace::Derived);
originalAlloca->replaceAllUsesWith(decayed);
// end illegal IR
cast<Instruction>(vinfo.V)->eraseFromParent();
ctx.builder.restoreIP(IP);
} else {
box = emit_allocobj(ctx, jl_datatype_size(jt), literal_pointer_val(ctx, (jl_value_t*)jt));
init_bits_cgval(ctx, box, vinfo, jl_is_mutable(jt) ? ctx.tbaa().tbaa_mutab : ctx.tbaa().tbaa_immut);
}
}
}
return box;
@@ -3511,7 +3557,7 @@ static jl_cgval_t emit_setfield(jl_codectx_t &ctx,
}
}

static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t nargs, const jl_cgval_t *argv)
static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t nargs, const jl_cgval_t *argv, bool is_promotable)
{
++EmittedNewStructs;
assert(jl_is_datatype(ty));
@@ -3534,6 +3580,8 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg
init_as_value = true;
}

Instruction *promotion_point = nullptr;
ssize_t promotion_ssa = -1;
Value *strct;
if (type_is_ghost(lt)) {
strct = NULL;
@@ -3553,8 +3601,17 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg
for (unsigned i = 0; i < na; i++) {
jl_value_t *jtype = jl_svecref(sty->types, i); // n.b. ty argument must be concrete
jl_cgval_t fval_info = argv[i];

IRBuilderBase::InsertPoint savedIP;
emit_typecheck(ctx, fval_info, jtype, "new");
fval_info = update_julia_type(ctx, fval_info, jtype);
// TODO: Use (post-)domination instead.
bool field_promotable = !init_as_value && fval_info.promotion_ssa != -1 &&
fval_info.promotion_point && fval_info.promotion_point->getParent() == ctx.builder.GetInsertBlock();
if (field_promotable) {
savedIP = ctx.builder.saveIP();
ctx.builder.SetInsertPoint(fval_info.promotion_point);
}
if (type_is_ghost(lt))
continue;
Type *fty = julia_type_to_llvm(ctx, jtype);
@@ -3566,7 +3623,25 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg
if (!init_as_value) {
// avoid unboxing the argument explicitly
// and use memcpy instead
dest = ctx.builder.CreateConstInBoundsGEP2_32(lt, strct, 0, llvm_idx);
Instruction *inst;
#ifndef JL_LLVM_OPAQUE_POINTERS
dest = inst = cast<Instruction>(ctx.builder.CreateConstInBoundsGEP2_32(lt, strct, 0, llvm_idx));
#else
dest = inst = cast<Instruction>(ctx.builder.CreateConstInBoundsGEP1_32(getInt8Ty(ctx.builder.getContext()), strct, offs));
#endif
// Our promotion point needs to come before
// A) All of our arguments' promotion points
// B) Any instructions we insert at any of our arguments' promotion points
// N.B.: Do not use Instruction::comesBefore here. LLVM invalidates its instruction numbering after
// every insert, so querying it here makes code generation accidentally quadratic.
if (field_promotable) {
if (promotion_ssa == -1 || fval_info.promotion_ssa < promotion_ssa) {
promotion_point = inst;
promotion_ssa = fval_info.promotion_ssa;
}
} else if (!promotion_point) {
promotion_point = inst;
}
}
Value *fval = NULL;
if (jl_field_isptr(sty, i)) {
@@ -3577,6 +3652,7 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg
->setOrdering(AtomicOrdering::Unordered);
}
else if (jl_is_uniontype(jtype)) {
assert(!field_promotable);
// compute tindex from rhs
jl_cgval_t rhs_union = convert_julia_type(ctx, fval_info, jtype);
if (rhs_union.typ == jl_bottom_type)
@@ -3629,7 +3705,12 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg
}
}
else {
fval = emit_unbox(ctx, fty, fval_info, jtype, dest, ctx.tbaa().tbaa_stack);
if (field_promotable) {
fval_info.V->replaceAllUsesWith(dest);
cast<Instruction>(fval_info.V)->eraseFromParent();
} else {
fval = emit_unbox(ctx, fty, fval_info, jtype, dest, ctx.tbaa().tbaa_stack);
}
}
if (init_as_value) {
assert(fval);
@@ -3642,6 +3723,9 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg
else
assert(false);
}
if (field_promotable) {
ctx.builder.restoreIP(savedIP);
}
}
for (size_t i = nargs; i < nf; i++) {
if (!jl_field_isptr(sty, i) && jl_is_uniontype(jl_field_type(sty, i))) {
@@ -3661,8 +3745,14 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg
return mark_julia_const(ctx, sty->instance);
else if (init_as_value)
return mark_julia_type(ctx, strct, false, ty);
else
return mark_julia_slot(strct, ty, NULL, ctx.tbaa(), ctx.tbaa().tbaa_stack);
else {
jl_cgval_t ret = mark_julia_slot(strct, ty, NULL, ctx.tbaa(), ctx.tbaa().tbaa_stack);
if (is_promotable && promotion_point) {
ret.promotion_point = promotion_point;
ret.promotion_ssa = promotion_ssa;
}
return ret;
}
}
Value *strct = emit_allocobj(ctx, jl_datatype_size(sty),
literal_pointer_val(ctx, (jl_value_t*)ty));
Loading

2 comments on commit 93ac4a3

@nanosoldier
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Executing the daily package evaluation, I will reply here when finished:

@nanosoldier runtests(ALL, isdaily = true)

@nanosoldier
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Your package evaluation job has completed - possible new issues were detected. A full report can be found here.

Please sign in to comment.