Skip to content

Commit

Permalink
Refine out callee rooted values from live set at the call site (#37197)
Browse files Browse the repository at this point in the history
  • Loading branch information
yuyichao authored Aug 30, 2020
1 parent 0336f67 commit 41e603e
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 5 deletions.
42 changes: 37 additions & 5 deletions src/llvm-late-gc-lowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,8 @@ struct State {
// Those values that - if live out from our parent basic block - are live
// at this safepoint.
std::vector<std::vector<int>> LiveIfLiveOut;
// The set of values that are kept alive by the callee.
std::vector<std::vector<int>> CalleeRoots;
// We don't bother doing liveness on Allocas that were not mem2reg'ed.
// they just get directly sunk into the root array.
std::vector<AllocaInst *> Allocas;
Expand Down Expand Up @@ -359,7 +361,7 @@ struct LateLowerGCFrame: public FunctionPass, private JuliaPassContext {
void NoteUseChain(State &S, BBState &BBS, User *TheUser);
SmallVector<int, 1> GetPHIRefinements(PHINode *phi, State &S);
void FixUpRefinements(ArrayRef<int> PHINumbers, State &S);
void RefineLiveSet(BitVector &LS, State &S);
void RefineLiveSet(BitVector &LS, State &S, const std::vector<int> &CalleeRoots);
Value *EmitTagPtr(IRBuilder<> &builder, Type *T, Value *V);
Value *EmitLoadTag(IRBuilder<> &builder, Value *V);
};
Expand Down Expand Up @@ -1002,7 +1004,7 @@ void LateLowerGCFrame::MaybeNoteDef(State &S, BBState &BBS, Value *Def, const st
}
}

static int NoteSafepoint(State &S, BBState &BBS, CallInst *CI) {
static int NoteSafepoint(State &S, BBState &BBS, CallInst *CI, std::vector<int> CalleeRoots) {
int Number = ++S.MaxSafepointNumber;
S.SafepointNumbering[CI] = Number;
S.ReverseSafepointNumbering.push_back(CI);
Expand All @@ -1012,6 +1014,7 @@ static int NoteSafepoint(State &S, BBState &BBS, CallInst *CI) {
// computation)
S.LiveSets.push_back(BBS.UpExposedUses);
S.LiveIfLiveOut.push_back(std::vector<int>{});
S.CalleeRoots.push_back(std::move(CalleeRoots));
return Number;
}

Expand Down Expand Up @@ -1515,7 +1518,25 @@ State LateLowerGCFrame::LocalScan(Function &F) {
// Intrinsics are never safepoints.
continue;
}
int SafepointNumber = NoteSafepoint(S, BBS, CI);
std::vector<int> CalleeRoots;
for (Use &U : CI->arg_operands()) {
// Find all callee rooted arguments.
// Record them instead of simply removing them from the live values here
// since they can be useful during refinement
// (e.g. to remove roots of objects that are refined to these)
Value *V = U;
if (isa<Constant>(V) || !isa<PointerType>(V->getType()) ||
getValueAddrSpace(V) != AddressSpace::CalleeRooted)
continue;
V = V->stripPointerCasts();
if (!isTrackedValue(V))
continue;
auto Num = Number(S, V);
if (Num < 0)
continue;
CalleeRoots.push_back(Num);
}
int SafepointNumber = NoteSafepoint(S, BBS, CI, std::move(CalleeRoots));
BBS.HasSafepoint = true;
BBS.TopmostSafepoint = SafepointNumber;
BBS.Safepoints.push_back(SafepointNumber);
Expand Down Expand Up @@ -1845,12 +1866,18 @@ JL_USED_FUNC static void dumpSafepointsForBBName(Function &F, State &S, const ch
}
}

void LateLowerGCFrame::RefineLiveSet(BitVector &LS, State &S)
void LateLowerGCFrame::RefineLiveSet(BitVector &LS, State &S, const std::vector<int> &CalleeRoots)
{
BitVector FullLS(S.MaxPtrNumber + 1, false);
FullLS |= LS;
// First expand the live set according to the refinement map
// so that we can see all the values that are effectively live.
for (auto Num: CalleeRoots) {
// For callee rooted values, they are all kept alive at the safepoint.
// Make sure they are marked (even though they probably are already)
// so that other values can be refined to them.
FullLS[Num] = 1;
}
bool changed;
do {
changed = false;
Expand Down Expand Up @@ -1891,6 +1918,11 @@ void LateLowerGCFrame::RefineLiveSet(BitVector &LS, State &S)
LS[Idx] = 0;
}
}
for (auto Num: CalleeRoots) {
// Now unmark all values that are rooted by the callee after
// refining other values to them.
LS[Num] = 0;
}
}

void LateLowerGCFrame::ComputeLiveSets(State &S) {
Expand All @@ -1909,7 +1941,7 @@ void LateLowerGCFrame::ComputeLiveSets(State &S) {
if (HasBitSet(BBS.LiveOut, Live))
LS[Live] = 1;
}
RefineLiveSet(LS, S);
RefineLiveSet(LS, S, S.CalleeRoots[idx]);
// If the function has GC preserves, figure out whether we need to
// add in any extra live values.
if (!S.GCPreserves.empty()) {
Expand Down
19 changes: 19 additions & 0 deletions test/llvmpasses/late-lower-gc.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ declare {}*** @julia.ptls_states()
declare void @jl_safepoint()
declare {} addrspace(10)* @jl_apply_generic({} addrspace(10)*, {} addrspace(10)**, i32)
declare noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj(i8*, i64, {} addrspace(10)*)
declare i32 @rooting_callee({} addrspace(12)*, {} addrspace(12)*)

define void @gc_frame_lowering(i64 %a, i64 %b) {
top:
Expand Down Expand Up @@ -74,6 +75,24 @@ top:
ret void
}

; Regression test for call arguments passed in addrspace(12).
; Two values are loaded out of the incoming arguments, cast to addrspace(12)
; and handed directly to @rooting_callee. The directives inside the body
; require that no @julia.new_gc_frame shows up between the function label and
; its `ret i32`, i.e. the pass is expected not to emit a GC frame here.
; NOTE(review): addrspace(12) is presumably the pass's CalleeRooted address
; space - confirm against the AddressSpace enum used by the lowering pass.
define i32 @callee_root({} addrspace(10)* %v0, {} addrspace(10)* %v1) {
top:
; CHECK-LABEL: @callee_root
; CHECK-NOT: @julia.new_gc_frame
%v2 = call {}*** @julia.ptls_states()
; Reinterpret %v0 as a pointer to an addrspace(10) slot, view it through
; addrspace(11), and load the first value (%v5) from it.
%v3 = bitcast {} addrspace(10)* %v0 to {} addrspace(10)* addrspace(10)*
%v4 = addrspacecast {} addrspace(10)* addrspace(10)* %v3 to {} addrspace(10)* addrspace(11)*
%v5 = load atomic {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %v4 unordered, align 8
; Same sequence for %v1, producing the second loaded value (%v8).
%v6 = bitcast {} addrspace(10)* %v1 to {} addrspace(10)* addrspace(10)*
%v7 = addrspacecast {} addrspace(10)* addrspace(10)* %v6 to {} addrspace(10)* addrspace(11)*
%v8 = load atomic {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %v7 unordered, align 8
; Cast both loaded values to addrspace(12) so that they are the only operands
; of the call below; the call is the sole use of %v5 and %v8 in this function.
%v9 = addrspacecast {} addrspace(10)* %v5 to {} addrspace(12)*
%v10 = addrspacecast {} addrspace(10)* %v8 to {} addrspace(12)*
%v11 = call i32 @rooting_callee({} addrspace(12)* %v9, {} addrspace(12)* %v10)
ret i32 %v11
; CHECK: ret i32
}

!0 = !{i64 0, i64 23}
!1 = !{}
!2 = distinct !{!2}
Expand Down

2 comments on commit 41e603e

@nanosoldier
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Executing the daily benchmark build, I will reply here when finished:

@nanosoldier runbenchmarks(ALL, isdaily = true)

@nanosoldier
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Your benchmark job has completed - possible performance regressions were detected. A full report can be found here. cc @ararslan

Please sign in to comment.