-
Notifications
You must be signed in to change notification settings - Fork 13.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[coro][CoroSplit] Use llvm.lifetime.end
to compute putting objects on the frame vs the stack
#90265
Conversation
@llvm/pr-subscribers-llvm-analysis @llvm/pr-subscribers-coroutines Author: Alan Zhao (alanzhao1) ChangesThe current logic for using lifetime intrinsics to determine whether a To fix this, new logic is introduced where if an alloca's address is Fixes #86580 Full diff: https://github.com/llvm/llvm-project/pull/90265.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
index 08a4522e3fac6d..81d25f9e617a90 100644
--- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
@@ -19,6 +19,7 @@
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/PtrUseVisitor.h"
#include "llvm/Analysis/StackLifetime.h"
#include "llvm/Config/llvm-config.h"
@@ -1441,9 +1442,11 @@ struct AllocaUseVisitor : PtrUseVisitor<AllocaUseVisitor> {
using Base = PtrUseVisitor<AllocaUseVisitor>;
AllocaUseVisitor(const DataLayout &DL, const DominatorTree &DT,
const CoroBeginInst &CB, const SuspendCrossingInfo &Checker,
- bool ShouldUseLifetimeStartInfo)
+ bool ShouldUseLifetimeStartInfo,
+ const coro::Shape &CoroShape)
: PtrUseVisitor(DL), DT(DT), CoroBegin(CB), Checker(Checker),
- ShouldUseLifetimeStartInfo(ShouldUseLifetimeStartInfo) {}
+ ShouldUseLifetimeStartInfo(ShouldUseLifetimeStartInfo),
+ CoroShape(CoroShape) {}
void visit(Instruction &I) {
Users.insert(&I);
@@ -1550,6 +1553,8 @@ struct AllocaUseVisitor : PtrUseVisitor<AllocaUseVisitor> {
}
void visitIntrinsicInst(IntrinsicInst &II) {
+ if (II.getIntrinsicID() == Intrinsic::lifetime_end)
+ LifetimeEndBBs.insert(II.getParent());
// When we found the lifetime markers refers to a
// subrange of the original alloca, ignore the lifetime
// markers to avoid misleading the analysis.
@@ -1594,8 +1599,10 @@ struct AllocaUseVisitor : PtrUseVisitor<AllocaUseVisitor> {
DenseMap<Instruction *, std::optional<APInt>> AliasOffetMap{};
SmallPtrSet<Instruction *, 4> Users{};
SmallPtrSet<IntrinsicInst *, 2> LifetimeStarts{};
+ SmallPtrSet<BasicBlock *, 2> LifetimeEndBBs;
bool MayWriteBeforeCoroBegin{false};
bool ShouldUseLifetimeStartInfo{true};
+ const coro::Shape &CoroShape;
mutable std::optional<bool> ShouldLiveOnFrame{};
@@ -1614,6 +1621,24 @@ struct AllocaUseVisitor : PtrUseVisitor<AllocaUseVisitor> {
// suspend point between lifetime markers. This should also cover the
// case of a single lifetime.start intrinsic in a loop with suspend point.
if (PI.isEscaped()) {
+ // If there is no explicit lifetime.end, then assume the address can
+ // cross suspension points.
+ if (LifetimeEndBBs.empty())
+ return true;
+
+ // If there is a path from a lifetime.start to a suspend without a
+ // corresponding lifetime.end, then the alloca's lifetime persists
+ // beyond that suspension point and the alloca must go on the frame.
+ for (AnyCoroSuspendInst *SuspendInst : CoroShape.CoroSuspends) {
+ SmallVector<BasicBlock *> LifetimeStartsBB;
+ for (IntrinsicInst *II : LifetimeStarts)
+ LifetimeStartsBB.push_back(II->getParent());
+ if (isPotentiallyReachableFromMany(LifetimeStartsBB,
+ SuspendInst->getParent(),
+ &LifetimeEndBBs, &DT))
+ return true;
+ }
+
for (auto *A : LifetimeStarts) {
for (auto *B : LifetimeStarts) {
if (Checker.hasPathOrLoopCrossingSuspendPoint(A->getParent(),
@@ -2830,9 +2855,9 @@ static void collectFrameAlloca(AllocaInst *AI, coro::Shape &Shape,
bool ShouldUseLifetimeStartInfo =
(Shape.ABI != coro::ABI::Async && Shape.ABI != coro::ABI::Retcon &&
Shape.ABI != coro::ABI::RetconOnce);
- AllocaUseVisitor Visitor{AI->getModule()->getDataLayout(), DT,
- *Shape.CoroBegin, Checker,
- ShouldUseLifetimeStartInfo};
+ AllocaUseVisitor Visitor{
+ AI->getModule()->getDataLayout(), DT, *Shape.CoroBegin, Checker,
+ ShouldUseLifetimeStartInfo, Shape};
Visitor.visitPtr(*AI);
if (!Visitor.getShouldLiveOnFrame())
return;
diff --git a/llvm/test/Transforms/Coroutines/coro-lifetime-end.ll b/llvm/test/Transforms/Coroutines/coro-lifetime-end.ll
new file mode 100644
index 00000000000000..9a26a722a1e0db
--- /dev/null
+++ b/llvm/test/Transforms/Coroutines/coro-lifetime-end.ll
@@ -0,0 +1,142 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+
+declare ptr @malloc(i64)
+
+%i8.array = type { [100 x i8] }
+declare void @consume.i8.array(ptr)
+
+@testbool = external local_unnamed_addr global i8, align 1
+
+; testval does not contain an explicit lifetime end. We must assume that it may
+; live across suspension.
+define void @HasNoLifetimeEnd() presplitcoroutine {
+; CHECK-LABEL: define void @HasNoLifetimeEnd() {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ID:%.*]] = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr @HasNoLifetimeEnd.resumers)
+; CHECK-NEXT: [[ALLOC:%.*]] = call ptr @malloc(i64 16)
+; CHECK-NEXT: [[VFRAME:%.*]] = call noalias nonnull ptr @llvm.coro.begin(token [[ID]], ptr [[ALLOC]])
+; CHECK-NEXT: store ptr @HasNoLifetimeEnd.resume, ptr [[VFRAME]], align 8
+; CHECK-NEXT: [[DESTROY_ADDR:%.*]] = getelementptr inbounds [[HASNOLIFETIMEEND_FRAME:%.*]], ptr [[VFRAME]], i32 0, i32 1
+; CHECK-NEXT: store ptr @HasNoLifetimeEnd.destroy, ptr [[DESTROY_ADDR]], align 8
+; CHECK-NEXT: [[INDEX_ADDR1:%.*]] = getelementptr inbounds [[HASNOLIFETIMEEND_FRAME]], ptr [[VFRAME]], i32 0, i32 2
+; CHECK-NEXT: call void @consume.i8.array(ptr [[INDEX_ADDR1]])
+; CHECK-NEXT: [[INDEX_ADDR2:%.*]] = getelementptr inbounds [[HASNOLIFETIMEEND_FRAME]], ptr [[VFRAME]], i32 0, i32 3
+; CHECK-NEXT: store i1 false, ptr [[INDEX_ADDR2]], align 1
+; CHECK-NEXT: ret void
+;
+entry:
+ %testval = alloca %i8.array
+ %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
+ %alloc = call ptr @malloc(i64 16) #3
+ %vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc)
+
+ call void @llvm.lifetime.start.p0(i64 100, ptr %testval)
+ call void @consume.i8.array(ptr %testval)
+
+ %save = call token @llvm.coro.save(ptr null)
+ %suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
+ switch i8 %suspend, label %exit [
+ i8 0, label %await.ready
+ i8 1, label %exit
+ ]
+await.ready:
+ br label %exit
+exit:
+ call i1 @llvm.coro.end(ptr null, i1 false, token none)
+ ret void
+}
+
+define void @LifetimeEndAfterCoroEnd() presplitcoroutine {
+; CHECK-LABEL: define void @LifetimeEndAfterCoroEnd() {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ID:%.*]] = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr @LifetimeEndAfterCoroEnd.resumers)
+; CHECK-NEXT: [[ALLOC:%.*]] = call ptr @malloc(i64 16)
+; CHECK-NEXT: [[VFRAME:%.*]] = call noalias nonnull ptr @llvm.coro.begin(token [[ID]], ptr [[ALLOC]])
+; CHECK-NEXT: store ptr @LifetimeEndAfterCoroEnd.resume, ptr [[VFRAME]], align 8
+; CHECK-NEXT: [[DESTROY_ADDR:%.*]] = getelementptr inbounds [[LIFETIMEENDAFTERCOROEND_FRAME:%.*]], ptr [[VFRAME]], i32 0, i32 1
+; CHECK-NEXT: store ptr @LifetimeEndAfterCoroEnd.destroy, ptr [[DESTROY_ADDR]], align 8
+; CHECK-NEXT: [[INDEX_ADDR1:%.*]] = getelementptr inbounds [[LIFETIMEENDAFTERCOROEND_FRAME]], ptr [[VFRAME]], i32 0, i32 2
+; CHECK-NEXT: call void @consume.i8.array(ptr [[INDEX_ADDR1]])
+; CHECK-NEXT: [[INDEX_ADDR2:%.*]] = getelementptr inbounds [[LIFETIMEENDAFTERCOROEND_FRAME]], ptr [[VFRAME]], i32 0, i32 3
+; CHECK-NEXT: store i1 false, ptr [[INDEX_ADDR2]], align 1
+; CHECK-NEXT: ret void
+;
+entry:
+ %testval = alloca %i8.array
+ %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
+ %alloc = call ptr @malloc(i64 16) #3
+ %vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc)
+
+ call void @llvm.lifetime.start.p0(i64 100, ptr %testval)
+ call void @consume.i8.array(ptr %testval)
+
+ %save = call token @llvm.coro.save(ptr null)
+ %suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
+ switch i8 %suspend, label %exit [
+ i8 0, label %await.ready
+ i8 1, label %exit
+ ]
+await.ready:
+ br label %exit
+exit:
+ call i1 @llvm.coro.end(ptr null, i1 false, token none)
+ call void @llvm.lifetime.end.p0(i64 100, ptr %testval)
+ ret void
+}
+
+define void @BranchWithoutLifetimeEnd() presplitcoroutine {
+; CHECK-LABEL: define void @BranchWithoutLifetimeEnd() {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ID:%.*]] = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr @BranchWithoutLifetimeEnd.resumers)
+; CHECK-NEXT: [[ALLOC:%.*]] = call ptr @malloc(i64 16)
+; CHECK-NEXT: [[VFRAME:%.*]] = call noalias nonnull ptr @llvm.coro.begin(token [[ID]], ptr [[ALLOC]])
+; CHECK-NEXT: store ptr @BranchWithoutLifetimeEnd.resume, ptr [[VFRAME]], align 8
+; CHECK-NEXT: [[DESTROY_ADDR:%.*]] = getelementptr inbounds [[BRANCHWITHOUTLIFETIMEEND_FRAME:%.*]], ptr [[VFRAME]], i32 0, i32 1
+; CHECK-NEXT: store ptr @BranchWithoutLifetimeEnd.destroy, ptr [[DESTROY_ADDR]], align 8
+; CHECK-NEXT: [[TESTVAL:%.*]] = getelementptr inbounds [[BRANCHWITHOUTLIFETIMEEND_FRAME]], ptr [[VFRAME]], i32 0, i32 2
+; CHECK-NEXT: call void @consume.i8.array(ptr [[TESTVAL]])
+; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr @testbool, align 1
+; CHECK-NEXT: [[INDEX_ADDR1:%.*]] = getelementptr inbounds [[BRANCHWITHOUTLIFETIMEEND_FRAME]], ptr [[VFRAME]], i32 0, i32 3
+; CHECK-NEXT: store i1 false, ptr [[INDEX_ADDR1]], align 1
+; CHECK-NEXT: ret void
+;
+entry:
+ %testval = alloca %i8.array
+ %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
+ %alloc = call ptr @malloc(i64 16) #3
+ %vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc)
+
+ call void @llvm.lifetime.start.p0(i64 100, ptr %testval)
+ call void @consume.i8.array(ptr %testval)
+
+ %0 = load i8, ptr @testbool, align 1
+ %tobool = trunc nuw i8 %0 to i1
+ br i1 %tobool, label %if.then, label %if.end
+
+if.then:
+ call void @llvm.lifetime.end.p0(i64 100, ptr %testval)
+ br label %if.end
+
+if.end:
+ %save = call token @llvm.coro.save(ptr null)
+ %suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
+ switch i8 %suspend, label %exit [
+ i8 0, label %await.ready
+ i8 1, label %exit
+ ]
+await.ready:
+ br label %exit
+exit:
+ call i1 @llvm.coro.end(ptr null, i1 false, token none)
+ ret void
+}
+
+
+declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr)
+declare ptr @llvm.coro.begin(token, ptr writeonly) #3
+declare ptr @llvm.coro.frame() #5
+declare i8 @llvm.coro.suspend(token, i1) #3
+declare i1 @llvm.coro.end(ptr, i1, token) #3
+declare void @llvm.lifetime.start.p0(i64, ptr nocapture) #4
+declare void @llvm.lifetime.end.p0(i64, ptr nocapture) #4
|
not sure why buildkite is failing - I didn't see any test failures and |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM with nits. Thanks.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
lgtm
…e vs the stack The current logic for using lifetime intrinsics to determine whether a coroutine alloca should live on the coroutine frame or stack doesn't consider `llvm.lifetime.end`. As a result, some allocas are incorrectly placed on the stack even though their lifetimes may outlive the stack. For example, SimplifyCFG may generate code that drops the corresponding `llvm.lifetime.end` of an `llvm.lifetime.start`, and that code is incorrectly handled by the existing logic. To fix this, new logic is introduced where if an alloca's address is escaped, and there is a path from an `llvm.lifetime.start` to a coroutine suspend point (e.g. `llvm.coro.suspend`) without an `llvm.lifetime.end`, then we know the object lives beyond the suspension point and therefore must go on the coroutine frame. Fixes llvm#86580
98a62ff
to
f562590
Compare
Reverted in 9243841 due to compile-time regressions, see https://llvm-compile-time-tracker.com/compare.php?from=94204f59e92473bb19333f40a2250b64a398191a&to=fcf341d3ddfe2289ac88aa3c122b25df8732cc8e&stat=instructions:u. Presumably your changes to the isPotentiallyReachable infrastructure have made it much slower than it was. |
… objects on the frame vs the stack (llvm#90265)" This reverts commit 9243841. This reland addresses the performance regressions seen in llvm#90265 by retaining the original definition of `isPotentiallyReachableFromMany(...)` instead of reimplementing it with `isManyPotentiallyReachableFromMany(...)`. Fixes llvm#86580
ifetime.end` to compute putting objects on the frame vs the stack (llvm#90265)" This reverts commit 9243841. This reland addresses the performance regressions seen in llvm#90265 by retaining the original definition of `isPotentiallyReachableFromMany(...)` instead of reimplementing it with `isManyPotentiallyReachableFromMany(...)`. Fixes llvm#86580 fix clang format issue Use SmallPtrSet instead of DenseMap remove extra change use SmallPtrSet for loops Also make curly braces more consistent deduplicate code by using templates remove unnecessary change..again reduce size of StopBBReachable to align with size of CoroSuspendBBs ditto with StopLoops
… objects on the frame vs the stack (llvm#90265)" This reverts commit 9243841. This reland addresses the performance regressions seen in llvm#90265 by retaining the original definition of `isPotentiallyReachableFromMany(...)` instead of reimplementing it with `isManyPotentiallyReachableFromMany(...)`. Fixes llvm#86580 fix clang format issue Use SmallPtrSet instead of DenseMap remove extra change use SmallPtrSet for loops Also make curly braces more consistent deduplicate code by using templates remove unnecessary change..again reduce size of StopBBReachable to align with size of CoroSuspendBBs ditto with StopLoops Improve template parameter names Co-authored-by: Chuanqi Xu <yedeng.yd@linux.alibaba.com> fix build failure
… objects on the frame vs the stack (llvm#90265)" This reverts commit 9243841. This reland addresses the performance regressions seen in llvm#90265 by retaining the original definition of `isPotentiallyReachableFromMany(...)` instead of reimplementing it with `isManyPotentiallyReachableFromMany(...)`. Fixes llvm#86580 fix clang format issue Use SmallPtrSet instead of DenseMap remove extra change use SmallPtrSet for loops Also make curly braces more consistent deduplicate code by using templates remove unnecessary change..again reduce size of StopBBReachable to align with size of CoroSuspendBBs ditto with StopLoops Improve template parameter names Co-authored-by: Chuanqi Xu <yedeng.yd@linux.alibaba.com> fix build failure create and use SingleEntrySet
… objects on the frame vs the stack (#90265) (#91372) This reverts commit 9243841. This reland addresses the performance regressions seen in #90265 by retaining the original definition of `isPotentiallyReachableFromMany(...)` instead of reimplementing it with `isManyPotentiallyReachableFromMany(...)`. Fixes #86580
The current logic for using lifetime intrinsics to determine whether a
coroutine alloca should live on the coroutine frame or stack doesn't
consider
llvm.lifetime.end
. As a result, some allocas are incorrectlyplaced on the stack even though their lifetimes may outlive the stack.
For example, SimplifyCFG may generate code that drops the corresponding
llvm.lifetime.end
of anllvm.lifetime.start
, and that code isincorrectly handled by the existing logic.
To fix this, new logic is introduced where if an alloca's address is
escaped, and there is a path from an
llvm.lifetime.start
to acoroutine suspend point (e.g.
llvm.coro.suspend
) without anllvm.lifetime.end
, then we know the object lives beyond the suspensionpoint and therefore must go on the coroutine frame.
Fixes #86580