-
Notifications
You must be signed in to change notification settings - Fork 14.9k
[GlobalOpt] Check if users are CallBase when changing CC #161399
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-transforms Author: Hongyu Chen (XChy) ChangesFixes #156656 Full diff: https://github.com/llvm/llvm-project/pull/161399.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index f88d51f443bcf..c5a13c83ab39b 100644
--- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -1680,7 +1680,8 @@ processGlobal(GlobalValue &GV,
/// FastCC.
static void ChangeCalleesToFastCall(Function *F) {
for (User *U : F->users())
- cast<CallBase>(U)->setCallingConv(CallingConv::Fast);
+ if (auto *Call = dyn_cast<CallBase>(U))
+ Call->setCallingConv(CallingConv::Fast);
}
static AttributeList StripAttr(LLVMContext &C, AttributeList Attrs,
@@ -1779,7 +1780,8 @@ isValidCandidateForColdCC(Function &F,
static void changeCallSitesToColdCC(Function *F) {
for (User *U : F->users())
- cast<CallBase>(U)->setCallingConv(CallingConv::Cold);
+ if (auto *Call = dyn_cast<CallBase>(U))
+ Call->setCallingConv(CallingConv::Cold);
}
// This function iterates over all the call instructions in the input Function
diff --git a/llvm/test/Transforms/GlobalOpt/fastcc.ll b/llvm/test/Transforms/GlobalOpt/fastcc.ll
index 854357e6fad97..f63cf64ca2f11 100644
--- a/llvm/test/Transforms/GlobalOpt/fastcc.ll
+++ b/llvm/test/Transforms/GlobalOpt/fastcc.ll
@@ -1,16 +1,25 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt < %s -passes=globalopt -S | FileCheck %s
declare token @llvm.call.preallocated.setup(i32)
declare ptr @llvm.call.preallocated.arg(token, i32)
define internal i32 @f(ptr %m) {
-; CHECK-LABEL: define internal fastcc i32 @f
+; CHECK-LABEL: define internal fastcc i32 @f(
+; CHECK-SAME: ptr [[M:%.*]]) unnamed_addr {
+; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[M]], align 4
+; CHECK-NEXT: ret i32 [[V]]
+;
%v = load i32, ptr %m
ret i32 %v
}
define internal x86_thiscallcc i32 @g(ptr %m) {
-; CHECK-LABEL: define internal fastcc i32 @g
+; CHECK-LABEL: define internal fastcc i32 @g(
+; CHECK-SAME: ptr [[M:%.*]]) unnamed_addr {
+; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[M]], align 4
+; CHECK-NEXT: ret i32 [[V]]
+;
%v = load i32, ptr %m
ret i32 %v
}
@@ -18,41 +27,80 @@ define internal x86_thiscallcc i32 @g(ptr %m) {
; Leave this one alone, because the user went out of their way to request this
; convention.
define internal coldcc i32 @h(ptr %m) {
-; CHECK-LABEL: define internal coldcc i32 @h
+; CHECK-LABEL: define internal coldcc i32 @h(
+; CHECK-SAME: ptr [[M:%.*]]) unnamed_addr {
+; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[M]], align 4
+; CHECK-NEXT: ret i32 [[V]]
+;
%v = load i32, ptr %m
ret i32 %v
}
define internal i32 @j(ptr %m) {
-; CHECK-LABEL: define internal i32 @j
+; CHECK-LABEL: define internal i32 @j(
+; CHECK-SAME: ptr [[M:%.*]]) {
+; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[M]], align 4
+; CHECK-NEXT: ret i32 [[V]]
+;
%v = load i32, ptr %m
ret i32 %v
}
define internal i32 @inalloca(ptr inalloca(i32) %p) {
-; CHECK-LABEL: define internal fastcc i32 @inalloca(ptr %p)
+; CHECK-LABEL: define internal fastcc i32 @inalloca(
+; CHECK-SAME: ptr [[P:%.*]]) unnamed_addr {
+; CHECK-NEXT: [[RV:%.*]] = load i32, ptr [[P]], align 4
+; CHECK-NEXT: ret i32 [[RV]]
+;
%rv = load i32, ptr %p
ret i32 %rv
}
define i32 @inalloca2_caller(ptr inalloca(i32) %p) {
+; CHECK-LABEL: define i32 @inalloca2_caller(
+; CHECK-SAME: ptr inalloca(i32) [[P:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: [[RV:%.*]] = musttail call i32 @inalloca2(ptr inalloca(i32) [[P]])
+; CHECK-NEXT: ret i32 [[RV]]
+;
%rv = musttail call i32 @inalloca2(ptr inalloca(i32) %p)
ret i32 %rv
}
define internal i32 @inalloca2(ptr inalloca(i32) %p) {
; Because of the musttail caller, this inalloca cannot be dropped.
-; CHECK-LABEL: define internal i32 @inalloca2(ptr inalloca(i32) %p)
+; CHECK-LABEL: define internal i32 @inalloca2(
+; CHECK-SAME: ptr inalloca(i32) [[P:%.*]]) unnamed_addr {
+; CHECK-NEXT: [[RV:%.*]] = load i32, ptr [[P]], align 4
+; CHECK-NEXT: ret i32 [[RV]]
+;
%rv = load i32, ptr %p
ret i32 %rv
}
define internal i32 @preallocated(ptr preallocated(i32) %p) {
-; CHECK-LABEL: define internal fastcc i32 @preallocated(ptr %p)
+; CHECK-LABEL: define internal fastcc i32 @preallocated(
+; CHECK-SAME: ptr [[P:%.*]]) unnamed_addr {
+; CHECK-NEXT: [[RV:%.*]] = load i32, ptr [[P]], align 4
+; CHECK-NEXT: ret i32 [[RV]]
+;
%rv = load i32, ptr %p
ret i32 %rv
}
define void @call_things() {
+; CHECK-LABEL: define void @call_things() local_unnamed_addr {
+; CHECK-NEXT: [[M:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[TMP1:%.*]] = call fastcc i32 @f(ptr [[M]])
+; CHECK-NEXT: [[TMP2:%.*]] = call fastcc i32 @g(ptr [[M]])
+; CHECK-NEXT: [[TMP3:%.*]] = call coldcc i32 @h(ptr [[M]])
+; CHECK-NEXT: [[TMP4:%.*]] = call i32 @j(ptr [[M]])
+; CHECK-NEXT: [[ARGS:%.*]] = alloca inalloca i32, align 4
+; CHECK-NEXT: [[TMP5:%.*]] = call fastcc i32 @inalloca(ptr [[ARGS]])
+; CHECK-NEXT: [[TMP6:%.*]] = call ptr @llvm.stacksave.p0()
+; CHECK-NEXT: [[PAARG:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[TMP7:%.*]] = call fastcc i32 @preallocated(ptr [[PAARG]])
+; CHECK-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP6]])
+; CHECK-NEXT: ret void
+;
%m = alloca i32
call i32 @f(ptr %m)
call x86_thiscallcc i32 @g(ptr %m)
@@ -65,15 +113,16 @@ define void @call_things() {
call i32 @preallocated(ptr preallocated(i32) %N) ["preallocated"(token %c)]
ret void
}
-; CHECK-LABEL: define void @call_things()
-; CHECK: call fastcc i32 @f
-; CHECK: call fastcc i32 @g
-; CHECK: call coldcc i32 @h
-; CHECK: call i32 @j
-; CHECK: call fastcc i32 @inalloca(ptr %args)
-; CHECK-NOT: llvm.call.preallocated
-; CHECK: call fastcc i32 @preallocated(ptr %paarg)
@llvm.used = appending global [1 x ptr] [
- ptr @j
+ ptr @j
], section "llvm.metadata"
+
+define internal i32 @constexpr_self_user() addrspace(1) {
+; CHECK-LABEL: define internal fastcc i32 @constexpr_self_user() addrspace(1) {
+; CHECK-NEXT: [[OBJSIZE:%.*]] = call i32 @llvm.objectsize.i32.p0(ptr addrspacecast (ptr addrspace(1) @constexpr_self_user to ptr), i1 false, i1 false, i1 false)
+; CHECK-NEXT: ret i32 [[OBJSIZE]]
+;
+ %objsize = call i32 @llvm.objectsize.i32.p0(ptr addrspacecast (ptr addrspace(1) @constexpr_self_user to ptr), i1 false, i1 false, i1 false)
+ ret i32 %objsize
+}
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
], section "llvm.metadata" | ||
|
||
define internal i32 @constexpr_self_user() addrspace(1) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Remove the address space, assuming it's not relevant somehow?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's only reproducible with addrspacecast. See also https://godbolt.org/z/qcfY8z91G.
My description is not clear. Actually, hasChangeableCCImpl
ignores assume-like calls with addrspacecast.
But we try to cast addrspacecast
to CallBase
later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah, but that indicates another issue now. We'll also ignore assume-like calls directly:
llvm-project/llvm/lib/IR/Function.cpp
Lines 995 to 999 in 3c0f7b1
if (IgnoreAssumeLikeCalls) { | |
if (const auto *I = dyn_cast<IntrinsicInst>(Call)) | |
if (I->isAssumeLikeIntrinsic()) | |
continue; | |
} |
But in that case we're going to change the CC on the assume-like call, not on a call to the function. So I think the code needs to check that it's the callee operand.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks, I missed it. Fix it now.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
Fixes llvm#156656 `hasChangeableCCImpl` guarantees the address of the function is not taken, but it ignores assume-like calls. This patch ignores assume-like calls when changing CC.
Fixes #156656
hasChangeableCCImpl
guarantees the address of the function is not taken, but it ignores assume-like calls.This patch ignores assume-like calls when changing CC.