Skip to content

Commit f80e7e1

Browse files
authored
[GlobalOpt] Check if users are CallBase when changing CC (#161399)
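Fixes #156656. `hasChangeableCCImpl` guarantees that the address of the function is not taken, but it ignores assume-like calls, so a function's users may include things other than direct calls to it. This patch skips such users when changing the calling convention (CC): a user is updated only if it is a `CallBase` whose called operand is the function itself.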
1 parent 57b1b25 commit f80e7e1

File tree

2 files changed

+85
-21
lines changed

2 files changed

+85
-21
lines changed

llvm/lib/Transforms/IPO/GlobalOpt.cpp

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1680,7 +1680,9 @@ processGlobal(GlobalValue &GV,
16801680
/// FastCC.
16811681
static void ChangeCalleesToFastCall(Function *F) {
16821682
for (User *U : F->users())
1683-
cast<CallBase>(U)->setCallingConv(CallingConv::Fast);
1683+
if (auto *Call = dyn_cast<CallBase>(U))
1684+
if (Call->getCalledOperand() == F)
1685+
Call->setCallingConv(CallingConv::Fast);
16841686
}
16851687

16861688
static AttributeList StripAttr(LLVMContext &C, AttributeList Attrs,
@@ -1766,10 +1768,12 @@ isValidCandidateForColdCC(Function &F,
17661768
return false;
17671769

17681770
for (User *U : F.users()) {
1769-
CallBase &CB = cast<CallBase>(*U);
1770-
Function *CallerFunc = CB.getParent()->getParent();
1771+
CallBase *CB = dyn_cast<CallBase>(U);
1772+
if (!CB || CB->getCalledOperand() != &F)
1773+
continue;
1774+
Function *CallerFunc = CB->getParent()->getParent();
17711775
BlockFrequencyInfo &CallerBFI = GetBFI(*CallerFunc);
1772-
if (!isColdCallSite(CB, CallerBFI))
1776+
if (!isColdCallSite(*CB, CallerBFI))
17731777
return false;
17741778
if (!llvm::is_contained(AllCallsCold, CallerFunc))
17751779
return false;
@@ -1779,7 +1783,9 @@ isValidCandidateForColdCC(Function &F,
17791783

17801784
static void changeCallSitesToColdCC(Function *F) {
17811785
for (User *U : F->users())
1782-
cast<CallBase>(U)->setCallingConv(CallingConv::Cold);
1786+
if (auto *Call = dyn_cast<CallBase>(U))
1787+
if (Call->getCalledOperand() == F)
1788+
Call->setCallingConv(CallingConv::Cold);
17831789
}
17841790

17851791
// This function iterates over all the call instructions in the input Function
Lines changed: 74 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,58 +1,106 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
12
; RUN: opt < %s -passes=globalopt -S | FileCheck %s
23

34
declare token @llvm.call.preallocated.setup(i32)
45
declare ptr @llvm.call.preallocated.arg(token, i32)
56

67
define internal i32 @f(ptr %m) {
7-
; CHECK-LABEL: define internal fastcc i32 @f
8+
; CHECK-LABEL: define internal fastcc i32 @f(
9+
; CHECK-SAME: ptr [[M:%.*]]) unnamed_addr {
10+
; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[M]], align 4
11+
; CHECK-NEXT: ret i32 [[V]]
12+
;
813
%v = load i32, ptr %m
914
ret i32 %v
1015
}
1116

1217
define internal x86_thiscallcc i32 @g(ptr %m) {
13-
; CHECK-LABEL: define internal fastcc i32 @g
18+
; CHECK-LABEL: define internal fastcc i32 @g(
19+
; CHECK-SAME: ptr [[M:%.*]]) unnamed_addr {
20+
; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[M]], align 4
21+
; CHECK-NEXT: ret i32 [[V]]
22+
;
1423
%v = load i32, ptr %m
1524
ret i32 %v
1625
}
1726

1827
; Leave this one alone, because the user went out of their way to request this
1928
; convention.
2029
define internal coldcc i32 @h(ptr %m) {
21-
; CHECK-LABEL: define internal coldcc i32 @h
30+
; CHECK-LABEL: define internal coldcc i32 @h(
31+
; CHECK-SAME: ptr [[M:%.*]]) unnamed_addr {
32+
; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[M]], align 4
33+
; CHECK-NEXT: ret i32 [[V]]
34+
;
2235
%v = load i32, ptr %m
2336
ret i32 %v
2437
}
2538

2639
define internal i32 @j(ptr %m) {
27-
; CHECK-LABEL: define internal i32 @j
40+
; CHECK-LABEL: define internal i32 @j(
41+
; CHECK-SAME: ptr [[M:%.*]]) {
42+
; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[M]], align 4
43+
; CHECK-NEXT: ret i32 [[V]]
44+
;
2845
%v = load i32, ptr %m
2946
ret i32 %v
3047
}
3148

3249
define internal i32 @inalloca(ptr inalloca(i32) %p) {
33-
; CHECK-LABEL: define internal fastcc i32 @inalloca(ptr %p)
50+
; CHECK-LABEL: define internal fastcc i32 @inalloca(
51+
; CHECK-SAME: ptr [[P:%.*]]) unnamed_addr {
52+
; CHECK-NEXT: [[RV:%.*]] = load i32, ptr [[P]], align 4
53+
; CHECK-NEXT: ret i32 [[RV]]
54+
;
3455
%rv = load i32, ptr %p
3556
ret i32 %rv
3657
}
3758

3859
define i32 @inalloca2_caller(ptr inalloca(i32) %p) {
60+
; CHECK-LABEL: define i32 @inalloca2_caller(
61+
; CHECK-SAME: ptr inalloca(i32) [[P:%.*]]) local_unnamed_addr {
62+
; CHECK-NEXT: [[RV:%.*]] = musttail call i32 @inalloca2(ptr inalloca(i32) [[P]])
63+
; CHECK-NEXT: ret i32 [[RV]]
64+
;
3965
%rv = musttail call i32 @inalloca2(ptr inalloca(i32) %p)
4066
ret i32 %rv
4167
}
4268
define internal i32 @inalloca2(ptr inalloca(i32) %p) {
4369
; Because of the musttail caller, this inalloca cannot be dropped.
44-
; CHECK-LABEL: define internal i32 @inalloca2(ptr inalloca(i32) %p)
70+
; CHECK-LABEL: define internal i32 @inalloca2(
71+
; CHECK-SAME: ptr inalloca(i32) [[P:%.*]]) unnamed_addr {
72+
; CHECK-NEXT: [[RV:%.*]] = load i32, ptr [[P]], align 4
73+
; CHECK-NEXT: ret i32 [[RV]]
74+
;
4575
%rv = load i32, ptr %p
4676
ret i32 %rv
4777
}
4878

4979
define internal i32 @preallocated(ptr preallocated(i32) %p) {
50-
; CHECK-LABEL: define internal fastcc i32 @preallocated(ptr %p)
80+
; CHECK-LABEL: define internal fastcc i32 @preallocated(
81+
; CHECK-SAME: ptr [[P:%.*]]) unnamed_addr {
82+
; CHECK-NEXT: [[RV:%.*]] = load i32, ptr [[P]], align 4
83+
; CHECK-NEXT: ret i32 [[RV]]
84+
;
5185
%rv = load i32, ptr %p
5286
ret i32 %rv
5387
}
5488

5589
define void @call_things() {
90+
; CHECK-LABEL: define void @call_things() local_unnamed_addr {
91+
; CHECK-NEXT: [[M:%.*]] = alloca i32, align 4
92+
; CHECK-NEXT: [[TMP1:%.*]] = call fastcc i32 @f(ptr [[M]])
93+
; CHECK-NEXT: [[TMP2:%.*]] = call fastcc i32 @g(ptr [[M]])
94+
; CHECK-NEXT: [[TMP3:%.*]] = call coldcc i32 @h(ptr [[M]])
95+
; CHECK-NEXT: [[TMP4:%.*]] = call i32 @j(ptr [[M]])
96+
; CHECK-NEXT: [[ARGS:%.*]] = alloca inalloca i32, align 4
97+
; CHECK-NEXT: [[TMP5:%.*]] = call fastcc i32 @inalloca(ptr [[ARGS]])
98+
; CHECK-NEXT: [[TMP6:%.*]] = call ptr @llvm.stacksave.p0()
99+
; CHECK-NEXT: [[PAARG:%.*]] = alloca i32, align 4
100+
; CHECK-NEXT: [[TMP7:%.*]] = call fastcc i32 @preallocated(ptr [[PAARG]])
101+
; CHECK-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP6]])
102+
; CHECK-NEXT: ret void
103+
;
56104
%m = alloca i32
57105
call i32 @f(ptr %m)
58106
call x86_thiscallcc i32 @g(ptr %m)
@@ -65,15 +113,25 @@ define void @call_things() {
65113
call i32 @preallocated(ptr preallocated(i32) %N) ["preallocated"(token %c)]
66114
ret void
67115
}
68-
; CHECK-LABEL: define void @call_things()
69-
; CHECK: call fastcc i32 @f
70-
; CHECK: call fastcc i32 @g
71-
; CHECK: call coldcc i32 @h
72-
; CHECK: call i32 @j
73-
; CHECK: call fastcc i32 @inalloca(ptr %args)
74-
; CHECK-NOT: llvm.call.preallocated
75-
; CHECK: call fastcc i32 @preallocated(ptr %paarg)
76116

77117
@llvm.used = appending global [1 x ptr] [
78-
ptr @j
118+
ptr @j
79119
], section "llvm.metadata"
120+
121+
define internal i32 @assume_fastcc() {
122+
; CHECK-LABEL: define internal fastcc i32 @assume_fastcc() {
123+
; CHECK-NEXT: [[OBJSIZE:%.*]] = call i32 @llvm.objectsize.i32.p0(ptr @assume_fastcc, i1 false, i1 false, i1 false)
124+
; CHECK-NEXT: ret i32 [[OBJSIZE]]
125+
;
126+
%objsize = call i32 @llvm.objectsize.i32.p0(ptr @assume_fastcc, i1 false, i1 false, i1 false)
127+
ret i32 %objsize
128+
}
129+
130+
define internal i32 @constexpr_self_user() addrspace(1) {
131+
; CHECK-LABEL: define internal fastcc i32 @constexpr_self_user() addrspace(1) {
132+
; CHECK-NEXT: [[OBJSIZE:%.*]] = call i32 @llvm.objectsize.i32.p0(ptr addrspacecast (ptr addrspace(1) @constexpr_self_user to ptr), i1 false, i1 false, i1 false)
133+
; CHECK-NEXT: ret i32 [[OBJSIZE]]
134+
;
135+
%objsize = call i32 @llvm.objectsize.i32.p0(ptr addrspacecast (ptr addrspace(1) @constexpr_self_user to ptr), i1 false, i1 false, i1 false)
136+
ret i32 %objsize
137+
}

0 commit comments

Comments
 (0)