Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 11 additions & 5 deletions llvm/lib/Transforms/IPO/GlobalOpt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1680,7 +1680,9 @@ processGlobal(GlobalValue &GV,
/// FastCC.
static void ChangeCalleesToFastCall(Function *F) {
for (User *U : F->users())
cast<CallBase>(U)->setCallingConv(CallingConv::Fast);
if (auto *Call = dyn_cast<CallBase>(U))
if (Call->getCalledOperand() == F)
Call->setCallingConv(CallingConv::Fast);
}

static AttributeList StripAttr(LLVMContext &C, AttributeList Attrs,
Expand Down Expand Up @@ -1766,10 +1768,12 @@ isValidCandidateForColdCC(Function &F,
return false;

for (User *U : F.users()) {
CallBase &CB = cast<CallBase>(*U);
Function *CallerFunc = CB.getParent()->getParent();
CallBase *CB = dyn_cast<CallBase>(U);
if (!CB || CB->getCalledOperand() != &F)
continue;
Function *CallerFunc = CB->getParent()->getParent();
BlockFrequencyInfo &CallerBFI = GetBFI(*CallerFunc);
if (!isColdCallSite(CB, CallerBFI))
if (!isColdCallSite(*CB, CallerBFI))
return false;
if (!llvm::is_contained(AllCallsCold, CallerFunc))
return false;
Expand All @@ -1779,7 +1783,9 @@ isValidCandidateForColdCC(Function &F,

static void changeCallSitesToColdCC(Function *F) {
for (User *U : F->users())
cast<CallBase>(U)->setCallingConv(CallingConv::Cold);
if (auto *Call = dyn_cast<CallBase>(U))
if (Call->getCalledOperand() == F)
Call->setCallingConv(CallingConv::Cold);
}

// This function iterates over all the call instructions in the input Function
Expand Down
90 changes: 74 additions & 16 deletions llvm/test/Transforms/GlobalOpt/fastcc.ll
Original file line number Diff line number Diff line change
@@ -1,58 +1,106 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt < %s -passes=globalopt -S | FileCheck %s

declare token @llvm.call.preallocated.setup(i32)
declare ptr @llvm.call.preallocated.arg(token, i32)

define internal i32 @f(ptr %m) {
; CHECK-LABEL: define internal fastcc i32 @f
; CHECK-LABEL: define internal fastcc i32 @f(
; CHECK-SAME: ptr [[M:%.*]]) unnamed_addr {
; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[M]], align 4
; CHECK-NEXT: ret i32 [[V]]
;
%v = load i32, ptr %m
ret i32 %v
}

define internal x86_thiscallcc i32 @g(ptr %m) {
; CHECK-LABEL: define internal fastcc i32 @g
; CHECK-LABEL: define internal fastcc i32 @g(
; CHECK-SAME: ptr [[M:%.*]]) unnamed_addr {
; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[M]], align 4
; CHECK-NEXT: ret i32 [[V]]
;
%v = load i32, ptr %m
ret i32 %v
}

; Leave this one alone, because the user went out of their way to request this
; convention.
define internal coldcc i32 @h(ptr %m) {
; CHECK-LABEL: define internal coldcc i32 @h
; CHECK-LABEL: define internal coldcc i32 @h(
; CHECK-SAME: ptr [[M:%.*]]) unnamed_addr {
; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[M]], align 4
; CHECK-NEXT: ret i32 [[V]]
;
%v = load i32, ptr %m
ret i32 %v
}

define internal i32 @j(ptr %m) {
; CHECK-LABEL: define internal i32 @j
; CHECK-LABEL: define internal i32 @j(
; CHECK-SAME: ptr [[M:%.*]]) {
; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[M]], align 4
; CHECK-NEXT: ret i32 [[V]]
;
%v = load i32, ptr %m
ret i32 %v
}

define internal i32 @inalloca(ptr inalloca(i32) %p) {
; CHECK-LABEL: define internal fastcc i32 @inalloca(ptr %p)
; CHECK-LABEL: define internal fastcc i32 @inalloca(
; CHECK-SAME: ptr [[P:%.*]]) unnamed_addr {
; CHECK-NEXT: [[RV:%.*]] = load i32, ptr [[P]], align 4
; CHECK-NEXT: ret i32 [[RV]]
;
%rv = load i32, ptr %p
ret i32 %rv
}

define i32 @inalloca2_caller(ptr inalloca(i32) %p) {
; CHECK-LABEL: define i32 @inalloca2_caller(
; CHECK-SAME: ptr inalloca(i32) [[P:%.*]]) local_unnamed_addr {
; CHECK-NEXT: [[RV:%.*]] = musttail call i32 @inalloca2(ptr inalloca(i32) [[P]])
; CHECK-NEXT: ret i32 [[RV]]
;
%rv = musttail call i32 @inalloca2(ptr inalloca(i32) %p)
ret i32 %rv
}
define internal i32 @inalloca2(ptr inalloca(i32) %p) {
; Because of the musttail caller, this inalloca cannot be dropped.
; CHECK-LABEL: define internal i32 @inalloca2(ptr inalloca(i32) %p)
; CHECK-LABEL: define internal i32 @inalloca2(
; CHECK-SAME: ptr inalloca(i32) [[P:%.*]]) unnamed_addr {
; CHECK-NEXT: [[RV:%.*]] = load i32, ptr [[P]], align 4
; CHECK-NEXT: ret i32 [[RV]]
;
%rv = load i32, ptr %p
ret i32 %rv
}

define internal i32 @preallocated(ptr preallocated(i32) %p) {
; CHECK-LABEL: define internal fastcc i32 @preallocated(ptr %p)
; CHECK-LABEL: define internal fastcc i32 @preallocated(
; CHECK-SAME: ptr [[P:%.*]]) unnamed_addr {
; CHECK-NEXT: [[RV:%.*]] = load i32, ptr [[P]], align 4
; CHECK-NEXT: ret i32 [[RV]]
;
%rv = load i32, ptr %p
ret i32 %rv
}

define void @call_things() {
; CHECK-LABEL: define void @call_things() local_unnamed_addr {
; CHECK-NEXT: [[M:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[TMP1:%.*]] = call fastcc i32 @f(ptr [[M]])
; CHECK-NEXT: [[TMP2:%.*]] = call fastcc i32 @g(ptr [[M]])
; CHECK-NEXT: [[TMP3:%.*]] = call coldcc i32 @h(ptr [[M]])
; CHECK-NEXT: [[TMP4:%.*]] = call i32 @j(ptr [[M]])
; CHECK-NEXT: [[ARGS:%.*]] = alloca inalloca i32, align 4
; CHECK-NEXT: [[TMP5:%.*]] = call fastcc i32 @inalloca(ptr [[ARGS]])
; CHECK-NEXT: [[TMP6:%.*]] = call ptr @llvm.stacksave.p0()
; CHECK-NEXT: [[PAARG:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[TMP7:%.*]] = call fastcc i32 @preallocated(ptr [[PAARG]])
; CHECK-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP6]])
; CHECK-NEXT: ret void
;
%m = alloca i32
call i32 @f(ptr %m)
call x86_thiscallcc i32 @g(ptr %m)
Expand All @@ -65,15 +113,25 @@ define void @call_things() {
call i32 @preallocated(ptr preallocated(i32) %N) ["preallocated"(token %c)]
ret void
}
; CHECK-LABEL: define void @call_things()
; CHECK: call fastcc i32 @f
; CHECK: call fastcc i32 @g
; CHECK: call coldcc i32 @h
; CHECK: call i32 @j
; CHECK: call fastcc i32 @inalloca(ptr %args)
; CHECK-NOT: llvm.call.preallocated
; CHECK: call fastcc i32 @preallocated(ptr %paarg)

@llvm.used = appending global [1 x ptr] [
ptr @j
ptr @j
], section "llvm.metadata"

define internal i32 @assume_fastcc() {
; CHECK-LABEL: define internal fastcc i32 @assume_fastcc() {
; CHECK-NEXT: [[OBJSIZE:%.*]] = call i32 @llvm.objectsize.i32.p0(ptr @assume_fastcc, i1 false, i1 false, i1 false)
; CHECK-NEXT: ret i32 [[OBJSIZE]]
;
%objsize = call i32 @llvm.objectsize.i32.p0(ptr @assume_fastcc, i1 false, i1 false, i1 false)
ret i32 %objsize
}

define internal i32 @constexpr_self_user() addrspace(1) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Remove the address space, assuming it's not relevant somehow?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's only reproducible with addrspacecast. See also https://godbolt.org/z/qcfY8z91G.
My description is not clear. Actually, hasChangeableCCImpl ignores assume-like calls with addrspacecast.
But we try to cast addrspacecast to CallBase later.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, but that indicates another issue now. We'll also ignore assume-like calls directly:

if (IgnoreAssumeLikeCalls) {
if (const auto *I = dyn_cast<IntrinsicInst>(Call))
if (I->isAssumeLikeIntrinsic())
continue;
}

But in that case we're going to change the CC on the assume-like call, not on a call to the function. So I think the code needs to check that it's the callee operand.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, I missed it. Fix it now.

; CHECK-LABEL: define internal fastcc i32 @constexpr_self_user() addrspace(1) {
; CHECK-NEXT: [[OBJSIZE:%.*]] = call i32 @llvm.objectsize.i32.p0(ptr addrspacecast (ptr addrspace(1) @constexpr_self_user to ptr), i1 false, i1 false, i1 false)
; CHECK-NEXT: ret i32 [[OBJSIZE]]
;
%objsize = call i32 @llvm.objectsize.i32.p0(ptr addrspacecast (ptr addrspace(1) @constexpr_self_user to ptr), i1 false, i1 false, i1 false)
ret i32 %objsize
}