diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 75d16a42d0205..476b7b349294a 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -3024,18 +3024,27 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization( assert(Subtarget->supportsTailCall()); - // Indirect tail calls cannot be optimized for Thumb1 if the args - // to the call take up r0-r3. The reason is that there are no legal registers - // left to hold the pointer to the function to be called. - // Similarly, if the function uses return address sign and authentication, - // r12 is needed to hold the PAC and is not available to hold the callee - // address. - if (Outs.size() >= 4 && - (!isa(Callee.getNode()) || isIndirect)) { - if (Subtarget->isThumb1Only()) - return false; - // Conservatively assume the function spills LR. - if (MF.getInfo()->shouldSignReturnAddress(true)) + // Indirect tail-calls require a register to hold the target address. That + // register must be: + // * Allocatable (i.e. r0-r7 if the target is Thumb1). + // * Not callee-saved, so must be one of r0-r3 or r12. + // * Not used to hold an argument to the tail-called function, which might be + // in r0-r3. + // * Not used to hold the return address authentication code, which is in r12 + // if enabled. + // Sometimes, no register matches all of these conditions, so we can't do a + // tail-call. 
+ if (!isa(Callee.getNode()) || isIndirect) { + SmallSet AddressRegisters; + for (Register R : {ARM::R0, ARM::R1, ARM::R2, ARM::R3}) + AddressRegisters.insert(R); + if (!(Subtarget->isThumb1Only() || + MF.getInfo()->shouldSignReturnAddress(true))) + AddressRegisters.insert(ARM::R12); + for (const CCValAssign &AL : ArgLocs) + if (AL.isRegLoc()) + AddressRegisters.erase(AL.getLocReg()); + if (AddressRegisters.empty()) return false; } diff --git a/llvm/test/CodeGen/Thumb2/indirect-tail-call-free-registers.ll b/llvm/test/CodeGen/Thumb2/indirect-tail-call-free-registers.ll new file mode 100644 index 0000000000000..c6ace3eb55b28 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/indirect-tail-call-free-registers.ll @@ -0,0 +1,111 @@ +; RUN: llc %s -o - -mtriple=thumbv8m.main -mattr=+vfp4 | FileCheck %s + +;; No outgoing arguments, plenty of free registers to hold the target address. +define void @test0(ptr %fptr) { +; CHECK-LABEL: test0: +; CHECK: bx {{r0|r1|r2|r3|r12}} +entry: + tail call void %fptr() + ret void +} + +;; Four integer outgoing arguments, which use up r0-r3. +define void @test1(ptr %fptr) { +; CHECK-LABEL: test1: +; CHECK: bx r12 +entry: + tail call void %fptr(i32 0, i32 0, i32 0, i32 0) + ret void +} + +;; Four integer outgoing arguments, which use up r0-r3, and sign-return-address +;; uses r12, so we can never tail-call this. +define void @test2(ptr %fptr) "sign-return-address"="all" { +; CHECK-LABEL: test2: +; CHECK: blx + entry: + tail call void %fptr(i32 0, i32 0, i32 0, i32 0) + ret void +} + +;; An i32 and an i64 argument, which uses r0, r2 and r3 for arguments, leaving +;; r1 free for the address. +define void @test3(ptr %fptr) { +; CHECK-LABEL: test3: +; CHECK: bx {{r1|r12}} +entry: + tail call void %fptr(i32 0, i64 0) + ret void +} + +;; Four float arguments, using the soft-float calling convention, which uses +;; r0-r3. 
+define void @test4(ptr %fptr) { +; CHECK-LABEL: test4: +; CHECK: bx r12 +entry: + tail call arm_aapcscc void %fptr(float 0.0, float 0.0, float 0.0, float 0.0) + ret void +} + +;; Four float arguments, using the soft-float calling convention, which uses +;; r0-r3, and sign-return-address uses r12, so we can't tail-call this. +;; Previously this failed with "ran out of registers during register +;; allocation". +define void @test5(ptr %fptr) "sign-return-address"="all" { +; CHECK-LABEL: test5: +; CHECK: blx +entry: + tail call arm_aapcscc void %fptr(float 0.0, float 0.0, float 0.0, float 0.0) + ret void +} + +;; Four float arguments, using the hard-float calling convention, which uses +;; s0-s3, leaving all of the integer registers free for the address. +define void @test6(ptr %fptr) { +; CHECK-LABEL: test6: +; CHECK: bx {{r0|r1|r2|r3|r12}} +entry: + tail call arm_aapcs_vfpcc void %fptr(float 0.0, float 0.0, float 0.0, float 0.0) + ret void +} + +;; Four float arguments, using the hard-float calling convention, which uses +;; s0-s3, leaving r0-r3 free for the address, with r12 used for +;; sign-return-address. +define void @test7(ptr %fptr) "sign-return-address"="all" { +; CHECK-LABEL: test7: +; CHECK: bx {{r0|r1|r2|r3}} +entry: + tail call arm_aapcs_vfpcc void %fptr(float 0.0, float 0.0, float 0.0, float 0.0) + ret void +} + +;; Two double arguments, using the soft-float calling convention, which uses +;; r0-r3. +define void @test8(ptr %fptr) { +; CHECK-LABEL: test8: +; CHECK: bx r12 +entry: + tail call arm_aapcscc void %fptr(double 0.0, double 0.0) + ret void +} + +;; Two double arguments, using the soft-float calling convention, which uses +;; r0-r3, and sign-return-address uses r12, so we can't tail-call this. 
+define void @test9(ptr %fptr) "sign-return-address"="all" { +; CHECK-LABEL: test9: +; CHECK: blx +entry: + tail call arm_aapcscc void %fptr(double 0.0, double 0.0) + ret void +} + +;; Four integer arguments (one on the stack), but due to alignment r1 is left +;; empty, so it can be used for the tail-call. +define void @test10(ptr %fptr, i64 %b, i32 %c) "sign-return-address"="all" { +; CHECK-LABEL: test10: +; CHECK: bx r1 +entry: + tail call void %fptr(i32 0, i64 %b, i32 %c) + ret void +}