From c3f9697f1f227296818fbaf1a770a29842ea454c Mon Sep 17 00:00:00 2001 From: Kai Luo Date: Wed, 22 Jul 2020 04:16:20 +0000 Subject: [PATCH] [PowerPC] Fix wrong codegen when stack pointer has to realign performing dynalloc The current PowerPC backend generates a wrong code sequence when the stack pointer has to be realigned and `-fstack-clash-protection` is enabled. When probing a dynamic stack allocation, the current `PREPARE_PROBED_ALLOCA` takes `NegSizeReg` as input and returns `FinalStackPtr`. `FinalStackPtr=StackPtr+ActualNegSize` is calculated correctly; however, the code following `PREPARE_PROBED_ALLOCA` still uses the value of `NegSizeReg`, which does not contain `ActualNegSize` if `MaxAlign > TargetAlign`, to calculate the loop trip count and the residual number of bytes. This patch makes `PREPARE_PROBED_ALLOCA` return `ActualNegSize` instead, so that both `FinalStackPtr` and the probing arithmetic are derived from it. This patch is part of the fix for https://bugs.llvm.org/show_bug.cgi?id=46759. Differential Revision: https://reviews.llvm.org/D84152 --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 36 ++- llvm/lib/Target/PowerPC/PPCInstr64Bit.td | 9 +- llvm/lib/Target/PowerPC/PPCInstrInfo.td | 9 +- llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp | 37 ++- llvm/test/CodeGen/PowerPC/pr46759.ll | 27 +- .../PowerPC/stack-clash-dynamic-alloca.ll | 240 +++++++++--------- 6 files changed, 198 insertions(+), 160 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index f8d7ab87f35ccc..fe9ab604ec2f7a 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -11954,18 +11954,34 @@ PPCTargetLowering::emitProbedAlloca(MachineInstr &MI, Register SPReg = isPPC64 ? PPC::X1 : PPC::R1; Register FinalStackPtr = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC); Register FramePointer = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC); - - // Get the canonical FinalStackPtr like what - // PPCRegisterInfo::lowerDynamicAlloc does. - BuildMI(*MBB, {MI}, DL, - TII->get(isPPC64 ?
PPC::PREPARE_PROBED_ALLOCA_64 - : PPC::PREPARE_PROBED_ALLOCA_32), - FramePointer) - .addDef(FinalStackPtr) + Register ActualNegSizeReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC); + + // Since the value of NegSizeReg might be realigned in prologepilog, insert a + // PREPARE_PROBED_ALLOCA pseudo instruction to get the actual FramePointer and + // NegSize. + unsigned ProbeOpc; + if (!MRI.hasOneNonDBGUse(NegSizeReg)) + ProbeOpc = + isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 : PPC::PREPARE_PROBED_ALLOCA_32; + else + // By introducing PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG, ActualNegSizeReg + // and NegSizeReg will be allocated in the same physical register, avoiding + // a redundant copy when NegSizeReg has only one use, which is the current + // MI and will be replaced by PREPARE_PROBED_ALLOCA then. + ProbeOpc = isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64 + : PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32; + BuildMI(*MBB, {MI}, DL, TII->get(ProbeOpc), FramePointer) + .addDef(ActualNegSizeReg) .addReg(NegSizeReg) .add(MI.getOperand(2)) .add(MI.getOperand(3)); + // Calculate the final stack pointer, which equals SP + ActualNegSize. + BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4), + FinalStackPtr) + .addReg(SPReg) + .addReg(ActualNegSizeReg); + // Materialize a scratch register for update. int64_t NegProbeSize = -(int64_t)ProbeSize; assert(isInt<32>(NegProbeSize) && "Unhandled probe size!"); @@ -11986,7 +12002,7 @@ PPCTargetLowering::emitProbedAlloca(MachineInstr &MI, // Probing leading residual part. Register Div = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC); BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::DIVD : PPC::DIVW), Div) - .addReg(NegSizeReg) + .addReg(ActualNegSizeReg) .addReg(ScratchReg); Register Mul = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC); BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ?
PPC::MULLD : PPC::MULLW), Mul) @@ -11995,7 +12011,7 @@ PPCTargetLowering::emitProbedAlloca(MachineInstr &MI, Register NegMod = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC); BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), NegMod) .addReg(Mul) - .addReg(NegSizeReg); + .addReg(ActualNegSizeReg); BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg) .addReg(FramePointer) .addReg(SPReg) diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td index 1c457d4170d549..6956c40a70be5c 100644 --- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td @@ -431,9 +431,14 @@ def PROBED_ALLOCA_64 : PPCCustomInserterPseudo<(outs g8rc:$result), (ins g8rc:$negsize, memri:$fpsi), "#PROBED_ALLOCA_64", [(set i64:$result, (PPCprobedalloca i64:$negsize, iaddr:$fpsi))]>; -def PREPARE_PROBED_ALLOCA_64 : PPCEmitTimePseudo<(outs g8rc:$fp, - g8rc:$sp), +def PREPARE_PROBED_ALLOCA_64 : PPCEmitTimePseudo<(outs + g8rc:$fp, g8rc:$actual_negsize), (ins g8rc:$negsize, memri:$fpsi), "#PREPARE_PROBED_ALLOCA_64", []>; +def PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64 : PPCEmitTimePseudo<(outs + g8rc:$fp, g8rc:$actual_negsize), + (ins g8rc:$negsize, memri:$fpsi), + "#PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64", []>, + RegConstraint<"$actual_negsize = $negsize">; def PROBED_STACKALLOC_64 : PPCEmitTimePseudo<(outs g8rc:$scratch, g8rc:$temp), (ins i64imm:$stacksize), "#PROBED_STACKALLOC_64", []>; diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td index c49e7a3dc6c236..c565758973bf54 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -1406,9 +1406,14 @@ def PROBED_ALLOCA_32 : PPCCustomInserterPseudo<(outs gprc:$result), (ins gprc:$negsize, memri:$fpsi), "#PROBED_ALLOCA_32", [(set i32:$result, (PPCprobedalloca i32:$negsize, iaddr:$fpsi))]>; -def PREPARE_PROBED_ALLOCA_32 : PPCEmitTimePseudo<(outs 
gprc:$fp, - gprc:$sp), +def PREPARE_PROBED_ALLOCA_32 : PPCEmitTimePseudo<(outs + gprc:$fp, gprc:$actual_negsize), (ins gprc:$negsize, memri:$fpsi), "#PREPARE_PROBED_ALLOCA_32", []>; +def PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32 : PPCEmitTimePseudo<(outs + gprc:$fp, gprc:$actual_negsize), + (ins gprc:$negsize, memri:$fpsi), + "#PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32", []>, + RegConstraint<"$actual_negsize = $negsize">; def PROBED_STACKALLOC_32 : PPCEmitTimePseudo<(outs gprc:$scratch, gprc:$temp), (ins i64imm:$stacksize), "#PROBED_STACKALLOC_32", []>; diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp index 35f5e1fbebcdf3..ed8948a6397283 100644 --- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -624,21 +624,30 @@ void PPCRegisterInfo::lowerPrepareProbedAlloca( bool LP64 = TM.isPPC64(); DebugLoc dl = MI.getDebugLoc(); Register FramePointer = MI.getOperand(0).getReg(); - Register FinalStackPtr = MI.getOperand(1).getReg(); + const Register ActualNegSizeReg = MI.getOperand(1).getReg(); bool KillNegSizeReg = MI.getOperand(2).isKill(); Register NegSizeReg = MI.getOperand(2).getReg(); - prepareDynamicAlloca(II, NegSizeReg, KillNegSizeReg, FramePointer); - if (LP64) { - BuildMI(MBB, II, dl, TII.get(PPC::ADD8), FinalStackPtr) - .addReg(PPC::X1) - .addReg(NegSizeReg, getKillRegState(KillNegSizeReg)); - - } else { - BuildMI(MBB, II, dl, TII.get(PPC::ADD4), FinalStackPtr) - .addReg(PPC::R1) - .addReg(NegSizeReg, getKillRegState(KillNegSizeReg)); + const MCInstrDesc &CopyInst = TII.get(LP64 ? PPC::OR8 : PPC::OR); + // RegAllocator might allocate FramePointer and NegSizeReg in the same phyreg. 
+ if (FramePointer == NegSizeReg) { + assert(KillNegSizeReg && "FramePointer is a def and NegSizeReg is an use, " + "NegSizeReg should be killed"); + // FramePointer is clobbered earlier than the use of NegSizeReg in + // prepareDynamicAlloca, save NegSizeReg in ActualNegSizeReg to avoid + // misuse. + BuildMI(MBB, II, dl, CopyInst, ActualNegSizeReg) + .addReg(NegSizeReg) + .addReg(NegSizeReg); + NegSizeReg = ActualNegSizeReg; + KillNegSizeReg = false; } - + prepareDynamicAlloca(II, NegSizeReg, KillNegSizeReg, FramePointer); + // NegSizeReg might be updated in prepareDynamicAlloca if MaxAlign > + // TargetAlign. + if (NegSizeReg != ActualNegSizeReg) + BuildMI(MBB, II, dl, CopyInst, ActualNegSizeReg) + .addReg(NegSizeReg) + .addReg(NegSizeReg); MBB.erase(II); } @@ -1084,7 +1093,9 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, if (FPSI && FrameIndex == FPSI && (OpC == PPC::PREPARE_PROBED_ALLOCA_64 || - OpC == PPC::PREPARE_PROBED_ALLOCA_32)) { + OpC == PPC::PREPARE_PROBED_ALLOCA_32 || + OpC == PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64 || + OpC == PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32)) { lowerPrepareProbedAlloca(II); return; } diff --git a/llvm/test/CodeGen/PowerPC/pr46759.ll b/llvm/test/CodeGen/PowerPC/pr46759.ll index 4d3e8cadc21e92..d1d68a5db7e312 100644 --- a/llvm/test/CodeGen/PowerPC/pr46759.ll +++ b/llvm/test/CodeGen/PowerPC/pr46759.ll @@ -20,26 +20,27 @@ define void @foo(i32 %vla_size) #0 { ; CHECK-LE-NEXT: .cfi_offset r31, -8 ; CHECK-LE-NEXT: .cfi_offset r30, -16 ; CHECK-LE-NEXT: clrldi r3, r3, 32 -; CHECK-LE-NEXT: li r6, -4096 -; CHECK-LE-NEXT: ld r4, 0(r1) +; CHECK-LE-NEXT: li r5, -2048 ; CHECK-LE-NEXT: mr r31, r1 ; CHECK-LE-NEXT: addi r3, r3, 15 ; CHECK-LE-NEXT: rldicl r3, r3, 60, 4 ; CHECK-LE-NEXT: rldicl r3, r3, 4, 31 -; CHECK-LE-NEXT: neg r5, r3 -; CHECK-LE-NEXT: li r3, -2048 -; CHECK-LE-NEXT: divd r7, r5, r6 -; CHECK-LE-NEXT: and r3, r5, r3 -; CHECK-LE-NEXT: add r3, r1, r3 -; CHECK-LE-NEXT: mulld r6, r7, r6 -; 
CHECK-LE-NEXT: sub r5, r5, r6 -; CHECK-LE-NEXT: stdux r4, r1, r5 -; CHECK-LE-NEXT: cmpd r1, r3 +; CHECK-LE-NEXT: neg r4, r3 +; CHECK-LE-NEXT: ld r3, 0(r1) +; CHECK-LE-NEXT: and r5, r4, r5 +; CHECK-LE-NEXT: mr r4, r5 +; CHECK-LE-NEXT: li r5, -4096 +; CHECK-LE-NEXT: divd r6, r4, r5 +; CHECK-LE-NEXT: mulld r5, r6, r5 +; CHECK-LE-NEXT: sub r5, r4, r5 +; CHECK-LE-NEXT: add r4, r1, r4 +; CHECK-LE-NEXT: stdux r3, r1, r5 +; CHECK-LE-NEXT: cmpd r1, r4 ; CHECK-LE-NEXT: beq cr0, .LBB0_2 ; CHECK-LE-NEXT: .LBB0_1: # %entry ; CHECK-LE-NEXT: # -; CHECK-LE-NEXT: stdu r4, -4096(r1) -; CHECK-LE-NEXT: cmpd r1, r3 +; CHECK-LE-NEXT: stdu r3, -4096(r1) +; CHECK-LE-NEXT: cmpd r1, r4 ; CHECK-LE-NEXT: bne cr0, .LBB0_1 ; CHECK-LE-NEXT: .LBB0_2: # %entry ; CHECK-LE-NEXT: addi r3, r1, 2048 diff --git a/llvm/test/CodeGen/PowerPC/stack-clash-dynamic-alloca.ll b/llvm/test/CodeGen/PowerPC/stack-clash-dynamic-alloca.ll index b475a2f7fbf1cb..eef02e77c2b166 100644 --- a/llvm/test/CodeGen/PowerPC/stack-clash-dynamic-alloca.ll +++ b/llvm/test/CodeGen/PowerPC/stack-clash-dynamic-alloca.ll @@ -18,23 +18,23 @@ define i32 @foo(i32 %n) local_unnamed_addr #0 "stack-probe-size"="32768" nounwin ; CHECK-LE-NEXT: std r31, -8(r1) ; CHECK-LE-NEXT: stdu r1, -48(r1) ; CHECK-LE-NEXT: rldic r3, r3, 2, 30 -; CHECK-LE-NEXT: li r6, -32768 +; CHECK-LE-NEXT: li r5, -32768 ; CHECK-LE-NEXT: mr r31, r1 ; CHECK-LE-NEXT: addi r3, r3, 15 -; CHECK-LE-NEXT: addi r4, r31, 48 ; CHECK-LE-NEXT: rldicl r3, r3, 60, 4 ; CHECK-LE-NEXT: rldicl r3, r3, 4, 29 -; CHECK-LE-NEXT: neg r5, r3 -; CHECK-LE-NEXT: divd r7, r5, r6 -; CHECK-LE-NEXT: add r3, r1, r5 -; CHECK-LE-NEXT: mulld r6, r7, r6 -; CHECK-LE-NEXT: sub r5, r5, r6 -; CHECK-LE-NEXT: stdux r4, r1, r5 -; CHECK-LE-NEXT: cmpd r1, r3 +; CHECK-LE-NEXT: neg r4, r3 +; CHECK-LE-NEXT: addi r3, r31, 48 +; CHECK-LE-NEXT: divd r6, r4, r5 +; CHECK-LE-NEXT: mulld r5, r6, r5 +; CHECK-LE-NEXT: sub r5, r4, r5 +; CHECK-LE-NEXT: add r4, r1, r4 +; CHECK-LE-NEXT: stdux r3, r1, r5 +; CHECK-LE-NEXT: cmpd r1, 
r4 ; CHECK-LE-NEXT: beq cr0, .LBB0_2 ; CHECK-LE-NEXT: .LBB0_1: -; CHECK-LE-NEXT: stdu r4, -32768(r1) -; CHECK-LE-NEXT: cmpd r1, r3 +; CHECK-LE-NEXT: stdu r3, -32768(r1) +; CHECK-LE-NEXT: cmpd r1, r4 ; CHECK-LE-NEXT: bne cr0, .LBB0_1 ; CHECK-LE-NEXT: .LBB0_2: ; CHECK-LE-NEXT: li r4, 1 @@ -53,20 +53,20 @@ define i32 @foo(i32 %n) local_unnamed_addr #0 "stack-probe-size"="32768" nounwin ; CHECK-P9-LE-NEXT: addi r3, r3, 15 ; CHECK-P9-LE-NEXT: li r6, -32768 ; CHECK-P9-LE-NEXT: mr r31, r1 -; CHECK-P9-LE-NEXT: addi r4, r31, 48 ; CHECK-P9-LE-NEXT: rldicl r3, r3, 60, 4 ; CHECK-P9-LE-NEXT: rldicl r3, r3, 4, 29 ; CHECK-P9-LE-NEXT: neg r5, r3 +; CHECK-P9-LE-NEXT: addi r3, r31, 48 ; CHECK-P9-LE-NEXT: divd r7, r5, r6 -; CHECK-P9-LE-NEXT: add r3, r1, r5 +; CHECK-P9-LE-NEXT: add r4, r1, r5 ; CHECK-P9-LE-NEXT: mulld r6, r7, r6 ; CHECK-P9-LE-NEXT: sub r5, r5, r6 -; CHECK-P9-LE-NEXT: stdux r4, r1, r5 -; CHECK-P9-LE-NEXT: cmpd r1, r3 +; CHECK-P9-LE-NEXT: stdux r3, r1, r5 +; CHECK-P9-LE-NEXT: cmpd r1, r4 ; CHECK-P9-LE-NEXT: beq cr0, .LBB0_2 ; CHECK-P9-LE-NEXT: .LBB0_1: -; CHECK-P9-LE-NEXT: stdu r4, -32768(r1) -; CHECK-P9-LE-NEXT: cmpd r1, r3 +; CHECK-P9-LE-NEXT: stdu r3, -32768(r1) +; CHECK-P9-LE-NEXT: cmpd r1, r4 ; CHECK-P9-LE-NEXT: bne cr0, .LBB0_1 ; CHECK-P9-LE-NEXT: .LBB0_2: ; CHECK-P9-LE-NEXT: li r4, 1 @@ -82,23 +82,23 @@ define i32 @foo(i32 %n) local_unnamed_addr #0 "stack-probe-size"="32768" nounwin ; CHECK-BE-NEXT: std r31, -8(r1) ; CHECK-BE-NEXT: stdu r1, -64(r1) ; CHECK-BE-NEXT: rldic r3, r3, 2, 30 -; CHECK-BE-NEXT: li r6, -32768 +; CHECK-BE-NEXT: li r5, -32768 ; CHECK-BE-NEXT: addi r3, r3, 15 ; CHECK-BE-NEXT: rldicl r3, r3, 60, 4 ; CHECK-BE-NEXT: mr r31, r1 ; CHECK-BE-NEXT: rldicl r3, r3, 4, 29 -; CHECK-BE-NEXT: addi r4, r31, 64 -; CHECK-BE-NEXT: neg r5, r3 -; CHECK-BE-NEXT: divd r7, r5, r6 -; CHECK-BE-NEXT: add r3, r1, r5 -; CHECK-BE-NEXT: mulld r6, r7, r6 -; CHECK-BE-NEXT: sub r5, r5, r6 -; CHECK-BE-NEXT: stdux r4, r1, r5 -; CHECK-BE-NEXT: cmpd r1, r3 +; CHECK-BE-NEXT: neg 
r4, r3 +; CHECK-BE-NEXT: divd r6, r4, r5 +; CHECK-BE-NEXT: addi r3, r31, 64 +; CHECK-BE-NEXT: mulld r5, r6, r5 +; CHECK-BE-NEXT: sub r5, r4, r5 +; CHECK-BE-NEXT: add r4, r1, r4 +; CHECK-BE-NEXT: stdux r3, r1, r5 +; CHECK-BE-NEXT: cmpd r1, r4 ; CHECK-BE-NEXT: beq cr0, .LBB0_2 ; CHECK-BE-NEXT: .LBB0_1: -; CHECK-BE-NEXT: stdu r4, -32768(r1) -; CHECK-BE-NEXT: cmpd r1, r3 +; CHECK-BE-NEXT: stdu r3, -32768(r1) +; CHECK-BE-NEXT: cmpd r1, r4 ; CHECK-BE-NEXT: bne cr0, .LBB0_1 ; CHECK-BE-NEXT: .LBB0_2: ; CHECK-BE-NEXT: li r4, 1 @@ -115,21 +115,21 @@ define i32 @foo(i32 %n) local_unnamed_addr #0 "stack-probe-size"="32768" nounwin ; CHECK-32-NEXT: slwi r3, r3, 2 ; CHECK-32-NEXT: addi r3, r3, 15 ; CHECK-32-NEXT: rlwinm r3, r3, 0, 0, 27 -; CHECK-32-NEXT: neg r5, r3 -; CHECK-32-NEXT: li r6, -32768 -; CHECK-32-NEXT: divw r7, r5, r6 +; CHECK-32-NEXT: neg r4, r3 +; CHECK-32-NEXT: li r5, -32768 +; CHECK-32-NEXT: divw r6, r4, r5 ; CHECK-32-NEXT: stw r31, 28(r1) ; CHECK-32-NEXT: mr r31, r1 -; CHECK-32-NEXT: addi r4, r31, 32 -; CHECK-32-NEXT: add r3, r1, r5 -; CHECK-32-NEXT: mullw r6, r7, r6 -; CHECK-32-NEXT: sub r5, r5, r6 -; CHECK-32-NEXT: stwux r4, r1, r5 -; CHECK-32-NEXT: cmpw r1, r3 +; CHECK-32-NEXT: addi r3, r31, 32 +; CHECK-32-NEXT: mullw r5, r6, r5 +; CHECK-32-NEXT: sub r5, r4, r5 +; CHECK-32-NEXT: add r4, r1, r4 +; CHECK-32-NEXT: stwux r3, r1, r5 +; CHECK-32-NEXT: cmpw r1, r4 ; CHECK-32-NEXT: beq cr0, .LBB0_2 ; CHECK-32-NEXT: .LBB0_1: -; CHECK-32-NEXT: stwu r4, -32768(r1) -; CHECK-32-NEXT: cmpw r1, r3 +; CHECK-32-NEXT: stwu r3, -32768(r1) +; CHECK-32-NEXT: cmpw r1, r4 ; CHECK-32-NEXT: bne cr0, .LBB0_1 ; CHECK-32-NEXT: .LBB0_2: ; CHECK-32-NEXT: li r4, 1 @@ -154,23 +154,23 @@ define i32 @bar(i32 %n) local_unnamed_addr #0 nounwind { ; CHECK-LE-NEXT: std r31, -8(r1) ; CHECK-LE-NEXT: stdu r1, -48(r1) ; CHECK-LE-NEXT: rldic r4, r3, 2, 30 -; CHECK-LE-NEXT: li r7, -4096 +; CHECK-LE-NEXT: li r6, -4096 ; CHECK-LE-NEXT: mr r31, r1 ; CHECK-LE-NEXT: addi r4, r4, 15 -; CHECK-LE-NEXT: addi 
r5, r31, 48 ; CHECK-LE-NEXT: rldicl r4, r4, 60, 4 ; CHECK-LE-NEXT: rldicl r4, r4, 4, 29 -; CHECK-LE-NEXT: neg r6, r4 -; CHECK-LE-NEXT: divd r8, r6, r7 -; CHECK-LE-NEXT: add r4, r1, r6 -; CHECK-LE-NEXT: mulld r7, r8, r7 -; CHECK-LE-NEXT: sub r6, r6, r7 -; CHECK-LE-NEXT: stdux r5, r1, r6 -; CHECK-LE-NEXT: cmpd r1, r4 +; CHECK-LE-NEXT: neg r5, r4 +; CHECK-LE-NEXT: addi r4, r31, 48 +; CHECK-LE-NEXT: divd r7, r5, r6 +; CHECK-LE-NEXT: mulld r6, r7, r6 +; CHECK-LE-NEXT: sub r6, r5, r6 +; CHECK-LE-NEXT: add r5, r1, r5 +; CHECK-LE-NEXT: stdux r4, r1, r6 +; CHECK-LE-NEXT: cmpd r1, r5 ; CHECK-LE-NEXT: beq cr0, .LBB1_2 ; CHECK-LE-NEXT: .LBB1_1: -; CHECK-LE-NEXT: stdu r5, -4096(r1) -; CHECK-LE-NEXT: cmpd r1, r4 +; CHECK-LE-NEXT: stdu r4, -4096(r1) +; CHECK-LE-NEXT: cmpd r1, r5 ; CHECK-LE-NEXT: bne cr0, .LBB1_1 ; CHECK-LE-NEXT: .LBB1_2: ; CHECK-LE-NEXT: extsw r3, r3 @@ -192,20 +192,20 @@ define i32 @bar(i32 %n) local_unnamed_addr #0 nounwind { ; CHECK-P9-LE-NEXT: addi r4, r4, 15 ; CHECK-P9-LE-NEXT: li r7, -4096 ; CHECK-P9-LE-NEXT: mr r31, r1 -; CHECK-P9-LE-NEXT: addi r5, r31, 48 ; CHECK-P9-LE-NEXT: rldicl r4, r4, 60, 4 ; CHECK-P9-LE-NEXT: rldicl r4, r4, 4, 29 ; CHECK-P9-LE-NEXT: neg r6, r4 +; CHECK-P9-LE-NEXT: addi r4, r31, 48 ; CHECK-P9-LE-NEXT: divd r8, r6, r7 -; CHECK-P9-LE-NEXT: add r4, r1, r6 +; CHECK-P9-LE-NEXT: add r5, r1, r6 ; CHECK-P9-LE-NEXT: mulld r7, r8, r7 ; CHECK-P9-LE-NEXT: sub r6, r6, r7 -; CHECK-P9-LE-NEXT: stdux r5, r1, r6 -; CHECK-P9-LE-NEXT: cmpd r1, r4 +; CHECK-P9-LE-NEXT: stdux r4, r1, r6 +; CHECK-P9-LE-NEXT: cmpd r1, r5 ; CHECK-P9-LE-NEXT: beq cr0, .LBB1_2 ; CHECK-P9-LE-NEXT: .LBB1_1: -; CHECK-P9-LE-NEXT: stdu r5, -4096(r1) -; CHECK-P9-LE-NEXT: cmpd r1, r4 +; CHECK-P9-LE-NEXT: stdu r4, -4096(r1) +; CHECK-P9-LE-NEXT: cmpd r1, r5 ; CHECK-P9-LE-NEXT: bne cr0, .LBB1_1 ; CHECK-P9-LE-NEXT: .LBB1_2: ; CHECK-P9-LE-NEXT: extswsli r3, r3, 2 @@ -223,23 +223,23 @@ define i32 @bar(i32 %n) local_unnamed_addr #0 nounwind { ; CHECK-BE-NEXT: std r31, -8(r1) ; 
CHECK-BE-NEXT: stdu r1, -64(r1) ; CHECK-BE-NEXT: rldic r4, r3, 2, 30 -; CHECK-BE-NEXT: li r7, -4096 +; CHECK-BE-NEXT: li r6, -4096 ; CHECK-BE-NEXT: addi r4, r4, 15 ; CHECK-BE-NEXT: rldicl r4, r4, 60, 4 ; CHECK-BE-NEXT: mr r31, r1 ; CHECK-BE-NEXT: rldicl r4, r4, 4, 29 -; CHECK-BE-NEXT: addi r5, r31, 64 -; CHECK-BE-NEXT: neg r6, r4 -; CHECK-BE-NEXT: divd r8, r6, r7 -; CHECK-BE-NEXT: add r4, r1, r6 -; CHECK-BE-NEXT: mulld r7, r8, r7 -; CHECK-BE-NEXT: sub r6, r6, r7 -; CHECK-BE-NEXT: stdux r5, r1, r6 -; CHECK-BE-NEXT: cmpd r1, r4 +; CHECK-BE-NEXT: neg r5, r4 +; CHECK-BE-NEXT: divd r7, r5, r6 +; CHECK-BE-NEXT: addi r4, r31, 64 +; CHECK-BE-NEXT: mulld r6, r7, r6 +; CHECK-BE-NEXT: sub r6, r5, r6 +; CHECK-BE-NEXT: add r5, r1, r5 +; CHECK-BE-NEXT: stdux r4, r1, r6 +; CHECK-BE-NEXT: cmpd r1, r5 ; CHECK-BE-NEXT: beq cr0, .LBB1_2 ; CHECK-BE-NEXT: .LBB1_1: -; CHECK-BE-NEXT: stdu r5, -4096(r1) -; CHECK-BE-NEXT: cmpd r1, r4 +; CHECK-BE-NEXT: stdu r4, -4096(r1) +; CHECK-BE-NEXT: cmpd r1, r5 ; CHECK-BE-NEXT: bne cr0, .LBB1_1 ; CHECK-BE-NEXT: .LBB1_2: ; CHECK-BE-NEXT: extsw r3, r3 @@ -259,21 +259,21 @@ define i32 @bar(i32 %n) local_unnamed_addr #0 nounwind { ; CHECK-32-NEXT: slwi r3, r3, 2 ; CHECK-32-NEXT: addi r4, r3, 15 ; CHECK-32-NEXT: rlwinm r4, r4, 0, 0, 27 -; CHECK-32-NEXT: neg r6, r4 -; CHECK-32-NEXT: li r7, -4096 -; CHECK-32-NEXT: divw r8, r6, r7 +; CHECK-32-NEXT: neg r5, r4 +; CHECK-32-NEXT: li r6, -4096 +; CHECK-32-NEXT: divw r7, r5, r6 ; CHECK-32-NEXT: stw r31, 28(r1) ; CHECK-32-NEXT: mr r31, r1 -; CHECK-32-NEXT: addi r5, r31, 32 -; CHECK-32-NEXT: add r4, r1, r6 -; CHECK-32-NEXT: mullw r7, r8, r7 -; CHECK-32-NEXT: sub r6, r6, r7 -; CHECK-32-NEXT: stwux r5, r1, r6 -; CHECK-32-NEXT: cmpw r1, r4 +; CHECK-32-NEXT: addi r4, r31, 32 +; CHECK-32-NEXT: mullw r6, r7, r6 +; CHECK-32-NEXT: sub r6, r5, r6 +; CHECK-32-NEXT: add r5, r1, r5 +; CHECK-32-NEXT: stwux r4, r1, r6 +; CHECK-32-NEXT: cmpw r1, r5 ; CHECK-32-NEXT: beq cr0, .LBB1_2 ; CHECK-32-NEXT: .LBB1_1: -; CHECK-32-NEXT: stwu 
r5, -4096(r1) -; CHECK-32-NEXT: cmpw r1, r4 +; CHECK-32-NEXT: stwu r4, -4096(r1) +; CHECK-32-NEXT: cmpw r1, r5 ; CHECK-32-NEXT: bne cr0, .LBB1_1 ; CHECK-32-NEXT: .LBB1_2: ; CHECK-32-NEXT: addi r4, r1, 16 @@ -300,24 +300,24 @@ define i32 @f(i32 %n) local_unnamed_addr #0 "stack-probe-size"="65536" nounwind ; CHECK-LE-NEXT: std r31, -8(r1) ; CHECK-LE-NEXT: stdu r1, -48(r1) ; CHECK-LE-NEXT: rldic r3, r3, 2, 30 -; CHECK-LE-NEXT: lis r5, -1 +; CHECK-LE-NEXT: lis r4, -1 ; CHECK-LE-NEXT: mr r31, r1 ; CHECK-LE-NEXT: addi r3, r3, 15 -; CHECK-LE-NEXT: ori r5, r5, 0 -; CHECK-LE-NEXT: addi r4, r31, 48 +; CHECK-LE-NEXT: ori r4, r4, 0 ; CHECK-LE-NEXT: rldicl r3, r3, 60, 4 ; CHECK-LE-NEXT: rldicl r3, r3, 4, 29 -; CHECK-LE-NEXT: neg r6, r3 -; CHECK-LE-NEXT: divd r7, r6, r5 -; CHECK-LE-NEXT: add r3, r1, r6 -; CHECK-LE-NEXT: mulld r7, r7, r5 -; CHECK-LE-NEXT: sub r6, r6, r7 -; CHECK-LE-NEXT: stdux r4, r1, r6 -; CHECK-LE-NEXT: cmpd r1, r3 +; CHECK-LE-NEXT: neg r5, r3 +; CHECK-LE-NEXT: addi r3, r31, 48 +; CHECK-LE-NEXT: divd r6, r5, r4 +; CHECK-LE-NEXT: mulld r6, r6, r4 +; CHECK-LE-NEXT: sub r6, r5, r6 +; CHECK-LE-NEXT: add r5, r1, r5 +; CHECK-LE-NEXT: stdux r3, r1, r6 +; CHECK-LE-NEXT: cmpd r1, r5 ; CHECK-LE-NEXT: beq cr0, .LBB2_2 ; CHECK-LE-NEXT: .LBB2_1: -; CHECK-LE-NEXT: stdux r4, r1, r5 -; CHECK-LE-NEXT: cmpd r1, r3 +; CHECK-LE-NEXT: stdux r3, r1, r4 +; CHECK-LE-NEXT: cmpd r1, r5 ; CHECK-LE-NEXT: bne cr0, .LBB2_1 ; CHECK-LE-NEXT: .LBB2_2: ; CHECK-LE-NEXT: li r4, 1 @@ -337,20 +337,20 @@ define i32 @f(i32 %n) local_unnamed_addr #0 "stack-probe-size"="65536" nounwind ; CHECK-P9-LE-NEXT: lis r5, -1 ; CHECK-P9-LE-NEXT: ori r5, r5, 0 ; CHECK-P9-LE-NEXT: mr r31, r1 -; CHECK-P9-LE-NEXT: addi r4, r31, 48 ; CHECK-P9-LE-NEXT: rldicl r3, r3, 60, 4 ; CHECK-P9-LE-NEXT: rldicl r3, r3, 4, 29 ; CHECK-P9-LE-NEXT: neg r6, r3 +; CHECK-P9-LE-NEXT: addi r3, r31, 48 ; CHECK-P9-LE-NEXT: divd r7, r6, r5 -; CHECK-P9-LE-NEXT: add r3, r1, r6 +; CHECK-P9-LE-NEXT: add r4, r1, r6 ; CHECK-P9-LE-NEXT: mulld r7, 
r7, r5 ; CHECK-P9-LE-NEXT: sub r6, r6, r7 -; CHECK-P9-LE-NEXT: stdux r4, r1, r6 -; CHECK-P9-LE-NEXT: cmpd r1, r3 +; CHECK-P9-LE-NEXT: stdux r3, r1, r6 +; CHECK-P9-LE-NEXT: cmpd r1, r4 ; CHECK-P9-LE-NEXT: beq cr0, .LBB2_2 ; CHECK-P9-LE-NEXT: .LBB2_1: -; CHECK-P9-LE-NEXT: stdux r4, r1, r5 -; CHECK-P9-LE-NEXT: cmpd r1, r3 +; CHECK-P9-LE-NEXT: stdux r3, r1, r5 +; CHECK-P9-LE-NEXT: cmpd r1, r4 ; CHECK-P9-LE-NEXT: bne cr0, .LBB2_1 ; CHECK-P9-LE-NEXT: .LBB2_2: ; CHECK-P9-LE-NEXT: li r4, 1 @@ -366,24 +366,24 @@ define i32 @f(i32 %n) local_unnamed_addr #0 "stack-probe-size"="65536" nounwind ; CHECK-BE-NEXT: std r31, -8(r1) ; CHECK-BE-NEXT: stdu r1, -64(r1) ; CHECK-BE-NEXT: rldic r3, r3, 2, 30 -; CHECK-BE-NEXT: lis r5, -1 +; CHECK-BE-NEXT: lis r4, -1 ; CHECK-BE-NEXT: addi r3, r3, 15 ; CHECK-BE-NEXT: rldicl r3, r3, 60, 4 -; CHECK-BE-NEXT: ori r5, r5, 0 +; CHECK-BE-NEXT: ori r4, r4, 0 ; CHECK-BE-NEXT: rldicl r3, r3, 4, 29 ; CHECK-BE-NEXT: mr r31, r1 -; CHECK-BE-NEXT: neg r6, r3 -; CHECK-BE-NEXT: divd r7, r6, r5 -; CHECK-BE-NEXT: addi r4, r31, 64 -; CHECK-BE-NEXT: mulld r7, r7, r5 -; CHECK-BE-NEXT: add r3, r1, r6 -; CHECK-BE-NEXT: sub r6, r6, r7 -; CHECK-BE-NEXT: stdux r4, r1, r6 -; CHECK-BE-NEXT: cmpd r1, r3 +; CHECK-BE-NEXT: neg r5, r3 +; CHECK-BE-NEXT: divd r6, r5, r4 +; CHECK-BE-NEXT: addi r3, r31, 64 +; CHECK-BE-NEXT: mulld r6, r6, r4 +; CHECK-BE-NEXT: sub r6, r5, r6 +; CHECK-BE-NEXT: add r5, r1, r5 +; CHECK-BE-NEXT: stdux r3, r1, r6 +; CHECK-BE-NEXT: cmpd r1, r5 ; CHECK-BE-NEXT: beq cr0, .LBB2_2 ; CHECK-BE-NEXT: .LBB2_1: -; CHECK-BE-NEXT: stdux r4, r1, r5 -; CHECK-BE-NEXT: cmpd r1, r3 +; CHECK-BE-NEXT: stdux r3, r1, r4 +; CHECK-BE-NEXT: cmpd r1, r5 ; CHECK-BE-NEXT: bne cr0, .LBB2_1 ; CHECK-BE-NEXT: .LBB2_2: ; CHECK-BE-NEXT: li r4, 1 @@ -400,22 +400,22 @@ define i32 @f(i32 %n) local_unnamed_addr #0 "stack-probe-size"="65536" nounwind ; CHECK-32-NEXT: slwi r3, r3, 2 ; CHECK-32-NEXT: addi r3, r3, 15 ; CHECK-32-NEXT: rlwinm r3, r3, 0, 0, 27 -; CHECK-32-NEXT: lis r5, -1 -; 
CHECK-32-NEXT: neg r6, r3 -; CHECK-32-NEXT: ori r5, r5, 0 -; CHECK-32-NEXT: divw r7, r6, r5 +; CHECK-32-NEXT: lis r4, -1 +; CHECK-32-NEXT: neg r5, r3 +; CHECK-32-NEXT: ori r4, r4, 0 +; CHECK-32-NEXT: divw r6, r5, r4 ; CHECK-32-NEXT: stw r31, 28(r1) ; CHECK-32-NEXT: mr r31, r1 -; CHECK-32-NEXT: addi r4, r31, 32 -; CHECK-32-NEXT: add r3, r1, r6 -; CHECK-32-NEXT: mullw r7, r7, r5 -; CHECK-32-NEXT: sub r6, r6, r7 -; CHECK-32-NEXT: stwux r4, r1, r6 -; CHECK-32-NEXT: cmpw r1, r3 +; CHECK-32-NEXT: addi r3, r31, 32 +; CHECK-32-NEXT: mullw r6, r6, r4 +; CHECK-32-NEXT: sub r6, r5, r6 +; CHECK-32-NEXT: add r5, r1, r5 +; CHECK-32-NEXT: stwux r3, r1, r6 +; CHECK-32-NEXT: cmpw r1, r5 ; CHECK-32-NEXT: beq cr0, .LBB2_2 ; CHECK-32-NEXT: .LBB2_1: -; CHECK-32-NEXT: stwux r4, r1, r5 -; CHECK-32-NEXT: cmpw r1, r3 +; CHECK-32-NEXT: stwux r3, r1, r4 +; CHECK-32-NEXT: cmpw r1, r5 ; CHECK-32-NEXT: bne cr0, .LBB2_1 ; CHECK-32-NEXT: .LBB2_2: ; CHECK-32-NEXT: li r4, 1