Skip to content

Commit

Permalink
[PowerPC] Fix wrong codegen when stack pointer has to realign perform…
Browse files Browse the repository at this point in the history
…ing dynalloc

Current powerpc backend generates wrong code sequence if stack pointer
has to realign if `-fstack-clash-protection` enabled. When probing
dynamic stack allocation, current `PREPARE_PROBED_ALLOCA` takes
`NegSizeReg` as input and returns
`FinalStackPtr`. `FinalStackPtr=StackPtr+ActualNegSize` is calculated
correctly, however code following `PREPARE_PROBED_ALLOCA` still uses
value of `NegSizeReg`, which does not contain `ActualNegSize` if
`MaxAlign > TargetAlign`, to calculate loop trip count and residual
number of bytes.

This patch is part of fix of
https://bugs.llvm.org/show_bug.cgi?id=46759.

Differential Revision: https://reviews.llvm.org/D84152
  • Loading branch information
Kai Luo committed Jul 22, 2020
1 parent 8912252 commit c3f9697
Show file tree
Hide file tree
Showing 6 changed files with 198 additions and 160 deletions.
36 changes: 26 additions & 10 deletions llvm/lib/Target/PowerPC/PPCISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11954,18 +11954,34 @@ PPCTargetLowering::emitProbedAlloca(MachineInstr &MI,
Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
Register FinalStackPtr = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
Register FramePointer = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);

// Get the canonical FinalStackPtr like what
// PPCRegisterInfo::lowerDynamicAlloc does.
BuildMI(*MBB, {MI}, DL,
TII->get(isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64
: PPC::PREPARE_PROBED_ALLOCA_32),
FramePointer)
.addDef(FinalStackPtr)
Register ActualNegSizeReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);

// Since value of NegSizeReg might be realigned in prologepilog, insert a
// PREPARE_PROBED_ALLOCA pseudo instruction to get actual FramePointer and
// NegSize.
unsigned ProbeOpc;
if (!MRI.hasOneNonDBGUse(NegSizeReg))
ProbeOpc =
isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 : PPC::PREPARE_PROBED_ALLOCA_32;
else
// By introducing PREPARE_PROBED_ALLOCA_NEGSIZE_OPT, ActualNegSizeReg
// and NegSizeReg will be allocated in the same phyreg to avoid
// redundant copy when NegSizeReg has only one use which is current MI and
// will be replaced by PREPARE_PROBED_ALLOCA then.
ProbeOpc = isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64
: PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32;
BuildMI(*MBB, {MI}, DL, TII->get(ProbeOpc), FramePointer)
.addDef(ActualNegSizeReg)
.addReg(NegSizeReg)
.add(MI.getOperand(2))
.add(MI.getOperand(3));

// Calculate final stack pointer, which equals to SP + ActualNegSize.
BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
FinalStackPtr)
.addReg(SPReg)
.addReg(ActualNegSizeReg);

// Materialize a scratch register for update.
int64_t NegProbeSize = -(int64_t)ProbeSize;
assert(isInt<32>(NegProbeSize) && "Unhandled probe size!");
Expand All @@ -11986,7 +12002,7 @@ PPCTargetLowering::emitProbedAlloca(MachineInstr &MI,
// Probing leading residual part.
Register Div = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::DIVD : PPC::DIVW), Div)
.addReg(NegSizeReg)
.addReg(ActualNegSizeReg)
.addReg(ScratchReg);
Register Mul = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::MULLD : PPC::MULLW), Mul)
Expand All @@ -11995,7 +12011,7 @@ PPCTargetLowering::emitProbedAlloca(MachineInstr &MI,
Register NegMod = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), NegMod)
.addReg(Mul)
.addReg(NegSizeReg);
.addReg(ActualNegSizeReg);
BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
.addReg(FramePointer)
.addReg(SPReg)
Expand Down
9 changes: 7 additions & 2 deletions llvm/lib/Target/PowerPC/PPCInstr64Bit.td
Original file line number Diff line number Diff line change
Expand Up @@ -431,9 +431,14 @@ def PROBED_ALLOCA_64 : PPCCustomInserterPseudo<(outs g8rc:$result),
(ins g8rc:$negsize, memri:$fpsi), "#PROBED_ALLOCA_64",
[(set i64:$result,
(PPCprobedalloca i64:$negsize, iaddr:$fpsi))]>;
def PREPARE_PROBED_ALLOCA_64 : PPCEmitTimePseudo<(outs g8rc:$fp,
g8rc:$sp),
def PREPARE_PROBED_ALLOCA_64 : PPCEmitTimePseudo<(outs
g8rc:$fp, g8rc:$actual_negsize),
(ins g8rc:$negsize, memri:$fpsi), "#PREPARE_PROBED_ALLOCA_64", []>;
def PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64 : PPCEmitTimePseudo<(outs
g8rc:$fp, g8rc:$actual_negsize),
(ins g8rc:$negsize, memri:$fpsi),
"#PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64", []>,
RegConstraint<"$actual_negsize = $negsize">;
def PROBED_STACKALLOC_64 : PPCEmitTimePseudo<(outs g8rc:$scratch, g8rc:$temp),
(ins i64imm:$stacksize),
"#PROBED_STACKALLOC_64", []>;
Expand Down
9 changes: 7 additions & 2 deletions llvm/lib/Target/PowerPC/PPCInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -1406,9 +1406,14 @@ def PROBED_ALLOCA_32 : PPCCustomInserterPseudo<(outs gprc:$result),
(ins gprc:$negsize, memri:$fpsi), "#PROBED_ALLOCA_32",
[(set i32:$result,
(PPCprobedalloca i32:$negsize, iaddr:$fpsi))]>;
def PREPARE_PROBED_ALLOCA_32 : PPCEmitTimePseudo<(outs gprc:$fp,
gprc:$sp),
def PREPARE_PROBED_ALLOCA_32 : PPCEmitTimePseudo<(outs
gprc:$fp, gprc:$actual_negsize),
(ins gprc:$negsize, memri:$fpsi), "#PREPARE_PROBED_ALLOCA_32", []>;
def PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32 : PPCEmitTimePseudo<(outs
gprc:$fp, gprc:$actual_negsize),
(ins gprc:$negsize, memri:$fpsi),
"#PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32", []>,
RegConstraint<"$actual_negsize = $negsize">;
def PROBED_STACKALLOC_32 : PPCEmitTimePseudo<(outs gprc:$scratch, gprc:$temp),
(ins i64imm:$stacksize),
"#PROBED_STACKALLOC_32", []>;
Expand Down
37 changes: 24 additions & 13 deletions llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -624,21 +624,30 @@ void PPCRegisterInfo::lowerPrepareProbedAlloca(
bool LP64 = TM.isPPC64();
DebugLoc dl = MI.getDebugLoc();
Register FramePointer = MI.getOperand(0).getReg();
Register FinalStackPtr = MI.getOperand(1).getReg();
const Register ActualNegSizeReg = MI.getOperand(1).getReg();
bool KillNegSizeReg = MI.getOperand(2).isKill();
Register NegSizeReg = MI.getOperand(2).getReg();
prepareDynamicAlloca(II, NegSizeReg, KillNegSizeReg, FramePointer);
if (LP64) {
BuildMI(MBB, II, dl, TII.get(PPC::ADD8), FinalStackPtr)
.addReg(PPC::X1)
.addReg(NegSizeReg, getKillRegState(KillNegSizeReg));

} else {
BuildMI(MBB, II, dl, TII.get(PPC::ADD4), FinalStackPtr)
.addReg(PPC::R1)
.addReg(NegSizeReg, getKillRegState(KillNegSizeReg));
const MCInstrDesc &CopyInst = TII.get(LP64 ? PPC::OR8 : PPC::OR);
// RegAllocator might allocate FramePointer and NegSizeReg in the same phyreg.
if (FramePointer == NegSizeReg) {
assert(KillNegSizeReg && "FramePointer is a def and NegSizeReg is an use, "
"NegSizeReg should be killed");
// FramePointer is clobbered earlier than the use of NegSizeReg in
// prepareDynamicAlloca, save NegSizeReg in ActualNegSizeReg to avoid
// misuse.
BuildMI(MBB, II, dl, CopyInst, ActualNegSizeReg)
.addReg(NegSizeReg)
.addReg(NegSizeReg);
NegSizeReg = ActualNegSizeReg;
KillNegSizeReg = false;
}

prepareDynamicAlloca(II, NegSizeReg, KillNegSizeReg, FramePointer);
// NegSizeReg might be updated in prepareDynamicAlloca if MaxAlign >
// TargetAlign.
if (NegSizeReg != ActualNegSizeReg)
BuildMI(MBB, II, dl, CopyInst, ActualNegSizeReg)
.addReg(NegSizeReg)
.addReg(NegSizeReg);
MBB.erase(II);
}

Expand Down Expand Up @@ -1084,7 +1093,9 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,

if (FPSI && FrameIndex == FPSI &&
(OpC == PPC::PREPARE_PROBED_ALLOCA_64 ||
OpC == PPC::PREPARE_PROBED_ALLOCA_32)) {
OpC == PPC::PREPARE_PROBED_ALLOCA_32 ||
OpC == PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64 ||
OpC == PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32)) {
lowerPrepareProbedAlloca(II);
return;
}
Expand Down
27 changes: 14 additions & 13 deletions llvm/test/CodeGen/PowerPC/pr46759.ll
Original file line number Diff line number Diff line change
Expand Up @@ -20,26 +20,27 @@ define void @foo(i32 %vla_size) #0 {
; CHECK-LE-NEXT: .cfi_offset r31, -8
; CHECK-LE-NEXT: .cfi_offset r30, -16
; CHECK-LE-NEXT: clrldi r3, r3, 32
; CHECK-LE-NEXT: li r6, -4096
; CHECK-LE-NEXT: ld r4, 0(r1)
; CHECK-LE-NEXT: li r5, -2048
; CHECK-LE-NEXT: mr r31, r1
; CHECK-LE-NEXT: addi r3, r3, 15
; CHECK-LE-NEXT: rldicl r3, r3, 60, 4
; CHECK-LE-NEXT: rldicl r3, r3, 4, 31
; CHECK-LE-NEXT: neg r5, r3
; CHECK-LE-NEXT: li r3, -2048
; CHECK-LE-NEXT: divd r7, r5, r6
; CHECK-LE-NEXT: and r3, r5, r3
; CHECK-LE-NEXT: add r3, r1, r3
; CHECK-LE-NEXT: mulld r6, r7, r6
; CHECK-LE-NEXT: sub r5, r5, r6
; CHECK-LE-NEXT: stdux r4, r1, r5
; CHECK-LE-NEXT: cmpd r1, r3
; CHECK-LE-NEXT: neg r4, r3
; CHECK-LE-NEXT: ld r3, 0(r1)
; CHECK-LE-NEXT: and r5, r4, r5
; CHECK-LE-NEXT: mr r4, r5
; CHECK-LE-NEXT: li r5, -4096
; CHECK-LE-NEXT: divd r6, r4, r5
; CHECK-LE-NEXT: mulld r5, r6, r5
; CHECK-LE-NEXT: sub r5, r4, r5
; CHECK-LE-NEXT: add r4, r1, r4
; CHECK-LE-NEXT: stdux r3, r1, r5
; CHECK-LE-NEXT: cmpd r1, r4
; CHECK-LE-NEXT: beq cr0, .LBB0_2
; CHECK-LE-NEXT: .LBB0_1: # %entry
; CHECK-LE-NEXT: #
; CHECK-LE-NEXT: stdu r4, -4096(r1)
; CHECK-LE-NEXT: cmpd r1, r3
; CHECK-LE-NEXT: stdu r3, -4096(r1)
; CHECK-LE-NEXT: cmpd r1, r4
; CHECK-LE-NEXT: bne cr0, .LBB0_1
; CHECK-LE-NEXT: .LBB0_2: # %entry
; CHECK-LE-NEXT: addi r3, r1, 2048
Expand Down
Loading

0 comments on commit c3f9697

Please sign in to comment.