Skip to content

Commit

Permalink
[PowerPC] Try to fold sqrt/sdiv test results with the branch.
Browse files Browse the repository at this point in the history
Summary: This patch tries to fold a software sqrt/div test node (e.g. FTSQRT, XVTDIVDP) into the branch that consumes its result (i.e. br_cc) when they match one of these patterns:
(br_cc seteq, (truncateToi1 SWTestOp), 0) -> (BCC PRED_NU, SWTestOp)
(br_cc seteq, (and SWTestOp, 2), 0) -> (BCC PRED_NE, SWTestOp)
(br_cc seteq, (and SWTestOp, 4), 0) -> (BCC PRED_LE, SWTestOp)
(br_cc seteq, (and SWTestOp, 8), 0) -> (BCC PRED_GE, SWTestOp)
(br_cc setne, (truncateToi1 SWTestOp), 0) -> (BCC PRED_UN, SWTestOp)
(br_cc setne, (and SWTestOp, 2), 0) -> (BCC PRED_EQ, SWTestOp)
(br_cc setne, (and SWTestOp, 4), 0) -> (BCC PRED_GT, SWTestOp)
(br_cc setne, (and SWTestOp, 8), 0) -> (BCC PRED_LT, SWTestOp)

Reviewed By: steven.zhang

Differential Revision: https://reviews.llvm.org/D94054
  • Loading branch information
EsmeYi committed Jan 14, 2021
1 parent 336ab2d commit ff40fb0
Show file tree
Hide file tree
Showing 2 changed files with 282 additions and 0 deletions.
78 changes: 78 additions & 0 deletions llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -352,6 +352,7 @@ namespace {

private:
bool trySETCC(SDNode *N);
bool tryFoldSWTestBRCC(SDNode *N);
bool tryAsSingleRLDICL(SDNode *N);
bool tryAsSingleRLDICR(SDNode *N);
bool tryAsSingleRLWINM(SDNode *N);
Expand Down Expand Up @@ -4378,6 +4379,81 @@ static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG,
return true;
}

// Return true if N is a software square-root/divide test operand, i.e. either
// a PPCISD::FTSQRT node or a call to one of the VSX xvtdiv*/xvtsqrt*
// intrinsics.
static bool isSWTestOp(SDValue N) {
  if (N.getOpcode() == PPCISD::FTSQRT)
    return true;

  // Operand 0 is only an intrinsic ID on ISD::INTRINSIC_WO_CHAIN nodes. Without
  // the opcode check, any node whose first operand is a constant that happens
  // to be numerically equal to one of the IDs below would be misclassified as
  // a test operand and folded into a branch.
  if (N.getOpcode() != ISD::INTRINSIC_WO_CHAIN || N.getNumOperands() < 1 ||
      !isa<ConstantSDNode>(N.getOperand(0)))
    return false;

  switch (N.getConstantOperandVal(0)) {
  case Intrinsic::ppc_vsx_xvtdivdp:
  case Intrinsic::ppc_vsx_xvtdivsp:
  case Intrinsic::ppc_vsx_xvtsqrtdp:
  case Intrinsic::ppc_vsx_xvtsqrtsp:
    return true;
  default:
    return false;
  }
}

// Try to fold a software sqrt/divide test (anything isSWTestOp accepts) into
// the BR_CC that branches on a single bit of its result, replacing N with a
// PPC::BCC that takes the test node itself as its condition operand.
//
// `truncate to i1` is handled exactly like `and 1`. Recognized shapes and the
// predicate each one selects:
//   (br_cc seteq, (truncateToi1 SWTestOp), 0) -> (BCC PRED_NU, SWTestOp)
//   (br_cc seteq, (and SWTestOp, 2), 0)       -> (BCC PRED_NE, SWTestOp)
//   (br_cc seteq, (and SWTestOp, 4), 0)       -> (BCC PRED_LE, SWTestOp)
//   (br_cc seteq, (and SWTestOp, 8), 0)       -> (BCC PRED_GE, SWTestOp)
//   (br_cc setne, (truncateToi1 SWTestOp), 0) -> (BCC PRED_UN, SWTestOp)
//   (br_cc setne, (and SWTestOp, 2), 0)       -> (BCC PRED_EQ, SWTestOp)
//   (br_cc setne, (and SWTestOp, 4), 0)       -> (BCC PRED_GT, SWTestOp)
//   (br_cc setne, (and SWTestOp, 8), 0)       -> (BCC PRED_LT, SWTestOp)
//
// Returns true when N has been replaced, false when the pattern does not match
// (N is then left untouched).
bool PPCDAGToDAGISel::tryFoldSWTestBRCC(SDNode *N) {
  assert(N->getOpcode() == ISD::BR_CC && "ISD::BR_CC is expected.");

  // Only equality and inequality comparisons are candidates.
  const ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(1))->get();
  const bool BranchIfBitSet = Cond == ISD::SETNE;
  if (!BranchIfBitSet && Cond != ISD::SETEQ)
    return false;

  // The right-hand side of the comparison has to be the constant zero.
  SDValue Zero = N->getOperand(3);
  if (!isa<ConstantSDNode>(Zero))
    return false;
  if (cast<ConstantSDNode>(Zero)->getSExtValue() != 0)
    return false;

  // The left-hand side must be a node whose first operand is the test result.
  SDValue BitTest = N->getOperand(2);
  if (BitTest.getNumOperands() < 1)
    return false;
  SDValue SWTest = BitTest.getOperand(0);
  if (!isSWTestOp(SWTest))
    return false;

  // Pick the branch predicate for the tested bit. Zero means "no match yet".
  unsigned Pred = 0;
  const bool IsMaskByConstant = BitTest.getOpcode() == ISD::AND &&
                                isa<ConstantSDNode>(BitTest.getOperand(1));
  if (IsMaskByConstant) {
    // For each supported mask: the predicate used when branching on the bit
    // being set (setne) and the one used when branching on it being clear
    // (seteq).
    unsigned IfSet = 0;
    unsigned IfClear = 0;
    switch (BitTest.getConstantOperandVal(1)) {
    case 1:
      IfSet = PPC::PRED_UN;
      IfClear = PPC::PRED_NU;
      break;
    case 2:
      IfSet = PPC::PRED_EQ;
      IfClear = PPC::PRED_NE;
      break;
    case 4:
      IfSet = PPC::PRED_GT;
      IfClear = PPC::PRED_LE;
      break;
    case 8:
      IfSet = PPC::PRED_LT;
      IfClear = PPC::PRED_GE;
      break;
    default:
      // Masks covering anything other than exactly one of these bits cannot
      // be expressed as a single branch predicate.
      return false;
    }
    Pred = BranchIfBitSet ? IfSet : IfClear;
  } else if (BitTest.getOpcode() == ISD::TRUNCATE &&
             BitTest.getValueType() == MVT::i1) {
    // Truncation to i1 keeps only the lowest bit, same as `and 1`.
    Pred = BranchIfBitSet ? PPC::PRED_UN : PPC::PRED_NU;
  }

  if (!Pred)
    return false;

  // Build the BCC from the predicate, the test node, and BR_CC operands 4 and
  // 0 (the branch destination and the chain in the generic BR_CC layout).
  SDLoc dl(N);
  SDValue Ops[] = {getI32Imm(Pred, dl), SWTest, N->getOperand(4),
                   N->getOperand(0)};
  CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
  return true;
}

bool PPCDAGToDAGISel::tryAsSingleRLWINM(SDNode *N) {
assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
unsigned Imm;
Expand Down Expand Up @@ -5247,6 +5323,8 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
return;
}
case ISD::BR_CC: {
if (tryFoldSWTestBRCC(N))
return;
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
unsigned PCC =
getPredicateForSetCC(CC, N->getOperand(2).getValueType(), Subtarget);
Expand Down
204 changes: 204 additions & 0 deletions llvm/test/CodeGen/PowerPC/fold_swtest_br.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown \
; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s | FileCheck %s

@val = external local_unnamed_addr global i32, align 4
declare i32 @llvm.ppc.vsx.xvtsqrtdp(<2 x double>)

; Branch on `(xvtsqrtdp & 1) == 0`: the expected output is a single `bnu` on
; the CR field written by xvtsqrtdp, with no mask/compare instructions between
; the test and the branch.
define dso_local signext i32 @xvtsqrtdp_and_1_eq(<2 x double> %input) {
; CHECK-LABEL: xvtsqrtdp_and_1_eq:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvtsqrtdp cr0, v2
; CHECK-NEXT: bnu cr0, .LBB0_2
; CHECK-NEXT: # %bb.1: # %if.then
; CHECK-NEXT: addis r3, r2, .LC0@toc@ha
; CHECK-NEXT: li r4, 100
; CHECK-NEXT: ld r3, .LC0@toc@l(r3)
; CHECK-NEXT: stw r4, 0(r3)
; CHECK-NEXT: .LBB0_2: # %if.end
; CHECK-NEXT: li r3, 1
; CHECK-NEXT: blr
entry:
%0 = tail call i32 @llvm.ppc.vsx.xvtsqrtdp(<2 x double> %input)
%1 = and i32 %0, 1
%cmp.not = icmp eq i32 %1, 0
br i1 %cmp.not, label %if.end, label %if.then

if.then: ; preds = %entry
store i32 100, i32* @val, align 4
br label %if.end

if.end: ; preds = %if.then, %entry
ret i32 1
}

; Branch on `(xvtsqrtdp & 2) == 0`: the expected output is a single `bne` on
; the CR field written by xvtsqrtdp, with no mask/compare instructions between
; the test and the branch.
define dso_local signext i32 @xvtsqrtdp_and_2_eq(<2 x double> %input) {
; CHECK-LABEL: xvtsqrtdp_and_2_eq:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvtsqrtdp cr0, v2
; CHECK-NEXT: bne cr0, .LBB1_2
; CHECK-NEXT: # %bb.1: # %if.then
; CHECK-NEXT: addis r3, r2, .LC0@toc@ha
; CHECK-NEXT: li r4, 100
; CHECK-NEXT: ld r3, .LC0@toc@l(r3)
; CHECK-NEXT: stw r4, 0(r3)
; CHECK-NEXT: .LBB1_2: # %if.end
; CHECK-NEXT: li r3, 1
; CHECK-NEXT: blr
entry:
%0 = tail call i32 @llvm.ppc.vsx.xvtsqrtdp(<2 x double> %input)
%1 = and i32 %0, 2
%cmp.not = icmp eq i32 %1, 0
br i1 %cmp.not, label %if.end, label %if.then

if.then: ; preds = %entry
store i32 100, i32* @val, align 4
br label %if.end

if.end: ; preds = %if.then, %entry
ret i32 1
}

; Branch on `(xvtsqrtdp & 4) == 0`: the expected output is a single `ble` on
; the CR field written by xvtsqrtdp, with no mask/compare instructions between
; the test and the branch.
define dso_local signext i32 @xvtsqrtdp_and_4_eq(<2 x double> %input) {
; CHECK-LABEL: xvtsqrtdp_and_4_eq:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvtsqrtdp cr0, v2
; CHECK-NEXT: ble cr0, .LBB2_2
; CHECK-NEXT: # %bb.1: # %if.then
; CHECK-NEXT: addis r3, r2, .LC0@toc@ha
; CHECK-NEXT: li r4, 100
; CHECK-NEXT: ld r3, .LC0@toc@l(r3)
; CHECK-NEXT: stw r4, 0(r3)
; CHECK-NEXT: .LBB2_2: # %if.end
; CHECK-NEXT: li r3, 1
; CHECK-NEXT: blr
entry:
%0 = tail call i32 @llvm.ppc.vsx.xvtsqrtdp(<2 x double> %input)
%1 = and i32 %0, 4
%cmp.not = icmp eq i32 %1, 0
br i1 %cmp.not, label %if.end, label %if.then

if.then: ; preds = %entry
store i32 100, i32* @val, align 4
br label %if.end

if.end: ; preds = %if.then, %entry
ret i32 1
}

; Branch on `(xvtsqrtdp & 8) == 0`: the expected output is a single `bge` on
; the CR field written by xvtsqrtdp, with no mask/compare instructions between
; the test and the branch.
define dso_local signext i32 @xvtsqrtdp_and_8_eq(<2 x double> %input) {
; CHECK-LABEL: xvtsqrtdp_and_8_eq:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvtsqrtdp cr0, v2
; CHECK-NEXT: bge cr0, .LBB3_2
; CHECK-NEXT: # %bb.1: # %if.then
; CHECK-NEXT: addis r3, r2, .LC0@toc@ha
; CHECK-NEXT: li r4, 100
; CHECK-NEXT: ld r3, .LC0@toc@l(r3)
; CHECK-NEXT: stw r4, 0(r3)
; CHECK-NEXT: .LBB3_2: # %if.end
; CHECK-NEXT: li r3, 1
; CHECK-NEXT: blr
entry:
%0 = tail call i32 @llvm.ppc.vsx.xvtsqrtdp(<2 x double> %input)
%1 = and i32 %0, 8
%cmp.not = icmp eq i32 %1, 0
br i1 %cmp.not, label %if.end, label %if.then

if.then: ; preds = %entry
store i32 100, i32* @val, align 4
br label %if.end

if.end: ; preds = %if.then, %entry
ret i32 1
}

; Branch on `(xvtsqrtdp & 1) != 0`: the expected output is a single `bun` on
; the CR field written by xvtsqrtdp, with no mask/compare instructions between
; the test and the branch.
define dso_local signext i32 @xvtsqrtdp_and_1_ne(<2 x double> %input) {
; CHECK-LABEL: xvtsqrtdp_and_1_ne:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvtsqrtdp cr0, v2
; CHECK-NEXT: bun cr0, .LBB4_2
; CHECK-NEXT: # %bb.1: # %if.then
; CHECK-NEXT: addis r3, r2, .LC0@toc@ha
; CHECK-NEXT: li r4, 100
; CHECK-NEXT: ld r3, .LC0@toc@l(r3)
; CHECK-NEXT: stw r4, 0(r3)
; CHECK-NEXT: .LBB4_2: # %if.end
; CHECK-NEXT: li r3, 1
; CHECK-NEXT: blr
entry:
%0 = tail call i32 @llvm.ppc.vsx.xvtsqrtdp(<2 x double> %input)
%1 = and i32 %0, 1
%cmp.not = icmp ne i32 %1, 0
br i1 %cmp.not, label %if.end, label %if.then

if.then: ; preds = %entry
store i32 100, i32* @val, align 4
br label %if.end

if.end: ; preds = %if.then, %entry
ret i32 1
}

; Branch on `(xvtsqrtdp & 2) != 0`: the expected output is a single `beq` on
; the CR field written by xvtsqrtdp, with no mask/compare instructions between
; the test and the branch.
;
; The body previously consisted of a bare `ret i32 1`, so the intrinsic was
; never called and the (setne, and 2) fold was not exercised at all.
; NOTE(review): the assertions below were written by analogy with the other
; tests in this file; regenerate them with utils/update_llc_test_checks.py to
; confirm the exact output.
define dso_local signext i32 @xvtsqrtdp_and_2_ne(<2 x double> %input) {
; CHECK-LABEL: xvtsqrtdp_and_2_ne:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvtsqrtdp cr0, v2
; CHECK-NEXT: beq cr0, .LBB5_2
; CHECK-NEXT: # %bb.1: # %if.then
; CHECK-NEXT: addis r3, r2, .LC0@toc@ha
; CHECK-NEXT: li r4, 100
; CHECK-NEXT: ld r3, .LC0@toc@l(r3)
; CHECK-NEXT: stw r4, 0(r3)
; CHECK-NEXT: .LBB5_2: # %if.end
; CHECK-NEXT: li r3, 1
; CHECK-NEXT: blr
entry:
%0 = tail call i32 @llvm.ppc.vsx.xvtsqrtdp(<2 x double> %input)
%1 = and i32 %0, 2
%cmp.not = icmp ne i32 %1, 0
br i1 %cmp.not, label %if.end, label %if.then

if.then: ; preds = %entry
store i32 100, i32* @val, align 4
br label %if.end

if.end: ; preds = %if.then, %entry
ret i32 1
}

; Branch on `(xvtsqrtdp & 4) != 0`: the expected output is a single `bgt` on
; the CR field written by xvtsqrtdp, with no mask/compare instructions between
; the test and the branch.
define dso_local signext i32 @xvtsqrtdp_and_4_ne(<2 x double> %input) {
; CHECK-LABEL: xvtsqrtdp_and_4_ne:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvtsqrtdp cr0, v2
; CHECK-NEXT: bgt cr0, .LBB6_2
; CHECK-NEXT: # %bb.1: # %if.then
; CHECK-NEXT: addis r3, r2, .LC0@toc@ha
; CHECK-NEXT: li r4, 100
; CHECK-NEXT: ld r3, .LC0@toc@l(r3)
; CHECK-NEXT: stw r4, 0(r3)
; CHECK-NEXT: .LBB6_2: # %if.end
; CHECK-NEXT: li r3, 1
; CHECK-NEXT: blr
entry:
%0 = tail call i32 @llvm.ppc.vsx.xvtsqrtdp(<2 x double> %input)
%1 = and i32 %0, 4
%cmp.not = icmp ne i32 %1, 0
br i1 %cmp.not, label %if.end, label %if.then

if.then: ; preds = %entry
store i32 100, i32* @val, align 4
br label %if.end

if.end: ; preds = %if.then, %entry
ret i32 1
}

; Branch on `(xvtsqrtdp & 8) != 0`: the expected output is a single `blt` on
; the CR field written by xvtsqrtdp, with no mask/compare instructions between
; the test and the branch.
define dso_local signext i32 @xvtsqrtdp_and_8_ne(<2 x double> %input) {
; CHECK-LABEL: xvtsqrtdp_and_8_ne:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvtsqrtdp cr0, v2
; CHECK-NEXT: blt cr0, .LBB7_2
; CHECK-NEXT: # %bb.1: # %if.then
; CHECK-NEXT: addis r3, r2, .LC0@toc@ha
; CHECK-NEXT: li r4, 100
; CHECK-NEXT: ld r3, .LC0@toc@l(r3)
; CHECK-NEXT: stw r4, 0(r3)
; CHECK-NEXT: .LBB7_2: # %if.end
; CHECK-NEXT: li r3, 1
; CHECK-NEXT: blr
entry:
%0 = tail call i32 @llvm.ppc.vsx.xvtsqrtdp(<2 x double> %input)
%1 = and i32 %0, 8
%cmp.not = icmp ne i32 %1, 0
br i1 %cmp.not, label %if.end, label %if.then

if.then: ; preds = %entry
store i32 100, i32* @val, align 4
br label %if.end

if.end: ; preds = %if.then, %entry
ret i32 1
}

0 comments on commit ff40fb0

Please sign in to comment.