Skip to content

Commit ff40fb0

Browse files
committed
[PowerPC] Try to fold sqrt/sdiv test results with the branch.
Summary: This patch tries to fold a software sqrt/sdiv test node (e.g. FTSQRT, XVTDIVDP) into the branch that consumes it (i.e. br_cc) when the pair matches one of these patterns: (br_cc seteq, (truncateToi1 SWTestOp), 0) -> (BCC PRED_NU, SWTestOp) (br_cc seteq, (and SWTestOp, 2), 0) -> (BCC PRED_NE, SWTestOp) (br_cc seteq, (and SWTestOp, 4), 0) -> (BCC PRED_LE, SWTestOp) (br_cc seteq, (and SWTestOp, 8), 0) -> (BCC PRED_GE, SWTestOp) (br_cc setne, (truncateToi1 SWTestOp), 0) -> (BCC PRED_UN, SWTestOp) (br_cc setne, (and SWTestOp, 2), 0) -> (BCC PRED_EQ, SWTestOp) (br_cc setne, (and SWTestOp, 4), 0) -> (BCC PRED_GT, SWTestOp) (br_cc setne, (and SWTestOp, 8), 0) -> (BCC PRED_LT, SWTestOp) Reviewed By: steven.zhang Differential Revision: https://reviews.llvm.org/D94054
1 parent 336ab2d commit ff40fb0

File tree

2 files changed

+282
-0
lines changed

2 files changed

+282
-0
lines changed

Diff for: llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp

+78
Original file line numberDiff line numberDiff line change
@@ -352,6 +352,7 @@ namespace {
352352

353353
private:
354354
bool trySETCC(SDNode *N);
355+
bool tryFoldSWTestBRCC(SDNode *N);
355356
bool tryAsSingleRLDICL(SDNode *N);
356357
bool tryAsSingleRLDICR(SDNode *N);
357358
bool tryAsSingleRLWINM(SDNode *N);
@@ -4378,6 +4379,81 @@ static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG,
43784379
return true;
43794380
}
43804381

4382+
// Return true if it's a software square-root/divide operand.
4383+
static bool isSWTestOp(SDValue N) {
4384+
if (N.getOpcode() == PPCISD::FTSQRT)
4385+
return true;
4386+
if (N.getNumOperands() < 1 || !isa<ConstantSDNode>(N.getOperand(0)))
4387+
return false;
4388+
switch (N.getConstantOperandVal(0)) {
4389+
case Intrinsic::ppc_vsx_xvtdivdp:
4390+
case Intrinsic::ppc_vsx_xvtdivsp:
4391+
case Intrinsic::ppc_vsx_xvtsqrtdp:
4392+
case Intrinsic::ppc_vsx_xvtsqrtsp:
4393+
return true;
4394+
}
4395+
return false;
4396+
}
4397+
4398+
bool PPCDAGToDAGISel::tryFoldSWTestBRCC(SDNode *N) {
4399+
assert(N->getOpcode() == ISD::BR_CC && "ISD::BR_CC is expected.");
4400+
// We are looking for following patterns, where `truncate to i1` actually has
4401+
// the same semantic with `and 1`.
4402+
// (br_cc seteq, (truncateToi1 SWTestOp), 0) -> (BCC PRED_NU, SWTestOp)
4403+
// (br_cc seteq, (and SWTestOp, 2), 0) -> (BCC PRED_NE, SWTestOp)
4404+
// (br_cc seteq, (and SWTestOp, 4), 0) -> (BCC PRED_LE, SWTestOp)
4405+
// (br_cc seteq, (and SWTestOp, 8), 0) -> (BCC PRED_GE, SWTestOp)
4406+
// (br_cc setne, (truncateToi1 SWTestOp), 0) -> (BCC PRED_UN, SWTestOp)
4407+
// (br_cc setne, (and SWTestOp, 2), 0) -> (BCC PRED_EQ, SWTestOp)
4408+
// (br_cc setne, (and SWTestOp, 4), 0) -> (BCC PRED_GT, SWTestOp)
4409+
// (br_cc setne, (and SWTestOp, 8), 0) -> (BCC PRED_LT, SWTestOp)
4410+
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
4411+
if (CC != ISD::SETEQ && CC != ISD::SETNE)
4412+
return false;
4413+
4414+
SDValue CmpRHS = N->getOperand(3);
4415+
if (!isa<ConstantSDNode>(CmpRHS) ||
4416+
cast<ConstantSDNode>(CmpRHS)->getSExtValue() != 0)
4417+
return false;
4418+
4419+
SDValue CmpLHS = N->getOperand(2);
4420+
if (CmpLHS.getNumOperands() < 1 || !isSWTestOp(CmpLHS.getOperand(0)))
4421+
return false;
4422+
4423+
unsigned PCC = 0;
4424+
bool IsCCNE = CC == ISD::SETNE;
4425+
if (CmpLHS.getOpcode() == ISD::AND &&
4426+
isa<ConstantSDNode>(CmpLHS.getOperand(1)))
4427+
switch (CmpLHS.getConstantOperandVal(1)) {
4428+
case 1:
4429+
PCC = IsCCNE ? PPC::PRED_UN : PPC::PRED_NU;
4430+
break;
4431+
case 2:
4432+
PCC = IsCCNE ? PPC::PRED_EQ : PPC::PRED_NE;
4433+
break;
4434+
case 4:
4435+
PCC = IsCCNE ? PPC::PRED_GT : PPC::PRED_LE;
4436+
break;
4437+
case 8:
4438+
PCC = IsCCNE ? PPC::PRED_LT : PPC::PRED_GE;
4439+
break;
4440+
default:
4441+
return false;
4442+
}
4443+
else if (CmpLHS.getOpcode() == ISD::TRUNCATE &&
4444+
CmpLHS.getValueType() == MVT::i1)
4445+
PCC = IsCCNE ? PPC::PRED_UN : PPC::PRED_NU;
4446+
4447+
if (PCC) {
4448+
SDLoc dl(N);
4449+
SDValue Ops[] = {getI32Imm(PCC, dl), CmpLHS.getOperand(0), N->getOperand(4),
4450+
N->getOperand(0)};
4451+
CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
4452+
return true;
4453+
}
4454+
return false;
4455+
}
4456+
43814457
bool PPCDAGToDAGISel::tryAsSingleRLWINM(SDNode *N) {
43824458
assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
43834459
unsigned Imm;
@@ -5247,6 +5323,8 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
52475323
return;
52485324
}
52495325
case ISD::BR_CC: {
5326+
if (tryFoldSWTestBRCC(N))
5327+
return;
52505328
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
52515329
unsigned PCC =
52525330
getPredicateForSetCC(CC, N->getOperand(2).getValueType(), Subtarget);

Diff for: llvm/test/CodeGen/PowerPC/fold_swtest_br.ll

+204
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,204 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown \
3+
; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s | FileCheck %s
4+
5+
@val = external local_unnamed_addr global i32, align 4
6+
declare i32 @llvm.ppc.vsx.xvtsqrtdp(<2 x double>)
7+
8+
; Branches to %if.end when (xvtsqrtdp result & 1) == 0. The CHECK lines expect
; the test and the branch to be emitted as `xvtsqrtdp cr0` followed by `bnu cr0`.
define dso_local signext i32 @xvtsqrtdp_and_1_eq(<2 x double> %input) {
9+
; CHECK-LABEL: xvtsqrtdp_and_1_eq:
10+
; CHECK: # %bb.0: # %entry
11+
; CHECK-NEXT: xvtsqrtdp cr0, v2
12+
; CHECK-NEXT: bnu cr0, .LBB0_2
13+
; CHECK-NEXT: # %bb.1: # %if.then
14+
; CHECK-NEXT: addis r3, r2, .LC0@toc@ha
15+
; CHECK-NEXT: li r4, 100
16+
; CHECK-NEXT: ld r3, .LC0@toc@l(r3)
17+
; CHECK-NEXT: stw r4, 0(r3)
18+
; CHECK-NEXT: .LBB0_2: # %if.end
19+
; CHECK-NEXT: li r3, 1
20+
; CHECK-NEXT: blr
21+
entry:
22+
%0 = tail call i32 @llvm.ppc.vsx.xvtsqrtdp(<2 x double> %input)
23+
%1 = and i32 %0, 1
24+
%cmp.not = icmp eq i32 %1, 0
25+
br i1 %cmp.not, label %if.end, label %if.then
26+
27+
if.then: ; preds = %entry
28+
store i32 100, i32* @val, align 4
29+
br label %if.end
30+
31+
if.end: ; preds = %if.then, %entry
32+
ret i32 1
33+
}
34+
35+
; Branches to %if.end when (xvtsqrtdp result & 2) == 0. The CHECK lines expect
; the test and the branch to be emitted as `xvtsqrtdp cr0` followed by `bne cr0`.
define dso_local signext i32 @xvtsqrtdp_and_2_eq(<2 x double> %input) {
36+
; CHECK-LABEL: xvtsqrtdp_and_2_eq:
37+
; CHECK: # %bb.0: # %entry
38+
; CHECK-NEXT: xvtsqrtdp cr0, v2
39+
; CHECK-NEXT: bne cr0, .LBB1_2
40+
; CHECK-NEXT: # %bb.1: # %if.then
41+
; CHECK-NEXT: addis r3, r2, .LC0@toc@ha
42+
; CHECK-NEXT: li r4, 100
43+
; CHECK-NEXT: ld r3, .LC0@toc@l(r3)
44+
; CHECK-NEXT: stw r4, 0(r3)
45+
; CHECK-NEXT: .LBB1_2: # %if.end
46+
; CHECK-NEXT: li r3, 1
47+
; CHECK-NEXT: blr
48+
entry:
49+
%0 = tail call i32 @llvm.ppc.vsx.xvtsqrtdp(<2 x double> %input)
50+
%1 = and i32 %0, 2
51+
%cmp.not = icmp eq i32 %1, 0
52+
br i1 %cmp.not, label %if.end, label %if.then
53+
54+
if.then: ; preds = %entry
55+
store i32 100, i32* @val, align 4
56+
br label %if.end
57+
58+
if.end: ; preds = %if.then, %entry
59+
ret i32 1
60+
}
61+
62+
; Branches to %if.end when (xvtsqrtdp result & 4) == 0. The CHECK lines expect
; the test and the branch to be emitted as `xvtsqrtdp cr0` followed by `ble cr0`.
define dso_local signext i32 @xvtsqrtdp_and_4_eq(<2 x double> %input) {
63+
; CHECK-LABEL: xvtsqrtdp_and_4_eq:
64+
; CHECK: # %bb.0: # %entry
65+
; CHECK-NEXT: xvtsqrtdp cr0, v2
66+
; CHECK-NEXT: ble cr0, .LBB2_2
67+
; CHECK-NEXT: # %bb.1: # %if.then
68+
; CHECK-NEXT: addis r3, r2, .LC0@toc@ha
69+
; CHECK-NEXT: li r4, 100
70+
; CHECK-NEXT: ld r3, .LC0@toc@l(r3)
71+
; CHECK-NEXT: stw r4, 0(r3)
72+
; CHECK-NEXT: .LBB2_2: # %if.end
73+
; CHECK-NEXT: li r3, 1
74+
; CHECK-NEXT: blr
75+
entry:
76+
%0 = tail call i32 @llvm.ppc.vsx.xvtsqrtdp(<2 x double> %input)
77+
%1 = and i32 %0, 4
78+
%cmp.not = icmp eq i32 %1, 0
79+
br i1 %cmp.not, label %if.end, label %if.then
80+
81+
if.then: ; preds = %entry
82+
store i32 100, i32* @val, align 4
83+
br label %if.end
84+
85+
if.end: ; preds = %if.then, %entry
86+
ret i32 1
87+
}
88+
89+
; Branches to %if.end when (xvtsqrtdp result & 8) == 0. The CHECK lines expect
; the test and the branch to be emitted as `xvtsqrtdp cr0` followed by `bge cr0`.
define dso_local signext i32 @xvtsqrtdp_and_8_eq(<2 x double> %input) {
90+
; CHECK-LABEL: xvtsqrtdp_and_8_eq:
91+
; CHECK: # %bb.0: # %entry
92+
; CHECK-NEXT: xvtsqrtdp cr0, v2
93+
; CHECK-NEXT: bge cr0, .LBB3_2
94+
; CHECK-NEXT: # %bb.1: # %if.then
95+
; CHECK-NEXT: addis r3, r2, .LC0@toc@ha
96+
; CHECK-NEXT: li r4, 100
97+
; CHECK-NEXT: ld r3, .LC0@toc@l(r3)
98+
; CHECK-NEXT: stw r4, 0(r3)
99+
; CHECK-NEXT: .LBB3_2: # %if.end
100+
; CHECK-NEXT: li r3, 1
101+
; CHECK-NEXT: blr
102+
entry:
103+
%0 = tail call i32 @llvm.ppc.vsx.xvtsqrtdp(<2 x double> %input)
104+
%1 = and i32 %0, 8
105+
%cmp.not = icmp eq i32 %1, 0
106+
br i1 %cmp.not, label %if.end, label %if.then
107+
108+
if.then: ; preds = %entry
109+
store i32 100, i32* @val, align 4
110+
br label %if.end
111+
112+
if.end: ; preds = %if.then, %entry
113+
ret i32 1
114+
}
115+
116+
; Branches to %if.end when (xvtsqrtdp result & 1) != 0. The CHECK lines expect
; the test and the branch to be emitted as `xvtsqrtdp cr0` followed by `bun cr0`.
define dso_local signext i32 @xvtsqrtdp_and_1_ne(<2 x double> %input) {
117+
; CHECK-LABEL: xvtsqrtdp_and_1_ne:
118+
; CHECK: # %bb.0: # %entry
119+
; CHECK-NEXT: xvtsqrtdp cr0, v2
120+
; CHECK-NEXT: bun cr0, .LBB4_2
121+
; CHECK-NEXT: # %bb.1: # %if.then
122+
; CHECK-NEXT: addis r3, r2, .LC0@toc@ha
123+
; CHECK-NEXT: li r4, 100
124+
; CHECK-NEXT: ld r3, .LC0@toc@l(r3)
125+
; CHECK-NEXT: stw r4, 0(r3)
126+
; CHECK-NEXT: .LBB4_2: # %if.end
127+
; CHECK-NEXT: li r3, 1
128+
; CHECK-NEXT: blr
129+
entry:
130+
%0 = tail call i32 @llvm.ppc.vsx.xvtsqrtdp(<2 x double> %input)
131+
%1 = and i32 %0, 1
132+
%cmp.not = icmp ne i32 %1, 0
133+
br i1 %cmp.not, label %if.end, label %if.then
134+
135+
if.then: ; preds = %entry
136+
store i32 100, i32* @val, align 4
137+
br label %if.end
138+
139+
if.end: ; preds = %if.then, %entry
140+
ret i32 1
141+
}
142+
143+
; Branches to %if.end when (xvtsqrtdp result & 2) != 0, which should fold into
; `xvtsqrtdp cr0` followed by `beq cr0`. The previous body was only
; `ret i32 1`, so this fold was never exercised.
; NOTE(review): the CHECK lines below mirror the sibling tests by hand;
; regenerate them with utils/update_llc_test_checks.py to confirm.
define dso_local signext i32 @xvtsqrtdp_and_2_ne(<2 x double> %input) {
; CHECK-LABEL: xvtsqrtdp_and_2_ne:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    xvtsqrtdp cr0, v2
; CHECK-NEXT:    beq cr0, .LBB5_2
; CHECK-NEXT:  # %bb.1: # %if.then
; CHECK-NEXT:    addis r3, r2, .LC0@toc@ha
; CHECK-NEXT:    li r4, 100
; CHECK-NEXT:    ld r3, .LC0@toc@l(r3)
; CHECK-NEXT:    stw r4, 0(r3)
; CHECK-NEXT:  .LBB5_2: # %if.end
; CHECK-NEXT:    li r3, 1
; CHECK-NEXT:    blr
entry:
  %0 = tail call i32 @llvm.ppc.vsx.xvtsqrtdp(<2 x double> %input)
  %1 = and i32 %0, 2
  %cmp.not = icmp ne i32 %1, 0
  br i1 %cmp.not, label %if.end, label %if.then

if.then:                                          ; preds = %entry
  store i32 100, i32* @val, align 4
  br label %if.end

if.end:                                           ; preds = %if.then, %entry
  ret i32 1
}
151+
152+
; Branches to %if.end when (xvtsqrtdp result & 4) != 0. The CHECK lines expect
; the test and the branch to be emitted as `xvtsqrtdp cr0` followed by `bgt cr0`.
define dso_local signext i32 @xvtsqrtdp_and_4_ne(<2 x double> %input) {
153+
; CHECK-LABEL: xvtsqrtdp_and_4_ne:
154+
; CHECK: # %bb.0: # %entry
155+
; CHECK-NEXT: xvtsqrtdp cr0, v2
156+
; CHECK-NEXT: bgt cr0, .LBB6_2
157+
; CHECK-NEXT: # %bb.1: # %if.then
158+
; CHECK-NEXT: addis r3, r2, .LC0@toc@ha
159+
; CHECK-NEXT: li r4, 100
160+
; CHECK-NEXT: ld r3, .LC0@toc@l(r3)
161+
; CHECK-NEXT: stw r4, 0(r3)
162+
; CHECK-NEXT: .LBB6_2: # %if.end
163+
; CHECK-NEXT: li r3, 1
164+
; CHECK-NEXT: blr
165+
entry:
166+
%0 = tail call i32 @llvm.ppc.vsx.xvtsqrtdp(<2 x double> %input)
167+
%1 = and i32 %0, 4
168+
%cmp.not = icmp ne i32 %1, 0
169+
br i1 %cmp.not, label %if.end, label %if.then
170+
171+
if.then: ; preds = %entry
172+
store i32 100, i32* @val, align 4
173+
br label %if.end
174+
175+
if.end: ; preds = %if.then, %entry
176+
ret i32 1
177+
}
178+
179+
; Branches to %if.end when (xvtsqrtdp result & 8) != 0. The CHECK lines expect
; the test and the branch to be emitted as `xvtsqrtdp cr0` followed by `blt cr0`.
define dso_local signext i32 @xvtsqrtdp_and_8_ne(<2 x double> %input) {
180+
; CHECK-LABEL: xvtsqrtdp_and_8_ne:
181+
; CHECK: # %bb.0: # %entry
182+
; CHECK-NEXT: xvtsqrtdp cr0, v2
183+
; CHECK-NEXT: blt cr0, .LBB7_2
184+
; CHECK-NEXT: # %bb.1: # %if.then
185+
; CHECK-NEXT: addis r3, r2, .LC0@toc@ha
186+
; CHECK-NEXT: li r4, 100
187+
; CHECK-NEXT: ld r3, .LC0@toc@l(r3)
188+
; CHECK-NEXT: stw r4, 0(r3)
189+
; CHECK-NEXT: .LBB7_2: # %if.end
190+
; CHECK-NEXT: li r3, 1
191+
; CHECK-NEXT: blr
192+
entry:
193+
%0 = tail call i32 @llvm.ppc.vsx.xvtsqrtdp(<2 x double> %input)
194+
%1 = and i32 %0, 8
195+
%cmp.not = icmp ne i32 %1, 0
196+
br i1 %cmp.not, label %if.end, label %if.then
197+
198+
if.then: ; preds = %entry
199+
store i32 100, i32* @val, align 4
200+
br label %if.end
201+
202+
if.end: ; preds = %if.then, %entry
203+
ret i32 1
204+
}

0 commit comments

Comments
 (0)