Skip to content

Commit 197f3c0

Browse files
committed
[CodeGen][AArch64] Ensure isSExtCheaperThanZExt returns true for negative constants
When we know the value we're extending is a negative constant, it makes sense to use SIGN_EXTEND because this may improve code quality in some cases, particularly when doing a constant splat of an unpacked vector type. For example, for SVE, when splatting the value -1 into all elements of a vector of type <vscale x 2 x i32>, the element type will get promoted from i32 -> i64. In this case we want the splat value to sign-extend from (i32 -1) -> (i64 -1), whereas currently it zero-extends from (i32 -1) -> (i64 0xFFFFFFFF). Sign-extending the constant means we can use a single mov immediate instruction.

New tests added here:
  CodeGen/AArch64/sve-vector-splat.ll

I believe we see some code quality improvements in these existing tests too:
  CodeGen/AArch64/reduce-and.ll
  CodeGen/AArch64/unfold-masked-merge-vector-variablemask.ll

The apparent regressions in CodeGen/AArch64/fast-isel-cmp-vec.ll only occur because the test disables codegen prepare and branch folding.

Differential Revision: https://reviews.llvm.org/D114357
1 parent 5f2edad commit 197f3c0

14 files changed

+35
-47
lines changed

Diff for: llvm/include/llvm/CodeGen/TargetLowering.h

+3-3
Original file line numberDiff line numberDiff line change
@@ -2647,9 +2647,9 @@ class TargetLoweringBase {
26472647
getApproximateEVTForLLT(ToTy, DL, Ctx));
26482648
}
26492649

2650-
/// Return true if sign-extension from FromTy to ToTy is cheaper than
2651-
/// zero-extension.
2652-
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const {
2650+
/// Return true if sign-extension of value \p V from \p FromTy to \p ToTy is
2651+
/// cheaper than zero-extension, where \p V can be SDValue() if unknown.
/// This default implementation ignores its arguments and returns false.
2652+
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy, SDValue V) const {
26532653
return false;
26542654
}
26552655

Diff for: llvm/lib/CodeGen/CodeGenPrepare.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -7004,7 +7004,7 @@ bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
70047004
// matching the argument extension instead.
70057005
Instruction::CastOps ExtType = Instruction::ZExt;
70067006
// Some targets prefer SExt over ZExt.
7007-
if (TLI->isSExtCheaperThanZExt(OldVT, RegType))
7007+
if (TLI->isSExtCheaperThanZExt(OldVT, RegType, SDValue()))
70087008
ExtType = Instruction::SExt;
70097009

70107010
if (auto *Arg = dyn_cast<Argument>(Cond)) {

Diff for: llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -1704,7 +1704,7 @@ void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &LHS, SDValue &RHS,
17041704
SDValue OpL = GetPromotedInteger(LHS);
17051705
SDValue OpR = GetPromotedInteger(RHS);
17061706

1707-
if (TLI.isSExtCheaperThanZExt(LHS.getValueType(), OpL.getValueType())) {
1707+
if (TLI.isSExtCheaperThanZExt(LHS.getValueType(), OpL.getValueType(), LHS)) {
17081708
// The target would prefer to promote the comparison operand with sign
17091709
// extension. Honor that unless the promoted values are already zero
17101710
// extended.

Diff for: llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -283,7 +283,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
283283
EVT OldVT = Op.getValueType();
284284
SDLoc DL(Op);
285285
Op = GetPromotedInteger(Op);
286-
if (TLI.isSExtCheaperThanZExt(OldVT, Op.getValueType()))
286+
if (TLI.isSExtCheaperThanZExt(OldVT, Op.getValueType(), Op))
287287
return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Op.getValueType(), Op,
288288
DAG.getValueType(OldVT));
289289
return DAG.getZeroExtendInReg(Op, DL, OldVT);

Diff for: llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -4765,7 +4765,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
47654765
C->isTargetOpcode(), C->isOpaque());
47664766
case ISD::ANY_EXTEND:
47674767
// Some targets like RISCV prefer to sign extend some types.
4768-
if (TLI->isSExtCheaperThanZExt(Operand.getValueType(), VT))
4768+
if (TLI->isSExtCheaperThanZExt(Operand.getValueType(), VT, Operand))
47694769
return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), DL, VT,
47704770
C->isTargetOpcode(), C->isOpaque());
47714771
return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), DL, VT,

Diff for: llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -3844,7 +3844,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
38443844
} else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
38453845
(Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
38463846
!isSExtCheaperThanZExt(cast<VTSDNode>(N0.getOperand(1))->getVT(),
3847-
OpVT)) {
3847+
OpVT, N0.getOperand(1))) {
38483848
EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
38493849
unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
38503850
EVT ExtDstTy = N0.getValueType();

Diff for: llvm/lib/Target/AArch64/AArch64ISelLowering.h

+8
Original file line numberDiff line numberDiff line change
@@ -1138,6 +1138,14 @@ class AArch64TargetLowering : public TargetLowering {
11381138

11391139
bool isConstantUnsignedBitfieldExtractLegal(unsigned Opc, LLT Ty1,
11401140
LLT Ty2) const override;
1141+
1142+
/// Report sign-extension as cheaper than zero-extension only when \p V is a
/// constant (or constant splat, as matched by isConstOrConstSplat) whose
/// APInt value is negative.
///
/// Returns false when \p V is a null SDValue, when the scalar type of
/// \p SrcVT is i1, or when \p V is not matched as a constant. \p DstVT is
/// not consulted by this override.
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT, SDValue V) const override {
1143+
if (!V || SrcVT.getScalarType() == MVT::i1)
1144+
return false;
1145+
if (ConstantSDNode *C = isConstOrConstSplat(V))
1146+
return C->getAPIntValue().isNegative();
1147+
return false;
1148+
}
11411149
};
11421150

11431151
namespace AArch64 {

Diff for: llvm/lib/Target/RISCV/RISCVISelLowering.cpp

+2-1
Original file line numberDiff line numberDiff line change
@@ -1198,7 +1198,8 @@ bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
11981198
return TargetLowering::isZExtFree(Val, VT2);
11991199
}
12001200

1201-
bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
1201+
// Sign-extension is reported as cheaper than zero-extension only for an
// i32 -> i64 extension when the subtarget is 64-bit. The value V is accepted
// to match the overridden signature but is not inspected here.
bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT,
1202+
SDValue V) const {
12021203
return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
12031204
}
12041205

Diff for: llvm/lib/Target/RISCV/RISCVISelLowering.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -326,7 +326,7 @@ class RISCVTargetLowering : public TargetLowering {
326326
bool isTruncateFree(Type *SrcTy, Type *DstTy) const override;
327327
bool isTruncateFree(EVT SrcVT, EVT DstVT) const override;
328328
bool isZExtFree(SDValue Val, EVT VT2) const override;
329-
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override;
329+
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT, SDValue V) const override;
330330
bool isCheapToSpeculateCttz() const override;
331331
bool isCheapToSpeculateCtlz() const override;
332332
bool hasAndNotCompare(SDValue Y) const override;

Diff for: llvm/test/CodeGen/AArch64/funnel-shift.ll

+7-7
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ declare i7 @llvm.fshl.i7(i7, i7, i7)
9393
define i7 @fshl_i7_const_fold() {
9494
; CHECK-LABEL: fshl_i7_const_fold:
9595
; CHECK: // %bb.0:
96-
; CHECK-NEXT: mov w0, #67
96+
; CHECK-NEXT: mov w0, #-61
9797
; CHECK-NEXT: ret
9898
%f = call i7 @llvm.fshl.i7(i7 112, i7 127, i7 2)
9999
ret i7 %f
@@ -102,7 +102,7 @@ define i7 @fshl_i7_const_fold() {
102102
define i8 @fshl_i8_const_fold_overshift_1() {
103103
; CHECK-LABEL: fshl_i8_const_fold_overshift_1:
104104
; CHECK: // %bb.0:
105-
; CHECK-NEXT: mov w0, #128
105+
; CHECK-NEXT: mov w0, #-128
106106
; CHECK-NEXT: ret
107107
%f = call i8 @llvm.fshl.i8(i8 255, i8 0, i8 15)
108108
ret i8 %f
@@ -164,7 +164,7 @@ define i64 @fshl_i64_const_overshift(i64 %x, i64 %y) {
164164
define i8 @fshl_i8_const_fold() {
165165
; CHECK-LABEL: fshl_i8_const_fold:
166166
; CHECK: // %bb.0:
167-
; CHECK-NEXT: mov w0, #128
167+
; CHECK-NEXT: mov w0, #-128
168168
; CHECK-NEXT: ret
169169
%f = call i8 @llvm.fshl.i8(i8 255, i8 0, i8 7)
170170
ret i8 %f
@@ -241,7 +241,7 @@ define i7 @fshr_i7_const_fold() {
241241
define i8 @fshr_i8_const_fold_overshift_1() {
242242
; CHECK-LABEL: fshr_i8_const_fold_overshift_1:
243243
; CHECK: // %bb.0:
244-
; CHECK-NEXT: mov w0, #254
244+
; CHECK-NEXT: mov w0, #-2
245245
; CHECK-NEXT: ret
246246
%f = call i8 @llvm.fshr.i8(i8 255, i8 0, i8 15)
247247
ret i8 %f
@@ -250,7 +250,7 @@ define i8 @fshr_i8_const_fold_overshift_1() {
250250
define i8 @fshr_i8_const_fold_overshift_2() {
251251
; CHECK-LABEL: fshr_i8_const_fold_overshift_2:
252252
; CHECK: // %bb.0:
253-
; CHECK-NEXT: mov w0, #225
253+
; CHECK-NEXT: mov w0, #-31
254254
; CHECK-NEXT: ret
255255
%f = call i8 @llvm.fshr.i8(i8 15, i8 15, i8 11)
256256
ret i8 %f
@@ -259,7 +259,7 @@ define i8 @fshr_i8_const_fold_overshift_2() {
259259
define i8 @fshr_i8_const_fold_overshift_3() {
260260
; CHECK-LABEL: fshr_i8_const_fold_overshift_3:
261261
; CHECK: // %bb.0:
262-
; CHECK-NEXT: mov w0, #255
262+
; CHECK-NEXT: mov w0, #-1
263263
; CHECK-NEXT: ret
264264
%f = call i8 @llvm.fshr.i8(i8 0, i8 255, i8 8)
265265
ret i8 %f
@@ -303,7 +303,7 @@ define i64 @fshr_i64_const_overshift(i64 %x, i64 %y) {
303303
define i8 @fshr_i8_const_fold() {
304304
; CHECK-LABEL: fshr_i8_const_fold:
305305
; CHECK: // %bb.0:
306-
; CHECK-NEXT: mov w0, #254
306+
; CHECK-NEXT: mov w0, #-2
307307
; CHECK-NEXT: ret
308308
%f = call i8 @llvm.fshr.i8(i8 255, i8 0, i8 7)
309309
ret i8 %f

Diff for: llvm/test/CodeGen/AArch64/reduce-and.ll

+1-2
Original file line numberDiff line numberDiff line change
@@ -223,8 +223,7 @@ define i8 @test_redand_v3i8(<3 x i8> %a) {
223223
; CHECK-LABEL: test_redand_v3i8:
224224
; CHECK: // %bb.0:
225225
; CHECK-NEXT: and w8, w0, w1
226-
; CHECK-NEXT: and w8, w8, w2
227-
; CHECK-NEXT: and w0, w8, #0xff
226+
; CHECK-NEXT: and w0, w8, w2
228227
; CHECK-NEXT: ret
229228
;
230229
; GISEL-LABEL: test_redand_v3i8:

Diff for: llvm/test/CodeGen/AArch64/sve-vector-splat.ll

+3-6
Original file line numberDiff line numberDiff line change
@@ -119,8 +119,7 @@ define <vscale x 8 x i8> @sve_splat_8xi8(i8 %val) {
119119
define <vscale x 8 x i8> @sve_splat_8xi8_imm() {
120120
; CHECK-LABEL: sve_splat_8xi8_imm:
121121
; CHECK: // %bb.0:
122-
; CHECK-NEXT: mov w8, #255
123-
; CHECK-NEXT: mov z0.h, w8
122+
; CHECK-NEXT: mov z0.h, #-1 // =0xffffffffffffffff
124123
; CHECK-NEXT: ret
125124
%ins = insertelement <vscale x 8 x i8> undef, i8 -1, i32 0
126125
%splat = shufflevector <vscale x 8 x i8> %ins, <vscale x 8 x i8> undef, <vscale x 8 x i32> zeroinitializer
@@ -151,8 +150,7 @@ define <vscale x 4 x i16> @sve_splat_4xi16(i16 %val) {
151150
define <vscale x 4 x i16> @sve_splat_4xi16_imm() {
152151
; CHECK-LABEL: sve_splat_4xi16_imm:
153152
; CHECK: // %bb.0:
154-
; CHECK-NEXT: mov w8, #65535
155-
; CHECK-NEXT: mov z0.s, w8
153+
; CHECK-NEXT: mov z0.s, #-1 // =0xffffffffffffffff
156154
; CHECK-NEXT: ret
157155
%ins = insertelement <vscale x 4 x i16> undef, i16 -1, i32 0
158156
%splat = shufflevector <vscale x 4 x i16> %ins, <vscale x 4 x i16> undef, <vscale x 4 x i32> zeroinitializer
@@ -173,8 +171,7 @@ define <vscale x 2 x i32> @sve_splat_2xi32(i32 %val) {
173171
define <vscale x 2 x i32> @sve_splat_2xi32_imm() {
174172
; CHECK-LABEL: sve_splat_2xi32_imm:
175173
; CHECK: // %bb.0:
176-
; CHECK-NEXT: mov w8, #-1
177-
; CHECK-NEXT: mov z0.d, x8
174+
; CHECK-NEXT: mov z0.d, #-1 // =0xffffffffffffffff
178175
; CHECK-NEXT: ret
179176
%ins = insertelement <vscale x 2 x i32> undef, i32 -1, i32 0
180177
%splat = shufflevector <vscale x 2 x i32> %ins, <vscale x 2 x i32> undef, <vscale x 2 x i32> zeroinitializer

Diff for: llvm/test/CodeGen/AArch64/unfold-masked-merge-vector-variablemask.ll

+4-20
Original file line numberDiff line numberDiff line change
@@ -29,11 +29,7 @@ define <1 x i8> @out_v1i8(<1 x i8> %x, <1 x i8> %y, <1 x i8> %mask) nounwind {
2929
define <2 x i8> @out_v2i8(<2 x i8> %x, <2 x i8> %y, <2 x i8> %mask) nounwind {
3030
; CHECK-LABEL: out_v2i8:
3131
; CHECK: // %bb.0:
32-
; CHECK-NEXT: movi d3, #0x0000ff000000ff
33-
; CHECK-NEXT: and v0.8b, v0.8b, v2.8b
34-
; CHECK-NEXT: eor v2.8b, v2.8b, v3.8b
35-
; CHECK-NEXT: and v1.8b, v1.8b, v2.8b
36-
; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
32+
; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
3733
; CHECK-NEXT: ret
3834
%mx = and <2 x i8> %x, %mask
3935
%notmask = xor <2 x i8> %mask, <i8 -1, i8 -1>
@@ -61,11 +57,7 @@ define <1 x i16> @out_v1i16(<1 x i16> %x, <1 x i16> %y, <1 x i16> %mask) nounwin
6157
define <4 x i8> @out_v4i8(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwind {
6258
; CHECK-LABEL: out_v4i8:
6359
; CHECK: // %bb.0:
64-
; CHECK-NEXT: movi d3, #0xff00ff00ff00ff
65-
; CHECK-NEXT: and v0.8b, v0.8b, v2.8b
66-
; CHECK-NEXT: eor v2.8b, v2.8b, v3.8b
67-
; CHECK-NEXT: and v1.8b, v1.8b, v2.8b
68-
; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
60+
; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
6961
; CHECK-NEXT: ret
7062
%mx = and <4 x i8> %x, %mask
7163
%notmask = xor <4 x i8> %mask, <i8 -1, i8 -1, i8 -1, i8 -1>
@@ -77,11 +69,7 @@ define <4 x i8> @out_v4i8(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwind {
7769
define <4 x i8> @out_v4i8_undef(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwind {
7870
; CHECK-LABEL: out_v4i8_undef:
7971
; CHECK: // %bb.0:
80-
; CHECK-NEXT: movi d3, #0xff00ff00ff00ff
81-
; CHECK-NEXT: and v0.8b, v0.8b, v2.8b
82-
; CHECK-NEXT: eor v2.8b, v2.8b, v3.8b
83-
; CHECK-NEXT: and v1.8b, v1.8b, v2.8b
84-
; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
72+
; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
8573
; CHECK-NEXT: ret
8674
%mx = and <4 x i8> %x, %mask
8775
%notmask = xor <4 x i8> %mask, <i8 -1, i8 -1, i8 undef, i8 -1>
@@ -93,11 +81,7 @@ define <4 x i8> @out_v4i8_undef(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwi
9381
define <2 x i16> @out_v2i16(<2 x i16> %x, <2 x i16> %y, <2 x i16> %mask) nounwind {
9482
; CHECK-LABEL: out_v2i16:
9583
; CHECK: // %bb.0:
96-
; CHECK-NEXT: movi d3, #0x00ffff0000ffff
97-
; CHECK-NEXT: and v0.8b, v0.8b, v2.8b
98-
; CHECK-NEXT: eor v2.8b, v2.8b, v3.8b
99-
; CHECK-NEXT: and v1.8b, v1.8b, v2.8b
100-
; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
84+
; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
10185
; CHECK-NEXT: ret
10286
%mx = and <2 x i16> %x, %mask
10387
%notmask = xor <2 x i16> %mask, <i16 -1, i16 -1>

Diff for: llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll

+1-2
Original file line numberDiff line numberDiff line change
@@ -86,8 +86,7 @@ define i8 @test_v3i8(<3 x i8> %a) nounwind {
8686
; CHECK-LABEL: test_v3i8:
8787
; CHECK: // %bb.0:
8888
; CHECK-NEXT: and w8, w0, w1
89-
; CHECK-NEXT: and w8, w8, w2
90-
; CHECK-NEXT: and w0, w8, #0xff
89+
; CHECK-NEXT: and w0, w8, w2
9190
; CHECK-NEXT: ret
9291
%b = call i8 @llvm.vector.reduce.and.v3i8(<3 x i8> %a)
9392
ret i8 %b

0 commit comments

Comments
 (0)