Skip to content

Commit bb20d10

Browse files
committed
[RISCV] Narrow build_vector indices of scatter/gather nodes
Doing so allows the use of smaller constants overall, and may allow (for some small vector constants) avoiding the constant pool entirely. As seen in some of the tests, this can result in extra VTYPE toggles if we get unlucky. We could reasonably restrict this to > LMUL1 types, but I think it's also reasonable not to. What do reviewers think?
1 parent 37aa07a commit bb20d10

File tree

2 files changed

+63
-40
lines changed

2 files changed

+63
-40
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 29 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11628,15 +11628,39 @@ static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
1162811628
return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
1162911629
}
1163011630

11631-
// According to the property that indexed load/store instructions
11632-
// zero-extended their indices, \p narrowIndex tries to narrow the type of index
11633-
// operand if it is matched to pattern (shl (zext x to ty), C) and bits(x) + C <
11634-
// bits(ty).
11631+
/// According to the property that indexed load/store instructions zero-extend
11632+
/// their indices, try to narrow the type of index operand.
1163511633
static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG) {
1163611634
if (isIndexTypeSigned(IndexType))
1163711635
return false;
1163811636

11639-
if (N.getOpcode() != ISD::SHL || !N->hasOneUse())
11637+
if (!N->hasOneUse())
11638+
return false;
11639+
11640+
EVT VT = N.getValueType();
11641+
SDLoc DL(N);
11642+
11643+
// In general, what we're doing here is seeing if we can sink a truncate to
11644+
// a smaller element type into the expression tree building our index.
11645+
// TODO: We can generalize this and handle a bunch more cases if useful.
11646+
11647+
// Narrow a buildvector to the narrowest element type. This requires less
11648+
// work and less register pressure at high LMUL, and creates smaller constants
11649+
// which may be cheaper to materialize.
11650+
if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) {
11651+
KnownBits Known = DAG.computeKnownBits(N);
11652+
unsigned ActiveBits = std::max(8u, Known.countMaxActiveBits());
11653+
LLVMContext &C = *DAG.getContext();
11654+
EVT ResultVT = EVT::getIntegerVT(C, ActiveBits).getRoundIntegerType(C);
11655+
if (ResultVT.bitsLT(VT.getVectorElementType())) {
11656+
N = DAG.getNode(ISD::TRUNCATE, DL,
11657+
VT.changeVectorElementType(ResultVT), N);
11658+
return true;
11659+
}
11660+
}
11661+
11662+
// Handle the pattern (shl (zext x to ty), C) and bits(x) + C < bits(ty).
11663+
if (N.getOpcode() != ISD::SHL)
1164011664
return false;
1164111665

1164211666
SDValue N0 = N.getOperand(0);
@@ -11651,7 +11675,6 @@ static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &D
1165111675
if (!ISD::isConstantSplatVector(N1.getNode(), ShAmt))
1165211676
return false;;
1165311677

11654-
SDLoc DL(N);
1165511678
SDValue Src = N0.getOperand(0);
1165611679
EVT SrcVT = Src.getValueType();
1165711680
unsigned SrcElen = SrcVT.getScalarSizeInBits();

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll

Lines changed: 34 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -12857,10 +12857,10 @@ define <4 x i32> @mgather_broadcast_load_unmasked(ptr %base) {
1285712857
;
1285812858
; RV64V-LABEL: mgather_broadcast_load_unmasked:
1285912859
; RV64V: # %bb.0:
12860-
; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
12861-
; RV64V-NEXT: vmv.v.i v10, 0
12860+
; RV64V-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
12861+
; RV64V-NEXT: vmv.v.i v9, 0
1286212862
; RV64V-NEXT: vsetvli zero, zero, e32, m1, ta, ma
12863-
; RV64V-NEXT: vluxei64.v v8, (a0), v10
12863+
; RV64V-NEXT: vluxei8.v v8, (a0), v9
1286412864
; RV64V-NEXT: ret
1286512865
;
1286612866
; RV64ZVE32F-LABEL: mgather_broadcast_load_unmasked:
@@ -12949,10 +12949,10 @@ define <4 x i32> @mgather_broadcast_load_masked(ptr %base, <4 x i1> %m) {
1294912949
;
1295012950
; RV64V-LABEL: mgather_broadcast_load_masked:
1295112951
; RV64V: # %bb.0:
12952-
; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
12953-
; RV64V-NEXT: vmv.v.i v10, 0
12952+
; RV64V-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
12953+
; RV64V-NEXT: vmv.v.i v9, 0
1295412954
; RV64V-NEXT: vsetvli zero, zero, e32, m1, ta, ma
12955-
; RV64V-NEXT: vluxei64.v v8, (a0), v10, v0.t
12955+
; RV64V-NEXT: vluxei8.v v8, (a0), v9, v0.t
1295612956
; RV64V-NEXT: ret
1295712957
;
1295812958
; RV64ZVE32F-LABEL: mgather_broadcast_load_masked:
@@ -13016,11 +13016,11 @@ define <4 x i32> @mgather_unit_stride_load(ptr %base) {
1301613016
;
1301713017
; RV64V-LABEL: mgather_unit_stride_load:
1301813018
; RV64V: # %bb.0:
13019-
; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
13019+
; RV64V-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
1302013020
; RV64V-NEXT: vid.v v8
13021-
; RV64V-NEXT: vsll.vi v10, v8, 2
13021+
; RV64V-NEXT: vsll.vi v9, v8, 2
1302213022
; RV64V-NEXT: vsetvli zero, zero, e32, m1, ta, ma
13023-
; RV64V-NEXT: vluxei64.v v8, (a0), v10
13023+
; RV64V-NEXT: vluxei8.v v8, (a0), v9
1302413024
; RV64V-NEXT: ret
1302513025
;
1302613026
; RV64ZVE32F-LABEL: mgather_unit_stride_load:
@@ -13089,11 +13089,9 @@ define <4 x i32> @mgather_unit_stride_load_with_offset(ptr %base) {
1308913089
; RV64V: # %bb.0:
1309013090
; RV64V-NEXT: lui a1, 115073
1309113091
; RV64V-NEXT: addiw a1, a1, 1040
13092-
; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
13093-
; RV64V-NEXT: vmv.s.x v8, a1
13094-
; RV64V-NEXT: vsext.vf8 v10, v8
13095-
; RV64V-NEXT: vsetvli zero, zero, e32, m1, ta, ma
13096-
; RV64V-NEXT: vluxei64.v v8, (a0), v10
13092+
; RV64V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
13093+
; RV64V-NEXT: vmv.s.x v9, a1
13094+
; RV64V-NEXT: vluxei8.v v8, (a0), v9
1309713095
; RV64V-NEXT: ret
1309813096
;
1309913097
; RV64ZVE32F-LABEL: mgather_unit_stride_load_with_offset:
@@ -13153,19 +13151,20 @@ define <4 x i32> @mgather_unit_stride_load_with_offset(ptr %base) {
1315313151
define <4 x i32> @mgather_unit_stride_load_narrow_idx(ptr %base) {
1315413152
; RV32-LABEL: mgather_unit_stride_load_narrow_idx:
1315513153
; RV32: # %bb.0:
13156-
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
13154+
; RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
1315713155
; RV32-NEXT: vid.v v8
13158-
; RV32-NEXT: vsll.vi v8, v8, 2
13159-
; RV32-NEXT: vluxei32.v v8, (a0), v8
13156+
; RV32-NEXT: vsll.vi v9, v8, 2
13157+
; RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
13158+
; RV32-NEXT: vluxei8.v v8, (a0), v9
1316013159
; RV32-NEXT: ret
1316113160
;
1316213161
; RV64V-LABEL: mgather_unit_stride_load_narrow_idx:
1316313162
; RV64V: # %bb.0:
13164-
; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
13163+
; RV64V-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
1316513164
; RV64V-NEXT: vid.v v8
13166-
; RV64V-NEXT: vsll.vi v10, v8, 2
13165+
; RV64V-NEXT: vsll.vi v9, v8, 2
1316713166
; RV64V-NEXT: vsetvli zero, zero, e32, m1, ta, ma
13168-
; RV64V-NEXT: vluxei64.v v8, (a0), v10
13167+
; RV64V-NEXT: vluxei8.v v8, (a0), v9
1316913168
; RV64V-NEXT: ret
1317013169
;
1317113170
; RV64ZVE32F-LABEL: mgather_unit_stride_load_narrow_idx:
@@ -13224,19 +13223,20 @@ define <4 x i32> @mgather_unit_stride_load_narrow_idx(ptr %base) {
1322413223
define <4 x i32> @mgather_unit_stride_load_wide_idx(ptr %base) {
1322513224
; RV32-LABEL: mgather_unit_stride_load_wide_idx:
1322613225
; RV32: # %bb.0:
13227-
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
13226+
; RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
1322813227
; RV32-NEXT: vid.v v8
13229-
; RV32-NEXT: vsll.vi v8, v8, 2
13230-
; RV32-NEXT: vluxei32.v v8, (a0), v8
13228+
; RV32-NEXT: vsll.vi v9, v8, 2
13229+
; RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
13230+
; RV32-NEXT: vluxei8.v v8, (a0), v9
1323113231
; RV32-NEXT: ret
1323213232
;
1323313233
; RV64V-LABEL: mgather_unit_stride_load_wide_idx:
1323413234
; RV64V: # %bb.0:
13235-
; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
13235+
; RV64V-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
1323613236
; RV64V-NEXT: vid.v v8
13237-
; RV64V-NEXT: vsll.vi v10, v8, 2
13237+
; RV64V-NEXT: vsll.vi v9, v8, 2
1323813238
; RV64V-NEXT: vsetvli zero, zero, e32, m1, ta, ma
13239-
; RV64V-NEXT: vluxei64.v v8, (a0), v10
13239+
; RV64V-NEXT: vluxei8.v v8, (a0), v9
1324013240
; RV64V-NEXT: ret
1324113241
;
1324213242
; RV64ZVE32F-LABEL: mgather_unit_stride_load_wide_idx:
@@ -13374,17 +13374,17 @@ define <8 x i16> @mgather_strided_2xSEW(ptr %base) {
1337413374
; RV32-NEXT: lui a1, %hi(.LCPI107_0)
1337513375
; RV32-NEXT: addi a1, a1, %lo(.LCPI107_0)
1337613376
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
13377-
; RV32-NEXT: vle32.v v10, (a1)
13378-
; RV32-NEXT: vluxei32.v v8, (a0), v10
13377+
; RV32-NEXT: vle8.v v9, (a1)
13378+
; RV32-NEXT: vluxei8.v v8, (a0), v9
1337913379
; RV32-NEXT: ret
1338013380
;
1338113381
; RV64V-LABEL: mgather_strided_2xSEW:
1338213382
; RV64V: # %bb.0:
1338313383
; RV64V-NEXT: lui a1, %hi(.LCPI107_0)
1338413384
; RV64V-NEXT: addi a1, a1, %lo(.LCPI107_0)
1338513385
; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
13386-
; RV64V-NEXT: vle64.v v12, (a1)
13387-
; RV64V-NEXT: vluxei64.v v8, (a0), v12
13386+
; RV64V-NEXT: vle8.v v9, (a1)
13387+
; RV64V-NEXT: vluxei8.v v8, (a0), v9
1338813388
; RV64V-NEXT: ret
1338913389
;
1339013390
; RV64ZVE32F-LABEL: mgather_strided_2xSEW:
@@ -13491,17 +13491,17 @@ define <8 x i16> @mgather_gather_2xSEW(ptr %base) {
1349113491
; RV32-NEXT: lui a1, %hi(.LCPI108_0)
1349213492
; RV32-NEXT: addi a1, a1, %lo(.LCPI108_0)
1349313493
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
13494-
; RV32-NEXT: vle32.v v10, (a1)
13495-
; RV32-NEXT: vluxei32.v v8, (a0), v10
13494+
; RV32-NEXT: vle8.v v9, (a1)
13495+
; RV32-NEXT: vluxei8.v v8, (a0), v9
1349613496
; RV32-NEXT: ret
1349713497
;
1349813498
; RV64V-LABEL: mgather_gather_2xSEW:
1349913499
; RV64V: # %bb.0:
1350013500
; RV64V-NEXT: lui a1, %hi(.LCPI108_0)
1350113501
; RV64V-NEXT: addi a1, a1, %lo(.LCPI108_0)
1350213502
; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
13503-
; RV64V-NEXT: vle64.v v12, (a1)
13504-
; RV64V-NEXT: vluxei64.v v8, (a0), v12
13503+
; RV64V-NEXT: vle8.v v9, (a1)
13504+
; RV64V-NEXT: vluxei8.v v8, (a0), v9
1350513505
; RV64V-NEXT: ret
1350613506
;
1350713507
; RV64ZVE32F-LABEL: mgather_gather_2xSEW:

0 commit comments

Comments
 (0)