Skip to content

Commit 9de1bc0

Browse files
authored
[LoongArch] Generate [x]vldi instructions with special constant splats (#159258)
1 parent 2cacf71 commit 9de1bc0

16 files changed

+286
-379
lines changed

llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp

Lines changed: 34 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -113,44 +113,59 @@ void LoongArchDAGToDAGISel::Select(SDNode *Node) {
113113
APInt SplatValue, SplatUndef;
114114
unsigned SplatBitSize;
115115
bool HasAnyUndefs;
116-
unsigned Op;
116+
unsigned Op = 0;
117117
EVT ResTy = BVN->getValueType(0);
118118
bool Is128Vec = BVN->getValueType(0).is128BitVector();
119119
bool Is256Vec = BVN->getValueType(0).is256BitVector();
120+
SDNode *Res;
120121

121122
if (!Subtarget->hasExtLSX() || (!Is128Vec && !Is256Vec))
122123
break;
123124
if (!BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
124125
HasAnyUndefs, 8))
125126
break;
126127

127-
switch (SplatBitSize) {
128-
default:
129-
break;
130-
case 8:
131-
Op = Is256Vec ? LoongArch::PseudoXVREPLI_B : LoongArch::PseudoVREPLI_B;
132-
break;
133-
case 16:
134-
Op = Is256Vec ? LoongArch::PseudoXVREPLI_H : LoongArch::PseudoVREPLI_H;
135-
break;
136-
case 32:
137-
Op = Is256Vec ? LoongArch::PseudoXVREPLI_W : LoongArch::PseudoVREPLI_W;
138-
break;
139-
case 64:
140-
Op = Is256Vec ? LoongArch::PseudoXVREPLI_D : LoongArch::PseudoVREPLI_D;
141-
break;
142-
}
143-
144-
SDNode *Res;
145128
// If we have a signed 10 bit integer, we can splat it directly.
146129
if (SplatValue.isSignedIntN(10)) {
130+
switch (SplatBitSize) {
131+
default:
132+
break;
133+
case 8:
134+
Op = Is256Vec ? LoongArch::PseudoXVREPLI_B : LoongArch::PseudoVREPLI_B;
135+
break;
136+
case 16:
137+
Op = Is256Vec ? LoongArch::PseudoXVREPLI_H : LoongArch::PseudoVREPLI_H;
138+
break;
139+
case 32:
140+
Op = Is256Vec ? LoongArch::PseudoXVREPLI_W : LoongArch::PseudoVREPLI_W;
141+
break;
142+
case 64:
143+
Op = Is256Vec ? LoongArch::PseudoXVREPLI_D : LoongArch::PseudoVREPLI_D;
144+
break;
145+
}
146+
147147
EVT EleType = ResTy.getVectorElementType();
148148
APInt Val = SplatValue.sextOrTrunc(EleType.getSizeInBits());
149149
SDValue Imm = CurDAG->getTargetConstant(Val, DL, EleType);
150150
Res = CurDAG->getMachineNode(Op, DL, ResTy, Imm);
151151
ReplaceNode(Node, Res);
152152
return;
153153
}
154+
155+
// Select appropriate [x]vldi instructions for some special constant splats,
156+
// where the immediate value `imm[12] == 1` for used [x]vldi instructions.
157+
const auto &TLI =
158+
*static_cast<const LoongArchTargetLowering *>(getTargetLowering());
159+
std::pair<bool, uint64_t> ConvertVLDI =
160+
TLI.isImmVLDILegalForMode1(SplatValue, SplatBitSize);
161+
if (ConvertVLDI.first) {
162+
Op = Is256Vec ? LoongArch::XVLDI : LoongArch::VLDI;
163+
SDValue Imm = CurDAG->getSignedTargetConstant(
164+
SignExtend32<13>(ConvertVLDI.second), DL, MVT::i32);
165+
Res = CurDAG->getMachineNode(Op, DL, ResTy, Imm);
166+
ReplaceNode(Node, Res);
167+
return;
168+
}
154169
break;
155170
}
156171
}

llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp

Lines changed: 84 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2851,9 +2851,10 @@ SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
28512851

28522852
if (SplatBitSize == 64 && !Subtarget.is64Bit()) {
28532853
// We can only handle 64-bit elements that are within
2854-
// the signed 10-bit range on 32-bit targets.
2854+
// the signed 10-bit range or match vldi patterns on 32-bit targets.
28552855
// See the BUILD_VECTOR case in LoongArchDAGToDAGISel::Select().
2856-
if (!SplatValue.isSignedIntN(10))
2856+
if (!SplatValue.isSignedIntN(10) &&
2857+
!isImmVLDILegalForMode1(SplatValue, SplatBitSize).first)
28572858
return SDValue();
28582859
if ((Is128Vec && ResTy == MVT::v4i32) ||
28592860
(Is256Vec && ResTy == MVT::v8i32))
@@ -8543,6 +8544,87 @@ SDValue LoongArchTargetLowering::LowerReturn(
85438544
return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps);
85448545
}
85458546

8547+
// Check if a constant splat can be generated using [x]vldi, where imm[12] == 1.
8548+
// Note: The following prefixes are excluded:
8549+
// imm[11:8] == 4'b0000, 4'b0100, 4'b1000
8550+
// as they can be represented using [x]vrepli.[whb]
8551+
std::pair<bool, uint64_t> LoongArchTargetLowering::isImmVLDILegalForMode1(
8552+
const APInt &SplatValue, const unsigned SplatBitSize) const {
8553+
uint64_t RequiredImm = 0;
8554+
uint64_t V = SplatValue.getZExtValue();
8555+
if (SplatBitSize == 16 && !(V & 0x00FF)) {
8556+
// 4'b0101
8557+
RequiredImm = (0b10101 << 8) | (V >> 8);
8558+
return {true, RequiredImm};
8559+
} else if (SplatBitSize == 32) {
8560+
// 4'b0001
8561+
if (!(V & 0xFFFF00FF)) {
8562+
RequiredImm = (0b10001 << 8) | (V >> 8);
8563+
return {true, RequiredImm};
8564+
}
8565+
// 4'b0010
8566+
if (!(V & 0xFF00FFFF)) {
8567+
RequiredImm = (0b10010 << 8) | (V >> 16);
8568+
return {true, RequiredImm};
8569+
}
8570+
// 4'b0011
8571+
if (!(V & 0x00FFFFFF)) {
8572+
RequiredImm = (0b10011 << 8) | (V >> 24);
8573+
return {true, RequiredImm};
8574+
}
8575+
// 4'b0110
8576+
if ((V & 0xFFFF00FF) == 0xFF) {
8577+
RequiredImm = (0b10110 << 8) | (V >> 8);
8578+
return {true, RequiredImm};
8579+
}
8580+
// 4'b0111
8581+
if ((V & 0xFF00FFFF) == 0xFFFF) {
8582+
RequiredImm = (0b10111 << 8) | (V >> 16);
8583+
return {true, RequiredImm};
8584+
}
8585+
// 4'b1010
8586+
if ((V & 0x7E07FFFF) == 0x3E000000 || (V & 0x7E07FFFF) == 0x40000000) {
8587+
RequiredImm =
8588+
(0b11010 << 8) | (((V >> 24) & 0xC0) ^ 0x40) | ((V >> 19) & 0x3F);
8589+
return {true, RequiredImm};
8590+
}
8591+
} else if (SplatBitSize == 64) {
8592+
// 4'b1011
8593+
if ((V & 0xFFFFFFFF7E07FFFFULL) == 0x3E000000ULL ||
8594+
(V & 0xFFFFFFFF7E07FFFFULL) == 0x40000000ULL) {
8595+
RequiredImm =
8596+
(0b11011 << 8) | (((V >> 24) & 0xC0) ^ 0x40) | ((V >> 19) & 0x3F);
8597+
return {true, RequiredImm};
8598+
}
8599+
// 4'b1100
8600+
if ((V & 0x7FC0FFFFFFFFFFFFULL) == 0x4000000000000000ULL ||
8601+
(V & 0x7FC0FFFFFFFFFFFFULL) == 0x3FC0000000000000ULL) {
8602+
RequiredImm =
8603+
(0b11100 << 8) | (((V >> 56) & 0xC0) ^ 0x40) | ((V >> 48) & 0x3F);
8604+
return {true, RequiredImm};
8605+
}
8606+
// 4'b1001
8607+
auto sameBitsPreByte = [](uint64_t x) -> std::pair<bool, uint8_t> {
8608+
uint8_t res = 0;
8609+
for (int i = 0; i < 8; ++i) {
8610+
uint8_t byte = x & 0xFF;
8611+
if (byte == 0 || byte == 0xFF)
8612+
res |= ((byte & 1) << i);
8613+
else
8614+
return {false, 0};
8615+
x >>= 8;
8616+
}
8617+
return {true, res};
8618+
};
8619+
auto [IsSame, Suffix] = sameBitsPreByte(V);
8620+
if (IsSame) {
8621+
RequiredImm = (0b11001 << 8) | Suffix;
8622+
return {true, RequiredImm};
8623+
}
8624+
}
8625+
return {false, RequiredImm};
8626+
}
8627+
85468628
bool LoongArchTargetLowering::isFPImmVLDILegal(const APFloat &Imm,
85478629
EVT VT) const {
85488630
if (!Subtarget.hasExtLSX())

llvm/lib/Target/LoongArch/LoongArchISelLowering.h

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -339,6 +339,12 @@ class LoongArchTargetLowering : public TargetLowering {
339339

340340
bool shouldScalarizeBinop(SDValue VecOp) const override;
341341

342+
/// Check if a constant splat can be generated using [x]vldi, where imm[12]
343+
/// is 1.
344+
std::pair<bool, uint64_t>
345+
isImmVLDILegalForMode1(const APInt &SplatValue,
346+
const unsigned SplatBitSize) const;
347+
342348
private:
343349
/// Target-specific function used to lower LoongArch calling conventions.
344350
typedef bool LoongArchCCAssignFn(const DataLayout &DL, LoongArchABI::ABI ABI,

llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -158,6 +158,7 @@ def vsplatf32_fpimm_eq_1
158158
N = N->getOperand(0).getNode();
159159

160160
return selectVSplat(N, Imm, EltTy.getSizeInBits()) &&
161+
Imm.getBitWidth() == 32 &&
161162
Imm.getBitWidth() == EltTy.getSizeInBits() &&
162163
Imm == APFloat(+1.0f).bitcastToAPInt();
163164
}]>;

llvm/test/CodeGen/LoongArch/lasx/build-vector.ll

Lines changed: 23 additions & 57 deletions
Original file line number | Diff line number | Diff line change
@@ -196,8 +196,7 @@ entry:
196196
define void @buildvector_v8f32_const_splat(ptr %dst) nounwind {
197197
; CHECK-LABEL: buildvector_v8f32_const_splat:
198198
; CHECK: # %bb.0: # %entry
199-
; CHECK-NEXT: lu12i.w $a1, 260096
200-
; CHECK-NEXT: xvreplgr2vr.w $xr0, $a1
199+
; CHECK-NEXT: xvldi $xr0, -1424
201200
; CHECK-NEXT: xvst $xr0, $a0, 0
202201
; CHECK-NEXT: ret
203202
entry:
@@ -207,19 +206,11 @@ entry:
207206

208207
;; Also check buildvector_const_splat_xvldi_1100.
209208
define void @buildvector_v4f64_const_splat(ptr %dst) nounwind {
210-
; LA32-LABEL: buildvector_v4f64_const_splat:
211-
; LA32: # %bb.0: # %entry
212-
; LA32-NEXT: pcalau12i $a1, %pc_hi20(.LCPI14_0)
213-
; LA32-NEXT: xvld $xr0, $a1, %pc_lo12(.LCPI14_0)
214-
; LA32-NEXT: xvst $xr0, $a0, 0
215-
; LA32-NEXT: ret
216-
;
217-
; LA64-LABEL: buildvector_v4f64_const_splat:
218-
; LA64: # %bb.0: # %entry
219-
; LA64-NEXT: lu52i.d $a1, $zero, 1023
220-
; LA64-NEXT: xvreplgr2vr.d $xr0, $a1
221-
; LA64-NEXT: xvst $xr0, $a0, 0
222-
; LA64-NEXT: ret
209+
; CHECK-LABEL: buildvector_v4f64_const_splat:
210+
; CHECK: # %bb.0: # %entry
211+
; CHECK-NEXT: xvldi $xr0, -912
212+
; CHECK-NEXT: xvst $xr0, $a0, 0
213+
; CHECK-NEXT: ret
223214
entry:
224215
store <4 x double> <double 1.0, double 1.0, double 1.0, double 1.0>, ptr %dst
225216
ret void
@@ -229,8 +220,7 @@ entry:
229220
define void @buildvector_const_splat_xvldi_0001(ptr %dst) nounwind {
230221
; CHECK-LABEL: buildvector_const_splat_xvldi_0001:
231222
; CHECK: # %bb.0: # %entry
232-
; CHECK-NEXT: ori $a1, $zero, 768
233-
; CHECK-NEXT: xvreplgr2vr.w $xr0, $a1
223+
; CHECK-NEXT: xvldi $xr0, -3837
234224
; CHECK-NEXT: xvst $xr0, $a0, 0
235225
; CHECK-NEXT: ret
236226
entry:
@@ -241,8 +231,7 @@ entry:
241231
define void @buildvector_const_splat_xvldi_0010(ptr %dst) nounwind {
242232
; CHECK-LABEL: buildvector_const_splat_xvldi_0010:
243233
; CHECK: # %bb.0: # %entry
244-
; CHECK-NEXT: lu12i.w $a1, 16
245-
; CHECK-NEXT: xvreplgr2vr.w $xr0, $a1
234+
; CHECK-NEXT: xvldi $xr0, -3583
246235
; CHECK-NEXT: xvst $xr0, $a0, 0
247236
; CHECK-NEXT: ret
248237
entry:
@@ -253,8 +242,7 @@ entry:
253242
define void @buildvector_const_splat_xvldi_0011(ptr %dst) nounwind {
254243
; CHECK-LABEL: buildvector_const_splat_xvldi_0011:
255244
; CHECK: # %bb.0: # %entry
256-
; CHECK-NEXT: lu12i.w $a1, 4096
257-
; CHECK-NEXT: xvreplgr2vr.w $xr0, $a1
245+
; CHECK-NEXT: xvldi $xr0, -3327
258246
; CHECK-NEXT: xvst $xr0, $a0, 0
259247
; CHECK-NEXT: ret
260248
entry:
@@ -265,8 +253,7 @@ entry:
265253
define void @buildvector_const_splat_xvldi_0101(ptr %dst) {
266254
; CHECK-LABEL: buildvector_const_splat_xvldi_0101:
267255
; CHECK: # %bb.0: # %entry
268-
; CHECK-NEXT: ori $a1, $zero, 768
269-
; CHECK-NEXT: xvreplgr2vr.h $xr0, $a1
256+
; CHECK-NEXT: xvldi $xr0, -2813
270257
; CHECK-NEXT: xvst $xr0, $a0, 0
271258
; CHECK-NEXT: ret
272259
entry:
@@ -277,8 +264,7 @@ entry:
277264
define void @buildvector_const_splat_xvldi_0110(ptr %dst) nounwind {
278265
; CHECK-LABEL: buildvector_const_splat_xvldi_0110:
279266
; CHECK: # %bb.0: # %entry
280-
; CHECK-NEXT: ori $a1, $zero, 1023
281-
; CHECK-NEXT: xvreplgr2vr.w $xr0, $a1
267+
; CHECK-NEXT: xvldi $xr0, -2557
282268
; CHECK-NEXT: xvst $xr0, $a0, 0
283269
; CHECK-NEXT: ret
284270
entry:
@@ -289,9 +275,7 @@ entry:
289275
define void @buildvector_const_splat_xvldi_0111(ptr %dst) nounwind {
290276
; CHECK-LABEL: buildvector_const_splat_xvldi_0111:
291277
; CHECK: # %bb.0: # %entry
292-
; CHECK-NEXT: lu12i.w $a1, 15
293-
; CHECK-NEXT: ori $a1, $a1, 4095
294-
; CHECK-NEXT: xvreplgr2vr.w $xr0, $a1
278+
; CHECK-NEXT: xvldi $xr0, -2305
295279
; CHECK-NEXT: xvst $xr0, $a0, 0
296280
; CHECK-NEXT: ret
297281
entry:
@@ -300,39 +284,22 @@ entry:
300284
}
301285

302286
define void @buildvector_const_splat_xvldi_1001(ptr %dst) nounwind {
303-
; LA32-LABEL: buildvector_const_splat_xvldi_1001:
304-
; LA32: # %bb.0: # %entry
305-
; LA32-NEXT: pcalau12i $a1, %pc_hi20(.LCPI21_0)
306-
; LA32-NEXT: xvld $xr0, $a1, %pc_lo12(.LCPI21_0)
307-
; LA32-NEXT: xvst $xr0, $a0, 0
308-
; LA32-NEXT: ret
309-
;
310-
; LA64-LABEL: buildvector_const_splat_xvldi_1001:
311-
; LA64: # %bb.0: # %entry
312-
; LA64-NEXT: lu12i.w $a1, 15
313-
; LA64-NEXT: ori $a1, $a1, 4095
314-
; LA64-NEXT: xvreplgr2vr.d $xr0, $a1
315-
; LA64-NEXT: xvst $xr0, $a0, 0
316-
; LA64-NEXT: ret
287+
; CHECK-LABEL: buildvector_const_splat_xvldi_1001:
288+
; CHECK: # %bb.0: # %entry
289+
; CHECK-NEXT: xvldi $xr0, -1789
290+
; CHECK-NEXT: xvst $xr0, $a0, 0
291+
; CHECK-NEXT: ret
317292
entry:
318293
store <8 x i32> <i32 65535, i32 0, i32 65535, i32 0, i32 65535, i32 0, i32 65535, i32 0>, ptr %dst
319294
ret void
320295
}
321296

322297
define void @buildvector_const_splat_xvldi_1011(ptr %dst) nounwind {
323-
; LA32-LABEL: buildvector_const_splat_xvldi_1011:
324-
; LA32: # %bb.0: # %entry
325-
; LA32-NEXT: pcalau12i $a1, %pc_hi20(.LCPI22_0)
326-
; LA32-NEXT: xvld $xr0, $a1, %pc_lo12(.LCPI22_0)
327-
; LA32-NEXT: xvst $xr0, $a0, 0
328-
; LA32-NEXT: ret
329-
;
330-
; LA64-LABEL: buildvector_const_splat_xvldi_1011:
331-
; LA64: # %bb.0: # %entry
332-
; LA64-NEXT: lu12i.w $a1, 262144
333-
; LA64-NEXT: xvreplgr2vr.d $xr0, $a1
334-
; LA64-NEXT: xvst $xr0, $a0, 0
335-
; LA64-NEXT: ret
298+
; CHECK-LABEL: buildvector_const_splat_xvldi_1011:
299+
; CHECK: # %bb.0: # %entry
300+
; CHECK-NEXT: xvldi $xr0, -1280
301+
; CHECK-NEXT: xvst $xr0, $a0, 0
302+
; CHECK-NEXT: ret
336303
entry:
337304
store <8 x float> <float 2.0, float 0.0, float 2.0, float 0.0, float 2.0, float 0.0, float 2.0, float 0.0>, ptr %dst
338305
ret void
@@ -1626,8 +1593,7 @@ define void @buildvector_v8f32_with_constant(ptr %dst, float %a1, float %a2, flo
16261593
; CHECK-NEXT: # kill: def $f2 killed $f2 def $xr2
16271594
; CHECK-NEXT: # kill: def $f1 killed $f1 def $xr1
16281595
; CHECK-NEXT: # kill: def $f0 killed $f0 def $xr0
1629-
; CHECK-NEXT: lu12i.w $a1, 262144
1630-
; CHECK-NEXT: xvreplgr2vr.w $xr4, $a1
1596+
; CHECK-NEXT: xvldi $xr4, -3264
16311597
; CHECK-NEXT: xvinsve0.w $xr4, $xr0, 1
16321598
; CHECK-NEXT: xvinsve0.w $xr4, $xr1, 2
16331599
; CHECK-NEXT: xvinsve0.w $xr4, $xr2, 5

0 commit comments

Comments
 (0)