Skip to content

Commit 6238b8e

Browse files
committed
[LegalizeTypes] Factor in vscale_range when widening insert_subvector
Currently, when widening operands for insert_subvector nodes, we first check that the indices are valid by seeing whether the subvector is statically known to be smaller than or equal to the vector it is being inserted into. However, if we're inserting a fixed-length subvector into a scalable vector, that check can only rely on the known minimum length of the scalable vector. This patch extends the widening logic to also take into account the minimum vscale from the vscale_range attribute, so we can handle more scenarios where we know the scalable vector is large enough to contain the subvector. Fixes llvm#63437. Reviewed By: craig.topper. Differential Revision: https://reviews.llvm.org/D153519
1 parent 28f1312 commit 6238b8e

File tree

2 files changed

+38
-2
lines changed

2 files changed

+38
-2
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

Lines changed: 24 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -6317,8 +6317,30 @@ SDValue DAGTypeLegalizer::WidenVecOp_INSERT_SUBVECTOR(SDNode *N) {
63176317
if (getTypeAction(SubVec.getValueType()) == TargetLowering::TypeWidenVector)
63186318
SubVec = GetWidenedVector(SubVec);
63196319

6320-
if (SubVec.getValueType().knownBitsLE(VT) && InVec.isUndef() &&
6321-
N->getConstantOperandVal(2) == 0)
6320+
EVT SubVT = SubVec.getValueType();
6321+
6322+
// Whether or not all the elements of the widened SubVec will be inserted into
6323+
// valid indices of VT.
6324+
bool IndicesValid = false;
6325+
// If we statically know that VT can fit SubVT, the indices are valid.
6326+
if (VT.knownBitsGE(SubVT))
6327+
IndicesValid = true;
6328+
else if (VT.isScalableVector() && SubVT.isFixedLengthVector()) {
6329+
// Otherwise, if we're inserting a fixed vector into a scalable vector and
6330+
// we know the minimum vscale, we can work out if it's valid ourselves.
6331+
Attribute Attr = DAG.getMachineFunction().getFunction().getFnAttribute(
6332+
Attribute::VScaleRange);
6333+
if (Attr.isValid()) {
6334+
unsigned VScaleMin = Attr.getVScaleRangeMin();
6335+
if (VT.getSizeInBits().getKnownMinValue() * VScaleMin >=
6336+
SubVT.getFixedSizeInBits())
6337+
IndicesValid = true;
6338+
}
6339+
}
6340+
6341+
// We need to make sure that the indices are still valid, otherwise we might
6342+
// widen what was previously well-defined to something undefined.
6343+
if (IndicesValid && InVec.isUndef() && N->getConstantOperandVal(2) == 0)
63226344
return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, InVec, SubVec,
63236345
N->getOperand(2));
63246346

llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -495,6 +495,18 @@ define void @insert_nxv8i64_nxv16i64_hi(<vscale x 8 x i64> %sv0, <vscale x 16 x
495495
ret void
496496
}
497497

498+
; We should be able to widen the <3 x i64> subvector to a <4 x i64> here because
499+
; we know that the minimum vscale is 2, so <vscale x 2 x i64> has at least 4 elements.
500+
define <vscale x 2 x i64> @insert_nxv2i64_nxv3i64(<3 x i64> %sv) #0 {
501+
; CHECK-LABEL: insert_nxv2i64_nxv3i64:
502+
; CHECK: # %bb.0:
503+
; CHECK-NEXT: ret
504+
%vec = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v3i64(<vscale x 2 x i64> undef, <3 x i64> %sv, i64 0)
505+
ret <vscale x 2 x i64> %vec
506+
}
507+
508+
attributes #0 = { vscale_range(2,1024) }
509+
498510
declare <vscale x 4 x i1> @llvm.vector.insert.nxv1i1.nxv4i1(<vscale x 4 x i1>, <vscale x 1 x i1>, i64)
499511
declare <vscale x 32 x i1> @llvm.vector.insert.nxv8i1.nxv32i1(<vscale x 32 x i1>, <vscale x 8 x i1>, i64)
500512

@@ -512,3 +524,5 @@ declare <vscale x 16 x i32> @llvm.vector.insert.nxv1i32.nxv16i32(<vscale x 16 x
512524
declare <vscale x 16 x i32> @llvm.vector.insert.nxv2i32.nxv16i32(<vscale x 16 x i32>, <vscale x 2 x i32>, i64 %idx)
513525
declare <vscale x 16 x i32> @llvm.vector.insert.nxv4i32.nxv16i32(<vscale x 16 x i32>, <vscale x 4 x i32>, i64 %idx)
514526
declare <vscale x 16 x i32> @llvm.vector.insert.nxv8i32.nxv16i32(<vscale x 16 x i32>, <vscale x 8 x i32>, i64 %idx)
527+
528+
declare <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v3i64(<vscale x 2 x i64>, <3 x i64>, i64 %idx)

0 commit comments

Comments
 (0)