Skip to content

Commit bbd1941

Browse files
authored
[VPlan] Add disjoint flag to VPRecipeWithIRFlags. (#74364)
A new disjoint flag was added for OR instructions in #72583. Update VPRecipeWithIRFlags to also support the new flag. This allows printing and preserving the disjoint flag in vectorized code.
1 parent dfdedaf commit bbd1941

File tree

7 files changed

+24
-6
lines changed

7 files changed

+24
-6
lines changed

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -826,6 +826,7 @@ class VPRecipeWithIRFlags : public VPRecipeBase {
826826
enum class OperationType : unsigned char {
827827
Cmp,
828828
OverflowingBinOp,
829+
DisjointOp,
829830
PossiblyExactOp,
830831
GEPOp,
831832
FPMathOp,
@@ -842,6 +843,9 @@ class VPRecipeWithIRFlags : public VPRecipeBase {
842843
};
843844

844845
private:
846+
struct DisjointFlagsTy {
847+
char IsDisjoint : 1;
848+
};
845849
struct ExactFlagsTy {
846850
char IsExact : 1;
847851
};
@@ -868,6 +872,7 @@ class VPRecipeWithIRFlags : public VPRecipeBase {
868872
union {
869873
CmpInst::Predicate CmpPredicate;
870874
WrapFlagsTy WrapFlags;
875+
DisjointFlagsTy DisjointFlags;
871876
ExactFlagsTy ExactFlags;
872877
GEPFlagsTy GEPFlags;
873878
NonNegFlagsTy NonNegFlags;
@@ -889,6 +894,9 @@ class VPRecipeWithIRFlags : public VPRecipeBase {
889894
if (auto *Op = dyn_cast<CmpInst>(&I)) {
890895
OpType = OperationType::Cmp;
891896
CmpPredicate = Op->getPredicate();
897+
} else if (auto *Op = dyn_cast<PossiblyDisjointInst>(&I)) {
898+
OpType = OperationType::DisjointOp;
899+
DisjointFlags.IsDisjoint = Op->isDisjoint();
892900
} else if (auto *Op = dyn_cast<OverflowingBinaryOperator>(&I)) {
893901
OpType = OperationType::OverflowingBinOp;
894902
WrapFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
@@ -942,6 +950,9 @@ class VPRecipeWithIRFlags : public VPRecipeBase {
942950
WrapFlags.HasNUW = false;
943951
WrapFlags.HasNSW = false;
944952
break;
953+
case OperationType::DisjointOp:
954+
DisjointFlags.IsDisjoint = false;
955+
break;
945956
case OperationType::PossiblyExactOp:
946957
ExactFlags.IsExact = false;
947958
break;
@@ -968,6 +979,9 @@ class VPRecipeWithIRFlags : public VPRecipeBase {
968979
I->setHasNoUnsignedWrap(WrapFlags.HasNUW);
969980
I->setHasNoSignedWrap(WrapFlags.HasNSW);
970981
break;
982+
case OperationType::DisjointOp:
983+
cast<PossiblyDisjointInst>(I)->setIsDisjoint(DisjointFlags.IsDisjoint);
984+
break;
971985
case OperationType::PossiblyExactOp:
972986
I->setIsExact(ExactFlags.IsExact);
973987
break;

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -635,6 +635,10 @@ void VPRecipeWithIRFlags::printFlags(raw_ostream &O) const {
635635
case OperationType::Cmp:
636636
O << " " << CmpInst::getPredicateName(getPredicate());
637637
break;
638+
case OperationType::DisjointOp:
639+
if (DisjointFlags.IsDisjoint)
640+
O << " disjoint";
641+
break;
638642
case OperationType::PossiblyExactOp:
639643
if (ExactFlags.IsExact)
640644
O << " exact";

llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ define void @test_array_load2_i16_store2(i32 %C, i32 %D) #1 {
122122
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[TMP1]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
123123
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1024 x i16], ptr @AB_i16, i64 0, <vscale x 4 x i64> [[VEC_IND]]
124124
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> [[TMP4]], i32 2, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i16> poison)
125-
; CHECK-NEXT: [[TMP5:%.*]] = or <vscale x 4 x i64> [[VEC_IND]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
125+
; CHECK-NEXT: [[TMP5:%.*]] = or disjoint <vscale x 4 x i64> [[VEC_IND]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
126126
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x i16], ptr @AB_i16, i64 0, <vscale x 4 x i64> [[TMP5]]
127127
; CHECK-NEXT: [[WIDE_MASKED_GATHER1:%.*]] = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> [[TMP6]], i32 2, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i16> poison)
128128
; CHECK-NEXT: [[TMP7:%.*]] = sext <vscale x 4 x i16> [[WIDE_MASKED_GATHER]] to <vscale x 4 x i32>
@@ -217,7 +217,7 @@ define void @test_array_load2_store2_i16(i32 noundef %C, i32 noundef %D) #1 {
217217
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.experimental.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC]])
218218
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0
219219
; CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 1
220-
; CHECK-NEXT: [[TMP7:%.*]] = or <vscale x 4 x i64> [[VEC_IND]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
220+
; CHECK-NEXT: [[TMP7:%.*]] = or disjoint <vscale x 4 x i64> [[VEC_IND]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
221221
; CHECK-NEXT: [[TMP8:%.*]] = add nsw <vscale x 4 x i32> [[TMP5]], [[BROADCAST_SPLAT]]
222222
; CHECK-NEXT: [[TMP9:%.*]] = trunc <vscale x 4 x i32> [[TMP8]] to <vscale x 4 x i16>
223223
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i16], ptr @CD_i16, i64 0, <vscale x 4 x i64> [[VEC_IND]]

llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ define void @_Z3fn1v() #0 {
2424
; CHECK-NEXT: [[TMP12:%.*]] = add nsw <16 x i64> [[TMP10]], [[VEC_IND3]]
2525
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [10 x i32], <16 x ptr> [[TMP11]], <16 x i64> [[TMP12]], i64 0
2626
; CHECK-NEXT: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>, <16 x ptr> [[TMP13]], i32 16, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
27-
; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i64> [[VEC_IND3]], <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
27+
; CHECK-NEXT: [[TMP14:%.*]] = or disjoint <16 x i64> [[VEC_IND3]], <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
2828
; CHECK-NEXT: [[TMP15:%.*]] = add nsw <16 x i64> [[TMP10]], [[TMP14]]
2929
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [10 x i32], <16 x ptr> [[TMP11]], <16 x i64> [[TMP15]], i64 0
3030
; CHECK-NEXT: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>, <16 x ptr> [[TMP16]], i32 8, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)

llvm/test/Transforms/LoopVectorize/preserve-or-disjoint.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ define void @generate_disjoint_flags(i64 %n, ptr noalias %x) {
1818
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[TMP0]]
1919
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
2020
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
21-
; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[WIDE_LOAD]], <i32 1, i32 1, i32 1, i32 1>
21+
; CHECK-NEXT: [[TMP3:%.*]] = or disjoint <4 x i32> [[WIDE_LOAD]], <i32 1, i32 1, i32 1, i32 1>
2222
; CHECK-NEXT: [[TMP4:%.*]] = or <4 x i32> [[WIDE_LOAD]], <i32 3, i32 3, i32 3, i32 3>
2323
; CHECK-NEXT: [[TMP5:%.*]] = add nuw nsw <4 x i32> [[TMP3]], [[TMP4]]
2424
; CHECK-NEXT: store <4 x i32> [[TMP5]], ptr [[TMP2]], align 4

llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -195,7 +195,7 @@ for.end:
195195
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP10]], i32 2
196196
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x i32> [[TMP14]], i32 [[TMP11]], i32 3
197197
; CHECK-NEXT: [[TMP16:%.*]] = add <4 x i32> [[TMP15]], [[VEC_PHI]]
198-
; CHECK-NEXT: [[TMP17:%.*]] = or <4 x i64> [[VEC_IND]], <i64 1, i64 1, i64 1, i64 1>
198+
; CHECK-NEXT: [[TMP17:%.*]] = or disjoint <4 x i64> [[VEC_IND]], <i64 1, i64 1, i64 1, i64 1>
199199
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i64> [[TMP17]], i32 0
200200
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP18]]
201201
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i64> [[TMP17]], i32 1

llvm/test/Transforms/LoopVectorize/vplan-printing.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -773,7 +773,7 @@ define void @print_disjoint_flags(i64 %n, ptr noalias %x) {
773773
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
774774
; CHECK-NEXT: CLONE ir<%gep.x> = getelementptr inbounds ir<%x>, vp<[[STEPS]]>
775775
; CHECK-NEXT: WIDEN ir<%lv> = load ir<%gep.x>
776-
; CHECK-NEXT: WIDEN ir<%or.1> = or ir<%lv>, ir<1>
776+
; CHECK-NEXT: WIDEN ir<%or.1> = or disjoint ir<%lv>, ir<1>
777777
; CHECK-NEXT: WIDEN ir<%or.2> = or ir<%lv>, ir<3>
778778
; CHECK-NEXT: WIDEN ir<%add> = add nuw nsw ir<%or.1>, ir<%or.2>
779779
; CHECK-NEXT: WIDEN store ir<%gep.x>, ir<%add>

0 commit comments

Comments
 (0)