-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[LV]Support dropping of nneg flag for zext widencast recipes. #74112
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[LV]Support dropping of nneg flag for zext widencast recipes. #74112
Conversation
@llvm/pr-subscribers-llvm-transforms Author: Alexey Bataev (alexey-bataev) ChangesCompiler crashes when the assertion triggered for zext nneg instruction, Full diff: https://github.com/llvm/llvm-project/pull/74112.diff 5 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 09a6e01226ab68c..ca3dc0ad73761d0 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8725,8 +8725,8 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
}
if (auto *CI = dyn_cast<CastInst>(Instr)) {
- return toVPRecipeResult(
- new VPWidenCastRecipe(CI->getOpcode(), Operands[0], CI->getType(), CI));
+ return toVPRecipeResult(new VPWidenCastRecipe(CI->getOpcode(), Operands[0],
+ CI->getType(), *CI));
}
return toVPRecipeResult(tryToWiden(Instr, Operands, VPBB, Plan));
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 1088c8dba4644cb..17994f9e97beaa7 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -833,6 +833,7 @@ class VPRecipeWithIRFlags : public VPRecipeBase {
PossiblyExactOp,
GEPOp,
FPMathOp,
+ CastOp,
Other
};
@@ -851,6 +852,9 @@ class VPRecipeWithIRFlags : public VPRecipeBase {
struct GEPFlagsTy {
char IsInBounds : 1;
};
+ struct CastFlagsTy {
+ char NonNeg : 1;
+ };
struct FastMathFlagsTy {
char AllowReassoc : 1;
char NoNaNs : 1;
@@ -870,6 +874,7 @@ class VPRecipeWithIRFlags : public VPRecipeBase {
WrapFlagsTy WrapFlags;
ExactFlagsTy ExactFlags;
GEPFlagsTy GEPFlags;
+ CastFlagsTy CastFlags;
FastMathFlagsTy FMFs;
unsigned AllFlags;
};
@@ -897,6 +902,11 @@ class VPRecipeWithIRFlags : public VPRecipeBase {
} else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
OpType = OperationType::GEPOp;
GEPFlags.IsInBounds = GEP->isInBounds();
+ } else if (auto *CI = dyn_cast<CastInst>(&I)) {
+ OpType = OperationType::CastOp;
+ CastFlags.NonNeg = false;
+ if (isa<ZExtInst>(CI))
+ CastFlags.NonNeg = CI->hasNonNeg();
} else if (auto *Op = dyn_cast<FPMathOperator>(&I)) {
OpType = OperationType::FPMathOp;
FMFs = Op->getFastMathFlags();
@@ -925,6 +935,7 @@ class VPRecipeWithIRFlags : public VPRecipeBase {
return R->getVPDefID() == VPRecipeBase::VPInstructionSC ||
R->getVPDefID() == VPRecipeBase::VPWidenSC ||
R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
+ R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
R->getVPDefID() == VPRecipeBase::VPReplicateSC;
}
@@ -947,6 +958,9 @@ class VPRecipeWithIRFlags : public VPRecipeBase {
FMFs.NoNaNs = false;
FMFs.NoInfs = false;
break;
+ case OperationType::CastOp:
+ CastFlags.NonNeg = false;
+ break;
case OperationType::Cmp:
case OperationType::Other:
break;
@@ -975,6 +989,10 @@ class VPRecipeWithIRFlags : public VPRecipeBase {
I->setHasAllowContract(FMFs.AllowContract);
I->setHasApproxFunc(FMFs.ApproxFunc);
break;
+ case OperationType::CastOp:
+ if (isa<ZExtInst>(I))
+ I->setNonNeg(CastFlags.NonNeg);
+ break;
case OperationType::Cmp:
case OperationType::Other:
break;
@@ -1181,7 +1199,7 @@ class VPWidenRecipe : public VPRecipeWithIRFlags, public VPValue {
};
/// VPWidenCastRecipe is a recipe to create vector cast instructions.
-class VPWidenCastRecipe : public VPRecipeBase, public VPValue {
+class VPWidenCastRecipe : public VPRecipeWithIRFlags, public VPValue {
/// Cast instruction opcode.
Instruction::CastOps Opcode;
@@ -1190,15 +1208,19 @@ class VPWidenCastRecipe : public VPRecipeBase, public VPValue {
public:
VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy,
- CastInst *UI = nullptr)
- : VPRecipeBase(VPDef::VPWidenCastSC, Op), VPValue(this, UI),
+ CastInst &UI)
+ : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, UI), VPValue(this, &UI),
Opcode(Opcode), ResultTy(ResultTy) {
- assert((!UI || UI->getOpcode() == Opcode) &&
+ assert(UI.getOpcode() == Opcode &&
"opcode of underlying cast doesn't match");
- assert((!UI || UI->getType() == ResultTy) &&
+ assert(UI.getType() == ResultTy &&
"result type of underlying cast doesn't match");
}
+ VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
+ : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op), VPValue(this, nullptr),
+ Opcode(Opcode), ResultTy(ResultTy) {}
+
~VPWidenCastRecipe() override = default;
VP_CLASSOF_IMPL(VPDef::VPWidenCastSC)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 2c3fabdbc85619d..52338c3cf20e52f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -652,6 +652,10 @@ void VPRecipeWithIRFlags::printFlags(raw_ostream &O) const {
if (GEPFlags.IsInBounds)
O << " inbounds";
break;
+ case OperationType::CastOp:
+ if (CastFlags.NonNeg)
+ O << " nneg";
+ break;
case OperationType::Other:
break;
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 0eaaa037ad5782f..1849b9d694b894f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -81,7 +81,7 @@ void VPlanTransforms::VPInstructionsToVPRecipes(
NewRecipe = new VPWidenSelectRecipe(*SI, Ingredient.operands());
} else if (auto *CI = dyn_cast<CastInst>(Inst)) {
NewRecipe = new VPWidenCastRecipe(
- CI->getOpcode(), Ingredient.getOperand(0), CI->getType(), CI);
+ CI->getOpcode(), Ingredient.getOperand(0), CI->getType(), *CI);
} else {
NewRecipe = new VPWidenRecipe(*Inst, Ingredient.operands());
}
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/zext-drop-nneg.ll b/llvm/test/Transforms/LoopVectorize/RISCV/zext-drop-nneg.ll
new file mode 100644
index 000000000000000..99ca707d1190800
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/zext-drop-nneg.ll
@@ -0,0 +1,104 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -passes=loop-vectorize -S -mtriple=riscv64-unknown-linux-gnu -mattr=+v < %s | FileCheck %s
+
+define void @test() {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 0, [[TMP1]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
+; CHECK: vector.scevcheck:
+; CHECK-NEXT: br i1 true, label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 0, [[TMP3]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 0, [[N_MOD_VF]]
+; CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 2 x i32> @llvm.experimental.stepvector.nxv2i32()
+; CHECK-NEXT: [[TMP5:%.*]] = add <vscale x 2 x i32> [[TMP4]], zeroinitializer
+; CHECK-NEXT: [[TMP6:%.*]] = mul <vscale x 2 x i32> [[TMP5]], shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 1, i64 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i32> zeroinitializer, [[TMP6]]
+; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[TMP8:%.*]] = mul i32 [[TMP7]], 2
+; CHECK-NEXT: [[TMP9:%.*]] = mul i32 1, [[TMP8]]
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[TMP9]], i64 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i32> [[DOTSPLATINSERT]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP10:%.*]] = zext <vscale x 2 x i32> [[VEC_IND]] to <vscale x 2 x i64>
+; CHECK-NEXT: [[TMP11:%.*]] = extractelement <vscale x 2 x i64> [[TMP10]], i32 0
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr double, ptr null, i64 [[TMP11]]
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr double, ptr [[TMP12]], i32 0
+; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0(ptr [[TMP13]], i32 8, <vscale x 2 x i1> zeroinitializer, <vscale x 2 x double> poison)
+; CHECK-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x double> zeroinitializer, <vscale x 2 x double> [[WIDE_MASKED_LOAD]]
+; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[TMP15:%.*]] = mul i32 [[TMP14]], 2
+; CHECK-NEXT: [[TMP16:%.*]] = sub i32 [[TMP15]], 1
+; CHECK-NEXT: [[TMP17:%.*]] = extractelement <vscale x 2 x double> [[PREDPHI]], i32 [[TMP16]]
+; CHECK-NEXT: store double [[TMP17]], ptr null, align 8
+; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 2
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP19]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i32> [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 0, [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
+; CHECK-NEXT: br label [[BODY:%.*]]
+; CHECK: body:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[NEXT:%.*]], [[ELSE:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[TMP21:%.*]] = trunc i64 [[IV]] to i32
+; CHECK-NEXT: br i1 false, label [[THEN:%.*]], label [[ELSE]]
+; CHECK: then:
+; CHECK-NEXT: [[ZEXT:%.*]] = zext nneg i32 [[TMP21]] to i64
+; CHECK-NEXT: [[IDX1:%.*]] = getelementptr double, ptr null, i64 [[ZEXT]]
+; CHECK-NEXT: [[IDX2:%.*]] = getelementptr double, ptr null, i64 [[ZEXT]]
+; CHECK-NEXT: [[TMP22:%.*]] = load double, ptr [[IDX2]], align 8
+; CHECK-NEXT: br label [[ELSE]]
+; CHECK: else:
+; CHECK-NEXT: [[PHI:%.*]] = phi double [ [[TMP22]], [[THEN]] ], [ 0.000000e+00, [[BODY]] ]
+; CHECK-NEXT: store double [[PHI]], ptr null, align 8
+; CHECK-NEXT: [[NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[NEXT]], 0
+; CHECK-NEXT: br i1 [[CMP]], label [[EXIT]], label [[BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %body
+
+body:
+ %iv = phi i64 [ %next, %else ], [ 0, %entry ]
+ %0 = trunc i64 %iv to i32
+ br i1 false, label %then, label %else
+
+then:
+ %zext = zext nneg i32 %0 to i64
+ %idx1 = getelementptr double, ptr null, i64 %zext
+ %idx2 = getelementptr double, ptr null, i64 %zext
+ %1 = load double, ptr %idx2, align 8
+ br label %else
+
+else:
+ %phi = phi double [ %1, %then ], [ 0.000000e+00, %body ]
+ store double %phi, ptr null, align 8
+ %next = add i64 %iv, 1
+ %cmp = icmp eq i64 %next, 0
+ br i1 %cmp, label %exit, label %body
+
+exit:
+ ret void
+}
+;.
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]}
+;.
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for the patch!
@@ -0,0 +1,104 @@ | |||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 | |||
; RUN: opt -passes=loop-vectorize -S -mtriple=riscv64-unknown-linux-gnu -mattr=+v < %s | FileCheck %s |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can this be a target independent test?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I tried to make target independent but could not reproduce it. Forced vector length does not trigger the crash, forced scalable too.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah right, this needs a target with masked gather-scatter support. The existing tests are in llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll
might be good to consolidate them there.
@@ -652,6 +652,10 @@ void VPRecipeWithIRFlags::printFlags(raw_ostream &O) const { | |||
if (GEPFlags.IsInBounds) | |||
O << " inbounds"; | |||
break; | |||
case OperationType::CastOp: | |||
if (CastFlags.NonNeg) | |||
O << " nneg"; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you add a test for printing the flag to llvm/test/Transforms/LoopVectorize/vplan-printing.ll
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Will add
371574e
to
125fb56
Compare
body: | ||
%iv = phi i64 [ %next, %else ], [ 0, %entry ] | ||
%0 = trunc i64 %iv to i32 | ||
br i1 false, label %then, label %else |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
please update the condition here to use a loop dependent value; if the condition is false, it will zext nneg
will always execute and wouldn't need to be dropped.
|
||
then: | ||
%zext = zext nneg i32 %0 to i64 | ||
%idx1 = getelementptr double, ptr null, i64 %zext |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
unused?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes. I tried to remove it. It does not crash without this getelements for some reason.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh that's interesting, but I guess we can figure this out later
then: | ||
%zext = zext nneg i32 %0 to i64 | ||
%idx1 = getelementptr double, ptr null, i64 %zext | ||
%idx2 = getelementptr double, ptr null, i64 %zext |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please use a no alias
argument instead of null
here and below, as this is immediate UB and means verification tools like Alive2 won't be able to highlight any verification issues (as source being UB allows any transformation)
@@ -0,0 +1,104 @@ | |||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 | |||
; RUN: opt -passes=loop-vectorize -S -mtriple=riscv64-unknown-linux-gnu -mattr=+v < %s | FileCheck %s |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah right, this needs a target with masked gather-scatter support. The existing tests are in llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll
might be good to consolidate them there.
; RUN: opt -passes=loop-vectorize -mtriple=riscv64-unknown-linux-gnu -mattr=+v < %s -debug-only=loop-vectorize -disable-output 2>&1 | FileCheck %s | ||
; REQUIRES: asserts | ||
|
||
define void @test() { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It should be possible to have the print test target independent in llvm/test/Transforms/LoopVectorize/vplan-printing.ll
as testing printing doesn't need conditional blocks I think
125fb56
to
683ae5b
Compare
@@ -971,6 +985,10 @@ class VPRecipeWithIRFlags : public VPRecipeBase { | |||
I->setHasAllowContract(FMFs.AllowContract); | |||
I->setHasApproxFunc(FMFs.ApproxFunc); | |||
break; | |||
case OperationType::CastOp: | |||
if (isa<ZExtInst>(I)) | |||
I->setNonNeg(CastFlags.NonNeg); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For other flags, we only set OpType if the underlying operant supports the flag, so no extra checks are needed. To be consistent with other flags, it would be good to avoid the need for checking I
here, by having ZExtFlags
/ZExtOp
instead here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
See PossiblyNonNegInst
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks, NonNegFlags/NonNegOp would probably be best
Compiler crashes when the assertion triggered for zext nneg instruction, that checks that the instruction cannot produce poison. Changed the base class for widencast recipe to handle dropping nneg flag to avoid compiler crash.
683ae5b
to
62a6ab5
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thanks!
Compiler crashes when the assertion triggered for zext nneg instruction,
that checks that the instruction cannot produce poison. Changed the base
class for widencast recipe to handle dropping nneg flag to avoid
compiler crash.