-
Notifications
You must be signed in to change notification settings - Fork 12.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SLP][REVEC] Make GetMinMaxCost support FixedVectorType when REVEC is enabled. #115417
[SLP][REVEC] Make GetMinMaxCost support FixedVectorType when REVEC is enabled. #115417
Conversation
@llvm/pr-subscribers-vectorizers Author: Han-Kuan Chen (HanKuanChen) Changesreference: #114946 Full diff: https://github.com/llvm/llvm-project/pull/115417.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index b2f677fb84f983..bd8680d4c16638 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -10986,7 +10986,8 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
// If the selects are the only uses of the compares, they will be
// dead and we can adjust the cost by removing their cost.
if (VI && SelectOnly) {
- assert(!Ty->isVectorTy() && "Expected only for scalar type.");
+ assert((!Ty->isVectorTy() || SLPReVec) &&
+ "Expected only for scalar type.");
auto *CI = cast<CmpInst>(VI->getOperand(0));
IntrinsicCost -= TTI->getCmpSelInstrCost(
CI->getOpcode(), Ty, Builder.getInt1Ty(), CI->getPredicate(),
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll
index 3d00ddf89aaa3b..b312688b7932dc 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll
@@ -94,3 +94,43 @@ entry:
%23 = fcmp ogt <8 x float> zeroinitializer, %19
ret void
}
+
+define void @test3(float %0) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[FOR_BODY_LR_PH:%.*]]
+; CHECK: for.body.lr.ph:
+; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> poison, <2 x float> zeroinitializer, i64 0)
+; CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> [[TMP1]], <2 x float> zeroinitializer, i64 2)
+; CHECK-NEXT: br i1 false, label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY:%.*]]
+; CHECK: for.cond.cleanup:
+; CHECK-NEXT: [[TMP3:%.*]] = phi <4 x float> [ [[TMP2]], [[FOR_BODY_LR_PH]] ], [ [[TMP10:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: ret void
+; CHECK: for.body:
+; CHECK-NEXT: [[TMP4:%.*]] = load <2 x float>, ptr null, align 4
+; CHECK-NEXT: [[TMP5:%.*]] = fcmp olt <2 x float> zeroinitializer, [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = call <4 x i1> @llvm.vector.insert.v4i1.v2i1(<4 x i1> poison, <2 x i1> splat (i1 true), i64 0)
+; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i1> @llvm.vector.insert.v4i1.v2i1(<4 x i1> [[TMP6]], <2 x i1> [[TMP5]], i64 2)
+; CHECK-NEXT: [[TMP8:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> poison, <2 x float> [[TMP4]], i64 0)
+; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+; CHECK-NEXT: [[TMP10]] = select <4 x i1> [[TMP7]], <4 x float> [[TMP9]], <4 x float> [[TMP2]]
+; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
+;
+entry:
+ br label %for.body.lr.ph
+
+for.body.lr.ph:
+ br i1 false, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup: ; preds = %for.body, %for.body.lr.ph
+ %1 = phi <2 x float> [ zeroinitializer, %for.body.lr.ph ], [ %5, %for.body ]
+ %2 = phi <2 x float> [ zeroinitializer, %for.body.lr.ph ], [ %6, %for.body ]
+ ret void
+
+for.body:
+ %3 = load <2 x float>, ptr null, align 4
+ %4 = fcmp olt <2 x float> zeroinitializer, %3
+ %5 = select <2 x i1> <i1 true, i1 true>, <2 x float> %3, <2 x float> zeroinitializer
+ %6 = select <2 x i1> %4, <2 x float> %3, <2 x float> zeroinitializer
+ br label %for.cond.cleanup
+}
|
@llvm/pr-subscribers-llvm-transforms Author: Han-Kuan Chen (HanKuanChen) Changesreference: #114946 Full diff: https://github.com/llvm/llvm-project/pull/115417.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index b2f677fb84f983..bd8680d4c16638 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -10986,7 +10986,8 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
// If the selects are the only uses of the compares, they will be
// dead and we can adjust the cost by removing their cost.
if (VI && SelectOnly) {
- assert(!Ty->isVectorTy() && "Expected only for scalar type.");
+ assert((!Ty->isVectorTy() || SLPReVec) &&
+ "Expected only for scalar type.");
auto *CI = cast<CmpInst>(VI->getOperand(0));
IntrinsicCost -= TTI->getCmpSelInstrCost(
CI->getOpcode(), Ty, Builder.getInt1Ty(), CI->getPredicate(),
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll
index 3d00ddf89aaa3b..b312688b7932dc 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll
@@ -94,3 +94,43 @@ entry:
%23 = fcmp ogt <8 x float> zeroinitializer, %19
ret void
}
+
+define void @test3(float %0) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[FOR_BODY_LR_PH:%.*]]
+; CHECK: for.body.lr.ph:
+; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> poison, <2 x float> zeroinitializer, i64 0)
+; CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> [[TMP1]], <2 x float> zeroinitializer, i64 2)
+; CHECK-NEXT: br i1 false, label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY:%.*]]
+; CHECK: for.cond.cleanup:
+; CHECK-NEXT: [[TMP3:%.*]] = phi <4 x float> [ [[TMP2]], [[FOR_BODY_LR_PH]] ], [ [[TMP10:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: ret void
+; CHECK: for.body:
+; CHECK-NEXT: [[TMP4:%.*]] = load <2 x float>, ptr null, align 4
+; CHECK-NEXT: [[TMP5:%.*]] = fcmp olt <2 x float> zeroinitializer, [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = call <4 x i1> @llvm.vector.insert.v4i1.v2i1(<4 x i1> poison, <2 x i1> splat (i1 true), i64 0)
+; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i1> @llvm.vector.insert.v4i1.v2i1(<4 x i1> [[TMP6]], <2 x i1> [[TMP5]], i64 2)
+; CHECK-NEXT: [[TMP8:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> poison, <2 x float> [[TMP4]], i64 0)
+; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+; CHECK-NEXT: [[TMP10]] = select <4 x i1> [[TMP7]], <4 x float> [[TMP9]], <4 x float> [[TMP2]]
+; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
+;
+entry:
+ br label %for.body.lr.ph
+
+for.body.lr.ph:
+ br i1 false, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup: ; preds = %for.body, %for.body.lr.ph
+ %1 = phi <2 x float> [ zeroinitializer, %for.body.lr.ph ], [ %5, %for.body ]
+ %2 = phi <2 x float> [ zeroinitializer, %for.body.lr.ph ], [ %6, %for.body ]
+ ret void
+
+for.body:
+ %3 = load <2 x float>, ptr null, align 4
+ %4 = fcmp olt <2 x float> zeroinitializer, %3
+ %5 = select <2 x i1> <i1 true, i1 true>, <2 x float> %3, <2 x float> zeroinitializer
+ %6 = select <2 x i1> %4, <2 x float> %3, <2 x float> zeroinitializer
+ br label %for.cond.cleanup
+}
|
Thank you for updating the patch! I don't know about its correctness, but it does pass |
Here is the diff
|
reference: #114946