-
Notifications
You must be signed in to change notification settings - Fork 12.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[RISCV] Disable performCombineVMergeAndVOps for PseduoVIOTA_M. #71483
Conversation
This transformation is illegal for PseduoVIOTA_M. The value of `viota.m vd, vs2` is the prefix sum of vd2 and adding mask for it may cause wrong prefix sum. Take an example, the result of following expression is `{5, 5, 5, 3}`, ``` ; v4 = {1, 1, 1, 1} viota.m v1, v4 ; v0 = {0, 0, 0, 1}, v1 = {0, 1, 2, 3}, v8 = {5, 5, 5, 5} vmerge.vvm v8, v8, v1, v0.t ; v8 = {5, 5, 5, 3} ``` but if we merge them to `viota.m v8, v4, v0.t`, then the result of is `{5, 5, 5, 0}` We still does the transformation when mask of vmerge.vvm is a true mask.
@llvm/pr-subscribers-backend-risc-v Author: Yeting Kuo (yetingk) ChangesThis transformation is illegal for PseduoVIOTA_M. The value of Take an example, the result of following expression is
but if we merge them to We still does the transformation when mask of vmerge.vvm is a true mask. Full diff: https://github.com/llvm/llvm-project/pull/71483.diff 3 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 51a235bf2ca1861..f103d323648d16a 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -3501,6 +3501,19 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
if (!True.isMachineOpcode())
return false;
+ // This transformation is illegal for viota.m when Mask is not a true mask.
+ switch (True->getMachineOpcode()) {
+ case RISCV::PseudoVIOTA_M_MF8:
+ case RISCV::PseudoVIOTA_M_MF4:
+ case RISCV::PseudoVIOTA_M_MF2:
+ case RISCV::PseudoVIOTA_M_M1:
+ case RISCV::PseudoVIOTA_M_M2:
+ case RISCV::PseudoVIOTA_M_M4:
+ case RISCV::PseudoVIOTA_M_M8:
+ if (Mask && !usesAllOnesMask(Mask, Glue))
+ return false;
+ }
+
unsigned TrueOpc = True.getMachineOpcode();
const MCInstrDesc &TrueMCID = TII->get(TrueOpc);
uint64_t TrueTSFlags = TrueMCID.TSFlags;
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-masked-vops.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-masked-vops.ll
index 7e137d6a6196921..f6d9d1e711e7169 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-masked-vops.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-masked-vops.ll
@@ -258,3 +258,19 @@ entry:
%res = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %i, <vscale x 2 x i32> %passthru, i32 %evl)
ret <vscale x 2 x i32> %res
}
+
+; Test VIOTA_M
+declare <vscale x 2 x i32> @llvm.riscv.viota.mask.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i1>, <vscale x 2 x i1>, i64, i64)
+define <vscale x 2 x i32> @vpmerge_viota(<vscale x 2 x i32> %passthru, <vscale x 2 x i1> %m, <vscale x 2 x i1> %vm, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_viota:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu
+; CHECK-NEXT: viota.m v8, v9, v0.t
+; CHECK-NEXT: ret
+ %1 = zext i32 %vl to i64
+ %a = call <vscale x 2 x i32> @llvm.riscv.viota.mask.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> %vm, <vscale x 2 x i1> %m, i64 %1, i64 0)
+ %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
+ %mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+ %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, <vscale x 2 x i1> %mask, i64 %1)
+ ret <vscale x 2 x i32> %b
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
index 1ea2b7ef57cf081..df119435611d167 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
@@ -279,13 +279,15 @@ define <vscale x 2 x i32> @vpmerge_vid(<vscale x 2 x i32> %passthru, <vscale x 2
ret <vscale x 2 x i32> %b
}
-; Test riscv.viota
+; Test not combine VIOTA_M and VMERGE_VVM without true mask.
declare <vscale x 2 x i32> @llvm.riscv.viota.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i1>, i64)
define <vscale x 2 x i32> @vpmerge_viota(<vscale x 2 x i32> %passthru, <vscale x 2 x i1> %m, <vscale x 2 x i1> %vm, i32 zeroext %vl) {
; CHECK-LABEL: vpmerge_viota:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu
-; CHECK-NEXT: viota.m v8, v9, v0.t
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: viota.m v10, v9
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, ma
+; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0
; CHECK-NEXT: ret
%1 = zext i32 %vl to i64
%a = call <vscale x 2 x i32> @llvm.riscv.viota.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> %vm, i64 %1)
@@ -293,6 +295,21 @@ define <vscale x 2 x i32> @vpmerge_viota(<vscale x 2 x i32> %passthru, <vscale x
ret <vscale x 2 x i32> %b
}
+; Test combine VIOTA_M and VMERGE_VVM with true mask.
+define <vscale x 2 x i32> @vpmerge_viota2(<vscale x 2 x i32> %passthru, <vscale x 2 x i1> %vm, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_viota2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma
+; CHECK-NEXT: viota.m v8, v0
+; CHECK-NEXT: ret
+ %1 = zext i32 %vl to i64
+ %a = call <vscale x 2 x i32> @llvm.riscv.viota.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> %vm, i64 %1)
+ %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
+ %true = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+ %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %true, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
+ ret <vscale x 2 x i32> %b
+}
+
; Test riscv.vfclass
declare <vscale x 2 x i32> @llvm.riscv.vfclass.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x float>, i64)
define <vscale x 2 x i32> @vpmerge_vflcass(<vscale x 2 x i32> %passthru, <vscale x 2 x float> %vf, <vscale x 2 x i1> %m, i32 zeroext %vl) {
@@ -730,8 +747,9 @@ define <vscale x 2 x i32> @vpselect_vid(<vscale x 2 x i32> %passthru, <vscale x
define <vscale x 2 x i32> @vpselect_viota(<vscale x 2 x i32> %passthru, <vscale x 2 x i1> %m, <vscale x 2 x i1> %vm, i32 zeroext %vl) {
; CHECK-LABEL: vpselect_viota:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT: viota.m v8, v9, v0.t
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: viota.m v10, v9
+; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0
; CHECK-NEXT: ret
%1 = zext i32 %vl to i64
%a = call <vscale x 2 x i32> @llvm.riscv.viota.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> %vm, i64 %1)
|
switch (True->getMachineOpcode()) { | ||
case RISCV::PseudoVIOTA_M_MF8: | ||
case RISCV::PseudoVIOTA_M_MF4: | ||
case RISCV::PseudoVIOTA_M_MF2: | ||
case RISCV::PseudoVIOTA_M_M1: | ||
case RISCV::PseudoVIOTA_M_M2: | ||
case RISCV::PseudoVIOTA_M_M4: | ||
case RISCV::PseudoVIOTA_M_M8: | ||
if (Mask && !usesAllOnesMask(Mask, Glue)) | ||
return false; | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could we use that new getRVVMCOpcode
helper
switch (True->getMachineOpcode()) { | |
case RISCV::PseudoVIOTA_M_MF8: | |
case RISCV::PseudoVIOTA_M_MF4: | |
case RISCV::PseudoVIOTA_M_MF2: | |
case RISCV::PseudoVIOTA_M_M1: | |
case RISCV::PseudoVIOTA_M_M2: | |
case RISCV::PseudoVIOTA_M_M4: | |
case RISCV::PseudoVIOTA_M_M8: | |
if (Mask && !usesAllOnesMask(Mask, Glue)) | |
return false; | |
} | |
if (getRVVMCOpcode(True->getMachineOpcode()) == RISCV::VIOTA_M && !usesAllOnesMask(Mask, Glue)) | |
return false; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank you. I does similar refine in 540ce31
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM. I wonder if we should add a bit to RISCVMaskedPseudo
so that we can mark pseudos where changing the enabled elements affects semantics? I think the reduction instructions are in the same boat, except we don't mark it with RISCVMaskedPseudo at all. Which presumably means we can't convert from an all ones mask -> unmasked
I think we can still does this transformation for |
Sorry yes, I meant as in it's correct and we should do this transform for viota.m, but we should also do it for vredsum.vs since we don't do it currently |
Sorry, I misunderstood it and your comment was actually very clear.
I think your idea is great. |
Address @lukel97's idea to add a bit into |
Pseudo MaskedPseudo = !cast<Pseudo>(NAME); | ||
Pseudo UnmaskedPseudo = !cast<Pseudo>(!subst("_MASK", "", NAME)); | ||
bits<4> MaskOpIdx = MaskIdx; | ||
bit IsAccumulatedOp = IsAcc; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is there any better/general naming than IsAccumulatedOp?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe something more generic like MaskAffectsResult
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank you for your opinion.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM x2
After llvm#71483 we now have a way of marking masked pseudos as having an unmasked equivalent, but their mask shouldn't be folded unless it's all ones since it would affect the result. This patch uses it to mark the pseudos for vredsum and friends, which in turn allows us to remove the unmasked patterns and remove vmerges entirely if it's known to have an all ones mask.
After #71483 we now have a way of marking masked pseudos as having an unmasked equivalent, but their mask shouldn't be folded unless it's all ones since it would affect the result. This patch uses it to mark the pseudos for vredsum and friends, which in turn allows us to remove the unmasked patterns, and catch some other forms of vmerge.
This transformation might be illegal for
PseduoVIOTA_M
. The value ofviota.m vd, vs2
is the prefix sum of vd2 and adding mask for it may cause wrong prefix sum.Take an example, the result of following expression is
{5, 5, 5, 3}
,but if we merge them to
viota.m v8, v4, v0.t
, then the result of is{5, 5, 5, 0}
.Also, we still does
performCombineVMergeAndVOps
forvoita.m
when mask ofvmerge.vvm
is a true mask.