[RISCV] Add a combine to form masked.store from unit strided store #66677
Conversation
Add a DAG combine to form a masked.store from a masked_strided_store intrinsic with stride equal to the element size. This is the store analog of PR llvm#65674. As seen in the tests, this does pick up a few cases that we'd previously missed due to selection ordering: we match strided stores early, without going through the recently added generic mscatter combines, and thus weren't recognizing the unit-strided store.
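As a minimal illustration (the function and value names here are hypothetical, not taken from the patch), this is the shape of call the combine targets: the constant stride of 2 bytes equals the i16 element size, so the strided store is equivalent to a contiguous masked store.

; Hypothetical IR sketch: the stride (2 bytes) equals sizeof(i16), so this
; masked strided store can select to a unit-stride vse16.v instead of a
; strided vsse16.v.
declare void @llvm.riscv.masked.strided.store.v8i16.p0.i64(<8 x i16>, ptr, i64, <8 x i1>)

define void @unit_stride_store(<8 x i16> %v, ptr %p, <8 x i1> %m) {
  call void @llvm.riscv.masked.strided.store.v8i16.p0.i64(<8 x i16> %v, ptr %p, i64 2, <8 x i1> %m)
  ret void
}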
@llvm/pr-subscribers-backend-risc-v
Full diff: https://github.com/llvm/llvm-project/pull/66677.diff
4 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index de58335b435651c..46482cfcfe25ea5 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -14368,6 +14368,24 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
ISD::UNINDEXED, ISD::NON_EXTLOAD);
return SDValue();
}
+ case Intrinsic::riscv_masked_strided_store: {
+ auto *Store = cast<MemIntrinsicSDNode>(N);
+ SDValue Value = N->getOperand(2);
+ SDValue Base = N->getOperand(3);
+ SDValue Stride = N->getOperand(4);
+ SDValue Mask = N->getOperand(5);
+
+ // If the stride is equal to the element size in bytes, we can use
+ // a masked.store.
+ const unsigned ElementSize = Value.getValueType().getScalarStoreSize();
+ if (auto *StrideC = dyn_cast<ConstantSDNode>(Stride);
+ StrideC && StrideC->getZExtValue() == ElementSize)
+ return DAG.getMaskedStore(Store->getChain(), DL, Value, Base,
+ DAG.getUNDEF(XLenVT), Mask,
+ Store->getMemoryVT(), Store->getMemOperand(),
+ ISD::UNINDEXED, false);
+ return SDValue();
+ }
case Intrinsic::riscv_vcpop:
case Intrinsic::riscv_vcpop_mask:
case Intrinsic::riscv_vfirst:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
index 7309fb9edec0657..60b61e889315cfe 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
@@ -11296,9 +11296,8 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
define void @mscatter_unit_stride(<8 x i16> %val, ptr %base) {
; CHECK-LABEL: mscatter_unit_stride:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 2
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vsse16.v v8, (a0), a1
+; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
%head = insertelement <8 x i1> poison, i1 true, i16 0
%allones = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
@@ -11311,9 +11310,8 @@ define void @mscatter_unit_stride_with_offset(<8 x i16> %val, ptr %base) {
; CHECK-LABEL: mscatter_unit_stride_with_offset:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a0, a0, 10
-; CHECK-NEXT: li a1, 2
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vsse16.v v8, (a0), a1
+; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
%head = insertelement <8 x i1> poison, i1 true, i16 0
%allones = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/mscatter-combine.ll b/llvm/test/CodeGen/RISCV/rvv/mscatter-combine.ll
index 9f8f1b224db9e85..df944fada7964b0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/mscatter-combine.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/mscatter-combine.ll
@@ -114,8 +114,7 @@ define void @stride_one_store(i64 %n, ptr %p) {
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.v.i v8, 0
-; RV64-NEXT: li a0, 8
-; RV64-NEXT: vsse64.v v8, (a1), a0
+; RV64-NEXT: vs1r.v v8, (a1)
; RV64-NEXT: ret
%step = tail call <vscale x 1 x i64> @llvm.experimental.stepvector.nxv1i64()
%gep = getelementptr inbounds i64, ptr %p, <vscale x 1 x i64> %step
diff --git a/llvm/test/CodeGen/RISCV/rvv/strided-load-store-intrinsics.ll b/llvm/test/CodeGen/RISCV/rvv/strided-load-store-intrinsics.ll
index 06af0fc8971a543..f8a5b0f9d03a6ca 100644
--- a/llvm/test/CodeGen/RISCV/rvv/strided-load-store-intrinsics.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/strided-load-store-intrinsics.ll
@@ -89,9 +89,8 @@ define void @strided_store_i8_nostride(ptr %p, <32 x i8> %v, <32 x i1> %m) {
; CHECK-LABEL: strided_store_i8_nostride:
; CHECK: # %bb.0:
; CHECK-NEXT: li a1, 32
-; CHECK-NEXT: li a2, 1
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT: vsse8.v v8, (a0), a2, v0.t
+; CHECK-NEXT: vse8.v v8, (a0), v0.t
; CHECK-NEXT: ret
call void @llvm.riscv.masked.strided.store.v32i8.p0.i64(<32 x i8> %v, ptr %p, i64 1, <32 x i1> %m)
ret void
LGTM
LGTM.
Landed as 188d5c7
This is the VP equivalent of llvm#66677. If we have a strided store where the stride is equal to the element width, we can just use a regular VP store.
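As a sketch of that VP case (function and value names are hypothetical; the intrinsics are the in-tree VP intrinsics), a strided VP store whose stride equals the element width can be rewritten as a plain llvm.vp.store.

; Hypothetical IR sketch: the 2-byte stride equals the i16 element width,
; so this strided VP store is equivalent to
;   call void @llvm.vp.store.v8i16.p0(<8 x i16> %v, ptr %p, <8 x i1> %m, i32 %evl)
declare void @llvm.experimental.vp.strided.store.v8i16.p0.i64(<8 x i16>, ptr, i64, <8 x i1>, i32)

define void @vp_unit_stride_store(<8 x i16> %v, ptr %p, <8 x i1> %m, i32 %evl) {
  call void @llvm.experimental.vp.strided.store.v8i16.p0.i64(<8 x i16> %v, ptr %p, i64 2, <8 x i1> %m, i32 %evl)
  ret void
}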