[MemoryLocation] Support strided matrix loads / stores #163368
Conversation
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-llvm-analysis

Author: Nathan Corbyn (cofibrant)

Changes

This patch provides an approximation of the memory locations touched by `llvm.matrix.column.major.load` and `llvm.matrix.column.major.store`, enabling dead store elimination and GVN to remove redundant loads and dead stores.

CC @fhahn

Full diff: https://github.com/llvm/llvm-project/pull/163368.diff

4 Files Affected:
- llvm/lib/Analysis/MemoryLocation.cpp
- llvm/test/Analysis/BasicAA/matrix-intrinsics.ll
- llvm/test/Transforms/DeadStoreElimination/matrix-intrinsics.ll
- llvm/test/Transforms/GVN/matrix-intrinsics.ll
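As background for the operand indexing in the diff below: the load intrinsic takes (pointer, stride, is-volatile, rows, columns) and the store variant prepends the value being stored, which is why the patch shifts each operand index by one for stores. The following standalone C++ sketch (illustrative values only, not code from the patch) mirrors how the patch bounds the footprint of such an access.

#include <cstdint>
#include <iostream>

// Standalone illustration (not LLVM code): how the patch bounds the memory
// footprint of a column-major matrix access with a constant stride.
// Example values are assumptions: a 4 x 2 matrix of doubles, stride 8.
int main() {
  const uint64_t ElementSize = 8; // store size of double in bytes
  const uint64_t Rows = 4, Cols = 2, Stride = 8;

  // Each of the Cols columns starts Stride elements after the previous one,
  // so the access stays within ElementSize * Stride * Cols bytes of the base.
  const uint64_t Size = ElementSize * Stride * Cols; // 128 bytes

  if (Stride == Rows)
    std::cout << "precise size: " << Size << " bytes\n";
  else
    // With Stride > Rows, the gap at the end of each column is never touched,
    // so Size is only an upper bound on the bytes accessed.
    std::cout << "upper bound: " << Size << " bytes\n";
  return 0;
}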
diff --git a/llvm/lib/Analysis/MemoryLocation.cpp b/llvm/lib/Analysis/MemoryLocation.cpp
index dcc51178b975a..123a1444a5b71 100644
--- a/llvm/lib/Analysis/MemoryLocation.cpp
+++ b/llvm/lib/Analysis/MemoryLocation.cpp
@@ -288,6 +288,33 @@ MemoryLocation MemoryLocation::getForArgument(const CallBase *Call,
LocationSize::precise(DL.getTypeStoreSize(
II->getArgOperand(1)->getType())),
AATags);
+ case Intrinsic::matrix_column_major_load:
+ case Intrinsic::matrix_column_major_store: {
+ bool IsLoad = II->getIntrinsicID() == Intrinsic::matrix_column_major_load;
+ assert(ArgIdx == (IsLoad ? 0 : 1) && "Invalid argument index");
+
+ auto *Stride = dyn_cast<ConstantInt>(II->getArgOperand(IsLoad ? 1 : 2));
+ uint64_t Rows =
+ cast<ConstantInt>(II->getArgOperand(IsLoad ? 3 : 4))->getZExtValue();
+ uint64_t Cols =
+ cast<ConstantInt>(II->getArgOperand(IsLoad ? 4 : 5))->getZExtValue();
+
+ // The stride is dynamic, so there's nothing we can say.
+ if (!Stride)
+ return MemoryLocation(Arg, LocationSize::afterPointer(), AATags);
+
+ uint64_t ConstStride = Stride->getZExtValue();
+ auto *VT = cast<VectorType>(IsLoad ? II->getType()
+ : II->getArgOperand(0)->getType());
+ TypeSize Size =
+ DL.getTypeStoreSize(VT->getScalarType()) * ConstStride * Cols;
+
+ // In the unstrided case, we have a precise size, ...
+ if (ConstStride == Rows)
+ return MemoryLocation(Arg, LocationSize::precise(Size), AATags);
+ // otherwise we merely obtain an upper bound.
+ return MemoryLocation(Arg, LocationSize::upperBound(Size), AATags);
+ }
}
assert(
diff --git a/llvm/test/Analysis/BasicAA/matrix-intrinsics.ll b/llvm/test/Analysis/BasicAA/matrix-intrinsics.ll
new file mode 100644
index 0000000000000..b5f12f5daeb49
--- /dev/null
+++ b/llvm/test/Analysis/BasicAA/matrix-intrinsics.ll
@@ -0,0 +1,47 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -aa-pipeline=basic-aa -passes=gvn -S < %s | FileCheck %s
+
+; BasicAA should prove that loads from sufficiently large static offsets
+; don't overlap with matrix loads with a statically known size.
+
+define <8 x double> @non_overlapping_strided_load(ptr %p) {
+; CHECK-LABEL: define <8 x double> @non_overlapping_strided_load(
+; CHECK-SAME: ptr [[P:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[P_OFFSET:%.*]] = getelementptr inbounds double, ptr [[P]], i64 16
+; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[P_OFFSET]], i32 8, i1 false, i32 4, i32 2)
+; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i64(<8 x double> [[L]], ptr [[P]], i64 8, i1 false, i32 4, i32 2)
+; CHECK-NEXT: [[S:%.*]] = fadd <8 x double> [[L]], [[L]]
+; CHECK-NEXT: ret <8 x double> [[S]]
+;
+entry:
+ %p.offset = getelementptr inbounds double, double* %p, i64 16
+ %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %p.offset, i32 8, i1 false, i32 4, i32 2)
+ call void @llvm.matrix.column.major.store(<8 x double> %l, ptr %p, i64 8, i1 false, i32 4, i32 2)
+ %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %p.offset, i32 8, i1 false, i32 4, i32 2)
+ %s = fadd <8 x double> %l, %l.2
+ ret <8 x double> %s
+}
+
+define <8 x double> @overlapping_strided_load(ptr %p) {
+; CHECK-LABEL: define <8 x double> @overlapping_strided_load(
+; CHECK-SAME: ptr [[P:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[P_OFFSET:%.*]] = getelementptr inbounds double, ptr [[P]], i64 15
+; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[P_OFFSET]], i32 8, i1 false, i32 4, i32 2)
+; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i64(<8 x double> [[L]], ptr [[P]], i64 8, i1 false, i32 4, i32 2)
+; CHECK-NEXT: [[L_2:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[P_OFFSET]], i32 8, i1 false, i32 4, i32 2)
+; CHECK-NEXT: [[S:%.*]] = fadd <8 x double> [[L]], [[L_2]]
+; CHECK-NEXT: ret <8 x double> [[S]]
+;
+entry:
+ %p.offset = getelementptr inbounds double, double* %p, i64 15
+ %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %p.offset, i32 8, i1 false, i32 4, i32 2)
+ call void @llvm.matrix.column.major.store(<8 x double> %l, ptr %p, i64 8, i1 false, i32 4, i32 2)
+ %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %p.offset, i32 8, i1 false, i32 4, i32 2)
+ %s = fadd <8 x double> %l, %l.2
+ ret <8 x double> %s
+}
+
+declare <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr, i32, i1, i32, i32)
+declare void @llvm.matrix.column.major.store.v8f64.i32(<8 x double>, ptr, i32, i1, i32, i32)
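To spell out the arithmetic behind the two tests above (my own back-of-the-envelope numbers, not part of the patch): the matrix store at %p is bounded by 8 bytes per double * stride 8 * 2 columns = 128 bytes, so a load starting 16 doubles (128 bytes) past %p cannot overlap it, while one starting 15 doubles (120 bytes) past %p may. A small C++ check of that reasoning:

#include <cstdint>
#include <iostream>

// Mirrors the aliasing argument in the two tests above (illustrative only).
// The matrix store at %p covers at most ElemSize * Stride * Cols bytes.
int main() {
  const uint64_t ElemSize = 8, Stride = 8, Cols = 2;
  const uint64_t StoreBound = ElemSize * Stride * Cols; // 128-byte upper bound

  for (uint64_t OffsetElems : {16, 15}) {
    const uint64_t LoadStart = OffsetElems * ElemSize; // byte offset of %p.offset
    const bool MayOverlap = LoadStart < StoreBound;
    std::cout << "offset of " << OffsetElems << " doubles: "
              << (MayOverlap ? "may overlap the store" : "no overlap") << "\n";
  }
  return 0;
}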
diff --git a/llvm/test/Transforms/DeadStoreElimination/matrix-intrinsics.ll b/llvm/test/Transforms/DeadStoreElimination/matrix-intrinsics.ll
new file mode 100644
index 0000000000000..5f397e5f82181
--- /dev/null
+++ b/llvm/test/Transforms/DeadStoreElimination/matrix-intrinsics.ll
@@ -0,0 +1,82 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -passes=dse -S < %s | FileCheck %s
+
+define void @dead_unstrided_store(ptr noalias %src, ptr noalias %dst) {
+; CHECK-LABEL: define void @dead_unstrided_store(
+; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 4, i1 false, i32 4, i32 2)
+; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i64(<8 x double> [[L]], ptr [[DST]], i64 4, i1 false, i32 4, i32 2)
+; CHECK-NEXT: ret void
+;
+entry:
+ %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 4, i1 false, i32 4, i32 2)
+ call void @llvm.matrix.column.major.store(<8 x double> %l, ptr %dst, i64 4, i1 false, i32 4, i32 2)
+ call void @llvm.matrix.column.major.store(<8 x double> %l, ptr %dst, i64 4, i1 false, i32 4, i32 2)
+ ret void
+}
+
+define void @live_strided_store(ptr noalias %src, ptr noalias %dst) {
+; CHECK-LABEL: define void @live_strided_store(
+; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 4, i1 false, i32 4, i32 2)
+; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i64(<8 x double> [[L]], ptr [[DST]], i64 200, i1 false, i32 4, i32 2)
+; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i64(<8 x double> [[L]], ptr [[DST]], i64 100, i1 false, i32 4, i32 2)
+; CHECK-NEXT: ret void
+;
+entry:
+ %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 4, i1 false, i32 4, i32 2)
+ call void @llvm.matrix.column.major.store(<8 x double> %l, ptr %dst, i64 200, i1 false, i32 4, i32 2)
+ call void @llvm.matrix.column.major.store(<8 x double> %l, ptr %dst, i64 100, i1 false, i32 4, i32 2)
+ ret void
+}
+
+define void @dead_strided_store(ptr noalias %src, ptr noalias %dst) {
+; CHECK-LABEL: define void @dead_strided_store(
+; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 200, i1 false, i32 4, i32 2)
+; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i64(<8 x double> [[L]], ptr [[DST]], i64 100, i1 false, i32 4, i32 2)
+; CHECK-NEXT: ret void
+;
+entry:
+ %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 200, i1 false, i32 4, i32 2)
+ call void @llvm.matrix.column.major.store(<8 x double> %l, ptr %dst, i64 100, i1 false, i32 4, i32 2)
+ call void @llvm.matrix.column.major.store(<8 x double> %l, ptr %dst, i64 100, i1 false, i32 4, i32 2)
+ ret void
+}
+
+define void @dead_dynamically_strided_store(ptr noalias %src, ptr noalias %dst, i64 %stride) {
+; CHECK-LABEL: define void @dead_dynamically_strided_store(
+; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]], i64 [[STRIDE:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 4, i1 false, i32 4, i32 2)
+; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i64(<8 x double> [[L]], ptr [[DST]], i64 [[STRIDE]], i1 false, i32 4, i32 2)
+; CHECK-NEXT: ret void
+;
+entry:
+ %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 4, i1 false, i32 4, i32 2)
+ call void @llvm.matrix.column.major.store(<8 x double> %l, ptr %dst, i64 %stride, i1 false, i32 4, i32 2)
+ call void @llvm.matrix.column.major.store(<8 x double> %l, ptr %dst, i64 %stride, i1 false, i32 4, i32 2)
+ ret void
+}
+
+define void @live_dynamically_strided_store(ptr noalias %src, ptr noalias %dst, i64 %stride, i64 %stride.2) {
+; CHECK-LABEL: define void @live_dynamically_strided_store(
+; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]], i64 [[STRIDE:%.*]], i64 [[STRIDE_2:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 4, i1 false, i32 4, i32 2)
+; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i64(<8 x double> [[L]], ptr [[DST]], i64 [[STRIDE]], i1 false, i32 4, i32 2)
+; CHECK-NEXT: call void @llvm.matrix.column.major.store.v8f64.i64(<8 x double> [[L]], ptr [[DST]], i64 [[STRIDE_2]], i1 false, i32 4, i32 2)
+; CHECK-NEXT: ret void
+;
+entry:
+ %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 4, i1 false, i32 4, i32 2)
+ call void @llvm.matrix.column.major.store(<8 x double> %l, ptr %dst, i64 %stride, i1 false, i32 4, i32 2)
+ call void @llvm.matrix.column.major.store(<8 x double> %l, ptr %dst, i64 %stride.2, i1 false, i32 4, i32 2)
+ ret void
+}
+
+declare <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr, i32, i1, i32, i32)
+declare void @llvm.matrix.column.major.store.v8f64.i32(<8 x double>, ptr, i32, i1, i32, i32)
diff --git a/llvm/test/Transforms/GVN/matrix-intrinsics.ll b/llvm/test/Transforms/GVN/matrix-intrinsics.ll
new file mode 100644
index 0000000000000..18d8a450fccd1
--- /dev/null
+++ b/llvm/test/Transforms/GVN/matrix-intrinsics.ll
@@ -0,0 +1,98 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -passes=gvn -S < %s | FileCheck %s
+
+define <8 x double> @redundant_unstrided_load(ptr %src) {
+; CHECK-LABEL: define <8 x double> @redundant_unstrided_load(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 4, i1 false, i32 4, i32 2)
+; CHECK-NEXT: [[S:%.*]] = fadd contract <8 x double> [[L]], [[L]]
+; CHECK-NEXT: ret <8 x double> [[S]]
+;
+entry:
+ %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 4, i1 false, i32 4, i32 2)
+ %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 4, i1 false, i32 4, i32 2)
+ %s = fadd contract <8 x double> %l, %l.2
+ ret <8 x double> %s
+}
+
+define <8 x double> @redundant_strided_load(ptr %src) {
+; CHECK-LABEL: define <8 x double> @redundant_strided_load(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 200, i1 false, i32 4, i32 2)
+; CHECK-NEXT: [[S:%.*]] = fadd contract <8 x double> [[L]], [[L]]
+; CHECK-NEXT: ret <8 x double> [[S]]
+;
+entry:
+ %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 200, i1 false, i32 4, i32 2)
+ %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 200, i1 false, i32 4, i32 2)
+ %s = fadd contract <8 x double> %l, %l.2
+ ret <8 x double> %s
+}
+
+define <8 x double> @necessary_unstrided_load(ptr %src) {
+; CHECK-LABEL: define <8 x double> @necessary_unstrided_load(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 4, i1 false, i32 4, i32 2)
+; CHECK-NEXT: [[L_2:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 2, i1 false, i32 2, i32 4)
+; CHECK-NEXT: [[S:%.*]] = fadd contract <8 x double> [[L]], [[L_2]]
+; CHECK-NEXT: ret <8 x double> [[S]]
+;
+entry:
+ %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 4, i1 false, i32 4, i32 2)
+ %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 2, i1 false, i32 2, i32 4)
+ %s = fadd contract <8 x double> %l, %l.2
+ ret <8 x double> %s
+}
+
+define <8 x double> @necessary_strided_load(ptr %src) {
+; CHECK-LABEL: define <8 x double> @necessary_strided_load(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 200, i1 false, i32 4, i32 2)
+; CHECK-NEXT: [[L_2:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 100, i1 false, i32 4, i32 2)
+; CHECK-NEXT: [[S:%.*]] = fadd contract <8 x double> [[L]], [[L_2]]
+; CHECK-NEXT: ret <8 x double> [[S]]
+;
+entry:
+ %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 200, i1 false, i32 4, i32 2)
+ %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 100, i1 false, i32 4, i32 2)
+ %s = fadd contract <8 x double> %l, %l.2
+ ret <8 x double> %s
+}
+
+define <8 x double> @redundant_dynamically_strided_load(ptr %src, i32 %stride) {
+; CHECK-LABEL: define <8 x double> @redundant_dynamically_strided_load(
+; CHECK-SAME: ptr [[SRC:%.*]], i32 [[STRIDE:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 [[STRIDE]], i1 false, i32 4, i32 2)
+; CHECK-NEXT: [[S:%.*]] = fadd contract <8 x double> [[L]], [[L]]
+; CHECK-NEXT: ret <8 x double> [[S]]
+;
+entry:
+ %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 %stride, i1 false, i32 4, i32 2)
+ %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 %stride, i1 false, i32 4, i32 2)
+ %s = fadd contract <8 x double> %l, %l.2
+ ret <8 x double> %s
+}
+
+define <8 x double> @necessary_dynamically_strided_load(ptr %src, i32 %stride, i32 %stride.2) {
+; CHECK-LABEL: define <8 x double> @necessary_dynamically_strided_load(
+; CHECK-SAME: ptr [[SRC:%.*]], i32 [[STRIDE:%.*]], i32 [[STRIDE_2:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[L:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 [[STRIDE]], i1 false, i32 4, i32 2)
+; CHECK-NEXT: [[L_2:%.*]] = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr [[SRC]], i32 [[STRIDE_2]], i1 false, i32 4, i32 2)
+; CHECK-NEXT: [[S:%.*]] = fadd contract <8 x double> [[L]], [[L_2]]
+; CHECK-NEXT: ret <8 x double> [[S]]
+;
+entry:
+ %l = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 %stride, i1 false, i32 4, i32 2)
+ %l.2 = call <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr %src, i32 %stride.2, i1 false, i32 4, i32 2)
+ %s = fadd contract <8 x double> %l, %l.2
+ ret <8 x double> %s
+}
+
+declare <8 x double> @llvm.matrix.column.major.load.v8f64.i32(ptr, i32, i1, i32, i32)
+declare void @llvm.matrix.column.major.store.v8f64.i32(<8 x double>, ptr, i32, i1, i32, i32)
For the title, something like "[MemoryLocation] Support strided matrix loads/stores" would be more accurate.
Force-pushed from 463c02c to b40ddb5
Force-pushed from b40ddb5 to adbfde0
LGTM
llvm/lib/Analysis/MemoryLocation.cpp (Outdated)

    auto *VT = cast<VectorType>(IsLoad ? II->getType()
                                       : II->getArgOperand(0)->getType());
    assert(Cols != 0 && "Matrix cannot have 0 columns");
    TypeSize Size = DL.getTypeStoreSize(VT->getScalarType()) *
Suggested change:
-    TypeSize Size = DL.getTypeStoreSize(VT->getScalarType()) *
+    TypeSize Size = DL.getTypeAllocSize(VT->getScalarType()) *
Not that it is likely to make a difference here, but my understanding is that this is a GEP stride, which uses the alloc size.
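For readers unfamiliar with the distinction the suggestion relies on (an illustrative sketch under an assumed standard x86-64 data layout, not part of this PR): the store size is the number of bytes a store of the value actually writes, while the alloc size also includes ABI padding and is the unit a GEP over the element type steps by. The two differ for types such as x86_fp80.

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Illustrative only: a type where store size and alloc size diverge.
// The data-layout string is an assumption (typical x86-64: f80 padded to 16 bytes).
int main() {
  LLVMContext Ctx;
  DataLayout DL("e-m:e-i64:64-f80:128-n8:16:32:64-S128");
  Type *F80 = Type::getX86_FP80Ty(Ctx);

  // Bytes actually written when storing the value.
  errs() << "store size: " << DL.getTypeStoreSize(F80).getFixedValue() << "\n"; // 10
  // Bytes between consecutive array elements, i.e. the GEP stride unit.
  errs() << "alloc size: " << DL.getTypeAllocSize(F80).getFixedValue() << "\n"; // 16
  return 0;
}

For double the two sizes coincide, which is why the change is unlikely to make a difference in these tests.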
Thanks!
LGTM, thanks
…63368) This patch provides an approximation of the memory locations touched by `llvm.matrix.column.major.load` and `llvm.matrix.column.major.store`, enabling dead store elimination and GVN to remove redundant loads and dead stores. PR: llvm/llvm-project#163368