-
Notifications
You must be signed in to change notification settings - Fork 12.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
AMDGPU: Remove ds_fmin/ds_fmax intrinsics #96739
Merged
arsenm
merged 1 commit into
main
from
users/arsenm/amdgpu-remove-ds-fmin-fmax-intrinsics
Jun 27, 2024
Merged
AMDGPU: Remove ds_fmin/ds_fmax intrinsics #96739
arsenm
merged 1 commit into
main
from
users/arsenm/amdgpu-remove-ds-fmin-fmax-intrinsics
Jun 27, 2024
Conversation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This stack of pull requests is managed by Graphite. Learn more about stacking. |
@llvm/pr-subscribers-llvm-globalisel @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) ChangesThese have been replaced with atomicrmw. Full diff: https://github.com/llvm/llvm-project/pull/96739.diff 9 Files Affected:
diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst
index 76356dd76f1d2..7644da2b78bd7 100644
--- a/llvm/docs/ReleaseNotes.rst
+++ b/llvm/docs/ReleaseNotes.rst
@@ -132,6 +132,11 @@ Changes to the AMDGPU Backend
* Implemented :ref:`llvm.get.rounding <int_get_rounding>` and :ref:`llvm.set.rounding <int_set_rounding>`
+* Removed ``llvm.amdgcn.ds.fadd``, ``llvm.amdgcn.ds.fmin`` and
+ ``llvm.amdgcn.ds.fmax`` intrinsics. Users should use the
+ :ref:`atomicrmw <i_atomicrmw>` instruction with `fadd`, `fmin` and
+ `fmax` with addrspace(3) instead.
+
Changes to the ARM Backend
--------------------------
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 11662ccc1a695..2aa52ef99aaf8 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -523,17 +523,6 @@ def int_amdgcn_fmad_ftz :
[IntrNoMem, IntrSpeculatable]
>;
-class AMDGPULDSIntrin :
- Intrinsic<[llvm_any_ty],
- [LLVMQualPointerType<3>,
- LLVMMatchType<0>,
- llvm_i32_ty, // ordering
- llvm_i32_ty, // scope
- llvm_i1_ty], // isVolatile
- [IntrArgMemOnly, IntrWillReturn, NoCapture<ArgIndex<0>>,
- ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, IntrNoCallback, IntrNoFree]
->;
-
// FIXME: The m0 argument should be moved after the normal arguments
class AMDGPUDSOrderedIntrinsic : Intrinsic<
[llvm_i32_ty],
@@ -571,9 +560,6 @@ def int_amdgcn_ds_ordered_swap : AMDGPUDSOrderedIntrinsic;
def int_amdgcn_ds_append : AMDGPUDSAppendConsumedIntrinsic;
def int_amdgcn_ds_consume : AMDGPUDSAppendConsumedIntrinsic;
-def int_amdgcn_ds_fmin : AMDGPULDSIntrin;
-def int_amdgcn_ds_fmax : AMDGPULDSIntrin;
-
} // TargetPrefix = "amdgcn"
// New-style image intrinsics
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index d7825d9b3e3e5..32076a07d30e7 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -1033,8 +1033,10 @@ static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
break; // No other 'amdgcn.atomic.*'
}
- if (Name.starts_with("ds.fadd")) {
- // Replaced with atomicrmw fadd, so there's no new declaration.
+ if (Name.starts_with("ds.fadd") || Name.starts_with("ds.fmin") ||
+ Name.starts_with("ds.fmax")) {
+ // Replaced with atomicrmw fadd/fmin/fmax, so there's no new
+ // declaration.
NewFn = nullptr;
return true;
}
@@ -2347,6 +2349,8 @@ static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI,
AtomicRMWInst::BinOp RMWOp =
StringSwitch<AtomicRMWInst::BinOp>(Name)
.StartsWith("ds.fadd", AtomicRMWInst::FAdd)
+ .StartsWith("ds.fmin", AtomicRMWInst::FMin)
+ .StartsWith("ds.fmax", AtomicRMWInst::FMax)
.StartsWith("atomic.inc.", AtomicRMWInst::UIncWrap)
.StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 4b48091b7143e..83a5933ceaed6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -5401,35 +5401,6 @@ bool AMDGPULegalizerInfo::legalizeRsqClampIntrinsic(MachineInstr &MI,
return true;
}
-static unsigned getDSFPAtomicOpcode(Intrinsic::ID IID) {
- switch (IID) {
- case Intrinsic::amdgcn_ds_fmin:
- return AMDGPU::G_ATOMICRMW_FMIN;
- case Intrinsic::amdgcn_ds_fmax:
- return AMDGPU::G_ATOMICRMW_FMAX;
- default:
- llvm_unreachable("not a DS FP intrinsic");
- }
-}
-
-bool AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper &Helper,
- MachineInstr &MI,
- Intrinsic::ID IID) const {
- GISelChangeObserver &Observer = Helper.Observer;
- Observer.changingInstr(MI);
-
- MI.setDesc(ST.getInstrInfo()->get(getDSFPAtomicOpcode(IID)));
-
- // The remaining operands were used to set fields in the MemOperand on
- // construction.
- for (int I = 6; I > 3; --I)
- MI.removeOperand(I);
-
- MI.removeOperand(1); // Remove the intrinsic ID.
- Observer.changedInstr(MI);
- return true;
-}
-
// TODO: Fix pointer type handling
bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper &Helper,
MachineInstr &MI,
@@ -7423,9 +7394,6 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
return legalizeBufferAtomic(MI, B, IntrID);
case Intrinsic::amdgcn_rsq_clamp:
return legalizeRsqClampIntrinsic(MI, MRI, B);
- case Intrinsic::amdgcn_ds_fmin:
- case Intrinsic::amdgcn_ds_fmax:
- return legalizeDSAtomicFPIntrinsic(Helper, MI, IntrID);
case Intrinsic::amdgcn_image_bvh_intersect_ray:
return legalizeBVHIntrinsic(MI, B);
case Intrinsic::amdgcn_swmmac_f16_16x16x32_f16:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
index ae01bb29c1108..db1c5874093a7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -175,9 +175,6 @@ class AMDGPULegalizerInfo final : public LegalizerInfo {
bool legalizeRsqClampIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
- bool legalizeDSAtomicFPIntrinsic(LegalizerHelper &Helper,
- MachineInstr &MI, Intrinsic::ID IID) const;
-
bool getImplicitArgPtr(Register DstReg, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td b/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
index ed5bae3e4ff61..a323f63767737 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
@@ -252,8 +252,6 @@ def : SourceOfDivergence<int_amdgcn_flat_atomic_fmin_num>;
def : SourceOfDivergence<int_amdgcn_flat_atomic_fmax_num>;
def : SourceOfDivergence<int_amdgcn_global_atomic_fadd_v2bf16>;
def : SourceOfDivergence<int_amdgcn_flat_atomic_fadd_v2bf16>;
-def : SourceOfDivergence<int_amdgcn_ds_fmin>;
-def : SourceOfDivergence<int_amdgcn_ds_fmax>;
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_swap>;
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_add>;
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_sub>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 1192b49fd1f08..8882839ed8de3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -501,9 +501,7 @@ bool GCNTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
MemIntrinsicInfo &Info) const {
switch (Inst->getIntrinsicID()) {
case Intrinsic::amdgcn_ds_ordered_add:
- case Intrinsic::amdgcn_ds_ordered_swap:
- case Intrinsic::amdgcn_ds_fmin:
- case Intrinsic::amdgcn_ds_fmax: {
+ case Intrinsic::amdgcn_ds_ordered_swap: {
auto *Ordering = dyn_cast<ConstantInt>(Inst->getArgOperand(2));
auto *Volatile = dyn_cast<ConstantInt>(Inst->getArgOperand(4));
if (!Ordering || !Volatile)
@@ -1018,8 +1016,6 @@ bool GCNTTIImpl::isAlwaysUniform(const Value *V) const {
bool GCNTTIImpl::collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
Intrinsic::ID IID) const {
switch (IID) {
- case Intrinsic::amdgcn_ds_fmin:
- case Intrinsic::amdgcn_ds_fmax:
case Intrinsic::amdgcn_is_shared:
case Intrinsic::amdgcn_is_private:
case Intrinsic::amdgcn_flat_atomic_fadd:
@@ -1039,20 +1035,6 @@ Value *GCNTTIImpl::rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
Value *NewV) const {
auto IntrID = II->getIntrinsicID();
switch (IntrID) {
- case Intrinsic::amdgcn_ds_fmin:
- case Intrinsic::amdgcn_ds_fmax: {
- const ConstantInt *IsVolatile = cast<ConstantInt>(II->getArgOperand(4));
- if (!IsVolatile->isZero())
- return nullptr;
- Module *M = II->getParent()->getParent()->getParent();
- Type *DestTy = II->getType();
- Type *SrcTy = NewV->getType();
- Function *NewDecl =
- Intrinsic::getDeclaration(M, II->getIntrinsicID(), {DestTy, SrcTy});
- II->setArgOperand(0, NewV);
- II->setCalledFunction(NewDecl);
- return II;
- }
case Intrinsic::amdgcn_is_shared:
case Intrinsic::amdgcn_is_private: {
unsigned TrueAS = IntrID == Intrinsic::amdgcn_is_shared ?
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index b8ff5ed35ac80..0ed6d685ecfb0 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1279,9 +1279,7 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
switch (IntrID) {
case Intrinsic::amdgcn_ds_ordered_add:
- case Intrinsic::amdgcn_ds_ordered_swap:
- case Intrinsic::amdgcn_ds_fmin:
- case Intrinsic::amdgcn_ds_fmax: {
+ case Intrinsic::amdgcn_ds_ordered_swap: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(CI.getType());
Info.ptrVal = CI.getOperand(0);
@@ -1450,8 +1448,6 @@ bool SITargetLowering::getAddrModeArguments(IntrinsicInst *II,
case Intrinsic::amdgcn_atomic_cond_sub_u32:
case Intrinsic::amdgcn_ds_append:
case Intrinsic::amdgcn_ds_consume:
- case Intrinsic::amdgcn_ds_fmax:
- case Intrinsic::amdgcn_ds_fmin:
case Intrinsic::amdgcn_ds_ordered_add:
case Intrinsic::amdgcn_ds_ordered_swap:
case Intrinsic::amdgcn_flat_atomic_fadd:
@@ -8869,15 +8865,6 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
M->getVTList(), Ops, M->getMemoryVT(),
M->getMemOperand());
}
- case Intrinsic::amdgcn_ds_fmin:
- case Intrinsic::amdgcn_ds_fmax: {
- MemSDNode *M = cast<MemSDNode>(Op);
- unsigned Opc = IntrID == Intrinsic::amdgcn_ds_fmin ? ISD::ATOMIC_LOAD_FMIN
- : ISD::ATOMIC_LOAD_FMAX;
- return DAG.getAtomic(Opc, SDLoc(Op), M->getMemoryVT(), M->getOperand(0),
- M->getOperand(2), M->getOperand(3),
- M->getMemOperand());
- }
case Intrinsic::amdgcn_raw_buffer_load:
case Intrinsic::amdgcn_raw_ptr_buffer_load:
case Intrinsic::amdgcn_raw_buffer_load_format:
diff --git a/llvm/test/Bitcode/amdgcn-atomic.ll b/llvm/test/Bitcode/amdgcn-atomic.ll
index 311bd8863859b..ed7b04a2f3146 100644
--- a/llvm/test/Bitcode/amdgcn-atomic.ll
+++ b/llvm/test/Bitcode/amdgcn-atomic.ll
@@ -248,4 +248,56 @@ define <2 x i16> @upgrade_amdgcn_ds_fadd_v2bf16__missing_args_as_i16(ptr addrspa
ret <2 x i16> %result0
}
+declare float @llvm.amdgcn.ds.fmin.f32(ptr addrspace(3) nocapture, float, i32 immarg, i32 immarg, i1 immarg)
+declare double @llvm.amdgcn.ds.fmin.f64(ptr addrspace(3) nocapture, double, i32 immarg, i32 immarg, i1 immarg)
+
+define float @upgrade_amdgcn_ds_fmin_f32(ptr addrspace(3) %ptr, float %val) {
+ ; CHECK: atomicrmw fmin ptr addrspace(3) %ptr, float %val syncscope("agent") seq_cst, align 4
+ %result0 = call float @llvm.amdgcn.ds.fmin.f32(ptr addrspace(3) %ptr, float %val, i32 0, i32 0, i1 false)
+
+ ; CHECK: = atomicrmw volatile fmin ptr addrspace(3) %ptr, float %val syncscope("agent") seq_cst, align 4
+ %result1 = call float @llvm.amdgcn.ds.fmin.f32(ptr addrspace(3) %ptr, float %val, i32 0, i32 0, i1 true)
+
+ ; CHECK: = atomicrmw fmin ptr addrspace(3) %ptr, float %val syncscope("agent") seq_cst, align 4
+ %result2 = call float @llvm.amdgcn.ds.fmin.f32(ptr addrspace(3) %ptr, float %val, i32 43, i32 3, i1 false)
+
+ ; CHECK: = atomicrmw fmin ptr addrspace(3) %ptr, float %val syncscope("agent") acquire, align 4
+ %result3 = call float @llvm.amdgcn.ds.fmin.f32(ptr addrspace(3) %ptr, float %val, i32 4, i32 2, i1 false)
+
+ ret float %result3
+}
+
+define double @upgrade_amdgcn_ds_fmin_f64(ptr addrspace(3) %ptr, double %val) {
+ ; CHECK: atomicrmw fmin ptr addrspace(3) %ptr, double %val syncscope("agent") seq_cst, align 8
+ %result0 = call double @llvm.amdgcn.ds.fmin.f64(ptr addrspace(3) %ptr, double %val, i32 0, i32 0, i1 false)
+
+ ; CHECK: = atomicrmw volatile fmin ptr addrspace(3) %ptr, double %val syncscope("agent") seq_cst, align 8
+ %result1 = call double @llvm.amdgcn.ds.fmin.f64(ptr addrspace(3) %ptr, double %val, i32 0, i32 0, i1 true)
+
+ ; CHECK: = atomicrmw fmin ptr addrspace(3) %ptr, double %val syncscope("agent") seq_cst, align 8
+ %result2 = call double @llvm.amdgcn.ds.fmin.f64(ptr addrspace(3) %ptr, double %val, i32 43, i32 3, i1 false)
+
+ ; CHECK: = atomicrmw fmin ptr addrspace(3) %ptr, double %val syncscope("agent") acquire, align 8
+ %result3 = call double @llvm.amdgcn.ds.fmin.f64(ptr addrspace(3) %ptr, double %val, i32 4, i32 2, i1 false)
+
+ ret double %result3
+}
+
+declare float @llvm.amdgcn.ds.fmin(ptr addrspace(3) nocapture, float, i32 immarg, i32 immarg, i1 immarg)
+
+define float @upgrade_amdgcn_ds_fmin_f32_no_suffix(ptr addrspace(3) %ptr, float %val) {
+ ; CHECK: = atomicrmw fmin ptr addrspace(3) %ptr, float %val syncscope("agent") seq_cst, align 4
+
+ %result0 = call float @llvm.amdgcn.ds.fmin(ptr addrspace(3) %ptr, float %val, i32 0, i32 0, i1 false)
+ ret float %result0
+}
+
+declare float @llvm.amdgcn.ds.fmax(ptr addrspace(3) nocapture, float, i32 immarg, i32 immarg, i1 immarg)
+
+define float @upgrade_amdgcn_ds_fmax_f32_no_suffix(ptr addrspace(3) %ptr, float %val) {
+ ; CHECK: = atomicrmw fmax ptr addrspace(3) %ptr, float %val syncscope("agent") seq_cst, align 4
+ %result0 = call float @llvm.amdgcn.ds.fmax(ptr addrspace(3) %ptr, float %val, i32 0, i32 0, i1 false)
+ ret float %result0
+}
+
attributes #0 = { argmemonly nounwind willreturn }
|
arsenm
force-pushed
the
users/arsenm/amdgpu-remove-ds-fmin-fmax-intrinsics
branch
from
June 26, 2024 16:11
e95c252
to
401d82f
Compare
rampitec
approved these changes
Jun 26, 2024
arsenm
force-pushed
the
users/arsenm/amdgpu-clang-use-atomicrmw-ds-fmin-fmax-builtins
branch
from
June 27, 2024 08:55
5c2be85
to
7c9636a
Compare
arsenm
force-pushed
the
users/arsenm/amdgpu-remove-ds-fmin-fmax-intrinsics
branch
from
June 27, 2024 08:55
401d82f
to
864e3bb
Compare
arsenm
force-pushed
the
users/arsenm/amdgpu-clang-use-atomicrmw-ds-fmin-fmax-builtins
branch
from
June 27, 2024 13:29
7c9636a
to
ff880d5
Compare
Base automatically changed from
users/arsenm/amdgpu-clang-use-atomicrmw-ds-fmin-fmax-builtins
to
main
June 27, 2024 13:32
These have been replaced with atomicrmw.
arsenm
force-pushed
the
users/arsenm/amdgpu-remove-ds-fmin-fmax-intrinsics
branch
from
June 27, 2024 13:32
864e3bb
to
906c8fe
Compare
lravenclaw
pushed a commit
to lravenclaw/llvm-project
that referenced
this pull request
Jul 3, 2024
These have been replaced with atomicrmw.
AlexisPerry
pushed a commit
to llvm-project-tlp/llvm-project
that referenced
this pull request
Jul 9, 2024
These have been replaced with atomicrmw.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
These have been replaced with atomicrmw.