Skip to content

Commit

Permalink
AMDGPU: Remove .v2bf16 buffer atomic fadd intrinsics
Browse files Browse the repository at this point in the history
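These are redundant with the unsuffixed versions, and have a name
collision with surprising behavior when the base intrinsic is used with
v2bf16: the base intrinsic is overloaded on its floating-point type, so its
mangled name for a v2bf16 operand ends in the same ".v2bf16" suffix as these
dedicated intrinsics.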

The global and flat variants should be removed too, but those are complicated
due to using v2i16 in place of the natural v2bf16. Those cases can soon be
completely deleted in favor of atomicrmw.

The GlobalISel codegen change is broken: it now handles the operation as bf16
where it should be handled as f16 (see the updated v2f16 test checks). However,
it's a bug that this passed the IRTranslator in the first place.
  • Loading branch information
arsenm committed Jun 17, 2024
1 parent 405882d commit fb5e46d
Show file tree
Hide file tree
Showing 12 changed files with 9 additions and 78 deletions.
44 changes: 2 additions & 42 deletions llvm/include/llvm/IR/IntrinsicsAMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -1337,27 +1337,9 @@ def int_amdgcn_raw_ptr_buffer_atomic_cmpswap : Intrinsic<

// gfx908 intrinsic
def int_amdgcn_raw_buffer_atomic_fadd : AMDGPURawBufferAtomic<llvm_anyfloat_ty>;

// Supports float and <2 x half> on gfx908. Supports v2bf16 on gfx90a, gfx940, gfx12+.
def int_amdgcn_raw_ptr_buffer_atomic_fadd : AMDGPURawPtrBufferAtomic<llvm_anyfloat_ty>;
// gfx12+ intrinsic
def int_amdgcn_raw_buffer_atomic_fadd_v2bf16 : Intrinsic <
[llvm_v2bf16_ty],
[llvm_v2bf16_ty,
llvm_v4i32_ty,
llvm_i32_ty,
llvm_i32_ty,
llvm_i32_ty],
[ImmArg<ArgIndex<4>>, IntrWillReturn, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>,
AMDGPURsrcIntrinsic<1, 0>;
def int_amdgcn_raw_ptr_buffer_atomic_fadd_v2bf16 : Intrinsic <
[llvm_v2bf16_ty],
[llvm_v2bf16_ty,
AMDGPUBufferRsrcTy,
llvm_i32_ty,
llvm_i32_ty,
llvm_i32_ty],
[IntrArgMemOnly, NoCapture<ArgIndex<1>>,
ImmArg<ArgIndex<4>>, IntrWillReturn, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>,
AMDGPURsrcIntrinsic<1, 0>;

class AMDGPUStructBufferAtomic<LLVMType data_ty = llvm_any_ty> : Intrinsic <
[data_ty],
Expand Down Expand Up @@ -1434,28 +1416,6 @@ def int_amdgcn_struct_ptr_buffer_atomic_cmpswap : Intrinsic<
// gfx908 intrinsic
def int_amdgcn_struct_buffer_atomic_fadd : AMDGPUStructBufferAtomic<llvm_anyfloat_ty>;
def int_amdgcn_struct_ptr_buffer_atomic_fadd : AMDGPUStructPtrBufferAtomic<llvm_anyfloat_ty>;
// gfx12 intrinsic
def int_amdgcn_struct_buffer_atomic_fadd_v2bf16 : Intrinsic <
[llvm_v2bf16_ty],
[llvm_v2bf16_ty,
llvm_v4i32_ty,
llvm_i32_ty,
llvm_i32_ty,
llvm_i32_ty,
llvm_i32_ty],
[ImmArg<ArgIndex<5>>, IntrWillReturn, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>,
AMDGPURsrcIntrinsic<1, 0>;
def int_amdgcn_struct_ptr_buffer_atomic_fadd_v2bf16 : Intrinsic <
[llvm_v2bf16_ty],
[llvm_v2bf16_ty,
AMDGPUBufferRsrcTy,
llvm_i32_ty,
llvm_i32_ty,
llvm_i32_ty,
llvm_i32_ty],
[IntrArgMemOnly, NoCapture<ArgIndex<1>>,
ImmArg<ArgIndex<5>>, IntrWillReturn, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>,
AMDGPURsrcIntrinsic<1, 0>;

// gfx90a intrinsics
def int_amdgcn_struct_buffer_atomic_fmin : AMDGPUStructBufferAtomic<llvm_anyfloat_ty>;
Expand Down
1 change: 0 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUGISel.td
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,6 @@ def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_XOR, SIbuffer_atomic_xor>;
def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_INC, SIbuffer_atomic_inc>;
def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_DEC, SIbuffer_atomic_dec>;
def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_FADD, SIbuffer_atomic_fadd>;
def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_FADD_BF16, SIbuffer_atomic_fadd_bf16>;
def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_FMIN, SIbuffer_atomic_fmin>;
def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_FMAX, SIbuffer_atomic_fmax>;
def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_CMPSWAP, SIbuffer_atomic_cmpswap>;
Expand Down
1 change: 0 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5564,7 +5564,6 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(BUFFER_ATOMIC_CMPSWAP)
NODE_NAME_CASE(BUFFER_ATOMIC_CSUB)
NODE_NAME_CASE(BUFFER_ATOMIC_FADD)
NODE_NAME_CASE(BUFFER_ATOMIC_FADD_BF16)
NODE_NAME_CASE(BUFFER_ATOMIC_FMIN)
NODE_NAME_CASE(BUFFER_ATOMIC_FMAX)
NODE_NAME_CASE(BUFFER_ATOMIC_COND_SUB_U32)
Expand Down
1 change: 0 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -615,7 +615,6 @@ enum NodeType : unsigned {
BUFFER_ATOMIC_CMPSWAP,
BUFFER_ATOMIC_CSUB,
BUFFER_ATOMIC_FADD,
BUFFER_ATOMIC_FADD_BF16,
BUFFER_ATOMIC_FMIN,
BUFFER_ATOMIC_FMAX,
BUFFER_ATOMIC_COND_SUB_U32,
Expand Down
9 changes: 0 additions & 9 deletions llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6011,11 +6011,6 @@ static unsigned getBufferAtomicPseudo(Intrinsic::ID IntrID) {
case Intrinsic::amdgcn_struct_buffer_atomic_fadd:
case Intrinsic::amdgcn_struct_ptr_buffer_atomic_fadd:
return AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD;
case Intrinsic::amdgcn_raw_buffer_atomic_fadd_v2bf16:
case Intrinsic::amdgcn_struct_buffer_atomic_fadd_v2bf16:
case Intrinsic::amdgcn_raw_ptr_buffer_atomic_fadd_v2bf16:
case Intrinsic::amdgcn_struct_ptr_buffer_atomic_fadd_v2bf16:
return AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD_BF16;
case Intrinsic::amdgcn_raw_buffer_atomic_fmin:
case Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmin:
case Intrinsic::amdgcn_struct_buffer_atomic_fmin:
Expand Down Expand Up @@ -7323,10 +7318,6 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
case Intrinsic::amdgcn_raw_ptr_buffer_atomic_fadd:
case Intrinsic::amdgcn_struct_buffer_atomic_fadd:
case Intrinsic::amdgcn_struct_ptr_buffer_atomic_fadd:
case Intrinsic::amdgcn_raw_buffer_atomic_fadd_v2bf16:
case Intrinsic::amdgcn_raw_ptr_buffer_atomic_fadd_v2bf16:
case Intrinsic::amdgcn_struct_buffer_atomic_fadd_v2bf16:
case Intrinsic::amdgcn_struct_ptr_buffer_atomic_fadd_v2bf16:
return legalizeBufferAtomic(MI, B, IntrID);
case Intrinsic::amdgcn_rsq_clamp:
return legalizeRsqClampIntrinsic(MI, MRI, B);
Expand Down
2 changes: 0 additions & 2 deletions llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3079,7 +3079,6 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
return;
}
case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD:
case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD_BF16:
case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN:
case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX: {
applyDefaultMapping(OpdMapper);
Expand Down Expand Up @@ -4376,7 +4375,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC:
case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC:
case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD:
case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD_BF16:
case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN:
case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX: {
// vdata_out
Expand Down
4 changes: 0 additions & 4 deletions llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,6 @@ def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_xor>;
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_inc>;
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_dec>;
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_fadd>;
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_fadd_v2bf16>;
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_fmin>;
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_fmax>;
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_cmpswap>;
Expand All @@ -287,7 +286,6 @@ def : SourceOfDivergence<int_amdgcn_raw_ptr_buffer_atomic_xor>;
def : SourceOfDivergence<int_amdgcn_raw_ptr_buffer_atomic_inc>;
def : SourceOfDivergence<int_amdgcn_raw_ptr_buffer_atomic_dec>;
def : SourceOfDivergence<int_amdgcn_raw_ptr_buffer_atomic_fadd>;
def : SourceOfDivergence<int_amdgcn_raw_ptr_buffer_atomic_fadd_v2bf16>;
def : SourceOfDivergence<int_amdgcn_raw_ptr_buffer_atomic_fmin>;
def : SourceOfDivergence<int_amdgcn_raw_ptr_buffer_atomic_fmax>;
def : SourceOfDivergence<int_amdgcn_raw_ptr_buffer_atomic_cmpswap>;
Expand All @@ -305,7 +303,6 @@ def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_xor>;
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_inc>;
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_dec>;
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_fadd>;
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_fadd_v2bf16>;
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_fmin>;
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_fmax>;
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_cmpswap>;
Expand All @@ -323,7 +320,6 @@ def : SourceOfDivergence<int_amdgcn_struct_ptr_buffer_atomic_xor>;
def : SourceOfDivergence<int_amdgcn_struct_ptr_buffer_atomic_inc>;
def : SourceOfDivergence<int_amdgcn_struct_ptr_buffer_atomic_dec>;
def : SourceOfDivergence<int_amdgcn_struct_ptr_buffer_atomic_fadd>;
def : SourceOfDivergence<int_amdgcn_struct_ptr_buffer_atomic_fadd_v2bf16>;
def : SourceOfDivergence<int_amdgcn_struct_ptr_buffer_atomic_fmin>;
def : SourceOfDivergence<int_amdgcn_struct_ptr_buffer_atomic_fmax>;
def : SourceOfDivergence<int_amdgcn_struct_ptr_buffer_atomic_cmpswap>;
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/BUFInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -1751,7 +1751,7 @@ let OtherPredicates = [HasAtomicCSubNoRtnInsts] in
defm : SIBufferAtomicPat<"SIbuffer_atomic_csub", i32, "BUFFER_ATOMIC_CSUB", ["noret"]>;

let SubtargetPredicate = isGFX12Plus in {
defm : SIBufferAtomicPat_Common<"SIbuffer_atomic_fadd_bf16", v2bf16, "BUFFER_ATOMIC_PK_ADD_BF16_VBUFFER">;
defm : SIBufferAtomicPat_Common<"SIbuffer_atomic_fadd", v2bf16, "BUFFER_ATOMIC_PK_ADD_BF16_VBUFFER">;
defm : SIBufferAtomicPat_Common<"SIbuffer_atomic_cond_sub_u32", i32, "BUFFER_ATOMIC_COND_SUB_U32_VBUFFER", ["ret"]>;

let OtherPredicates = [HasAtomicCSubNoRtnInsts] in
Expand Down
9 changes: 0 additions & 9 deletions llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8833,17 +8833,9 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
case Intrinsic::amdgcn_raw_buffer_atomic_fadd:
case Intrinsic::amdgcn_raw_ptr_buffer_atomic_fadd:
return lowerRawBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_FADD);
case Intrinsic::amdgcn_raw_ptr_buffer_atomic_fadd_v2bf16:
case Intrinsic::amdgcn_raw_buffer_atomic_fadd_v2bf16:
return lowerRawBufferAtomicIntrin(Op, DAG,
AMDGPUISD::BUFFER_ATOMIC_FADD_BF16);
case Intrinsic::amdgcn_struct_buffer_atomic_fadd:
case Intrinsic::amdgcn_struct_ptr_buffer_atomic_fadd:
return lowerStructBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_FADD);
case Intrinsic::amdgcn_struct_buffer_atomic_fadd_v2bf16:
case Intrinsic::amdgcn_struct_ptr_buffer_atomic_fadd_v2bf16:
return lowerStructBufferAtomicIntrin(Op, DAG,
AMDGPUISD::BUFFER_ATOMIC_FADD_BF16);
case Intrinsic::amdgcn_raw_buffer_atomic_fmin:
case Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmin:
return lowerRawBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_FMIN);
Expand Down Expand Up @@ -15841,7 +15833,6 @@ bool SITargetLowering::isSDNodeSourceOfDivergence(const SDNode *N,
case AMDGPUISD::BUFFER_ATOMIC_CMPSWAP:
case AMDGPUISD::BUFFER_ATOMIC_CSUB:
case AMDGPUISD::BUFFER_ATOMIC_FADD:
case AMDGPUISD::BUFFER_ATOMIC_FADD_BF16:
case AMDGPUISD::BUFFER_ATOMIC_FMIN:
case AMDGPUISD::BUFFER_ATOMIC_FMAX:
// Target-specific read-modify-write atomics are sources of divergence.
Expand Down
1 change: 0 additions & 1 deletion llvm/lib/Target/AMDGPU/SIInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,6 @@ defm SIbuffer_atomic_inc : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_INC">;
defm SIbuffer_atomic_dec : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_DEC">;
defm SIbuffer_atomic_csub : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_CSUB">;
defm SIbuffer_atomic_fadd : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FADD">;
defm SIbuffer_atomic_fadd_bf16 : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FADD_BF16">;
defm SIbuffer_atomic_fmin : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FMIN">;
defm SIbuffer_atomic_fmax : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FMAX">;
defm SIbuffer_atomic_cond_sub_u32 : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_COND_SUB_U32">;
Expand Down
1 change: 0 additions & 1 deletion llvm/lib/Target/AMDGPU/SIInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -3892,7 +3892,6 @@ def G_AMDGPU_BUFFER_ATOMIC_XOR : BufferAtomicGenericInstruction;
def G_AMDGPU_BUFFER_ATOMIC_INC : BufferAtomicGenericInstruction;
def G_AMDGPU_BUFFER_ATOMIC_DEC : BufferAtomicGenericInstruction;
def G_AMDGPU_BUFFER_ATOMIC_FADD : BufferAtomicGenericInstruction;
def G_AMDGPU_BUFFER_ATOMIC_FADD_BF16 : BufferAtomicGenericInstruction;
def G_AMDGPU_BUFFER_ATOMIC_FMIN : BufferAtomicGenericInstruction;
def G_AMDGPU_BUFFER_ATOMIC_FMAX : BufferAtomicGenericInstruction;

Expand Down
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/AMDGPU/fp-atomics-gfx1200.ll
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_v2f16_noret_offset(<2 x half> %val,
;
; GFX12-GISEL-LABEL: raw_buffer_atomic_add_v2f16_noret_offset:
; GFX12-GISEL: ; %bb.0:
; GFX12-GISEL-NEXT: buffer_atomic_pk_add_f16 v0, off, s[0:3], s4 offset:92
; GFX12-GISEL-NEXT: buffer_atomic_pk_add_bf16 v0, off, s[0:3], s4 offset:92
; GFX12-GISEL-NEXT: s_nop 0
; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX12-GISEL-NEXT: s_endpgm
Expand All @@ -339,7 +339,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_v2f16_noret(<2 x half> %val, <4 x i
;
; GFX12-GISEL-LABEL: raw_buffer_atomic_add_v2f16_noret:
; GFX12-GISEL: ; %bb.0:
; GFX12-GISEL-NEXT: buffer_atomic_pk_add_f16 v0, v1, s[0:3], s4 offen
; GFX12-GISEL-NEXT: buffer_atomic_pk_add_bf16 v0, v1, s[0:3], s4 offen
; GFX12-GISEL-NEXT: s_nop 0
; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX12-GISEL-NEXT: s_endpgm
Expand All @@ -356,7 +356,7 @@ define amdgpu_ps <2 x half> @raw_buffer_atomic_add_v2f16_ret_offset(<2 x half> %
;
; GFX12-GISEL-LABEL: raw_buffer_atomic_add_v2f16_ret_offset:
; GFX12-GISEL: ; %bb.0:
; GFX12-GISEL-NEXT: buffer_atomic_pk_add_f16 v0, off, s[0:3], s4 offset:92 th:TH_ATOMIC_RETURN
; GFX12-GISEL-NEXT: buffer_atomic_pk_add_bf16 v0, off, s[0:3], s4 offset:92 th:TH_ATOMIC_RETURN
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: ; return to shader part epilog
%ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 92, i32 %soffset, i32 0)
Expand All @@ -372,7 +372,7 @@ define amdgpu_ps <2 x half> @raw_buffer_atomic_add_v2f16_ret(<2 x half> %val, <4
;
; GFX12-GISEL-LABEL: raw_buffer_atomic_add_v2f16_ret:
; GFX12-GISEL: ; %bb.0:
; GFX12-GISEL-NEXT: buffer_atomic_pk_add_f16 v0, v1, s[0:3], s4 offen th:TH_ATOMIC_RETURN
; GFX12-GISEL-NEXT: buffer_atomic_pk_add_bf16 v0, v1, s[0:3], s4 offen th:TH_ATOMIC_RETURN
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: ; return to shader part epilog
%ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
Expand All @@ -388,7 +388,7 @@ define amdgpu_ps float @struct_buffer_atomic_add_v2f16_ret(<2 x half> %val, <4 x
;
; GFX12-GISEL-LABEL: struct_buffer_atomic_add_v2f16_ret:
; GFX12-GISEL: ; %bb.0:
; GFX12-GISEL-NEXT: buffer_atomic_pk_add_f16 v0, v[1:2], s[0:3], s4 idxen offen th:TH_ATOMIC_RETURN
; GFX12-GISEL-NEXT: buffer_atomic_pk_add_bf16 v0, v[1:2], s[0:3], s4 idxen offen th:TH_ATOMIC_RETURN
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: ; return to shader part epilog
%orig = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
Expand All @@ -406,7 +406,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_v2f16_noret(<2 x half> %val, <4
;
; GFX12-GISEL-LABEL: struct_buffer_atomic_add_v2f16_noret:
; GFX12-GISEL: ; %bb.0:
; GFX12-GISEL-NEXT: buffer_atomic_pk_add_f16 v0, v[1:2], s[0:3], s4 idxen offen
; GFX12-GISEL-NEXT: buffer_atomic_pk_add_bf16 v0, v[1:2], s[0:3], s4 idxen offen
; GFX12-GISEL-NEXT: s_nop 0
; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX12-GISEL-NEXT: s_endpgm
Expand Down

0 comments on commit fb5e46d

Please sign in to comment.