-
Notifications
You must be signed in to change notification settings - Fork 12.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Revert "AMDGPU: Move attributor into optimization pipeline (#83131)" and follow up commit "clang/AMDGPU: Defeat attribute optimization in attribute test" #98851
Conversation
@llvm/pr-subscribers-backend-amdgpu @llvm/pr-subscribers-clang Author: None (dyung) ChangesThis reverts commits 677cc15 and 78bc1b6. The test CodeGenHIP/default-attributes.hip is failing on multiple bots even after the attempted fix including the following:
These bots have been broken for a day, so reverting to get everything back to green. Patch is 16.75 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/98851.diff 562 Files Affected:
diff --git a/clang/test/CodeGenHIP/default-attributes.hip b/clang/test/CodeGenHIP/default-attributes.hip
index ee16ecd134bfe..63572bfd242b9 100644
--- a/clang/test/CodeGenHIP/default-attributes.hip
+++ b/clang/test/CodeGenHIP/default-attributes.hip
@@ -8,68 +8,49 @@
#define __device__ __attribute__((device))
#define __global__ __attribute__((global))
-//.
-// OPTNONE: @__hip_cuid_ = addrspace(1) global i8 0
-// OPTNONE: @llvm.compiler.used = appending addrspace(1) global [1 x ptr] [ptr addrspacecast (ptr addrspace(1) @__hip_cuid_ to ptr)], section "llvm.metadata"
-// OPTNONE: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500
-//.
-// OPT: @__hip_cuid_ = addrspace(1) global i8 0
-// OPT: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500
-// OPT: @llvm.compiler.used = appending addrspace(1) global [1 x ptr] [ptr addrspacecast (ptr addrspace(1) @__hip_cuid_ to ptr)], section "llvm.metadata"
-//.
-__device__ void extern_func();
-
// OPTNONE: Function Attrs: convergent mustprogress noinline nounwind optnone
// OPTNONE-LABEL: define {{[^@]+}}@_Z4funcv
// OPTNONE-SAME: () #[[ATTR0:[0-9]+]] {
// OPTNONE-NEXT: entry:
-// OPTNONE-NEXT: call void @_Z11extern_funcv() #[[ATTR3:[0-9]+]]
// OPTNONE-NEXT: ret void
//
-// OPT: Function Attrs: convergent mustprogress nounwind
+// OPT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
// OPT-LABEL: define {{[^@]+}}@_Z4funcv
// OPT-SAME: () local_unnamed_addr #[[ATTR0:[0-9]+]] {
// OPT-NEXT: entry:
-// OPT-NEXT: tail call void @_Z11extern_funcv() #[[ATTR3:[0-9]+]]
// OPT-NEXT: ret void
//
__device__ void func() {
- extern_func();
+
}
// OPTNONE: Function Attrs: convergent mustprogress noinline norecurse nounwind optnone
// OPTNONE-LABEL: define {{[^@]+}}@_Z6kernelv
-// OPTNONE-SAME: () #[[ATTR2:[0-9]+]] {
+// OPTNONE-SAME: () #[[ATTR1:[0-9]+]] {
// OPTNONE-NEXT: entry:
-// OPTNONE-NEXT: call void @_Z11extern_funcv() #[[ATTR3]]
// OPTNONE-NEXT: ret void
//
-// OPT: Function Attrs: convergent mustprogress norecurse nounwind
+// OPT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
// OPT-LABEL: define {{[^@]+}}@_Z6kernelv
-// OPT-SAME: () local_unnamed_addr #[[ATTR2:[0-9]+]] {
+// OPT-SAME: () local_unnamed_addr #[[ATTR1:[0-9]+]] {
// OPT-NEXT: entry:
-// OPT-NEXT: tail call void @_Z11extern_funcv() #[[ATTR3]]
// OPT-NEXT: ret void
//
__global__ void kernel() {
- extern_func();
+
}
//.
-// OPTNONE: attributes #[[ATTR0]] = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
-// OPTNONE: attributes #[[ATTR1:[0-9]+]] = { convergent nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
-// OPTNONE: attributes #[[ATTR2]] = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,1024" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
-// OPTNONE: attributes #[[ATTR3]] = { convergent nounwind }
+// OPTNONE: attributes #0 = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+// OPTNONE: attributes #1 = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,1024" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
//.
-// OPT: attributes #[[ATTR0]] = { convergent mustprogress nounwind "amdgpu-waves-per-eu"="4,10" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" }
-// OPT: attributes #[[ATTR1:[0-9]+]] = { convergent nounwind "amdgpu-waves-per-eu"="4,10" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" }
-// OPT: attributes #[[ATTR2]] = { convergent mustprogress norecurse nounwind "amdgpu-flat-work-group-size"="1,1024" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
-// OPT: attributes #[[ATTR3]] = { convergent nounwind }
+// OPT: attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+// OPT: attributes #1 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "amdgpu-flat-work-group-size"="1,1024" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
//.
-// OPTNONE: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
-// OPTNONE: [[META1:![0-9]+]] = !{i32 1, !"amdgpu_printf_kind", !"hostcall"}
-// OPTNONE: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+// OPTNONE: !0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
+// OPTNONE: !1 = !{i32 1, !"amdgpu_printf_kind", !"hostcall"}
+// OPTNONE: !2 = !{i32 1, !"wchar_size", i32 4}
//.
-// OPT: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
-// OPT: [[META1:![0-9]+]] = !{i32 1, !"amdgpu_printf_kind", !"hostcall"}
-// OPT: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+// OPT: !0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
+// OPT: !1 = !{i32 1, !"amdgpu_printf_kind", !"hostcall"}
+// OPT: !2 = !{i32 1, !"wchar_size", i32 4}
//.
diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst
index 55b3b486d705d..1dd7fce2334c9 100644
--- a/llvm/docs/ReleaseNotes.rst
+++ b/llvm/docs/ReleaseNotes.rst
@@ -139,10 +139,6 @@ Changes to the AMDGPU Backend
:ref:`atomicrmw <i_atomicrmw>` instruction with `fadd`, `fmin` and
`fmax` with addrspace(3) instead.
-* AMDGPUAttributor is no longer run as part of the codegen pass
- pipeline. It is expected to run as part of the middle end
- optimizations.
-
Changes to the ARM Backend
--------------------------
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 9ddf0a310ed06..f50a18ccc2188 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -731,14 +731,6 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
PM.addPass(createCGSCCToFunctionPassAdaptor(std::move(FPM)));
});
- // FIXME: Why is AMDGPUAttributor not in CGSCC?
- PB.registerOptimizerLastEPCallback(
- [this](ModulePassManager &MPM, OptimizationLevel Level) {
- if (Level != OptimizationLevel::O0) {
- MPM.addPass(AMDGPUAttributorPass(*this));
- }
- });
-
PB.registerFullLinkTimeOptimizationLastEPCallback(
[this](ModulePassManager &PM, OptimizationLevel Level) {
// We want to support the -lto-partitions=N option as "best effort".
@@ -1045,6 +1037,11 @@ void AMDGPUPassConfig::addIRPasses() {
addPass(createAMDGPULowerModuleLDSLegacyPass(&TM));
}
+ // AMDGPUAttributor infers lack of llvm.amdgcn.lds.kernel.id calls, so run
+ // after their introduction
+ if (TM.getOptLevel() > CodeGenOptLevel::None)
+ addPass(createAMDGPUAttributorLegacyPass());
+
if (TM.getOptLevel() > CodeGenOptLevel::None)
addPass(createInferAddressSpacesPass());
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index 8c951105101d9..97a8ff4486609 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -679,12 +679,6 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
break;
}
}
-
- // FIXME: We can spill incoming arguments and restore at the end of the
- // prolog.
- if (!ScratchWaveOffsetReg)
- report_fatal_error(
- "could not find temporary scratch offset register in prolog");
} else {
ScratchWaveOffsetReg = PreloadedScratchWaveOffsetReg;
}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/addsubu64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/addsubu64.ll
index 359c1e53de99e..a38b6e3263882 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/addsubu64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/addsubu64.ll
@@ -6,8 +6,8 @@ define amdgpu_kernel void @s_add_u64(ptr addrspace(1) %out, i64 %a, i64 %b) {
; GFX11-LABEL: s_add_u64:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_clause 0x1
-; GFX11-NEXT: s_load_b128 s[4:7], s[2:3], 0x24
-; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x34
+; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x24
+; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x34
; GFX11-NEXT: v_mov_b32_e32 v2, 0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_add_u32 s0, s6, s0
@@ -22,8 +22,8 @@ define amdgpu_kernel void @s_add_u64(ptr addrspace(1) %out, i64 %a, i64 %b) {
; GFX12-LABEL: s_add_u64:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: s_clause 0x1
-; GFX12-NEXT: s_load_b128 s[4:7], s[2:3], 0x24
-; GFX12-NEXT: s_load_b64 s[0:1], s[2:3], 0x34
+; GFX12-NEXT: s_load_b128 s[4:7], s[0:1], 0x24
+; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x34
; GFX12-NEXT: v_mov_b32_e32 v2, 0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_add_nc_u64 s[0:1], s[6:7], s[0:1]
@@ -58,8 +58,8 @@ define amdgpu_kernel void @s_sub_u64(ptr addrspace(1) %out, i64 %a, i64 %b) {
; GFX11-LABEL: s_sub_u64:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_clause 0x1
-; GFX11-NEXT: s_load_b128 s[4:7], s[2:3], 0x24
-; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x34
+; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x24
+; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x34
; GFX11-NEXT: v_mov_b32_e32 v2, 0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_sub_u32 s0, s6, s0
@@ -74,8 +74,8 @@ define amdgpu_kernel void @s_sub_u64(ptr addrspace(1) %out, i64 %a, i64 %b) {
; GFX12-LABEL: s_sub_u64:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: s_clause 0x1
-; GFX12-NEXT: s_load_b128 s[4:7], s[2:3], 0x24
-; GFX12-NEXT: s_load_b64 s[0:1], s[2:3], 0x34
+; GFX12-NEXT: s_load_b128 s[4:7], s[0:1], 0x24
+; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x34
; GFX12-NEXT: v_mov_b32_e32 v2, 0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_sub_nc_u64 s[0:1], s[6:7], s[0:1]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_fmax.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_fmax.ll
index 0a8e805027c77..9be8620b024eb 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_fmax.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_fmax.ll
@@ -2026,7 +2026,7 @@ define float @buffer_fat_ptr_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_m
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v2, s6
+; GFX12-NEXT: v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v2, s4
; GFX12-NEXT: s_mov_b32 s4, 0
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_max_num_f32_e32 v3, v1, v1
@@ -2056,7 +2056,7 @@ define float @buffer_fat_ptr_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_m
; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_mov_b32_e32 v2, s6
+; GFX940-NEXT: v_mov_b32_e32 v2, s4
; GFX940-NEXT: v_mov_b32_e32 v1, v0
; GFX940-NEXT: buffer_load_dword v0, v2, s[0:3], 0 offen
; GFX940-NEXT: s_mov_b64 s[4:5], 0
@@ -2083,7 +2083,7 @@ define float @buffer_fat_ptr_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_m
; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v2, s6
+; GFX11-NEXT: v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v2, s4
; GFX11-NEXT: s_mov_b32 s4, 0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_max_f32_e32 v3, v1, v1
@@ -2114,14 +2114,10 @@ define float @buffer_fat_ptr_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_m
; GFX10-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_mov_b32_e32 v2, s18
-; GFX10-NEXT: s_mov_b32 s4, s6
-; GFX10-NEXT: s_mov_b32 s5, s7
-; GFX10-NEXT: s_mov_b32 s6, s16
-; GFX10-NEXT: s_mov_b32 s7, s17
+; GFX10-NEXT: v_mov_b32_e32 v2, s8
; GFX10-NEXT: v_mov_b32_e32 v1, v0
-; GFX10-NEXT: buffer_load_dword v0, v2, s[4:7], 0 offen
; GFX10-NEXT: s_mov_b32 s8, 0
+; GFX10-NEXT: buffer_load_dword v0, v2, s[4:7], 0 offen
; GFX10-NEXT: v_max_f32_e32 v3, v1, v1
; GFX10-NEXT: .LBB12_1: ; %atomicrmw.start
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -2147,11 +2143,7 @@ define float @buffer_fat_ptr_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_m
; GFX90A-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: s_mov_b32 s4, s6
-; GFX90A-NEXT: s_mov_b32 s5, s7
-; GFX90A-NEXT: s_mov_b32 s6, s16
-; GFX90A-NEXT: s_mov_b32 s7, s17
-; GFX90A-NEXT: v_mov_b32_e32 v2, s18
+; GFX90A-NEXT: v_mov_b32_e32 v2, s8
; GFX90A-NEXT: v_mov_b32_e32 v1, v0
; GFX90A-NEXT: buffer_load_dword v0, v2, s[4:7], 0 offen
; GFX90A-NEXT: s_mov_b64 s[8:9], 0
@@ -2177,11 +2169,7 @@ define float @buffer_fat_ptr_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_m
; GFX908-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory:
; GFX908: ; %bb.0:
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX908-NEXT: s_mov_b32 s4, s6
-; GFX908-NEXT: s_mov_b32 s5, s7
-; GFX908-NEXT: s_mov_b32 s6, s16
-; GFX908-NEXT: s_mov_b32 s7, s17
-; GFX908-NEXT: v_mov_b32_e32 v2, s18
+; GFX908-NEXT: v_mov_b32_e32 v2, s8
; GFX908-NEXT: v_mov_b32_e32 v1, v0
; GFX908-NEXT: buffer_load_dword v0, v2, s[4:7], 0 offen
; GFX908-NEXT: s_mov_b64 s[8:9], 0
@@ -2208,11 +2196,7 @@ define float @buffer_fat_ptr_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_m
; GFX8-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: s_mov_b32 s4, s6
-; GFX8-NEXT: s_mov_b32 s5, s7
-; GFX8-NEXT: s_mov_b32 s6, s16
-; GFX8-NEXT: s_mov_b32 s7, s17
-; GFX8-NEXT: v_mov_b32_e32 v2, s18
+; GFX8-NEXT: v_mov_b32_e32 v2, s8
; GFX8-NEXT: v_mov_b32_e32 v1, v0
; GFX8-NEXT: buffer_load_dword v0, v2, s[4:7], 0 offen
; GFX8-NEXT: s_mov_b64 s[8:9], 0
@@ -2239,11 +2223,7 @@ define float @buffer_fat_ptr_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_m
; GFX7-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: s_mov_b32 s4, s6
-; GFX7-NEXT: s_mov_b32 s5, s7
-; GFX7-NEXT: s_mov_b32 s6, s16
-; GFX7-NEXT: s_mov_b32 s7, s17
-; GFX7-NEXT: v_mov_b32_e32 v2, s18
+; GFX7-NEXT: v_mov_b32_e32 v2, s8
; GFX7-NEXT: v_mov_b32_e32 v1, v0
; GFX7-NEXT: buffer_load_dword v0, v2, s[4:7], 0 offen
; GFX7-NEXT: s_mov_b64 s[8:9], 0
@@ -2278,7 +2258,7 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_max_num_f32 v3, v0, v0
+; GFX12-NEXT: v_dual_mov_b32 v2, s4 :: v_dual_max_num_f32 v3, v0, v0
; GFX12-NEXT: s_mov_b32 s4, 0
; GFX12-NEXT: buffer_load_b32 v1, v2, s[0:3], null offen
; GFX12-NEXT: .LBB13_1: ; %atomicrmw.start
@@ -2305,7 +2285,7 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_
; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_mov_b32_e32 v2, s6
+; GFX940-NEXT: v_mov_b32_e32 v2, s4
; GFX940-NEXT: buffer_load_dword v1, v2, s[0:3], 0 offen
; GFX940-NEXT: s_mov_b64 s[4:5], 0
; GFX940-NEXT: v_max_f32_e32 v3, v0, v0
@@ -2331,7 +2311,7 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_
; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_max_f32 v3, v0, v0
+; GFX11-NEXT: v_dual_mov_b32 v2, s4 :: v_dual_max_f32 v3, v0, v0
; GFX11-NEXT: s_mov_b32 s4, 0
; GFX11-NEXT: buffer_load_b32 v1, v2, s[0:3], 0 offen
; GFX11-NEXT: .LBB13_1: ; %atomicrmw.start
@@ -2359,14 +2339,10 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_
; GFX10-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_mov_b32_e32 v2, s18
-; GFX10-NEXT: s_mov_b32 s4, s6
-; GFX10-NEXT: s_mov_b32 s5, s7
-; GFX10-NEXT: s_mov_b32 s6, s16
-; GFX10-NEXT: s_mov_b32 s7, s17
+; GFX10-NEXT: v_mov_b32_e32 v2, s8
; GFX10-NEXT: v_max_f32_e32 v3, v0, v0
-; GFX10-NEXT: buffer_load_dword v1, v2, s[4:7], 0 offen
; GFX10-NEXT: s_mov_b32 s8, 0
+; GFX10-NEXT: buffer_load_dword v1, v2, s[4:7], 0 offen
; GFX10-NEXT: .LBB13_1: ; %atomicrmw.start
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX10-NEXT: s_waitcnt vmcnt(0)
@@ -2391,11 +2367,7 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_
; GFX90A-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: s_mov_b32 s4, s6
-; GFX90A-NEXT: s_mov_b32 s5, s7
-; GFX90A-NEXT: s_mov_b32 s6, s16
-; GFX90A-NEXT: s_mov_b32 s7, s17
-; GFX90A-NEXT: v_mov_b32_e32 v2, s18
+; GFX90A-NEXT: v_mov_b32_e32 v2, s8
; GFX90A-NEXT: buffer_load_dword v1, v2, s[4:7], 0 offen
; GFX90A-NEXT: s_mov_b64 s[8:9], 0
; GFX90A-NEXT: v_max_f32_e32 v3, v0, v0
@@ -2420,11 +2392,7 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_
; GFX908-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory:
; GFX908: ; %bb.0:
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX908-NEXT: s_mov_b32 s4, s6
-; GFX908-NEXT: s_mov_b32 s5, s7
-; GFX908-NEXT: s_mov_b32 s6, s16
-; GFX908-NEXT: s_mov_b32 s7, s17
-; GFX908-NEXT: v_mov_b32_e32 v2, s18
+; GFX908-NEXT: v_mov_b32_e32 v2, s8
; GFX908-NEXT: buffer_load_dword v1, v2, s[4:7], 0 offen
; GFX908-NEXT: s_mov_b64 s[8:9], 0
; GFX908-NEXT: v_max_f32_e32 v3, v0, v0
@@ -2450,11 +2418,7 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_
; GFX8-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: s_mov_b32 s4, s6
-; GFX8-NEXT: s_mov_b32 s5, s7
-; GFX8-NEXT: s_mov_b32 s6, s16
-; GFX8-NEXT: s_mov_b32 s7, s17
-; GFX8-NEXT: v_mov_b32_e32 v2, s18
+; GFX8-NEXT: v_mov_b32_e32 v2, s8
; GFX8-NEXT: buffer_load_dword v1, v2, s[4:7], 0 offen
; GFX8-NEXT: s_mov_b64 s[8:9], 0
; GFX8-NEXT: v_mul_f32_e32 v3, 1.0, v0
@@ -2480,11 +2444,7 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_
; GFX7-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: s_mov_b32 s4, s6
-; GFX7-NEXT: s_mov_b32 s5, s7
-; GFX7-NEXT: s_mov_b32 s6, s16
-; GFX7-NEXT: s_mov_b32 s7, s17
-; GFX7-NEXT: v_mov_b32_e32 v2, s18
+; GFX7-NEXT: v_mov_b32_e32 v2, s8
; GFX7-NEXT: buffer_load_dword v1, v2, s[4:7], 0 offen
; GFX7-NEXT: s_mov_b64 s[8:9], 0
; GFX7-NEXT: v_mul_f32_e32 v3, 1.0, v0
@@ -2518,7 +2478,7 @@ define double @buffer_fat_ptr_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_...
[truncated]
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/30/builds/1975 Here is the relevant piece of the build log for the reference:
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/73/builds/1842 Here is the relevant piece of the build log for the reference:
|
@arsenm the previous two bot failure notifications are from your reapplication of your changes, not from my original revert. Not sure why it is mentioning it here. |
…lvm#83131)" and follow up commit "clang/AMDGPU: Defeat attribute optimization in attribute test" (llvm#98851)" This reverts commit b1bcb7c. Change-Id: Ia262230003989ed152f82ea475364b42d2592090
This reverts commits 677cc15 and 78bc1b6.
The test CodeGenHIP/default-attributes.hip is failing on multiple bots even after the attempted fix including the following:
These bots have been broken for a day, so reverting to get everything back to green.