Skip to content

Conversation

ritter-x2a
Copy link
Member

This is in preparation for a patch that disables folding offsets into FLAT
instructions if the corresponding address computation is not inbounds, to avoid
miscompilations where this would lead to wrong aperture check results.

With the added inbounds flags for GEPs and G_PTR_ADDs affecting FLAT
instructions, the outputs for these tests won't change.

For SWDEV-516125.

This is in preparation for a patch that disables folding offsets into FLAT
instructions if the corresponding address computation is not inbounds, to avoid
miscompilations where this would lead to wrong aperture check results.

With the added inbounds flags for GEPs and G_PTR_ADDs affecting FLAT
instructions, the outputs for these tests won't change.

For SWDEV-516125.
Copy link
Member Author

ritter-x2a commented Aug 11, 2025

@llvmbot
Copy link
Member

llvmbot commented Aug 11, 2025

@llvm/pr-subscribers-llvm-globalisel

@llvm/pr-subscribers-backend-amdgpu

Author: Fabian Ritter (ritter-x2a)

Changes

This is in preparation for a patch that disables folding offsets into FLAT
instructions if the corresponding address computation is not inbounds, to avoid
miscompilations where this would lead to wrong aperture check results.

With the added inbounds flags for GEPs and G_PTR_ADDs affecting FLAT
instructions, the outputs for these tests won't change.

For SWDEV-516125.


Patch is 39.99 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/153000.diff

13 Files Affected:

  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_udec_wrap.ll (+11-11)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_uinc_wrap.ll (+12-12)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/fp-atomics-gfx942.ll (+2-2)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-flat.mir (+3-3)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-flat.mir (+10-10)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-flat.mir (+2-2)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir (+14-14)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir (+3-3)
  • (modified) llvm/test/CodeGen/AMDGPU/atomics_cond_sub.ll (+3-3)
  • (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.flat.prefetch.ll (+1-1)
  • (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.prefetch.ll (+1-1)
  • (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.load.monitor.gfx1250.ll (+3-3)
  • (modified) llvm/test/CodeGen/AMDGPU/offset-split-flat.ll (+29-29)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_udec_wrap.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_udec_wrap.ll
index a86939fc2ce8e..f2035c2787131 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_udec_wrap.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_udec_wrap.ll
@@ -1119,7 +1119,7 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset(ptr %out, ptr %ptr) #1
 ; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
 ; GFX11-NEXT:    flat_store_b32 v[0:1], v2
 ; GFX11-NEXT:    s_endpgm
-  %gep = getelementptr i32, ptr %ptr, i32 4
+  %gep = getelementptr inbounds i32, ptr %ptr, i32 4
   %result = atomicrmw udec_wrap ptr %gep, i32 42 syncscope("agent") seq_cst, align 4
   store i32 %result, ptr %out, align 4
   ret void
@@ -1218,7 +1218,7 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset_system(ptr %out, ptr %
 ; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
 ; GFX11-NEXT:    flat_store_b32 v[0:1], v2
 ; GFX11-NEXT:    s_endpgm
-  %gep = getelementptr i32, ptr %ptr, i32 4
+  %gep = getelementptr inbounds i32, ptr %ptr, i32 4
   %result = atomicrmw udec_wrap ptr %gep, i32 42 seq_cst, align 4
   store i32 %result, ptr %out, align 4
   ret void
@@ -1384,7 +1384,7 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset(ptr %ptr) #1 {
 ; GFX11-NEXT:    buffer_gl1_inv
 ; GFX11-NEXT:    buffer_gl0_inv
 ; GFX11-NEXT:    s_endpgm
-  %gep = getelementptr i32, ptr %ptr, i32 4
+  %gep = getelementptr inbounds i32, ptr %ptr, i32 4
   %result = atomicrmw udec_wrap ptr %gep, i32 42 syncscope("agent") seq_cst, align 4
   ret void
 }
@@ -1470,7 +1470,7 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset_system(ptr %ptr) #1
 ; GFX11-NEXT:    buffer_gl1_inv
 ; GFX11-NEXT:    buffer_gl0_inv
 ; GFX11-NEXT:    s_endpgm
-  %gep = getelementptr i32, ptr %ptr, i32 4
+  %gep = getelementptr inbounds i32, ptr %ptr, i32 4
   %result = atomicrmw udec_wrap ptr %gep, i32 42 seq_cst, align 4
   ret void
 }
@@ -1599,7 +1599,7 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset_addr64(ptr %out, ptr %
   %id = call i32 @llvm.amdgcn.workitem.id.x()
   %gep.tid = getelementptr i32, ptr %ptr, i32 %id
   %out.gep = getelementptr i32, ptr %out, i32 %id
-  %gep = getelementptr i32, ptr %gep.tid, i32 5
+  %gep = getelementptr inbounds i32, ptr %gep.tid, i32 5
   %result = atomicrmw udec_wrap ptr %gep, i32 42 syncscope("agent") seq_cst, align 4
   store i32 %result, ptr %out.gep, align 4
   ret void
@@ -1706,7 +1706,7 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset_addr64(ptr %ptr) #1
 ; GFX11-NEXT:    s_endpgm
   %id = call i32 @llvm.amdgcn.workitem.id.x()
   %gep.tid = getelementptr i32, ptr %ptr, i32 %id
-  %gep = getelementptr i32, ptr %gep.tid, i32 5
+  %gep = getelementptr inbounds i32, ptr %gep.tid, i32 5
   %result = atomicrmw udec_wrap ptr %gep, i32 42 syncscope("agent") seq_cst, align 4
   ret void
 }
@@ -1926,7 +1926,7 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset(ptr %out, ptr %ptr) #1
 ; GFX11-NEXT:    v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
 ; GFX11-NEXT:    flat_store_b64 v[2:3], v[0:1]
 ; GFX11-NEXT:    s_endpgm
-  %gep = getelementptr i64, ptr %ptr, i32 4
+  %gep = getelementptr inbounds i64, ptr %ptr, i32 4
   %result = atomicrmw udec_wrap ptr %gep, i64 42 syncscope("agent") seq_cst, align 8, !noalias.addrspace !0
   store i64 %result, ptr %out, align 4
   ret void
@@ -2102,7 +2102,7 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset(ptr %ptr) #1 {
 ; GFX11-NEXT:    buffer_gl1_inv
 ; GFX11-NEXT:    buffer_gl0_inv
 ; GFX11-NEXT:    s_endpgm
-  %gep = getelementptr i64, ptr %ptr, i32 4
+  %gep = getelementptr inbounds i64, ptr %ptr, i32 4
   %result = atomicrmw udec_wrap ptr %gep, i64 42 syncscope("agent") seq_cst, align 8, !noalias.addrspace !0
   ret void
 }
@@ -2193,7 +2193,7 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset_system(ptr %ptr) #1
 ; GFX11-NEXT:    buffer_gl1_inv
 ; GFX11-NEXT:    buffer_gl0_inv
 ; GFX11-NEXT:    s_endpgm
-  %gep = getelementptr i64, ptr %ptr, i32 4
+  %gep = getelementptr inbounds i64, ptr %ptr, i32 4
   %result = atomicrmw udec_wrap ptr %gep, i64 42 seq_cst, align 8, !noalias.addrspace !0
   ret void
 }
@@ -2333,7 +2333,7 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset_addr64(ptr %out, ptr %
   %id = call i32 @llvm.amdgcn.workitem.id.x()
   %gep.tid = getelementptr i64, ptr %ptr, i32 %id
   %out.gep = getelementptr i64, ptr %out, i32 %id
-  %gep = getelementptr i64, ptr %gep.tid, i32 5
+  %gep = getelementptr inbounds i64, ptr %gep.tid, i32 5
   %result = atomicrmw udec_wrap ptr %gep, i64 42 syncscope("agent") seq_cst, align 8, !noalias.addrspace !0
   store i64 %result, ptr %out.gep, align 4
   ret void
@@ -2444,7 +2444,7 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset_addr64(ptr %ptr) #1
 ; GFX11-NEXT:    s_endpgm
   %id = call i32 @llvm.amdgcn.workitem.id.x()
   %gep.tid = getelementptr i64, ptr %ptr, i32 %id
-  %gep = getelementptr i64, ptr %gep.tid, i32 5
+  %gep = getelementptr inbounds i64, ptr %gep.tid, i32 5
   %result = atomicrmw udec_wrap ptr %gep, i64 42 syncscope("agent") seq_cst, align 8, !noalias.addrspace !0
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_uinc_wrap.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_uinc_wrap.ll
index 7958e40ea0e68..80c743cac4840 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_uinc_wrap.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_uinc_wrap.ll
@@ -2525,7 +2525,7 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset(ptr %out, ptr %ptr) #1
 ; GFX12-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
 ; GFX12-NEXT:    flat_store_b32 v[0:1], v2
 ; GFX12-NEXT:    s_endpgm
-  %gep = getelementptr i32, ptr %ptr, i32 4
+  %gep = getelementptr inbounds i32, ptr %ptr, i32 4
   %result = atomicrmw uinc_wrap ptr %gep, i32 42 syncscope("agent") seq_cst, align 4
   store i32 %result, ptr %out, align 4
   ret void
@@ -2639,7 +2639,7 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset_system(ptr %out, ptr %
 ; GFX12-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
 ; GFX12-NEXT:    flat_store_b32 v[0:1], v2
 ; GFX12-NEXT:    s_endpgm
-  %gep = getelementptr i32, ptr %ptr, i32 4
+  %gep = getelementptr inbounds i32, ptr %ptr, i32 4
   %result = atomicrmw uinc_wrap ptr %gep, i32 42 seq_cst, align 4
   store i32 %result, ptr %out, align 4
   ret void
@@ -2827,7 +2827,7 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset(ptr %ptr) #1 {
 ; GFX12-NEXT:    s_wait_storecnt_dscnt 0x0
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_endpgm
-  %gep = getelementptr i32, ptr %ptr, i32 4
+  %gep = getelementptr inbounds i32, ptr %ptr, i32 4
   %result = atomicrmw uinc_wrap ptr %gep, i32 42 syncscope("agent") seq_cst, align 4
   ret void
 }
@@ -2926,7 +2926,7 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset_system(ptr %ptr) #1
 ; GFX12-NEXT:    s_wait_storecnt_dscnt 0x0
 ; GFX12-NEXT:    global_inv scope:SCOPE_SYS
 ; GFX12-NEXT:    s_endpgm
-  %gep = getelementptr i32, ptr %ptr, i32 4
+  %gep = getelementptr inbounds i32, ptr %ptr, i32 4
   %result = atomicrmw uinc_wrap ptr %gep, i32 42 seq_cst, align 4
   ret void
 }
@@ -3077,7 +3077,7 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset_addr64(ptr %out, ptr %
   %id = call i32 @llvm.amdgcn.workitem.id.x()
   %gep.tid = getelementptr i32, ptr %ptr, i32 %id
   %out.gep = getelementptr i32, ptr %out, i32 %id
-  %gep = getelementptr i32, ptr %gep.tid, i32 5
+  %gep = getelementptr inbounds i32, ptr %gep.tid, i32 5
   %result = atomicrmw uinc_wrap ptr %gep, i32 42 syncscope("agent") seq_cst, align 4
   store i32 %result, ptr %out.gep, align 4
   ret void
@@ -3201,7 +3201,7 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset_addr64(ptr %ptr) #1
 ; GFX12-NEXT:    s_endpgm
   %id = call i32 @llvm.amdgcn.workitem.id.x()
   %gep.tid = getelementptr i32, ptr %ptr, i32 %id
-  %gep = getelementptr i32, ptr %gep.tid, i32 5
+  %gep = getelementptr inbounds i32, ptr %gep.tid, i32 5
   %result = atomicrmw uinc_wrap ptr %gep, i32 42 syncscope("agent") seq_cst, align 4
   ret void
 }
@@ -3571,7 +3571,7 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset(ptr %out, ptr %ptr) #1
 ; GFX12-NEXT:    v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
 ; GFX12-NEXT:    flat_store_b64 v[2:3], v[0:1]
 ; GFX12-NEXT:    s_endpgm
-  %gep = getelementptr i64, ptr %ptr, i32 4
+  %gep = getelementptr inbounds i64, ptr %ptr, i32 4
   %result = atomicrmw uinc_wrap ptr %gep, i64 42 syncscope("agent") seq_cst, align 8, !noalias.addrspace !0
   store i64 %result, ptr %out, align 4
   ret void
@@ -3701,7 +3701,7 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset_system(ptr %out, ptr %
 ; GFX12-NEXT:    v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
 ; GFX12-NEXT:    flat_store_b64 v[2:3], v[0:1]
 ; GFX12-NEXT:    s_endpgm
-  %gep = getelementptr i64, ptr %ptr, i32 4
+  %gep = getelementptr inbounds i64, ptr %ptr, i32 4
   %result = atomicrmw uinc_wrap ptr %gep, i64 42 seq_cst, align 8, !noalias.addrspace !0
   store i64 %result, ptr %out, align 4
   ret void
@@ -3901,7 +3901,7 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset(ptr %ptr) #1 {
 ; GFX12-NEXT:    s_wait_storecnt_dscnt 0x0
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_endpgm
-  %gep = getelementptr i64, ptr %ptr, i32 4
+  %gep = getelementptr inbounds i64, ptr %ptr, i32 4
   %result = atomicrmw uinc_wrap ptr %gep, i64 42 syncscope("agent") seq_cst, align 8, !noalias.addrspace !0
   ret void
 }
@@ -4006,7 +4006,7 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset_system(ptr %ptr) #1
 ; GFX12-NEXT:    s_wait_storecnt_dscnt 0x0
 ; GFX12-NEXT:    global_inv scope:SCOPE_SYS
 ; GFX12-NEXT:    s_endpgm
-  %gep = getelementptr i64, ptr %ptr, i32 4
+  %gep = getelementptr inbounds i64, ptr %ptr, i32 4
   %result = atomicrmw uinc_wrap ptr %gep, i64 42 seq_cst, align 8, !noalias.addrspace !0
   ret void
 }
@@ -4169,7 +4169,7 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset_addr64(ptr %out, ptr %
   %id = call i32 @llvm.amdgcn.workitem.id.x()
   %gep.tid = getelementptr i64, ptr %ptr, i32 %id
   %out.gep = getelementptr i64, ptr %out, i32 %id
-  %gep = getelementptr i64, ptr %gep.tid, i32 5
+  %gep = getelementptr inbounds i64, ptr %gep.tid, i32 5
   %result = atomicrmw uinc_wrap ptr %gep, i64 42 syncscope("agent") seq_cst, align 8, !noalias.addrspace !0
   store i64 %result, ptr %out.gep, align 4
   ret void
@@ -4297,7 +4297,7 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset_addr64(ptr %ptr) #1
 ; GFX12-NEXT:    s_endpgm
   %id = call i32 @llvm.amdgcn.workitem.id.x()
   %gep.tid = getelementptr i64, ptr %ptr, i32 %id
-  %gep = getelementptr i64, ptr %gep.tid, i32 5
+  %gep = getelementptr inbounds i64, ptr %gep.tid, i32 5
   %result = atomicrmw uinc_wrap ptr %gep, i64 42 syncscope("agent") seq_cst, align 8, !noalias.addrspace !0
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fp-atomics-gfx942.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fp-atomics-gfx942.ll
index 6792612ded368..7766b3ad45962 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fp-atomics-gfx942.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fp-atomics-gfx942.ll
@@ -108,7 +108,7 @@ define <2 x half> @flat_atomic_fadd_ret_v2f16_agent_offset(ptr %ptr, <2 x half>
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX942-NEXT:    buffer_inv sc1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
-  %gep = getelementptr <2 x half>, ptr %ptr, i32 256
+  %gep = getelementptr inbounds <2 x half>, ptr %ptr, i32 256
   %result = atomicrmw fadd ptr %gep, <2 x half> %val syncscope("agent") seq_cst
   ret <2 x half> %result
 }
@@ -122,7 +122,7 @@ define void @flat_atomic_fadd_noret_v2f16_agent_offset(ptr %ptr, <2 x half> %val
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX942-NEXT:    buffer_inv sc1
 ; GFX942-NEXT:    s_setpc_b64 s[30:31]
-  %gep = getelementptr <2 x half>, ptr %ptr, i32 256
+  %gep = getelementptr inbounds <2 x half>, ptr %ptr, i32 256
   %unused = atomicrmw fadd ptr %gep, <2 x half> %val syncscope("agent") seq_cst
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-flat.mir
index 85d852fc779b2..be9de72a4ea9f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-flat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-flat.mir
@@ -153,7 +153,7 @@ body:             |
     %2:vgpr(s32) = COPY $vgpr3
     %3:vgpr(<2 x s32>) = G_BUILD_VECTOR %1, %2
     %4:vgpr(s64) = G_CONSTANT i64 4
-    %5:vgpr(p0) = G_PTR_ADD %0, %4
+    %5:vgpr(p0) = inbounds G_PTR_ADD %0, %4
     %6:vgpr(s32) = G_AMDGPU_ATOMIC_CMPXCHG %5, %3 :: (load store seq_cst (s32), addrspace 0)
     $vgpr0 = COPY %6
 
@@ -305,7 +305,7 @@ body:             |
     %2:vgpr(s64) = COPY $vgpr4_vgpr5
     %3:vgpr(<2 x s64>) = G_BUILD_VECTOR %1, %2
     %4:vgpr(s64) = G_CONSTANT i64 4
-    %5:vgpr(p0) = G_PTR_ADD %0, %4
+    %5:vgpr(p0) = inbounds G_PTR_ADD %0, %4
     %6:vgpr(s64) = G_AMDGPU_ATOMIC_CMPXCHG %5, %3 :: (load store seq_cst (s64), addrspace 0)
     $vgpr0_vgpr1 = COPY %6
 
@@ -406,7 +406,7 @@ body:             |
     %2:vgpr(s32) = COPY $vgpr3
     %3:vgpr(<2 x s32>) = G_BUILD_VECTOR %1, %2
     %4:vgpr(s64) = G_CONSTANT i64 -4
-    %5:vgpr(p0) = G_PTR_ADD %0, %4
+    %5:vgpr(p0) = inbounds G_PTR_ADD %0, %4
     %6:vgpr(s32) = G_AMDGPU_ATOMIC_CMPXCHG %5, %3 :: (load store seq_cst (s32), addrspace 0)
     $vgpr0 = COPY %6
 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-flat.mir
index dc317a8413cd5..3389ed72fe7d9 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-flat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-flat.mir
@@ -101,7 +101,7 @@ body:             |
     %0:vgpr(p0) = COPY $vgpr0_vgpr1
     %1:vgpr(s32) = COPY $vgpr2
     %2:vgpr(s64) = G_CONSTANT i64 2047
-    %3:vgpr(p0) = G_PTR_ADD %0, %2
+    %3:vgpr(p0) = inbounds G_PTR_ADD %0, %2
     %4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s32), addrspace 0)
     $vgpr0 = COPY %4
 
@@ -155,7 +155,7 @@ body:             |
     %0:vgpr(p0) = COPY $vgpr0_vgpr1
     %1:vgpr(s32) = COPY $vgpr2
     %2:vgpr(s64) = G_CONSTANT i64 2047
-    %3:vgpr(p0) = G_PTR_ADD %0, %2
+    %3:vgpr(p0) = inbounds G_PTR_ADD %0, %2
     %4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s32), addrspace 0)
 
 ...
@@ -211,7 +211,7 @@ body:             |
     %0:vgpr(p0) = COPY $vgpr0_vgpr1
     %1:vgpr(s32) = COPY $vgpr2
     %2:vgpr(s64) = G_CONSTANT i64 2048
-    %3:vgpr(p0) = G_PTR_ADD %0, %2
+    %3:vgpr(p0) = inbounds G_PTR_ADD %0, %2
     %4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s32), addrspace 0)
     $vgpr0 = COPY %4
 
@@ -265,7 +265,7 @@ body:             |
     %0:vgpr(p0) = COPY $vgpr0_vgpr1
     %1:vgpr(s32) = COPY $vgpr2
     %2:vgpr(s64) = G_CONSTANT i64 2048
-    %3:vgpr(p0) = G_PTR_ADD %0, %2
+    %3:vgpr(p0) = inbounds G_PTR_ADD %0, %2
     %4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s32), addrspace 0)
 
 ...
@@ -321,7 +321,7 @@ body:             |
     %0:vgpr(p0) = COPY $vgpr0_vgpr1
     %1:vgpr(s32) = COPY $vgpr2
     %2:vgpr(s64) = G_CONSTANT i64 4095
-    %3:vgpr(p0) = G_PTR_ADD %0, %2
+    %3:vgpr(p0) = inbounds G_PTR_ADD %0, %2
     %4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s32), addrspace 0)
     $vgpr0 = COPY %4
 
@@ -375,7 +375,7 @@ body:             |
     %0:vgpr(p0) = COPY $vgpr0_vgpr1
     %1:vgpr(s32) = COPY $vgpr2
     %2:vgpr(s64) = G_CONSTANT i64 4095
-    %3:vgpr(p0) = G_PTR_ADD %0, %2
+    %3:vgpr(p0) = inbounds G_PTR_ADD %0, %2
     %4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s32), addrspace 0)
 
 ...
@@ -463,7 +463,7 @@ body:             |
     %0:vgpr(p0) = COPY $vgpr0_vgpr1
     %1:vgpr(s32) = COPY $vgpr2
     %2:vgpr(s64) = G_CONSTANT i64 4097
-    %3:vgpr(p0) = G_PTR_ADD %0, %2
+    %3:vgpr(p0) = inbounds G_PTR_ADD %0, %2
     %4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s32), addrspace 0)
     $vgpr0 = COPY %4
 
@@ -547,7 +547,7 @@ body:             |
     %0:vgpr(p0) = COPY $vgpr0_vgpr1
     %1:vgpr(s32) = COPY $vgpr2
     %2:vgpr(s64) = G_CONSTANT i64 4097
-    %3:vgpr(p0) = G_PTR_ADD %0, %2
+    %3:vgpr(p0) = inbounds G_PTR_ADD %0, %2
     %4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s32), addrspace 0)
 
 ...
@@ -647,7 +647,7 @@ body:             |
     %0:vgpr(p0) = COPY $vgpr0_vgpr1
     %1:vgpr(s64) = COPY $vgpr2_vgpr3
     %2:vgpr(s64) = G_CONSTANT i64 4095
-    %3:vgpr(p0) = G_PTR_ADD %0, %2
+    %3:vgpr(p0) = inbounds G_PTR_ADD %0, %2
     %4:vgpr(s64) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s64), addrspace 0)
     $vgpr0_vgpr1 = COPY %4
 
@@ -701,7 +701,7 @@ body:             |
     %0:vgpr(p0) = COPY $vgpr0_vgpr1
     %1:vgpr(s64) = COPY $vgpr2_vgpr3
     %2:vgpr(s64) = G_CONSTANT i64 4095
-    %3:vgpr(p0) = G_PTR_ADD %0, %2
+    %3:vgpr(p0) = inbounds G_PTR_ADD %0, %2
     %4:vgpr(s64) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s64), addrspace 0)
 
 ...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-flat.mir
index eba64b853ac05..5bfb2b2e4d578 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-flat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-flat.mir
@@ -492,7 +492,7 @@ body: |
     ; GFX12-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
     %0:vgpr(p0) = COPY $vgpr0_vgpr1
     %1:vgpr(s64) = G_CONSTANT i64 -2048
-    %2:vgpr(p0) = G_PTR_ADD %0, %1
+    %2:vgpr(p0) = inbounds G_PTR_ADD %0, %1
     %3:vgpr(s32) = G_LOAD %2 :: (load seq_cst (s32), align 4, addrspace 0)
     $vgpr0 = COPY %3
 
@@ -561,7 +561,7 @@ body: |
     ; GFX12-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
     %0:vgpr(p0) = COPY $vgpr0_vgpr1
     %1:vgpr(s64) = G_CONSTANT i64 4095
-    %2:vgpr(p0) = G_PTR_ADD %0, %1
+    %2:vgpr(p0) = inbounds G_PTR_ADD %0, %1
     %3:vgpr(s32) = G_LOAD %2 :: (load seq_cst (s32), align 4, addrspace 0)
     $vgpr0 = COPY %3
 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir
index e1325a0a0bc50..532b4bfee3320 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir
@@ -1191,7 +1191,7 @@ body: |
     ; GFX12-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
     %0:vgpr(p1) = COPY $vgpr0_vgpr1
     %1:vgpr(s64) = G_CONSTANT i64 2047
-    %2:vgpr(p1) = G_PTR_ADD %0, %1
+    %2:vgpr(p1) = inbounds G_PTR_ADD %0, %1
     %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 0)
     $vgpr0 = COPY %3
 
@@ -1275,7 +1275,7 @@ body: |
     ; GFX12-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
     %0:vgpr(p1) = COPY $vgpr0_vgpr1
     %1:vgpr(s64) = G_CONSTANT i64 2048
-    %2:vgpr(p1) = G_PTR_ADD %0, %1
+    %2:vgpr(p1) = inbounds G_PTR_ADD %0, %1
     %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 0)
     $vgpr0 = COPY %3
 
@@ -1375,7 +1375,7 @@ body: |
     ; GFX12-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
     %0:vgpr(p1) = COPY $vgpr0_vgpr1
     %1:vgpr(s64) = G_CONSTANT i64 -2047
-    %2:vgpr(p1) = G_PTR_ADD %0, %1
+    %2:vgpr(p1) = inbounds G_PTR_ADD %0, %1
     %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 0)
     $vgpr0 = COPY %3
 
@@ -1475,7 +1475,7 @@ body: |
     ; GFX12-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
     %0:vgpr(p1) = COPY $vgpr0_vgpr1
     %1:vgpr(s64) = G_CONSTANT i64 -2048
-    %2:vgpr(p1) = G_PTR_ADD %0, %1
+    %2:vgpr(p1) = inbounds G_PTR_ADD %0, %1
     %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 0)
     $vgpr0 = COPY %3
 
@@ -1559,7 +1559,7 @@ body: |
     ; GFX12-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
     %0:vgpr(p1) = COPY $vgpr0_vgpr1
     %1:vgpr(s64) = G_CONSTANT i64 4095
-    %2:vgpr(p1) = G_PTR_ADD %0, %1
+    %2:vgpr(p1) = inbounds G_PTR_ADD %0, %1
     %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 0)
     $vgpr0 = COPY %3
 
@@ -1659,7 +1659,7 @@ body: |
     ; GFX12-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
     %0:...
[truncated]

Copy link
Member Author

ritter-x2a commented Aug 12, 2025

Merge activity

  • Aug 12, 7:33 AM UTC: A user started a stack merge that includes this pull request via Graphite.
  • Aug 12, 7:35 AM UTC: @ritter-x2a merged this pull request with Graphite.

@ritter-x2a ritter-x2a merged commit 53af2e6 into main Aug 12, 2025
14 checks passed
@ritter-x2a ritter-x2a deleted the users/ritter-x2a/08-11-_amdgpu_gisel_add_inbounds_flag_to_flat_gisel_tests branch August 12, 2025 07:35
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants