[AMDGPU][GISel] Add inbounds flag to FLAT GISel tests #153000
This is in preparation for a patch that disables folding offsets into FLAT instructions if the corresponding address computation is not inbounds, to avoid miscompilations where this would lead to wrong aperture check results. With the added inbounds flags for GEPs and G_PTR_ADDs affecting FLAT instructions, the outputs for these tests won't change. For SWDEV-516125.
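To illustrate the motivation (a minimal sketch, not part of this patch; the function name and constants are invented for illustration), consider what the flag permits the backend to fold:

```llvm
; Hypothetical example: a flat (address space 0) load at a constant offset.
; Without `inbounds`, %ptr and %ptr + 16 could lie in different apertures,
; so folding the offset into the FLAT instruction's immediate could change
; the result of the hardware's aperture check. With `inbounds`, both
; addresses point into the same object, which lives within a single
; aperture, so folding the offset is safe.
define i32 @flat_load_offset(ptr %ptr) {
  %gep = getelementptr inbounds i32, ptr %ptr, i32 4
  %val = load i32, ptr %gep, align 4
  ret i32 %val
}
```

The MIR analogue used in the tests below is the `inbounds` flag on `G_PTR_ADD`, e.g. `%5:vgpr(p0) = inbounds G_PTR_ADD %0, %4`.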
@llvm/pr-subscribers-llvm-globalisel @llvm/pr-subscribers-backend-amdgpu

Author: Fabian Ritter (ritter-x2a)

Changes: This is in preparation for a patch that disables folding offsets into FLAT instructions if the corresponding address computation is not inbounds, to avoid miscompilations where this would lead to wrong aperture check results. With the added inbounds flags for GEPs and G_PTR_ADDs affecting FLAT instructions, the outputs for these tests won't change. For SWDEV-516125.

Patch is 39.99 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/153000.diff

13 Files Affected:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_udec_wrap.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_udec_wrap.ll
index a86939fc2ce8e..f2035c2787131 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_udec_wrap.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_udec_wrap.ll
@@ -1119,7 +1119,7 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset(ptr %out, ptr %ptr) #1
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: flat_store_b32 v[0:1], v2
; GFX11-NEXT: s_endpgm
- %gep = getelementptr i32, ptr %ptr, i32 4
+ %gep = getelementptr inbounds i32, ptr %ptr, i32 4
%result = atomicrmw udec_wrap ptr %gep, i32 42 syncscope("agent") seq_cst, align 4
store i32 %result, ptr %out, align 4
ret void
@@ -1218,7 +1218,7 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset_system(ptr %out, ptr %
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: flat_store_b32 v[0:1], v2
; GFX11-NEXT: s_endpgm
- %gep = getelementptr i32, ptr %ptr, i32 4
+ %gep = getelementptr inbounds i32, ptr %ptr, i32 4
%result = atomicrmw udec_wrap ptr %gep, i32 42 seq_cst, align 4
store i32 %result, ptr %out, align 4
ret void
@@ -1384,7 +1384,7 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset(ptr %ptr) #1 {
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: s_endpgm
- %gep = getelementptr i32, ptr %ptr, i32 4
+ %gep = getelementptr inbounds i32, ptr %ptr, i32 4
%result = atomicrmw udec_wrap ptr %gep, i32 42 syncscope("agent") seq_cst, align 4
ret void
}
@@ -1470,7 +1470,7 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset_system(ptr %ptr) #1
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: s_endpgm
- %gep = getelementptr i32, ptr %ptr, i32 4
+ %gep = getelementptr inbounds i32, ptr %ptr, i32 4
%result = atomicrmw udec_wrap ptr %gep, i32 42 seq_cst, align 4
ret void
}
@@ -1599,7 +1599,7 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset_addr64(ptr %out, ptr %
%id = call i32 @llvm.amdgcn.workitem.id.x()
%gep.tid = getelementptr i32, ptr %ptr, i32 %id
%out.gep = getelementptr i32, ptr %out, i32 %id
- %gep = getelementptr i32, ptr %gep.tid, i32 5
+ %gep = getelementptr inbounds i32, ptr %gep.tid, i32 5
%result = atomicrmw udec_wrap ptr %gep, i32 42 syncscope("agent") seq_cst, align 4
store i32 %result, ptr %out.gep, align 4
ret void
@@ -1706,7 +1706,7 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset_addr64(ptr %ptr) #1
; GFX11-NEXT: s_endpgm
%id = call i32 @llvm.amdgcn.workitem.id.x()
%gep.tid = getelementptr i32, ptr %ptr, i32 %id
- %gep = getelementptr i32, ptr %gep.tid, i32 5
+ %gep = getelementptr inbounds i32, ptr %gep.tid, i32 5
%result = atomicrmw udec_wrap ptr %gep, i32 42 syncscope("agent") seq_cst, align 4
ret void
}
@@ -1926,7 +1926,7 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset(ptr %out, ptr %ptr) #1
; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX11-NEXT: flat_store_b64 v[2:3], v[0:1]
; GFX11-NEXT: s_endpgm
- %gep = getelementptr i64, ptr %ptr, i32 4
+ %gep = getelementptr inbounds i64, ptr %ptr, i32 4
%result = atomicrmw udec_wrap ptr %gep, i64 42 syncscope("agent") seq_cst, align 8, !noalias.addrspace !0
store i64 %result, ptr %out, align 4
ret void
@@ -2102,7 +2102,7 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset(ptr %ptr) #1 {
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: s_endpgm
- %gep = getelementptr i64, ptr %ptr, i32 4
+ %gep = getelementptr inbounds i64, ptr %ptr, i32 4
%result = atomicrmw udec_wrap ptr %gep, i64 42 syncscope("agent") seq_cst, align 8, !noalias.addrspace !0
ret void
}
@@ -2193,7 +2193,7 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset_system(ptr %ptr) #1
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: s_endpgm
- %gep = getelementptr i64, ptr %ptr, i32 4
+ %gep = getelementptr inbounds i64, ptr %ptr, i32 4
%result = atomicrmw udec_wrap ptr %gep, i64 42 seq_cst, align 8, !noalias.addrspace !0
ret void
}
@@ -2333,7 +2333,7 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset_addr64(ptr %out, ptr %
%id = call i32 @llvm.amdgcn.workitem.id.x()
%gep.tid = getelementptr i64, ptr %ptr, i32 %id
%out.gep = getelementptr i64, ptr %out, i32 %id
- %gep = getelementptr i64, ptr %gep.tid, i32 5
+ %gep = getelementptr inbounds i64, ptr %gep.tid, i32 5
%result = atomicrmw udec_wrap ptr %gep, i64 42 syncscope("agent") seq_cst, align 8, !noalias.addrspace !0
store i64 %result, ptr %out.gep, align 4
ret void
@@ -2444,7 +2444,7 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset_addr64(ptr %ptr) #1
; GFX11-NEXT: s_endpgm
%id = call i32 @llvm.amdgcn.workitem.id.x()
%gep.tid = getelementptr i64, ptr %ptr, i32 %id
- %gep = getelementptr i64, ptr %gep.tid, i32 5
+ %gep = getelementptr inbounds i64, ptr %gep.tid, i32 5
%result = atomicrmw udec_wrap ptr %gep, i64 42 syncscope("agent") seq_cst, align 8, !noalias.addrspace !0
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_uinc_wrap.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_uinc_wrap.ll
index 7958e40ea0e68..80c743cac4840 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_uinc_wrap.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_uinc_wrap.ll
@@ -2525,7 +2525,7 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset(ptr %out, ptr %ptr) #1
; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX12-NEXT: flat_store_b32 v[0:1], v2
; GFX12-NEXT: s_endpgm
- %gep = getelementptr i32, ptr %ptr, i32 4
+ %gep = getelementptr inbounds i32, ptr %ptr, i32 4
%result = atomicrmw uinc_wrap ptr %gep, i32 42 syncscope("agent") seq_cst, align 4
store i32 %result, ptr %out, align 4
ret void
@@ -2639,7 +2639,7 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset_system(ptr %out, ptr %
; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX12-NEXT: flat_store_b32 v[0:1], v2
; GFX12-NEXT: s_endpgm
- %gep = getelementptr i32, ptr %ptr, i32 4
+ %gep = getelementptr inbounds i32, ptr %ptr, i32 4
%result = atomicrmw uinc_wrap ptr %gep, i32 42 seq_cst, align 4
store i32 %result, ptr %out, align 4
ret void
@@ -2827,7 +2827,7 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset(ptr %ptr) #1 {
; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
- %gep = getelementptr i32, ptr %ptr, i32 4
+ %gep = getelementptr inbounds i32, ptr %ptr, i32 4
%result = atomicrmw uinc_wrap ptr %gep, i32 42 syncscope("agent") seq_cst, align 4
ret void
}
@@ -2926,7 +2926,7 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset_system(ptr %ptr) #1
; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SYS
; GFX12-NEXT: s_endpgm
- %gep = getelementptr i32, ptr %ptr, i32 4
+ %gep = getelementptr inbounds i32, ptr %ptr, i32 4
%result = atomicrmw uinc_wrap ptr %gep, i32 42 seq_cst, align 4
ret void
}
@@ -3077,7 +3077,7 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset_addr64(ptr %out, ptr %
%id = call i32 @llvm.amdgcn.workitem.id.x()
%gep.tid = getelementptr i32, ptr %ptr, i32 %id
%out.gep = getelementptr i32, ptr %out, i32 %id
- %gep = getelementptr i32, ptr %gep.tid, i32 5
+ %gep = getelementptr inbounds i32, ptr %gep.tid, i32 5
%result = atomicrmw uinc_wrap ptr %gep, i32 42 syncscope("agent") seq_cst, align 4
store i32 %result, ptr %out.gep, align 4
ret void
@@ -3201,7 +3201,7 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset_addr64(ptr %ptr) #1
; GFX12-NEXT: s_endpgm
%id = call i32 @llvm.amdgcn.workitem.id.x()
%gep.tid = getelementptr i32, ptr %ptr, i32 %id
- %gep = getelementptr i32, ptr %gep.tid, i32 5
+ %gep = getelementptr inbounds i32, ptr %gep.tid, i32 5
%result = atomicrmw uinc_wrap ptr %gep, i32 42 syncscope("agent") seq_cst, align 4
ret void
}
@@ -3571,7 +3571,7 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset(ptr %out, ptr %ptr) #1
; GFX12-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
; GFX12-NEXT: s_endpgm
- %gep = getelementptr i64, ptr %ptr, i32 4
+ %gep = getelementptr inbounds i64, ptr %ptr, i32 4
%result = atomicrmw uinc_wrap ptr %gep, i64 42 syncscope("agent") seq_cst, align 8, !noalias.addrspace !0
store i64 %result, ptr %out, align 4
ret void
@@ -3701,7 +3701,7 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset_system(ptr %out, ptr %
; GFX12-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
; GFX12-NEXT: s_endpgm
- %gep = getelementptr i64, ptr %ptr, i32 4
+ %gep = getelementptr inbounds i64, ptr %ptr, i32 4
%result = atomicrmw uinc_wrap ptr %gep, i64 42 seq_cst, align 8, !noalias.addrspace !0
store i64 %result, ptr %out, align 4
ret void
@@ -3901,7 +3901,7 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset(ptr %ptr) #1 {
; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: s_endpgm
- %gep = getelementptr i64, ptr %ptr, i32 4
+ %gep = getelementptr inbounds i64, ptr %ptr, i32 4
%result = atomicrmw uinc_wrap ptr %gep, i64 42 syncscope("agent") seq_cst, align 8, !noalias.addrspace !0
ret void
}
@@ -4006,7 +4006,7 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset_system(ptr %ptr) #1
; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SYS
; GFX12-NEXT: s_endpgm
- %gep = getelementptr i64, ptr %ptr, i32 4
+ %gep = getelementptr inbounds i64, ptr %ptr, i32 4
%result = atomicrmw uinc_wrap ptr %gep, i64 42 seq_cst, align 8, !noalias.addrspace !0
ret void
}
@@ -4169,7 +4169,7 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset_addr64(ptr %out, ptr %
%id = call i32 @llvm.amdgcn.workitem.id.x()
%gep.tid = getelementptr i64, ptr %ptr, i32 %id
%out.gep = getelementptr i64, ptr %out, i32 %id
- %gep = getelementptr i64, ptr %gep.tid, i32 5
+ %gep = getelementptr inbounds i64, ptr %gep.tid, i32 5
%result = atomicrmw uinc_wrap ptr %gep, i64 42 syncscope("agent") seq_cst, align 8, !noalias.addrspace !0
store i64 %result, ptr %out.gep, align 4
ret void
@@ -4297,7 +4297,7 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset_addr64(ptr %ptr) #1
; GFX12-NEXT: s_endpgm
%id = call i32 @llvm.amdgcn.workitem.id.x()
%gep.tid = getelementptr i64, ptr %ptr, i32 %id
- %gep = getelementptr i64, ptr %gep.tid, i32 5
+ %gep = getelementptr inbounds i64, ptr %gep.tid, i32 5
%result = atomicrmw uinc_wrap ptr %gep, i64 42 syncscope("agent") seq_cst, align 8, !noalias.addrspace !0
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fp-atomics-gfx942.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fp-atomics-gfx942.ll
index 6792612ded368..7766b3ad45962 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fp-atomics-gfx942.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fp-atomics-gfx942.ll
@@ -108,7 +108,7 @@ define <2 x half> @flat_atomic_fadd_ret_v2f16_agent_offset(ptr %ptr, <2 x half>
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX942-NEXT: buffer_inv sc1
; GFX942-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr <2 x half>, ptr %ptr, i32 256
+ %gep = getelementptr inbounds <2 x half>, ptr %ptr, i32 256
%result = atomicrmw fadd ptr %gep, <2 x half> %val syncscope("agent") seq_cst
ret <2 x half> %result
}
@@ -122,7 +122,7 @@ define void @flat_atomic_fadd_noret_v2f16_agent_offset(ptr %ptr, <2 x half> %val
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX942-NEXT: buffer_inv sc1
; GFX942-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr <2 x half>, ptr %ptr, i32 256
+ %gep = getelementptr inbounds <2 x half>, ptr %ptr, i32 256
%unused = atomicrmw fadd ptr %gep, <2 x half> %val syncscope("agent") seq_cst
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-flat.mir
index 85d852fc779b2..be9de72a4ea9f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-flat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-flat.mir
@@ -153,7 +153,7 @@ body: |
%2:vgpr(s32) = COPY $vgpr3
%3:vgpr(<2 x s32>) = G_BUILD_VECTOR %1, %2
%4:vgpr(s64) = G_CONSTANT i64 4
- %5:vgpr(p0) = G_PTR_ADD %0, %4
+ %5:vgpr(p0) = inbounds G_PTR_ADD %0, %4
%6:vgpr(s32) = G_AMDGPU_ATOMIC_CMPXCHG %5, %3 :: (load store seq_cst (s32), addrspace 0)
$vgpr0 = COPY %6
@@ -305,7 +305,7 @@ body: |
%2:vgpr(s64) = COPY $vgpr4_vgpr5
%3:vgpr(<2 x s64>) = G_BUILD_VECTOR %1, %2
%4:vgpr(s64) = G_CONSTANT i64 4
- %5:vgpr(p0) = G_PTR_ADD %0, %4
+ %5:vgpr(p0) = inbounds G_PTR_ADD %0, %4
%6:vgpr(s64) = G_AMDGPU_ATOMIC_CMPXCHG %5, %3 :: (load store seq_cst (s64), addrspace 0)
$vgpr0_vgpr1 = COPY %6
@@ -406,7 +406,7 @@ body: |
%2:vgpr(s32) = COPY $vgpr3
%3:vgpr(<2 x s32>) = G_BUILD_VECTOR %1, %2
%4:vgpr(s64) = G_CONSTANT i64 -4
- %5:vgpr(p0) = G_PTR_ADD %0, %4
+ %5:vgpr(p0) = inbounds G_PTR_ADD %0, %4
%6:vgpr(s32) = G_AMDGPU_ATOMIC_CMPXCHG %5, %3 :: (load store seq_cst (s32), addrspace 0)
$vgpr0 = COPY %6
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-flat.mir
index dc317a8413cd5..3389ed72fe7d9 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-flat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-flat.mir
@@ -101,7 +101,7 @@ body: |
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
%2:vgpr(s64) = G_CONSTANT i64 2047
- %3:vgpr(p0) = G_PTR_ADD %0, %2
+ %3:vgpr(p0) = inbounds G_PTR_ADD %0, %2
%4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s32), addrspace 0)
$vgpr0 = COPY %4
@@ -155,7 +155,7 @@ body: |
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
%2:vgpr(s64) = G_CONSTANT i64 2047
- %3:vgpr(p0) = G_PTR_ADD %0, %2
+ %3:vgpr(p0) = inbounds G_PTR_ADD %0, %2
%4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s32), addrspace 0)
...
@@ -211,7 +211,7 @@ body: |
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
%2:vgpr(s64) = G_CONSTANT i64 2048
- %3:vgpr(p0) = G_PTR_ADD %0, %2
+ %3:vgpr(p0) = inbounds G_PTR_ADD %0, %2
%4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s32), addrspace 0)
$vgpr0 = COPY %4
@@ -265,7 +265,7 @@ body: |
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
%2:vgpr(s64) = G_CONSTANT i64 2048
- %3:vgpr(p0) = G_PTR_ADD %0, %2
+ %3:vgpr(p0) = inbounds G_PTR_ADD %0, %2
%4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s32), addrspace 0)
...
@@ -321,7 +321,7 @@ body: |
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
%2:vgpr(s64) = G_CONSTANT i64 4095
- %3:vgpr(p0) = G_PTR_ADD %0, %2
+ %3:vgpr(p0) = inbounds G_PTR_ADD %0, %2
%4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s32), addrspace 0)
$vgpr0 = COPY %4
@@ -375,7 +375,7 @@ body: |
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
%2:vgpr(s64) = G_CONSTANT i64 4095
- %3:vgpr(p0) = G_PTR_ADD %0, %2
+ %3:vgpr(p0) = inbounds G_PTR_ADD %0, %2
%4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s32), addrspace 0)
...
@@ -463,7 +463,7 @@ body: |
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
%2:vgpr(s64) = G_CONSTANT i64 4097
- %3:vgpr(p0) = G_PTR_ADD %0, %2
+ %3:vgpr(p0) = inbounds G_PTR_ADD %0, %2
%4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s32), addrspace 0)
$vgpr0 = COPY %4
@@ -547,7 +547,7 @@ body: |
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
%2:vgpr(s64) = G_CONSTANT i64 4097
- %3:vgpr(p0) = G_PTR_ADD %0, %2
+ %3:vgpr(p0) = inbounds G_PTR_ADD %0, %2
%4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s32), addrspace 0)
...
@@ -647,7 +647,7 @@ body: |
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = COPY $vgpr2_vgpr3
%2:vgpr(s64) = G_CONSTANT i64 4095
- %3:vgpr(p0) = G_PTR_ADD %0, %2
+ %3:vgpr(p0) = inbounds G_PTR_ADD %0, %2
%4:vgpr(s64) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s64), addrspace 0)
$vgpr0_vgpr1 = COPY %4
@@ -701,7 +701,7 @@ body: |
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = COPY $vgpr2_vgpr3
%2:vgpr(s64) = G_CONSTANT i64 4095
- %3:vgpr(p0) = G_PTR_ADD %0, %2
+ %3:vgpr(p0) = inbounds G_PTR_ADD %0, %2
%4:vgpr(s64) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s64), addrspace 0)
...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-flat.mir
index eba64b853ac05..5bfb2b2e4d578 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-flat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-flat.mir
@@ -492,7 +492,7 @@ body: |
; GFX12-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 -2048
- %2:vgpr(p0) = G_PTR_ADD %0, %1
+ %2:vgpr(p0) = inbounds G_PTR_ADD %0, %1
%3:vgpr(s32) = G_LOAD %2 :: (load seq_cst (s32), align 4, addrspace 0)
$vgpr0 = COPY %3
@@ -561,7 +561,7 @@ body: |
; GFX12-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 4095
- %2:vgpr(p0) = G_PTR_ADD %0, %1
+ %2:vgpr(p0) = inbounds G_PTR_ADD %0, %1
%3:vgpr(s32) = G_LOAD %2 :: (load seq_cst (s32), align 4, addrspace 0)
$vgpr0 = COPY %3
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir
index e1325a0a0bc50..532b4bfee3320 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir
@@ -1191,7 +1191,7 @@ body: |
; GFX12-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 2047
- %2:vgpr(p1) = G_PTR_ADD %0, %1
+ %2:vgpr(p1) = inbounds G_PTR_ADD %0, %1
%3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 0)
$vgpr0 = COPY %3
@@ -1275,7 +1275,7 @@ body: |
; GFX12-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 2048
- %2:vgpr(p1) = G_PTR_ADD %0, %1
+ %2:vgpr(p1) = inbounds G_PTR_ADD %0, %1
%3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 0)
$vgpr0 = COPY %3
@@ -1375,7 +1375,7 @@ body: |
; GFX12-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 -2047
- %2:vgpr(p1) = G_PTR_ADD %0, %1
+ %2:vgpr(p1) = inbounds G_PTR_ADD %0, %1
%3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 0)
$vgpr0 = COPY %3
@@ -1475,7 +1475,7 @@ body: |
; GFX12-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 -2048
- %2:vgpr(p1) = G_PTR_ADD %0, %1
+ %2:vgpr(p1) = inbounds G_PTR_ADD %0, %1
%3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 0)
$vgpr0 = COPY %3
@@ -1559,7 +1559,7 @@ body: |
; GFX12-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 4095
- %2:vgpr(p1) = G_PTR_ADD %0, %1
+ %2:vgpr(p1) = inbounds G_PTR_ADD %0, %1
%3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 0)
$vgpr0 = COPY %3
@@ -1659,7 +1659,7 @@ body: |
; GFX12-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
%0:...
[truncated]