Skip to content

Commit 4c36a54

Browse files
committed
[AMDGPU] Allow negative offsets in scratch instructions
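Removing the FeatureNegativeScratchOffsetBug subtarget feature ("negative-scratch-offset-bug") lets scratch instructions fold negative immediate offsets into their offset field, which in turn allows LocalStackSlotAlloc to reuse a base register even if the offset is negative. Resolves issue #155902.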
1 parent 48e34d9 commit 4c36a54

File tree

6 files changed

+284
-71
lines changed

6 files changed

+284
-71
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -334,12 +334,6 @@ def FeatureFlatSegmentOffsetBug : SubtargetFeature<"flat-segment-offset-bug",
334334
"GFX10 bug where inst_offset is ignored when flat instructions access global memory"
335335
>;
336336

337-
def FeatureNegativeScratchOffsetBug : SubtargetFeature<"negative-scratch-offset-bug",
338-
"NegativeScratchOffsetBug",
339-
"true",
340-
"Negative immediate offsets in scratch instructions with an SGPR offset page fault on GFX9"
341-
>;
342-
343337
def FeatureNegativeUnalignedScratchOffsetBug : SubtargetFeature<"negative-unaligned-scratch-offset-bug",
344338
"NegativeUnalignedScratchOffsetBug",
345339
"true",
@@ -1588,8 +1582,8 @@ def FeatureGFX9 : GCNSubtargetFeatureGeneration<"GFX9",
15881582
FeatureScalarFlatScratchInsts, FeatureScalarAtomics, FeatureR128A16,
15891583
FeatureA16, FeatureSMemTimeInst, FeatureFastDenormalF32, FeatureSupportsXNACK,
15901584
FeatureUnalignedBufferAccess, FeatureUnalignedScratchAccess,
1591-
FeatureUnalignedDSAccess, FeatureNegativeScratchOffsetBug, FeatureGWS,
1592-
FeatureDefaultComponentZero,FeatureVmemWriteVgprInOrder, FeatureMemToLDSLoad,
1585+
FeatureUnalignedDSAccess, FeatureGWS, FeatureDefaultComponentZero,
1586+
FeatureVmemWriteVgprInOrder, FeatureMemToLDSLoad,
15931587
FeatureCubeInsts, FeatureLerpInst, FeatureSadInsts, FeatureQsadInsts,
15941588
FeatureCvtNormInsts, FeatureCvtPkNormVOP2Insts,
15951589
FeatureCvtPkNormVOP3Insts

llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll

Lines changed: 12 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -3947,9 +3947,9 @@ define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset(pt
39473947
; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s5
39483948
; GFX9-NEXT: v_add_u32_e32 v0, s3, v0
39493949
; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0
3950-
; GFX9-NEXT: v_add3_u32 v0, s2, v0, -16
3950+
; GFX9-NEXT: v_add_u32_e32 v0, s2, v0
39513951
; GFX9-NEXT: v_mov_b32_e32 v1, 15
3952-
; GFX9-NEXT: scratch_store_dword v0, v1, off
3952+
; GFX9-NEXT: scratch_store_dword v0, v1, off offset:-16
39533953
; GFX9-NEXT: s_waitcnt vmcnt(0)
39543954
; GFX9-NEXT: s_endpgm
39553955
;
@@ -3969,9 +3969,9 @@ define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset(pt
39693969
; GFX942-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
39703970
; GFX942: ; %bb.0: ; %bb
39713971
; GFX942-NEXT: v_add_u32_e32 v0, s1, v0
3972-
; GFX942-NEXT: v_add3_u32 v0, s0, v0, -16
3972+
; GFX942-NEXT: v_add_u32_e32 v0, s0, v0
39733973
; GFX942-NEXT: v_mov_b32_e32 v1, 15
3974-
; GFX942-NEXT: scratch_store_dword v0, v1, off sc0 sc1
3974+
; GFX942-NEXT: scratch_store_dword v0, v1, off offset:-16 sc0 sc1
39753975
; GFX942-NEXT: s_waitcnt vmcnt(0)
39763976
; GFX942-NEXT: s_endpgm
39773977
;
@@ -3996,9 +3996,9 @@ define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset(pt
39963996
; UNALIGNED_GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s5
39973997
; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v0, s3, v0
39983998
; UNALIGNED_GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0
3999-
; UNALIGNED_GFX9-NEXT: v_add3_u32 v0, s2, v0, -16
3999+
; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v0, s2, v0
40004000
; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v1, 15
4001-
; UNALIGNED_GFX9-NEXT: scratch_store_dword v0, v1, off
4001+
; UNALIGNED_GFX9-NEXT: scratch_store_dword v0, v1, off offset:-16
40024002
; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
40034003
; UNALIGNED_GFX9-NEXT: s_endpgm
40044004
;
@@ -4018,9 +4018,9 @@ define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset(pt
40184018
; UNALIGNED_GFX942-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
40194019
; UNALIGNED_GFX942: ; %bb.0: ; %bb
40204020
; UNALIGNED_GFX942-NEXT: v_add_u32_e32 v0, s1, v0
4021-
; UNALIGNED_GFX942-NEXT: v_add3_u32 v0, s0, v0, -16
4021+
; UNALIGNED_GFX942-NEXT: v_add_u32_e32 v0, s0, v0
40224022
; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v1, 15
4023-
; UNALIGNED_GFX942-NEXT: scratch_store_dword v0, v1, off sc0 sc1
4023+
; UNALIGNED_GFX942-NEXT: scratch_store_dword v0, v1, off offset:-16 sc0 sc1
40244024
; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0)
40254025
; UNALIGNED_GFX942-NEXT: s_endpgm
40264026
;
@@ -4052,8 +4052,7 @@ define amdgpu_gs void @sgpr_base_negative_offset(ptr addrspace(1) %out, ptr addr
40524052
; GFX9: ; %bb.0: ; %entry
40534053
; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s5
40544054
; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0
4055-
; GFX9-NEXT: s_add_u32 s0, s2, 0xffffffe8
4056-
; GFX9-NEXT: scratch_load_dword v2, off, s0
4055+
; GFX9-NEXT: scratch_load_dword v2, off, s2 offset:-24
40574056
; GFX9-NEXT: s_waitcnt vmcnt(0)
40584057
; GFX9-NEXT: global_store_dword v[0:1], v2, off
40594058
; GFX9-NEXT: s_endpgm
@@ -4071,8 +4070,7 @@ define amdgpu_gs void @sgpr_base_negative_offset(ptr addrspace(1) %out, ptr addr
40714070
;
40724071
; GFX942-LABEL: sgpr_base_negative_offset:
40734072
; GFX942: ; %bb.0: ; %entry
4074-
; GFX942-NEXT: s_add_u32 s0, s0, 0xffffffe8
4075-
; GFX942-NEXT: scratch_load_dword v2, off, s0
4073+
; GFX942-NEXT: scratch_load_dword v2, off, s0 offset:-24
40764074
; GFX942-NEXT: s_waitcnt vmcnt(0)
40774075
; GFX942-NEXT: global_store_dword v[0:1], v2, off
40784076
; GFX942-NEXT: s_endpgm
@@ -4095,8 +4093,7 @@ define amdgpu_gs void @sgpr_base_negative_offset(ptr addrspace(1) %out, ptr addr
40954093
; UNALIGNED_GFX9: ; %bb.0: ; %entry
40964094
; UNALIGNED_GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s5
40974095
; UNALIGNED_GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0
4098-
; UNALIGNED_GFX9-NEXT: s_add_u32 s0, s2, 0xffffffe8
4099-
; UNALIGNED_GFX9-NEXT: scratch_load_dword v2, off, s0
4096+
; UNALIGNED_GFX9-NEXT: scratch_load_dword v2, off, s2 offset:-24
41004097
; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
41014098
; UNALIGNED_GFX9-NEXT: global_store_dword v[0:1], v2, off
41024099
; UNALIGNED_GFX9-NEXT: s_endpgm
@@ -4114,8 +4111,7 @@ define amdgpu_gs void @sgpr_base_negative_offset(ptr addrspace(1) %out, ptr addr
41144111
;
41154112
; UNALIGNED_GFX942-LABEL: sgpr_base_negative_offset:
41164113
; UNALIGNED_GFX942: ; %bb.0: ; %entry
4117-
; UNALIGNED_GFX942-NEXT: s_add_u32 s0, s0, 0xffffffe8
4118-
; UNALIGNED_GFX942-NEXT: scratch_load_dword v2, off, s0
4114+
; UNALIGNED_GFX942-NEXT: scratch_load_dword v2, off, s0 offset:-24
41194115
; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0)
41204116
; UNALIGNED_GFX942-NEXT: global_store_dword v[0:1], v2, off
41214117
; UNALIGNED_GFX942-NEXT: s_endpgm
Lines changed: 232 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,232 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
2+
; RUN: llc < %s -verify-machineinstrs -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 | FileCheck %s --check-prefix=GFX950
3+
4+
; Ensure we don't crash with: "Cannot scavenge register in FI elimination!"
5+
define amdgpu_kernel void @issue155902(i64 %arg, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %arg5, i64 %arg6, i64 %arg7, i64 %arg8, i64 %arg9, i64 %arg10, i64 %arg11, i64 %arg12, i64 %arg13, i64 %arg14, i64 %arg15, i64 %arg16, i64 %arg17, i64 %arg18, i64 %arg19, i64 %arg20, i64 %arg21, i64 %arg22, i64 %arg23, i64 %arg24, i64 %arg25, i64 %arg26, i64 %arg27, i64 %arg28, i64 %arg29, i64 %arg30, i64 %arg31, i64 %arg32, i64 %arg33, i64 %arg34, i64 %arg35, i64 %arg36, i64 %arg37, i64 %arg38, i64 %arg39, i64 %arg40, i64 %arg41, i64 %arg42, i64 %arg43, i64 %arg44, i64 %arg45, i64 %arg46, i64 %arg47, i64 %arg48, i64 %arg49) {
6+
; GFX950-LABEL: issue155902:
7+
; GFX950: ; %bb.0: ; %bb
8+
; GFX950-NEXT: s_mov_b32 s0, 8
9+
; GFX950-NEXT: s_mov_b32 s1, 0x4008
10+
; GFX950-NEXT: s_add_i32 s33, s0, s1
11+
; GFX950-NEXT: s_mov_b64 s[2:3], s[4:5]
12+
; GFX950-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
13+
; GFX950-NEXT: s_load_dwordx2 vcc, s[2:3], 0x8
14+
; GFX950-NEXT: s_load_dwordx2 s[98:99], s[2:3], 0x10
15+
; GFX950-NEXT: s_load_dwordx2 s[96:97], s[2:3], 0x18
16+
; GFX950-NEXT: s_load_dwordx2 s[94:95], s[2:3], 0x20
17+
; GFX950-NEXT: s_load_dwordx2 s[92:93], s[2:3], 0x28
18+
; GFX950-NEXT: s_load_dwordx2 s[90:91], s[2:3], 0x30
19+
; GFX950-NEXT: s_load_dwordx2 s[88:89], s[2:3], 0x38
20+
; GFX950-NEXT: s_load_dwordx2 s[86:87], s[2:3], 0x40
21+
; GFX950-NEXT: s_load_dwordx2 s[84:85], s[2:3], 0x48
22+
; GFX950-NEXT: s_load_dwordx2 s[82:83], s[2:3], 0x50
23+
; GFX950-NEXT: s_load_dwordx2 s[80:81], s[2:3], 0x58
24+
; GFX950-NEXT: s_load_dwordx2 s[78:79], s[2:3], 0x60
25+
; GFX950-NEXT: s_load_dwordx2 s[76:77], s[2:3], 0x68
26+
; GFX950-NEXT: s_load_dwordx2 s[74:75], s[2:3], 0x70
27+
; GFX950-NEXT: s_load_dwordx2 s[72:73], s[2:3], 0x78
28+
; GFX950-NEXT: s_load_dwordx2 s[70:71], s[2:3], 0x80
29+
; GFX950-NEXT: s_load_dwordx2 s[68:69], s[2:3], 0x88
30+
; GFX950-NEXT: s_load_dwordx2 s[66:67], s[2:3], 0x90
31+
; GFX950-NEXT: s_load_dwordx2 s[64:65], s[2:3], 0x98
32+
; GFX950-NEXT: s_load_dwordx2 s[62:63], s[2:3], 0xa0
33+
; GFX950-NEXT: s_load_dwordx2 s[60:61], s[2:3], 0xa8
34+
; GFX950-NEXT: s_load_dwordx2 s[58:59], s[2:3], 0xb0
35+
; GFX950-NEXT: s_load_dwordx2 s[56:57], s[2:3], 0xb8
36+
; GFX950-NEXT: s_load_dwordx2 s[54:55], s[2:3], 0xc0
37+
; GFX950-NEXT: s_load_dwordx2 s[52:53], s[2:3], 0xc8
38+
; GFX950-NEXT: s_load_dwordx2 s[50:51], s[2:3], 0xd0
39+
; GFX950-NEXT: s_load_dwordx2 s[48:49], s[2:3], 0xd8
40+
; GFX950-NEXT: s_load_dwordx2 s[46:47], s[2:3], 0xe0
41+
; GFX950-NEXT: s_load_dwordx2 s[44:45], s[2:3], 0xe8
42+
; GFX950-NEXT: s_load_dwordx2 s[42:43], s[2:3], 0xf0
43+
; GFX950-NEXT: s_load_dwordx2 s[40:41], s[2:3], 0xf8
44+
; GFX950-NEXT: s_load_dwordx2 s[38:39], s[2:3], 0x100
45+
; GFX950-NEXT: s_load_dwordx2 s[36:37], s[2:3], 0x108
46+
; GFX950-NEXT: s_load_dwordx2 s[34:35], s[2:3], 0x110
47+
; GFX950-NEXT: s_load_dwordx2 s[30:31], s[2:3], 0x118
48+
; GFX950-NEXT: s_load_dwordx2 s[28:29], s[2:3], 0x120
49+
; GFX950-NEXT: s_load_dwordx2 s[26:27], s[2:3], 0x128
50+
; GFX950-NEXT: s_load_dwordx2 s[24:25], s[2:3], 0x130
51+
; GFX950-NEXT: s_load_dwordx2 s[22:23], s[2:3], 0x138
52+
; GFX950-NEXT: s_load_dwordx2 s[20:21], s[2:3], 0x140
53+
; GFX950-NEXT: s_load_dwordx2 s[18:19], s[2:3], 0x148
54+
; GFX950-NEXT: s_load_dwordx2 s[16:17], s[2:3], 0x150
55+
; GFX950-NEXT: s_load_dwordx2 s[14:15], s[2:3], 0x158
56+
; GFX950-NEXT: s_load_dwordx2 s[12:13], s[2:3], 0x160
57+
; GFX950-NEXT: s_load_dwordx2 s[10:11], s[2:3], 0x168
58+
; GFX950-NEXT: s_load_dwordx2 s[8:9], s[2:3], 0x170
59+
; GFX950-NEXT: s_load_dwordx2 s[6:7], s[2:3], 0x178
60+
; GFX950-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x180
61+
; GFX950-NEXT: s_nop 0
62+
; GFX950-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x188
63+
; GFX950-NEXT: v_mov_b64_e32 v[0:1], 0
64+
; GFX950-NEXT: scratch_store_dwordx2 off, v[0:1], s33
65+
; GFX950-NEXT: scratch_store_dwordx2 off, v[0:1], s33 offset:-8
66+
; GFX950-NEXT: v_mov_b64_e32 v[0:1], 0x384
67+
; GFX950-NEXT: scratch_store_dwordx2 off, v[0:1], s33 offset:8
68+
; GFX950-NEXT: s_mov_b32 s33, 0
69+
; GFX950-NEXT: ; implicit-def: $vgpr2 : SGPR spill to VGPR lane
70+
; GFX950-NEXT: v_writelane_b32 v2, s33, 0
71+
; GFX950-NEXT: s_waitcnt lgkmcnt(0)
72+
; GFX950-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
73+
; GFX950-NEXT: v_readlane_b32 s0, v2, 0
74+
; GFX950-NEXT: s_nop 4
75+
; GFX950-NEXT: scratch_store_dwordx2 off, v[0:1], s0
76+
; GFX950-NEXT: v_mov_b64_e32 v[0:1], vcc
77+
; GFX950-NEXT: scratch_store_dwordx2 off, v[0:1], s0
78+
; GFX950-NEXT: v_mov_b64_e32 v[0:1], s[98:99]
79+
; GFX950-NEXT: scratch_store_dwordx2 off, v[0:1], s0
80+
; GFX950-NEXT: v_mov_b64_e32 v[0:1], s[96:97]
81+
; GFX950-NEXT: scratch_store_dwordx2 off, v[0:1], s0
82+
; GFX950-NEXT: v_mov_b64_e32 v[0:1], s[94:95]
83+
; GFX950-NEXT: scratch_store_dwordx2 off, v[0:1], s0
84+
; GFX950-NEXT: v_mov_b64_e32 v[0:1], s[92:93]
85+
; GFX950-NEXT: scratch_store_dwordx2 off, v[0:1], s0
86+
; GFX950-NEXT: v_mov_b64_e32 v[0:1], s[90:91]
87+
; GFX950-NEXT: scratch_store_dwordx2 off, v[0:1], s0
88+
; GFX950-NEXT: v_mov_b64_e32 v[0:1], s[88:89]
89+
; GFX950-NEXT: scratch_store_dwordx2 off, v[0:1], s0
90+
; GFX950-NEXT: v_mov_b64_e32 v[0:1], s[86:87]
91+
; GFX950-NEXT: scratch_store_dwordx2 off, v[0:1], s0
92+
; GFX950-NEXT: v_mov_b64_e32 v[0:1], s[84:85]
93+
; GFX950-NEXT: scratch_store_dwordx2 off, v[0:1], s0
94+
; GFX950-NEXT: v_mov_b64_e32 v[0:1], s[82:83]
95+
; GFX950-NEXT: scratch_store_dwordx2 off, v[0:1], s0
96+
; GFX950-NEXT: v_mov_b64_e32 v[0:1], s[80:81]
97+
; GFX950-NEXT: scratch_store_dwordx2 off, v[0:1], s0
98+
; GFX950-NEXT: v_mov_b64_e32 v[0:1], s[78:79]
99+
; GFX950-NEXT: scratch_store_dwordx2 off, v[0:1], s0
100+
; GFX950-NEXT: v_mov_b64_e32 v[0:1], s[76:77]
101+
; GFX950-NEXT: scratch_store_dwordx2 off, v[0:1], s0
102+
; GFX950-NEXT: v_mov_b64_e32 v[0:1], s[74:75]
103+
; GFX950-NEXT: scratch_store_dwordx2 off, v[0:1], s0
104+
; GFX950-NEXT: v_mov_b64_e32 v[0:1], s[72:73]
105+
; GFX950-NEXT: scratch_store_dwordx2 off, v[0:1], s0
106+
; GFX950-NEXT: v_mov_b64_e32 v[0:1], s[70:71]
107+
; GFX950-NEXT: scratch_store_dwordx2 off, v[0:1], s0
108+
; GFX950-NEXT: v_mov_b64_e32 v[0:1], s[68:69]
109+
; GFX950-NEXT: scratch_store_dwordx2 off, v[0:1], s0
110+
; GFX950-NEXT: v_mov_b64_e32 v[0:1], s[66:67]
111+
; GFX950-NEXT: scratch_store_dwordx2 off, v[0:1], s0
112+
; GFX950-NEXT: v_mov_b64_e32 v[0:1], s[64:65]
113+
; GFX950-NEXT: scratch_store_dwordx2 off, v[0:1], s0
114+
; GFX950-NEXT: v_mov_b64_e32 v[0:1], s[62:63]
115+
; GFX950-NEXT: scratch_store_dwordx2 off, v[0:1], s0
116+
; GFX950-NEXT: v_mov_b64_e32 v[0:1], s[60:61]
117+
; GFX950-NEXT: scratch_store_dwordx2 off, v[0:1], s0
118+
; GFX950-NEXT: v_mov_b64_e32 v[0:1], s[58:59]
119+
; GFX950-NEXT: scratch_store_dwordx2 off, v[0:1], s0
120+
; GFX950-NEXT: v_mov_b64_e32 v[0:1], s[56:57]
121+
; GFX950-NEXT: scratch_store_dwordx2 off, v[0:1], s0
122+
; GFX950-NEXT: v_mov_b64_e32 v[0:1], s[54:55]
123+
; GFX950-NEXT: scratch_store_dwordx2 off, v[0:1], s0
124+
; GFX950-NEXT: v_mov_b64_e32 v[0:1], s[52:53]
125+
; GFX950-NEXT: scratch_store_dwordx2 off, v[0:1], s0
126+
; GFX950-NEXT: v_mov_b64_e32 v[0:1], s[50:51]
127+
; GFX950-NEXT: scratch_store_dwordx2 off, v[0:1], s0
128+
; GFX950-NEXT: v_mov_b64_e32 v[0:1], s[48:49]
129+
; GFX950-NEXT: scratch_store_dwordx2 off, v[0:1], s0
130+
; GFX950-NEXT: v_mov_b64_e32 v[0:1], s[46:47]
131+
; GFX950-NEXT: scratch_store_dwordx2 off, v[0:1], s0
132+
; GFX950-NEXT: v_mov_b64_e32 v[0:1], s[44:45]
133+
; GFX950-NEXT: scratch_store_dwordx2 off, v[0:1], s0
134+
; GFX950-NEXT: v_mov_b64_e32 v[0:1], s[42:43]
135+
; GFX950-NEXT: scratch_store_dwordx2 off, v[0:1], s0
136+
; GFX950-NEXT: v_mov_b64_e32 v[0:1], s[40:41]
137+
; GFX950-NEXT: scratch_store_dwordx2 off, v[0:1], s0
138+
; GFX950-NEXT: v_mov_b64_e32 v[0:1], s[38:39]
139+
; GFX950-NEXT: scratch_store_dwordx2 off, v[0:1], s0
140+
; GFX950-NEXT: v_mov_b64_e32 v[0:1], s[36:37]
141+
; GFX950-NEXT: scratch_store_dwordx2 off, v[0:1], s0
142+
; GFX950-NEXT: v_mov_b64_e32 v[0:1], s[34:35]
143+
; GFX950-NEXT: scratch_store_dwordx2 off, v[0:1], s0
144+
; GFX950-NEXT: v_mov_b64_e32 v[0:1], s[30:31]
145+
; GFX950-NEXT: scratch_store_dwordx2 off, v[0:1], s0
146+
; GFX950-NEXT: v_mov_b64_e32 v[0:1], s[28:29]
147+
; GFX950-NEXT: scratch_store_dwordx2 off, v[0:1], s0
148+
; GFX950-NEXT: v_mov_b64_e32 v[0:1], s[26:27]
149+
; GFX950-NEXT: scratch_store_dwordx2 off, v[0:1], s0
150+
; GFX950-NEXT: v_mov_b64_e32 v[0:1], s[24:25]
151+
; GFX950-NEXT: scratch_store_dwordx2 off, v[0:1], s0
152+
; GFX950-NEXT: v_mov_b64_e32 v[0:1], s[22:23]
153+
; GFX950-NEXT: scratch_store_dwordx2 off, v[0:1], s0
154+
; GFX950-NEXT: v_mov_b64_e32 v[0:1], s[20:21]
155+
; GFX950-NEXT: scratch_store_dwordx2 off, v[0:1], s0
156+
; GFX950-NEXT: v_mov_b64_e32 v[0:1], s[18:19]
157+
; GFX950-NEXT: scratch_store_dwordx2 off, v[0:1], s0
158+
; GFX950-NEXT: v_mov_b64_e32 v[0:1], s[16:17]
159+
; GFX950-NEXT: scratch_store_dwordx2 off, v[0:1], s0
160+
; GFX950-NEXT: v_mov_b64_e32 v[0:1], s[14:15]
161+
; GFX950-NEXT: scratch_store_dwordx2 off, v[0:1], s0
162+
; GFX950-NEXT: v_mov_b64_e32 v[0:1], s[12:13]
163+
; GFX950-NEXT: scratch_store_dwordx2 off, v[0:1], s0
164+
; GFX950-NEXT: v_mov_b64_e32 v[0:1], s[10:11]
165+
; GFX950-NEXT: scratch_store_dwordx2 off, v[0:1], s0
166+
; GFX950-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
167+
; GFX950-NEXT: scratch_store_dwordx2 off, v[0:1], s0
168+
; GFX950-NEXT: v_mov_b64_e32 v[0:1], s[6:7]
169+
; GFX950-NEXT: scratch_store_dwordx2 off, v[0:1], s0
170+
; GFX950-NEXT: v_mov_b64_e32 v[0:1], s[4:5]
171+
; GFX950-NEXT: scratch_store_dwordx2 off, v[0:1], s0
172+
; GFX950-NEXT: v_mov_b64_e32 v[0:1], s[2:3]
173+
; GFX950-NEXT: scratch_store_dwordx2 off, v[0:1], s0
174+
; GFX950-NEXT: s_endpgm
175+
bb:
176+
%alloca.big = alloca [4096 x i32], align 4, addrspace(5)
177+
%alloca304 = alloca [2 x i64], align 8, addrspace(5)
178+
%alloca307 = alloca i64, align 8, addrspace(5)
179+
store [2 x i64] zeroinitializer, ptr addrspace(5) %alloca304, align 8
180+
store i64 900, ptr addrspace(5) %alloca307, align 8
181+
store i64 %arg, ptr addrspace(5) null, align 8
182+
store i64 %arg1, ptr addrspace(5) null, align 8
183+
store i64 %arg2, ptr addrspace(5) null, align 8
184+
store i64 %arg3, ptr addrspace(5) null, align 8
185+
store i64 %arg4, ptr addrspace(5) null, align 8
186+
store i64 %arg5, ptr addrspace(5) null, align 8
187+
store i64 %arg6, ptr addrspace(5) null, align 8
188+
store i64 %arg7, ptr addrspace(5) null, align 8
189+
store i64 %arg8, ptr addrspace(5) null, align 8
190+
store i64 %arg9, ptr addrspace(5) null, align 8
191+
store i64 %arg10, ptr addrspace(5) null, align 8
192+
store i64 %arg11, ptr addrspace(5) null, align 8
193+
store i64 %arg12, ptr addrspace(5) null, align 8
194+
store i64 %arg13, ptr addrspace(5) null, align 8
195+
store i64 %arg14, ptr addrspace(5) null, align 8
196+
store i64 %arg15, ptr addrspace(5) null, align 8
197+
store i64 %arg16, ptr addrspace(5) null, align 8
198+
store i64 %arg17, ptr addrspace(5) null, align 8
199+
store i64 %arg18, ptr addrspace(5) null, align 8
200+
store i64 %arg19, ptr addrspace(5) null, align 8
201+
store i64 %arg20, ptr addrspace(5) null, align 8
202+
store i64 %arg21, ptr addrspace(5) null, align 8
203+
store i64 %arg22, ptr addrspace(5) null, align 8
204+
store i64 %arg23, ptr addrspace(5) null, align 8
205+
store i64 %arg24, ptr addrspace(5) null, align 8
206+
store i64 %arg25, ptr addrspace(5) null, align 8
207+
store i64 %arg26, ptr addrspace(5) null, align 8
208+
store i64 %arg27, ptr addrspace(5) null, align 8
209+
store i64 %arg28, ptr addrspace(5) null, align 8
210+
store i64 %arg29, ptr addrspace(5) null, align 8
211+
store i64 %arg30, ptr addrspace(5) null, align 8
212+
store i64 %arg31, ptr addrspace(5) null, align 8
213+
store i64 %arg32, ptr addrspace(5) null, align 8
214+
store i64 %arg33, ptr addrspace(5) null, align 8
215+
store i64 %arg34, ptr addrspace(5) null, align 8
216+
store i64 %arg35, ptr addrspace(5) null, align 8
217+
store i64 %arg36, ptr addrspace(5) null, align 8
218+
store i64 %arg37, ptr addrspace(5) null, align 8
219+
store i64 %arg38, ptr addrspace(5) null, align 8
220+
store i64 %arg39, ptr addrspace(5) null, align 8
221+
store i64 %arg40, ptr addrspace(5) null, align 8
222+
store i64 %arg41, ptr addrspace(5) null, align 8
223+
store i64 %arg42, ptr addrspace(5) null, align 8
224+
store i64 %arg43, ptr addrspace(5) null, align 8
225+
store i64 %arg44, ptr addrspace(5) null, align 8
226+
store i64 %arg45, ptr addrspace(5) null, align 8
227+
store i64 %arg46, ptr addrspace(5) null, align 8
228+
store i64 %arg47, ptr addrspace(5) null, align 8
229+
store i64 %arg48, ptr addrspace(5) null, align 8
230+
store i64 %arg49, ptr addrspace(5) null, align 8
231+
ret void
232+
}

llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1582,8 +1582,7 @@ define amdgpu_kernel void @soff1_voff1_negative(i32 %soff) {
15821582
; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, 1
15831583
; GFX942-SDAG-NEXT: s_waitcnt lgkmcnt(0)
15841584
; GFX942-SDAG-NEXT: v_add_u32_e32 v0, s0, v0
1585-
; GFX942-SDAG-NEXT: v_add_u32_e32 v0, -1, v0
1586-
; GFX942-SDAG-NEXT: scratch_store_byte v0, v1, off sc0 sc1
1585+
; GFX942-SDAG-NEXT: scratch_store_byte v0, v1, off offset:-1 sc0 sc1
15871586
; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
15881587
; GFX942-SDAG-NEXT: s_endpgm
15891588
;
@@ -1593,8 +1592,8 @@ define amdgpu_kernel void @soff1_voff1_negative(i32 %soff) {
15931592
; GFX942-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
15941593
; GFX942-GISEL-NEXT: v_mov_b32_e32 v1, 1
15951594
; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1596-
; GFX942-GISEL-NEXT: v_add3_u32 v0, s0, v0, -1
1597-
; GFX942-GISEL-NEXT: scratch_store_byte v0, v1, off sc0 sc1
1595+
; GFX942-GISEL-NEXT: v_add_u32_e32 v0, s0, v0
1596+
; GFX942-GISEL-NEXT: scratch_store_byte v0, v1, off offset:-1 sc0 sc1
15981597
; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0)
15991598
; GFX942-GISEL-NEXT: s_endpgm
16001599
;

0 commit comments

Comments
 (0)