@@ -12,6 +12,7 @@ define amdgpu_kernel void @test1_s_barrier_signal(ptr addrspace(1) %out) #0 {
12
12
; GCN-NEXT: v_sub_nc_u32_e32 v0, v1, v0
13
13
; GCN-NEXT: s_wait_kmcnt 0x0
14
14
; GCN-NEXT: global_store_b32 v3, v2, s[0:1]
15
+ ; GCN-NEXT: s_wait_storecnt 0x0
15
16
; GCN-NEXT: s_barrier_signal -1
16
17
; GCN-NEXT: s_barrier_wait -1
17
18
; GCN-NEXT: global_store_b32 v3, v0, s[0:1]
@@ -28,6 +29,7 @@ define amdgpu_kernel void @test1_s_barrier_signal(ptr addrspace(1) %out) #0 {
28
29
; GLOBAL-ISEL-NEXT: v_sub_nc_u32_e32 v0, v1, v0
29
30
; GLOBAL-ISEL-NEXT: s_wait_kmcnt 0x0
30
31
; GLOBAL-ISEL-NEXT: global_store_b32 v3, v2, s[0:1]
32
+ ; GLOBAL-ISEL-NEXT: s_wait_storecnt 0x0
31
33
; GLOBAL-ISEL-NEXT: s_barrier_signal -1
32
34
; GLOBAL-ISEL-NEXT: s_barrier_wait -1
33
35
; GLOBAL-ISEL-NEXT: global_store_b32 v3, v0, s[0:1]
@@ -56,6 +58,7 @@ define amdgpu_kernel void @test2_s_barrier_signal(ptr addrspace(1) %out) #0 {
56
58
; GCN-NEXT: v_sub_nc_u32_e32 v0, v1, v0
57
59
; GCN-NEXT: s_wait_kmcnt 0x0
58
60
; GCN-NEXT: global_store_b32 v3, v2, s[0:1]
61
+ ; GCN-NEXT: s_wait_storecnt 0x0
59
62
; GCN-NEXT: s_barrier_signal 1
60
63
; GCN-NEXT: s_barrier_wait 1
61
64
; GCN-NEXT: global_store_b32 v3, v0, s[0:1]
@@ -72,6 +75,7 @@ define amdgpu_kernel void @test2_s_barrier_signal(ptr addrspace(1) %out) #0 {
72
75
; GLOBAL-ISEL-NEXT: v_sub_nc_u32_e32 v0, v1, v0
73
76
; GLOBAL-ISEL-NEXT: s_wait_kmcnt 0x0
74
77
; GLOBAL-ISEL-NEXT: global_store_b32 v3, v2, s[0:1]
78
+ ; GLOBAL-ISEL-NEXT: s_wait_storecnt 0x0
75
79
; GLOBAL-ISEL-NEXT: s_barrier_signal 1
76
80
; GLOBAL-ISEL-NEXT: s_barrier_wait 1
77
81
; GLOBAL-ISEL-NEXT: global_store_b32 v3, v0, s[0:1]
@@ -100,6 +104,7 @@ define amdgpu_kernel void @test3_s_barrier_signal(ptr addrspace(1) %out) #0 {
100
104
; GCN-NEXT: v_sub_nc_u32_e32 v0, v1, v0
101
105
; GCN-NEXT: s_wait_kmcnt 0x0
102
106
; GCN-NEXT: global_store_b32 v3, v2, s[0:1]
107
+ ; GCN-NEXT: s_wait_storecnt 0x0
103
108
; GCN-NEXT: s_barrier_signal 0
104
109
; GCN-NEXT: s_barrier_wait 0
105
110
; GCN-NEXT: global_store_b32 v3, v0, s[0:1]
@@ -116,6 +121,7 @@ define amdgpu_kernel void @test3_s_barrier_signal(ptr addrspace(1) %out) #0 {
116
121
; GLOBAL-ISEL-NEXT: v_sub_nc_u32_e32 v0, v1, v0
117
122
; GLOBAL-ISEL-NEXT: s_wait_kmcnt 0x0
118
123
; GLOBAL-ISEL-NEXT: global_store_b32 v3, v2, s[0:1]
124
+ ; GLOBAL-ISEL-NEXT: s_wait_storecnt 0x0
119
125
; GLOBAL-ISEL-NEXT: s_barrier_signal 0
120
126
; GLOBAL-ISEL-NEXT: s_barrier_wait 0
121
127
; GLOBAL-ISEL-NEXT: global_store_b32 v3, v0, s[0:1]
@@ -146,6 +152,7 @@ define amdgpu_kernel void @test1_s_barrier_signal_var(ptr addrspace(1) %out) #0
146
152
; GCN-NEXT: v_sub_nc_u32_e32 v0, v2, v0
147
153
; GCN-NEXT: s_wait_kmcnt 0x0
148
154
; GCN-NEXT: global_store_b32 v3, v1, s[0:1]
155
+ ; GCN-NEXT: s_wait_storecnt 0x0
149
156
; GCN-NEXT: s_barrier_signal m0
150
157
; GCN-NEXT: s_barrier_wait 1
151
158
; GCN-NEXT: global_store_b32 v3, v0, s[0:1]
@@ -163,6 +170,7 @@ define amdgpu_kernel void @test1_s_barrier_signal_var(ptr addrspace(1) %out) #0
163
170
; GLOBAL-ISEL-NEXT: v_sub_nc_u32_e32 v0, v1, v0
164
171
; GLOBAL-ISEL-NEXT: s_wait_kmcnt 0x0
165
172
; GLOBAL-ISEL-NEXT: global_store_b32 v3, v2, s[0:1]
173
+ ; GLOBAL-ISEL-NEXT: s_wait_storecnt 0x0
166
174
; GLOBAL-ISEL-NEXT: s_barrier_signal m0
167
175
; GLOBAL-ISEL-NEXT: s_barrier_wait 1
168
176
; GLOBAL-ISEL-NEXT: global_store_b32 v3, v0, s[0:1]
@@ -192,6 +200,7 @@ define void @test2_s_barrier_signal_var(i32 %arg) {
192
200
; GCN-NEXT: v_readfirstlane_b32 s0, v0
193
201
; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1)
194
202
; GCN-NEXT: s_mov_b32 m0, s0
203
+ ; GCN-NEXT: s_wait_storecnt 0x0
195
204
; GCN-NEXT: s_barrier_signal m0
196
205
; GCN-NEXT: s_setpc_b64 s[30:31]
197
206
;
@@ -203,6 +212,7 @@ define void @test2_s_barrier_signal_var(i32 %arg) {
203
212
; GLOBAL-ISEL-NEXT: s_wait_bvhcnt 0x0
204
213
; GLOBAL-ISEL-NEXT: s_wait_kmcnt 0x0
205
214
; GLOBAL-ISEL-NEXT: v_readfirstlane_b32 m0, v0
215
+ ; GLOBAL-ISEL-NEXT: s_wait_storecnt 0x0
206
216
; GLOBAL-ISEL-NEXT: s_barrier_signal m0
207
217
; GLOBAL-ISEL-NEXT: s_setpc_b64 s[30:31]
208
218
call void @llvm.amdgcn.s.barrier.signal.var (i32 %arg )
@@ -216,6 +226,7 @@ define amdgpu_kernel void @test1_s_barrier_signal_isfirst(ptr addrspace(1) %a, p
216
226
; GCN-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_lshlrev_b32 v0, 2, v0
217
227
; GCN-NEXT: s_wait_kmcnt 0x0
218
228
; GCN-NEXT: global_store_b32 v0, v1, s[6:7]
229
+ ; GCN-NEXT: s_wait_storecnt 0x0
219
230
; GCN-NEXT: s_barrier_signal_isfirst -1
220
231
; GCN-NEXT: s_cselect_b32 s3, s3, s5
221
232
; GCN-NEXT: s_cselect_b32 s2, s2, s4
@@ -235,6 +246,7 @@ define amdgpu_kernel void @test1_s_barrier_signal_isfirst(ptr addrspace(1) %a, p
235
246
; GLOBAL-ISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_lshlrev_b32 v0, 2, v0
236
247
; GLOBAL-ISEL-NEXT: s_wait_kmcnt 0x0
237
248
; GLOBAL-ISEL-NEXT: global_store_b32 v0, v1, s[6:7]
249
+ ; GLOBAL-ISEL-NEXT: s_wait_storecnt 0x0
238
250
; GLOBAL-ISEL-NEXT: s_barrier_signal_isfirst -1
239
251
; GLOBAL-ISEL-NEXT: s_cselect_b32 s8, 1, 0
240
252
; GLOBAL-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
@@ -270,6 +282,7 @@ define amdgpu_kernel void @test2_s_barrier_signal_isfirst(ptr addrspace(1) %a, p
270
282
; GCN-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_lshlrev_b32 v0, 2, v0
271
283
; GCN-NEXT: s_wait_kmcnt 0x0
272
284
; GCN-NEXT: global_store_b32 v0, v1, s[6:7]
285
+ ; GCN-NEXT: s_wait_storecnt 0x0
273
286
; GCN-NEXT: s_barrier_signal_isfirst 1
274
287
; GCN-NEXT: s_cselect_b32 s3, s3, s5
275
288
; GCN-NEXT: s_cselect_b32 s2, s2, s4
@@ -289,6 +302,7 @@ define amdgpu_kernel void @test2_s_barrier_signal_isfirst(ptr addrspace(1) %a, p
289
302
; GLOBAL-ISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_lshlrev_b32 v0, 2, v0
290
303
; GLOBAL-ISEL-NEXT: s_wait_kmcnt 0x0
291
304
; GLOBAL-ISEL-NEXT: global_store_b32 v0, v1, s[6:7]
305
+ ; GLOBAL-ISEL-NEXT: s_wait_storecnt 0x0
292
306
; GLOBAL-ISEL-NEXT: s_barrier_signal_isfirst 1
293
307
; GLOBAL-ISEL-NEXT: s_cselect_b32 s8, 1, 0
294
308
; GLOBAL-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
@@ -324,6 +338,7 @@ define amdgpu_kernel void @test3_s_barrier_signal_isfirst(ptr addrspace(1) %a, p
324
338
; GCN-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_lshlrev_b32 v0, 2, v0
325
339
; GCN-NEXT: s_wait_kmcnt 0x0
326
340
; GCN-NEXT: global_store_b32 v0, v1, s[6:7]
341
+ ; GCN-NEXT: s_wait_storecnt 0x0
327
342
; GCN-NEXT: s_barrier_signal_isfirst 1
328
343
; GCN-NEXT: s_cselect_b32 s3, s3, s5
329
344
; GCN-NEXT: s_cselect_b32 s2, s2, s4
@@ -343,6 +358,7 @@ define amdgpu_kernel void @test3_s_barrier_signal_isfirst(ptr addrspace(1) %a, p
343
358
; GLOBAL-ISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_lshlrev_b32 v0, 2, v0
344
359
; GLOBAL-ISEL-NEXT: s_wait_kmcnt 0x0
345
360
; GLOBAL-ISEL-NEXT: global_store_b32 v0, v1, s[6:7]
361
+ ; GLOBAL-ISEL-NEXT: s_wait_storecnt 0x0
346
362
; GLOBAL-ISEL-NEXT: s_barrier_signal_isfirst 1
347
363
; GLOBAL-ISEL-NEXT: s_cselect_b32 s8, 1, 0
348
364
; GLOBAL-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
@@ -379,6 +395,7 @@ define amdgpu_kernel void @test1_s_barrier_signal_isfirst_var(ptr addrspace(1) %
379
395
; GCN-NEXT: s_mov_b32 m0, 1
380
396
; GCN-NEXT: s_wait_kmcnt 0x0
381
397
; GCN-NEXT: global_store_b32 v0, v1, s[6:7]
398
+ ; GCN-NEXT: s_wait_storecnt 0x0
382
399
; GCN-NEXT: s_barrier_signal_isfirst m0
383
400
; GCN-NEXT: s_cselect_b32 s3, s3, s5
384
401
; GCN-NEXT: s_cselect_b32 s2, s2, s4
@@ -399,6 +416,7 @@ define amdgpu_kernel void @test1_s_barrier_signal_isfirst_var(ptr addrspace(1) %
399
416
; GLOBAL-ISEL-NEXT: s_mov_b32 m0, 1
400
417
; GLOBAL-ISEL-NEXT: s_wait_kmcnt 0x0
401
418
; GLOBAL-ISEL-NEXT: global_store_b32 v0, v1, s[6:7]
419
+ ; GLOBAL-ISEL-NEXT: s_wait_storecnt 0x0
402
420
; GLOBAL-ISEL-NEXT: s_barrier_signal_isfirst m0
403
421
; GLOBAL-ISEL-NEXT: s_cselect_b32 s8, 1, 0
404
422
; GLOBAL-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
@@ -444,6 +462,7 @@ define void @test2_s_barrier_signal_isfirst_var(ptr addrspace(1) %a, ptr addrspa
444
462
; GCN-NEXT: v_add_co_u32 v7, vcc_lo, v7, v9
445
463
; GCN-NEXT: v_add_co_ci_u32_e32 v8, vcc_lo, 0, v8, vcc_lo
446
464
; GCN-NEXT: global_store_b32 v[7:8], v10, off
465
+ ; GCN-NEXT: s_wait_storecnt 0x0
447
466
; GCN-NEXT: s_barrier_signal_isfirst m0
448
467
; GCN-NEXT: s_cselect_b32 vcc_lo, -1, 0
449
468
; GCN-NEXT: v_dual_cndmask_b32 v2, v4, v2 :: v_dual_cndmask_b32 v3, v5, v3
@@ -470,6 +489,7 @@ define void @test2_s_barrier_signal_isfirst_var(ptr addrspace(1) %a, ptr addrspa
470
489
; GLOBAL-ISEL-NEXT: v_add_co_ci_u32_e32 v8, vcc_lo, 0, v8, vcc_lo
471
490
; GLOBAL-ISEL-NEXT: v_mov_b32_e32 v9, 0
472
491
; GLOBAL-ISEL-NEXT: global_store_b32 v[7:8], v9, off
492
+ ; GLOBAL-ISEL-NEXT: s_wait_storecnt 0x0
473
493
; GLOBAL-ISEL-NEXT: s_barrier_signal_isfirst m0
474
494
; GLOBAL-ISEL-NEXT: s_cselect_b32 s0, 1, 0
475
495
; GLOBAL-ISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
@@ -1339,6 +1359,7 @@ define amdgpu_kernel void @test_barrier_convert(ptr addrspace(1) %out) #0 {
1339
1359
; GCN-NEXT: v_sub_nc_u32_e32 v0, v1, v0
1340
1360
; GCN-NEXT: s_wait_kmcnt 0x0
1341
1361
; GCN-NEXT: global_store_b32 v3, v2, s[0:1]
1362
+ ; GCN-NEXT: s_wait_storecnt 0x0
1342
1363
; GCN-NEXT: s_barrier_signal -1
1343
1364
; GCN-NEXT: s_barrier_wait -1
1344
1365
; GCN-NEXT: global_store_b32 v3, v0, s[0:1]
@@ -1355,6 +1376,7 @@ define amdgpu_kernel void @test_barrier_convert(ptr addrspace(1) %out) #0 {
1355
1376
; GLOBAL-ISEL-NEXT: v_sub_nc_u32_e32 v0, v1, v0
1356
1377
; GLOBAL-ISEL-NEXT: s_wait_kmcnt 0x0
1357
1378
; GLOBAL-ISEL-NEXT: global_store_b32 v3, v2, s[0:1]
1379
+ ; GLOBAL-ISEL-NEXT: s_wait_storecnt 0x0
1358
1380
; GLOBAL-ISEL-NEXT: s_barrier_signal -1
1359
1381
; GLOBAL-ISEL-NEXT: s_barrier_wait -1
1360
1382
; GLOBAL-ISEL-NEXT: global_store_b32 v3, v0, s[0:1]
0 commit comments