@@ -3563,15 +3563,19 @@ define amdgpu_kernel void @global_zextload_v64i16_to_v64i32(ptr addrspace(1) %ou
3563
3563
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:32
3564
3564
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[23:26], off, s[0:3], 0 offset:48
3565
3565
; GCN-NOHSA-SI-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:16 ; 4-byte Folded Reload
3566
+ ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
3566
3567
; GCN-NOHSA-SI-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:20 ; 4-byte Folded Reload
3567
3568
; GCN-NOHSA-SI-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:24 ; 4-byte Folded Reload
3569
+ ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
3568
3570
; GCN-NOHSA-SI-NEXT: buffer_load_dword v3, off, s[12:15], 0 offset:28 ; 4-byte Folded Reload
3569
3571
; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
3570
3572
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
3571
3573
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
3572
3574
; GCN-NOHSA-SI-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
3575
+ ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
3573
3576
; GCN-NOHSA-SI-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
3574
3577
; GCN-NOHSA-SI-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
3578
+ ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
3575
3579
; GCN-NOHSA-SI-NEXT: buffer_load_dword v3, off, s[12:15], 0 offset:12 ; 4-byte Folded Reload
3576
3580
; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
3577
3581
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
@@ -4371,8 +4375,10 @@ define amdgpu_kernel void @global_sextload_v64i16_to_v64i32(ptr addrspace(1) %ou
4371
4375
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:48
4372
4376
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0
4373
4377
; GCN-NOHSA-SI-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
4378
+ ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
4374
4379
; GCN-NOHSA-SI-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
4375
4380
; GCN-NOHSA-SI-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
4381
+ ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
4376
4382
; GCN-NOHSA-SI-NEXT: buffer_load_dword v3, off, s[12:15], 0 offset:12 ; 4-byte Folded Reload
4377
4383
; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
4378
4384
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
@@ -7341,8 +7347,10 @@ define amdgpu_kernel void @global_zextload_v32i16_to_v32i64(ptr addrspace(1) %ou
7341
7347
; GCN-NOHSA-SI-NEXT: buffer_store_dword v15, off, s[12:15], 0 offset:28 ; 4-byte Folded Spill
7342
7348
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
7343
7349
; GCN-NOHSA-SI-NEXT: buffer_load_dword v12, off, s[12:15], 0 ; 4-byte Folded Reload
7350
+ ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
7344
7351
; GCN-NOHSA-SI-NEXT: buffer_load_dword v13, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
7345
7352
; GCN-NOHSA-SI-NEXT: buffer_load_dword v14, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
7353
+ ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
7346
7354
; GCN-NOHSA-SI-NEXT: buffer_load_dword v15, off, s[12:15], 0 offset:12 ; 4-byte Folded Reload
7347
7355
; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
7348
7356
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v13, v39
@@ -7364,8 +7372,10 @@ define amdgpu_kernel void @global_zextload_v32i16_to_v32i64(ptr addrspace(1) %ou
7364
7372
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:96
7365
7373
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:64
7366
7374
; GCN-NOHSA-SI-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:16 ; 4-byte Folded Reload
7375
+ ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
7367
7376
; GCN-NOHSA-SI-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:20 ; 4-byte Folded Reload
7368
7377
; GCN-NOHSA-SI-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:24 ; 4-byte Folded Reload
7378
+ ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
7369
7379
; GCN-NOHSA-SI-NEXT: buffer_load_dword v3, off, s[12:15], 0 offset:28 ; 4-byte Folded Reload
7370
7380
; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
7371
7381
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
0 commit comments