@@ -32,9 +32,11 @@ define void @preserve_wwm_copy_dstreg(ptr %parg0, ptr %parg1, ptr %parg2) #0 {
32
32
; GFX906-NEXT: v_writelane_b32 v2, s24, 5
33
33
; GFX906-NEXT: s_mov_b64 s[26:27], s[10:11]
34
34
; GFX906-NEXT: v_writelane_b32 v2, s26, 6
35
+ ; GFX906-NEXT: v_writelane_b32 v41, s34, 2
35
36
; GFX906-NEXT: v_writelane_b32 v2, s27, 7
37
+ ; GFX906-NEXT: v_writelane_b32 v41, s35, 3
36
38
; GFX906-NEXT: v_writelane_b32 v2, s8, 8
37
- ; GFX906-NEXT: v_writelane_b32 v41, s16, 2
39
+ ; GFX906-NEXT: v_writelane_b32 v41, s16, 4
38
40
; GFX906-NEXT: v_writelane_b32 v2, s9, 9
39
41
; GFX906-NEXT: v_writelane_b32 v41, s30, 0
40
42
; GFX906-NEXT: v_writelane_b32 v2, s4, 10
@@ -338,7 +340,9 @@ define void @preserve_wwm_copy_dstreg(ptr %parg0, ptr %parg1, ptr %parg2) #0 {
338
340
; GFX906-NEXT: v_readlane_b32 s31, v41, 1
339
341
; GFX906-NEXT: v_readlane_b32 s30, v41, 0
340
342
; GFX906-NEXT: ; kill: killed $vgpr40
341
- ; GFX906-NEXT: v_readlane_b32 s4, v41, 2
343
+ ; GFX906-NEXT: v_readlane_b32 s34, v41, 2
344
+ ; GFX906-NEXT: v_readlane_b32 s35, v41, 3
345
+ ; GFX906-NEXT: v_readlane_b32 s4, v41, 4
342
346
; GFX906-NEXT: s_waitcnt vmcnt(0)
343
347
; GFX906-NEXT: flat_store_dwordx4 v[0:1], v[30:33] offset:112
344
348
; GFX906-NEXT: s_waitcnt vmcnt(0)
@@ -379,23 +383,27 @@ define void @preserve_wwm_copy_dstreg(ptr %parg0, ptr %parg1, ptr %parg2) #0 {
379
383
; GFX908-NEXT: s_mov_b64 exec, -1
380
384
; GFX908-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:152 ; 4-byte Folded Spill
381
385
; GFX908-NEXT: s_mov_b64 exec, s[18:19]
382
- ; GFX908-NEXT: v_mov_b32_e32 v3, s16
386
+ ; GFX908-NEXT: v_mov_b32_e32 v3, s34
383
387
; GFX908-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:160 ; 4-byte Folded Spill
388
+ ; GFX908-NEXT: v_mov_b32_e32 v3, s35
389
+ ; GFX908-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:164 ; 4-byte Folded Spill
390
+ ; GFX908-NEXT: v_mov_b32_e32 v3, s16
391
+ ; GFX908-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:168 ; 4-byte Folded Spill
384
392
; GFX908-NEXT: s_addk_i32 s32, 0x2c00
385
393
; GFX908-NEXT: s_mov_b64 s[16:17], exec
386
394
; GFX908-NEXT: s_mov_b64 exec, 1
387
- ; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:164
395
+ ; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:172
388
396
; GFX908-NEXT: v_writelane_b32 v2, s30, 0
389
397
; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 ; 4-byte Folded Spill
390
- ; GFX908-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:164
398
+ ; GFX908-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:172
391
399
; GFX908-NEXT: s_waitcnt vmcnt(0)
392
400
; GFX908-NEXT: s_mov_b64 exec, s[16:17]
393
401
; GFX908-NEXT: s_mov_b64 s[16:17], exec
394
402
; GFX908-NEXT: s_mov_b64 exec, 1
395
- ; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:164
403
+ ; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:172
396
404
; GFX908-NEXT: v_writelane_b32 v2, s31, 0
397
405
; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
398
- ; GFX908-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:164
406
+ ; GFX908-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:172
399
407
; GFX908-NEXT: s_waitcnt vmcnt(0)
400
408
; GFX908-NEXT: s_mov_b64 exec, s[16:17]
401
409
; GFX908-NEXT: ; implicit-def: $vgpr2
@@ -729,25 +737,31 @@ define void @preserve_wwm_copy_dstreg(ptr %parg0, ptr %parg1, ptr %parg2) #0 {
729
737
; GFX908-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
730
738
; GFX908-NEXT: s_waitcnt vmcnt(0)
731
739
; GFX908-NEXT: s_mov_b64 exec, 1
732
- ; GFX908-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:164
740
+ ; GFX908-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:172
733
741
; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
734
742
; GFX908-NEXT: s_waitcnt vmcnt(0)
735
743
; GFX908-NEXT: v_readlane_b32 s31, v0, 0
736
- ; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:164
744
+ ; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:172
737
745
; GFX908-NEXT: s_waitcnt vmcnt(0)
738
746
; GFX908-NEXT: s_mov_b64 exec, s[4:5]
739
747
; GFX908-NEXT: s_mov_b64 s[4:5], exec
740
748
; GFX908-NEXT: s_mov_b64 exec, 1
741
- ; GFX908-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:164
749
+ ; GFX908-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:172
742
750
; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 ; 4-byte Folded Reload
743
751
; GFX908-NEXT: s_waitcnt vmcnt(0)
744
752
; GFX908-NEXT: v_readlane_b32 s30, v0, 0
745
- ; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:164
753
+ ; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:172
746
754
; GFX908-NEXT: s_waitcnt vmcnt(0)
747
755
; GFX908-NEXT: s_mov_b64 exec, s[4:5]
748
756
; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:160 ; 4-byte Folded Reload
749
757
; GFX908-NEXT: ; kill: killed $vgpr40
750
758
; GFX908-NEXT: s_waitcnt vmcnt(0)
759
+ ; GFX908-NEXT: v_readfirstlane_b32 s34, v0
760
+ ; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:164 ; 4-byte Folded Reload
761
+ ; GFX908-NEXT: s_waitcnt vmcnt(0)
762
+ ; GFX908-NEXT: v_readfirstlane_b32 s35, v0
763
+ ; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:168 ; 4-byte Folded Reload
764
+ ; GFX908-NEXT: s_waitcnt vmcnt(0)
751
765
; GFX908-NEXT: v_readfirstlane_b32 s4, v0
752
766
; GFX908-NEXT: s_xor_saveexec_b64 s[6:7], -1
753
767
; GFX908-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:148 ; 4-byte Folded Reload
0 commit comments