@@ -2145,12 +2145,11 @@ define amdgpu_kernel void @add_i64_uniform(ptr addrspace(1) %out, ptr addrspace(
2145
2145
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
2146
2146
; GFX1164-NEXT: v_readfirstlane_b32 s3, v1
2147
2147
; GFX1164-NEXT: v_readfirstlane_b32 s2, v0
2148
- ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
2148
+ ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
2149
2149
; GFX1164-NEXT: v_mad_u64_u32 v[0:1], null, s4, v2, s[2:3]
2150
2150
; GFX1164-NEXT: s_mov_b32 s3, 0x31016000
2151
2151
; GFX1164-NEXT: s_mov_b32 s2, -1
2152
2152
; GFX1164-NEXT: v_mad_u64_u32 v[3:4], null, s5, v2, v[1:2]
2153
- ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1)
2154
2153
; GFX1164-NEXT: v_mov_b32_e32 v1, v3
2155
2154
; GFX1164-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
2156
2155
; GFX1164-NEXT: s_endpgm
@@ -2189,12 +2188,11 @@ define amdgpu_kernel void @add_i64_uniform(ptr addrspace(1) %out, ptr addrspace(
2189
2188
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
2190
2189
; GFX1132-NEXT: v_readfirstlane_b32 s3, v1
2191
2190
; GFX1132-NEXT: v_readfirstlane_b32 s2, v0
2192
- ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
2191
+ ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
2193
2192
; GFX1132-NEXT: v_mad_u64_u32 v[0:1], null, s4, v2, s[2:3]
2194
2193
; GFX1132-NEXT: s_mov_b32 s3, 0x31016000
2195
2194
; GFX1132-NEXT: s_mov_b32 s2, -1
2196
2195
; GFX1132-NEXT: v_mad_u64_u32 v[3:4], null, s5, v2, v[1:2]
2197
- ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
2198
2196
; GFX1132-NEXT: v_mov_b32_e32 v1, v3
2199
2197
; GFX1132-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
2200
2198
; GFX1132-NEXT: s_endpgm
@@ -2232,7 +2230,7 @@ define amdgpu_kernel void @add_i64_uniform(ptr addrspace(1) %out, ptr addrspace(
2232
2230
; GFX1264-NEXT: s_wait_kmcnt 0x0
2233
2231
; GFX1264-NEXT: v_readfirstlane_b32 s3, v1
2234
2232
; GFX1264-NEXT: v_readfirstlane_b32 s2, v0
2235
- ; GFX1264-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
2233
+ ; GFX1264-NEXT: s_delay_alu instid0(VALU_DEP_1)
2236
2234
; GFX1264-NEXT: v_mad_co_u64_u32 v[0:1], null, s4, v2, s[2:3]
2237
2235
; GFX1264-NEXT: s_mov_b32 s3, 0x31016000
2238
2236
; GFX1264-NEXT: s_mov_b32 s2, -1
@@ -2272,7 +2270,7 @@ define amdgpu_kernel void @add_i64_uniform(ptr addrspace(1) %out, ptr addrspace(
2272
2270
; GFX1232-NEXT: s_wait_kmcnt 0x0
2273
2271
; GFX1232-NEXT: v_readfirstlane_b32 s3, v1
2274
2272
; GFX1232-NEXT: v_readfirstlane_b32 s2, v0
2275
- ; GFX1232-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
2273
+ ; GFX1232-NEXT: s_delay_alu instid0(VALU_DEP_1)
2276
2274
; GFX1232-NEXT: v_mad_co_u64_u32 v[0:1], null, s4, v2, s[2:3]
2277
2275
; GFX1232-NEXT: s_mov_b32 s3, 0x31016000
2278
2276
; GFX1232-NEXT: s_mov_b32 s2, -1
@@ -3244,7 +3242,7 @@ define amdgpu_kernel void @add_i64_varying(ptr addrspace(1) %out, ptr addrspace(
3244
3242
; GFX1164_DPP-NEXT: v_mov_b32_e32 v8, v4
3245
3243
; GFX1164_DPP-NEXT: v_mov_b32_e32 v9, v5
3246
3244
; GFX1164_DPP-NEXT: v_readfirstlane_b32 s3, v7
3247
- ; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
3245
+ ; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_3)
3248
3246
; GFX1164_DPP-NEXT: v_add_co_u32 v6, vcc, s2, v8
3249
3247
; GFX1164_DPP-NEXT: s_mov_b32 s2, s6
3250
3248
; GFX1164_DPP-NEXT: v_add_co_ci_u32_e32 v7, vcc, s3, v9, vcc
@@ -3329,7 +3327,7 @@ define amdgpu_kernel void @add_i64_varying(ptr addrspace(1) %out, ptr addrspace(
3329
3327
; GFX1132_DPP-NEXT: v_mov_b32_e32 v10, v6
3330
3328
; GFX1132_DPP-NEXT: v_mov_b32_e32 v11, v7
3331
3329
; GFX1132_DPP-NEXT: v_readfirstlane_b32 s3, v9
3332
- ; GFX1132_DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
3330
+ ; GFX1132_DPP-NEXT: s_delay_alu instid0(VALU_DEP_3)
3333
3331
; GFX1132_DPP-NEXT: v_add_co_u32 v8, vcc_lo, s2, v10
3334
3332
; GFX1132_DPP-NEXT: s_mov_b32 s2, s6
3335
3333
; GFX1132_DPP-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, s3, v11, vcc_lo
@@ -4068,7 +4066,6 @@ define amdgpu_kernel void @sub_i32_uniform(ptr addrspace(1) %out, ptr addrspace(
4068
4066
; GFX1164-NEXT: v_mul_lo_u32 v0, s8, v0
4069
4067
; GFX1164-NEXT: v_readfirstlane_b32 s2, v1
4070
4068
; GFX1164-NEXT: s_mov_b32 s3, 0x31016000
4071
- ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_3)
4072
4069
; GFX1164-NEXT: v_sub_nc_u32_e32 v0, s2, v0
4073
4070
; GFX1164-NEXT: s_mov_b32 s2, -1
4074
4071
; GFX1164-NEXT: buffer_store_b32 v0, off, s[0:3], 0
@@ -4105,7 +4102,6 @@ define amdgpu_kernel void @sub_i32_uniform(ptr addrspace(1) %out, ptr addrspace(
4105
4102
; GFX1132-NEXT: v_mul_lo_u32 v0, s4, v0
4106
4103
; GFX1132-NEXT: v_readfirstlane_b32 s2, v1
4107
4104
; GFX1132-NEXT: s_mov_b32 s3, 0x31016000
4108
- ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_3)
4109
4105
; GFX1132-NEXT: v_sub_nc_u32_e32 v0, s2, v0
4110
4106
; GFX1132-NEXT: s_mov_b32 s2, -1
4111
4107
; GFX1132-NEXT: buffer_store_b32 v0, off, s[0:3], 0
@@ -4144,7 +4140,6 @@ define amdgpu_kernel void @sub_i32_uniform(ptr addrspace(1) %out, ptr addrspace(
4144
4140
; GFX1264-NEXT: v_mul_lo_u32 v0, s8, v0
4145
4141
; GFX1264-NEXT: v_readfirstlane_b32 s2, v1
4146
4142
; GFX1264-NEXT: s_mov_b32 s3, 0x31016000
4147
- ; GFX1264-NEXT: s_delay_alu instid0(VALU_DEP_3)
4148
4143
; GFX1264-NEXT: v_sub_nc_u32_e32 v0, s2, v0
4149
4144
; GFX1264-NEXT: s_mov_b32 s2, -1
4150
4145
; GFX1264-NEXT: buffer_store_b32 v0, off, s[0:3], null
@@ -4182,7 +4177,6 @@ define amdgpu_kernel void @sub_i32_uniform(ptr addrspace(1) %out, ptr addrspace(
4182
4177
; GFX1232-NEXT: v_mul_lo_u32 v0, s4, v0
4183
4178
; GFX1232-NEXT: v_readfirstlane_b32 s2, v1
4184
4179
; GFX1232-NEXT: s_mov_b32 s3, 0x31016000
4185
- ; GFX1232-NEXT: s_delay_alu instid0(VALU_DEP_3)
4186
4180
; GFX1232-NEXT: v_sub_nc_u32_e32 v0, s2, v0
4187
4181
; GFX1232-NEXT: s_mov_b32 s2, -1
4188
4182
; GFX1232-NEXT: buffer_store_b32 v0, off, s[0:3], null
@@ -5716,7 +5710,6 @@ define amdgpu_kernel void @sub_i64_uniform(ptr addrspace(1) %out, ptr addrspace(
5716
5710
; GFX1164-NEXT: v_mad_u64_u32 v[5:6], null, s5, v2, v[4:5]
5717
5711
; GFX1164-NEXT: v_sub_co_u32 v0, vcc, s2, v3
5718
5712
; GFX1164-NEXT: s_mov_b32 s2, -1
5719
- ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_3)
5720
5713
; GFX1164-NEXT: v_sub_co_ci_u32_e32 v1, vcc, s3, v5, vcc
5721
5714
; GFX1164-NEXT: s_mov_b32 s3, 0x31016000
5722
5715
; GFX1164-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
@@ -5761,7 +5754,6 @@ define amdgpu_kernel void @sub_i64_uniform(ptr addrspace(1) %out, ptr addrspace(
5761
5754
; GFX1132-NEXT: v_mad_u64_u32 v[5:6], null, s5, v2, v[4:5]
5762
5755
; GFX1132-NEXT: v_sub_co_u32 v0, vcc_lo, s2, v3
5763
5756
; GFX1132-NEXT: s_mov_b32 s2, -1
5764
- ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_3)
5765
5757
; GFX1132-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, s3, v5, vcc_lo
5766
5758
; GFX1132-NEXT: s_mov_b32 s3, 0x31016000
5767
5759
; GFX1132-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
@@ -5805,7 +5797,6 @@ define amdgpu_kernel void @sub_i64_uniform(ptr addrspace(1) %out, ptr addrspace(
5805
5797
; GFX1264-NEXT: v_mad_co_u64_u32 v[4:5], null, s5, v2, v[4:5]
5806
5798
; GFX1264-NEXT: v_sub_co_u32 v0, vcc, s2, v3
5807
5799
; GFX1264-NEXT: s_mov_b32 s2, -1
5808
- ; GFX1264-NEXT: s_delay_alu instid0(VALU_DEP_3)
5809
5800
; GFX1264-NEXT: v_sub_co_ci_u32_e32 v1, vcc, s3, v4, vcc
5810
5801
; GFX1264-NEXT: s_mov_b32 s3, 0x31016000
5811
5802
; GFX1264-NEXT: buffer_store_b64 v[0:1], off, s[0:3], null
@@ -5848,7 +5839,6 @@ define amdgpu_kernel void @sub_i64_uniform(ptr addrspace(1) %out, ptr addrspace(
5848
5839
; GFX1232-NEXT: v_mad_co_u64_u32 v[4:5], null, s5, v2, v[4:5]
5849
5840
; GFX1232-NEXT: v_sub_co_u32 v0, vcc_lo, s2, v3
5850
5841
; GFX1232-NEXT: s_mov_b32 s2, -1
5851
- ; GFX1232-NEXT: s_delay_alu instid0(VALU_DEP_3)
5852
5842
; GFX1232-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, s3, v4, vcc_lo
5853
5843
; GFX1232-NEXT: s_mov_b32 s3, 0x31016000
5854
5844
; GFX1232-NEXT: buffer_store_b64 v[0:1], off, s[0:3], null
@@ -6818,7 +6808,7 @@ define amdgpu_kernel void @sub_i64_varying(ptr addrspace(1) %out, ptr addrspace(
6818
6808
; GFX1164_DPP-NEXT: v_mov_b32_e32 v8, v4
6819
6809
; GFX1164_DPP-NEXT: v_mov_b32_e32 v9, v5
6820
6810
; GFX1164_DPP-NEXT: v_readfirstlane_b32 s3, v7
6821
- ; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
6811
+ ; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_3)
6822
6812
; GFX1164_DPP-NEXT: v_sub_co_u32 v6, vcc, s2, v8
6823
6813
; GFX1164_DPP-NEXT: s_mov_b32 s2, s6
6824
6814
; GFX1164_DPP-NEXT: v_sub_co_ci_u32_e32 v7, vcc, s3, v9, vcc
@@ -6903,7 +6893,7 @@ define amdgpu_kernel void @sub_i64_varying(ptr addrspace(1) %out, ptr addrspace(
6903
6893
; GFX1132_DPP-NEXT: v_mov_b32_e32 v10, v6
6904
6894
; GFX1132_DPP-NEXT: v_mov_b32_e32 v11, v7
6905
6895
; GFX1132_DPP-NEXT: v_readfirstlane_b32 s3, v9
6906
- ; GFX1132_DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
6896
+ ; GFX1132_DPP-NEXT: s_delay_alu instid0(VALU_DEP_3)
6907
6897
; GFX1132_DPP-NEXT: v_sub_co_u32 v8, vcc_lo, s2, v10
6908
6898
; GFX1132_DPP-NEXT: s_mov_b32 s2, s6
6909
6899
; GFX1132_DPP-NEXT: v_sub_co_ci_u32_e32 v9, vcc_lo, s3, v11, vcc_lo
0 commit comments