@@ -2145,12 +2145,11 @@ define amdgpu_kernel void @add_i64_uniform(ptr addrspace(1) %out, ptr addrspace(
21452145; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
21462146; GFX1164-NEXT: v_readfirstlane_b32 s3, v1
21472147; GFX1164-NEXT: v_readfirstlane_b32 s2, v0
2148- ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2 ) | instid1(VALU_DEP_2 )
2148+ ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3 ) | instid1(VALU_DEP_1 )
21492149; GFX1164-NEXT: v_mad_u64_u32 v[0:1], null, s4, v2, s[2:3]
21502150; GFX1164-NEXT: s_mov_b32 s3, 0x31016000
21512151; GFX1164-NEXT: s_mov_b32 s2, -1
21522152; GFX1164-NEXT: v_mad_u64_u32 v[3:4], null, s5, v2, v[1:2]
2153- ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1)
21542153; GFX1164-NEXT: v_mov_b32_e32 v1, v3
21552154; GFX1164-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
21562155; GFX1164-NEXT: s_endpgm
@@ -2189,12 +2188,11 @@ define amdgpu_kernel void @add_i64_uniform(ptr addrspace(1) %out, ptr addrspace(
21892188; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
21902189; GFX1132-NEXT: v_readfirstlane_b32 s3, v1
21912190; GFX1132-NEXT: v_readfirstlane_b32 s2, v0
2192- ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2 ) | instid1(VALU_DEP_2 )
2191+ ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3 ) | instid1(VALU_DEP_1 )
21932192; GFX1132-NEXT: v_mad_u64_u32 v[0:1], null, s4, v2, s[2:3]
21942193; GFX1132-NEXT: s_mov_b32 s3, 0x31016000
21952194; GFX1132-NEXT: s_mov_b32 s2, -1
21962195; GFX1132-NEXT: v_mad_u64_u32 v[3:4], null, s5, v2, v[1:2]
2197- ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
21982196; GFX1132-NEXT: v_mov_b32_e32 v1, v3
21992197; GFX1132-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
22002198; GFX1132-NEXT: s_endpgm
@@ -2232,7 +2230,7 @@ define amdgpu_kernel void @add_i64_uniform(ptr addrspace(1) %out, ptr addrspace(
22322230; GFX1264-NEXT: s_wait_kmcnt 0x0
22332231; GFX1264-NEXT: v_readfirstlane_b32 s3, v1
22342232; GFX1264-NEXT: v_readfirstlane_b32 s2, v0
2235- ; GFX1264-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
2233+ ; GFX1264-NEXT: s_delay_alu instid0(VALU_DEP_1)
22362234; GFX1264-NEXT: v_mad_co_u64_u32 v[0:1], null, s4, v2, s[2:3]
22372235; GFX1264-NEXT: s_mov_b32 s3, 0x31016000
22382236; GFX1264-NEXT: s_mov_b32 s2, -1
@@ -2272,7 +2270,7 @@ define amdgpu_kernel void @add_i64_uniform(ptr addrspace(1) %out, ptr addrspace(
22722270; GFX1232-NEXT: s_wait_kmcnt 0x0
22732271; GFX1232-NEXT: v_readfirstlane_b32 s3, v1
22742272; GFX1232-NEXT: v_readfirstlane_b32 s2, v0
2275- ; GFX1232-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
2273+ ; GFX1232-NEXT: s_delay_alu instid0(VALU_DEP_1)
22762274; GFX1232-NEXT: v_mad_co_u64_u32 v[0:1], null, s4, v2, s[2:3]
22772275; GFX1232-NEXT: s_mov_b32 s3, 0x31016000
22782276; GFX1232-NEXT: s_mov_b32 s2, -1
@@ -3244,7 +3242,7 @@ define amdgpu_kernel void @add_i64_varying(ptr addrspace(1) %out, ptr addrspace(
32443242; GFX1164_DPP-NEXT: v_mov_b32_e32 v8, v4
32453243; GFX1164_DPP-NEXT: v_mov_b32_e32 v9, v5
32463244; GFX1164_DPP-NEXT: v_readfirstlane_b32 s3, v7
3247- ; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
3245+ ; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_3)
32483246; GFX1164_DPP-NEXT: v_add_co_u32 v6, vcc, s2, v8
32493247; GFX1164_DPP-NEXT: s_mov_b32 s2, s6
32503248; GFX1164_DPP-NEXT: v_add_co_ci_u32_e32 v7, vcc, s3, v9, vcc
@@ -3329,7 +3327,7 @@ define amdgpu_kernel void @add_i64_varying(ptr addrspace(1) %out, ptr addrspace(
33293327; GFX1132_DPP-NEXT: v_mov_b32_e32 v10, v6
33303328; GFX1132_DPP-NEXT: v_mov_b32_e32 v11, v7
33313329; GFX1132_DPP-NEXT: v_readfirstlane_b32 s3, v9
3332- ; GFX1132_DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
3330+ ; GFX1132_DPP-NEXT: s_delay_alu instid0(VALU_DEP_3)
33333331; GFX1132_DPP-NEXT: v_add_co_u32 v8, vcc_lo, s2, v10
33343332; GFX1132_DPP-NEXT: s_mov_b32 s2, s6
33353333; GFX1132_DPP-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, s3, v11, vcc_lo
@@ -4068,7 +4066,6 @@ define amdgpu_kernel void @sub_i32_uniform(ptr addrspace(1) %out, ptr addrspace(
40684066; GFX1164-NEXT: v_mul_lo_u32 v0, s8, v0
40694067; GFX1164-NEXT: v_readfirstlane_b32 s2, v1
40704068; GFX1164-NEXT: s_mov_b32 s3, 0x31016000
4071- ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_3)
40724069; GFX1164-NEXT: v_sub_nc_u32_e32 v0, s2, v0
40734070; GFX1164-NEXT: s_mov_b32 s2, -1
40744071; GFX1164-NEXT: buffer_store_b32 v0, off, s[0:3], 0
@@ -4105,7 +4102,6 @@ define amdgpu_kernel void @sub_i32_uniform(ptr addrspace(1) %out, ptr addrspace(
41054102; GFX1132-NEXT: v_mul_lo_u32 v0, s4, v0
41064103; GFX1132-NEXT: v_readfirstlane_b32 s2, v1
41074104; GFX1132-NEXT: s_mov_b32 s3, 0x31016000
4108- ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_3)
41094105; GFX1132-NEXT: v_sub_nc_u32_e32 v0, s2, v0
41104106; GFX1132-NEXT: s_mov_b32 s2, -1
41114107; GFX1132-NEXT: buffer_store_b32 v0, off, s[0:3], 0
@@ -4144,7 +4140,6 @@ define amdgpu_kernel void @sub_i32_uniform(ptr addrspace(1) %out, ptr addrspace(
41444140; GFX1264-NEXT: v_mul_lo_u32 v0, s8, v0
41454141; GFX1264-NEXT: v_readfirstlane_b32 s2, v1
41464142; GFX1264-NEXT: s_mov_b32 s3, 0x31016000
4147- ; GFX1264-NEXT: s_delay_alu instid0(VALU_DEP_3)
41484143; GFX1264-NEXT: v_sub_nc_u32_e32 v0, s2, v0
41494144; GFX1264-NEXT: s_mov_b32 s2, -1
41504145; GFX1264-NEXT: buffer_store_b32 v0, off, s[0:3], null
@@ -4182,7 +4177,6 @@ define amdgpu_kernel void @sub_i32_uniform(ptr addrspace(1) %out, ptr addrspace(
41824177; GFX1232-NEXT: v_mul_lo_u32 v0, s4, v0
41834178; GFX1232-NEXT: v_readfirstlane_b32 s2, v1
41844179; GFX1232-NEXT: s_mov_b32 s3, 0x31016000
4185- ; GFX1232-NEXT: s_delay_alu instid0(VALU_DEP_3)
41864180; GFX1232-NEXT: v_sub_nc_u32_e32 v0, s2, v0
41874181; GFX1232-NEXT: s_mov_b32 s2, -1
41884182; GFX1232-NEXT: buffer_store_b32 v0, off, s[0:3], null
@@ -5716,7 +5710,6 @@ define amdgpu_kernel void @sub_i64_uniform(ptr addrspace(1) %out, ptr addrspace(
57165710; GFX1164-NEXT: v_mad_u64_u32 v[5:6], null, s5, v2, v[4:5]
57175711; GFX1164-NEXT: v_sub_co_u32 v0, vcc, s2, v3
57185712; GFX1164-NEXT: s_mov_b32 s2, -1
5719- ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_3)
57205713; GFX1164-NEXT: v_sub_co_ci_u32_e32 v1, vcc, s3, v5, vcc
57215714; GFX1164-NEXT: s_mov_b32 s3, 0x31016000
57225715; GFX1164-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
@@ -5761,7 +5754,6 @@ define amdgpu_kernel void @sub_i64_uniform(ptr addrspace(1) %out, ptr addrspace(
57615754; GFX1132-NEXT: v_mad_u64_u32 v[5:6], null, s5, v2, v[4:5]
57625755; GFX1132-NEXT: v_sub_co_u32 v0, vcc_lo, s2, v3
57635756; GFX1132-NEXT: s_mov_b32 s2, -1
5764- ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_3)
57655757; GFX1132-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, s3, v5, vcc_lo
57665758; GFX1132-NEXT: s_mov_b32 s3, 0x31016000
57675759; GFX1132-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
@@ -5805,7 +5797,6 @@ define amdgpu_kernel void @sub_i64_uniform(ptr addrspace(1) %out, ptr addrspace(
58055797; GFX1264-NEXT: v_mad_co_u64_u32 v[4:5], null, s5, v2, v[4:5]
58065798; GFX1264-NEXT: v_sub_co_u32 v0, vcc, s2, v3
58075799; GFX1264-NEXT: s_mov_b32 s2, -1
5808- ; GFX1264-NEXT: s_delay_alu instid0(VALU_DEP_3)
58095800; GFX1264-NEXT: v_sub_co_ci_u32_e32 v1, vcc, s3, v4, vcc
58105801; GFX1264-NEXT: s_mov_b32 s3, 0x31016000
58115802; GFX1264-NEXT: buffer_store_b64 v[0:1], off, s[0:3], null
@@ -5848,7 +5839,6 @@ define amdgpu_kernel void @sub_i64_uniform(ptr addrspace(1) %out, ptr addrspace(
58485839; GFX1232-NEXT: v_mad_co_u64_u32 v[4:5], null, s5, v2, v[4:5]
58495840; GFX1232-NEXT: v_sub_co_u32 v0, vcc_lo, s2, v3
58505841; GFX1232-NEXT: s_mov_b32 s2, -1
5851- ; GFX1232-NEXT: s_delay_alu instid0(VALU_DEP_3)
58525842; GFX1232-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, s3, v4, vcc_lo
58535843; GFX1232-NEXT: s_mov_b32 s3, 0x31016000
58545844; GFX1232-NEXT: buffer_store_b64 v[0:1], off, s[0:3], null
@@ -6818,7 +6808,7 @@ define amdgpu_kernel void @sub_i64_varying(ptr addrspace(1) %out, ptr addrspace(
68186808; GFX1164_DPP-NEXT: v_mov_b32_e32 v8, v4
68196809; GFX1164_DPP-NEXT: v_mov_b32_e32 v9, v5
68206810; GFX1164_DPP-NEXT: v_readfirstlane_b32 s3, v7
6821- ; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
6811+ ; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_3)
68226812; GFX1164_DPP-NEXT: v_sub_co_u32 v6, vcc, s2, v8
68236813; GFX1164_DPP-NEXT: s_mov_b32 s2, s6
68246814; GFX1164_DPP-NEXT: v_sub_co_ci_u32_e32 v7, vcc, s3, v9, vcc
@@ -6903,7 +6893,7 @@ define amdgpu_kernel void @sub_i64_varying(ptr addrspace(1) %out, ptr addrspace(
69036893; GFX1132_DPP-NEXT: v_mov_b32_e32 v10, v6
69046894; GFX1132_DPP-NEXT: v_mov_b32_e32 v11, v7
69056895; GFX1132_DPP-NEXT: v_readfirstlane_b32 s3, v9
6906- ; GFX1132_DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
6896+ ; GFX1132_DPP-NEXT: s_delay_alu instid0(VALU_DEP_3)
69076897; GFX1132_DPP-NEXT: v_sub_co_u32 v8, vcc_lo, s2, v10
69086898; GFX1132_DPP-NEXT: s_mov_b32 s2, s6
69096899; GFX1132_DPP-NEXT: v_sub_co_ci_u32_e32 v9, vcc_lo, s3, v11, vcc_lo
0 commit comments