@@ -180,11 +180,7 @@ define amdgpu_ps %struct.uint96 @s_add64_32(i64 inreg %val64A, i64 inreg %val64B
180180; CHECK-LABEL: s_add64_32:
181181; CHECK: ; %bb.0:
182182; CHECK-NEXT: s_add_u32 s0, s0, s2
183- ; CHECK-NEXT: s_cselect_b64 s[6:7], -1, 0
184- ; CHECK-NEXT: s_cmp_lg_u64 s[6:7], 0
185183; CHECK-NEXT: s_addc_u32 s1, s1, s3
186- ; CHECK-NEXT: s_cselect_b64 s[2:3], -1, 0
187- ; CHECK-NEXT: s_cmp_lg_u64 s[2:3], 0
188184; CHECK-NEXT: s_addc_u32 s2, s4, 0
189185; CHECK-NEXT: ; return to shader part epilog
190186 %sum64 = add i64 %val64A , %val64B
@@ -199,14 +195,10 @@ define amdgpu_ps %struct.uint96 @s_add64_32(i64 inreg %val64A, i64 inreg %val64B
199195define amdgpu_ps <2 x i64 > @s_uadd_v2i64 (<2 x i64 > inreg %val0 , <2 x i64 > inreg %val1 , ptr %ptrval ) {
200196; CHECK-LABEL: s_uadd_v2i64:
201197; CHECK: ; %bb.0:
202- ; CHECK-NEXT: s_add_u32 s10, s2, s6
203- ; CHECK-NEXT: s_cselect_b64 s[8:9], -1, 0
204- ; CHECK-NEXT: s_cmp_lg_u64 s[8:9], 0
205- ; CHECK-NEXT: s_addc_u32 s8, s3, s7
198+ ; CHECK-NEXT: s_add_u32 s6, s2, s6
199+ ; CHECK-NEXT: s_addc_u32 s7, s3, s7
206200; CHECK-NEXT: s_cselect_b64 s[2:3], -1, 0
207201; CHECK-NEXT: s_add_u32 s0, s0, s4
208- ; CHECK-NEXT: s_cselect_b64 s[6:7], -1, 0
209- ; CHECK-NEXT: s_cmp_lg_u64 s[6:7], 0
210202; CHECK-NEXT: s_addc_u32 s1, s1, s5
211203; CHECK-NEXT: v_mov_b32_e32 v2, s0
212204; CHECK-NEXT: v_mov_b32_e32 v3, s1
@@ -215,8 +207,8 @@ define amdgpu_ps <2 x i64> @s_uadd_v2i64(<2 x i64> inreg %val0, <2 x i64> inreg
215207; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[0:1]
216208; CHECK-NEXT: v_readfirstlane_b32 s0, v7
217209; CHECK-NEXT: v_readfirstlane_b32 s2, v6
218- ; CHECK-NEXT: v_mov_b32_e32 v4, s10
219- ; CHECK-NEXT: v_mov_b32_e32 v5, s8
210+ ; CHECK-NEXT: v_mov_b32_e32 v4, s6
211+ ; CHECK-NEXT: v_mov_b32_e32 v5, s7
220212; CHECK-NEXT: s_mov_b32 s1, s0
221213; CHECK-NEXT: s_mov_b32 s3, s2
222214; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
@@ -233,14 +225,10 @@ define amdgpu_ps <2 x i64> @s_uadd_v2i64(<2 x i64> inreg %val0, <2 x i64> inreg
233225define amdgpu_ps <2 x i64 > @s_usub_v2i64 (<2 x i64 > inreg %val0 , <2 x i64 > inreg %val1 , ptr %ptrval ) {
234226; CHECK-LABEL: s_usub_v2i64:
235227; CHECK: ; %bb.0:
236- ; CHECK-NEXT: s_sub_u32 s10, s2, s6
237- ; CHECK-NEXT: s_cselect_b64 s[8:9], -1, 0
238- ; CHECK-NEXT: s_cmp_lg_u64 s[8:9], 0
239- ; CHECK-NEXT: s_subb_u32 s8, s3, s7
228+ ; CHECK-NEXT: s_sub_u32 s6, s2, s6
229+ ; CHECK-NEXT: s_subb_u32 s7, s3, s7
240230; CHECK-NEXT: s_cselect_b64 s[2:3], -1, 0
241231; CHECK-NEXT: s_sub_u32 s0, s0, s4
242- ; CHECK-NEXT: s_cselect_b64 s[6:7], -1, 0
243- ; CHECK-NEXT: s_cmp_lg_u64 s[6:7], 0
244232; CHECK-NEXT: s_subb_u32 s1, s1, s5
245233; CHECK-NEXT: v_mov_b32_e32 v2, s0
246234; CHECK-NEXT: v_mov_b32_e32 v3, s1
@@ -249,8 +237,8 @@ define amdgpu_ps <2 x i64> @s_usub_v2i64(<2 x i64> inreg %val0, <2 x i64> inreg
249237; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[0:1]
250238; CHECK-NEXT: v_readfirstlane_b32 s0, v7
251239; CHECK-NEXT: v_readfirstlane_b32 s2, v6
252- ; CHECK-NEXT: v_mov_b32_e32 v4, s10
253- ; CHECK-NEXT: v_mov_b32_e32 v5, s8
240+ ; CHECK-NEXT: v_mov_b32_e32 v4, s6
241+ ; CHECK-NEXT: v_mov_b32_e32 v5, s7
254242; CHECK-NEXT: s_mov_b32 s1, s0
255243; CHECK-NEXT: s_mov_b32 s3, s2
256244; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
@@ -268,8 +256,6 @@ define amdgpu_ps i64 @s_uadd_i64(i64 inreg %val0, i64 inreg %val1, ptr %ptrval)
268256; CHECK-LABEL: s_uadd_i64:
269257; CHECK: ; %bb.0:
270258; CHECK-NEXT: s_add_u32 s0, s0, s2
271- ; CHECK-NEXT: s_cselect_b64 s[4:5], -1, 0
272- ; CHECK-NEXT: s_cmp_lg_u64 s[4:5], 0
273259; CHECK-NEXT: s_addc_u32 s1, s1, s3
274260; CHECK-NEXT: v_mov_b32_e32 v2, s0
275261; CHECK-NEXT: v_mov_b32_e32 v3, s1
@@ -292,8 +278,6 @@ define amdgpu_ps i64 @s_uadd_p1(i64 inreg %val0, i64 inreg %val1, ptr %ptrval) {
292278; CHECK-LABEL: s_uadd_p1:
293279; CHECK: ; %bb.0:
294280; CHECK-NEXT: s_add_u32 s0, s0, 1
295- ; CHECK-NEXT: s_cselect_b64 s[2:3], -1, 0
296- ; CHECK-NEXT: s_cmp_lg_u64 s[2:3], 0
297281; CHECK-NEXT: s_addc_u32 s1, s1, 0
298282; CHECK-NEXT: v_mov_b32_e32 v2, s0
299283; CHECK-NEXT: v_mov_b32_e32 v3, s1
@@ -339,8 +323,6 @@ define amdgpu_ps i64 @s_usub_p1(i64 inreg %val0, i64 inreg %val1, ptr %ptrval) {
339323; CHECK-LABEL: s_usub_p1:
340324; CHECK: ; %bb.0:
341325; CHECK-NEXT: s_sub_u32 s0, s0, 1
342- ; CHECK-NEXT: s_cselect_b64 s[2:3], -1, 0
343- ; CHECK-NEXT: s_cmp_lg_u64 s[2:3], 0
344326; CHECK-NEXT: s_subb_u32 s1, s1, 0
345327; CHECK-NEXT: v_mov_b32_e32 v2, s0
346328; CHECK-NEXT: v_mov_b32_e32 v3, s1
@@ -363,8 +345,6 @@ define amdgpu_ps i64 @s_usub_n1(i64 inreg %val0, i64 inreg %val1, ptr %ptrval) {
363345; CHECK-LABEL: s_usub_n1:
364346; CHECK: ; %bb.0:
365347; CHECK-NEXT: s_sub_u32 s0, s0, -1
366- ; CHECK-NEXT: s_cselect_b64 s[2:3], -1, 0
367- ; CHECK-NEXT: s_cmp_lg_u64 s[2:3], 0
368348; CHECK-NEXT: s_subb_u32 s1, s1, -1
369349; CHECK-NEXT: v_mov_b32_e32 v2, s0
370350; CHECK-NEXT: v_mov_b32_e32 v3, s1
0 commit comments