Skip to content

Commit 39380bb

Browse files
committed
Fix typo blocking S_CSELECT* handling
Signed-off-by: John Lu <John.Lu@amd.com>
1 parent 779f4aa commit 39380bb

File tree

10 files changed

+1050
-1319
lines changed

10 files changed

+1050
-1319
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10614,7 +10614,7 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
1061410614
if ((Def->getOpcode() == AMDGPU::S_CSELECT_B32 ||
1061510615
Def->getOpcode() == AMDGPU::S_CSELECT_B64) &&
1061610616
Def->getOperand(1).isImm() && Def->getOperand(1).getImm() &&
10617-
!Def->getOperand(2).isImm() && !Def->getOperand(2).getImm())
10617+
Def->getOperand(2).isImm() && !Def->getOperand(2).getImm())
1061810618
CanOptimize = true;
1061910619

1062010620
if (!CanOptimize)

llvm/test/CodeGen/AMDGPU/addsub64_carry.ll

Lines changed: 8 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -180,11 +180,7 @@ define amdgpu_ps %struct.uint96 @s_add64_32(i64 inreg %val64A, i64 inreg %val64B
180180
; CHECK-LABEL: s_add64_32:
181181
; CHECK: ; %bb.0:
182182
; CHECK-NEXT: s_add_u32 s0, s0, s2
183-
; CHECK-NEXT: s_cselect_b64 s[6:7], -1, 0
184-
; CHECK-NEXT: s_cmp_lg_u64 s[6:7], 0
185183
; CHECK-NEXT: s_addc_u32 s1, s1, s3
186-
; CHECK-NEXT: s_cselect_b64 s[2:3], -1, 0
187-
; CHECK-NEXT: s_cmp_lg_u64 s[2:3], 0
188184
; CHECK-NEXT: s_addc_u32 s2, s4, 0
189185
; CHECK-NEXT: ; return to shader part epilog
190186
%sum64 = add i64 %val64A, %val64B
@@ -199,14 +195,10 @@ define amdgpu_ps %struct.uint96 @s_add64_32(i64 inreg %val64A, i64 inreg %val64B
199195
define amdgpu_ps <2 x i64> @s_uadd_v2i64(<2 x i64> inreg %val0, <2 x i64> inreg %val1, ptr %ptrval) {
200196
; CHECK-LABEL: s_uadd_v2i64:
201197
; CHECK: ; %bb.0:
202-
; CHECK-NEXT: s_add_u32 s10, s2, s6
203-
; CHECK-NEXT: s_cselect_b64 s[8:9], -1, 0
204-
; CHECK-NEXT: s_cmp_lg_u64 s[8:9], 0
205-
; CHECK-NEXT: s_addc_u32 s8, s3, s7
198+
; CHECK-NEXT: s_add_u32 s6, s2, s6
199+
; CHECK-NEXT: s_addc_u32 s7, s3, s7
206200
; CHECK-NEXT: s_cselect_b64 s[2:3], -1, 0
207201
; CHECK-NEXT: s_add_u32 s0, s0, s4
208-
; CHECK-NEXT: s_cselect_b64 s[6:7], -1, 0
209-
; CHECK-NEXT: s_cmp_lg_u64 s[6:7], 0
210202
; CHECK-NEXT: s_addc_u32 s1, s1, s5
211203
; CHECK-NEXT: v_mov_b32_e32 v2, s0
212204
; CHECK-NEXT: v_mov_b32_e32 v3, s1
@@ -215,8 +207,8 @@ define amdgpu_ps <2 x i64> @s_uadd_v2i64(<2 x i64> inreg %val0, <2 x i64> inreg
215207
; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[0:1]
216208
; CHECK-NEXT: v_readfirstlane_b32 s0, v7
217209
; CHECK-NEXT: v_readfirstlane_b32 s2, v6
218-
; CHECK-NEXT: v_mov_b32_e32 v4, s10
219-
; CHECK-NEXT: v_mov_b32_e32 v5, s8
210+
; CHECK-NEXT: v_mov_b32_e32 v4, s6
211+
; CHECK-NEXT: v_mov_b32_e32 v5, s7
220212
; CHECK-NEXT: s_mov_b32 s1, s0
221213
; CHECK-NEXT: s_mov_b32 s3, s2
222214
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
@@ -233,14 +225,10 @@ define amdgpu_ps <2 x i64> @s_uadd_v2i64(<2 x i64> inreg %val0, <2 x i64> inreg
233225
define amdgpu_ps <2 x i64> @s_usub_v2i64(<2 x i64> inreg %val0, <2 x i64> inreg %val1, ptr %ptrval) {
234226
; CHECK-LABEL: s_usub_v2i64:
235227
; CHECK: ; %bb.0:
236-
; CHECK-NEXT: s_sub_u32 s10, s2, s6
237-
; CHECK-NEXT: s_cselect_b64 s[8:9], -1, 0
238-
; CHECK-NEXT: s_cmp_lg_u64 s[8:9], 0
239-
; CHECK-NEXT: s_subb_u32 s8, s3, s7
228+
; CHECK-NEXT: s_sub_u32 s6, s2, s6
229+
; CHECK-NEXT: s_subb_u32 s7, s3, s7
240230
; CHECK-NEXT: s_cselect_b64 s[2:3], -1, 0
241231
; CHECK-NEXT: s_sub_u32 s0, s0, s4
242-
; CHECK-NEXT: s_cselect_b64 s[6:7], -1, 0
243-
; CHECK-NEXT: s_cmp_lg_u64 s[6:7], 0
244232
; CHECK-NEXT: s_subb_u32 s1, s1, s5
245233
; CHECK-NEXT: v_mov_b32_e32 v2, s0
246234
; CHECK-NEXT: v_mov_b32_e32 v3, s1
@@ -249,8 +237,8 @@ define amdgpu_ps <2 x i64> @s_usub_v2i64(<2 x i64> inreg %val0, <2 x i64> inreg
249237
; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[0:1]
250238
; CHECK-NEXT: v_readfirstlane_b32 s0, v7
251239
; CHECK-NEXT: v_readfirstlane_b32 s2, v6
252-
; CHECK-NEXT: v_mov_b32_e32 v4, s10
253-
; CHECK-NEXT: v_mov_b32_e32 v5, s8
240+
; CHECK-NEXT: v_mov_b32_e32 v4, s6
241+
; CHECK-NEXT: v_mov_b32_e32 v5, s7
254242
; CHECK-NEXT: s_mov_b32 s1, s0
255243
; CHECK-NEXT: s_mov_b32 s3, s2
256244
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
@@ -268,8 +256,6 @@ define amdgpu_ps i64 @s_uadd_i64(i64 inreg %val0, i64 inreg %val1, ptr %ptrval)
268256
; CHECK-LABEL: s_uadd_i64:
269257
; CHECK: ; %bb.0:
270258
; CHECK-NEXT: s_add_u32 s0, s0, s2
271-
; CHECK-NEXT: s_cselect_b64 s[4:5], -1, 0
272-
; CHECK-NEXT: s_cmp_lg_u64 s[4:5], 0
273259
; CHECK-NEXT: s_addc_u32 s1, s1, s3
274260
; CHECK-NEXT: v_mov_b32_e32 v2, s0
275261
; CHECK-NEXT: v_mov_b32_e32 v3, s1
@@ -292,8 +278,6 @@ define amdgpu_ps i64 @s_uadd_p1(i64 inreg %val0, i64 inreg %val1, ptr %ptrval) {
292278
; CHECK-LABEL: s_uadd_p1:
293279
; CHECK: ; %bb.0:
294280
; CHECK-NEXT: s_add_u32 s0, s0, 1
295-
; CHECK-NEXT: s_cselect_b64 s[2:3], -1, 0
296-
; CHECK-NEXT: s_cmp_lg_u64 s[2:3], 0
297281
; CHECK-NEXT: s_addc_u32 s1, s1, 0
298282
; CHECK-NEXT: v_mov_b32_e32 v2, s0
299283
; CHECK-NEXT: v_mov_b32_e32 v3, s1
@@ -339,8 +323,6 @@ define amdgpu_ps i64 @s_usub_p1(i64 inreg %val0, i64 inreg %val1, ptr %ptrval) {
339323
; CHECK-LABEL: s_usub_p1:
340324
; CHECK: ; %bb.0:
341325
; CHECK-NEXT: s_sub_u32 s0, s0, 1
342-
; CHECK-NEXT: s_cselect_b64 s[2:3], -1, 0
343-
; CHECK-NEXT: s_cmp_lg_u64 s[2:3], 0
344326
; CHECK-NEXT: s_subb_u32 s1, s1, 0
345327
; CHECK-NEXT: v_mov_b32_e32 v2, s0
346328
; CHECK-NEXT: v_mov_b32_e32 v3, s1
@@ -363,8 +345,6 @@ define amdgpu_ps i64 @s_usub_n1(i64 inreg %val0, i64 inreg %val1, ptr %ptrval) {
363345
; CHECK-LABEL: s_usub_n1:
364346
; CHECK: ; %bb.0:
365347
; CHECK-NEXT: s_sub_u32 s0, s0, -1
366-
; CHECK-NEXT: s_cselect_b64 s[2:3], -1, 0
367-
; CHECK-NEXT: s_cmp_lg_u64 s[2:3], 0
368348
; CHECK-NEXT: s_subb_u32 s1, s1, -1
369349
; CHECK-NEXT: v_mov_b32_e32 v2, s0
370350
; CHECK-NEXT: v_mov_b32_e32 v3, s1

0 commit comments

Comments
 (0)