12 changes: 12 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -6216,6 +6216,18 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
SDLoc(N), VT, N0, N1))
return SD;

// (umin (sub a, b) a) -> (usubo a, b); (select usubo.1, a, usubo.0)
Contributor:
Suggested change:
- // (umin (sub a, b) a) -> (usubo a, b); (select usubo.1, a, usubo.0)
+ // (umin (sub a, b), a) -> (usubo a, b); (select usubo.1, a, usubo.0)

{
SDValue B;
if (sd_match(N0, m_Sub(m_Specific(N1), m_Value(B))) &&
TLI.isOperationLegalOrCustom(ISD::USUBO, VT)) {
EVT SETCCT = getSetCCResultType(VT);
SDVTList VTs = DAG.getVTList(VT, SETCCT);
Contributor (on lines +6224 to +6225):
Suggested change:
- EVT SETCCT = getSetCCResultType(VT);
- SDVTList VTs = DAG.getVTList(VT, SETCCT);
+ SDVTList VTs = DAG.getVTList(VT, getSetCCResultType(VT));

SDValue USO = DAG.getNode(ISD::USUBO, DL, VTs, N1, B);
return DAG.getSelect(DL, VT, USO.getValue(1), N1, USO.getValue(0));
}
}

// Simplify the operands using demanded-bits information.
if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
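The combine above relies on the arithmetic identity umin(a - b, a) == (b > a ? a : a - b) under unsigned wraparound, which is exactly what (usubo a, b) plus the select computes. A small standalone C++ check of that identity (illustration only, not part of the PR):

// Verifies the identity behind the fold: with unsigned wraparound,
// umin(a - b, a) picks a exactly when a - b underflows.
#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  for (uint64_t a : {0ull, 1ull, 7ull, ~0ull}) {
    for (uint64_t b : {0ull, 1ull, 9ull, ~0ull}) {
      uint64_t sub = a - b;                  // usubo.0: wraps on underflow
      bool underflow = b > a;                // usubo.1: the overflow bit
      uint64_t folded = underflow ? a : sub; // select(usubo.1, a, usubo.0)
      assert(std::min(sub, a) == folded);
    }
  }
  return 0;
}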
53 changes: 53 additions & 0 deletions llvm/test/CodeGen/AArch64/underflow-compare-fold.ll
@@ -0,0 +1,53 @@
; RUN: llc < %s -mtriple=aarch64 | FileCheck %s

; GitHub issue #161036

; Positive test : umin(sub(a,b),a) with scalar types should be folded
define i64 @underflow_compare_fold(i64 %a, i64 %b) {
Contributor:
Misses the case where the min is commuted?

; CHECK-LABEL: underflow_compare_fold
; CHECK: // %bb.0:
; CHECK-NEXT: subs x8, x0, x1
; CHECK-NEXT: csel x0, x0, x8, lo
; CHECK-NEXT: ret
%sub = sub i64 %a, %b
%cond = tail call i64 @llvm.umin.i64(i64 %sub, i64 %a)
ret i64 %cond
}
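The reviewer's question above concerns the commuted form, umin(a, sub(a, b)), which the current match misses because only N0 is checked for the sub (see the umin_sub_inverse_args test below, where no fold happens). A hypothetical way to cover both operand orders in visitIMINMAX, reusing the PR's names (N0, N1, DL, VT) and matchers, might look like the following sketch; it is not the PR's code:

  // Sketch only: try both operand orders so (umin a, (sub a, b)) folds too.
  for (const auto &[Sub, A] : {std::make_pair(N0, N1), std::make_pair(N1, N0)}) {
    SDValue B;
    if (sd_match(Sub, m_Sub(m_Specific(A), m_Value(B))) &&
        TLI.isOperationLegalOrCustom(ISD::USUBO, VT)) {
      SDVTList VTs = DAG.getVTList(VT, getSetCCResultType(VT));
      SDValue USO = DAG.getNode(ISD::USUBO, DL, VTs, A, B);
      return DAG.getSelect(DL, VT, USO.getValue(1), A, USO.getValue(0));
    }
  }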
Contributor:
Test the same pattern with vectors. Also add a multiple-use test and an illegal type.

Member Author:
I've added some tests with vector types, incorrect patterns, etc.

I didn't guard against multiple uses here, since I thought the results of the sub and the umin are still available after the fold. Am I misunderstanding something, and should I guard the pattern with m_OneUse?

Contributor:
The point isn't whether it's correct, but whether it's worthwhile: if you aren't eliminating the instruction, is the fold worth doing? (Perhaps it is if the min would otherwise be expanded.)
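If the conclusion is that the fold should only fire when the sub is actually eliminated, one way to express the guard the author asks about, assuming m_OneUse composes with the matchers already used in the PR, would be the following sketch (not the PR's code):

  // Sketch only: restrict the fold to subs with a single use, so the
  // transform always removes the sub rather than duplicating the work.
  {
    SDValue B;
    if (sd_match(N0, m_OneUse(m_Sub(m_Specific(N1), m_Value(B)))) &&
        TLI.isOperationLegalOrCustom(ISD::USUBO, VT)) {
      SDVTList VTs = DAG.getVTList(VT, getSetCCResultType(VT));
      SDValue USO = DAG.getNode(ISD::USUBO, DL, VTs, N1, B);
      return DAG.getSelect(DL, VT, USO.getValue(1), N1, USO.getValue(0));
    }
  }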


; Negative test, vector types : umin(sub(a,b),a) but with vectors
define <16 x i8> @underflow_compare_dontfold_vectors(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: underflow_compare_dontfold_vectors
; CHECK-LABEL: %bb.0
; CHECK-NEXT: sub v1.16b, v0.16b, v1.16b
; CHECK-NEXT: umin v0.16b, v1.16b, v0.16b
; CHECK-NEXT: ret
%sub = sub <16 x i8> %a, %b
%cond = tail call <16 x i8> @llvm.umin.v16i8(<16 x i8> %sub, <16 x i8> %a)
ret <16 x i8> %cond
}

; Negative test, pattern mismatch : umin(a,sub(a,b))
define i64 @umin_sub_inverse_args(i64 %a, i64 %b) {
; CHECK-LABEL: umin_sub_inverse_args
; CHECK-LABEL: %bb.0
; CHECK-NEXT: sub x8, x0, x1
; CHECK-NEXT: cmp x0, x8
; CHECK-NEXT: csel x0, x0, x8, lo
; CHECK-NEXT: ret
%sub = sub i64 %a, %b
%cond = tail call i64 @llvm.umin.i64(i64 %a, i64 %sub)
ret i64 %cond
}

; Negative test, pattern mismatch : umin(add(a,b),a)
define i64 @umin_add(i64 %a, i64 %b) {
; CHECK-LABEL: umin_add
; CHECK-LABEL: %bb.0
; CHECK-NEXT: add x8, x0, x1
; CHECK-NEXT: cmp x8, x0
; CHECK-NEXT: csel x0, x8, x0, lo
; CHECK-NEXT: ret
%add = add i64 %a, %b
%cond = tail call i64 @llvm.umin.i64(i64 %add, i64 %a)
ret i64 %cond
}
56 changes: 56 additions & 0 deletions llvm/test/CodeGen/X86/underflow-compare-fold.ll
@@ -0,0 +1,56 @@
; RUN: llc < %s -mtriple=x86_64 | FileCheck %s

; GitHub issue #161036

; Positive test : umin(sub(a,b),a) with scalar types should be folded
define i64 @underflow_compare_fold(i64 %a, i64 %b) {
; CHECK-LABEL: underflow_compare_fold
; CHECK-LABEL: %bb.0
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: subq %rsi, %rax
; CHECK-NEXT: cmovbq %rdi, %rax
; CHECK-NEXT: retq
%sub = sub i64 %a, %b
%cond = tail call i64 @llvm.umin.i64(i64 %sub, i64 %a)
ret i64 %cond
}

; Negative test, vector types : umin(sub(a,b),a) but with vectors
define <16 x i8> @underflow_compare_dontfold_vectors(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: underflow_compare_dontfold_vectors
; CHECK-LABEL: %bb.0
; CHECK-NEXT: movdqa %xmm0, %xmm2
; CHECK-NEXT: psubb %xmm1, %xmm2
; CHECK-NEXT: pminub %xmm2, %xmm0
; CHECK-NEXT: retq
%sub = sub <16 x i8> %a, %b
%cond = tail call <16 x i8> @llvm.umin.v16i8(<16 x i8> %sub, <16 x i8> %a)
ret <16 x i8> %cond
}

; Negative test, pattern mismatch : umin(a,sub(a,b))
define i64 @umin_sub_inverse_args(i64 %a, i64 %b) {
; CHECK-LABEL: umin_sub_inverse_args
; CHECK-LABEL: %bb.0
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: subq %rsi, %rax
; CHECK-NEXT: cmpq %rax, %rdi
; CHECK-NEXT: cmovbq %rdi, %rax
; CHECK-NEXT: retq
%sub = sub i64 %a, %b
%cond = tail call i64 @llvm.umin.i64(i64 %a, i64 %sub)
ret i64 %cond
}

; Negative test, pattern mismatch : umin(add(a,b),a)
define i64 @umin_add(i64 %a, i64 %b) {
; CHECK-LABEL: umin_add
; CHECK-LABEL: %bb.0
; CHECK-NEXT: leaq (%rsi,%rdi), %rax
; CHECK-NEXT: cmpq %rdi, %rax
; CHECK-NEXT: cmovaeq %rdi, %rax
; CHECK-NEXT: retq
%add = add i64 %a, %b
%cond = tail call i64 @llvm.umin.i64(i64 %add, i64 %a)
ret i64 %cond
}