12 changes: 12 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -6216,6 +6216,18 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
SDLoc(N), VT, N0, N1))
return SD;

// (umin (sub a, b) a) -> (usubo a, b); (select usubo.1, a, usubo.0)
Contributor:
Suggested change:
- // (umin (sub a, b) a) -> (usubo a, b); (select usubo.1, a, usubo.0)
+ // (umin (sub a, b), a) -> (usubo a, b); (select usubo.1, a, usubo.0)

{
SDValue B;
if (sd_match(N0, m_Sub(m_Specific(N1), m_Value(B))) &&
TLI.isOperationLegalOrCustom(ISD::USUBO, VT)) {
EVT SETCCT = getSetCCResultType(VT);
SDVTList VTs = DAG.getVTList(VT, SETCCT);
Contributor (on lines +6224 to +6225):
Suggested change:
- EVT SETCCT = getSetCCResultType(VT);
- SDVTList VTs = DAG.getVTList(VT, SETCCT);
+ SDVTList VTs = DAG.getVTList(VT, getSetCCResultType(VT));

SDValue USO = DAG.getNode(ISD::USUBO, DL, VTs, N1, B);
return DAG.getSelect(DL, VT, USO.getValue(1), N1, USO.getValue(0));
}
}

// Simplify the operands using demanded-bits information.
if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
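The combine above relies on the arithmetic identity umin(a - b, a) == (b > a ? a : a - b) under unsigned wraparound, which is exactly what (usubo a, b) plus the select computes. A small standalone C++ check of that identity (illustration only, not part of the PR):

// Verifies the identity behind the fold: with unsigned wraparound,
// umin(a - b, a) picks a exactly when a - b underflows.
#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  for (uint64_t a : {0ull, 1ull, 7ull, ~0ull}) {
    for (uint64_t b : {0ull, 1ull, 9ull, ~0ull}) {
      uint64_t sub = a - b;                  // usubo.0: wraps on underflow
      bool underflow = b > a;                // usubo.1: the overflow bit
      uint64_t folded = underflow ? a : sub; // select(usubo.1, a, usubo.0)
      assert(std::min(sub, a) == folded);
    }
  }
  return 0;
}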
53 changes: 53 additions & 0 deletions llvm/test/CodeGen/AArch64/underflow-compare-fold.ll
@@ -0,0 +1,53 @@
; RUN: llc < %s -mtriple=aarch64 | FileCheck %s

; GitHub issue #161036

; Positive test : umin(sub(a,b),a) with scalar types should be folded
define i64 @underflow_compare_fold(i64 %a, i64 %b) {
Contributor:
Misses the case where the min is commuted?

; CHECK-LABEL: underflow_compare_fold
; CHECK: // %bb.0:
; CHECK-NEXT: subs x8, x0, x1
; CHECK-NEXT: csel x0, x0, x8, lo
; CHECK-NEXT: ret
%sub = sub i64 %a, %b
%cond = tail call i64 @llvm.umin.i64(i64 %sub, i64 %a)
ret i64 %cond
}
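The reviewer's question above concerns the commuted form, umin(a, sub(a, b)), which the current match misses because only N0 is checked for the sub (see the umin_sub_inverse_args test below, where no fold happens). A hypothetical way to cover both operand orders in visitIMINMAX, reusing the PR's names (N0, N1, DL, VT) and matchers, might look like the following sketch; it is not the PR's code:

  // Sketch only: try both operand orders so (umin a, (sub a, b)) folds too.
  for (const auto &[Sub, A] : {std::make_pair(N0, N1), std::make_pair(N1, N0)}) {
    SDValue B;
    if (sd_match(Sub, m_Sub(m_Specific(A), m_Value(B))) &&
        TLI.isOperationLegalOrCustom(ISD::USUBO, VT)) {
      SDVTList VTs = DAG.getVTList(VT, getSetCCResultType(VT));
      SDValue USO = DAG.getNode(ISD::USUBO, DL, VTs, A, B);
      return DAG.getSelect(DL, VT, USO.getValue(1), A, USO.getValue(0));
    }
  }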
Contributor:
Test the same pattern with vectors. Also add a multiple-use test and an illegal type.

Member Author:
I've added some tests with vector types, incorrect patterns, etc.

I didn't guard against multiple uses here, since I thought the results of the sub and the umin are still available after the fold. Am I misunderstanding something, and should I guard the pattern with m_OneUse?

Contributor:
The point isn't whether it's correct, but whether it's worthwhile: if you aren't eliminating the instruction, is the fold worth doing? (Perhaps it is if the min would otherwise be expanded.)
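If the conclusion is that the fold should only fire when the sub is actually eliminated, one way to express the guard the author asks about, assuming m_OneUse composes with the matchers already used in the PR, would be the following sketch (not the PR's code):

  // Sketch only: restrict the fold to subs with a single use, so the
  // transform always removes the sub rather than duplicating the work.
  {
    SDValue B;
    if (sd_match(N0, m_OneUse(m_Sub(m_Specific(N1), m_Value(B)))) &&
        TLI.isOperationLegalOrCustom(ISD::USUBO, VT)) {
      SDVTList VTs = DAG.getVTList(VT, getSetCCResultType(VT));
      SDValue USO = DAG.getNode(ISD::USUBO, DL, VTs, N1, B);
      return DAG.getSelect(DL, VT, USO.getValue(1), N1, USO.getValue(0));
    }
  }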


; Negative test, vector types : umin(sub(a,b),a) but with vectors
define <16 x i8> @underflow_compare_dontfold_vectors(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: underflow_compare_dontfold_vectors
; CHECK-LABEL: %bb.0
; CHECK-NEXT: sub v1.16b, v0.16b, v1.16b
; CHECK-NEXT: umin v0.16b, v1.16b, v0.16b
; CHECK-NEXT: ret
%sub = sub <16 x i8> %a, %b
%cond = tail call <16 x i8> @llvm.umin.v16i8(<16 x i8> %sub, <16 x i8> %a)
ret <16 x i8> %cond
}

; Negative test, pattern mismatch : umin(a,sub(a,b))
define i64 @umin_sub_inverse_args(i64 %a, i64 %b) {
; CHECK-LABEL: umin_sub_inverse_args
; CHECK-LABEL: %bb.0
; CHECK-NEXT: sub x8, x0, x1
; CHECK-NEXT: cmp x0, x8
; CHECK-NEXT: csel x0, x0, x8, lo
; CHECK-NEXT: ret
%sub = sub i64 %a, %b
%cond = tail call i64 @llvm.umin.i64(i64 %a, i64 %sub)
ret i64 %cond
}

; Negative test, pattern mismatch : umin(add(a,b),a)
define i64 @umin_add(i64 %a, i64 %b) {
; CHECK-LABEL: umin_add
; CHECK-LABEL: %bb.0
; CHECK-NEXT: add x8, x0, x1
; CHECK-NEXT: cmp x8, x0
; CHECK-NEXT: csel x0, x8, x0, lo
; CHECK-NEXT: ret
%add = add i64 %a, %b
%cond = tail call i64 @llvm.umin.i64(i64 %add, i64 %a)
ret i64 %cond
}
56 changes: 56 additions & 0 deletions llvm/test/CodeGen/X86/underflow-compare-fold.ll
@@ -0,0 +1,56 @@
; RUN: llc < %s -mtriple=x86_64 | FileCheck %s

; GitHub issue #161036

; Positive test : umin(sub(a,b),a) with scalar types should be folded
define i64 @underflow_compare_fold(i64 %a, i64 %b) {
; CHECK-LABEL: underflow_compare_fold
; CHECK-LABEL: %bb.0
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: subq %rsi, %rax
; CHECK-NEXT: cmovbq %rdi, %rax
; CHECK-NEXT: retq
%sub = sub i64 %a, %b
%cond = tail call i64 @llvm.umin.i64(i64 %sub, i64 %a)
ret i64 %cond
}

; Negative test, vector types : umin(sub(a,b),a) but with vectors
define <16 x i8> @underflow_compare_dontfold_vectors(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: underflow_compare_dontfold_vectors
; CHECK-LABEL: %bb.0
; CHECK-NEXT: movdqa %xmm0, %xmm2
; CHECK-NEXT: psubb %xmm1, %xmm2
; CHECK-NEXT: pminub %xmm2, %xmm0
; CHECK-NEXT: retq
%sub = sub <16 x i8> %a, %b
%cond = tail call <16 x i8> @llvm.umin.v16i8(<16 x i8> %sub, <16 x i8> %a)
ret <16 x i8> %cond
}

; Negative test, pattern mismatch : umin(a,sub(a,b))
define i64 @umin_sub_inverse_args(i64 %a, i64 %b) {
; CHECK-LABEL: umin_sub_inverse_args
; CHECK-LABEL: %bb.0
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: subq %rsi, %rax
; CHECK-NEXT: cmpq %rax, %rdi
; CHECK-NEXT: cmovbq %rdi, %rax
; CHECK-NEXT: retq
%sub = sub i64 %a, %b
%cond = tail call i64 @llvm.umin.i64(i64 %a, i64 %sub)
ret i64 %cond
}

; Negative test, pattern mismatch : umin(add(a,b),a)
define i64 @umin_add(i64 %a, i64 %b) {
; CHECK-LABEL: umin_add
; CHECK-LABEL: %bb.0
; CHECK-NEXT: leaq (%rsi,%rdi), %rax
; CHECK-NEXT: cmpq %rdi, %rax
; CHECK-NEXT: cmovaeq %rdi, %rax
; CHECK-NEXT: retq
%add = add i64 %a, %b
%cond = tail call i64 @llvm.umin.i64(i64 %add, i64 %a)
ret i64 %cond
}