From 5ed5b4e7b9b736b00d89120395e7e2c9949f28d8 Mon Sep 17 00:00:00 2001
From: Nikita Popov <nikita.ppv@gmail.com>
Date: Sun, 22 Aug 2021 16:43:27 +0200
Subject: [PATCH 1/2] [InstCombine] Add tests for "eq of parts" with logical op
 (NFC)

We currently only handle this fold when the root is a bitwise and/or
instruction, but not when it is a logical and/or (in select form).

(cherry picked from commit be4b8366fbd2c67b48a743d3e34837e29d6b8d0e)
---
 .../Transforms/InstCombine/eq-of-parts.ll     | 62 +++++++++++++++++++
 1 file changed, 62 insertions(+)

diff --git a/llvm/test/Transforms/InstCombine/eq-of-parts.ll b/llvm/test/Transforms/InstCombine/eq-of-parts.ll
index 3e7ac275e3847..2b52e186e8a43 100644
--- a/llvm/test/Transforms/InstCombine/eq-of-parts.ll
+++ b/llvm/test/Transforms/InstCombine/eq-of-parts.ll
@@ -352,6 +352,37 @@ define i1 @eq_21_extra_use_eq2(i32 %x, i32 %y) {
   ret i1 %c.210
 }
 
+; Logical and (select i1 %a, i1 %b, i1 false) instead of bitwise and.
+
+define i1 @eq_21_logical(i32 %x, i32 %y) {
+; CHECK-LABEL: @eq_21_logical(
+; CHECK-NEXT:    [[X_321:%.*]] = lshr i32 [[X:%.*]], 8
+; CHECK-NEXT:    [[X_1:%.*]] = trunc i32 [[X_321]] to i8
+; CHECK-NEXT:    [[X_32:%.*]] = lshr i32 [[X]], 16
+; CHECK-NEXT:    [[X_2:%.*]] = trunc i32 [[X_32]] to i8
+; CHECK-NEXT:    [[Y_321:%.*]] = lshr i32 [[Y:%.*]], 8
+; CHECK-NEXT:    [[Y_1:%.*]] = trunc i32 [[Y_321]] to i8
+; CHECK-NEXT:    [[Y_32:%.*]] = lshr i32 [[Y]], 16
+; CHECK-NEXT:    [[Y_2:%.*]] = trunc i32 [[Y_32]] to i8
+; CHECK-NEXT:    [[C_1:%.*]] = icmp eq i8 [[X_1]], [[Y_1]]
+; CHECK-NEXT:    [[C_2:%.*]] = icmp eq i8 [[X_2]], [[Y_2]]
+; CHECK-NEXT:    [[C_210:%.*]] = select i1 [[C_2]], i1 [[C_1]], i1 false
+; CHECK-NEXT:    ret i1 [[C_210]]
+;
+  %x.321 = lshr i32 %x, 8
+  %x.1 = trunc i32 %x.321 to i8
+  %x.32 = lshr i32 %x, 16
+  %x.2 = trunc i32 %x.32 to i8
+  %y.321 = lshr i32 %y, 8
+  %y.1 = trunc i32 %y.321 to i8
+  %y.32 = lshr i32 %y, 16
+  %y.2 = trunc i32 %y.32 to i8
+  %c.1 = icmp eq i8 %x.1, %y.1
+  %c.2 = icmp eq i8 %x.2, %y.2
+  %c.210 = select i1 %c.2, i1 %c.1, i1 false
+  ret i1 %c.210
+}
+
 ; Negative tests.
 
 define i1 @eq_21_wrong_op1(i32 %x, i32 %y, i32 %z) {
@@ -992,6 +1023,37 @@ define i1 @ne_21_extra_use_ne2(i32 %x, i32 %y) {
   ret i1 %c.210
 }
 
+; Logical or (select i1 %a, i1 true, i1 %b) instead of bitwise or.
+
+define i1 @ne_21_logical(i32 %x, i32 %y) {
+; CHECK-LABEL: @ne_21_logical(
+; CHECK-NEXT:    [[X_321:%.*]] = lshr i32 [[X:%.*]], 8
+; CHECK-NEXT:    [[X_1:%.*]] = trunc i32 [[X_321]] to i8
+; CHECK-NEXT:    [[X_32:%.*]] = lshr i32 [[X]], 16
+; CHECK-NEXT:    [[X_2:%.*]] = trunc i32 [[X_32]] to i8
+; CHECK-NEXT:    [[Y_321:%.*]] = lshr i32 [[Y:%.*]], 8
+; CHECK-NEXT:    [[Y_1:%.*]] = trunc i32 [[Y_321]] to i8
+; CHECK-NEXT:    [[Y_32:%.*]] = lshr i32 [[Y]], 16
+; CHECK-NEXT:    [[Y_2:%.*]] = trunc i32 [[Y_32]] to i8
+; CHECK-NEXT:    [[C_1:%.*]] = icmp ne i8 [[X_1]], [[Y_1]]
+; CHECK-NEXT:    [[C_2:%.*]] = icmp ne i8 [[X_2]], [[Y_2]]
+; CHECK-NEXT:    [[C_210:%.*]] = select i1 [[C_2]], i1 true, i1 [[C_1]]
+; CHECK-NEXT:    ret i1 [[C_210]]
+;
+  %x.321 = lshr i32 %x, 8
+  %x.1 = trunc i32 %x.321 to i8
+  %x.32 = lshr i32 %x, 16
+  %x.2 = trunc i32 %x.32 to i8
+  %y.321 = lshr i32 %y, 8
+  %y.1 = trunc i32 %y.321 to i8
+  %y.32 = lshr i32 %y, 16
+  %y.2 = trunc i32 %y.32 to i8
+  %c.1 = icmp ne i8 %x.1, %y.1
+  %c.2 = icmp ne i8 %x.2, %y.2
+  %c.210 = select i1 %c.2, i1 true, i1 %c.1
+  ret i1 %c.210
+}
+
 ; Negative tests.
 
 define i1 @ne_21_wrong_op1(i32 %x, i32 %y, i32 %z) {

From a9f19478e3d7eccac3676866b6a74153dcce4a7c Mon Sep 17 00:00:00 2001
From: Nikita Popov <nikita.ppv@gmail.com>
Date: Sun, 22 Aug 2021 16:55:53 +0200
Subject: [PATCH 2/2] [InstCombine] Perform "eq of parts" fold with logical ops

The pattern matched here is too complex for the general conversion of
logical and/or into bitwise and/or to trigger. However, the fold is
poison-safe, so match it with a select root as well:

https://alive2.llvm.org/ce/z/vNzzSg
https://alive2.llvm.org/ce/z/Beyumt

(cherry picked from commit fafe5a6f44c6eebb723844bc04275d23b520400c)
---
 .../InstCombine/InstCombineAndOrXor.cpp       |  8 ++---
 .../InstCombine/InstCombineInternal.h         |  2 ++
 .../InstCombine/InstCombineSelect.cpp         |  9 +++--
 .../Transforms/InstCombine/eq-of-parts.ll     | 36 +++++++------------
 4 files changed, 25 insertions(+), 30 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 120852c44474d..6ee4001d068d8 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -1113,8 +1113,8 @@ static Value *extractIntPart(const IntPart &P, IRBuilderBase &Builder) {
 /// (icmp eq X0, Y0) & (icmp eq X1, Y1) -> icmp eq X01, Y01
 /// (icmp ne X0, Y0) | (icmp ne X1, Y1) -> icmp ne X01, Y01
 /// where X0, X1 and Y0, Y1 are adjacent parts extracted from an integer.
-static Value *foldEqOfParts(ICmpInst *Cmp0, ICmpInst *Cmp1, bool IsAnd,
-                            InstCombiner::BuilderTy &Builder) {
+Value *InstCombinerImpl::foldEqOfParts(ICmpInst *Cmp0, ICmpInst *Cmp1,
+                                       bool IsAnd) {
   if (!Cmp0->hasOneUse() || !Cmp1->hasOneUse())
     return nullptr;
 
@@ -1262,7 +1262,7 @@ Value *InstCombinerImpl::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS,
           foldUnsignedUnderflowCheck(RHS, LHS, /*IsAnd=*/true, Q, Builder))
     return X;
 
-  if (Value *X = foldEqOfParts(LHS, RHS, /*IsAnd=*/true, Builder))
+  if (Value *X = foldEqOfParts(LHS, RHS, /*IsAnd=*/true))
     return X;
 
   // This only handles icmp of constants: (icmp1 A, C1) & (icmp2 B, C2).
@@ -2496,7 +2496,7 @@ Value *InstCombinerImpl::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
           foldUnsignedUnderflowCheck(RHS, LHS, /*IsAnd=*/false, Q, Builder))
     return X;
 
-  if (Value *X = foldEqOfParts(LHS, RHS, /*IsAnd=*/false, Builder))
+  if (Value *X = foldEqOfParts(LHS, RHS, /*IsAnd=*/false))
     return X;
 
   // (icmp ne A, 0) | (icmp ne B, 0) --> (icmp ne (A|B), 0)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index eaa53348028d0..9fe11318ab7f3 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -347,6 +347,8 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
   Value *foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, BinaryOperator &Or);
   Value *foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS, BinaryOperator &Xor);
 
+  Value *foldEqOfParts(ICmpInst *Cmp0, ICmpInst *Cmp1, bool IsAnd);
+
   /// Optimize (fcmp)&(fcmp) or (fcmp)|(fcmp).
   /// NOTE: Unlike most of instcombine, this returns a Value which should
   /// already be inserted into the function.
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 5bbc3c87ca4f2..742db31600c96 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -2754,11 +2754,16 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
                                                         /* IsAnd */ IsAnd))
           return I;
 
-      if (auto *ICmp0 = dyn_cast<ICmpInst>(CondVal))
-        if (auto *ICmp1 = dyn_cast<ICmpInst>(Op1))
+      if (auto *ICmp0 = dyn_cast<ICmpInst>(CondVal)) {
+        if (auto *ICmp1 = dyn_cast<ICmpInst>(Op1)) {
           if (auto *V = foldAndOrOfICmpsOfAndWithPow2(ICmp0, ICmp1, &SI, IsAnd,
                                                       /* IsLogical */ true))
             return replaceInstUsesWith(SI, V);
+
+          if (auto *V = foldEqOfParts(ICmp0, ICmp1, IsAnd))
+            return replaceInstUsesWith(SI, V);
+        }
+      }
     }
 
     // select (select a, true, b), c, false -> select a, c, false
diff --git a/llvm/test/Transforms/InstCombine/eq-of-parts.ll b/llvm/test/Transforms/InstCombine/eq-of-parts.ll
index 2b52e186e8a43..3c1b9561a920c 100644
--- a/llvm/test/Transforms/InstCombine/eq-of-parts.ll
+++ b/llvm/test/Transforms/InstCombine/eq-of-parts.ll
@@ -356,18 +356,12 @@ define i1 @eq_21_extra_use_eq2(i32 %x, i32 %y) {
 
 define i1 @eq_21_logical(i32 %x, i32 %y) {
 ; CHECK-LABEL: @eq_21_logical(
-; CHECK-NEXT:    [[X_321:%.*]] = lshr i32 [[X:%.*]], 8
-; CHECK-NEXT:    [[X_1:%.*]] = trunc i32 [[X_321]] to i8
-; CHECK-NEXT:    [[X_32:%.*]] = lshr i32 [[X]], 16
-; CHECK-NEXT:    [[X_2:%.*]] = trunc i32 [[X_32]] to i8
-; CHECK-NEXT:    [[Y_321:%.*]] = lshr i32 [[Y:%.*]], 8
-; CHECK-NEXT:    [[Y_1:%.*]] = trunc i32 [[Y_321]] to i8
-; CHECK-NEXT:    [[Y_32:%.*]] = lshr i32 [[Y]], 16
-; CHECK-NEXT:    [[Y_2:%.*]] = trunc i32 [[Y_32]] to i8
-; CHECK-NEXT:    [[C_1:%.*]] = icmp eq i8 [[X_1]], [[Y_1]]
-; CHECK-NEXT:    [[C_2:%.*]] = icmp eq i8 [[X_2]], [[Y_2]]
-; CHECK-NEXT:    [[C_210:%.*]] = select i1 [[C_2]], i1 [[C_1]], i1 false
-; CHECK-NEXT:    ret i1 [[C_210]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[X:%.*]], 8
+; CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+; CHECK-NEXT:    [[TMP3:%.*]] = lshr i32 [[Y:%.*]], 8
+; CHECK-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i16 [[TMP2]], [[TMP4]]
+; CHECK-NEXT:    ret i1 [[TMP5]]
 ;
   %x.321 = lshr i32 %x, 8
   %x.1 = trunc i32 %x.321 to i8
@@ -1027,18 +1021,12 @@ define i1 @ne_21_extra_use_ne2(i32 %x, i32 %y) {
 
 define i1 @ne_21_logical(i32 %x, i32 %y) {
 ; CHECK-LABEL: @ne_21_logical(
-; CHECK-NEXT:    [[X_321:%.*]] = lshr i32 [[X:%.*]], 8
-; CHECK-NEXT:    [[X_1:%.*]] = trunc i32 [[X_321]] to i8
-; CHECK-NEXT:    [[X_32:%.*]] = lshr i32 [[X]], 16
-; CHECK-NEXT:    [[X_2:%.*]] = trunc i32 [[X_32]] to i8
-; CHECK-NEXT:    [[Y_321:%.*]] = lshr i32 [[Y:%.*]], 8
-; CHECK-NEXT:    [[Y_1:%.*]] = trunc i32 [[Y_321]] to i8
-; CHECK-NEXT:    [[Y_32:%.*]] = lshr i32 [[Y]], 16
-; CHECK-NEXT:    [[Y_2:%.*]] = trunc i32 [[Y_32]] to i8
-; CHECK-NEXT:    [[C_1:%.*]] = icmp ne i8 [[X_1]], [[Y_1]]
-; CHECK-NEXT:    [[C_2:%.*]] = icmp ne i8 [[X_2]], [[Y_2]]
-; CHECK-NEXT:    [[C_210:%.*]] = select i1 [[C_2]], i1 true, i1 [[C_1]]
-; CHECK-NEXT:    ret i1 [[C_210]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[X:%.*]], 8
+; CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+; CHECK-NEXT:    [[TMP3:%.*]] = lshr i32 [[Y:%.*]], 8
+; CHECK-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i16 [[TMP2]], [[TMP4]]
+; CHECK-NEXT:    ret i1 [[TMP5]]
 ;
   %x.321 = lshr i32 %x, 8
   %x.1 = trunc i32 %x.321 to i8