[RISCV] Add implementation of targetShrinkDemandedConstant to optimize AND immediates.

SimplifyDemandedBits can remove set bits from the immediates of instructions like AND/OR/XOR. This can prevent them from being efficiently codegened on RISCV. This adds an initial version that tries to keep or form 12-bit sign-extended immediates for AND operations to enable use of ANDI. If that doesn't work, we'll try to create a 32-bit sign-extended immediate to use LUI+ADDIW. More optimizations are possible for different size immediates or different operations, but this is a good starting point that already has test coverage. Reviewed By: frasercrmck Differential Revision: https://reviews.llvm.org/D94628
kyulee-com · Jan 15, 2021 · 86e604c · 86e604c
1 parent d0cb0d3
commit 86e604c
Show file tree

Hide file tree

Showing 10 changed files with 196 additions and 231 deletions.
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1900,6 +1900,71 @@ bool RISCVTargetLowering::isDesirableToCommuteWithShift(
   return true;
 }
 
+// Target hook that chooses the constant operand of a scalar AND so that it
+// is cheap to materialize: a 12-bit sign-extended immediate (usable by ANDI)
+// or, failing that, a 32-bit sign-extended immediate (LUI+ADDIW).
+//
+// Op           - node to inspect; only a scalar ISD::AND whose second operand
+//                is a ConstantSDNode is handled.
+// DemandedBits - result bits that are demanded; bits outside this set may be
+//                set or cleared freely in the constant.
+// DemandedElts - not used by this implementation (vector types are rejected).
+// TLO          - provides the DAG and LegalOps flag, and records the
+//                replacement node through CombineTo.
+//
+// Returns false when the caller should apply its own shrinking. Returns true
+// when the existing constant should be kept untouched. Otherwise returns the
+// result of TLO.CombineTo after rebuilding the AND with the new constant.
+bool RISCVTargetLowering::targetShrinkDemandedConstant(
+    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
+    TargetLoweringOpt &TLO) const {
+  // Delay this optimization as late as possible: do nothing until
+  // TLO.LegalOps is set.
+  if (!TLO.LegalOps)
+    return false;
+
+  // Vector types are not handled.
+  EVT VT = Op.getValueType();
+  if (VT.isVector())
+    return false;
+
+  // Only handle AND for now.
+  if (Op.getOpcode() != ISD::AND)
+    return false;
+
+  // The mask must be a constant in operand 1.
+  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+  if (!C)
+    return false;
+
+  const APInt &Mask = C->getAPIntValue();
+
+  // Clear all non-demanded bits initially. This is the smallest set of bits
+  // the new constant must contain.
+  APInt ShrunkMask = Mask & DemandedBits;
+
+  // If the shrunk mask fits in sign extended 12 bits, let the target
+  // independent code apply it.
+  if (ShrunkMask.isSignedIntN(12))
+    return false;
+
+  // Try to make a smaller immediate by setting undemanded bits.
+
+  // We need to be able to make a negative number through a combination of mask
+  // and undemanded bits. ExpandedMask is the largest set of bits the new
+  // constant may contain.
+  APInt ExpandedMask = Mask | ~DemandedBits;
+  if (!ExpandedMask.isNegative())
+    return false;
+
+  // Compute the fewest bits needed to represent that negative number as a
+  // signed value.
+  unsigned MinSignedBits = ExpandedMask.getMinSignedBits();
+
+  // Try to make a 12 bit negative immediate. If that fails try to make a 32
+  // bit negative immediate unless the shrunk immediate already fits in 32 bits.
+  // Setting every bit from position 11 (or 31) upward makes the value a
+  // sign extension of its low 12 (or 32) bits.
+  APInt NewMask = ShrunkMask;
+  if (MinSignedBits <= 12)
+    NewMask.setBitsFrom(11);
+  else if (MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))
+    NewMask.setBitsFrom(31);
+  else
+    return false;
+
+  // Sanity check that the new mask only sets bits that are in the original
+  // mask or are not demanded.
+  assert(NewMask.isSubsetOf(ExpandedMask));
+
+  // If we aren't changing the mask, just return true to keep it and prevent
+  // the caller from optimizing.
+  if (NewMask == Mask)
+    return true;
+
+  // Replace the constant with the new mask by rebuilding the AND on the same
+  // first operand.
+  SDLoc DL(Op);
+  SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT);
+  SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC);
+  return TLO.CombineTo(Op, NewOp);
+}
+
 void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
                                                         KnownBits &Known,
                                                         const APInt &DemandedElts,

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -131,6 +131,10 @@ class RISCVTargetLowering : public TargetLowering {
 
   SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
 
+  // Adjusts the constant mask of a scalar AND, using the bits that are not
+  // demanded, so it fits a 12-bit or 32-bit sign-extended immediate. The
+  // definition is in RISCVISelLowering.cpp.
+  bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
+                                    const APInt &DemandedElts,
+                                    TargetLoweringOpt &TLO) const override;
+
   void computeKnownBitsForTargetNode(const SDValue Op,
                                      KnownBits &Known,
                                      const APInt &DemandedElts,

diff --git a/llvm/test/CodeGen/RISCV/copysign-casts.ll b/llvm/test/CodeGen/RISCV/copysign-casts.ll
@@ -42,8 +42,7 @@ define double @fold_promote_d_s(double %a, float %b) nounwind {
 ; RV64I-NEXT:    slli a2, a2, 63
 ; RV64I-NEXT:    addi a2, a2, -1
 ; RV64I-NEXT:    and a0, a0, a2
-; RV64I-NEXT:    addi a2, zero, 1
-; RV64I-NEXT:    slli a2, a2, 31
+; RV64I-NEXT:    lui a2, 524288
 ; RV64I-NEXT:    and a1, a1, a2
 ; RV64I-NEXT:    slli a1, a1, 32
 ; RV64I-NEXT:    or a0, a0, a1
@@ -188,10 +187,7 @@ define float @fold_promote_f_h(float %a, half %b) nounwind {
 ; RV64I-NEXT:    lui a2, 524288
 ; RV64I-NEXT:    addiw a2, a2, -1
 ; RV64I-NEXT:    and a0, a0, a2
-; RV64I-NEXT:    addi a2, zero, 1
-; RV64I-NEXT:    slli a2, a2, 33
-; RV64I-NEXT:    addi a2, a2, -1
-; RV64I-NEXT:    slli a2, a2, 15
+; RV64I-NEXT:    lui a2, 1048568
 ; RV64I-NEXT:    and a1, a1, a2
 ; RV64I-NEXT:    slli a1, a1, 16
 ; RV64I-NEXT:    or a0, a0, a1

diff --git a/llvm/test/CodeGen/RISCV/frame-info.ll b/llvm/test/CodeGen/RISCV/frame-info.ll
@@ -82,10 +82,7 @@ define void @stack_alloc(i32 signext %size) {
 ; RV64-NEXT:    slli a0, a0, 32
 ; RV64-NEXT:    srli a0, a0, 32
 ; RV64-NEXT:    addi a0, a0, 15
-; RV64-NEXT:    addi a1, zero, 1
-; RV64-NEXT:    slli a1, a1, 33
-; RV64-NEXT:    addi a1, a1, -16
-; RV64-NEXT:    and a0, a0, a1
+; RV64-NEXT:    andi a0, a0, -16
 ; RV64-NEXT:    sub a0, sp, a0
 ; RV64-NEXT:    mv sp, a0
 ; RV64-NEXT:    call callee_with_args@plt
@@ -129,10 +126,7 @@ define void @stack_alloc(i32 signext %size) {
 ; RV64-WITHFP-NEXT:    slli a0, a0, 32
 ; RV64-WITHFP-NEXT:    srli a0, a0, 32
 ; RV64-WITHFP-NEXT:    addi a0, a0, 15
-; RV64-WITHFP-NEXT:    addi a1, zero, 1
-; RV64-WITHFP-NEXT:    slli a1, a1, 33
-; RV64-WITHFP-NEXT:    addi a1, a1, -16
-; RV64-WITHFP-NEXT:    and a0, a0, a1
+; RV64-WITHFP-NEXT:    andi a0, a0, -16
 ; RV64-WITHFP-NEXT:    sub a0, sp, a0
 ; RV64-WITHFP-NEXT:    mv sp, a0
 ; RV64-WITHFP-NEXT:    call callee_with_args@plt

diff --git a/llvm/test/CodeGen/RISCV/half-bitmanip-dagcombines.ll b/llvm/test/CodeGen/RISCV/half-bitmanip-dagcombines.ll
@@ -122,10 +122,7 @@ define half @fcopysign_fneg(half %a, half %b) nounwind {
 ; RV64I-NEXT:    lui a2, 524288
 ; RV64I-NEXT:    addiw a2, a2, -1
 ; RV64I-NEXT:    and a0, a0, a2
-; RV64I-NEXT:    addi a2, zero, 1
-; RV64I-NEXT:    slli a2, a2, 33
-; RV64I-NEXT:    addi a2, a2, -1
-; RV64I-NEXT:    slli a2, a2, 15
+; RV64I-NEXT:    lui a2, 1048568
 ; RV64I-NEXT:    and a1, a1, a2
 ; RV64I-NEXT:    slli a1, a1, 16
 ; RV64I-NEXT:    or a0, a0, a1