Skip to content

Commit

Permalink
[RISCV] Add implementation of targetShrinkDemandedConstant to optimiz…
Browse files Browse the repository at this point in the history
…e AND immediates.

SimplifyDemandedBits can remove set bits from the immediates of instructions
like AND/OR/XOR. This can prevent those immediates from being efficiently
codegened on RISCV.

This adds an initial version that tries to keep or form 12-bit
sign-extended immediates for AND operations to enable use of ANDI.
If that doesn't work, we'll try to create a 32-bit sign-extended immediate
to use LUI+ADDIW.

More optimizations are possible for different size immediates or
different operations. But this is a good starting point that already
has test coverage.

Reviewed By: frasercrmck

Differential Revision: https://reviews.llvm.org/D94628
  • Loading branch information
topperc committed Jan 15, 2021
1 parent d0cb0d3 commit 86e604c
Show file tree
Hide file tree
Showing 10 changed files with 196 additions and 231 deletions.
65 changes: 65 additions & 0 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1900,6 +1900,71 @@ bool RISCVTargetLowering::isDesirableToCommuteWithShift(
return true;
}

/// Choose a replacement constant for the mask of a scalar AND when some of
/// its bits are not demanded.
///
/// Bits of the mask outside \p DemandedBits may take either value. Instead of
/// clearing them, this tries to set them so the mask becomes a negative value
/// that fits in 12 signed bits, or failing that in 32 signed bits.
///
/// \param Op            The node being simplified; only ISD::AND with a
///                      constant second operand is handled.
/// \param DemandedBits  Bits of the result that are actually used.
/// \param DemandedElts  Not consulted by this implementation.
/// \param TLO           Supplies the DAG and receives the replacement node.
/// \returns false to leave the decision to the target independent code; true
///          when the existing constant is kept as is, otherwise the result of
///          TLO.CombineTo after the AND has been rebuilt with the new mask.
bool RISCVTargetLowering::targetShrinkDemandedConstant(
    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
    TargetLoweringOpt &TLO) const {
  // Hold off until TLO reports LegalOps so this runs as late as possible.
  if (!TLO.LegalOps)
    return false;

  // Scalar AND is the only shape handled for now.
  EVT VT = Op.getValueType();
  if (VT.isVector() || Op.getOpcode() != ISD::AND)
    return false;

  // The second operand has to be a constant mask.
  ConstantSDNode *MaskNode = dyn_cast<ConstantSDNode>(Op.getOperand(1));
  if (!MaskNode)
    return false;
  const APInt &OldMask = MaskNode->getAPIntValue();

  // Smallest usable mask: every undemanded bit cleared. When that already
  // fits in 12 signed bits the target independent code can apply it itself.
  APInt ShrunkMask = OldMask & DemandedBits;
  if (ShrunkMask.isSignedIntN(12))
    return false;

  // Largest usable mask: every undemanded bit set. A negative immediate is
  // only reachable if this upper bound is itself negative.
  APInt ExpandedMask = OldMask | ~DemandedBits;
  if (!ExpandedMask.isNegative())
    return false;

  // Decide where the run of ones has to start. Prefer a 12 bit negative
  // immediate; fall back to a 32 bit one unless the shrunk mask already fits
  // in 32 signed bits.
  const unsigned SignedWidth = ExpandedMask.getMinSignedBits();
  unsigned FillFrom = 0;
  if (SignedWidth <= 12)
    FillFrom = 11;
  else if (SignedWidth <= 32 && !ShrunkMask.isSignedIntN(32))
    FillFrom = 31;
  else
    return false;

  APInt NewMask = ShrunkMask;
  NewMask.setBitsFrom(FillFrom);

  // The new mask must not set any bit the upper bound leaves clear.
  assert(NewMask.isSubsetOf(ExpandedMask));

  // Nothing to rewrite: report true so the caller keeps the constant and does
  // not optimize it further.
  if (NewMask == OldMask)
    return true;

  // Rebuild the AND around the new constant and hand it to the combiner.
  SDLoc DL(Op);
  SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0),
                                  TLO.DAG.getConstant(NewMask, DL, VT));
  return TLO.CombineTo(Op, NewOp);
}

void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
KnownBits &Known,
const APInt &DemandedElts,
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/RISCV/RISCVISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,10 @@ class RISCVTargetLowering : public TargetLowering {

SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;

/// Override of the TargetLowering hook for shrinking a demanded constant.
/// The RISCV implementation only acts on scalar ISD::AND nodes with a
/// constant second operand, and only once TLO.LegalOps is set. It may
/// replace the mask with one whose undemanded upper bits are set so the
/// value fits a 12 bit or 32 bit sign extended immediate. Returns false to
/// leave the constant to the target independent code.
bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
const APInt &DemandedElts,
TargetLoweringOpt &TLO) const override;

void computeKnownBitsForTargetNode(const SDValue Op,
KnownBits &Known,
const APInt &DemandedElts,
Expand Down
8 changes: 2 additions & 6 deletions llvm/test/CodeGen/RISCV/copysign-casts.ll
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,7 @@ define double @fold_promote_d_s(double %a, float %b) nounwind {
; RV64I-NEXT: slli a2, a2, 63
; RV64I-NEXT: addi a2, a2, -1
; RV64I-NEXT: and a0, a0, a2
; RV64I-NEXT: addi a2, zero, 1
; RV64I-NEXT: slli a2, a2, 31
; RV64I-NEXT: lui a2, 524288
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: slli a1, a1, 32
; RV64I-NEXT: or a0, a0, a1
Expand Down Expand Up @@ -188,10 +187,7 @@ define float @fold_promote_f_h(float %a, half %b) nounwind {
; RV64I-NEXT: lui a2, 524288
; RV64I-NEXT: addiw a2, a2, -1
; RV64I-NEXT: and a0, a0, a2
; RV64I-NEXT: addi a2, zero, 1
; RV64I-NEXT: slli a2, a2, 33
; RV64I-NEXT: addi a2, a2, -1
; RV64I-NEXT: slli a2, a2, 15
; RV64I-NEXT: lui a2, 1048568
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: slli a1, a1, 16
; RV64I-NEXT: or a0, a0, a1
Expand Down
10 changes: 2 additions & 8 deletions llvm/test/CodeGen/RISCV/frame-info.ll
Original file line number Diff line number Diff line change
Expand Up @@ -82,10 +82,7 @@ define void @stack_alloc(i32 signext %size) {
; RV64-NEXT: slli a0, a0, 32
; RV64-NEXT: srli a0, a0, 32
; RV64-NEXT: addi a0, a0, 15
; RV64-NEXT: addi a1, zero, 1
; RV64-NEXT: slli a1, a1, 33
; RV64-NEXT: addi a1, a1, -16
; RV64-NEXT: and a0, a0, a1
; RV64-NEXT: andi a0, a0, -16
; RV64-NEXT: sub a0, sp, a0
; RV64-NEXT: mv sp, a0
; RV64-NEXT: call callee_with_args@plt
Expand Down Expand Up @@ -129,10 +126,7 @@ define void @stack_alloc(i32 signext %size) {
; RV64-WITHFP-NEXT: slli a0, a0, 32
; RV64-WITHFP-NEXT: srli a0, a0, 32
; RV64-WITHFP-NEXT: addi a0, a0, 15
; RV64-WITHFP-NEXT: addi a1, zero, 1
; RV64-WITHFP-NEXT: slli a1, a1, 33
; RV64-WITHFP-NEXT: addi a1, a1, -16
; RV64-WITHFP-NEXT: and a0, a0, a1
; RV64-WITHFP-NEXT: andi a0, a0, -16
; RV64-WITHFP-NEXT: sub a0, sp, a0
; RV64-WITHFP-NEXT: mv sp, a0
; RV64-WITHFP-NEXT: call callee_with_args@plt
Expand Down
5 changes: 1 addition & 4 deletions llvm/test/CodeGen/RISCV/half-bitmanip-dagcombines.ll
Original file line number Diff line number Diff line change
Expand Up @@ -122,10 +122,7 @@ define half @fcopysign_fneg(half %a, half %b) nounwind {
; RV64I-NEXT: lui a2, 524288
; RV64I-NEXT: addiw a2, a2, -1
; RV64I-NEXT: and a0, a0, a2
; RV64I-NEXT: addi a2, zero, 1
; RV64I-NEXT: slli a2, a2, 33
; RV64I-NEXT: addi a2, a2, -1
; RV64I-NEXT: slli a2, a2, 15
; RV64I-NEXT: lui a2, 1048568
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: slli a1, a1, 16
; RV64I-NEXT: or a0, a0, a1
Expand Down
Loading

0 comments on commit 86e604c

Please sign in to comment.