Skip to content

Commit 529bd4f

Browse files
committed
[DAG] SimplifyDemandedBits - don't early-out for multiple use values
SimplifyDemandedBits currently early-outs for multi-use values beyond the root node (just returning the knownbits), which is missing a number of optimizations as there are plenty of cases where we can still simplify when initially demanding all elements/bits. @lenary has confirmed that the test cases in aes-erratum-fix.ll need refactoring and the current increase in codegen is not a major concern. Differential Revision: https://reviews.llvm.org/D129765
1 parent ea460b7 commit 529bd4f

26 files changed

+1843
-1708
lines changed

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1089,6 +1089,10 @@ bool TargetLowering::SimplifyDemandedBits(
10891089
if (Op.isUndef())
10901090
return false;
10911091

1092+
// We can't simplify target constants.
1093+
if (Op.getOpcode() == ISD::TargetConstant)
1094+
return false;
1095+
10921096
if (Op.getOpcode() == ISD::Constant) {
10931097
// We know all of the bits for a constant!
10941098
Known = KnownBits::makeConstant(cast<ConstantSDNode>(Op)->getAPIntValue());
@@ -1103,17 +1107,16 @@ bool TargetLowering::SimplifyDemandedBits(
11031107
}
11041108

11051109
// Other users may use these bits.
1110+
bool HasMultiUse = false;
11061111
if (!Op.getNode()->hasOneUse() && !AssumeSingleUse) {
1107-
if (Depth != 0) {
1108-
// If not at the root, Just compute the Known bits to
1109-
// simplify things downstream.
1110-
Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1112+
if (Depth >= SelectionDAG::MaxRecursionDepth) {
1113+
// Limit search depth.
11111114
return false;
11121115
}
1113-
// If this is the root being simplified, allow it to have multiple uses,
1114-
// just set the DemandedBits/Elts to all bits.
1116+
// Allow multiple uses, just set the DemandedBits/Elts to all bits.
11151117
DemandedBits = APInt::getAllOnes(BitWidth);
11161118
DemandedElts = APInt::getAllOnes(NumElts);
1119+
HasMultiUse = true;
11171120
} else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
11181121
// Not demanding any bits/elts from Op.
11191122
return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
@@ -1124,8 +1127,6 @@ bool TargetLowering::SimplifyDemandedBits(
11241127

11251128
KnownBits Known2;
11261129
switch (Op.getOpcode()) {
1127-
case ISD::TargetConstant:
1128-
llvm_unreachable("Can't simplify this node");
11291130
case ISD::SCALAR_TO_VECTOR: {
11301131
if (!DemandedElts[0])
11311132
return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
@@ -2715,6 +2716,12 @@ bool TargetLowering::SimplifyDemandedBits(
27152716
APFloat(TLO.DAG.EVTToAPFloatSemantics(VT), Known.One), dl, VT));
27162717
}
27172718

2719+
// A multi use 'all demanded elts' simplify failed to find any knownbits.
2720+
// Try again just for the original demanded elts.
2721+
// Ensure we do this AFTER constant folding above.
2722+
if (HasMultiUse && Known.isUnknown() && !OriginalDemandedElts.isAllOnes())
2723+
Known = TLO.DAG.computeKnownBits(Op, OriginalDemandedElts, Depth);
2724+
27182725
return false;
27192726
}
27202727

llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll

Lines changed: 59 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -2616,36 +2616,36 @@ define <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) {
26162616
; CHECK-NEXT: mov w8, #1895825407
26172617
; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
26182618
; CHECK-NEXT: mov x25, #-34359738368
2619-
; CHECK-NEXT: mov x23, #34359738367
2619+
; CHECK-NEXT: mov x22, #34359738367
26202620
; CHECK-NEXT: fmov s9, w8
26212621
; CHECK-NEXT: fcmp s8, s10
26222622
; CHECK-NEXT: mov h0, v0.h[3]
2623-
; CHECK-NEXT: csel x8, xzr, x0, lt
2624-
; CHECK-NEXT: csel x9, x25, x1, lt
2623+
; CHECK-NEXT: csel x8, x25, x1, lt
2624+
; CHECK-NEXT: csel x9, xzr, x0, lt
26252625
; CHECK-NEXT: fcmp s8, s9
2626-
; CHECK-NEXT: csel x9, x23, x9, gt
2627-
; CHECK-NEXT: csinv x8, x8, xzr, le
2626+
; CHECK-NEXT: csinv x9, x9, xzr, le
2627+
; CHECK-NEXT: csel x8, x22, x8, gt
26282628
; CHECK-NEXT: fcmp s8, s8
26292629
; CHECK-NEXT: fcvt s8, h0
26302630
; CHECK-NEXT: csel x8, xzr, x8, vs
26312631
; CHECK-NEXT: fmov s0, s8
2632-
; CHECK-NEXT: str x8, [sp, #24] // 8-byte Folded Spill
2633-
; CHECK-NEXT: csel x8, xzr, x9, vs
26342632
; CHECK-NEXT: str x8, [sp, #72] // 8-byte Folded Spill
2633+
; CHECK-NEXT: csel x8, xzr, x9, vs
2634+
; CHECK-NEXT: str x8, [sp, #24] // 8-byte Folded Spill
26352635
; CHECK-NEXT: bl __fixsfti
26362636
; CHECK-NEXT: fcmp s8, s10
26372637
; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
26382638
; CHECK-NEXT: csel x8, xzr, x0, lt
26392639
; CHECK-NEXT: csel x9, x25, x1, lt
26402640
; CHECK-NEXT: fcmp s8, s9
2641-
; CHECK-NEXT: csel x9, x23, x9, gt
2641+
; CHECK-NEXT: csel x9, x22, x9, gt
26422642
; CHECK-NEXT: csinv x8, x8, xzr, le
26432643
; CHECK-NEXT: fcmp s8, s8
26442644
; CHECK-NEXT: fcvt s8, h0
2645-
; CHECK-NEXT: csel x8, xzr, x8, vs
2646-
; CHECK-NEXT: csel x22, xzr, x9, vs
2645+
; CHECK-NEXT: csel x10, xzr, x8, vs
2646+
; CHECK-NEXT: csel x8, xzr, x9, vs
26472647
; CHECK-NEXT: fmov s0, s8
2648-
; CHECK-NEXT: str x8, [sp, #16] // 8-byte Folded Spill
2648+
; CHECK-NEXT: stp x8, x10, [sp, #8] // 16-byte Folded Spill
26492649
; CHECK-NEXT: bl __fixsfti
26502650
; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
26512651
; CHECK-NEXT: fcmp s8, s10
@@ -2654,10 +2654,10 @@ define <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) {
26542654
; CHECK-NEXT: csel x9, xzr, x0, lt
26552655
; CHECK-NEXT: fcmp s8, s9
26562656
; CHECK-NEXT: csinv x9, x9, xzr, le
2657-
; CHECK-NEXT: csel x8, x23, x8, gt
2657+
; CHECK-NEXT: csel x8, x22, x8, gt
26582658
; CHECK-NEXT: fcmp s8, s8
26592659
; CHECK-NEXT: fcvt s8, h0
2660-
; CHECK-NEXT: csel x24, xzr, x8, vs
2660+
; CHECK-NEXT: csel x26, xzr, x8, vs
26612661
; CHECK-NEXT: csel x8, xzr, x9, vs
26622662
; CHECK-NEXT: fmov s0, s8
26632663
; CHECK-NEXT: str x8, [sp, #32] // 8-byte Folded Spill
@@ -2669,40 +2669,39 @@ define <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) {
26692669
; CHECK-NEXT: csel x9, xzr, x0, lt
26702670
; CHECK-NEXT: fcmp s8, s9
26712671
; CHECK-NEXT: csinv x9, x9, xzr, le
2672-
; CHECK-NEXT: csel x8, x23, x8, gt
2672+
; CHECK-NEXT: csel x8, x22, x8, gt
26732673
; CHECK-NEXT: fcmp s8, s8
26742674
; CHECK-NEXT: fcvt s8, h0
2675-
; CHECK-NEXT: csel x27, xzr, x8, vs
2675+
; CHECK-NEXT: csel x28, xzr, x8, vs
26762676
; CHECK-NEXT: csel x8, xzr, x9, vs
26772677
; CHECK-NEXT: fmov s0, s8
2678-
; CHECK-NEXT: str x8, [sp, #8] // 8-byte Folded Spill
2678+
; CHECK-NEXT: str x8, [sp] // 8-byte Folded Spill
26792679
; CHECK-NEXT: bl __fixsfti
26802680
; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
26812681
; CHECK-NEXT: fcmp s8, s10
26822682
; CHECK-NEXT: mov h0, v0.h[3]
2683-
; CHECK-NEXT: csel x8, xzr, x0, lt
2684-
; CHECK-NEXT: csel x9, x25, x1, lt
2683+
; CHECK-NEXT: csel x8, x25, x1, lt
2684+
; CHECK-NEXT: csel x9, xzr, x0, lt
26852685
; CHECK-NEXT: fcmp s8, s9
2686-
; CHECK-NEXT: csel x9, x23, x9, gt
2687-
; CHECK-NEXT: csinv x8, x8, xzr, le
2686+
; CHECK-NEXT: csinv x9, x9, xzr, le
2687+
; CHECK-NEXT: csel x8, x22, x8, gt
26882688
; CHECK-NEXT: fcmp s8, s8
26892689
; CHECK-NEXT: fcvt s8, h0
2690-
; CHECK-NEXT: csel x8, xzr, x8, vs
2691-
; CHECK-NEXT: csel x29, xzr, x9, vs
2690+
; CHECK-NEXT: csel x27, xzr, x8, vs
2691+
; CHECK-NEXT: csel x20, xzr, x9, vs
26922692
; CHECK-NEXT: fmov s0, s8
2693-
; CHECK-NEXT: str x8, [sp] // 8-byte Folded Spill
26942693
; CHECK-NEXT: bl __fixsfti
26952694
; CHECK-NEXT: fcmp s8, s10
26962695
; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
26972696
; CHECK-NEXT: csel x8, xzr, x0, lt
26982697
; CHECK-NEXT: csel x9, x25, x1, lt
26992698
; CHECK-NEXT: fcmp s8, s9
2700-
; CHECK-NEXT: csel x9, x23, x9, gt
2699+
; CHECK-NEXT: csel x9, x22, x9, gt
27012700
; CHECK-NEXT: csinv x8, x8, xzr, le
27022701
; CHECK-NEXT: fcmp s8, s8
27032702
; CHECK-NEXT: fcvt s8, h0
2704-
; CHECK-NEXT: csel x20, xzr, x8, vs
2705-
; CHECK-NEXT: csel x28, xzr, x9, vs
2703+
; CHECK-NEXT: csel x29, xzr, x8, vs
2704+
; CHECK-NEXT: csel x21, xzr, x9, vs
27062705
; CHECK-NEXT: fmov s0, s8
27072706
; CHECK-NEXT: bl __fixsfti
27082707
; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
@@ -2712,65 +2711,54 @@ define <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) {
27122711
; CHECK-NEXT: csel x9, xzr, x0, lt
27132712
; CHECK-NEXT: fcmp s8, s9
27142713
; CHECK-NEXT: csinv x9, x9, xzr, le
2715-
; CHECK-NEXT: csel x8, x23, x8, gt
2714+
; CHECK-NEXT: csel x8, x22, x8, gt
27162715
; CHECK-NEXT: fcmp s8, s8
27172716
; CHECK-NEXT: fcvt s8, h0
2718-
; CHECK-NEXT: csel x21, xzr, x8, vs
2719-
; CHECK-NEXT: csel x26, xzr, x9, vs
2717+
; CHECK-NEXT: csel x23, xzr, x8, vs
2718+
; CHECK-NEXT: csel x24, xzr, x9, vs
27202719
; CHECK-NEXT: fmov s0, s8
27212720
; CHECK-NEXT: bl __fixsfti
2722-
; CHECK-NEXT: fmov d0, x20
27232721
; CHECK-NEXT: fcmp s8, s10
2724-
; CHECK-NEXT: ldr x11, [sp, #8] // 8-byte Folded Reload
2725-
; CHECK-NEXT: lsr x10, x28, #28
2726-
; CHECK-NEXT: ldr d1, [sp] // 8-byte Folded Reload
2727-
; CHECK-NEXT: lsr x12, x29, #28
2728-
; CHECK-NEXT: mov v0.d[1], x28
2722+
; CHECK-NEXT: extr x9, x21, x29, #28
2723+
; CHECK-NEXT: bfi x23, x20, #36, #28
2724+
; CHECK-NEXT: extr x11, x27, x20, #28
2725+
; CHECK-NEXT: str x24, [x19]
27292726
; CHECK-NEXT: csel x8, x25, x1, lt
2730-
; CHECK-NEXT: csel x9, xzr, x0, lt
2727+
; CHECK-NEXT: csel x10, xzr, x0, lt
27312728
; CHECK-NEXT: fcmp s8, s9
2732-
; CHECK-NEXT: stur x11, [x19, #75]
2733-
; CHECK-NEXT: ldr x13, [sp, #32] // 8-byte Folded Reload
2734-
; CHECK-NEXT: csinv x9, x9, xzr, le
2735-
; CHECK-NEXT: csel x8, x23, x8, gt
2729+
; CHECK-NEXT: stur x9, [x19, #41]
2730+
; CHECK-NEXT: stp x23, x11, [x19, #8]
2731+
; CHECK-NEXT: lsr x11, x27, #28
2732+
; CHECK-NEXT: csinv x9, x10, xzr, le
2733+
; CHECK-NEXT: lsr x10, x21, #28
2734+
; CHECK-NEXT: csel x8, x22, x8, gt
27362735
; CHECK-NEXT: fcmp s8, s8
2737-
; CHECK-NEXT: fmov x11, d0
2738-
; CHECK-NEXT: stur x13, [x19, #50]
2739-
; CHECK-NEXT: mov v1.d[1], x29
2740-
; CHECK-NEXT: ldr d0, [sp, #16] // 8-byte Folded Reload
2741-
; CHECK-NEXT: csel x9, xzr, x9, vs
27422736
; CHECK-NEXT: strb w10, [x19, #49]
2743-
; CHECK-NEXT: extr x10, x28, x11, #28
27442737
; CHECK-NEXT: csel x8, xzr, x8, vs
2745-
; CHECK-NEXT: bfi x8, x11, #36, #28
2746-
; CHECK-NEXT: strb w12, [x19, #24]
2738+
; CHECK-NEXT: ldr x10, [sp] // 8-byte Folded Reload
2739+
; CHECK-NEXT: csel x9, xzr, x9, vs
2740+
; CHECK-NEXT: bfi x8, x29, #36, #28
2741+
; CHECK-NEXT: strb w11, [x19, #24]
2742+
; CHECK-NEXT: stur x10, [x19, #75]
2743+
; CHECK-NEXT: ldp x12, x11, [sp, #8] // 16-byte Folded Reload
27472744
; CHECK-NEXT: stur x9, [x19, #25]
2748-
; CHECK-NEXT: fmov x12, d1
2749-
; CHECK-NEXT: stur x10, [x19, #41]
2750-
; CHECK-NEXT: lsr x9, x22, #28
2751-
; CHECK-NEXT: ldr d1, [sp, #24] // 8-byte Folded Reload
27522745
; CHECK-NEXT: stur x8, [x19, #33]
2746+
; CHECK-NEXT: ldr x8, [sp, #32] // 8-byte Folded Reload
2747+
; CHECK-NEXT: extr x10, x12, x11, #28
2748+
; CHECK-NEXT: bfi x28, x11, #36, #28
2749+
; CHECK-NEXT: stur x8, [x19, #50]
2750+
; CHECK-NEXT: ldr x9, [sp, #24] // 8-byte Folded Reload
27532751
; CHECK-NEXT: ldr x11, [sp, #72] // 8-byte Folded Reload
2754-
; CHECK-NEXT: extr x18, x29, x12, #28
2755-
; CHECK-NEXT: mov v0.d[1], x22
2756-
; CHECK-NEXT: bfi x21, x12, #36, #28
2757-
; CHECK-NEXT: str x26, [x19]
2758-
; CHECK-NEXT: mov v1.d[1], x11
2759-
; CHECK-NEXT: lsr x10, x11, #28
2760-
; CHECK-NEXT: mov x13, x11
2761-
; CHECK-NEXT: stp x21, x18, [x19, #8]
2762-
; CHECK-NEXT: fmov x8, d0
2763-
; CHECK-NEXT: strb w9, [x19, #99]
2764-
; CHECK-NEXT: strb w10, [x19, #74]
2765-
; CHECK-NEXT: fmov x11, d1
2766-
; CHECK-NEXT: extr x12, x22, x8, #28
2767-
; CHECK-NEXT: bfi x27, x8, #36, #28
2768-
; CHECK-NEXT: extr x8, x13, x11, #28
2769-
; CHECK-NEXT: bfi x24, x11, #36, #28
2770-
; CHECK-NEXT: stur x12, [x19, #91]
2771-
; CHECK-NEXT: stur x27, [x19, #83]
2752+
; CHECK-NEXT: stur x10, [x19, #91]
2753+
; CHECK-NEXT: stur x28, [x19, #83]
2754+
; CHECK-NEXT: extr x8, x11, x9, #28
2755+
; CHECK-NEXT: bfi x26, x9, #36, #28
2756+
; CHECK-NEXT: lsr x9, x12, #28
27722757
; CHECK-NEXT: stur x8, [x19, #66]
2773-
; CHECK-NEXT: stur x24, [x19, #58]
2758+
; CHECK-NEXT: lsr x8, x11, #28
2759+
; CHECK-NEXT: stur x26, [x19, #58]
2760+
; CHECK-NEXT: strb w9, [x19, #99]
2761+
; CHECK-NEXT: strb w8, [x19, #74]
27742762
; CHECK-NEXT: ldp x20, x19, [sp, #176] // 16-byte Folded Reload
27752763
; CHECK-NEXT: ldp x22, x21, [sp, #160] // 16-byte Folded Reload
27762764
; CHECK-NEXT: ldp x24, x23, [sp, #144] // 16-byte Folded Reload

0 commit comments

Comments
 (0)