Skip to content

Commit f1f275e

Browse files
committed
Missing AArch64ISD::BICi handling
1 parent e96c0c1 commit f1f275e

File tree

3 files changed

+162
-4
lines changed

3 files changed

+162
-4
lines changed

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

+9-4
Original file line numberDiff line numberDiff line change
@@ -3416,13 +3416,18 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
34163416
Known = KnownBits::mulhs(Known, Known2);
34173417
break;
34183418
}
3419-
case ISD::AVGCEILU: {
3419+
case ISD::AVGFLOORU:
3420+
case ISD::AVGCEILU:
3421+
case ISD::AVGFLOORS:
3422+
case ISD::AVGCEILS: {
3423+
bool IsCeil = Opcode == ISD::AVGCEILU || Opcode == ISD::AVGCEILS;
3424+
bool IsSigned = Opcode == ISD::AVGFLOORS || Opcode == ISD::AVGCEILS;
34203425
Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
34213426
Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
3422-
Known = Known.zext(BitWidth + 1);
3427+
Known = IsSigned ? Known.sext(BitWidth + 1) : Known.zext(BitWidth + 1);
34233428
// Both operands must be widened the same way: a signed average needs the
// second operand sign-extended too, otherwise a negative value is treated as
// a large positive one and the extracted top bit is wrong.
Known2 = IsSigned ? Known2.sext(BitWidth + 1) : Known2.zext(BitWidth + 1);
3424-
KnownBits One = KnownBits::makeConstant(APInt(1, 1));
3425-
Known = KnownBits::computeForAddCarry(Known, Known2, One);
3429+
KnownBits Carry = KnownBits::makeConstant(APInt(1, IsCeil ? 1 : 0));
3430+
Known = KnownBits::computeForAddCarry(Known, Known2, Carry);
34263431
Known = Known.extractBits(BitWidth, 1);
34273432
break;
34283433
}

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

+28
Original file line numberDiff line numberDiff line change
@@ -24284,6 +24284,19 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
2428424284
if (auto R = foldOverflowCheck(N, DAG, /* IsAdd */ false))
2428524285
return R;
2428624286
return performFlagSettingCombine(N, DCI, AArch64ISD::SBC);
24287+
case AArch64ISD::BICi: {
24288+
KnownBits Known;
24289+
APInt DemandedBits =
24290+
APInt::getAllOnes(N->getValueType(0).getScalarSizeInBits());
24291+
APInt DemandedElts =
24292+
APInt::getAllOnes(N->getValueType(0).getVectorNumElements());
24293+
TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
24294+
!DCI.isBeforeLegalizeOps());
24295+
if (DAG.getTargetLoweringInfo().SimplifyDemandedBits(
24296+
SDValue(N, 0), DemandedBits, DemandedElts, Known, TLO))
24297+
return TLO.New;
24298+
break;
24299+
}
2428724300
case ISD::XOR:
2428824301
return performXorCombine(N, DAG, DCI, Subtarget);
2428924302
case ISD::MUL:
@@ -27324,6 +27337,21 @@ bool AArch64TargetLowering::SimplifyDemandedBitsForTargetNode(
2732427337
// used - simplify to just Val.
2732527338
return TLO.CombineTo(Op, ShiftR->getOperand(0));
2732627339
}
27340+
case AArch64ISD::BICi: {
27341+
// Fold the BICi away when every bit it would clear is already known to be zero in its input.
27342+
SDValue Op0 = Op.getOperand(0);
27343+
KnownBits KnownOp0 =
27344+
TLO.DAG.computeKnownBits(Op0, OriginalDemandedElts, Depth + 1);
27345+
uint64_t BitsToClear = Op->getConstantOperandVal(1)
27346+
<< Op->getConstantOperandVal(2);
27347+
APInt AlreadyZeroedBitsToClear = BitsToClear & KnownOp0.Zero;
27348+
if (AlreadyZeroedBitsToClear == BitsToClear)
27349+
return TLO.CombineTo(Op, Op0);
27350+
27351+
// The node computes Op0 & ~BitsToClear, so derive the result's known bits
// from what is known about Op0 instead of only masking whatever Known held
// on entry. Inverting an APInt of the element width (rather than the 64-bit
// integer) keeps the mask within range without relying on truncation.
Known = KnownOp0 &
        KnownBits::makeConstant(~APInt(Known.getBitWidth(), BitsToClear));
27352+
27353+
return false;
27354+
}
2732727355
case ISD::INTRINSIC_WO_CHAIN: {
2732827356
if (auto ElementSize = IsSVECntIntrinsic(Op)) {
2732927357
unsigned MaxSVEVectorSizeInBits = Subtarget->getMaxSVEVectorSizeInBits();
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2+
; RUN: llc -mtriple=aarch64-neon < %s | FileCheck %s
3+
4+
declare <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16>, <8 x i16>)
5+
declare <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16>, <8 x i16>)
6+
declare <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16>, <8 x i16>)
7+
declare <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16>, <8 x i16>)
8+
9+
define <8 x i16> @haddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
10+
; CHECK-LABEL: haddu_zext:
11+
; CHECK: // %bb.0:
12+
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
13+
; CHECK-NEXT: ushll v1.8h, v1.8b, #0
14+
; CHECK-NEXT: uhadd v0.8h, v0.8h, v1.8h
15+
; CHECK-NEXT: ret
16+
%x0 = zext <8 x i8> %a0 to <8 x i16>
17+
%x1 = zext <8 x i8> %a1 to <8 x i16>
18+
%hadd = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
19+
%res = and <8 x i16> %hadd, <i16 511, i16 511, i16 511, i16 511,i16 511, i16 511, i16 511, i16 511>
20+
ret <8 x i16> %res
21+
}
22+
23+
define <8 x i16> @rhaddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
24+
; CHECK-LABEL: rhaddu_zext:
25+
; CHECK: // %bb.0:
26+
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
27+
; CHECK-NEXT: ushll v1.8h, v1.8b, #0
28+
; CHECK-NEXT: urhadd v0.8h, v0.8h, v1.8h
29+
; CHECK-NEXT: ret
30+
%x0 = zext <8 x i8> %a0 to <8 x i16>
31+
%x1 = zext <8 x i8> %a1 to <8 x i16>
32+
%hadd = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
33+
%res = and <8 x i16> %hadd, <i16 511, i16 511, i16 511, i16 511, i16 511, i16 511, i16 511, i16 511>
34+
ret <8 x i16> %res
35+
}
36+
37+
define <8 x i16> @hadds_zext(<8 x i8> %a0, <8 x i8> %a1) {
38+
; CHECK-LABEL: hadds_zext:
39+
; CHECK: // %bb.0:
40+
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
41+
; CHECK-NEXT: ushll v1.8h, v1.8b, #0
42+
; CHECK-NEXT: shadd v0.8h, v0.8h, v1.8h
43+
; CHECK-NEXT: ret
44+
%x0 = zext <8 x i8> %a0 to <8 x i16>
45+
%x1 = zext <8 x i8> %a1 to <8 x i16>
46+
%hadd = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
47+
%res = and <8 x i16> %hadd, <i16 511, i16 511, i16 511, i16 511, i16 511, i16 511, i16 511, i16 511>
48+
ret <8 x i16> %res
49+
}
50+
51+
define <8 x i16> @shaddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
52+
; CHECK-LABEL: shaddu_zext:
53+
; CHECK: // %bb.0:
54+
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
55+
; CHECK-NEXT: ushll v1.8h, v1.8b, #0
56+
; CHECK-NEXT: srhadd v0.8h, v0.8h, v1.8h
57+
; CHECK-NEXT: ret
58+
%x0 = zext <8 x i8> %a0 to <8 x i16>
59+
%x1 = zext <8 x i8> %a1 to <8 x i16>
60+
%hadd = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
61+
%res = and <8 x i16> %hadd, <i16 511, i16 511, i16 511, i16 511, i16 511, i16 511, i16 511, i16 511>
62+
ret <8 x i16> %res
63+
}
64+
65+
; Negative tests: with sign-extended inputs the upper bits are not known to be zero, so the BIC must be kept.
66+
67+
define <8 x i16> @haddu_sext(<8 x i8> %a0, <8 x i8> %a1) {
68+
; CHECK-LABEL: haddu_sext:
69+
; CHECK: // %bb.0:
70+
; CHECK-NEXT: sshll v0.8h, v0.8b, #0
71+
; CHECK-NEXT: sshll v1.8h, v1.8b, #0
72+
; CHECK-NEXT: uhadd v0.8h, v0.8h, v1.8h
73+
; CHECK-NEXT: bic v0.8h, #254, lsl #8
74+
; CHECK-NEXT: ret
75+
%x0 = sext <8 x i8> %a0 to <8 x i16>
76+
%x1 = sext <8 x i8> %a1 to <8 x i16>
77+
%hadd = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
78+
%res = and <8 x i16> %hadd, <i16 511, i16 511, i16 511, i16 511,i16 511, i16 511, i16 511, i16 511>
79+
ret <8 x i16> %res
80+
}
81+
82+
define <8 x i16> @urhadd_sext(<8 x i8> %a0, <8 x i8> %a1) {
83+
; CHECK-LABEL: urhadd_sext:
84+
; CHECK: // %bb.0:
85+
; CHECK-NEXT: sshll v0.8h, v0.8b, #0
86+
; CHECK-NEXT: sshll v1.8h, v1.8b, #0
87+
; CHECK-NEXT: urhadd v0.8h, v0.8h, v1.8h
88+
; CHECK-NEXT: bic v0.8h, #254, lsl #8
89+
; CHECK-NEXT: ret
90+
%x0 = sext <8 x i8> %a0 to <8 x i16>
91+
%x1 = sext <8 x i8> %a1 to <8 x i16>
92+
%hadd = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
93+
%res = and <8 x i16> %hadd, <i16 511, i16 511, i16 511, i16 511,i16 511, i16 511, i16 511, i16 511>
94+
ret <8 x i16> %res
95+
}
96+
97+
define <8 x i16> @hadds_sext(<8 x i8> %a0, <8 x i8> %a1) {
98+
; CHECK-LABEL: hadds_sext:
99+
; CHECK: // %bb.0:
100+
; CHECK-NEXT: sshll v0.8h, v0.8b, #0
101+
; CHECK-NEXT: sshll v1.8h, v1.8b, #0
102+
; CHECK-NEXT: shadd v0.8h, v0.8h, v1.8h
103+
; CHECK-NEXT: bic v0.8h, #254, lsl #8
104+
; CHECK-NEXT: ret
105+
%x0 = sext <8 x i8> %a0 to <8 x i16>
106+
%x1 = sext <8 x i8> %a1 to <8 x i16>
107+
%hadd = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
108+
%res = and <8 x i16> %hadd, <i16 511, i16 511, i16 511, i16 511, i16 511, i16 511, i16 511, i16 511>
109+
ret <8 x i16> %res
110+
}
111+
112+
define <8 x i16> @shaddu_sext(<8 x i8> %a0, <8 x i8> %a1) {
113+
; CHECK-LABEL: shaddu_sext:
114+
; CHECK: // %bb.0:
115+
; CHECK-NEXT: sshll v0.8h, v0.8b, #0
116+
; CHECK-NEXT: sshll v1.8h, v1.8b, #0
117+
; CHECK-NEXT: srhadd v0.8h, v0.8h, v1.8h
118+
; CHECK-NEXT: bic v0.8h, #254, lsl #8
119+
; CHECK-NEXT: ret
120+
%x0 = sext <8 x i8> %a0 to <8 x i16>
121+
%x1 = sext <8 x i8> %a1 to <8 x i16>
122+
%hadd = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
123+
%res = and <8 x i16> %hadd, <i16 511, i16 511, i16 511, i16 511, i16 511, i16 511, i16 511, i16 511>
124+
ret <8 x i16> %res
125+
}

0 commit comments

Comments
 (0)