@@ -50936,10 +50936,12 @@ static SDValue combineAndShuffleNot(SDNode *N, SelectionDAG &DAG,
5093650936// Given a target type \p VT, we generate
5093750937// or (and x, y), (xor z, zext(build_vector (constants)))
5093850938// given x, y and z are of type \p VT. We can do so, if operands are either
50939- // truncates from VT types, the second operand is a vector of constants or can
50940- // be recursively promoted.
50939+ // truncates from VT types, the second operand is a vector of constants, can
50940+ // be recursively promoted or is an existing extension we can extend further.
5094150941static SDValue PromoteMaskArithmetic(SDValue N, const SDLoc &DL, EVT VT,
50942- SelectionDAG &DAG, unsigned Depth) {
50942+ SelectionDAG &DAG,
50943+ const X86Subtarget &Subtarget,
50944+ unsigned Depth) {
5094350945 // Limit recursion to avoid excessive compile times.
5094450946 if (Depth >= SelectionDAG::MaxRecursionDepth)
5094550947 return SDValue();
@@ -50954,7 +50956,8 @@ static SDValue PromoteMaskArithmetic(SDValue N, const SDLoc &DL, EVT VT,
5095450956 if (!TLI.isOperationLegalOrPromote(N.getOpcode(), VT))
5095550957 return SDValue();
5095650958
50957- if (SDValue NN0 = PromoteMaskArithmetic(N0, DL, VT, DAG, Depth + 1))
50959+ if (SDValue NN0 =
50960+ PromoteMaskArithmetic(N0, DL, VT, DAG, Subtarget, Depth + 1))
5095850961 N0 = NN0;
5095950962 else {
5096050963 // The left side has to be a 'trunc'.
@@ -50966,14 +50969,19 @@ static SDValue PromoteMaskArithmetic(SDValue N, const SDLoc &DL, EVT VT,
5096650969 return SDValue();
5096750970 }
5096850971
50969- if (SDValue NN1 = PromoteMaskArithmetic(N1, DL, VT, DAG, Depth + 1))
50972+ if (SDValue NN1 =
50973+ PromoteMaskArithmetic(N1, DL, VT, DAG, Subtarget, Depth + 1))
5097050974 N1 = NN1;
5097150975 else {
50972- // The right side has to be a 'trunc' or a (foldable) constant.
50976+ // The right side has to be a 'trunc', a (foldable) constant or an
50977+ // existing extension we can extend further.
5097350978 bool RHSTrunc = N1.getOpcode() == ISD::TRUNCATE &&
5097450979 N1.getOperand(0).getValueType() == VT;
5097550980 if (RHSTrunc)
5097650981 N1 = N1.getOperand(0);
50982+ else if (ISD::isExtVecInRegOpcode(N1.getOpcode()) && VT.is256BitVector() &&
50983+ Subtarget.hasInt256() && N1.hasOneUse())
50984+ N1 = DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0));
5097750985 else if (SDValue Cst =
5097850986 DAG.FoldConstantArithmetic(ISD::ZERO_EXTEND, DL, VT, {N1}))
5097950987 N1 = Cst;
@@ -51003,7 +51011,7 @@ static SDValue PromoteMaskArithmetic(SDValue N, const SDLoc &DL,
5100351011 EVT NarrowVT = Narrow.getValueType();
5100451012
5100551013 // Generate the wide operation.
51006- SDValue Op = PromoteMaskArithmetic(Narrow, DL, VT, DAG, 0);
51014+ SDValue Op = PromoteMaskArithmetic(Narrow, DL, VT, DAG, Subtarget, 0);
5100751015 if (!Op)
5100851016 return SDValue();
5100951017 switch (N.getOpcode()) {
0 commit comments