Skip to content

Commit ed33690

Browse files
authored
[X86] PromoteMaskArithmetic - oneuse vector_extend_inreg nodes can be safely promoted on AVX2+ targets (#157425)
Allows us to extend a result back to the largest type after we've handled mask logic using vXi1 result types from different source vector widths (e.g. v8i32 and v8i8). Fixes #157382.
1 parent 3f3f7d1 commit ed33690

File tree

2 files changed

+17
-12
lines changed

2 files changed

+17
-12
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -50936,10 +50936,12 @@ static SDValue combineAndShuffleNot(SDNode *N, SelectionDAG &DAG,
5093650936
// Given a target type \p VT, we generate
5093750937
// or (and x, y), (xor z, zext(build_vector (constants)))
5093850938
// given x, y and z are of type \p VT. We can do so, if operands are either
50939-
// truncates from VT types, the second operand is a vector of constants or can
50940-
// be recursively promoted.
50939+
// truncates from VT types, the second operand is a vector of constants, can
50940+
// be recursively promoted or is an existing extension we can extend further.
5094150941
static SDValue PromoteMaskArithmetic(SDValue N, const SDLoc &DL, EVT VT,
50942-
SelectionDAG &DAG, unsigned Depth) {
50942+
SelectionDAG &DAG,
50943+
const X86Subtarget &Subtarget,
50944+
unsigned Depth) {
5094350945
// Limit recursion to avoid excessive compile times.
5094450946
if (Depth >= SelectionDAG::MaxRecursionDepth)
5094550947
return SDValue();
@@ -50954,7 +50956,8 @@ static SDValue PromoteMaskArithmetic(SDValue N, const SDLoc &DL, EVT VT,
5095450956
if (!TLI.isOperationLegalOrPromote(N.getOpcode(), VT))
5095550957
return SDValue();
5095650958

50957-
if (SDValue NN0 = PromoteMaskArithmetic(N0, DL, VT, DAG, Depth + 1))
50959+
if (SDValue NN0 =
50960+
PromoteMaskArithmetic(N0, DL, VT, DAG, Subtarget, Depth + 1))
5095850961
N0 = NN0;
5095950962
else {
5096050963
// The left side has to be a 'trunc'.
@@ -50966,14 +50969,19 @@ static SDValue PromoteMaskArithmetic(SDValue N, const SDLoc &DL, EVT VT,
5096650969
return SDValue();
5096750970
}
5096850971

50969-
if (SDValue NN1 = PromoteMaskArithmetic(N1, DL, VT, DAG, Depth + 1))
50972+
if (SDValue NN1 =
50973+
PromoteMaskArithmetic(N1, DL, VT, DAG, Subtarget, Depth + 1))
5097050974
N1 = NN1;
5097150975
else {
50972-
// The right side has to be a 'trunc' or a (foldable) constant.
50976+
// The right side has to be a 'trunc', a (foldable) constant or an
50977+
// existing extension we can extend further.
5097350978
bool RHSTrunc = N1.getOpcode() == ISD::TRUNCATE &&
5097450979
N1.getOperand(0).getValueType() == VT;
5097550980
if (RHSTrunc)
5097650981
N1 = N1.getOperand(0);
50982+
else if (ISD::isExtVecInRegOpcode(N1.getOpcode()) && VT.is256BitVector() &&
50983+
Subtarget.hasInt256() && N1.hasOneUse())
50984+
N1 = DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0));
5097750985
else if (SDValue Cst =
5097850986
DAG.FoldConstantArithmetic(ISD::ZERO_EXTEND, DL, VT, {N1}))
5097950987
N1 = Cst;
@@ -51003,7 +51011,7 @@ static SDValue PromoteMaskArithmetic(SDValue N, const SDLoc &DL,
5100351011
EVT NarrowVT = Narrow.getValueType();
5100451012

5100551013
// Generate the wide operation.
51006-
SDValue Op = PromoteMaskArithmetic(Narrow, DL, VT, DAG, 0);
51014+
SDValue Op = PromoteMaskArithmetic(Narrow, DL, VT, DAG, Subtarget, 0);
5100751015
if (!Op)
5100851016
return SDValue();
5100951017
switch (N.getOpcode()) {

llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -733,11 +733,8 @@ define <8 x i32> @PR157382(ptr %p0, ptr %p1, ptr %p2) {
733733
; AVX2-NEXT: vpcmpeqb %xmm3, %xmm2, %xmm2
734734
; AVX2-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
735735
; AVX2-NEXT: vpxor %xmm3, %xmm2, %xmm2
736-
; AVX2-NEXT: vpmovsxbw %xmm2, %xmm2
737-
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
738-
; AVX2-NEXT: vpackssdw %xmm3, %xmm1, %xmm1
739-
; AVX2-NEXT: vpor %xmm2, %xmm1, %xmm1
740-
; AVX2-NEXT: vpmovsxwd %xmm1, %ymm1
736+
; AVX2-NEXT: vpmovsxbd %xmm2, %ymm2
737+
; AVX2-NEXT: vpor %ymm2, %ymm1, %ymm1
741738
; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm0
742739
; AVX2-NEXT: retq
743740
;

0 commit comments

Comments
 (0)