Skip to content

Commit 74b98ab

Browse files
committed
[X86] Fold ZERO_EXTEND_VECTOR_INREG(BUILD_VECTOR(X,Y,?,?)) -> BUILD_VECTOR(X,0,Y,0)
Helps avoid some unnecessary extensions of shift-by-splat amounts before shuffle combining gets limited by one-use checks.
1 parent bad1b7f commit 74b98ab

File tree

4 files changed

+32
-43
lines changed

4 files changed

+32
-43
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -53526,6 +53526,7 @@ static SDValue combineEXTEND_VECTOR_INREG(SDNode *N, SelectionDAG &DAG,
5352653526
unsigned Opcode = N->getOpcode();
5352753527
unsigned InOpcode = In.getOpcode();
5352853528
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
53529+
SDLoc DL(N);
5352953530

5353053531
// Try to merge vector loads and extend_inreg to an extload.
5353153532
if (!DCI.isBeforeLegalizeOps() && ISD::isNormalLoad(In.getNode()) &&
@@ -53538,10 +53539,9 @@ static SDValue combineEXTEND_VECTOR_INREG(SDNode *N, SelectionDAG &DAG,
5353853539
: ISD::ZEXTLOAD;
5353953540
EVT MemVT = VT.changeVectorElementType(SVT);
5354053541
if (TLI.isLoadExtLegal(Ext, VT, MemVT)) {
53541-
SDValue Load =
53542-
DAG.getExtLoad(Ext, SDLoc(N), VT, Ld->getChain(), Ld->getBasePtr(),
53543-
Ld->getPointerInfo(), MemVT, Ld->getOriginalAlign(),
53544-
Ld->getMemOperand()->getFlags());
53542+
SDValue Load = DAG.getExtLoad(
53543+
Ext, DL, VT, Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(),
53544+
MemVT, Ld->getOriginalAlign(), Ld->getMemOperand()->getFlags());
5354553545
DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Load.getValue(1));
5354653546
return Load;
5354753547
}
@@ -53550,7 +53550,7 @@ static SDValue combineEXTEND_VECTOR_INREG(SDNode *N, SelectionDAG &DAG,
5355053550

5355153551
// Fold EXTEND_VECTOR_INREG(EXTEND_VECTOR_INREG(X)) -> EXTEND_VECTOR_INREG(X).
5355253552
if (Opcode == InOpcode)
53553-
return DAG.getNode(Opcode, SDLoc(N), VT, In.getOperand(0));
53553+
return DAG.getNode(Opcode, DL, VT, In.getOperand(0));
5355453554

5355553555
// Fold EXTEND_VECTOR_INREG(EXTRACT_SUBVECTOR(EXTEND(X),0))
5355653556
// -> EXTEND_VECTOR_INREG(X).
@@ -53559,7 +53559,21 @@ static SDValue combineEXTEND_VECTOR_INREG(SDNode *N, SelectionDAG &DAG,
5355953559
In.getOperand(0).getOpcode() == getOpcode_EXTEND(Opcode) &&
5356053560
In.getOperand(0).getOperand(0).getValueSizeInBits() ==
5356153561
In.getValueSizeInBits())
53562-
return DAG.getNode(Opcode, SDLoc(N), VT, In.getOperand(0).getOperand(0));
53562+
return DAG.getNode(Opcode, DL, VT, In.getOperand(0).getOperand(0));
53563+
53564+
// Fold ZERO_EXTEND_VECTOR_INREG(BUILD_VECTOR(X,Y,?,?)) -> BUILD_VECTOR(X,0,Y,0).
53565+
// TODO: Move to DAGCombine?
53566+
if (!DCI.isBeforeLegalizeOps() && Opcode == ISD::ZERO_EXTEND_VECTOR_INREG &&
53567+
In.getOpcode() == ISD::BUILD_VECTOR && In.hasOneUse() &&
53568+
In.getValueSizeInBits() == VT.getSizeInBits()) {
53569+
unsigned NumElts = VT.getVectorNumElements();
53570+
unsigned Scale = VT.getScalarSizeInBits() / In.getScalarValueSizeInBits();
53571+
EVT EltVT = In.getOperand(0).getValueType();
53572+
SmallVector<SDValue> Elts(Scale * NumElts, DAG.getConstant(0, DL, EltVT));
53573+
for (unsigned I = 0; I != NumElts; ++I)
53574+
Elts[I * Scale] = In.getOperand(I);
53575+
return DAG.getBitcast(VT, DAG.getBuildVector(In.getValueType(), DL, Elts));
53576+
}
5356353577

5356453578
// Attempt to combine as a shuffle.
5356553579
// TODO: General ZERO_EXTEND_VECTOR_INREG support.

llvm/test/CodeGen/X86/vector-shift-ashr-128.ll

Lines changed: 11 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1778,31 +1778,19 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) nounwind {
17781778
}
17791779

17801780
define <2 x i64> @PR52719(<2 x i64> %a0, i32 %a1) {
1781-
; SSE2-LABEL: PR52719:
1782-
; SSE2: # %bb.0:
1783-
; SSE2-NEXT: movd %edi, %xmm1
1784-
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
1785-
; SSE2-NEXT: psrlq %xmm1, %xmm2
1786-
; SSE2-NEXT: psrlq %xmm1, %xmm0
1787-
; SSE2-NEXT: pxor %xmm2, %xmm0
1788-
; SSE2-NEXT: psubq %xmm2, %xmm0
1789-
; SSE2-NEXT: retq
1790-
;
1791-
; SSE41-LABEL: PR52719:
1792-
; SSE41: # %bb.0:
1793-
; SSE41-NEXT: movd %edi, %xmm1
1794-
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
1795-
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
1796-
; SSE41-NEXT: psrlq %xmm1, %xmm2
1797-
; SSE41-NEXT: psrlq %xmm1, %xmm0
1798-
; SSE41-NEXT: pxor %xmm2, %xmm0
1799-
; SSE41-NEXT: psubq %xmm2, %xmm0
1800-
; SSE41-NEXT: retq
1781+
; SSE-LABEL: PR52719:
1782+
; SSE: # %bb.0:
1783+
; SSE-NEXT: movd %edi, %xmm1
1784+
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
1785+
; SSE-NEXT: psrlq %xmm1, %xmm2
1786+
; SSE-NEXT: psrlq %xmm1, %xmm0
1787+
; SSE-NEXT: pxor %xmm2, %xmm0
1788+
; SSE-NEXT: psubq %xmm2, %xmm0
1789+
; SSE-NEXT: retq
18011790
;
18021791
; AVX-LABEL: PR52719:
18031792
; AVX: # %bb.0:
18041793
; AVX-NEXT: vmovd %edi, %xmm1
1805-
; AVX-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
18061794
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
18071795
; AVX-NEXT: vpsrlq %xmm1, %xmm2, %xmm2
18081796
; AVX-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
@@ -1813,8 +1801,7 @@ define <2 x i64> @PR52719(<2 x i64> %a0, i32 %a1) {
18131801
; XOPAVX1-LABEL: PR52719:
18141802
; XOPAVX1: # %bb.0:
18151803
; XOPAVX1-NEXT: vmovd %edi, %xmm1
1816-
; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
1817-
; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
1804+
; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
18181805
; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
18191806
; XOPAVX1-NEXT: vpsubq %xmm1, %xmm2, %xmm1
18201807
; XOPAVX1-NEXT: vpshaq %xmm1, %xmm0, %xmm0
@@ -1823,8 +1810,7 @@ define <2 x i64> @PR52719(<2 x i64> %a0, i32 %a1) {
18231810
; XOPAVX2-LABEL: PR52719:
18241811
; XOPAVX2: # %bb.0:
18251812
; XOPAVX2-NEXT: vmovd %edi, %xmm1
1826-
; XOPAVX2-NEXT: vpbroadcastd %xmm1, %xmm1
1827-
; XOPAVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
1813+
; XOPAVX2-NEXT: vpbroadcastq %xmm1, %xmm1
18281814
; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
18291815
; XOPAVX2-NEXT: vpsubq %xmm1, %xmm2, %xmm1
18301816
; XOPAVX2-NEXT: vpshaq %xmm1, %xmm0, %xmm0
@@ -1834,7 +1820,6 @@ define <2 x i64> @PR52719(<2 x i64> %a0, i32 %a1) {
18341820
; AVX512: # %bb.0:
18351821
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
18361822
; AVX512-NEXT: vmovd %edi, %xmm1
1837-
; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
18381823
; AVX512-NEXT: vpsraq %xmm1, %zmm0, %zmm0
18391824
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
18401825
; AVX512-NEXT: vzeroupper
@@ -1843,7 +1828,6 @@ define <2 x i64> @PR52719(<2 x i64> %a0, i32 %a1) {
18431828
; AVX512VL-LABEL: PR52719:
18441829
; AVX512VL: # %bb.0:
18451830
; AVX512VL-NEXT: vmovd %edi, %xmm1
1846-
; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
18471831
; AVX512VL-NEXT: vpsraq %xmm1, %xmm0, %xmm0
18481832
; AVX512VL-NEXT: retq
18491833
;

llvm/test/CodeGen/X86/vector-shift-ashr-256.ll

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2148,8 +2148,6 @@ define <4 x i64> @PR52719(<4 x i64> %a0, i32 %a1) {
21482148
; AVX2-LABEL: PR52719:
21492149
; AVX2: # %bb.0:
21502150
; AVX2-NEXT: vmovd %edi, %xmm1
2151-
; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1
2152-
; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
21532151
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
21542152
; AVX2-NEXT: vpsrlq %xmm1, %ymm2, %ymm2
21552153
; AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
@@ -2175,8 +2173,6 @@ define <4 x i64> @PR52719(<4 x i64> %a0, i32 %a1) {
21752173
; XOPAVX2-LABEL: PR52719:
21762174
; XOPAVX2: # %bb.0:
21772175
; XOPAVX2-NEXT: vmovd %edi, %xmm1
2178-
; XOPAVX2-NEXT: vpbroadcastd %xmm1, %xmm1
2179-
; XOPAVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
21802176
; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
21812177
; XOPAVX2-NEXT: vpsrlq %xmm1, %ymm2, %ymm2
21822178
; XOPAVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
@@ -2188,16 +2184,13 @@ define <4 x i64> @PR52719(<4 x i64> %a0, i32 %a1) {
21882184
; AVX512: # %bb.0:
21892185
; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
21902186
; AVX512-NEXT: vmovd %edi, %xmm1
2191-
; AVX512-NEXT: vpbroadcastd %xmm1, %xmm1
2192-
; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
21932187
; AVX512-NEXT: vpsraq %xmm1, %zmm0, %zmm0
21942188
; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
21952189
; AVX512-NEXT: retq
21962190
;
21972191
; AVX512VL-LABEL: PR52719:
21982192
; AVX512VL: # %bb.0:
2199-
; AVX512VL-NEXT: vpbroadcastd %edi, %xmm1
2200-
; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
2193+
; AVX512VL-NEXT: vmovd %edi, %xmm1
22012194
; AVX512VL-NEXT: vpsraq %xmm1, %ymm0, %ymm0
22022195
; AVX512VL-NEXT: retq
22032196
;

llvm/test/CodeGen/X86/vector-shift-ashr-512.ll

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -501,8 +501,6 @@ define <8 x i64> @PR52719(<8 x i64> %a0, i32 %a1) {
501501
; ALL-LABEL: PR52719:
502502
; ALL: # %bb.0:
503503
; ALL-NEXT: vmovd %edi, %xmm1
504-
; ALL-NEXT: vpbroadcastd %xmm1, %xmm1
505-
; ALL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
506504
; ALL-NEXT: vpsraq %xmm1, %zmm0, %zmm0
507505
; ALL-NEXT: retq
508506
%vec = insertelement <8 x i32> poison, i32 %a1, i64 0

0 commit comments

Comments
 (0)