-
Notifications
You must be signed in to change notification settings - Fork 13.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[DAG] visitEXTRACT_SUBVECTOR - accumulate SimplifyDemandedVectorElts demanded elts across all EXTRACT_SUBVECTOR uses (REAPPLIED) #133401
[DAG] visitEXTRACT_SUBVECTOR - accumulate SimplifyDemandedVectorElts demanded elts across all EXTRACT_SUBVECTOR uses (REAPPLIED) #133401
Conversation
…demanded elts across all EXTRACT_SUBVECTOR uses (REAPPLIED) Similar to what is done for visitEXTRACT_VECTOR_ELT - if all uses of a vector are EXTRACT_SUBVECTOR, then determine the accumulated demanded elts across all users and call SimplifyDemandedVectorElts in "AssumeSingleUse" mode. Second try after llvm#133130 was reverted by llvm#133331 because it affected reverted test files.
@llvm/pr-subscribers-llvm-selectiondag Author: Simon Pilgrim (RKSimon) Changes: Similar to what is done for visitEXTRACT_VECTOR_ELT - if all uses of a vector are EXTRACT_SUBVECTOR, then determine the accumulated demanded elts across all users and call SimplifyDemandedVectorElts in "AssumeSingleUse" mode. Second try after #133130 was reverted by #133331 because it affected reverted test files. Patch is 21.25 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/133401.diff. 10 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 2fd744391b917..4487b9d510cc7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -25557,8 +25557,31 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG, LegalOperations))
return NarrowBOp;
- if (SimplifyDemandedVectorElts(SDValue(N, 0)))
- return SDValue(N, 0);
+ // If only EXTRACT_SUBVECTOR nodes use the source vector we can
+ // simplify it based on the (valid) extractions.
+ if (!V.getValueType().isScalableVector() &&
+ llvm::all_of(V->users(), [&](SDNode *Use) {
+ return Use->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ Use->getOperand(0) == V;
+ })) {
+ unsigned NumElts = V.getValueType().getVectorNumElements();
+ APInt DemandedElts = APInt::getZero(NumElts);
+ for (SDNode *User : V->users()) {
+ unsigned ExtIdx = User->getConstantOperandVal(1);
+ unsigned NumSubElts = User->getValueType(0).getVectorNumElements();
+ DemandedElts.setBits(ExtIdx, ExtIdx + NumSubElts);
+ }
+ if (SimplifyDemandedVectorElts(V, DemandedElts, /*AssumeSingleUse=*/true)) {
+ // We simplified the vector operand of this extract subvector. If this
+ // extract is not dead, visit it again so it is folded properly.
+ if (N->getOpcode() != ISD::DELETED_NODE)
+ AddToWorklist(N);
+ return SDValue(N, 0);
+ }
+ } else {
+ if (SimplifyDemandedVectorElts(SDValue(N, 0)))
+ return SDValue(N, 0);
+ }
return SDValue();
}
diff --git a/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll b/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll
index 2dfa515d9f05c..8125e062e7ffd 100644
--- a/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll
+++ b/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll
@@ -2573,7 +2573,6 @@ define void @vec384_i8_widen_to_i24_factor3_broadcast_to_v16i24_factor16(ptr %in
; AVX512F-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
; AVX512F-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,15,3,4,15,6,7,15,9,10,15,12,13,15]
; AVX512F-NEXT: vpbroadcastb %xmm0, %ymm0
-; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
; AVX512F-NEXT: vpaddb (%rdx), %ymm1, %ymm1
; AVX512F-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
@@ -2591,7 +2590,6 @@ define void @vec384_i8_widen_to_i24_factor3_broadcast_to_v16i24_factor16(ptr %in
; AVX512DQ-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,15,3,4,15,6,7,15,9,10,15,12,13,15]
; AVX512DQ-NEXT: vpbroadcastb %xmm0, %ymm0
-; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
; AVX512DQ-NEXT: vpaddb (%rdx), %ymm1, %ymm1
; AVX512DQ-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
@@ -2837,7 +2835,6 @@ define void @vec384_i8_widen_to_i48_factor6_broadcast_to_v8i48_factor8(ptr %in.v
; AVX512F-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
; AVX512F-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,15,6,7,8,9,10,15,12,13,14]
; AVX512F-NEXT: vpbroadcastb %xmm0, %ymm0
-; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
; AVX512F-NEXT: vpaddb (%rdx), %ymm1, %ymm1
; AVX512F-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
@@ -2855,7 +2852,6 @@ define void @vec384_i8_widen_to_i48_factor6_broadcast_to_v8i48_factor8(ptr %in.v
; AVX512DQ-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,15,6,7,8,9,10,15,12,13,14]
; AVX512DQ-NEXT: vpbroadcastb %xmm0, %ymm0
-; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
; AVX512DQ-NEXT: vpaddb (%rdx), %ymm1, %ymm1
; AVX512DQ-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
@@ -3100,7 +3096,6 @@ define void @vec384_i8_widen_to_i96_factor12_broadcast_to_v4i96_factor4(ptr %in.
; AVX512F-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
; AVX512F-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,5,6,7,8,9,10,15,12,13,14]
; AVX512F-NEXT: vpbroadcastb %xmm0, %ymm0
-; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
; AVX512F-NEXT: vpaddb (%rdx), %ymm1, %ymm1
; AVX512F-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
@@ -3118,7 +3113,6 @@ define void @vec384_i8_widen_to_i96_factor12_broadcast_to_v4i96_factor4(ptr %in.
; AVX512DQ-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,5,6,7,8,9,10,15,12,13,14]
; AVX512DQ-NEXT: vpbroadcastb %xmm0, %ymm0
-; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
; AVX512DQ-NEXT: vpaddb (%rdx), %ymm1, %ymm1
; AVX512DQ-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
@@ -3614,10 +3608,9 @@ define void @vec384_i16_widen_to_i48_factor3_broadcast_to_v8i48_factor8(ptr %in.
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqa (%rdi), %xmm0
; AVX512F-NEXT: vmovdqa 48(%rdi), %xmm1
+; AVX512F-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
; AVX512F-NEXT: vpaddb (%rsi), %xmm0, %xmm0
; AVX512F-NEXT: vpbroadcastw %xmm0, %ymm0
-; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
-; AVX512F-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
; AVX512F-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2],xmm0[3],xmm1[4,5],xmm0[6],xmm1[7]
; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
; AVX512F-NEXT: vpaddb (%rdx), %ymm1, %ymm1
@@ -3631,10 +3624,9 @@ define void @vec384_i16_widen_to_i48_factor3_broadcast_to_v8i48_factor8(ptr %in.
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vmovdqa (%rdi), %xmm0
; AVX512DQ-NEXT: vmovdqa 48(%rdi), %xmm1
+; AVX512DQ-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
; AVX512DQ-NEXT: vpaddb (%rsi), %xmm0, %xmm0
; AVX512DQ-NEXT: vpbroadcastw %xmm0, %ymm0
-; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
-; AVX512DQ-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
; AVX512DQ-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2],xmm0[3],xmm1[4,5],xmm0[6],xmm1[7]
; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
; AVX512DQ-NEXT: vpaddb (%rdx), %ymm1, %ymm1
@@ -3868,10 +3860,9 @@ define void @vec384_i16_widen_to_i96_factor6_broadcast_to_v4i96_factor4(ptr %in.
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqa (%rdi), %xmm0
; AVX512F-NEXT: vmovdqa 48(%rdi), %xmm1
+; AVX512F-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
; AVX512F-NEXT: vpaddb (%rsi), %xmm0, %xmm0
; AVX512F-NEXT: vpbroadcastw %xmm0, %ymm0
-; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
-; AVX512F-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
; AVX512F-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5],xmm0[6],xmm1[7]
; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
; AVX512F-NEXT: vpaddb (%rdx), %ymm1, %ymm1
@@ -3885,10 +3876,9 @@ define void @vec384_i16_widen_to_i96_factor6_broadcast_to_v4i96_factor4(ptr %in.
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vmovdqa (%rdi), %xmm0
; AVX512DQ-NEXT: vmovdqa 48(%rdi), %xmm1
+; AVX512DQ-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
; AVX512DQ-NEXT: vpaddb (%rsi), %xmm0, %xmm0
; AVX512DQ-NEXT: vpbroadcastw %xmm0, %ymm0
-; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
-; AVX512DQ-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
; AVX512DQ-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5],xmm0[6],xmm1[7]
; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
; AVX512DQ-NEXT: vpaddb (%rdx), %ymm1, %ymm1
diff --git a/llvm/test/CodeGen/X86/pr42905.ll b/llvm/test/CodeGen/X86/pr42905.ll
index 6ebe5be45a4f8..a3ff58e3dcf9b 100644
--- a/llvm/test/CodeGen/X86/pr42905.ll
+++ b/llvm/test/CodeGen/X86/pr42905.ll
@@ -4,16 +4,10 @@
define <4 x double> @autogen_SD30452(i1 %L230) {
; CHECK-LABEL: autogen_SD30452:
; CHECK: # %bb.0: # %BB
-; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [151829,151829]
-; CHECK-NEXT: movq %xmm0, %rax
-; CHECK-NEXT: cvtsi2sd %rax, %xmm0
-; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
-; CHECK-NEXT: movq %xmm2, %rax
-; CHECK-NEXT: xorps %xmm2, %xmm2
-; CHECK-NEXT: cvtsi2sd %rax, %xmm2
-; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
-; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
-; CHECK-NEXT: cvtdq2pd %xmm1, %xmm1
+; CHECK-NEXT: movdqa {{.*#+}} xmm0 = [151829,151829]
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; CHECK-NEXT: cvtdq2pd %xmm0, %xmm0
+; CHECK-NEXT: movaps %xmm0, %xmm1
; CHECK-NEXT: retq
BB:
%I = insertelement <4 x i64> zeroinitializer, i64 151829, i32 3
diff --git a/llvm/test/CodeGen/X86/sad.ll b/llvm/test/CodeGen/X86/sad.ll
index c74440d7ec021..fe71a16039c19 100644
--- a/llvm/test/CodeGen/X86/sad.ll
+++ b/llvm/test/CodeGen/X86/sad.ll
@@ -927,8 +927,7 @@ define dso_local i32 @sad_nonloop_64i8(ptr nocapture readonly %p, i64, ptr nocap
; AVX512F-NEXT: vmovdqu 32(%rdi), %ymm1
; AVX512F-NEXT: vpsadbw 32(%rdx), %ymm1, %ymm1
; AVX512F-NEXT: vpsadbw (%rdx), %ymm0, %ymm0
-; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT: vpaddq %zmm1, %zmm0, %zmm0
+; AVX512F-NEXT: vpaddq %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT: vpaddq %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
diff --git a/llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-7.ll b/llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-7.ll
index 40e4bb4b16c79..bc08f57e5faac 100644
--- a/llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-7.ll
+++ b/llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-7.ll
@@ -2079,7 +2079,7 @@ define void @store_i16_stride7_vf16(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve
; AVX-NEXT: vpsrld $16, %xmm8, %xmm10
; AVX-NEXT: vpunpckhdq {{.*#+}} xmm10 = xmm3[2],xmm10[2],xmm3[3],xmm10[3]
; AVX-NEXT: vpunpckhwd {{.*#+}} xmm12 = xmm8[4],xmm3[4],xmm8[5],xmm3[5],xmm8[6],xmm3[6],xmm8[7],xmm3[7]
-; AVX-NEXT: vpshuflw {{.*#+}} xmm12 = xmm12[2,2,2,2,4,5,6,7]
+; AVX-NEXT: vpshufd {{.*#+}} xmm12 = xmm12[1,1,2,3]
; AVX-NEXT: vpshufhw {{.*#+}} xmm12 = xmm12[0,1,2,3,4,5,5,4]
; AVX-NEXT: vinsertf128 $1, %xmm10, %ymm12, %ymm10
; AVX-NEXT: vandnps %ymm10, %ymm6, %ymm6
diff --git a/llvm/test/CodeGen/X86/vector-reduce-fmax-fmin-fast.ll b/llvm/test/CodeGen/X86/vector-reduce-fmax-fmin-fast.ll
index 5bd9b0292a8f0..638c195850d32 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-fmax-fmin-fast.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-fmax-fmin-fast.ll
@@ -170,7 +170,7 @@ define float @test_v16f32(<16 x float> %a0) {
; AVX512-LABEL: test_v16f32:
; AVX512: # %bb.0:
; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1
-; AVX512-NEXT: vmaxps %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vmaxps %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vmaxps %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
@@ -264,7 +264,7 @@ define double @test_v8f64(<8 x double> %a0) {
; AVX512-LABEL: test_v8f64:
; AVX512: # %bb.0:
; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1
-; AVX512-NEXT: vminpd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vminpd %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vminpd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
@@ -306,7 +306,7 @@ define double @test_v16f64(<16 x double> %a0) {
; AVX512: # %bb.0:
; AVX512-NEXT: vmaxpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1
-; AVX512-NEXT: vmaxpd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vmaxpd %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
diff --git a/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll b/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll
index eafee9e65345f..f0f430abc48dc 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll
@@ -175,7 +175,7 @@ define float @test_v16f32(<16 x float> %a0) {
; AVX512-LABEL: test_v16f32:
; AVX512: # %bb.0:
; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1
-; AVX512-NEXT: vmaxps %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vmaxps %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vmaxps %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
@@ -311,7 +311,7 @@ define double @test_v8f64(<8 x double> %a0) {
; AVX512-LABEL: test_v8f64:
; AVX512: # %bb.0:
; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1
-; AVX512-NEXT: vmaxpd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vmaxpd %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
@@ -353,7 +353,7 @@ define double @test_v16f64(<16 x double> %a0) {
; AVX512: # %bb.0:
; AVX512-NEXT: vmaxpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1
-; AVX512-NEXT: vmaxpd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vmaxpd %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
diff --git a/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll b/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll
index e8f9c7f7b524d..4d6daf3fb77f0 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll
@@ -216,7 +216,7 @@ define float @test_v16f32(<16 x float> %a0) {
; AVX512-LABEL: test_v16f32:
; AVX512: # %bb.0:
; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1
-; AVX512-NEXT: vminps %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vminps %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vminps %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
@@ -310,7 +310,7 @@ define double @test_v8f64(<8 x double> %a0) {
; AVX512-LABEL: test_v8f64:
; AVX512: # %bb.0:
; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1
-; AVX512-NEXT: vminpd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vminpd %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vminpd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
@@ -352,7 +352,7 @@ define double @test_v16f64(<16 x double> %a0) {
; AVX512: # %bb.0:
; AVX512-NEXT: vminpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1
-; AVX512-NEXT: vminpd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vminpd %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vminpd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
diff --git a/llvm/test/CodeGen/X86/vector-reduce-mul.ll b/llvm/test/CodeGen/X86/vector-reduce-mul.ll
index 0103b7622dc3e..ab95081e2938e 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-mul.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-mul.ll
@@ -357,14 +357,14 @@ define i64 @test_v8i64(<8 x i64> %a0) {
; AVX512BW-LABEL: test_v8i64:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; AVX512BW-NEXT: vpsrlq $32, %zmm0, %zmm2
-; AVX512BW-NEXT: vpmuludq %zmm1, %zmm2, %zmm2
-; AVX512BW-NEXT: vpsrlq $32, %zmm1, %zmm3
-; AVX512BW-NEXT: vpmuludq %zmm3, %zmm0, %zmm3
-; AVX512BW-NEXT: vpaddq %zmm2, %zmm3, %zmm2
-; AVX512BW-NEXT: vpsllq $32, %zmm2, %zmm2
-; AVX512BW-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpaddq %zmm2, %zmm0, %zmm0
+; AVX512BW-NEXT: vpsrlq $32, %ymm0, %ymm2
+; AVX512BW-NEXT: vpmuludq %ymm1, %ymm2, %ymm2
+; AVX512BW-NEXT: vpsrlq $32, %ymm1, %ymm3
+; AVX512BW-NEXT: vpmuludq %ymm3, %ymm0, %ymm3
+; AVX512BW-NEXT: vpaddq %ymm2, %ymm3, %ymm2
+; AVX512BW-NEXT: vpsllq $32, %ymm2, %ymm2
+; AVX512BW-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
+; AVX512BW-NEXT: vpaddq %ymm2, %ymm0, %ymm0
; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512BW-NEXT: vpsrlq $32, %xmm0, %xmm2
; AVX512BW-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
@@ -390,14 +390,14 @@ define i64 @test_v8i64(<8 x i64> %a0) {
; AVX512BWVL-LABEL: test_v8i64:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; AVX512BWVL-NEXT: vpsrlq $32, %zmm0, %zmm2
-; AVX512BWVL-NEXT: vpmuludq %zmm1, %zmm2, %zmm2
-; AVX512BWVL-NEXT: vpsrlq $32, %zmm1, %zmm3
-; AVX512BWVL-NEXT: vpmuludq %zmm3, %zmm0, %zmm3
-; AVX512BWVL-NEXT: vpaddq %zmm2, %zmm3, %zmm2
-; AVX512BWVL-NEXT: vpsllq $32, %zmm2, %zmm2
-; AVX512BWVL-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
-; AVX512BWVL-NEXT: vpaddq %zmm2, %zmm0, %zmm0
+; AVX512BWVL-NEXT: vpsrlq $32, %ymm0, %ymm2
+; AVX512BWVL-NEXT: vpmuludq %ymm1, %ymm2, %ymm2
+; AVX512BWVL-NEXT: vpsrlq $32, %ymm1, %ymm3
+; AVX512BWVL-NEXT: vpmuludq %ymm3, %ymm0, %ymm3
+; AVX512BWVL-NEXT: vpaddq %ymm2, %ymm3, %ymm2
+; AVX512BWVL-NEXT: vpsllq $32, %ymm2, %ymm2
+; AVX512BWVL-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpaddq %ymm2, %ymm0, %ymm0
; AVX512BWVL-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512BWVL-NEXT: vpsrlq $32, %xmm0, %xmm2
; AVX512BWVL-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
@@ -667,14 +667,14 @@ define i64 @test_v16i64(<16 x i64> %a0) {
; AVX512BW-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpaddq %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; AVX512BW-NEXT: vpsrlq $32, %zmm0, %zmm2
-; AVX512BW-NEXT: vpmuludq %zmm1, %zmm2, %zmm2
-; AVX512BW-NEXT: vpsrlq $32, %zmm1, %zmm3
-; AVX512BW-NEXT: vpmuludq %zmm3, %zmm0, %zmm3
-; AVX512BW-NEXT: vpaddq %zmm2, %zmm3, %zmm2
-; AVX512BW-NEXT: vpsllq $32, %zmm2, %zmm2
-; AVX512BW-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpaddq %zmm2, %zmm0, %zmm0
+; AVX512BW-NEXT: vpsrlq $32, %ymm0, %ymm2
+; AVX512BW-NEXT: vpmuludq %ymm1, %ymm2, %ymm2
+; AVX512BW-NEXT: vpsrlq $32, %ymm1, %ymm3
+; AVX512BW-NEXT: vpmuludq %ymm3, %ymm0, %ymm3
+; AVX512BW-NEXT: vpaddq %ymm2, %ymm3, %ymm2
+; AVX512BW-NEXT: vpsllq $32, %ymm2, %ymm2
+; AVX512BW-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
+; AVX512BW-NEXT: vpaddq %ymm2, %ymm0, %ymm0
; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512BW-NEXT: vpsrlq $32, %xmm0, %xmm2
; AVX512BW-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
@@ -708,14 +708,14 @@ define i64 @test_v16i64(<16 x i64> %a0) {
; AVX512BWVL-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
; AVX512BWVL-NEXT: vpaddq %zmm2, %zmm0, %zmm0
; AVX512BWVL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; AVX512BWVL-NEXT: vpsrlq $32, %zmm0, %zmm2
-; AVX512BWVL-NEXT: vpmuludq %zmm1, %zmm2, %zmm2
-; AVX512BWVL-NEXT: vpsrlq $32, %zmm1, %zmm3
-; AVX512BWVL-NEXT: vpmuludq %zmm3, %zmm0, %zmm3
-; AVX512BWVL-NEXT: vpaddq %zmm2, %zmm3, %zmm2
-; AVX512BWVL-NEXT: vpsllq $32, %zmm2, %zmm2
-; AVX512BWVL-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
-; AVX512BWVL-NEXT: vpaddq %zmm2, %zmm0, %zmm0
+; AVX512BWVL-NEXT: vpsrlq $32, %ymm0, %ymm2
+; AVX512BWVL-NEXT: vpmuludq %ymm1, %ymm2, %ymm2
+; AVX512BWVL-NEXT: vpsrlq $32, %ymm1, %ymm3
+; AVX512BWVL-NEXT: vpmuludq %ymm3, %ymm0, %ymm3
+; AVX512BWVL-NEXT: vpaddq %ymm2, %ymm3, %ymm2
+; AVX512BWVL-NEXT: vpsllq $32, %ymm2, %ymm2
+; AVX512BWVL-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpaddq %ymm2, %ymm0, %ymm0
; AVX512BWVL-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512BWVL-NEXT: vpsrlq $32, %xmm0, %xmm2
; AVX512BWVL-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
diff --git a/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll b/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll
index ab216cafcc923..a598e30845579 100644
--- a/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll
+++ b/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll
@@ -3862,15 +3862,14 @@ define void @vec384_i32_widen_to_i96_factor3_broadcast_to_v4...
[truncated]
|
@llvm/pr-subscribers-backend-x86 Author: Simon Pilgrim (RKSimon) Changes: Similar to what is done for visitEXTRACT_VECTOR_ELT - if all uses of a vector are EXTRACT_SUBVECTOR, then determine the accumulated demanded elts across all users and call SimplifyDemandedVectorElts in "AssumeSingleUse" mode. Second try after #133130 was reverted by #133331 because it affected reverted test files. Patch is 21.25 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/133401.diff. 10 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 2fd744391b917..4487b9d510cc7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -25557,8 +25557,31 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG, LegalOperations))
return NarrowBOp;
- if (SimplifyDemandedVectorElts(SDValue(N, 0)))
- return SDValue(N, 0);
+ // If only EXTRACT_SUBVECTOR nodes use the source vector we can
+ // simplify it based on the (valid) extractions.
+ if (!V.getValueType().isScalableVector() &&
+ llvm::all_of(V->users(), [&](SDNode *Use) {
+ return Use->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ Use->getOperand(0) == V;
+ })) {
+ unsigned NumElts = V.getValueType().getVectorNumElements();
+ APInt DemandedElts = APInt::getZero(NumElts);
+ for (SDNode *User : V->users()) {
+ unsigned ExtIdx = User->getConstantOperandVal(1);
+ unsigned NumSubElts = User->getValueType(0).getVectorNumElements();
+ DemandedElts.setBits(ExtIdx, ExtIdx + NumSubElts);
+ }
+ if (SimplifyDemandedVectorElts(V, DemandedElts, /*AssumeSingleUse=*/true)) {
+ // We simplified the vector operand of this extract subvector. If this
+ // extract is not dead, visit it again so it is folded properly.
+ if (N->getOpcode() != ISD::DELETED_NODE)
+ AddToWorklist(N);
+ return SDValue(N, 0);
+ }
+ } else {
+ if (SimplifyDemandedVectorElts(SDValue(N, 0)))
+ return SDValue(N, 0);
+ }
return SDValue();
}
diff --git a/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll b/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll
index 2dfa515d9f05c..8125e062e7ffd 100644
--- a/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll
+++ b/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll
@@ -2573,7 +2573,6 @@ define void @vec384_i8_widen_to_i24_factor3_broadcast_to_v16i24_factor16(ptr %in
; AVX512F-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
; AVX512F-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,15,3,4,15,6,7,15,9,10,15,12,13,15]
; AVX512F-NEXT: vpbroadcastb %xmm0, %ymm0
-; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
; AVX512F-NEXT: vpaddb (%rdx), %ymm1, %ymm1
; AVX512F-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
@@ -2591,7 +2590,6 @@ define void @vec384_i8_widen_to_i24_factor3_broadcast_to_v16i24_factor16(ptr %in
; AVX512DQ-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,15,3,4,15,6,7,15,9,10,15,12,13,15]
; AVX512DQ-NEXT: vpbroadcastb %xmm0, %ymm0
-; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
; AVX512DQ-NEXT: vpaddb (%rdx), %ymm1, %ymm1
; AVX512DQ-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
@@ -2837,7 +2835,6 @@ define void @vec384_i8_widen_to_i48_factor6_broadcast_to_v8i48_factor8(ptr %in.v
; AVX512F-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
; AVX512F-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,15,6,7,8,9,10,15,12,13,14]
; AVX512F-NEXT: vpbroadcastb %xmm0, %ymm0
-; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
; AVX512F-NEXT: vpaddb (%rdx), %ymm1, %ymm1
; AVX512F-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
@@ -2855,7 +2852,6 @@ define void @vec384_i8_widen_to_i48_factor6_broadcast_to_v8i48_factor8(ptr %in.v
; AVX512DQ-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,15,6,7,8,9,10,15,12,13,14]
; AVX512DQ-NEXT: vpbroadcastb %xmm0, %ymm0
-; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
; AVX512DQ-NEXT: vpaddb (%rdx), %ymm1, %ymm1
; AVX512DQ-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
@@ -3100,7 +3096,6 @@ define void @vec384_i8_widen_to_i96_factor12_broadcast_to_v4i96_factor4(ptr %in.
; AVX512F-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
; AVX512F-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,5,6,7,8,9,10,15,12,13,14]
; AVX512F-NEXT: vpbroadcastb %xmm0, %ymm0
-; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
; AVX512F-NEXT: vpaddb (%rdx), %ymm1, %ymm1
; AVX512F-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
@@ -3118,7 +3113,6 @@ define void @vec384_i8_widen_to_i96_factor12_broadcast_to_v4i96_factor4(ptr %in.
; AVX512DQ-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,5,6,7,8,9,10,15,12,13,14]
; AVX512DQ-NEXT: vpbroadcastb %xmm0, %ymm0
-; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
; AVX512DQ-NEXT: vpaddb (%rdx), %ymm1, %ymm1
; AVX512DQ-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
@@ -3614,10 +3608,9 @@ define void @vec384_i16_widen_to_i48_factor3_broadcast_to_v8i48_factor8(ptr %in.
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqa (%rdi), %xmm0
; AVX512F-NEXT: vmovdqa 48(%rdi), %xmm1
+; AVX512F-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
; AVX512F-NEXT: vpaddb (%rsi), %xmm0, %xmm0
; AVX512F-NEXT: vpbroadcastw %xmm0, %ymm0
-; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
-; AVX512F-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
; AVX512F-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2],xmm0[3],xmm1[4,5],xmm0[6],xmm1[7]
; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
; AVX512F-NEXT: vpaddb (%rdx), %ymm1, %ymm1
@@ -3631,10 +3624,9 @@ define void @vec384_i16_widen_to_i48_factor3_broadcast_to_v8i48_factor8(ptr %in.
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vmovdqa (%rdi), %xmm0
; AVX512DQ-NEXT: vmovdqa 48(%rdi), %xmm1
+; AVX512DQ-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
; AVX512DQ-NEXT: vpaddb (%rsi), %xmm0, %xmm0
; AVX512DQ-NEXT: vpbroadcastw %xmm0, %ymm0
-; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
-; AVX512DQ-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
; AVX512DQ-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2],xmm0[3],xmm1[4,5],xmm0[6],xmm1[7]
; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
; AVX512DQ-NEXT: vpaddb (%rdx), %ymm1, %ymm1
@@ -3868,10 +3860,9 @@ define void @vec384_i16_widen_to_i96_factor6_broadcast_to_v4i96_factor4(ptr %in.
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqa (%rdi), %xmm0
; AVX512F-NEXT: vmovdqa 48(%rdi), %xmm1
+; AVX512F-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
; AVX512F-NEXT: vpaddb (%rsi), %xmm0, %xmm0
; AVX512F-NEXT: vpbroadcastw %xmm0, %ymm0
-; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
-; AVX512F-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
; AVX512F-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5],xmm0[6],xmm1[7]
; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
; AVX512F-NEXT: vpaddb (%rdx), %ymm1, %ymm1
@@ -3885,10 +3876,9 @@ define void @vec384_i16_widen_to_i96_factor6_broadcast_to_v4i96_factor4(ptr %in.
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vmovdqa (%rdi), %xmm0
; AVX512DQ-NEXT: vmovdqa 48(%rdi), %xmm1
+; AVX512DQ-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
; AVX512DQ-NEXT: vpaddb (%rsi), %xmm0, %xmm0
; AVX512DQ-NEXT: vpbroadcastw %xmm0, %ymm0
-; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
-; AVX512DQ-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
; AVX512DQ-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5],xmm0[6],xmm1[7]
; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
; AVX512DQ-NEXT: vpaddb (%rdx), %ymm1, %ymm1
diff --git a/llvm/test/CodeGen/X86/pr42905.ll b/llvm/test/CodeGen/X86/pr42905.ll
index 6ebe5be45a4f8..a3ff58e3dcf9b 100644
--- a/llvm/test/CodeGen/X86/pr42905.ll
+++ b/llvm/test/CodeGen/X86/pr42905.ll
@@ -4,16 +4,10 @@
define <4 x double> @autogen_SD30452(i1 %L230) {
; CHECK-LABEL: autogen_SD30452:
; CHECK: # %bb.0: # %BB
-; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [151829,151829]
-; CHECK-NEXT: movq %xmm0, %rax
-; CHECK-NEXT: cvtsi2sd %rax, %xmm0
-; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
-; CHECK-NEXT: movq %xmm2, %rax
-; CHECK-NEXT: xorps %xmm2, %xmm2
-; CHECK-NEXT: cvtsi2sd %rax, %xmm2
-; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
-; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
-; CHECK-NEXT: cvtdq2pd %xmm1, %xmm1
+; CHECK-NEXT: movdqa {{.*#+}} xmm0 = [151829,151829]
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; CHECK-NEXT: cvtdq2pd %xmm0, %xmm0
+; CHECK-NEXT: movaps %xmm0, %xmm1
; CHECK-NEXT: retq
BB:
%I = insertelement <4 x i64> zeroinitializer, i64 151829, i32 3
diff --git a/llvm/test/CodeGen/X86/sad.ll b/llvm/test/CodeGen/X86/sad.ll
index c74440d7ec021..fe71a16039c19 100644
--- a/llvm/test/CodeGen/X86/sad.ll
+++ b/llvm/test/CodeGen/X86/sad.ll
@@ -927,8 +927,7 @@ define dso_local i32 @sad_nonloop_64i8(ptr nocapture readonly %p, i64, ptr nocap
; AVX512F-NEXT: vmovdqu 32(%rdi), %ymm1
; AVX512F-NEXT: vpsadbw 32(%rdx), %ymm1, %ymm1
; AVX512F-NEXT: vpsadbw (%rdx), %ymm0, %ymm0
-; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT: vpaddq %zmm1, %zmm0, %zmm0
+; AVX512F-NEXT: vpaddq %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT: vpaddq %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
diff --git a/llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-7.ll b/llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-7.ll
index 40e4bb4b16c79..bc08f57e5faac 100644
--- a/llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-7.ll
+++ b/llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-7.ll
@@ -2079,7 +2079,7 @@ define void @store_i16_stride7_vf16(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve
; AVX-NEXT: vpsrld $16, %xmm8, %xmm10
; AVX-NEXT: vpunpckhdq {{.*#+}} xmm10 = xmm3[2],xmm10[2],xmm3[3],xmm10[3]
; AVX-NEXT: vpunpckhwd {{.*#+}} xmm12 = xmm8[4],xmm3[4],xmm8[5],xmm3[5],xmm8[6],xmm3[6],xmm8[7],xmm3[7]
-; AVX-NEXT: vpshuflw {{.*#+}} xmm12 = xmm12[2,2,2,2,4,5,6,7]
+; AVX-NEXT: vpshufd {{.*#+}} xmm12 = xmm12[1,1,2,3]
; AVX-NEXT: vpshufhw {{.*#+}} xmm12 = xmm12[0,1,2,3,4,5,5,4]
; AVX-NEXT: vinsertf128 $1, %xmm10, %ymm12, %ymm10
; AVX-NEXT: vandnps %ymm10, %ymm6, %ymm6
diff --git a/llvm/test/CodeGen/X86/vector-reduce-fmax-fmin-fast.ll b/llvm/test/CodeGen/X86/vector-reduce-fmax-fmin-fast.ll
index 5bd9b0292a8f0..638c195850d32 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-fmax-fmin-fast.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-fmax-fmin-fast.ll
@@ -170,7 +170,7 @@ define float @test_v16f32(<16 x float> %a0) {
; AVX512-LABEL: test_v16f32:
; AVX512: # %bb.0:
; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1
-; AVX512-NEXT: vmaxps %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vmaxps %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vmaxps %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
@@ -264,7 +264,7 @@ define double @test_v8f64(<8 x double> %a0) {
; AVX512-LABEL: test_v8f64:
; AVX512: # %bb.0:
; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1
-; AVX512-NEXT: vminpd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vminpd %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vminpd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
@@ -306,7 +306,7 @@ define double @test_v16f64(<16 x double> %a0) {
; AVX512: # %bb.0:
; AVX512-NEXT: vmaxpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1
-; AVX512-NEXT: vmaxpd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vmaxpd %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
diff --git a/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll b/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll
index eafee9e65345f..f0f430abc48dc 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll
@@ -175,7 +175,7 @@ define float @test_v16f32(<16 x float> %a0) {
; AVX512-LABEL: test_v16f32:
; AVX512: # %bb.0:
; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1
-; AVX512-NEXT: vmaxps %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vmaxps %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vmaxps %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
@@ -311,7 +311,7 @@ define double @test_v8f64(<8 x double> %a0) {
; AVX512-LABEL: test_v8f64:
; AVX512: # %bb.0:
; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1
-; AVX512-NEXT: vmaxpd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vmaxpd %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
@@ -353,7 +353,7 @@ define double @test_v16f64(<16 x double> %a0) {
; AVX512: # %bb.0:
; AVX512-NEXT: vmaxpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1
-; AVX512-NEXT: vmaxpd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vmaxpd %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
diff --git a/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll b/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll
index e8f9c7f7b524d..4d6daf3fb77f0 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll
@@ -216,7 +216,7 @@ define float @test_v16f32(<16 x float> %a0) {
; AVX512-LABEL: test_v16f32:
; AVX512: # %bb.0:
; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1
-; AVX512-NEXT: vminps %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vminps %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vminps %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
@@ -310,7 +310,7 @@ define double @test_v8f64(<8 x double> %a0) {
; AVX512-LABEL: test_v8f64:
; AVX512: # %bb.0:
; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1
-; AVX512-NEXT: vminpd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vminpd %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vminpd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
@@ -352,7 +352,7 @@ define double @test_v16f64(<16 x double> %a0) {
; AVX512: # %bb.0:
; AVX512-NEXT: vminpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1
-; AVX512-NEXT: vminpd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vminpd %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vminpd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
diff --git a/llvm/test/CodeGen/X86/vector-reduce-mul.ll b/llvm/test/CodeGen/X86/vector-reduce-mul.ll
index 0103b7622dc3e..ab95081e2938e 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-mul.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-mul.ll
@@ -357,14 +357,14 @@ define i64 @test_v8i64(<8 x i64> %a0) {
; AVX512BW-LABEL: test_v8i64:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; AVX512BW-NEXT: vpsrlq $32, %zmm0, %zmm2
-; AVX512BW-NEXT: vpmuludq %zmm1, %zmm2, %zmm2
-; AVX512BW-NEXT: vpsrlq $32, %zmm1, %zmm3
-; AVX512BW-NEXT: vpmuludq %zmm3, %zmm0, %zmm3
-; AVX512BW-NEXT: vpaddq %zmm2, %zmm3, %zmm2
-; AVX512BW-NEXT: vpsllq $32, %zmm2, %zmm2
-; AVX512BW-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpaddq %zmm2, %zmm0, %zmm0
+; AVX512BW-NEXT: vpsrlq $32, %ymm0, %ymm2
+; AVX512BW-NEXT: vpmuludq %ymm1, %ymm2, %ymm2
+; AVX512BW-NEXT: vpsrlq $32, %ymm1, %ymm3
+; AVX512BW-NEXT: vpmuludq %ymm3, %ymm0, %ymm3
+; AVX512BW-NEXT: vpaddq %ymm2, %ymm3, %ymm2
+; AVX512BW-NEXT: vpsllq $32, %ymm2, %ymm2
+; AVX512BW-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
+; AVX512BW-NEXT: vpaddq %ymm2, %ymm0, %ymm0
; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512BW-NEXT: vpsrlq $32, %xmm0, %xmm2
; AVX512BW-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
@@ -390,14 +390,14 @@ define i64 @test_v8i64(<8 x i64> %a0) {
; AVX512BWVL-LABEL: test_v8i64:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; AVX512BWVL-NEXT: vpsrlq $32, %zmm0, %zmm2
-; AVX512BWVL-NEXT: vpmuludq %zmm1, %zmm2, %zmm2
-; AVX512BWVL-NEXT: vpsrlq $32, %zmm1, %zmm3
-; AVX512BWVL-NEXT: vpmuludq %zmm3, %zmm0, %zmm3
-; AVX512BWVL-NEXT: vpaddq %zmm2, %zmm3, %zmm2
-; AVX512BWVL-NEXT: vpsllq $32, %zmm2, %zmm2
-; AVX512BWVL-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
-; AVX512BWVL-NEXT: vpaddq %zmm2, %zmm0, %zmm0
+; AVX512BWVL-NEXT: vpsrlq $32, %ymm0, %ymm2
+; AVX512BWVL-NEXT: vpmuludq %ymm1, %ymm2, %ymm2
+; AVX512BWVL-NEXT: vpsrlq $32, %ymm1, %ymm3
+; AVX512BWVL-NEXT: vpmuludq %ymm3, %ymm0, %ymm3
+; AVX512BWVL-NEXT: vpaddq %ymm2, %ymm3, %ymm2
+; AVX512BWVL-NEXT: vpsllq $32, %ymm2, %ymm2
+; AVX512BWVL-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpaddq %ymm2, %ymm0, %ymm0
; AVX512BWVL-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512BWVL-NEXT: vpsrlq $32, %xmm0, %xmm2
; AVX512BWVL-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
@@ -667,14 +667,14 @@ define i64 @test_v16i64(<16 x i64> %a0) {
; AVX512BW-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpaddq %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; AVX512BW-NEXT: vpsrlq $32, %zmm0, %zmm2
-; AVX512BW-NEXT: vpmuludq %zmm1, %zmm2, %zmm2
-; AVX512BW-NEXT: vpsrlq $32, %zmm1, %zmm3
-; AVX512BW-NEXT: vpmuludq %zmm3, %zmm0, %zmm3
-; AVX512BW-NEXT: vpaddq %zmm2, %zmm3, %zmm2
-; AVX512BW-NEXT: vpsllq $32, %zmm2, %zmm2
-; AVX512BW-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpaddq %zmm2, %zmm0, %zmm0
+; AVX512BW-NEXT: vpsrlq $32, %ymm0, %ymm2
+; AVX512BW-NEXT: vpmuludq %ymm1, %ymm2, %ymm2
+; AVX512BW-NEXT: vpsrlq $32, %ymm1, %ymm3
+; AVX512BW-NEXT: vpmuludq %ymm3, %ymm0, %ymm3
+; AVX512BW-NEXT: vpaddq %ymm2, %ymm3, %ymm2
+; AVX512BW-NEXT: vpsllq $32, %ymm2, %ymm2
+; AVX512BW-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
+; AVX512BW-NEXT: vpaddq %ymm2, %ymm0, %ymm0
; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512BW-NEXT: vpsrlq $32, %xmm0, %xmm2
; AVX512BW-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
@@ -708,14 +708,14 @@ define i64 @test_v16i64(<16 x i64> %a0) {
; AVX512BWVL-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
; AVX512BWVL-NEXT: vpaddq %zmm2, %zmm0, %zmm0
; AVX512BWVL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; AVX512BWVL-NEXT: vpsrlq $32, %zmm0, %zmm2
-; AVX512BWVL-NEXT: vpmuludq %zmm1, %zmm2, %zmm2
-; AVX512BWVL-NEXT: vpsrlq $32, %zmm1, %zmm3
-; AVX512BWVL-NEXT: vpmuludq %zmm3, %zmm0, %zmm3
-; AVX512BWVL-NEXT: vpaddq %zmm2, %zmm3, %zmm2
-; AVX512BWVL-NEXT: vpsllq $32, %zmm2, %zmm2
-; AVX512BWVL-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
-; AVX512BWVL-NEXT: vpaddq %zmm2, %zmm0, %zmm0
+; AVX512BWVL-NEXT: vpsrlq $32, %ymm0, %ymm2
+; AVX512BWVL-NEXT: vpmuludq %ymm1, %ymm2, %ymm2
+; AVX512BWVL-NEXT: vpsrlq $32, %ymm1, %ymm3
+; AVX512BWVL-NEXT: vpmuludq %ymm3, %ymm0, %ymm3
+; AVX512BWVL-NEXT: vpaddq %ymm2, %ymm3, %ymm2
+; AVX512BWVL-NEXT: vpsllq $32, %ymm2, %ymm2
+; AVX512BWVL-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpaddq %ymm2, %ymm0, %ymm0
; AVX512BWVL-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512BWVL-NEXT: vpsrlq $32, %xmm0, %xmm2
; AVX512BWVL-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
diff --git a/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll b/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll
index ab216cafcc923..a598e30845579 100644
--- a/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll
+++ b/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll
@@ -3862,15 +3862,14 @@ define void @vec384_i32_widen_to_i96_factor3_broadcast_to_v4...
[truncated]
|
I applied this change and ran the internal tests that were timing out from #133083, and there were no issues, so LGTM from my side. |
LLVM Buildbot has detected a new failure on a builder. Full details are available at: <https://lab.llvm.org/buildbot/#/builders/92/builds/16293>. Here is the relevant piece of the build log for reference:
|
Similar to what is done for visitEXTRACT_VECTOR_ELT: if all uses of a vector are EXTRACT_SUBVECTOR nodes, determine the accumulated demanded elts across all users and call SimplifyDemandedVectorElts in "AssumeSingleUse" mode.
Second attempt: #133130 was reverted by #133331 because it modified test files that had themselves been reverted.