From 7d16fa8378c5ad37bb8c02efa3a82dd0695a2b21 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Thu, 11 Dec 2025 12:02:22 +0000
Subject: [PATCH] [X86] isLoadBitCastBeneficial - its only beneficial to
 bitcast between vector types if the new type is legal

Prevents us from attempting to store illegal types like <2 x i128> that will force scalarization/splitting

Noticed while trying to avoid some split stores mentioned in #171616
---
 llvm/lib/Target/X86/X86ISelLowering.cpp       | 10 ++++++----
 .../X86/single_elt_vector_memory_operation.ll | 19 +++++++------------
 2 files changed, 13 insertions(+), 16 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index f8730a3de11c5..9123ac6c2617d 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -3489,10 +3489,12 @@ bool X86TargetLowering::isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
   if (!Subtarget.hasDQI() && BitcastVT == MVT::v8i1 && LoadVT == MVT::i8)
     return false;
 
-  // If both types are legal vectors, it's always ok to convert them.
-  if (LoadVT.isVector() && BitcastVT.isVector() &&
-      isTypeLegal(LoadVT) && isTypeLegal(BitcastVT))
-    return true;
+  if (LoadVT.isVector() && BitcastVT.isVector()) {
+    // If both types are legal vectors, it's always ok to convert them.
+    // Don't convert to an illegal type.
+    if (isTypeLegal(LoadVT))
+      return isTypeLegal(BitcastVT);
+  }
 
   // If we have a large vector type (even if illegal), don't bitcast to large
   // (illegal) scalar types. Better to load fewer vectors and extract.
diff --git a/llvm/test/CodeGen/X86/single_elt_vector_memory_operation.ll b/llvm/test/CodeGen/X86/single_elt_vector_memory_operation.ll
index f65461ccee23b..818953cca66de 100644
--- a/llvm/test/CodeGen/X86/single_elt_vector_memory_operation.ll
+++ b/llvm/test/CodeGen/X86/single_elt_vector_memory_operation.ll
@@ -20,25 +20,22 @@ define void @load_single_128bit_elt_vector(ptr %in, ptr %off, ptr %out) nounwind
 ; AVX-LABEL: load_single_128bit_elt_vector:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vmovaps (%rdi), %xmm0
-; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; AVX-NEXT:    vmovaps %xmm1, 16(%rdx)
-; AVX-NEXT:    vmovaps %xmm0, (%rdx)
+; AVX-NEXT:    vmovaps %ymm0, (%rdx)
+; AVX-NEXT:    vzeroupper
 ; AVX-NEXT:    retq
 ;
 ; AVX2-LABEL: load_single_128bit_elt_vector:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vmovaps (%rdi), %xmm0
-; AVX2-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; AVX2-NEXT:    vmovaps %xmm1, 16(%rdx)
-; AVX2-NEXT:    vmovaps %xmm0, (%rdx)
+; AVX2-NEXT:    vmovaps %ymm0, (%rdx)
+; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
 ; AVX512F-LABEL: load_single_128bit_elt_vector:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    vmovaps (%rdi), %xmm0
-; AVX512F-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; AVX512F-NEXT:    vmovaps %xmm1, 16(%rdx)
-; AVX512F-NEXT:    vmovaps %xmm0, (%rdx)
+; AVX512F-NEXT:    vmovaps %ymm0, (%rdx)
+; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
   %i0 = load <16 x i8>, ptr %in, align 64
   %i1 = bitcast <16 x i8> %i0 to <1 x i128>
@@ -112,9 +109,7 @@ define void @load_single_256bit_elt_vector(ptr %in, ptr %off, ptr %out) nounwind
 ; AVX512F-LABEL: load_single_256bit_elt_vector:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    vmovaps (%rdi), %ymm0
-; AVX512F-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; AVX512F-NEXT:    vmovaps %ymm1, 32(%rdx)
-; AVX512F-NEXT:    vmovaps %ymm0, (%rdx)
+; AVX512F-NEXT:    vmovaps %zmm0, (%rdx)
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
   %i0 = load <32 x i8>, ptr %in, align 64