Skip to content

Commit

Permalink
[X86] LowerBUILD_VECTOR(): fix all-UNDEF detection
Browse files Browse the repository at this point in the history
The original check tried to avoid inspecting `UndefMask` directly and to
deduce the all-UNDEF case by simpler means. However, `NonZeroMask == 0` says
nothing about, e.g., `ZeroMask`, so it does not prove that every element is UNDEF.

Fixes llvm/llvm-project#60168
  • Loading branch information
LebedevRI committed Jan 20, 2023
1 parent b3154d0 commit 1eecf03
Show file tree
Hide file tree
Showing 2 changed files with 77 additions and 6 deletions.
10 changes: 4 additions & 6 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11166,19 +11166,17 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
}
}

// All undef vector. Return an UNDEF. All zero vectors were handled above.
unsigned NumFrozenUndefElts = FrozenUndefMask.countPopulation();
if (NonZeroMask == 0 && NumFrozenUndefElts != NumElems) {
assert(UndefMask.isAllOnes() && "Fully undef mask expected");
// All undef vector. Return an UNDEF.
if (UndefMask.isAllOnes())
return DAG.getUNDEF(VT);
}

// If we have multiple FREEZE-UNDEF operands, we are likely going to end up
// lowering into a suboptimal insertion sequence. Instead, thaw the UNDEF in
// our source BUILD_VECTOR, create another FREEZE-UNDEF splat BUILD_VECTOR,
// and blend the FREEZE-UNDEF operands back in.
// FIXME: is this worthwhile even for a single FREEZE-UNDEF operand?
if (NumFrozenUndefElts >= 2 && NumFrozenUndefElts < NumElems) {
if (unsigned NumFrozenUndefElts = FrozenUndefMask.countPopulation();
NumFrozenUndefElts >= 2 && NumFrozenUndefElts < NumElems) {
SmallVector<int, 16> BlendMask(NumElems, -1);
SmallVector<SDValue, 16> Elts(NumElems, DAG.getUNDEF(OpEltVT));
for (unsigned i = 0; i < NumElems; ++i) {
Expand Down
73 changes: 73 additions & 0 deletions llvm/test/CodeGen/X86/build-vector-128.ll
Original file line number Diff line number Diff line change
Expand Up @@ -552,3 +552,76 @@ define <4 x float> @PR37502(float %x, float %y) {
ret <4 x float> %i3
}

; Regression test for PR60168 (llvm/llvm-project#60168).
; Multiplies the <2 x i32> argument %x by <-2, -2>, takes the integer absolute
; value via @llvm.abs.v2i32 (with the i1 flag set to false), and stores the
; <2 x i32> result to %out.
; NOTE(review): going by the function name, this is meant to reach a
; BUILD_VECTOR made of zeros and undef elements during X86 lowering -- confirm
; against the linked issue.
define void @pr60168_buildvector_of_zeros_and_undef(<2 x i32> %x, ptr %out) {
; SSE2-32-LABEL: pr60168_buildvector_of_zeros_and_undef:
; SSE2-32: # %bb.0:
; SSE2-32-NEXT: movl {{[0-9]+}}(%esp), %eax
; SSE2-32-NEXT: movd %eax, %xmm1
; SSE2-32-NEXT: xorps %xmm2, %xmm2
; SSE2-32-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm1[0,0]
; SSE2-32-NEXT: paddd %xmm0, %xmm0
; SSE2-32-NEXT: psubd %xmm0, %xmm2
; SSE2-32-NEXT: movdqa %xmm2, %xmm0
; SSE2-32-NEXT: psrad $31, %xmm0
; SSE2-32-NEXT: pxor %xmm0, %xmm2
; SSE2-32-NEXT: psubd %xmm0, %xmm2
; SSE2-32-NEXT: movq %xmm2, (%eax)
; SSE2-32-NEXT: retl
;
; SSE2-64-LABEL: pr60168_buildvector_of_zeros_and_undef:
; SSE2-64: # %bb.0:
; SSE2-64-NEXT: movd %eax, %xmm1
; SSE2-64-NEXT: xorps %xmm2, %xmm2
; SSE2-64-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm1[0,0]
; SSE2-64-NEXT: paddd %xmm0, %xmm0
; SSE2-64-NEXT: psubd %xmm0, %xmm2
; SSE2-64-NEXT: movdqa %xmm2, %xmm0
; SSE2-64-NEXT: psrad $31, %xmm0
; SSE2-64-NEXT: pxor %xmm0, %xmm2
; SSE2-64-NEXT: psubd %xmm0, %xmm2
; SSE2-64-NEXT: movq %xmm2, (%rdi)
; SSE2-64-NEXT: retq
;
; SSE41-32-LABEL: pr60168_buildvector_of_zeros_and_undef:
; SSE41-32: # %bb.0:
; SSE41-32-NEXT: movl {{[0-9]+}}(%esp), %eax
; SSE41-32-NEXT: paddd %xmm0, %xmm0
; SSE41-32-NEXT: pxor %xmm1, %xmm1
; SSE41-32-NEXT: psubd %xmm0, %xmm1
; SSE41-32-NEXT: pabsd %xmm1, %xmm0
; SSE41-32-NEXT: movq %xmm0, (%eax)
; SSE41-32-NEXT: retl
;
; SSE41-64-LABEL: pr60168_buildvector_of_zeros_and_undef:
; SSE41-64: # %bb.0:
; SSE41-64-NEXT: paddd %xmm0, %xmm0
; SSE41-64-NEXT: pxor %xmm1, %xmm1
; SSE41-64-NEXT: psubd %xmm0, %xmm1
; SSE41-64-NEXT: pabsd %xmm1, %xmm0
; SSE41-64-NEXT: movq %xmm0, (%rdi)
; SSE41-64-NEXT: retq
;
; AVX-32-LABEL: pr60168_buildvector_of_zeros_and_undef:
; AVX-32: # %bb.0:
; AVX-32-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX-32-NEXT: vpaddd %xmm0, %xmm0, %xmm0
; AVX-32-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX-32-NEXT: vpsubd %xmm0, %xmm1, %xmm0
; AVX-32-NEXT: vpabsd %xmm0, %xmm0
; AVX-32-NEXT: vmovq %xmm0, (%eax)
; AVX-32-NEXT: retl
;
; AVX-64-LABEL: pr60168_buildvector_of_zeros_and_undef:
; AVX-64: # %bb.0:
; AVX-64-NEXT: vpaddd %xmm0, %xmm0, %xmm0
; AVX-64-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX-64-NEXT: vpsubd %xmm0, %xmm1, %xmm0
; AVX-64-NEXT: vpabsd %xmm0, %xmm0
; AVX-64-NEXT: vmovq %xmm0, (%rdi)
; AVX-64-NEXT: retq
%i2 = mul <2 x i32> %x, <i32 -2, i32 -2>
%i3 = call <2 x i32> @llvm.abs.v2i32(<2 x i32> %i2, i1 false)
store <2 x i32> %i3, ptr %out
ret void
}
declare <2 x i32> @llvm.abs.v2i32(<2 x i32>, i1 immarg)

0 comments on commit 1eecf03

Please sign in to comment.