Skip to content

Commit

Permalink
[X86] getFauxShuffle - don't assume an UNDEF src element for AND/ANDNP results in an UNDEF shuffle mask index
Browse files Browse the repository at this point in the history

The other src element might be zero, which guarantees a zero result for that element regardless of the UNDEF src element.

Fixes #55157
  • Loading branch information
RKSimon committed Apr 28, 2022
1 parent ae81435 commit ed8dffe
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 8 deletions.
8 changes: 4 additions & 4 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8041,11 +8041,11 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
 uint64_t ZeroMask = IsAndN ? 255 : 0;
 if (!getTargetConstantBitsFromNode(IsAndN ? N0 : N1, 8, UndefElts, EltBits))
 return false;
+// We can't assume an undef src element gives an undef dst - the other src
+// might be zero.
+if (!UndefElts.isZero())
+return false;
 for (int i = 0, e = (int)EltBits.size(); i != e; ++i) {
-if (UndefElts[i]) {
-Mask.push_back(SM_SentinelUndef);
-continue;
-}
 const APInt &ByteBits = EltBits[i];
 if (ByteBits != 0 && ByteBits != 255)
 return false;
Expand Down
14 changes: 10 additions & 4 deletions llvm/test/CodeGen/X86/vector-shuffle-combining.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3333,11 +3333,17 @@ define void @PR45604(<32 x i16>* %dst, <8 x i16>* %src) {
ret void
}

-; FIXME: getFauxShuffle AND/ANDN decoding wrongly assumes an undef src always gives an undef dst.
+; getFauxShuffle AND/ANDN decoding wrongly assumed an undef src always gives an undef dst.
 define <2 x i64> @PR55157(<16 x i8>* %0) {
-; CHECK-LABEL: PR55157:
-; CHECK: # %bb.0:
-; CHECK-NEXT: retq
+; SSE-LABEL: PR55157:
+; SSE: # %bb.0:
+; SSE-NEXT: xorps %xmm0, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: PR55157:
+; AVX: # %bb.0:
+; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; AVX-NEXT: retq
%2 = load <16 x i8>, <16 x i8>* %0, align 16
%3 = icmp eq <16 x i8> %2, zeroinitializer
%4 = tail call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> zeroinitializer, <16 x i8> zeroinitializer)
Expand Down

0 comments on commit ed8dffe

Please sign in to comment.