Skip to content

Commit

Permalink
Clean up
Browse files Browse the repository at this point in the history
  • Loading branch information
EgorBo committed Oct 21, 2023
1 parent c7fd55c commit ea7c181
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 4 deletions.
9 changes: 6 additions & 3 deletions src/coreclr/jit/importervectorization.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -199,9 +199,12 @@ GenTree* Compiler::impExpandHalfConstEqualsSIMD(

// Optimization: use a single load when byteLen equals simdSize.
// For code simplicity, we always create the nodes for the two-vector case.
const bool useSingleVector = simdSize == byteLen;
return gtNewSimdCmpOpAllNode(GT_EQ, TYP_UBYTE, useSingleVector ? xor1 : orr, gtNewZeroConNode(simdType), baseType,
simdSize);
if (simdSize == byteLen)
{
return gtNewSimdCmpOpAllNode(GT_EQ, TYP_UBYTE, vec1, cnsVec1, baseType, simdSize);
}

return gtNewSimdCmpOpAllNode(GT_EQ, TYP_UBYTE, orr, gtNewZeroConNode(simdType), baseType, simdSize);

// Codegen example for byteLen=40 and OrdinalIgnoreCase mode with AVX:
//
Expand Down
16 changes: 15 additions & 1 deletion src/coreclr/jit/lowerxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1780,10 +1780,24 @@ GenTree* Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cm
GenTree* op2 = node->Op(2);
GenCondition cmpCnd = (cmpOp == GT_EQ) ? GenCondition::EQ : GenCondition::NE;

if (!varTypeIsFloating(simdBaseType) && (simdSize != 64) && op2->IsVectorZero() &&
if (!varTypeIsFloating(simdBaseType) && (simdSize != 64) &&
comp->compOpportunisticallyDependsOn(InstructionSet_SSE41) &&
!op1->OperIsHWIntrinsic(NI_AVX512F_ConvertMaskToVector))
{
if (!op2->IsVectorZero())
{
// Optimize "X == Y" to "(X ^ Y) == 0"
GenTree* zeroVec = comp->gtNewZeroConNode(simdType);
GenTree* xorVec = comp->gtNewSimdBinOpNode(GT_XOR, simdType, op1, op2, simdBaseJitType, simdSize);
node->Op(1) = xorVec;
node->Op(2) = zeroVec;
BlockRange().InsertBefore(node, xorVec);
BlockRange().InsertBefore(node, zeroVec);

// We'll revisit the comparison node
return xorVec;
}

// On SSE4.1 or higher we can optimize comparisons against zero to
// just use PTEST. We can't support it for floating-point, however,
// as it has both +0.0 and -0.0 where +0.0 == -0.0
Expand Down

0 comments on commit ea7c181

Please sign in to comment.