Skip to content

Commit

Permalink
Ensure CndSel handles 64-bit operands where possible
Browse files (browse the repository at this point in the history)
  • Loading branch information
tannergooding committed Jun 28, 2024
1 parent f71ea56 commit e2bc3d1
Showing 1 changed file with 22 additions and 7 deletions.
29 changes: 22 additions & 7 deletions src/coreclr/jit/lowerxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3078,8 +3078,6 @@ GenTree* Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cm
//
GenTree* Lowering::LowerHWIntrinsicCndSel(GenTreeHWIntrinsic* node)
{
assert(!comp->canUseEvexEncodingDebugOnly());

var_types simdType = node->gtType;
CorInfoType simdBaseJitType = node->GetSimdBaseJitType();
var_types simdBaseType = node->GetSimdBaseType();
Expand All @@ -3102,17 +3100,34 @@ GenTree* Lowering::LowerHWIntrinsicCndSel(GenTreeHWIntrinsic* node)
// we can optimize the entire conditional select to
// a single BlendVariable instruction (if supported by the architecture)

// TODO-XARCH-AVX512 Use VPBLENDM* and take input directly from K registers if cond is from MoveMaskToVectorSpecial.
// First, determine if the condition is a per-element mask
if (op1->OperIsHWIntrinsic() && HWIntrinsicInfo::ReturnsPerElementMask(op1->AsHWIntrinsic()->GetHWIntrinsicId()))
{
// Next, determine if the target architecture supports BlendVariable
NamedIntrinsic blendVariableId = NI_Illegal;

// For Vector256 (simdSize == 32), BlendVariable for floats/doubles is available on AVX, whereas other types
// require AVX2
if (simdSize == 32)
bool isOp1CvtMaskToVector = op1->AsHWIntrinsic()->OperIsConvertMaskToVector();

if ((simdSize == 64) || isOp1CvtMaskToVector)
{
if (isOp1CvtMaskToVector)
{
op1 = op1->AsHWIntrinsic()->Op(1);
}
else
{
op1 = gtNewSimdCvtVectorToMaskNode(TYP_MASK, op1, simdBaseJitType, simdSize);
BlockRange().InsertBefore(node, op1);
}

assert(op1->TypeGet() == TYP_MASK);
blendVariableId = NI_EVEX_BlendVariableMask;
}
else if (simdSize == 32)
{
// For Vector256 (simdSize == 32), BlendVariable for floats/doubles
// is available on AVX, whereas other types (integrals) require AVX2

if (varTypeIsFloating(simdBaseType))
{
// This should have already been confirmed
Expand All @@ -3124,9 +3139,9 @@ GenTree* Lowering::LowerHWIntrinsicCndSel(GenTreeHWIntrinsic* node)
blendVariableId = NI_AVX2_BlendVariable;
}
}
// For Vector128, BlendVariable is available on SSE41
else if (comp->compOpportunisticallyDependsOn(InstructionSet_SSE41))
{
// For Vector128, BlendVariable is available on SSE41
blendVariableId = NI_SSE41_BlendVariable;
}

Expand Down

0 comments on commit e2bc3d1

Please sign in to comment.