Skip to content

Commit

Permalink
Resolve some issues impacting the jitstress-isas-x86 runs (#104935)
Browse files Browse the repository at this point in the history
* Ensure that TernaryLogic lowering accounts for AND_NOT since it is not commutative

* Ensure we create a valid node when DOTNET_EnableSSE2=0 is set

* Apply formatting patch
  • Loading branch information
tannergooding authored Jul 18, 2024
1 parent c707913 commit f1d8613
Show file tree
Hide file tree
Showing 2 changed files with 119 additions and 15 deletions.
19 changes: 17 additions & 2 deletions src/coreclr/jit/hwintrinsicxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1413,8 +1413,23 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
op2 = impSIMDPopStack();
op1 = impSIMDPopStack();

op1 = gtFoldExpr(gtNewSimdUnOpNode(GT_NOT, retType, op1, simdBaseJitType, simdSize));
retNode = gtNewSimdBinOpNode(GT_AND, retType, op1, op2, simdBaseJitType, simdSize);
if (IsBaselineSimdIsaSupported())
{
op1 = gtFoldExpr(gtNewSimdUnOpNode(GT_NOT, retType, op1, simdBaseJitType, simdSize));
retNode = gtNewSimdBinOpNode(GT_AND, retType, op1, op2, simdBaseJitType, simdSize);
}
else
{
// We need to ensure we import even if SSE2 is disabled
assert(intrinsic == NI_SSE_AndNot);

op3 = gtNewAllBitsSetConNode(retType);

op1 = gtNewSimdHWIntrinsicNode(retType, op1, op3, NI_SSE_Xor, simdBaseJitType, simdSize);
op1 = gtFoldExpr(op1);

retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, NI_SSE_And, simdBaseJitType, simdSize);
}
break;
}

Expand Down
115 changes: 102 additions & 13 deletions src/coreclr/jit/lowerxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1471,9 +1471,23 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node)
op2 = userIntrin->Op(1);
}

NamedIntrinsic intrinsic =
GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(comp, GT_AND_NOT, op1, op2, simdBaseType,
simdSize, false);
NamedIntrinsic intrinsic = NI_Illegal;

if (comp->IsBaselineSimdIsaSupported())
{
intrinsic = GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(comp, GT_AND_NOT, op1, op2,
simdBaseType, simdSize, false);
}
else
{
// We need to ensure we optimize even if SSE2 is disabled

assert(simdBaseType == TYP_FLOAT);
assert(simdSize <= 16);

intrinsic = NI_SSE_AndNot;
}

userIntrin->ResetHWIntrinsicId(intrinsic, comp, op1, op2);

return nextNode;
Expand All @@ -1487,24 +1501,55 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node)
BlockRange().Remove(node);
op3 = userIntrin->Op(2);

// Tracks which two operands get used first
TernaryLogicUseFlags firstOpUseFlags = TernaryLogicUseFlags::AB;

if (op3 == node)
{
op3 = userIntrin->Op(1);
if (userOper == GT_AND_NOT)
{
op3 = op2;
op2 = op1;
op1 = userIntrin->Op(1);

// AND_NOT isn't commutative so we need to shift parameters down
firstOpUseFlags = TernaryLogicUseFlags::BC;
}
else
{
op3 = userIntrin->Op(1);
}
}

uint8_t controlByte = 0x00;

if ((userOper == GT_XOR) && op3->IsVectorAllBitsSet())
{
// We're being used by what is actually GT_NOT, so we
// need to shift parameters down so that A is unused
// We have XOR(OP(A, B), AllBitsSet)
// A: op1
// B: op2
// C: op3 (AllBitsSet)
//
// We want A to be the unused parameter so swap it around
// A: op3 (AllBitsSet)
// B: op1
// C: op2
//
// This gives us NOT(OP(B, C))

assert(firstOpUseFlags == TernaryLogicUseFlags::AB);

std::swap(op2, op3);
std::swap(op1, op2);

if (isOperNot)
{
// We have what is actually a double not, so just return op2
// We have NOT(XOR(B, AllBitsSet))
// A: op3 (AllBitsSet)
// B: op1
// C: op2 (AllBitsSet)
//
// This represents a double not, so so just return op2
// which is the only actual value now that the parameters
// were shifted around

Expand Down Expand Up @@ -1538,20 +1583,64 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node)
}
else if (isOperNot)
{
// A is unused, so we just want OP(NOT(B), C)
if (firstOpUseFlags == TernaryLogicUseFlags::AB)
{
// We have OP(XOR(A, AllBitsSet), C)
// A: op1
// B: op2 (AllBitsSet)
// C: op3
//
// We want A to be the unused parameter so swap it around
// A: op2 (AllBitsSet)
// B: op1
// C: op3
//
// This gives us OP(NOT(B), C)

assert(op2->IsVectorAllBitsSet());
std::swap(op1, op2);
assert(op2->IsVectorAllBitsSet());
std::swap(op1, op2);

controlByte = static_cast<uint8_t>(~B);
controlByte = TernaryLogicInfo::GetTernaryControlByte(userOper, controlByte, C);
controlByte = static_cast<uint8_t>(~B);
controlByte = TernaryLogicInfo::GetTernaryControlByte(userOper, controlByte, C);
}
else
{
// We have OP(A, XOR(B, AllBitsSet))
// A: op1
// B: op2
// C: op3 (AllBitsSet)
//
// We want A to be the unused parameter so swap it around
// A: op3 (AllBitsSet)
// B: op1
// C: op2
//
// This gives us OP(B, NOT(C))

assert(firstOpUseFlags == TernaryLogicUseFlags::BC);

assert(op3->IsVectorAllBitsSet());
std::swap(op2, op3);
std::swap(op1, op2);

controlByte = static_cast<uint8_t>(~C);
controlByte = TernaryLogicInfo::GetTernaryControlByte(userOper, B, controlByte);
}
}
else
else if (firstOpUseFlags == TernaryLogicUseFlags::AB)
{
// We have OP2(OP1(A, B), C)
controlByte = TernaryLogicInfo::GetTernaryControlByte(oper, A, B);
controlByte = TernaryLogicInfo::GetTernaryControlByte(userOper, controlByte, C);
}
else
{
// We have OP2(A, OP1(B, C))
assert(firstOpUseFlags == TernaryLogicUseFlags::BC);

controlByte = TernaryLogicInfo::GetTernaryControlByte(oper, B, C);
controlByte = TernaryLogicInfo::GetTernaryControlByte(userOper, A, controlByte);
}

NamedIntrinsic ternaryLogicId = NI_AVX512F_TernaryLogic;

Expand Down

0 comments on commit f1d8613

Please sign in to comment.