Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit d91f04b

Browse files
tannergooding jakobbotsch
authored and committed May 30, 2024
Update the JIT to support rewriting more complex intrinsics as user calls (dotnet#102702)
* Update the JIT to support rewriting more complex intrinsics as user calls
* Updating the shuffle hwintrinsic to always be imported as an intrinsic
* Ensure multi-reg returns are initialized for rewritten hwintrinsics
* Apply suggestions from code review

Co-authored-by: Jakob Botsch Nielsen <Jakob.botsch.nielsen@gmail.com>

* Adding function headers to SetMethodHandle and SetEntryPoint
* Apply formatting patch

---------

Co-authored-by: Jakob Botsch Nielsen <Jakob.botsch.nielsen@gmail.com>
1 parent 72e34e8 commit d91f04b

9 files changed

+418
-100
lines changed
 

‎src/coreclr/jit/gentree.cpp

+79-4
Original file line number | Diff line number | Diff line change
@@ -4128,6 +4128,8 @@ unsigned Compiler::gtSetMultiOpOrder(GenTreeMultiOp* multiOp)
41284128
// first tree to be evaluated, and "lvl2" - the second.
41294129
if (multiOp->IsReverseOp())
41304130
{
4131+
assert(!multiOp->AsHWIntrinsic()->IsUserCall());
4132+
41314133
level = gtSetEvalOrder(multiOp->Op(2));
41324134
lvl2 = gtSetEvalOrder(multiOp->Op(1));
41334135
}
@@ -4140,11 +4142,18 @@ unsigned Compiler::gtSetMultiOpOrder(GenTreeMultiOp* multiOp)
41404142
// We want the more complex tree to be evaluated first.
41414143
if (level < lvl2)
41424144
{
4143-
bool canSwap = multiOp->IsReverseOp() ? gtCanSwapOrder(multiOp->Op(2), multiOp->Op(1))
4144-
: gtCanSwapOrder(multiOp->Op(1), multiOp->Op(2));
4145+
bool canSwap = false;
4146+
4147+
if (!multiOp->AsHWIntrinsic()->IsUserCall())
4148+
{
4149+
canSwap = multiOp->IsReverseOp() ? gtCanSwapOrder(multiOp->Op(2), multiOp->Op(1))
4150+
: gtCanSwapOrder(multiOp->Op(1), multiOp->Op(2));
4151+
}
41454152

41464153
if (canSwap)
41474154
{
4155+
assert(!multiOp->AsHWIntrinsic()->IsUserCall());
4156+
41484157
if (multiOp->IsReverseOp())
41494158
{
41504159
multiOp->ClearReverseOp();
@@ -6563,7 +6572,7 @@ bool GenTree::OperSupportsReverseOpEvalOrder(Compiler* comp) const
65636572
#if defined(FEATURE_SIMD) || defined(FEATURE_HW_INTRINSICS)
65646573
if (OperIsMultiOp())
65656574
{
6566-
return AsMultiOp()->GetOperandCount() == 2;
6575+
return (AsMultiOp()->GetOperandCount() == 2) && !AsMultiOp()->IsUserCall();
65676576
}
65686577
#endif // FEATURE_SIMD || FEATURE_HW_INTRINSICS
65696578
return false;
@@ -9711,6 +9720,11 @@ GenTree* Compiler::gtCloneExpr(GenTree* tree)
97119720
tree->AsHWIntrinsic()->GetHWIntrinsicId(),
97129721
tree->AsHWIntrinsic()->GetSimdBaseJitType(), tree->AsHWIntrinsic()->GetSimdSize());
97139722
copy->AsHWIntrinsic()->SetAuxiliaryJitType(tree->AsHWIntrinsic()->GetAuxiliaryJitType());
9723+
9724+
if (tree->AsHWIntrinsic()->IsUserCall())
9725+
{
9726+
copy->AsHWIntrinsic()->SetMethodHandle(this, tree->AsHWIntrinsic()->GetMethodHandle());
9727+
}
97149728
goto CLONE_MULTIOP_OPERANDS;
97159729
#endif
97169730
#if defined(FEATURE_SIMD) || defined(FEATURE_HW_INTRINSICS)
@@ -19570,6 +19584,67 @@ void GenTreeMultiOp::InitializeOperands(GenTree** operands, size_t operandCount)
1957019584
SetOperandCount(operandCount);
1957119585
}
1957219586

19587+
//------------------------------------------------------------------------
19588+
// GenTreeJitIntrinsic::SetMethodHandle: Sets the method handle for an intrinsic
19589+
// so that it can be rewritten back to a user call in a later phase
19590+
//
19591+
// Arguments:
19592+
// comp - The compiler instance
19593+
// methodHandle - The method handle representing the fallback handling for the intrinsic
19594+
//
19595+
// Notes:
19596+
// We need to ensure that the operands are not tracked inline so that we can track the
19597+
// underlying method handle. See the comment in GenTreeJitIntrinsic around why the union
19598+
// of fields exists.
19599+
//
19600+
void GenTreeJitIntrinsic::SetMethodHandle(Compiler* comp, CORINFO_METHOD_HANDLE methodHandle)
19601+
{
19602+
assert(OperIsHWIntrinsic() && !IsUserCall());
19603+
gtFlags |= GTF_HW_USER_CALL;
19604+
19605+
size_t operandCount = GetOperandCount();
19606+
19607+
if ((operandCount != 0) && (operandCount <= ArrLen(gtInlineOperands)))
19608+
{
19609+
GenTree** oldOperands = GetOperandArray();
19610+
GenTree** newOperands = comp->getAllocator(CMK_ASTNode).allocate<GenTree*>(operandCount);
19611+
19612+
ResetOperandArray(operandCount, comp, newOperands, operandCount);
19613+
assert(GetOperandArray() == newOperands);
19614+
19615+
for (size_t i = 0; i < operandCount; i++)
19616+
{
19617+
newOperands[i] = oldOperands[i];
19618+
}
19619+
}
19620+
19621+
gtMethodHandle = methodHandle;
19622+
gtEntryPoint = nullptr;
19623+
}
19624+
19625+
#if defined(FEATURE_READYTORUN)
19626+
//------------------------------------------------------------------------
19627+
// GenTreeJitIntrinsic::SetEntryPoint: Sets the entry point for an intrinsic
19628+
// so that it can be rewritten back to a user call in a later phase for R2R
19629+
// scenarios
19630+
//
19631+
// Arguments:
19632+
// comp - The compiler instance
19633+
// entryPoint - The entry point information required for R2R scenarios
19634+
//
19635+
// Notes:
19636+
// This requires SetMethodHandle to have been called first to ensure we aren't
19637+
// overwriting any inline operands
19638+
//
19639+
void GenTreeJitIntrinsic::SetEntryPoint(Compiler* comp, CORINFO_CONST_LOOKUP entryPoint)
19640+
{
19641+
assert(IsUserCall());
19642+
assert(gtEntryPoint == nullptr);
19643+
19644+
gtEntryPoint = new (comp, CMK_ASTNode) CORINFO_CONST_LOOKUP(entryPoint);
19645+
}
19646+
#endif // FEATURE_READYTORUN
19647+
1957319648
var_types GenTreeJitIntrinsic::GetAuxiliaryType() const
1957419649
{
1957519650
CorInfoType auxiliaryJitType = GetAuxiliaryJitType();
@@ -27040,7 +27115,7 @@ bool GenTreeHWIntrinsic::OperRequiresCallFlag() const
2704027115
}
2704127116
}
2704227117

27043-
return false;
27118+
return IsUserCall();
2704427119
}
2704527120

2704627121
//------------------------------------------------------------------------------

‎src/coreclr/jit/gentree.h

+51-1
Original file line number | Diff line number | Diff line change
@@ -559,6 +559,7 @@ enum GenTreeFlags : unsigned int
559559

560560
#ifdef FEATURE_HW_INTRINSICS
561561
GTF_HW_EM_OP = 0x10000000, // GT_HWINTRINSIC -- node is used as an operand to an embedded mask
562+
GTF_HW_USER_CALL = 0x20000000, // GT_HWINTRINSIC -- node is implemented via a user call
562563
#endif // FEATURE_HW_INTRINSICS
563564
};
564565

@@ -6089,6 +6090,15 @@ struct GenTreeMultiOp : public GenTree
60896090
}
60906091
#endif
60916092

6093+
bool IsUserCall() const
6094+
{
6095+
#if defined(FEATURE_HW_INTRINSICS)
6096+
return OperIs(GT_HWINTRINSIC) && (gtFlags & GTF_HW_USER_CALL) != 0;
6097+
#else
6098+
return false;
6099+
#endif
6100+
}
6101+
60926102
GenTree*& Op(size_t index)
60936103
{
60946104
size_t actualIndex = index - 1;
@@ -6217,7 +6227,29 @@ class IntrinsicNodeBuilder final
62176227
struct GenTreeJitIntrinsic : public GenTreeMultiOp
62186228
{
62196229
protected:
6220-
GenTree* gtInlineOperands[2];
6230+
union
6231+
{
6232+
// We don't have enough space to carry both the inline operands
6233+
// and the necessary information required to support rewriting
6234+
// the intrinsic back into a user call. As such, we union the
6235+
// data instead and use the GTF_HW_USER_CALL flag to indicate
6236+
// which fields are valid to access. -- Tracking the fields
6237+
// independently causes TREE_NODE_SZ_LARGE to increase and for
6238+
// GenTreeJitIntrinsic to become the largest node, which is
6239+
// undesirable, so this approach helps keep things pay-for-play.
6240+
6241+
GenTree* gtInlineOperands[2];
6242+
6243+
struct
6244+
{
6245+
CORINFO_METHOD_HANDLE gtMethodHandle;
6246+
6247+
#if defined(FEATURE_READYTORUN)
6248+
// Call target lookup info for method call from a Ready To Run module
6249+
CORINFO_CONST_LOOKUP* gtEntryPoint;
6250+
#endif // FEATURE_READYTORUN
6251+
};
6252+
};
62216253
regNumberSmall gtOtherReg; // The second register for multi-reg intrinsics.
62226254
MultiRegSpillFlags gtSpillFlags; // Spill flags for multi-reg intrinsics.
62236255
unsigned char gtAuxiliaryJitType; // For intrinsics that need another type (e.g. Avx2.Gather* or SIMD (by element))
@@ -6226,6 +6258,24 @@ struct GenTreeJitIntrinsic : public GenTreeMultiOp
62266258
NamedIntrinsic gtHWIntrinsicId;
62276259

62286260
public:
6261+
// Returns the method handle stored on this node. Only valid on nodes marked
// as user calls, since the handle shares storage with the inline operands.
CORINFO_METHOD_HANDLE GetMethodHandle() const
{
    assert(IsUserCall());

    const CORINFO_METHOD_HANDLE userCallHandle = gtMethodHandle;
    return userCallHandle;
}
6266+
6267+
void SetMethodHandle(Compiler* comp, CORINFO_METHOD_HANDLE methodHandle);
6268+
6269+
#if defined(FEATURE_READYTORUN)
6270+
// Returns a copy of the R2R call target lookup info stored on this node.
// Only valid on nodes marked as user calls whose entry point has been set.
CORINFO_CONST_LOOKUP GetEntryPoint() const
{
    assert(IsUserCall());

    // SetMethodHandle leaves gtEntryPoint null until SetEntryPoint runs, so
    // catch a premature read here instead of dereferencing null.
    assert(gtEntryPoint != nullptr);

    return *gtEntryPoint;
}
6275+
6276+
void SetEntryPoint(Compiler* comp, CORINFO_CONST_LOOKUP entryPoint);
6277+
#endif // FEATURE_READYTORUN
6278+
62296279
//-----------------------------------------------------------
62306280
// GetRegNumByIdx: Get regNumber of i'th position.
62316281
//

‎src/coreclr/jit/hwintrinsicarm64.cpp

+8-1
Original file line number | Diff line number | Diff line change
@@ -1869,7 +1869,14 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
18691869

18701870
if (!indices->IsVectorConst())
18711871
{
1872-
// TODO-ARM64-CQ: Handling non-constant indices is a bit more complex
1872+
assert(sig->numArgs == 2);
1873+
1874+
op2 = impSIMDPopStack();
1875+
op1 = impSIMDPopStack();
1876+
1877+
retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize);
1878+
1879+
retNode->AsHWIntrinsic()->SetMethodHandle(this, method);
18731880
break;
18741881
}
18751882

‎src/coreclr/jit/hwintrinsiclistarm64.h

+2-2
Original file line number | Diff line number | Diff line change
@@ -103,7 +103,7 @@ HARDWARE_INTRINSIC(Vector64, op_UnsignedRightShift,
103103
HARDWARE_INTRINSIC(Vector64, ShiftLeft, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
104104
HARDWARE_INTRINSIC(Vector64, ShiftRightArithmetic, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
105105
HARDWARE_INTRINSIC(Vector64, ShiftRightLogical, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
106-
HARDWARE_INTRINSIC(Vector64, Shuffle, 8, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
106+
HARDWARE_INTRINSIC(Vector64, Shuffle, 8, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen)
107107
HARDWARE_INTRINSIC(Vector64, Sqrt, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
108108
HARDWARE_INTRINSIC(Vector64, Store, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
109109
HARDWARE_INTRINSIC(Vector64, StoreAligned, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
@@ -220,7 +220,7 @@ HARDWARE_INTRINSIC(Vector128, op_UnaryPlus,
220220
HARDWARE_INTRINSIC(Vector128, ShiftLeft, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
221221
HARDWARE_INTRINSIC(Vector128, ShiftRightArithmetic, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
222222
HARDWARE_INTRINSIC(Vector128, ShiftRightLogical, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
223-
HARDWARE_INTRINSIC(Vector128, Shuffle, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
223+
HARDWARE_INTRINSIC(Vector128, Shuffle, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen)
224224
HARDWARE_INTRINSIC(Vector128, Sqrt, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
225225
HARDWARE_INTRINSIC(Vector128, Store, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
226226
HARDWARE_INTRINSIC(Vector128, StoreAligned, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)

0 commit comments

Comments
 (0)