diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 2b9ec27eb8228..29b926ae126b0 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -4128,6 +4128,8 @@ unsigned Compiler::gtSetMultiOpOrder(GenTreeMultiOp* multiOp) // first tree to be evaluated, and "lvl2" - the second. if (multiOp->IsReverseOp()) { + assert(!multiOp->IsUserCall()); + level = gtSetEvalOrder(multiOp->Op(2)); lvl2 = gtSetEvalOrder(multiOp->Op(1)); } @@ -4140,11 +4142,18 @@ unsigned Compiler::gtSetMultiOpOrder(GenTreeMultiOp* multiOp) // We want the more complex tree to be evaluated first. if (level < lvl2) { - bool canSwap = multiOp->IsReverseOp() ? gtCanSwapOrder(multiOp->Op(2), multiOp->Op(1)) - : gtCanSwapOrder(multiOp->Op(1), multiOp->Op(2)); + bool canSwap = false; + + if (!multiOp->IsUserCall()) + { + canSwap = multiOp->IsReverseOp() ? gtCanSwapOrder(multiOp->Op(2), multiOp->Op(1)) + : gtCanSwapOrder(multiOp->Op(1), multiOp->Op(2)); + } if (canSwap) { + assert(!multiOp->IsUserCall()); + if (multiOp->IsReverseOp()) { multiOp->ClearReverseOp(); @@ -6563,7 +6572,7 @@ bool GenTree::OperSupportsReverseOpEvalOrder(Compiler* comp) const #if defined(FEATURE_SIMD) || defined(FEATURE_HW_INTRINSICS) if (OperIsMultiOp()) { - return AsMultiOp()->GetOperandCount() == 2; + return (AsMultiOp()->GetOperandCount() == 2) && !AsMultiOp()->IsUserCall(); } #endif // FEATURE_SIMD || FEATURE_HW_INTRINSICS return false; @@ -9711,6 +9720,15 @@ GenTree* Compiler::gtCloneExpr(GenTree* tree) tree->AsHWIntrinsic()->GetHWIntrinsicId(), tree->AsHWIntrinsic()->GetSimdBaseJitType(), tree->AsHWIntrinsic()->GetSimdSize()); copy->AsHWIntrinsic()->SetAuxiliaryJitType(tree->AsHWIntrinsic()->GetAuxiliaryJitType()); + + if (tree->AsHWIntrinsic()->IsUserCall()) + { + copy->AsHWIntrinsic()->SetMethodHandle(this, tree->AsHWIntrinsic()->GetMethodHandle()); +#if defined(FEATURE_READYTORUN) + // SetMethodHandle clears the entry point, so carry it over to the clone explicitly. + copy->AsHWIntrinsic()->SetEntryPoint(this, tree->AsHWIntrinsic()->GetEntryPoint()); +#endif // FEATURE_READYTORUN + } goto CLONE_MULTIOP_OPERANDS; #endif #if defined(FEATURE_SIMD) || 
defined(FEATURE_HW_INTRINSICS) @@ -19570,6 +19584,70 @@ void GenTreeMultiOp::InitializeOperands(GenTree** operands, size_t operandCount) SetOperandCount(operandCount); } +//------------------------------------------------------------------------ +// GenTreeJitIntrinsic::SetMethodHandle: Sets the method handle for an intrinsic +// so that it can be rewritten back to a user call in a later phase +// +// Arguments: +// comp - The compiler instance +// methodHandle - The method handle representing the fallback handling for the intrinsic +// +// Notes: +// We need to ensure that the operands are not tracked inline so that we can track the +// underlying method handle. See the comment in GenTreeJitIntrinsic around why the union +// of fields exists. +// +void GenTreeJitIntrinsic::SetMethodHandle(Compiler* comp, CORINFO_METHOD_HANDLE methodHandle) +{ + assert(OperIsHWIntrinsic() && !IsUserCall()); + gtFlags |= GTF_HW_USER_CALL; + + size_t operandCount = GetOperandCount(); + + if ((operandCount != 0) && (operandCount <= ArrLen(gtInlineOperands))) + { + GenTree** oldOperands = GetOperandArray(); + GenTree** newOperands = comp->getAllocator(CMK_ASTNode).allocate<GenTree*>(operandCount); + + ResetOperandArray(operandCount, comp, newOperands, operandCount); + assert(GetOperandArray() == newOperands); + + for (size_t i = 0; i < operandCount; i++) + { + newOperands[i] = oldOperands[i]; + } + } + + gtMethodHandle = methodHandle; +#if defined(FEATURE_READYTORUN) + // Start with no entry point; SetEntryPoint asserts this before filling it in. + gtEntryPoint = nullptr; +#endif // FEATURE_READYTORUN +} + +#if defined(FEATURE_READYTORUN) +//------------------------------------------------------------------------ +// GenTreeJitIntrinsic::SetEntryPoint: Sets the entry point for an intrinsic +// so that it can be rewritten back to a user call in a later phase for R2R +// scenarios +// +// Arguments: +// comp - The compiler instance +// entryPoint - The entry point information required for R2R scenarios +// +// Notes: +// This requires SetMethodHandle to have been called first to ensure we aren't +// overwriting any inline operands +// 
+void GenTreeJitIntrinsic::SetEntryPoint(Compiler* comp, CORINFO_CONST_LOOKUP entryPoint) +{ + assert(IsUserCall()); + assert(gtEntryPoint == nullptr); + + gtEntryPoint = new (comp, CMK_ASTNode) CORINFO_CONST_LOOKUP(entryPoint); +} +#endif // FEATURE_READYTORUN + var_types GenTreeJitIntrinsic::GetAuxiliaryType() const { CorInfoType auxiliaryJitType = GetAuxiliaryJitType(); @@ -27036,7 +27111,7 @@ bool GenTreeHWIntrinsic::OperRequiresCallFlag() const } } - return false; + return IsUserCall(); } //------------------------------------------------------------------------------ diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 6a1b50e1569c9..ad77714cfd16b 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -559,6 +559,7 @@ enum GenTreeFlags : unsigned int #ifdef FEATURE_HW_INTRINSICS GTF_HW_EM_OP = 0x10000000, // GT_HWINTRINSIC -- node is used as an operand to an embedded mask + GTF_HW_USER_CALL = 0x20000000, // GT_HWINTRINSIC -- node is implemented via a user call #endif // FEATURE_HW_INTRINSICS }; @@ -6089,6 +6090,15 @@ struct GenTreeMultiOp : public GenTree } #endif + bool IsUserCall() const + { +#if defined(FEATURE_HW_INTRINSICS) + return OperIs(GT_HWINTRINSIC) && (gtFlags & GTF_HW_USER_CALL) != 0; +#else + return false; +#endif + } + GenTree*& Op(size_t index) { size_t actualIndex = index - 1; @@ -6217,7 +6227,29 @@ class IntrinsicNodeBuilder final struct GenTreeJitIntrinsic : public GenTreeMultiOp { protected: - GenTree* gtInlineOperands[2]; + union + { + // We don't have enough space to carry both the inline operands + // and the necessary information required to support rewriting + // the intrinsic back into a user call. As such, we union the + // data instead and use the GTF_HW_USER_CALL flag to indicate + // which fields are valid to access. 
-- Tracking the fields + // independently causes TREE_NODE_SZ_LARGE to increase and for + // GenTreeJitIntrinsic to become the largest node, which is + // undesirable, so this approach helps keep things pay-for-play. + + GenTree* gtInlineOperands[2]; + + struct + { + CORINFO_METHOD_HANDLE gtMethodHandle; + +#if defined(FEATURE_READYTORUN) + // Call target lookup info for method call from a Ready To Run module + CORINFO_CONST_LOOKUP* gtEntryPoint; +#endif // FEATURE_READYTORUN + }; + }; regNumberSmall gtOtherReg; // The second register for multi-reg intrinsics. MultiRegSpillFlags gtSpillFlags; // Spill flags for multi-reg intrinsics. unsigned char gtAuxiliaryJitType; // For intrinsics than need another type (e.g. Avx2.Gather* or SIMD (by element)) @@ -6226,6 +6258,24 @@ struct GenTreeJitIntrinsic : public GenTreeMultiOp NamedIntrinsic gtHWIntrinsicId; public: + CORINFO_METHOD_HANDLE GetMethodHandle() const + { + assert(IsUserCall()); + return gtMethodHandle; + } + + void SetMethodHandle(Compiler* comp, CORINFO_METHOD_HANDLE methodHandle); + +#if defined(FEATURE_READYTORUN) + CORINFO_CONST_LOOKUP GetEntryPoint() const + { + assert(IsUserCall()); + return *gtEntryPoint; + } + + void SetEntryPoint(Compiler* comp, CORINFO_CONST_LOOKUP entryPoint); +#endif // FEATURE_READYTORUN + //----------------------------------------------------------- // GetRegNumByIdx: Get regNumber of i'th position. 
// diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 5a30c54fe78f5..f7445f1e8937d 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -1869,7 +1869,14 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, if (!indices->IsVectorConst()) { - // TODO-ARM64-CQ: Handling non-constant indices is a bit more complex + assert(sig->numArgs == 2); + + op2 = impSIMDPopStack(); + op1 = impSIMDPopStack(); + + retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize); + + retNode->AsHWIntrinsic()->SetMethodHandle(this, method); break; } diff --git a/src/coreclr/jit/hwintrinsiclistarm64.h b/src/coreclr/jit/hwintrinsiclistarm64.h index a207ac5bc6040..69660b362fa9c 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64.h +++ b/src/coreclr/jit/hwintrinsiclistarm64.h @@ -103,7 +103,7 @@ HARDWARE_INTRINSIC(Vector64, op_UnsignedRightShift, HARDWARE_INTRINSIC(Vector64, ShiftLeft, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector64, ShiftRightArithmetic, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector64, ShiftRightLogical, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, Shuffle, 8, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector64, Shuffle, 8, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Vector64, Sqrt, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector64, Store, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector64, StoreAligned, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) @@ -220,7 +220,7 @@ HARDWARE_INTRINSIC(Vector128, op_UnaryPlus, HARDWARE_INTRINSIC(Vector128, ShiftLeft, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, ShiftRightArithmetic, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, ShiftRightLogical, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, Shuffle, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, Shuffle, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Vector128, Sqrt, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, Store, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector128, StoreAligned, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) diff --git a/src/coreclr/jit/hwintrinsiclistxarch.h b/src/coreclr/jit/hwintrinsiclistxarch.h index aed1d0b1dfb11..bf3137ca17c4c 100644 --- a/src/coreclr/jit/hwintrinsiclistxarch.h +++ b/src/coreclr/jit/hwintrinsiclistxarch.h @@ -122,7 +122,7 @@ HARDWARE_INTRINSIC(Vector128, op_UnsignedRightShift, HARDWARE_INTRINSIC(Vector128, ShiftLeft, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, ShiftRightArithmetic, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, ShiftRightLogical, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, Shuffle, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, Shuffle, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Vector128, Sqrt, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, Store, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector128, StoreAligned, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) @@ -236,7 +236,7 @@ HARDWARE_INTRINSIC(Vector256, op_UnsignedRightShift, HARDWARE_INTRINSIC(Vector256, ShiftLeft, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector256, ShiftRightArithmetic, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector256, ShiftRightLogical, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, Shuffle, 32, -1, false, {INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, Shuffle, 32, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Vector256, Sqrt, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) HARDWARE_INTRINSIC(Vector256, Store, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) HARDWARE_INTRINSIC(Vector256, StoreAligned, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) @@ -351,7 +351,7 @@ HARDWARE_INTRINSIC(Vector512, op_UnsignedRightShift, HARDWARE_INTRINSIC(Vector512, ShiftLeft, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector512, ShiftRightArithmetic, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector512, ShiftRightLogical, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, 
HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, Shuffle, 64, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, Shuffle, 64, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Vector512, Sqrt, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector512, Store, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector512, StoreAligned, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp index 8aaee01f4c41c..bfcf81913f705 100644 --- a/src/coreclr/jit/hwintrinsicxarch.cpp +++ b/src/coreclr/jit/hwintrinsicxarch.cpp @@ -2890,7 +2890,14 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, if (!indices->IsVectorConst()) { - // TODO-XARCH-CQ: Handling non-constant indices is a bit more complex + assert(sig->numArgs == 2); + + op2 = impSIMDPopStack(); + op1 = impSIMDPopStack(); + + retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize); + + retNode->AsHWIntrinsic()->SetMethodHandle(this, method); break; } diff --git a/src/coreclr/jit/importercalls.cpp b/src/coreclr/jit/importercalls.cpp index 
85bd5d8dfbd16..0b0a271c76947 100644 --- a/src/coreclr/jit/importercalls.cpp +++ b/src/coreclr/jit/importercalls.cpp @@ -239,7 +239,7 @@ var_types Compiler::impImportCall(OPCODE opcode, if (call != nullptr) { -#ifdef FEATURE_READYTORUN +#if defined(FEATURE_READYTORUN) if (call->OperGet() == GT_INTRINSIC) { if (opts.IsReadyToRun()) @@ -253,7 +253,29 @@ var_types Compiler::impImportCall(OPCODE opcode, call->AsIntrinsic()->gtEntryPoint.accessType = IAT_VALUE; } } -#endif +#if defined(FEATURE_HW_INTRINSICS) + else if (call->OperIsHWIntrinsic()) + { + if (call->AsHWIntrinsic()->IsUserCall()) + { + CORINFO_CONST_LOOKUP entryPoint; + + if (opts.IsReadyToRun()) + { + noway_assert(callInfo->kind == CORINFO_CALL); + entryPoint = callInfo->codePointerLookup.constLookup; + } + else + { + entryPoint.addr = nullptr; + entryPoint.accessType = IAT_VALUE; + } + + call->AsHWIntrinsic()->SetEntryPoint(this, entryPoint); + } + } +#endif // FEATURE_HW_INTRINSICS +#endif // FEATURE_READYTORUN bIntrinsicImported = true; goto DONE_CALL; diff --git a/src/coreclr/jit/rationalize.cpp b/src/coreclr/jit/rationalize.cpp index d9b69b8df5aa2..25b7a1e010e02 100644 --- a/src/coreclr/jit/rationalize.cpp +++ b/src/coreclr/jit/rationalize.cpp @@ -9,11 +9,12 @@ // RewriteNodeAsCall : Replace the given tree node by a GT_CALL. 
// // Arguments: -// ppTree - A pointer-to-a-pointer for the tree node -// fgWalkData - A pointer to tree walk data providing the context -// callHnd - The method handle of the call to be generated -// entryPoint - The method entrypoint of the call to be generated -// args - The argument list of the call to be generated +// use - A pointer-to-a-pointer for the tree node +// parents - A pointer to tree walk data providing the context +// callHnd - The method handle of the call to be generated +// entryPoint - The method entrypoint of the call to be generated +// operands - The operand list of the call to be generated +// operandCount - The number of operands in the operand list // // Return Value: // None. @@ -22,11 +23,11 @@ void Rationalizer::RewriteNodeAsCall(GenTree** use, ArrayStack& parents, CORINFO_METHOD_HANDLE callHnd, -#ifdef FEATURE_READYTORUN +#if defined(FEATURE_READYTORUN) CORINFO_CONST_LOOKUP entryPoint, -#endif - GenTree* arg1, - GenTree* arg2) +#endif // FEATURE_READYTORUN + GenTree** operands, + size_t operandCount) { GenTree* const tree = *use; GenTree* const treeFirstNode = comp->fgGetFirstNode(tree); @@ -37,57 +38,148 @@ void Rationalizer::RewriteNodeAsCall(GenTree** use, // Create the call node GenTreeCall* call = comp->gtNewCallNode(CT_USER_FUNC, callHnd, tree->gtType); - if (arg2 != nullptr) + CORINFO_SIG_INFO sig; + comp->eeGetMethodSig(callHnd, &sig); + + var_types retType = JITtype2varType(sig.retType); + + if (varTypeIsStruct(retType)) { - call->gtArgs.PushFront(comp, NewCallArg::Primitive(arg2)); - call->gtFlags |= arg2->gtFlags & GTF_ALL_EFFECT; + call->gtRetClsHnd = sig.retTypeClass; + retType = comp->impNormStructType(sig.retTypeClass); + +#if FEATURE_MULTIREG_RET + call->InitializeStructReturnType(comp, sig.retTypeClass, call->GetUnmanagedCallConv()); +#endif // FEATURE_MULTIREG_RET + + Compiler::structPassingKind howToReturnStruct; + var_types returnType = + comp->getReturnTypeForStruct(sig.retTypeClass, 
call->GetUnmanagedCallConv(), &howToReturnStruct); + + if (howToReturnStruct == Compiler::SPK_ByReference) + { + assert(returnType == TYP_UNKNOWN); + call->gtCallMoreFlags |= GTF_CALL_M_RETBUFFARG; + } } - if (arg1 != nullptr) + assert(retType == tree->gtType); + + CORINFO_ARG_LIST_HANDLE sigArg = sig.args; + size_t firstArg = 0; + + if (sig.hasThis()) { - call->gtArgs.PushFront(comp, NewCallArg::Primitive(arg1)); - call->gtFlags |= arg1->gtFlags & GTF_ALL_EFFECT; + GenTree* operand = operands[0]; + NewCallArg arg = NewCallArg::Primitive(operand).WellKnown(WellKnownArg::ThisPointer); + + call->gtArgs.PushBack(comp, arg); + call->gtFlags |= operand->gtFlags & GTF_ALL_EFFECT; + + firstArg++; } -#if DEBUG - CORINFO_SIG_INFO sig; - comp->eeGetMethodSig(callHnd, &sig); - assert(JITtype2varType(sig.retType) == tree->gtType); -#endif // DEBUG + for (size_t i = firstArg; i < operandCount; i++) + { + GenTree* operand = operands[i]; + + CORINFO_CLASS_HANDLE clsHnd = NO_CLASS_HANDLE; + CorInfoType corTyp = strip(comp->info.compCompHnd->getArgType(&sig, sigArg, &clsHnd)); + var_types sigTyp = JITtype2varType(corTyp); + + NewCallArg arg; + + if (varTypeIsStruct(sigTyp)) + { + sigTyp = comp->impNormStructType(clsHnd); + arg = NewCallArg::Struct(operand, sigTyp, clsHnd); + } + else + { + arg = NewCallArg::Primitive(operand, sigTyp); + } + + call->gtArgs.PushBack(comp, arg); + call->gtFlags |= operand->gtFlags & GTF_ALL_EFFECT; -#ifdef FEATURE_READYTORUN + sigArg = comp->info.compCompHnd->getArgNext(sigArg); + } + +#if defined(FEATURE_READYTORUN) call->AsCall()->setEntryPoint(entryPoint); -#endif +#endif // FEATURE_READYTORUN + + unsigned tmpNum = BAD_VAR_NUM; + + if (call->TreatAsShouldHaveRetBufArg()) + { + assert(call->ShouldHaveRetBufArg()); + + tmpNum = comp->lvaGrabTemp(true DEBUGARG("return buffer for hwintrinsic")); + comp->lvaSetStruct(tmpNum, sig.retTypeClass, false); + + GenTree* destAddr = comp->gtNewLclVarAddrNode(tmpNum, TYP_BYREF); + NewCallArg newArg = 
NewCallArg::Primitive(destAddr).WellKnown(WellKnownArg::RetBuffer); + + call->gtArgs.InsertAfterThisOrFirst(comp, newArg); + call->gtType = TYP_VOID; + } call = comp->fgMorphArgs(call); + GenTree* result = call; + // Replace "tree" with "call" if (parents.Height() > 1) { - parents.Top(1)->ReplaceOperand(use, call); + if (tmpNum != BAD_VAR_NUM) + { + result = comp->gtNewLclvNode(tmpNum, tree->gtType); + } + + parents.Top(1)->ReplaceOperand(use, result); + + if (tmpNum != BAD_VAR_NUM) + { + comp->gtSetEvalOrder(result); + BlockRange().InsertAfter(insertionPoint, LIR::Range(comp->fgSetTreeSeq(result), result)); + } } else { // If there's no parent, the tree being replaced is the root of the // statement (and no special handling is necessary). - *use = call; + *use = result; } comp->gtSetEvalOrder(call); BlockRange().InsertAfter(insertionPoint, LIR::Range(comp->fgSetTreeSeq(call), call)); - // Propagate flags of "call" to its parents. - // 0 is current node, so start at 1 - for (int i = 1; i < parents.Height(); i++) + if (result == call) + { + // Propagate flags of "call" to its parents. + // 0 is current node, so start at 1 + for (int i = 1; i < parents.Height(); i++) + { + parents.Top(i)->gtFlags |= (call->gtFlags & GTF_ALL_EFFECT) | GTF_CALL; + } + } + else { - parents.Top(i)->gtFlags |= (call->gtFlags & GTF_ALL_EFFECT) | GTF_CALL; + // Normally the call replaces the node in pre-order, so we automatically continue visiting the call. + // However, when we have a retbuf the node is replaced by a local with the call inserted before it, + // so we need to make sure we visit it here. + RationalizeVisitor visitor(*this); + GenTree* node = call; + visitor.WalkTree(&node, nullptr); + assert(node == call); } - // Since "tree" is replaced with "call", pop "tree" node (i.e the current node) - // and replace it with "call" on parent stack. + // Since "tree" is replaced with "result", pop "tree" node (i.e the current node) + // and replace it with "result" on parent stack. 
assert(parents.Top() == tree); (void)parents.Pop(); - parents.Push(call); + parents.Push(result); } // RewriteIntrinsicAsUserCall : Rewrite an intrinsic operator as a GT_CALL to the original method. @@ -108,14 +200,53 @@ void Rationalizer::RewriteIntrinsicAsUserCall(GenTree** use, ArrayStackAsIntrinsic(); - GenTree* arg1 = intrinsic->gtGetOp1(); - GenTree* arg2 = intrinsic->gtGetOp2(); + GenTree* operands[2]; + size_t operandCount = 0; + + operands[0] = intrinsic->gtGetOp1(); + + if (operands[0] != nullptr) + { + operandCount++; + } + + operands[1] = intrinsic->gtGetOp2(); + + if (operands[1] != nullptr) + { + operandCount++; + } + RewriteNodeAsCall(use, parents, intrinsic->gtMethodHandle, -#ifdef FEATURE_READYTORUN +#if defined(FEATURE_READYTORUN) intrinsic->gtEntryPoint, -#endif - arg1, arg2); +#endif // FEATURE_READYTORUN + operands, operandCount); +} + +#if defined(FEATURE_HW_INTRINSICS) +// RewriteHWIntrinsicAsUserCall : Rewrite a hwintrinsic node as a GT_CALL to the original method. +// +// Arguments: +// use - A pointer-to-a-pointer for the hwintrinsic node +// parents - The stack of ancestor nodes for the walk, with the hwintrinsic node on top +// +// Return Value: +// None. +void Rationalizer::RewriteHWIntrinsicAsUserCall(GenTree** use, ArrayStack& parents) +{ + GenTreeHWIntrinsic* hwintrinsic = (*use)->AsHWIntrinsic(); + + GenTree** operands = hwintrinsic->GetOperandArray(); + size_t operandCount = hwintrinsic->GetOperandCount(); + + RewriteNodeAsCall(use, parents, hwintrinsic->GetMethodHandle(), +#if defined(FEATURE_READYTORUN) + hwintrinsic->GetEntryPoint(), +#endif // FEATURE_READYTORUN + operands, operandCount); } +#endif // FEATURE_HW_INTRINSICS #ifdef TARGET_ARM64 // RewriteSubLshDiv: Possibly rewrite a SubLshDiv node into a Mod. 
@@ -319,6 +450,13 @@ Compiler::fgWalkResult Rationalizer::RewriteNode(GenTree** useEdge, Compiler::Ge assert(comp->IsTargetIntrinsic(node->AsIntrinsic()->gtIntrinsicName)); break; +#if defined(FEATURE_HW_INTRINSICS) + case GT_HWINTRINSIC: + // Intrinsics should have already been rewritten back into user calls. + assert(!node->AsHWIntrinsic()->IsUserCall()); + break; +#endif // FEATURE_HW_INTRINSICS + case GT_CAST: if (node->AsCast()->CastOp()->OperIsSimple()) { @@ -361,63 +499,55 @@ Compiler::fgWalkResult Rationalizer::RewriteNode(GenTree** useEdge, Compiler::Ge return Compiler::WALK_CONTINUE; } -//------------------------------------------------------------------------ -// DoPhase: Run the rationalize over the method IR. -// -// Returns: -// PhaseStatus indicating, what, if anything, was modified -// -PhaseStatus Rationalizer::DoPhase() +// Rewrite intrinsics that are not supported by the target back into user calls. +// This needs to be done before the transition to LIR because it relies on the use +// of fgMorphArgs, which is designed to operate on HIR. Once this is done for a +// particular statement, link that statement's nodes into the current basic block. +Compiler::fgWalkResult Rationalizer::RationalizeVisitor::PreOrderVisit(GenTree** use, GenTree* user) { - class RationalizeVisitor final : public GenTreeVisitor - { - Rationalizer& m_rationalizer; + GenTree* const node = *use; - public: - enum - { - ComputeStack = true, - DoPreOrder = true, - DoPostOrder = true, - UseExecutionOrder = true, - }; - - RationalizeVisitor(Rationalizer& rationalizer) - : GenTreeVisitor(rationalizer.comp) - , m_rationalizer(rationalizer) + if (node->OperGet() == GT_INTRINSIC) + { + if (m_rationalizer.comp->IsIntrinsicImplementedByUserCall(node->AsIntrinsic()->gtIntrinsicName)) { + m_rationalizer.RewriteIntrinsicAsUserCall(use, this->m_ancestors); } - - // Rewrite intrinsics that are not supported by the target back into user calls. 
- // This needs to be done before the transition to LIR because it relies on the use - // of fgMorphArgs, which is designed to operate on HIR. Once this is done for a - // particular statement, link that statement's nodes into the current basic block. - fgWalkResult PreOrderVisit(GenTree** use, GenTree* user) + } +#if defined(FEATURE_HW_INTRINSICS) + else if (node->OperIsHWIntrinsic()) + { + if (node->AsHWIntrinsic()->IsUserCall()) { - GenTree* const node = *use; - if (node->OperGet() == GT_INTRINSIC && - m_rationalizer.comp->IsIntrinsicImplementedByUserCall(node->AsIntrinsic()->gtIntrinsicName)) - { - m_rationalizer.RewriteIntrinsicAsUserCall(use, this->m_ancestors); - } + m_rationalizer.RewriteHWIntrinsicAsUserCall(use, this->m_ancestors); + } + } +#endif // FEATURE_HW_INTRINSICS #ifdef TARGET_ARM64 - if (node->OperIs(GT_SUB)) - { - m_rationalizer.RewriteSubLshDiv(use); - } + if (node->OperIs(GT_SUB)) + { + m_rationalizer.RewriteSubLshDiv(use); + } #endif - return Compiler::WALK_CONTINUE; - } + return Compiler::WALK_CONTINUE; +} - // Rewrite HIR nodes into LIR nodes. - fgWalkResult PostOrderVisit(GenTree** use, GenTree* user) - { - return m_rationalizer.RewriteNode(use, this->m_ancestors); - } - }; +// Rewrite HIR nodes into LIR nodes. +Compiler::fgWalkResult Rationalizer::RationalizeVisitor::PostOrderVisit(GenTree** use, GenTree* user) +{ + return m_rationalizer.RewriteNode(use, this->m_ancestors); +} +//------------------------------------------------------------------------ +// DoPhase: Run the rationalize over the method IR. 
+// +// Returns: +// PhaseStatus indicating, what, if anything, was modified +// +PhaseStatus Rationalizer::DoPhase() +{ DBEXEC(TRUE, SanityCheck()); comp->compCurBB = nullptr; diff --git a/src/coreclr/jit/rationalize.h b/src/coreclr/jit/rationalize.h index a8651b2e5b8c7..4f197d73b6803 100644 --- a/src/coreclr/jit/rationalize.h +++ b/src/coreclr/jit/rationalize.h @@ -39,13 +39,16 @@ class Rationalizer final : public Phase void RewriteNodeAsCall(GenTree** use, ArrayStack& parents, CORINFO_METHOD_HANDLE callHnd, -#ifdef FEATURE_READYTORUN +#if defined(FEATURE_READYTORUN) CORINFO_CONST_LOOKUP entryPoint, -#endif - GenTree* arg1 = nullptr, - GenTree* arg2 = nullptr); +#endif // FEATURE_READYTORUN + GenTree** operands, + size_t operandCount); void RewriteIntrinsicAsUserCall(GenTree** use, Compiler::GenTreeStack& parents); +#if defined(FEATURE_HW_INTRINSICS) + void RewriteHWIntrinsicAsUserCall(GenTree** use, Compiler::GenTreeStack& parents); +#endif // FEATURE_HW_INTRINSICS #ifdef TARGET_ARM64 void RewriteSubLshDiv(GenTree** use); @@ -53,6 +56,30 @@ class Rationalizer final : public Phase // Root visitor Compiler::fgWalkResult RewriteNode(GenTree** useEdge, Compiler::GenTreeStack& parents); + +private: + class RationalizeVisitor final : public GenTreeVisitor + { + Rationalizer& m_rationalizer; + + public: + enum + { + ComputeStack = true, + DoPreOrder = true, + DoPostOrder = true, + UseExecutionOrder = true, + }; + + RationalizeVisitor(Rationalizer& rationalizer) + : GenTreeVisitor(rationalizer.comp) + , m_rationalizer(rationalizer) + { + } + + fgWalkResult PreOrderVisit(GenTree** use, GenTree* user); + fgWalkResult PostOrderVisit(GenTree** use, GenTree* user); + }; }; inline Rationalizer::Rationalizer(Compiler* _comp)