Skip to content

Commit

Permalink
Adding basic support for recognizing and handling SIMD intrinsics as …
Browse files Browse the repository at this point in the history
…HW intrinsics (#35421)

* Adding basic support for recognizing and handling SIMD intrinsics as HW intrinsics

* Applying formatting patch

* Fixing a preprocessor concatenation for non-Windows

* Add a default case to work around a compiler warning on FreeBSD

* Fixing a noway_assert to include GT_HWINTRINSIC

* Fixing some asserts that were being triggered

* Use getSIMDVectorRegisterByteLength

* Applying formatting patch

* Fixing ARM64 to use the actual type size

* Removing the [Intrinsic] attribute from some Vector2/3/4 methods which aren't intrinsic

* Updating SSE/SSE2 CompareGreaterThan and related functions to be table driven

* Fixing the SimdAsHWIntrinsic relational operations to match the GT_SIMD behavior

* Ensure that GT_HWINTRINSIC fixes the type for certain TYP_SIMD8

* Fixing the SimdAsHWIntrinsic Vector<int>.op_Multiply support to match the GT_SIMD behavior

* Fixing the SimdAsHWIntrinsic Vector2/3 Division to match the GT_SIMD behavior

* Porting Abs, Min, and Max to use the SimdAsHWIntrinsic support

* Minor fixups to the SSE2 codepath

* Applying formatting patch

* Fixing a check in lowering

* Mark SimdAsHWIntrinsic nodes so we can look up the correct handle

* Adding the 3 operand overload for gtNewSimdAsHWIntrinsicNode

* Fixing BuildHWIntrinsic to properly take RMW into account

* Fixing the rationalize handling of GT_HWINTRINSIC to account for SIMD vs non-SIMD nodes

* Fixing the importer to not create SIMD nodes if featureSIMD is disabled

* Fixing the SSE4.2 implementation of CompareLessThan<long>

* Preserve the base type for subtraction/addition operations

* Applying formatting patch

* Responding to PR feedback

* Fixing a copy/paste error under reinterpret cast

* Fixing abs to expect 1 argument

* Adding method comment headers that were missing

* Removing unused table entries from SimdAsHWIntrinsic for Vector2/3/4

* Ensure we catch intrinsics from the Vector static class

* Fixing SSSE3_Abs and AVX2_Abs to get the base type from the first argument

* Ensure we adjust the class handle used for intrinsics from the Vector static class

* Ensure we populate the handle cache for clsHnd even if it isn't used

* Fix where we grab the base type from for the static Vector class

* Fixing ConditionalSelect and improving the messages used for impCloneExpr in SimdAsHWIntrinsic

* Ensure we clone the constVectorDup before using it

* Applying formatting patch
  • Loading branch information
tannergooding authored May 5, 2020
1 parent 2b3de4f commit 56518a7
Show file tree
Hide file tree
Showing 26 changed files with 1,824 additions and 301 deletions.
10 changes: 8 additions & 2 deletions src/coreclr/src/jit/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,7 @@ if (CLR_CMAKE_TARGET_WIN32)
regset.h
sideeffects.h
simd.h
simdashwintrinsic.h
simdintrinsiclist.h
sm.h
smallhash.h
Expand Down Expand Up @@ -204,14 +205,16 @@ if (CLR_CMAKE_TARGET_WIN32)
instrsarm.h
instrsarm64.h
registerarm.h
registerarm64.h)
registerarm64.h
simdashwintrinsiclistarm64.h)
elseif (CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_I386)
list (APPEND JIT_HEADERS
emitfmtsxarch.h
emitxarch.h
hwintrinsiclistxarch.h
hwintrinsic.h
instrsxarch.h)
instrsxarch.h
simdashwintrinsiclistxarch.h)
endif ()
endif(CLR_CMAKE_TARGET_WIN32)

Expand All @@ -223,6 +226,7 @@ set( JIT_AMD64_SOURCES
lowerxarch.cpp
lsraxarch.cpp
simd.cpp
simdashwintrinsic.cpp
simdcodegenxarch.cpp
targetamd64.cpp
unwindamd64.cpp
Expand All @@ -249,6 +253,7 @@ set( JIT_I386_SOURCES
lowerxarch.cpp
lsraxarch.cpp
simd.cpp
simdashwintrinsic.cpp
simdcodegenxarch.cpp
targetx86.cpp
unwindx86.cpp
Expand All @@ -264,6 +269,7 @@ set( JIT_ARM64_SOURCES
lsraarmarch.cpp
lsraarm64.cpp
simd.cpp
simdashwintrinsic.cpp
targetarm64.cpp
unwindarm.cpp
unwindarm64.cpp
Expand Down
64 changes: 62 additions & 2 deletions src/coreclr/src/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

#include "hwintrinsic.h"
#include "simd.h"
#include "simdashwintrinsic.h"

// This is only used locally in the JIT to indicate that
// a verification block should be inserted
Expand Down Expand Up @@ -2615,6 +2616,36 @@ class Compiler
NamedIntrinsic hwIntrinsicID,
var_types baseType,
unsigned size);

// gtNewSimdAsHWIntrinsicNode (one operand): builds a hardware intrinsic node by
// forwarding every argument unchanged to gtNewSimdHWIntrinsicNode, then tags the
// result with GTF_SIMDASHW_OP before returning it.
GenTreeHWIntrinsic* gtNewSimdAsHWIntrinsicNode(
    var_types type, GenTree* op1, NamedIntrinsic hwIntrinsicID, var_types baseType, unsigned size)
{
    GenTreeHWIntrinsic* const simdAsHWNode = gtNewSimdHWIntrinsicNode(type, op1, hwIntrinsicID, baseType, size);

    // The flag records that this node was created through the SimdAsHWIntrinsic path.
    simdAsHWNode->gtFlags |= GTF_SIMDASHW_OP;

    return simdAsHWNode;
}

// gtNewSimdAsHWIntrinsicNode (two operands): builds a hardware intrinsic node by
// forwarding every argument unchanged to gtNewSimdHWIntrinsicNode, then tags the
// result with GTF_SIMDASHW_OP before returning it.
GenTreeHWIntrinsic* gtNewSimdAsHWIntrinsicNode(
    var_types type, GenTree* op1, GenTree* op2, NamedIntrinsic hwIntrinsicID, var_types baseType, unsigned size)
{
    GenTreeHWIntrinsic* const simdAsHWNode =
        gtNewSimdHWIntrinsicNode(type, op1, op2, hwIntrinsicID, baseType, size);

    // The flag records that this node was created through the SimdAsHWIntrinsic path.
    simdAsHWNode->gtFlags |= GTF_SIMDASHW_OP;

    return simdAsHWNode;
}

// gtNewSimdAsHWIntrinsicNode (three operands): builds a hardware intrinsic node by
// forwarding every argument unchanged to gtNewSimdHWIntrinsicNode, then tags the
// result with GTF_SIMDASHW_OP before returning it.
GenTreeHWIntrinsic* gtNewSimdAsHWIntrinsicNode(var_types      type,
                                               GenTree*       op1,
                                               GenTree*       op2,
                                               GenTree*       op3,
                                               NamedIntrinsic hwIntrinsicID,
                                               var_types      baseType,
                                               unsigned       size)
{
    GenTreeHWIntrinsic* const simdAsHWNode =
        gtNewSimdHWIntrinsicNode(type, op1, op2, op3, hwIntrinsicID, baseType, size);

    // The flag records that this node was created through the SimdAsHWIntrinsic path.
    simdAsHWNode->gtFlags |= GTF_SIMDASHW_OP;

    return simdAsHWNode;
}

GenTreeHWIntrinsic* gtNewScalarHWIntrinsicNode(var_types type, GenTree* op1, NamedIntrinsic hwIntrinsicID);
GenTreeHWIntrinsic* gtNewScalarHWIntrinsicNode(var_types type,
GenTree* op1,
Expand Down Expand Up @@ -3689,16 +3720,36 @@ class Compiler
CORINFO_METHOD_HANDLE method,
CORINFO_SIG_INFO* sig,
bool mustExpand);
GenTree* impSimdAsHWIntrinsic(NamedIntrinsic intrinsic,
CORINFO_CLASS_HANDLE clsHnd,
CORINFO_METHOD_HANDLE method,
CORINFO_SIG_INFO* sig,
bool mustExpand);

protected:
bool compSupportsHWIntrinsic(CORINFO_InstructionSet isa);

GenTree* impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic,
CORINFO_CLASS_HANDLE clsHnd,
CORINFO_SIG_INFO* sig,
var_types retType,
var_types baseType,
unsigned simdSize);

GenTree* impSimdAsHWIntrinsicCndSel(CORINFO_CLASS_HANDLE clsHnd,
var_types retType,
var_types baseType,
unsigned simdSize,
GenTree* op1,
GenTree* op2,
GenTree* op3);

GenTree* impSpecialIntrinsic(NamedIntrinsic intrinsic,
CORINFO_CLASS_HANDLE clsHnd,
CORINFO_METHOD_HANDLE method,
CORINFO_SIG_INFO* sig);

GenTree* getArgForHWIntrinsic(var_types argType, CORINFO_CLASS_HANDLE argClass);
GenTree* getArgForHWIntrinsic(var_types argType, CORINFO_CLASS_HANDLE argClass, bool expectAddr = false);
GenTree* impNonConstFallback(NamedIntrinsic intrinsic, var_types simdType, var_types baseType);
GenTree* addRangeCheckIfNeeded(NamedIntrinsic intrinsic, GenTree* lastOp, bool mustExpand, int immUpperBound);

Expand All @@ -3712,6 +3763,13 @@ class Compiler
GenTree* impAvxOrAvx2Intrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig);
GenTree* impBMI1OrBMI2Intrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig);

GenTree* impSimdAsHWIntrinsicRelOp(NamedIntrinsic intrinsic,
CORINFO_CLASS_HANDLE clsHnd,
var_types retType,
var_types baseType,
unsigned simdSize,
GenTree* op1,
GenTree* op2);
#endif // TARGET_XARCH
#endif // FEATURE_HW_INTRINSICS
GenTree* impArrayAccessIntrinsic(CORINFO_CLASS_HANDLE clsHnd,
Expand Down Expand Up @@ -8203,8 +8261,9 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
return emitTypeSize(TYP_SIMD8);
}

public:
// Returns the codegen type for a given SIMD size.
var_types getSIMDTypeForSize(unsigned size)
static var_types getSIMDTypeForSize(unsigned size)
{
var_types simdType = TYP_UNDEF;
if (size == 8)
Expand All @@ -8230,6 +8289,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
return simdType;
}

private:
unsigned getSIMDInitTempVarNum()
{
if (lvaSIMDInitTempVarNum == BAD_VAR_NUM)
Expand Down
11 changes: 5 additions & 6 deletions src/coreclr/src/jit/flowgraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22298,12 +22298,11 @@ void Compiler::fgNoteNonInlineCandidate(Statement* stmt, GenTreeCall* call)
*/
GenTree* Compiler::fgGetStructAsStructPtr(GenTree* tree)
{
noway_assert((tree->gtOper == GT_LCL_VAR) || (tree->gtOper == GT_FIELD) || (tree->gtOper == GT_IND) ||
(tree->gtOper == GT_BLK) || (tree->gtOper == GT_OBJ) || tree->OperIsSIMD() ||
// tree->gtOper == GT_CALL || cannot get address of call.
// tree->gtOper == GT_MKREFANY || inlining should've been aborted due to mkrefany opcode.
// tree->gtOper == GT_RET_EXPR || cannot happen after fgUpdateInlineReturnExpressionPlaceHolder
(tree->gtOper == GT_COMMA));
noway_assert(tree->OperIs(GT_LCL_VAR, GT_FIELD, GT_IND, GT_BLK, GT_OBJ, GT_COMMA) || tree->OperIsSIMD() ||
tree->OperIsHWIntrinsic());
// GT_CALL, cannot get address of call.
// GT_MKREFANY, inlining should've been aborted due to mkrefany opcode.
// GT_RET_EXPR, cannot happen after fgUpdateInlineReturnExpressionPlaceHolder

switch (tree->OperGet())
{
Expand Down
15 changes: 14 additions & 1 deletion src/coreclr/src/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17206,6 +17206,12 @@ CORINFO_CLASS_HANDLE Compiler::gtGetStructHandleIfPresent(GenTree* tree)
if (varTypeIsSIMD(tree))
{
structHnd = gtGetStructHandleForSIMD(tree->gtType, TYP_FLOAT);
#ifdef FEATURE_HW_INTRINSICS
if (structHnd == NO_CLASS_HANDLE)
{
structHnd = gtGetStructHandleForHWSIMD(tree->gtType, TYP_FLOAT);
}
#endif
}
#endif
break;
Expand Down Expand Up @@ -17272,7 +17278,14 @@ CORINFO_CLASS_HANDLE Compiler::gtGetStructHandleIfPresent(GenTree* tree)
#endif // FEATURE_SIMD
#ifdef FEATURE_HW_INTRINSICS
case GT_HWINTRINSIC:
structHnd = gtGetStructHandleForHWSIMD(tree->gtType, tree->AsHWIntrinsic()->gtSIMDBaseType);
if ((tree->gtFlags & GTF_SIMDASHW_OP) != 0)
{
structHnd = gtGetStructHandleForSIMD(tree->gtType, tree->AsHWIntrinsic()->gtSIMDBaseType);
}
else
{
structHnd = gtGetStructHandleForHWSIMD(tree->gtType, tree->AsHWIntrinsic()->gtSIMDBaseType);
}
break;
#endif
break;
Expand Down
5 changes: 4 additions & 1 deletion src/coreclr/src/jit/gentree.h
Original file line number Diff line number Diff line change
Expand Up @@ -745,7 +745,7 @@ struct GenTree

#define GTF_UNSIGNED 0x00008000 // With GT_CAST: the source operand is an unsigned type
// With operators: the specified node is an unsigned operator
//
//
#define GTF_LATE_ARG 0x00010000 // The specified node is evaluated to a temp in the arg list, and this temp is added to gtCallLateArgs.
#define GTF_SPILL 0x00020000 // Needs to be spilled here

Expand Down Expand Up @@ -915,6 +915,9 @@ struct GenTree
#define GTF_SIMD12_OP 0x80000000 // GT_SIMD -- Indicates that the operands need to be handled as SIMD12
// even if they have been retyped as SIMD16.

#define GTF_SIMDASHW_OP 0x80000000 // GT_HWINTRINSIC -- Indicates that the structHandle should be gotten from gtGetStructHandleForSIMD
// rather than from gtGetStructHandleForHWSIMD.

//---------------------------------------------------------------------
//
// GenTree flags stored in gtDebugFlags.
Expand Down
15 changes: 8 additions & 7 deletions src/coreclr/src/jit/hwintrinsic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -495,26 +495,27 @@ bool HWIntrinsicInfo::isImmOp(NamedIntrinsic id, const GenTree* op)
}

//------------------------------------------------------------------------
// // getArgForHWIntrinsic: pop an argument from the stack and validate its type
// getArgForHWIntrinsic: pop an argument from the stack and validate its type
//
// Arguments:
// argType -- the required type of argument
// argClass -- the class handle of argType
// argType -- the required type of argument
// argClass -- the class handle of argType
// expectAddr -- if true, indicates that the stack entry is expected to be a TYP_BYREF.
//
// Return Value:
// the validated argument
//
GenTree* Compiler::getArgForHWIntrinsic(var_types argType, CORINFO_CLASS_HANDLE argClass)
GenTree* Compiler::getArgForHWIntrinsic(var_types argType, CORINFO_CLASS_HANDLE argClass, bool expectAddr)
{
GenTree* arg = nullptr;
if (argType == TYP_STRUCT)
{
unsigned int argSizeBytes;
var_types base = getBaseTypeAndSizeOfSIMDType(argClass, &argSizeBytes);
argType = getSIMDTypeForSize(argSizeBytes);
assert((argType == TYP_SIMD8) || (argType == TYP_SIMD16) || (argType == TYP_SIMD32));
arg = impSIMDPopStack(argType);
assert((arg->TypeGet() == TYP_SIMD8) || (arg->TypeGet() == TYP_SIMD16) || (arg->TypeGet() == TYP_SIMD32));
assert(varTypeIsSIMD(argType));
arg = impSIMDPopStack(argType, expectAddr);
assert(varTypeIsSIMD(arg->TypeGet()));
}
else
{
Expand Down
66 changes: 45 additions & 21 deletions src/coreclr/src/jit/hwintrinsic.h
Original file line number Diff line number Diff line change
Expand Up @@ -304,7 +304,7 @@ struct HWIntrinsicInfo
}

#ifdef TARGET_XARCH
static int lookupIval(NamedIntrinsic id)
static int lookupIval(NamedIntrinsic id, bool opportunisticallyDependsOnAVX)
{
switch (id)
{
Expand All @@ -321,6 +321,17 @@ struct HWIntrinsicInfo
case NI_SSE_CompareScalarGreaterThan:
case NI_SSE2_CompareGreaterThan:
case NI_SSE2_CompareScalarGreaterThan:
case NI_AVX_CompareGreaterThan:
{
if (opportunisticallyDependsOnAVX)
{
return static_cast<int>(FloatComparisonMode::OrderedGreaterThanSignaling);
}

assert(id != NI_AVX_CompareGreaterThan);
return static_cast<int>(FloatComparisonMode::OrderedLessThanSignaling);
}

case NI_SSE_CompareLessThan:
case NI_SSE_CompareScalarLessThan:
case NI_SSE2_CompareLessThan:
Expand All @@ -334,6 +345,17 @@ struct HWIntrinsicInfo
case NI_SSE_CompareScalarGreaterThanOrEqual:
case NI_SSE2_CompareGreaterThanOrEqual:
case NI_SSE2_CompareScalarGreaterThanOrEqual:
case NI_AVX_CompareGreaterThanOrEqual:
{
if (opportunisticallyDependsOnAVX)
{
return static_cast<int>(FloatComparisonMode::OrderedGreaterThanOrEqualSignaling);
}

assert(id != NI_AVX_CompareGreaterThanOrEqual);
return static_cast<int>(FloatComparisonMode::OrderedLessThanOrEqualSignaling);
}

case NI_SSE_CompareLessThanOrEqual:
case NI_SSE_CompareScalarLessThanOrEqual:
case NI_SSE2_CompareLessThanOrEqual:
Expand All @@ -356,6 +378,17 @@ struct HWIntrinsicInfo
case NI_SSE_CompareScalarNotGreaterThan:
case NI_SSE2_CompareNotGreaterThan:
case NI_SSE2_CompareScalarNotGreaterThan:
case NI_AVX_CompareNotGreaterThan:
{
if (opportunisticallyDependsOnAVX)
{
return static_cast<int>(FloatComparisonMode::UnorderedNotGreaterThanSignaling);
}

assert(id != NI_AVX_CompareNotGreaterThan);
return static_cast<int>(FloatComparisonMode::UnorderedNotLessThanSignaling);
}

case NI_SSE_CompareNotLessThan:
case NI_SSE_CompareScalarNotLessThan:
case NI_SSE2_CompareNotLessThan:
Expand All @@ -369,6 +402,17 @@ struct HWIntrinsicInfo
case NI_SSE_CompareScalarNotGreaterThanOrEqual:
case NI_SSE2_CompareNotGreaterThanOrEqual:
case NI_SSE2_CompareScalarNotGreaterThanOrEqual:
case NI_AVX_CompareNotGreaterThanOrEqual:
{
if (opportunisticallyDependsOnAVX)
{
return static_cast<int>(FloatComparisonMode::UnorderedNotGreaterThanOrEqualSignaling);
}

assert(id != NI_AVX_CompareNotGreaterThanOrEqual);
return static_cast<int>(FloatComparisonMode::UnorderedNotLessThanOrEqualSignaling);
}

case NI_SSE_CompareNotLessThanOrEqual:
case NI_SSE_CompareScalarNotLessThanOrEqual:
case NI_SSE2_CompareNotLessThanOrEqual:
Expand Down Expand Up @@ -437,26 +481,6 @@ struct HWIntrinsicInfo
return static_cast<int>(FloatRoundingMode::ToZero);
}

case NI_AVX_CompareGreaterThan:
{
return static_cast<int>(FloatComparisonMode::OrderedGreaterThanSignaling);
}

case NI_AVX_CompareGreaterThanOrEqual:
{
return static_cast<int>(FloatComparisonMode::OrderedGreaterThanOrEqualSignaling);
}

case NI_AVX_CompareNotGreaterThan:
{
return static_cast<int>(FloatComparisonMode::UnorderedNotGreaterThanSignaling);
}

case NI_AVX_CompareNotGreaterThanOrEqual:
{
return static_cast<int>(FloatComparisonMode::UnorderedNotGreaterThanOrEqualSignaling);
}

default:
{
return -1;
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/src/jit/hwintrinsiccodegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
}
else
{
emitSize = EA_SIZE(node->gtSIMDSize);
emitSize = emitActualTypeSize(Compiler::getSIMDTypeForSize(node->gtSIMDSize));
opt = genGetSimdInsOpt(emitSize, intrin.baseType);

if ((opt == INS_OPTS_1D) && (intrin.category == HW_Category_SimpleSIMD))
Expand Down
Loading

0 comments on commit 56518a7

Please sign in to comment.