Skip to content

Commit

Permalink
Ensure Vector2/3/4, Quaternion, and Plane don't have a false dependen…
Browse files Browse the repository at this point in the history
…cy on Vector<T> (#86481)

* Ensure Vector2/3/4, Quaternion, and Plane don't have a false dependency on Vector<T>

* Apply JIT formatting patch

* Fixing a build issue

* Handle an SPMI assert
  • Loading branch information
tannergooding authored May 20, 2023
1 parent fdb5ac7 commit d7a30ba
Show file tree
Hide file tree
Showing 10 changed files with 159 additions and 79 deletions.
2 changes: 1 addition & 1 deletion docs/design/coreclr/botr/vectors-and-intrinsics.md
Original file line number Diff line number Diff line change
Expand Up @@ -169,5 +169,5 @@ While the above api exists, it is not expected that general purpose code within
|`compExactlyDependsOn(isa)`| Use when making a decision to use or not use an instruction set when the decision will affect the semantics of the generated code. Should never be used in an assert. | Return whether or not an instruction set is supported. Calls notifyInstructionSetUsage with the result of that computation.
|`compOpportunisticallyDependsOn(isa)`| Use when making an opportunistic decision to use or not use an instruction set. Use when the instruction set usage is a "nice to have optimization opportunity", but do not use when a false result may change the semantics of the program. Should never be used in an assert. | Return whether or not an instruction set is supported. Calls notifyInstructionSetUsage if the instruction set is supported.
|`compIsaSupportedDebugOnly(isa)` | Use to assert whether or not an instruction set is supported | Return whether or not an instruction set is supported. Does not report anything. Only available in debug builds.
|`getSIMDVectorRegisterByteLength()` | Use to get the size of a `Vector<T>` value. | Determine the size of the `Vector<T>` type. If on the architecture the size may vary depending on whatever rules. Use `compExactlyDependsOn` to perform the queries so that the size is consistent between compile time and runtime.
|`getVectorTByteLength()` | Use to get the size of a `Vector<T>` value. | Determine the size of the `Vector<T>` type. If on the architecture the size may vary depending on whatever rules. Use `compExactlyDependsOn` to perform the queries so that the size is consistent between compile time and runtime.
|`getMaxVectorByteLength()`| Get the maximum number of bytes that might be used in a SIMD type during this compilation. | Query the set of instruction sets supported, and determine the largest simd type supported. Use `compOpportunisticallyDependsOn` to perform the queries so that the maximum size needed is the only one recorded.
9 changes: 7 additions & 2 deletions src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -8645,8 +8645,13 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

// Get the number of bytes in a System.Numeric.Vector<T> for the current compilation.
// Note - cannot be used for System.Runtime.Intrinsic
unsigned getSIMDVectorRegisterByteLength()
unsigned getVectorTByteLength()
{
// We need to report the ISA dependency to the VM so that scenarios
// such as R2R work correctly for larger vector sizes, so we always
// do `compExactlyDependsOn` for such cases.
CLANG_FORMAT_COMMENT_ANCHOR;

#if defined(TARGET_XARCH)
if (compExactlyDependsOn(InstructionSet_AVX2))
{
Expand All @@ -8660,7 +8665,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
#elif defined(TARGET_ARM64)
return FP_REGSIZE_BYTES;
#else
assert(!"getSIMDVectorRegisterByteLength() unimplemented on target arch");
assert(!"getVectorTByteLength() unimplemented on target arch");
unreached();
#endif
}
Expand Down
92 changes: 50 additions & 42 deletions src/coreclr/jit/hwintrinsicxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -775,24 +775,26 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
case NI_Vector128_AsVector:
{
assert(sig->numArgs == 1);
uint32_t vectorTByteLength = getVectorTByteLength();

if (getSIMDVectorRegisterByteLength() == YMM_REGSIZE_BYTES)
if (vectorTByteLength == YMM_REGSIZE_BYTES)
{
// Vector<T> is TYP_SIMD32, so we should treat this as a call to Vector128.ToVector256
return impSpecialIntrinsic(NI_Vector128_ToVector256, clsHnd, method, sig, simdBaseJitType, retType,
simdSize);
}
else
{
assert(vectorTByteLength == XMM_REGSIZE_BYTES);

assert(getSIMDVectorRegisterByteLength() == XMM_REGSIZE_BYTES);

// We fold away the cast here, as it only exists to satisfy
// the type system. It is safe to do this here since the retNode type
// and the signature return type are both the same TYP_SIMD.

retNode = impSIMDPopStack();
SetOpLclRelatedToSIMDIntrinsic(retNode);
assert(retNode->gtType == getSIMDTypeForSize(getSIMDTypeSizeInBytes(sig->retTypeSigClass)));
// We fold away the cast here, as it only exists to satisfy
// the type system. It is safe to do this here since the retNode type
// and the signature return type are both the same TYP_SIMD.

retNode = impSIMDPopStack();
SetOpLclRelatedToSIMDIntrinsic(retNode);
assert(retNode->gtType == getSIMDTypeForSize(getSIMDTypeSizeInBytes(sig->retTypeSigClass)));
}
break;
}

Expand Down Expand Up @@ -903,8 +905,9 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
case NI_Vector256_AsVector256:
{
assert(sig->numArgs == 1);
uint32_t vectorTByteLength = getVectorTByteLength();

if (getSIMDVectorRegisterByteLength() == YMM_REGSIZE_BYTES)
if (vectorTByteLength == YMM_REGSIZE_BYTES)
{
// We fold away the cast here, as it only exists to satisfy
// the type system. It is safe to do this here since the retNode type
Expand All @@ -916,36 +919,38 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,

break;
}

assert(getSIMDVectorRegisterByteLength() == XMM_REGSIZE_BYTES);

if (compExactlyDependsOn(InstructionSet_AVX))
else
{
// We support Vector256 but Vector<T> is only 16-bytes, so we should
// treat this method as a call to Vector256.GetLower or Vector128.ToVector256
assert(vectorTByteLength == XMM_REGSIZE_BYTES);

if (intrinsic == NI_Vector256_AsVector)
if (compExactlyDependsOn(InstructionSet_AVX))
{
return impSpecialIntrinsic(NI_Vector256_GetLower, clsHnd, method, sig, simdBaseJitType, retType,
simdSize);
}
else
{
assert(intrinsic == NI_Vector256_AsVector256);
return impSpecialIntrinsic(NI_Vector128_ToVector256, clsHnd, method, sig, simdBaseJitType, retType,
16);
// We support Vector256 but Vector<T> is only 16-bytes, so we should
// treat this method as a call to Vector256.GetLower or Vector128.ToVector256

if (intrinsic == NI_Vector256_AsVector)
{
return impSpecialIntrinsic(NI_Vector256_GetLower, clsHnd, method, sig, simdBaseJitType, retType,
simdSize);
}
else
{
assert(intrinsic == NI_Vector256_AsVector256);
return impSpecialIntrinsic(NI_Vector128_ToVector256, clsHnd, method, sig, simdBaseJitType,
retType, 16);
}
}
}

break;
}

case NI_Vector512_AsVector:
case NI_Vector512_AsVector512:
{
assert(sig->numArgs == 1);
uint32_t vectorTByteLength = getVectorTByteLength();

if (getSIMDVectorRegisterByteLength() == YMM_REGSIZE_BYTES)
if (vectorTByteLength == YMM_REGSIZE_BYTES)
{
assert(IsBaselineVector512IsaSupported());
// We support Vector512 but Vector<T> is only 32-bytes, so we should
Expand All @@ -964,23 +969,26 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
}
break;
}

assert(getSIMDVectorRegisterByteLength() == XMM_REGSIZE_BYTES);
if (compExactlyDependsOn(InstructionSet_AVX512F))
else
{
// We support Vector512 but Vector<T> is only 16-bytes, so we should
// treat this method as a call to Vector512.GetLower128 or Vector128.ToVector512
assert(vectorTByteLength == XMM_REGSIZE_BYTES);

if (intrinsic == NI_Vector512_AsVector)
if (compExactlyDependsOn(InstructionSet_AVX512F))
{
return impSpecialIntrinsic(NI_Vector512_GetLower128, clsHnd, method, sig, simdBaseJitType, retType,
simdSize);
}
else
{
assert(intrinsic == NI_Vector512_AsVector512);
return impSpecialIntrinsic(NI_Vector128_ToVector512, clsHnd, method, sig, simdBaseJitType, retType,
16);
// We support Vector512 but Vector<T> is only 16-bytes, so we should
// treat this method as a call to Vector512.GetLower128 or Vector128.ToVector512

if (intrinsic == NI_Vector512_AsVector)
{
return impSpecialIntrinsic(NI_Vector512_GetLower128, clsHnd, method, sig, simdBaseJitType,
retType, simdSize);
}
else
{
assert(intrinsic == NI_Vector512_AsVector512);
return impSpecialIntrinsic(NI_Vector128_ToVector512, clsHnd, method, sig, simdBaseJitType,
retType, 16);
}
}
}
break;
Expand Down
5 changes: 1 addition & 4 deletions src/coreclr/jit/importercalls.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8332,10 +8332,7 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method)
CORINFO_SIG_INFO sig;
info.compCompHnd->getMethodSig(method, &sig);

int sizeOfVectorT = getSIMDVectorRegisterByteLength();

result = SimdAsHWIntrinsicInfo::lookupId(this, &sig, className, methodName, enclosingClassName,
sizeOfVectorT);
result = SimdAsHWIntrinsicInfo::lookupId(this, &sig, className, methodName, enclosingClassName);
#endif // FEATURE_HW_INTRINSICS

if (result == NI_Illegal)
Expand Down
5 changes: 2 additions & 3 deletions src/coreclr/jit/simd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -233,8 +233,6 @@ CorInfoType Compiler::getBaseJitTypeAndSizeOfSIMDType(CORINFO_CLASS_HANDLE typeH
{
JITDUMP(" Found type Vector\n");
m_simdHandleCache->VectorHandle = typeHnd;

size = getSIMDVectorRegisterByteLength();
break;
}

Expand Down Expand Up @@ -299,8 +297,9 @@ CorInfoType Compiler::getBaseJitTypeAndSizeOfSIMDType(CORINFO_CLASS_HANDLE typeH
}

JITDUMP(" Found Vector<%s>\n", varTypeName(JitType2PreciseVarType(simdBaseJitType)));
size = getVectorTByteLength();

size = getSIMDVectorRegisterByteLength();
assert(size != 0);
break;
}

Expand Down
89 changes: 72 additions & 17 deletions src/coreclr/jit/simdashwintrinsic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,21 +44,21 @@ const SimdAsHWIntrinsicInfo& SimdAsHWIntrinsicInfo::lookup(NamedIntrinsic id)
// lookupId: Gets the NamedIntrinsic for a given method name and InstructionSet
//
// Arguments:
// comp -- The compiler
// sig -- The signature of the intrinsic
// className -- The name of the class associated with the SimdIntrinsic to lookup
// methodName -- The name of the method associated with the SimdIntrinsic to lookup
// enclosingClassName -- The name of the enclosing class
// sizeOfVectorT -- The size of Vector<T> in bytes
//
// Return Value:
// The NamedIntrinsic associated with methodName and classId
NamedIntrinsic SimdAsHWIntrinsicInfo::lookupId(Compiler* comp,
CORINFO_SIG_INFO* sig,
const char* className,
const char* methodName,
const char* enclosingClassName,
int sizeOfVectorT)
const char* enclosingClassName)
{
SimdAsHWIntrinsicClassId classId = lookupClassId(className, enclosingClassName, sizeOfVectorT);
SimdAsHWIntrinsicClassId classId = lookupClassId(comp, className, enclosingClassName);

if (classId == SimdAsHWIntrinsicClassId::Unknown)
{
Expand All @@ -74,11 +74,46 @@ NamedIntrinsic SimdAsHWIntrinsicInfo::lookupId(Compiler* comp,
isInstanceMethod = true;
}

if (strcmp(methodName, "get_IsHardwareAccelerated") == 0)
if (classId == SimdAsHWIntrinsicClassId::Vector)
{
return comp->IsBaselineSimdIsaSupported() ? NI_IsSupported_True : NI_IsSupported_False;
// We want to avoid doing anything that would unnecessarily trigger a recorded dependency against Vector<T>
// so we duplicate a few checks here to ensure this works smoothly for the static Vector class.

assert(!isInstanceMethod);

if (strcmp(methodName, "get_IsHardwareAccelerated") == 0)
{
return comp->IsBaselineSimdIsaSupported() ? NI_IsSupported_True : NI_IsSupported_False;
}

var_types retType = JITtype2varType(sig->retType);
CorInfoType simdBaseJitType = CORINFO_TYPE_UNDEF;
CORINFO_CLASS_HANDLE argClass = NO_CLASS_HANDLE;

if (retType == TYP_STRUCT)
{
argClass = sig->retTypeSigClass;
}
else
{
assert(numArgs != 0);
argClass = comp->info.compCompHnd->getArgClass(sig, sig->args);
}

const char* argNamespaceName;
const char* argClassName = comp->getClassNameFromMetadata(argClass, &argNamespaceName);

classId = lookupClassId(comp, argClassName, nullptr);

if (classId == SimdAsHWIntrinsicClassId::Unknown)
{
return NI_Illegal;
}
assert(classId != SimdAsHWIntrinsicClassId::Vector);
}

assert(strcmp(methodName, "get_IsHardwareAccelerated") != 0);

for (int i = 0; i < (NI_SIMD_AS_HWINTRINSIC_END - NI_SIMD_AS_HWINTRINSIC_START - 1); i++)
{
const SimdAsHWIntrinsicInfo& intrinsicInfo = simdAsHWIntrinsicInfoArray[i];
Expand Down Expand Up @@ -113,19 +148,17 @@ NamedIntrinsic SimdAsHWIntrinsicInfo::lookupId(Compiler* comp,
// lookupClassId: Gets the SimdAsHWIntrinsicClassId for a given class name and enclsoing class name
//
// Arguments:
// comp -- The compiler
// className -- The name of the class associated with the SimdAsHWIntrinsicClassId to lookup
// enclosingClassName -- The name of the enclosing class
// sizeOfVectorT -- The size of Vector<T> in bytes
//
// Return Value:
// The SimdAsHWIntrinsicClassId associated with className and enclosingClassName
SimdAsHWIntrinsicClassId SimdAsHWIntrinsicInfo::lookupClassId(const char* className,
const char* enclosingClassName,
int sizeOfVectorT)
SimdAsHWIntrinsicClassId SimdAsHWIntrinsicInfo::lookupClassId(Compiler* comp,
const char* className,
const char* enclosingClassName)
{
assert(className != nullptr);

if (enclosingClassName != nullptr)
if ((className == nullptr) || (enclosingClassName != nullptr))
{
return SimdAsHWIntrinsicClassId::Unknown;
}
Expand Down Expand Up @@ -159,7 +192,11 @@ SimdAsHWIntrinsicClassId SimdAsHWIntrinsicInfo::lookupClassId(const char* classN

className += 6;

if (strcmp(className, "2") == 0)
if (className[0] == '\0')
{
return SimdAsHWIntrinsicClassId::Vector;
}
else if (strcmp(className, "2") == 0)
{
return SimdAsHWIntrinsicClassId::Vector2;
}
Expand All @@ -171,16 +208,18 @@ SimdAsHWIntrinsicClassId SimdAsHWIntrinsicInfo::lookupClassId(const char* classN
{
return SimdAsHWIntrinsicClassId::Vector4;
}
else if ((className[0] == '\0') || (strcmp(className, "`1") == 0))
else if (strcmp(className, "`1") == 0)
{
uint32_t vectorTByteLength = comp->getVectorTByteLength();

#if defined(TARGET_XARCH)
if (sizeOfVectorT == 32)
if (vectorTByteLength == 32)
{
return SimdAsHWIntrinsicClassId::VectorT256;
}
#endif // TARGET_XARCH

assert(sizeOfVectorT == 16);
assert(vectorTByteLength == 16);
return SimdAsHWIntrinsicClassId::VectorT128;
}
break;
Expand Down Expand Up @@ -655,6 +694,10 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic,
break;
}

case NI_Quaternion_WithElement:
case NI_Vector2_WithElement:
case NI_Vector3_WithElement:
case NI_Vector4_WithElement:
case NI_VectorT128_WithElement:
case NI_VectorT256_WithElement:
{
Expand Down Expand Up @@ -736,6 +779,10 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic,
break;
}

case NI_Quaternion_WithElement:
case NI_Vector2_WithElement:
case NI_Vector3_WithElement:
case NI_Vector4_WithElement:
case NI_VectorT128_WithElement:
{
assert(numArgs == 3);
Expand Down Expand Up @@ -1484,9 +1531,13 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic,
}

case NI_Quaternion_get_Item:
case NI_Quaternion_GetElement:
case NI_Vector2_get_Item:
case NI_Vector2_GetElement:
case NI_Vector3_get_Item:
case NI_Vector3_GetElement:
case NI_Vector4_get_Item:
case NI_Vector4_GetElement:
case NI_VectorT128_get_Item:
case NI_VectorT128_GetElement:
#if defined(TARGET_XARCH)
Expand Down Expand Up @@ -1986,6 +2037,10 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic,
break;
}

case NI_Quaternion_WithElement:
case NI_Vector2_WithElement:
case NI_Vector3_WithElement:
case NI_Vector4_WithElement:
case NI_VectorT128_WithElement:
#if defined(TARGET_XARCH)
case NI_VectorT256_WithElement:
Expand Down
Loading

0 comments on commit d7a30ba

Please sign in to comment.