Skip to content

Commit f861a98

Browse files
tannergoodingmichaelgsharp
authored andcommitted
Ensure the scalar Reciprocal*Estimate APIs use AVX512 where possible (dotnet#101800)
* Ensure the scalar Reciprocal*Estimate APIs use AVX512 where possible * Ensure that TensorPrimitives.Reciprocal APIs consistently use AVX512 * Move the implementation of ReciprocalEstimate and ReciprocalSqrtEstimate to the JIT so R2R works * Ensure the relevant math intrinsics are marked betterToExpand * Apply formatting patch * Fixing the method header for impEstimateIntrinsic * Don't use the AVX512 paths for TensorPrimitives.Reciprocal*Estimate APIs on .NET 8 * Workaround an issue where the TensorPrimitives net8 tests are not running on net8
1 parent f578d20 commit f861a98

File tree

10 files changed

+217
-53
lines changed

10 files changed

+217
-53
lines changed

src/coreclr/jit/compiler.h

+6-1
Original file line numberDiff line numberDiff line change
@@ -1594,7 +1594,7 @@ enum class ProfileChecks : unsigned int
15941594
{
15951595
CHECK_NONE = 0,
15961596
CHECK_HASLIKELIHOOD = 1 << 0, // check all FlowEdges for hasLikelihood
1597-
CHECK_LIKELIHOODSUM = 1 << 1, // check block succesor likelihoods sum to 1
1597+
CHECK_LIKELIHOODSUM = 1 << 1, // check block succesor likelihoods sum to 1
15981598
CHECK_LIKELY = 1 << 2, // fully check likelihood based weights
15991599
RAISE_ASSERT = 1 << 3, // assert on check failure
16001600
CHECK_ALL_BLOCKS = 1 << 4, // check blocks even if bbHasProfileWeight is false
@@ -4528,6 +4528,11 @@ class Compiler
45284528
CORINFO_THIS_TRANSFORM constraintCallThisTransform,
45294529
NamedIntrinsic* pIntrinsicName,
45304530
bool* isSpecialIntrinsic = nullptr);
4531+
GenTree* impEstimateIntrinsic(CORINFO_METHOD_HANDLE method,
4532+
CORINFO_SIG_INFO* sig,
4533+
CorInfoType callJitType,
4534+
NamedIntrinsic intrinsicName,
4535+
bool tailCall);
45314536
GenTree* impMathIntrinsic(CORINFO_METHOD_HANDLE method,
45324537
CORINFO_SIG_INFO* sig,
45334538
var_types callType,

src/coreclr/jit/importercalls.cpp

+155-8
Original file line numberDiff line numberDiff line change
@@ -3125,7 +3125,15 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis,
31253125
// To be fixed in https://github.com/dotnet/runtime/pull/77465
31263126
const bool tier0opts = !opts.compDbgCode && !opts.jitFlags->IsSet(JitFlags::JIT_FLAG_MIN_OPT);
31273127

3128-
if (!mustExpand && tier0opts)
3128+
if (tier0opts)
3129+
{
3130+
// The *Estimate APIs are allowed to differ in behavior across hardware
3131+
// so ensure we treat them as "betterToExpand" to get deterministic behavior
3132+
3133+
betterToExpand |= (ni == NI_System_Math_ReciprocalEstimate);
3134+
betterToExpand |= (ni == NI_System_Math_ReciprocalSqrtEstimate);
3135+
}
3136+
else if (!mustExpand)
31293137
{
31303138
switch (ni)
31313139
{
@@ -3189,9 +3197,9 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis,
31893197
break;
31903198

31913199
default:
3192-
// Unsafe.* are all small enough to prefer expansions.
3200+
// Various intrinsics are all small enough to prefer expansions.
3201+
betterToExpand |= ni >= NI_SYSTEM_MATH_START && ni <= NI_SYSTEM_MATH_END;
31933202
betterToExpand |= ni >= NI_SRCS_UNSAFE_START && ni <= NI_SRCS_UNSAFE_END;
3194-
// Same for these
31953203
betterToExpand |= ni >= NI_PRIMITIVE_START && ni <= NI_PRIMITIVE_END;
31963204
break;
31973205
}
@@ -4146,6 +4154,13 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis,
41464154
break;
41474155
}
41484156

4157+
case NI_System_Math_ReciprocalEstimate:
4158+
case NI_System_Math_ReciprocalSqrtEstimate:
4159+
{
4160+
retNode = impEstimateIntrinsic(method, sig, callJitType, ni, tailCall);
4161+
break;
4162+
}
4163+
41494164
case NI_System_Array_Clone:
41504165
case NI_System_Collections_Generic_Comparer_get_Default:
41514166
case NI_System_Collections_Generic_EqualityComparer_get_Default:
@@ -7413,13 +7428,15 @@ bool Compiler::IsTargetIntrinsic(NamedIntrinsic intrinsicName)
74137428
// instructions to directly compute round/ceiling/floor/truncate.
74147429

74157430
case NI_System_Math_Abs:
7431+
case NI_System_Math_ReciprocalEstimate:
7432+
case NI_System_Math_ReciprocalSqrtEstimate:
74167433
case NI_System_Math_Sqrt:
74177434
return true;
74187435

74197436
case NI_System_Math_Ceiling:
74207437
case NI_System_Math_Floor:
7421-
case NI_System_Math_Truncate:
74227438
case NI_System_Math_Round:
7439+
case NI_System_Math_Truncate:
74237440
return compOpportunisticallyDependsOn(InstructionSet_SSE41);
74247441

74257442
case NI_System_Math_FusedMultiplyAdd:
@@ -7434,11 +7451,13 @@ bool Compiler::IsTargetIntrinsic(NamedIntrinsic intrinsicName)
74347451
case NI_System_Math_Abs:
74357452
case NI_System_Math_Ceiling:
74367453
case NI_System_Math_Floor:
7437-
case NI_System_Math_Truncate:
7438-
case NI_System_Math_Round:
7439-
case NI_System_Math_Sqrt:
74407454
case NI_System_Math_Max:
74417455
case NI_System_Math_Min:
7456+
case NI_System_Math_ReciprocalEstimate:
7457+
case NI_System_Math_ReciprocalSqrtEstimate:
7458+
case NI_System_Math_Round:
7459+
case NI_System_Math_Sqrt:
7460+
case NI_System_Math_Truncate:
74427461
return true;
74437462

74447463
case NI_System_Math_FusedMultiplyAdd:
@@ -7513,6 +7532,8 @@ bool Compiler::IsMathIntrinsic(NamedIntrinsic intrinsicName)
75137532
case NI_System_Math_MinMagnitudeNumber:
75147533
case NI_System_Math_MinNumber:
75157534
case NI_System_Math_Pow:
7535+
case NI_System_Math_ReciprocalEstimate:
7536+
case NI_System_Math_ReciprocalSqrtEstimate:
75167537
case NI_System_Math_Round:
75177538
case NI_System_Math_Sin:
75187539
case NI_System_Math_Sinh:
@@ -8728,6 +8749,119 @@ void Compiler::impCheckCanInline(GenTreeCall* call,
87288749
}
87298750
}
87308751

8752+
//------------------------------------------------------------------------
8753+
// impEstimateIntrinsic: Imports one of the *Estimate intrinsics which are
8754+
// explicitly allowed to differ in result based on the hardware they're running
8755+
// against
8756+
//
8757+
// Arguments:
8758+
// method - The handle of the method being imported
8759+
// callType - The underlying type for the call
8760+
// intrinsicName - The intrinsic being imported
8761+
// tailCall - true if the method is a tail call; otherwise false
8762+
//
8763+
GenTree* Compiler::impEstimateIntrinsic(CORINFO_METHOD_HANDLE method,
8764+
CORINFO_SIG_INFO* sig,
8765+
CorInfoType callJitType,
8766+
NamedIntrinsic intrinsicName,
8767+
bool tailCall)
8768+
{
8769+
var_types callType = JITtype2varType(callJitType);
8770+
8771+
assert(varTypeIsFloating(callType));
8772+
assert(sig->numArgs == 1);
8773+
8774+
#if defined(FEATURE_HW_INTRINSICS)
8775+
// We use compExactlyDependsOn since these are estimate APIs where
8776+
// the behavior is explicitly allowed to differ across machines and
8777+
// we want to ensure that it gets marked as such in R2R.
8778+
8779+
var_types simdType = TYP_UNKNOWN;
8780+
NamedIntrinsic intrinsicId = NI_Illegal;
8781+
8782+
switch (intrinsicName)
8783+
{
8784+
case NI_System_Math_ReciprocalEstimate:
8785+
{
8786+
#if defined(TARGET_XARCH)
8787+
if (compExactlyDependsOn(InstructionSet_AVX512F))
8788+
{
8789+
simdType = TYP_SIMD16;
8790+
intrinsicId = NI_AVX512F_Reciprocal14Scalar;
8791+
}
8792+
else if ((callType == TYP_FLOAT) && compExactlyDependsOn(InstructionSet_SSE))
8793+
{
8794+
simdType = TYP_SIMD16;
8795+
intrinsicId = NI_SSE_ReciprocalScalar;
8796+
}
8797+
#elif defined(TARGET_ARM64)
8798+
if (compExactlyDependsOn(InstructionSet_AdvSimd_Arm64))
8799+
{
8800+
simdType = TYP_SIMD8;
8801+
intrinsicId = NI_AdvSimd_Arm64_ReciprocalEstimateScalar;
8802+
}
8803+
#endif // TARGET_ARM64
8804+
break;
8805+
}
8806+
8807+
case NI_System_Math_ReciprocalSqrtEstimate:
8808+
{
8809+
#if defined(TARGET_XARCH)
8810+
if (compExactlyDependsOn(InstructionSet_AVX512F))
8811+
{
8812+
simdType = TYP_SIMD16;
8813+
intrinsicId = NI_AVX512F_ReciprocalSqrt14Scalar;
8814+
}
8815+
else if ((callType == TYP_FLOAT) && compExactlyDependsOn(InstructionSet_SSE))
8816+
{
8817+
simdType = TYP_SIMD16;
8818+
intrinsicId = NI_SSE_ReciprocalSqrtScalar;
8819+
}
8820+
#elif defined(TARGET_ARM64)
8821+
if (compExactlyDependsOn(InstructionSet_AdvSimd_Arm64))
8822+
{
8823+
simdType = TYP_SIMD8;
8824+
intrinsicId = NI_AdvSimd_Arm64_ReciprocalSquareRootEstimateScalar;
8825+
}
8826+
#endif // TARGET_ARM64
8827+
break;
8828+
}
8829+
8830+
default:
8831+
{
8832+
unreached();
8833+
}
8834+
}
8835+
8836+
if (intrinsicId != NI_Illegal)
8837+
{
8838+
unsigned simdSize = 0;
8839+
8840+
if (simdType == TYP_SIMD8)
8841+
{
8842+
simdSize = 8;
8843+
}
8844+
else
8845+
{
8846+
assert(simdType == TYP_SIMD16);
8847+
simdSize = 16;
8848+
}
8849+
8850+
GenTree* op1 = impPopStack().val;
8851+
8852+
op1 = gtNewSimdCreateScalarUnsafeNode(simdType, op1, callJitType, simdSize);
8853+
op1 = gtNewSimdHWIntrinsicNode(simdType, op1, intrinsicId, callJitType, simdSize);
8854+
8855+
return gtNewSimdToScalarNode(callType, op1, callJitType, simdSize);
8856+
}
8857+
#endif // FEATURE_HW_INTRINSICS
8858+
8859+
// TODO-CQ: Returning this as an intrinsic blocks inlining and is undesirable
8860+
// return impMathIntrinsic(method, sig, callType, intrinsicName, tailCall);
8861+
8862+
return nullptr;
8863+
}
8864+
87318865
GenTree* Compiler::impMathIntrinsic(CORINFO_METHOD_HANDLE method,
87328866
CORINFO_SIG_INFO* sig,
87338867
var_types callType,
@@ -10337,7 +10471,20 @@ NamedIntrinsic Compiler::lookupPrimitiveFloatNamedIntrinsic(CORINFO_METHOD_HANDL
1033710471

1033810472
case 'R':
1033910473
{
10340-
if (strcmp(methodName, "Round") == 0)
10474+
if (strncmp(methodName, "Reciprocal", 10) == 0)
10475+
{
10476+
methodName += 10;
10477+
10478+
if (strcmp(methodName, "Estimate") == 0)
10479+
{
10480+
result = NI_System_Math_ReciprocalEstimate;
10481+
}
10482+
else if (strcmp(methodName, "SqrtEstimate") == 0)
10483+
{
10484+
result = NI_System_Math_ReciprocalSqrtEstimate;
10485+
}
10486+
}
10487+
else if (strcmp(methodName, "Round") == 0)
1034110488
{
1034210489
result = NI_System_Math_Round;
1034310490
}

src/coreclr/jit/namedintrinsiclist.h

+2
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,8 @@ enum NamedIntrinsic : unsigned short
5151
NI_System_Math_MinMagnitudeNumber,
5252
NI_System_Math_MinNumber,
5353
NI_System_Math_Pow,
54+
NI_System_Math_ReciprocalEstimate,
55+
NI_System_Math_ReciprocalSqrtEstimate,
5456
NI_System_Math_Round,
5557
NI_System_Math_Sin,
5658
NI_System_Math_Sinh,

src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Reciprocal.cs

+36
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,14 @@ public static void ReciprocalSqrtEstimate<T>(ReadOnlySpan<T> x, Span<T> destinat
9595

9696
public static Vector128<T> Invoke(Vector128<T> x)
9797
{
98+
#if NET9_0_OR_GREATER
99+
if (Avx512F.VL.IsSupported)
100+
{
101+
if (typeof(T) == typeof(float)) return Avx512F.VL.Reciprocal14(x.AsSingle()).As<float, T>();
102+
if (typeof(T) == typeof(double)) return Avx512F.VL.Reciprocal14(x.AsDouble()).As<double, T>();
103+
}
104+
#endif
105+
98106
if (Sse.IsSupported)
99107
{
100108
if (typeof(T) == typeof(float)) return Sse.Reciprocal(x.AsSingle()).As<float, T>();
@@ -115,6 +123,14 @@ public static Vector128<T> Invoke(Vector128<T> x)
115123

116124
public static Vector256<T> Invoke(Vector256<T> x)
117125
{
126+
#if NET9_0_OR_GREATER
127+
if (Avx512F.VL.IsSupported)
128+
{
129+
if (typeof(T) == typeof(float)) return Avx512F.VL.Reciprocal14(x.AsSingle()).As<float, T>();
130+
if (typeof(T) == typeof(double)) return Avx512F.VL.Reciprocal14(x.AsDouble()).As<double, T>();
131+
}
132+
#endif
133+
118134
if (Avx.IsSupported)
119135
{
120136
if (typeof(T) == typeof(float)) return Avx.Reciprocal(x.AsSingle()).As<float, T>();
@@ -125,11 +141,13 @@ public static Vector256<T> Invoke(Vector256<T> x)
125141

126142
public static Vector512<T> Invoke(Vector512<T> x)
127143
{
144+
#if NET9_0_OR_GREATER
128145
if (Avx512F.IsSupported)
129146
{
130147
if (typeof(T) == typeof(float)) return Avx512F.Reciprocal14(x.AsSingle()).As<float, T>();
131148
if (typeof(T) == typeof(double)) return Avx512F.Reciprocal14(x.AsDouble()).As<double, T>();
132149
}
150+
#endif
133151

134152
return Vector512<T>.One / x;
135153
}
@@ -143,6 +161,14 @@ public static Vector512<T> Invoke(Vector512<T> x)
143161

144162
public static Vector128<T> Invoke(Vector128<T> x)
145163
{
164+
#if NET9_0_OR_GREATER
165+
if (Avx512F.VL.IsSupported)
166+
{
167+
if (typeof(T) == typeof(float)) return Avx512F.VL.ReciprocalSqrt14(x.AsSingle()).As<float, T>();
168+
if (typeof(T) == typeof(double)) return Avx512F.VL.ReciprocalSqrt14(x.AsDouble()).As<double, T>();
169+
}
170+
#endif
171+
146172
if (Sse.IsSupported)
147173
{
148174
if (typeof(T) == typeof(float)) return Sse.ReciprocalSqrt(x.AsSingle()).As<float, T>();
@@ -163,6 +189,14 @@ public static Vector128<T> Invoke(Vector128<T> x)
163189

164190
public static Vector256<T> Invoke(Vector256<T> x)
165191
{
192+
#if NET9_0_OR_GREATER
193+
if (Avx512F.VL.IsSupported)
194+
{
195+
if (typeof(T) == typeof(float)) return Avx512F.VL.ReciprocalSqrt14(x.AsSingle()).As<float, T>();
196+
if (typeof(T) == typeof(double)) return Avx512F.VL.ReciprocalSqrt14(x.AsDouble()).As<double, T>();
197+
}
198+
#endif
199+
166200
if (Avx.IsSupported)
167201
{
168202
if (typeof(T) == typeof(float)) return Avx.ReciprocalSqrt(x.AsSingle()).As<float, T>();
@@ -173,11 +207,13 @@ public static Vector256<T> Invoke(Vector256<T> x)
173207

174208
public static Vector512<T> Invoke(Vector512<T> x)
175209
{
210+
#if NET9_0_OR_GREATER
176211
if (Avx512F.IsSupported)
177212
{
178213
if (typeof(T) == typeof(float)) return Avx512F.ReciprocalSqrt14(x.AsSingle()).As<float, T>();
179214
if (typeof(T) == typeof(double)) return Avx512F.ReciprocalSqrt14(x.AsDouble()).As<double, T>();
180215
}
216+
#endif
181217

182218
return Vector512<T>.One / Vector512.Sqrt(x);
183219
}

src/libraries/System.Numerics.Tensors/tests/Net8Tests/System.Numerics.Tensors.Net8.Tests.csproj

+1
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
<PropertyGroup>
1111
<TargetFramework>$(NetCoreAppCurrent)</TargetFramework>
1212
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
13+
<DefineConstants>$(DefineConstants);SNT_NET8_TESTS</DefineConstants>
1314
</PropertyGroup>
1415

1516
<ItemGroup>

src/libraries/System.Numerics.Tensors/tests/TensorPrimitives.Generic.cs

+5
Original file line numberDiff line numberDiff line change
@@ -386,7 +386,12 @@ public static IEnumerable<object[]> SpanDestinationFunctionsToTest()
386386
yield return Create(TensorPrimitives.Reciprocal, f => T.One / f);
387387
yield return Create(TensorPrimitives.ReciprocalEstimate, T.ReciprocalEstimate, T.CreateTruncating(Helpers.DefaultToleranceForEstimates));
388388
yield return Create(TensorPrimitives.ReciprocalSqrt, f => T.One / T.Sqrt(f));
389+
390+
#if !SNT_NET8_TESTS
391+
// Avoid running with the net8 tests due to: https://github.com/dotnet/runtime/issues/101846
389392
yield return Create(TensorPrimitives.ReciprocalSqrtEstimate, T.ReciprocalSqrtEstimate, T.CreateTruncating(Helpers.DefaultToleranceForEstimates));
393+
#endif
394+
390395
yield return Create(TensorPrimitives.Round, T.Round);
391396
yield return Create(TensorPrimitives.Sin, T.Sin, trigTolerance);
392397
yield return Create(TensorPrimitives.Sinh, T.Sinh, Helpers.DetermineTolerance<T>(doubleTolerance: 1e-14));

src/libraries/System.Private.CoreLib/src/System/Double.cs

+2
Original file line numberDiff line numberDiff line change
@@ -865,9 +865,11 @@ bool IFloatingPoint<double>.TryWriteSignificandLittleEndian(Span<byte> destinati
865865
public static double Lerp(double value1, double value2, double amount) => (value1 * (1.0 - amount)) + (value2 * amount);
866866

867867
/// <inheritdoc cref="IFloatingPointIeee754{TSelf}.ReciprocalEstimate(TSelf)" />
868+
[Intrinsic]
868869
public static double ReciprocalEstimate(double x) => Math.ReciprocalEstimate(x);
869870

870871
/// <inheritdoc cref="IFloatingPointIeee754{TSelf}.ReciprocalSqrtEstimate(TSelf)" />
872+
[Intrinsic]
871873
public static double ReciprocalSqrtEstimate(double x) => Math.ReciprocalSqrtEstimate(x);
872874

873875
/// <inheritdoc cref="IFloatingPointIeee754{TSelf}.ScaleB(TSelf, int)" />

0 commit comments

Comments
 (0)