Improve precision of the vectorized trig functions (#99549)

eiriktsarpalis · web-flow · commit 07b705b6c3a8 · 2024-03-13T14:35:57.000Z
* Improve precision of the vectorized trig functions
* Ensure all trig algorithms use FMA where required.
* Revert "Ensure all trig algorithms use FMA where required." This reverts commit c97605c.
* Use factories in memberdata methods for improved type safety and inference.
* Update tolerances for hardware that doesn't support FMA.
* Increase tolerance for trig functions on non-FMA hardware.
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Cos.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Cos.cs
@@ -241,11 +241,14 @@ public static Vector128<double> Invoke(Vector128<double> x)
                     return ApplyScalar<CosOperatorDouble>(x);
                 }
 
+                // dn = int(x / pi + 1/2) - 1/2
                 Vector128<double> almHuge = Vector128.Create(AlmHuge);
-                Vector128<double> dn = (uxMasked * Vector128.Create(1 / double.Pi)) + Vector128.Create(double.Pi / 2) + almHuge;
+                Vector128<double> half = Vector128.Create(0.5);
+                Vector128<double> dn = (uxMasked * Vector128.Create(1 / double.Pi)) + half + almHuge;
                 Vector128<ulong> odd = dn.AsUInt64() << 63;
-                dn = dn - almHuge - Vector128.Create(0.5);
+                dn = dn - almHuge - half;
 
+                // f = x - (n*pi)
                 Vector128<double> f = uxMasked;
                 f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector128.Create(-double.Pi), f);
                 f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector128.Create(Pi_Tail2), f);
@@ -276,11 +279,14 @@ public static Vector256<double> Invoke(Vector256<double> x)
                     return ApplyScalar<CosOperatorDouble>(x);
                 }
 
+                // dn = int(x / pi + 1/2) - 1/2
                 Vector256<double> almHuge = Vector256.Create(AlmHuge);
-                Vector256<double> dn = (uxMasked * Vector256.Create(1 / double.Pi)) + Vector256.Create(double.Pi / 2) + almHuge;
+                Vector256<double> half = Vector256.Create(0.5);
+                Vector256<double> dn = (uxMasked * Vector256.Create(1 / double.Pi)) + half + almHuge;
                 Vector256<ulong> odd = dn.AsUInt64() << 63;
-                dn = dn - almHuge - Vector256.Create(0.5);
+                dn = dn - almHuge - half;
 
+                // f = x - (n*pi)
                 Vector256<double> f = uxMasked;
                 f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector256.Create(-double.Pi), f);
                 f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector256.Create(Pi_Tail2), f);
@@ -311,11 +317,14 @@ public static Vector512<double> Invoke(Vector512<double> x)
                     return ApplyScalar<CosOperatorDouble>(x);
                 }
 
+                // dn = int(x / pi + 1/2) - 1/2
                 Vector512<double> almHuge = Vector512.Create(AlmHuge);
-                Vector512<double> dn = (uxMasked * Vector512.Create(1 / double.Pi)) + Vector512.Create(double.Pi / 2) + almHuge;
+                Vector512<double> half = Vector512.Create(0.5);
+                Vector512<double> dn = (uxMasked * Vector512.Create(1 / double.Pi)) + half + almHuge;
                 Vector512<ulong> odd = dn.AsUInt64() << 63;
-                dn = dn - almHuge - Vector512.Create(0.5);
+                dn = dn - almHuge - half;
 
+                // f = x - (n*pi)
                 Vector512<double> f = uxMasked;
                 f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector512.Create(-double.Pi), f);
                 f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector512.Create(Pi_Tail2), f);
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Sin.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Sin.cs
@@ -97,8 +97,8 @@ public static Vector512<T> Invoke(Vector512<T> x)
         /// <summary>float.Sin(x)</summary>
         private readonly struct SinOperatorSingle : IUnaryOperator<float, float>
         {
-            internal const uint SignMask = 0x7FFFFFFFu;
             internal const uint MaxVectorizedValue = 0x49800000u;
+            internal const uint SignMask = 0x7FFFFFFFu;
             private const float AlmHuge = 1.2582912e7f;
             private const float Pi_Tail1 = 8.742278e-8f;
             private const float Pi_Tail2 = 3.430249e-15f;
@@ -231,11 +231,17 @@ public static Vector128<double> Invoke(Vector128<double> x)
                     return ApplyScalar<SinOperatorDouble>(x);
                 }
 
+                // dn = |x| * (1 / π)
                 Vector128<double> almHuge = Vector128.Create(AlmHuge);
                 Vector128<double> dn = MultiplyAddEstimateOperator<double>.Invoke(uxMasked, Vector128.Create(1 / double.Pi), almHuge);
                 Vector128<ulong> odd = dn.AsUInt64() << 63;
                 dn -= almHuge;
-                Vector128<double> f = uxMasked - (dn * Vector128.Create(double.Pi)) - (dn * Vector128.Create(Pi_Tail1)) - (dn * Vector128.Create(Pi_Tail2));
+
+                // f = |x| - (dn * π)
+                Vector128<double> f = uxMasked;
+                f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector128.Create(-double.Pi), f);
+                f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector128.Create(-Pi_Tail1), f);
+                f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector128.Create(-Pi_Tail2), f);
 
                 // POLY_EVAL_ODD_17
                 Vector128<double> f2 = f * f;
@@ -262,11 +268,17 @@ public static Vector256<double> Invoke(Vector256<double> x)
                     return ApplyScalar<SinOperatorDouble>(x);
                 }
 
+                // dn = |x| * (1 / π)
                 Vector256<double> almHuge = Vector256.Create(AlmHuge);
                 Vector256<double> dn = MultiplyAddEstimateOperator<double>.Invoke(uxMasked, Vector256.Create(1 / double.Pi), almHuge);
                 Vector256<ulong> odd = dn.AsUInt64() << 63;
                 dn -= almHuge;
-                Vector256<double> f = uxMasked - (dn * Vector256.Create(double.Pi)) - (dn * Vector256.Create(Pi_Tail1)) - (dn * Vector256.Create(Pi_Tail2));
+
+                // f = |x| - (dn * π)
+                Vector256<double> f = uxMasked;
+                f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector256.Create(-double.Pi), f);
+                f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector256.Create(-Pi_Tail1), f);
+                f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector256.Create(-Pi_Tail2), f);
 
                 // POLY_EVAL_ODD_17
                 Vector256<double> f2 = f * f;
@@ -293,11 +305,17 @@ public static Vector512<double> Invoke(Vector512<double> x)
                     return ApplyScalar<SinOperatorDouble>(x);
                 }
 
+                // dn = |x| * (1 / π)
                 Vector512<double> almHuge = Vector512.Create(AlmHuge);
                 Vector512<double> dn = MultiplyAddEstimateOperator<double>.Invoke(uxMasked, Vector512.Create(1 / double.Pi), almHuge);
                 Vector512<ulong> odd = dn.AsUInt64() << 63;
                 dn -= almHuge;
-                Vector512<double> f = uxMasked - (dn * Vector512.Create(double.Pi)) - (dn * Vector512.Create(Pi_Tail1)) - (dn * Vector512.Create(Pi_Tail2));
+
+                // f = |x| - (dn * π)
+                Vector512<double> f = uxMasked;
+                f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector512.Create(-double.Pi), f);
+                f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector512.Create(-Pi_Tail1), f);
+                f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector512.Create(-Pi_Tail2), f);
 
                 // POLY_EVAL_ODD_17
                 Vector512<double> f2 = f * f;
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Tan.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Tan.cs
@@ -259,10 +259,16 @@ public static Vector128<double> Invoke(Vector128<double> x)
                     return ApplyScalar<TanOperatorDouble>(x);
                 }
 
+                // dn = |x| * (2/π)
                 Vector128<double> dn = MultiplyAddEstimateOperator<double>.Invoke(uxMasked, Vector128.Create(2 / double.Pi), Vector128.Create(AlmHuge));
                 Vector128<ulong> odd = dn.AsUInt64() << 63;
                 dn -= Vector128.Create(AlmHuge);
-                Vector128<double> f = uxMasked.AsDouble() - (dn * (double.Pi / 2)) - (dn * HalfPi2) - (dn * HalfPi3);
+
+                // f = |x| - (dn * π/2)
+                Vector128<double> f = uxMasked;
+                f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector128.Create(-double.Pi / 2), f);
+                f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector128.Create(-HalfPi2), f);
+                f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector128.Create(-HalfPi3), f);
 
                 // POLY_EVAL_ODD_29
                 Vector128<double> g = f * f;
@@ -300,10 +306,16 @@ public static Vector256<double> Invoke(Vector256<double> x)
                     return ApplyScalar<TanOperatorDouble>(x);
                 }
 
+                // dn = |x| * (2/π)
                 Vector256<double> dn = MultiplyAddEstimateOperator<double>.Invoke(uxMasked, Vector256.Create(2 / double.Pi), Vector256.Create(AlmHuge));
                 Vector256<ulong> odd = dn.AsUInt64() << 63;
                 dn -= Vector256.Create(AlmHuge);
-                Vector256<double> f = uxMasked.AsDouble() - (dn * (double.Pi / 2)) - (dn * HalfPi2) - (dn * HalfPi3);
+
+                // f = |x| - (dn * π/2)
+                Vector256<double> f = uxMasked;
+                f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector256.Create(-double.Pi / 2), f);
+                f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector256.Create(-HalfPi2), f);
+                f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector256.Create(-HalfPi3), f);
 
                 // POLY_EVAL_ODD_29
                 Vector256<double> g = f * f;
@@ -341,10 +353,16 @@ public static Vector512<double> Invoke(Vector512<double> x)
                     return ApplyScalar<TanOperatorDouble>(x);
                 }
 
+                // dn = |x| * (2/π)
                 Vector512<double> dn = MultiplyAddEstimateOperator<double>.Invoke(uxMasked, Vector512.Create(2 / double.Pi), Vector512.Create(AlmHuge));
                 Vector512<ulong> odd = dn.AsUInt64() << 63;
                 dn -= Vector512.Create(AlmHuge);
-                Vector512<double> f = uxMasked.AsDouble() - (dn * (double.Pi / 2)) - (dn * HalfPi2) - (dn * HalfPi3);
+
+                // f = |x| - (dn * π/2)
+                Vector512<double> f = uxMasked;
+                f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector512.Create(-double.Pi / 2), f);
+                f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector512.Create(-HalfPi2), f);
+                f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector512.Create(-HalfPi3), f);
 
                 // POLY_EVAL_ODD_29
                 Vector512<double> g = f * f;
diff --git a/src/libraries/System.Numerics.Tensors/tests/TensorPrimitives.Generic.cs b/src/libraries/System.Numerics.Tensors/tests/TensorPrimitives.Generic.cs