From 2c29f1ca9de972b7fe160c709a0aab866a5cd1c3 Mon Sep 17 00:00:00 2001
From: Tanner Gooding <tagoo@outlook.com>
Date: Fri, 12 Jul 2024 16:36:22 -0700
Subject: [PATCH 01/13] Allow using a more efficient algorithm if twice the
 vector size is accelerated

---
 .../System/Runtime/Intrinsics/Vector128.cs    |  18 +-
 .../System/Runtime/Intrinsics/Vector256.cs    |  18 +-
 .../src/System/Runtime/Intrinsics/Vector64.cs |  18 +-
 .../System/Runtime/Intrinsics/VectorMath.cs   | 334 +++++++++++++-----
 4 files changed, 296 insertions(+), 92 deletions(-)
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs
index 2011025c0455d..9fc80d60741a7 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs
@@ -1608,7 +1608,14 @@ public static Vector128<float> Exp(Vector128<float> vector)
         {
             if (IsHardwareAccelerated)
             {
-                return VectorMath.ExpSingle<Vector128<float>, Vector128<uint>, Vector128<double>, Vector128<ulong>>(vector);
+                if (Vector256.IsHardwareAccelerated)
+                {
+                    return VectorMath.ExpSingle<Vector128<float>, Vector128<uint>, Vector256<double>, Vector256<ulong>>(vector);
+                }
+                else
+                {
+                    return VectorMath.ExpSingle<Vector128<float>, Vector128<uint>, Vector128<double>, Vector128<ulong>>(vector);
+                }
             }
             else
             {
@@ -1855,7 +1862,14 @@ public static Vector128<float> Hypot(Vector128<float> x, Vector128<float> y)
         {
             if (IsHardwareAccelerated)
             {
-                return VectorMath.HypotSingle<Vector128<float>, Vector128<double>>(x, y);
+                if (Vector256.IsHardwareAccelerated)
+                {
+                    return VectorMath.HypotSingle<Vector128<float>, Vector256<double>>(x, y);
+                }
+                else
+                {
+                    return VectorMath.HypotSingle<Vector128<float>, Vector128<double>>(x, y);
+                }
             }
             else
             {
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs
index d1dde486f5803..dcae9f6cbb9f0 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs
@@ -1526,7 +1526,14 @@ public static Vector256<float> Exp(Vector256<float> vector)
         {
             if (IsHardwareAccelerated)
             {
-                return VectorMath.ExpSingle<Vector256<float>, Vector256<uint>, Vector256<double>, Vector256<ulong>>(vector);
+                if (Vector512.IsHardwareAccelerated)
+                {
+                    return VectorMath.ExpSingle<Vector256<float>, Vector256<uint>, Vector512<double>, Vector512<ulong>>(vector);
+                }
+                else
+                {
+                    return VectorMath.ExpSingle<Vector256<float>, Vector256<uint>, Vector256<double>, Vector256<ulong>>(vector);
+                }
             }
             else
             {
@@ -1771,7 +1778,14 @@ public static Vector256<float> Hypot(Vector256<float> x, Vector256<float> y)
         {
             if (IsHardwareAccelerated)
             {
-                return VectorMath.HypotSingle<Vector256<float>, Vector256<double>>(x, y);
+                if (Vector512.IsHardwareAccelerated)
+                {
+                    return VectorMath.HypotSingle<Vector256<float>, Vector512<double>>(x, y);
+                }
+                else
+                {
+                    return VectorMath.HypotSingle<Vector256<float>, Vector256<double>>(x, y);
+                }
             }
             else
             {
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs
index 7203311532d70..e0e32aaefc5e2 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs
@@ -1264,7 +1264,14 @@ public static Vector64<float> Exp(Vector64<float> vector)
         {
             if (IsHardwareAccelerated)
             {
-                return VectorMath.ExpSingle<Vector64<float>, Vector64<uint>, Vector64<double>, Vector64<ulong>>(vector);
+                if (Vector128.IsHardwareAccelerated)
+                {
+                    return VectorMath.ExpSingle<Vector64<float>, Vector64<uint>, Vector128<double>, Vector128<ulong>>(vector);
+                }
+                else
+                {
+                    return VectorMath.ExpSingle<Vector64<float>, Vector64<uint>, Vector64<double>, Vector64<ulong>>(vector);
+                }
             }
             else
             {
@@ -1574,7 +1581,14 @@ public static Vector64<float> Hypot(Vector64<float> x, Vector64<float> y)
         {
             if (IsHardwareAccelerated)
             {
-                return VectorMath.HypotSingle<Vector64<float>, Vector64<double>>(x, y);
+                if (Vector128.IsHardwareAccelerated)
+                {
+                    return VectorMath.HypotSingle<Vector64<float>, Vector128<double>>(x, y);
+                }
+                else
+                {
+                    return VectorMath.HypotSingle<Vector64<float>, Vector64<double>>(x, y);
+                }
             }
             else
             {
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/VectorMath.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/VectorMath.cs
index fbe07cc62a9af..95a8751ed1d8b 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/VectorMath.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/VectorMath.cs
@@ -153,7 +153,7 @@ public static TVectorDouble ExpDouble<TVectorDouble, TVectorInt64, TVectorUInt64
 
             // m = (n - j) / 64
             // result = polynomial * 2^m
-            TVectorDouble ret = poly * Unsafe.BitCast<TVectorUInt64, TVectorDouble>((n + TVectorUInt64.Create(V_DP64_BIAS)) << 52);
+            TVectorDouble result = poly * Unsafe.BitCast<TVectorUInt64, TVectorDouble>((n + TVectorUInt64.Create(V_DP64_BIAS)) << 52);
 
             // Check if -709 < vx < 709
             if (TVectorUInt64.GreaterThanAny(Unsafe.BitCast<TVectorDouble, TVectorUInt64>(TVectorDouble.Abs(x)), TVectorUInt64.Create(V_ARG_MAX)))
@@ -161,17 +161,17 @@ public static TVectorDouble ExpDouble<TVectorDouble, TVectorInt64, TVectorUInt64
                 // (x > V_EXPF_MAX) ? double.PositiveInfinity : x
                 TVectorDouble infinityMask = TVectorDouble.GreaterThan(x, TVectorDouble.Create(V_EXPF_MAX));
 
-                ret = TVectorDouble.ConditionalSelect(
+                result = TVectorDouble.ConditionalSelect(
                     infinityMask,
                     TVectorDouble.Create(double.PositiveInfinity),
-                    ret
+                    result
                 );
 
                 // (x < V_EXPF_MIN) ? 0 : x
-                ret = TVectorDouble.AndNot(ret, TVectorDouble.LessThan(x, TVectorDouble.Create(V_EXPF_MIN)));
+                result = TVectorDouble.AndNot(result, TVectorDouble.LessThan(x, TVectorDouble.Create(V_EXPF_MIN)));
             }
 
-            return ret;
+            return result;
         }
 
         public static TVectorSingle ExpSingle<TVectorSingle, TVectorUInt32, TVectorDouble, TVectorUInt64>(TVectorSingle x)
@@ -224,71 +224,21 @@ public static TVectorSingle ExpSingle<TVectorSingle, TVectorUInt32, TVectorDoubl
             const double C5 = 0.009676036358193323;
             const double C6 = 0.001341000536524434;
 
-            // Convert x to double precision
-            (TVectorDouble xl, TVectorDouble xu) = Widen<TVectorSingle, TVectorDouble>(x);
-
-            // x * (64.0 / ln(2))
-            TVectorDouble v_tbl_ln2 = TVectorDouble.Create(V_TBL_LN2);
-
-            TVectorDouble zl = xl * v_tbl_ln2;
-            TVectorDouble zu = xu * v_tbl_ln2;
-
-            TVectorDouble v_expf_huge = TVectorDouble.Create(V_EXPF_HUGE);
-
-            TVectorDouble dnl = zl + v_expf_huge;
-            TVectorDouble dnu = zu + v_expf_huge;
-
-            // n = (int)z
-            TVectorUInt64 nl = Unsafe.BitCast<TVectorDouble, TVectorUInt64>(dnl);
-            TVectorUInt64 nu = Unsafe.BitCast<TVectorDouble, TVectorUInt64>(dnu);
-
-            // dn = (double)n
-            dnl -= v_expf_huge;
-            dnu -= v_expf_huge;
-
-            // r = z - dn
-            TVectorDouble c1 = TVectorDouble.Create(C1);
-            TVectorDouble c2 = TVectorDouble.Create(C2);
-            TVectorDouble c3 = TVectorDouble.Create(C3);
-            TVectorDouble c4 = TVectorDouble.Create(C4);
-            TVectorDouble c5 = TVectorDouble.Create(C5);
-            TVectorDouble c6 = TVectorDouble.Create(C6);
-
-            TVectorDouble rl = zl - dnl;
-
-            TVectorDouble rl2 = rl * rl;
-            TVectorDouble rl4 = rl2 * rl2;
-
-            TVectorDouble polyl = TVectorDouble.MultiplyAddEstimate(
-                rl4,
-                TVectorDouble.MultiplyAddEstimate(c6, rl, c5),
-                TVectorDouble.MultiplyAddEstimate(
-                    rl2,
-                    TVectorDouble.MultiplyAddEstimate(c4, rl, c3),
-                    TVectorDouble.MultiplyAddEstimate(c2, rl, c1)
-                )
-            );
-
-            TVectorDouble ru = zu - dnu;
-
-            TVectorDouble ru2 = ru * ru;
-            TVectorDouble ru4 = ru2 * ru2;
-
-            TVectorDouble polyu = TVectorDouble.MultiplyAddEstimate(
-                ru4,
-                TVectorDouble.MultiplyAddEstimate(c6, ru, c5),
-                TVectorDouble.MultiplyAddEstimate(
-                    ru2,
-                    TVectorDouble.MultiplyAddEstimate(c4, ru, c3),
-                    TVectorDouble.MultiplyAddEstimate(c2, ru, c1)
-                )
-            );
+            TVectorSingle result;
 
-            // result = (float)(poly + (n << 52))
-            TVectorSingle ret = Narrow<TVectorDouble, TVectorSingle>(
-                Unsafe.BitCast<TVectorUInt64, TVectorDouble>(Unsafe.BitCast<TVectorDouble, TVectorUInt64>(polyl) + (nl << 52)),
-                Unsafe.BitCast<TVectorUInt64, TVectorDouble>(Unsafe.BitCast<TVectorDouble, TVectorUInt64>(polyu) + (nu << 52))
-            );
+            if (TVectorSingle.Count == TVectorDouble.Count)
+            {
+                result = Narrow<TVectorDouble, TVectorSingle>(
+                    CoreImpl(Widen<TVectorSingle, TVectorDouble>(x))
+                );
+            }
+            else
+            {
+                result = Narrow<TVectorDouble, TVectorSingle>(
+                    CoreImpl(WidenLower<TVectorSingle, TVectorDouble>(x)),
+                    CoreImpl(WidenUpper<TVectorSingle, TVectorDouble>(x))
+                );
+            }
 
             // Check if -103 < |x| < 88
             if (TVectorUInt32.GreaterThanAny(Unsafe.BitCast<TVectorSingle, TVectorUInt32>(TVectorSingle.Abs(x)), TVectorUInt32.Create(V_ARG_MAX)))
@@ -296,17 +246,49 @@ public static TVectorSingle ExpSingle<TVectorSingle, TVectorUInt32, TVectorDoubl
                 // (x > V_EXPF_MAX) ? float.PositiveInfinity : x
                 TVectorSingle infinityMask = TVectorSingle.GreaterThan(x, TVectorSingle.Create(V_EXPF_MAX));
 
-                ret = TVectorSingle.ConditionalSelect(
+                result = TVectorSingle.ConditionalSelect(
                     infinityMask,
                     TVectorSingle.Create(float.PositiveInfinity),
-                    ret
+                    result
                 );
 
                 // (x < V_EXPF_MIN) ? 0 : x
-                ret = TVectorSingle.AndNot(ret, TVectorSingle.LessThan(x, TVectorSingle.Create(V_EXPF_MIN)));
+                result = TVectorSingle.AndNot(result, TVectorSingle.LessThan(x, TVectorSingle.Create(V_EXPF_MIN)));
             }
 
-            return ret;
+            return result;
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)] // double-precision core; the enclosing method runs it once on a full widen or once per half
+            static TVectorDouble CoreImpl(TVectorDouble x)
+            {
+                // x * (64.0 / ln(2))
+                TVectorDouble z = x * TVectorDouble.Create(V_TBL_LN2);
+
+                TVectorDouble v_expf_huge = TVectorDouble.Create(V_EXPF_HUGE);
+                TVectorDouble dn = z + v_expf_huge;
+
+                // n = (int)z, read as the raw bits of (z + V_EXPF_HUGE)
+                TVectorUInt64 n = Unsafe.BitCast<TVectorDouble, TVectorUInt64>(dn);
+
+                // r = z - (double)n, where (dn - V_EXPF_HUGE) is n as a double
+                TVectorDouble r = z - (dn - v_expf_huge);
+
+                TVectorDouble r2 = r * r;
+                TVectorDouble r4 = r2 * r2;
+
+                TVectorDouble poly = TVectorDouble.MultiplyAddEstimate( // r4 * (C6*r + C5) + (r2 * (C4*r + C3) + (C2*r + C1))
+                    r4,
+                    TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(C6), r, TVectorDouble.Create(C5)),
+                    TVectorDouble.MultiplyAddEstimate(
+                        r2,
+                        TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(C4), r, TVectorDouble.Create(C3)),
+                        TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(C2), r, TVectorDouble.Create(C1))
+                    )
+                );
+
+                // result = bits(poly) + (n << 52), an integer add on the raw bit patterns
+                return Unsafe.BitCast<TVectorUInt64, TVectorDouble>(Unsafe.BitCast<TVectorDouble, TVectorUInt64>(poly) + (n << 52));
+            }
         }
 
         public static TVectorDouble HypotDouble<TVectorDouble, TVectorUInt64>(TVectorDouble x, TVectorDouble y)
@@ -455,13 +437,21 @@ public static TVectorSingle HypotSingle<TVectorSingle, TVectorDouble>(TVectorSin
             TVectorSingle infinityMask = TVectorSingle.IsPositiveInfinity(ax) | TVectorSingle.IsPositiveInfinity(ay);
             TVectorSingle nanMask = TVectorSingle.IsNaN(ax) | TVectorSingle.IsNaN(ay);
 
-            (TVectorDouble xxLower, TVectorDouble xxUpper) = Widen<TVectorSingle, TVectorDouble>(ax);
-            (TVectorDouble yyLower, TVectorDouble yyUpper) = Widen<TVectorSingle, TVectorDouble>(ay);
+            TVectorSingle result;
 
-            TVectorSingle result = Narrow<TVectorDouble, TVectorSingle>(
-                TVectorDouble.Sqrt(TVectorDouble.MultiplyAddEstimate(xxLower, xxLower, yyLower * yyLower)),
-                TVectorDouble.Sqrt(TVectorDouble.MultiplyAddEstimate(xxUpper, xxUpper, yyUpper * yyUpper))
-            );
+            if (TVectorSingle.Count == TVectorDouble.Count)
+            {
+                result = Narrow<TVectorDouble, TVectorSingle>(
+                    CoreImpl(Widen<TVectorSingle, TVectorDouble>(ax), Widen<TVectorSingle, TVectorDouble>(ay))
+                );
+            }
+            else
+            {
+                result = Narrow<TVectorDouble, TVectorSingle>(
+                    CoreImpl(WidenLower<TVectorSingle, TVectorDouble>(ax), WidenLower<TVectorSingle, TVectorDouble>(ay)),
+                    CoreImpl(WidenUpper<TVectorSingle, TVectorDouble>(ax), WidenUpper<TVectorSingle, TVectorDouble>(ay))
+                );
+            }
 
             // IEEE 754 requires that we return +Infinity
             // if either input is Infinity, even if one of
@@ -472,6 +462,12 @@ public static TVectorSingle HypotSingle<TVectorSingle, TVectorDouble>(TVectorSin
             result = TVectorSingle.ConditionalSelect(infinityMask, TVectorSingle.Create(float.PositiveInfinity), result);
 
             return result;
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)] // double-precision core shared by both widening paths of the enclosing method
+            static TVectorDouble CoreImpl(TVectorDouble x, TVectorDouble y)
+            {
+                return TVectorDouble.Sqrt(TVectorDouble.MultiplyAddEstimate(x, x, y * y)); // sqrt((x * x) + (y * y))
+            }
         }
 
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
@@ -1534,6 +1530,63 @@ private static TVector Create<TVector, T>(float value)
             return result;
         }
 
+        [MethodImpl(MethodImplOptions.AggressiveInlining)] // narrows ONE double vector to a single vector with the same element count (half the width)
+        private static TVectorSingle Narrow<TVectorDouble, TVectorSingle>(TVectorDouble vector)
+            where TVectorDouble : unmanaged, ISimdVector<TVectorDouble, double>
+            where TVectorSingle : unmanaged, ISimdVector<TVectorSingle, float>
+        {
+            Unsafe.SkipInit(out TVectorSingle result); // each supported branch below assigns result
+
+            if (typeof(TVectorDouble) == typeof(Vector128<double>)) // Vector128<double> -> Vector64<float>
+            {
+                Debug.Assert(typeof(TVectorSingle) == typeof(Vector64<float>));
+
+                if (AdvSimd.Arm64.IsSupported) // direct Arm64 conversion
+                {
+                    result = (TVectorSingle)(object)AdvSimd.Arm64.ConvertToSingleLower((Vector128<double>)(object)vector);
+                }
+                else // fallback: split into halves and use the two-input Narrow
+                {
+                    Vector128<double> value = (Vector128<double>)(object)vector;
+                    result = (TVectorSingle)(object)Vector64.Narrow(value.GetLower(), value.GetUpper());
+                }
+            }
+            else if (typeof(TVectorDouble) == typeof(Vector256<double>)) // Vector256<double> -> Vector128<float>
+            {
+                Debug.Assert(typeof(TVectorSingle) == typeof(Vector128<float>));
+
+                if (Avx.IsSupported) // direct AVX conversion
+                {
+                    result = (TVectorSingle)(object)Avx.ConvertToVector128Single((Vector256<double>)(object)vector);
+                }
+                else // fallback: split into halves and use the two-input Narrow
+                {
+                    Vector256<double> value = (Vector256<double>)(object)vector;
+                    result = (TVectorSingle)(object)Vector128.Narrow(value.GetLower(), value.GetUpper());
+                }
+            }
+            else if (typeof(TVectorDouble) == typeof(Vector512<double>)) // Vector512<double> -> Vector256<float>
+            {
+                Debug.Assert(typeof(TVectorSingle) == typeof(Vector256<float>));
+
+                if (Avx512F.IsSupported) // direct AVX-512F conversion
+                {
+                    result = (TVectorSingle)(object)Avx512F.ConvertToVector256Single((Vector512<double>)(object)vector);
+                }
+                else // fallback: split into halves and use the two-input Narrow
+                {
+                    Vector512<double> value = (Vector512<double>)(object)vector;
+                    result = (TVectorSingle)(object)Vector256.Narrow(value.GetLower(), value.GetUpper());
+                }
+            }
+            else // any other TVectorDouble is not handled by this overload
+            {
+                ThrowHelper.ThrowNotSupportedException();
+            }
+
+            return result;
+        }
+
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         private static TVectorSingle Narrow<TVectorDouble, TVectorSingle>(TVectorDouble lower, TVectorDouble upper)
             where TVectorDouble : unmanaged, ISimdVector<TVectorDouble, double>
@@ -1575,36 +1628,145 @@ private static TVectorSingle Narrow<TVectorDouble, TVectorSingle>(TVectorDouble
         }
 
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        private static (TVectorDouble Lower, TVectorDouble Upper) Widen<TVectorSingle, TVectorDouble>(TVectorSingle vector)
+        private static TVectorDouble Widen<TVectorSingle, TVectorDouble>(TVectorSingle vector) // single -> double, same element count, twice the width
             where TVectorSingle : unmanaged, ISimdVector<TVectorSingle, float>
             where TVectorDouble : unmanaged, ISimdVector<TVectorDouble, double>
         {
-            Unsafe.SkipInit(out (TVectorDouble, TVectorDouble) result);
+            Unsafe.SkipInit(out TVectorDouble result); // each supported branch below assigns result
+
+            if (typeof(TVectorSingle) == typeof(Vector64<float>)) // Vector64<float> -> Vector128<double>
+            {
+                Debug.Assert(typeof(TVectorDouble) == typeof(Vector128<double>));
+
+                if (AdvSimd.Arm64.IsSupported) // direct Arm64 conversion
+                {
+                    result = (TVectorDouble)(object)AdvSimd.Arm64.ConvertToDouble((Vector64<float>)(object)vector);
+                }
+                else // fallback: widen each half, then join the halves
+                {
+                    Vector64<float> value = (Vector64<float>)(object)vector;
+
+                    Vector64<double> lower = Vector64.WidenLower(value);
+                    Vector64<double> upper = Vector64.WidenUpper(value);
+
+                    result = (TVectorDouble)(object)Vector128.Create(lower, upper);
+                }
+            }
+            else if (typeof(TVectorSingle) == typeof(Vector128<float>)) // Vector128<float> -> Vector256<double>
+            {
+                Debug.Assert(typeof(TVectorDouble) == typeof(Vector256<double>));
+
+                if (Avx.IsSupported) // direct AVX conversion
+                {
+                    result = (TVectorDouble)(object)Avx.ConvertToVector256Double((Vector128<float>)(object)vector);
+                }
+                else // fallback: widen each half, then join the halves
+                {
+                    Vector128<float> value = (Vector128<float>)(object)vector;
+
+                    Vector128<double> lower = Vector128.WidenLower(value);
+                    Vector128<double> upper = Vector128.WidenUpper(value);
+
+                    result = (TVectorDouble)(object)Vector256.Create(lower, upper);
+                }
+            }
+            else if (typeof(TVectorSingle) == typeof(Vector256<float>)) // Vector256<float> -> Vector512<double>
+            {
+                Debug.Assert(typeof(TVectorDouble) == typeof(Vector512<double>));
+
+                if (Avx512F.IsSupported) // direct AVX-512F conversion
+                {
+                    result = (TVectorDouble)(object)Avx512F.ConvertToVector512Double((Vector256<float>)(object)vector);
+                }
+                else // fallback: widen each half, then join the halves
+                {
+                    Vector256<float> value = (Vector256<float>)(object)vector;
+
+                    Vector256<double> lower = Vector256.WidenLower(value);
+                    Vector256<double> upper = Vector256.WidenUpper(value);
+
+                    result = (TVectorDouble)(object)Vector512.Create(lower, upper);
+                }
+            }
+            else // any other TVectorSingle is not handled by this overload
+            {
+                ThrowHelper.ThrowNotSupportedException();
+            }
+
+            return result;
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)] // generic dispatch to the matching same-width WidenLower (float vector -> double vector)
+        private static TVectorDouble WidenLower<TVectorSingle, TVectorDouble>(TVectorSingle vector)
+            where TVectorSingle : unmanaged, ISimdVector<TVectorSingle, float>
+            where TVectorDouble : unmanaged, ISimdVector<TVectorDouble, double>
+        {
+            Unsafe.SkipInit(out TVectorDouble result); // each supported branch below assigns result
+
+            if (typeof(TVectorSingle) == typeof(Vector<float>)) // Vector<float> -> Vector<double>
+            {
+                Debug.Assert(typeof(TVectorDouble) == typeof(Vector<double>));
+                result = (TVectorDouble)(object)Vector.WidenLower((Vector<float>)(object)vector);
+            }
+            else if (typeof(TVectorSingle) == typeof(Vector64<float>)) // Vector64<float> -> Vector64<double>
+            {
+                Debug.Assert(typeof(TVectorDouble) == typeof(Vector64<double>));
+                result = (TVectorDouble)(object)Vector64.WidenLower((Vector64<float>)(object)vector);
+            }
+            else if (typeof(TVectorSingle) == typeof(Vector128<float>)) // Vector128<float> -> Vector128<double>
+            {
+                Debug.Assert(typeof(TVectorDouble) == typeof(Vector128<double>));
+                result = (TVectorDouble)(object)Vector128.WidenLower((Vector128<float>)(object)vector);
+            }
+            else if (typeof(TVectorSingle) == typeof(Vector256<float>)) // Vector256<float> -> Vector256<double>
+            {
+                Debug.Assert(typeof(TVectorDouble) == typeof(Vector256<double>));
+                result = (TVectorDouble)(object)Vector256.WidenLower((Vector256<float>)(object)vector);
+            }
+            else if (typeof(TVectorSingle) == typeof(Vector512<float>)) // Vector512<float> -> Vector512<double>
+            {
+                Debug.Assert(typeof(TVectorDouble) == typeof(Vector512<double>));
+                result = (TVectorDouble)(object)Vector512.WidenLower((Vector512<float>)(object)vector);
+            }
+            else // any other TVectorSingle is not handled
+            {
+                ThrowHelper.ThrowNotSupportedException();
+            }
+
+            return result;
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static TVectorDouble WidenUpper<TVectorSingle, TVectorDouble>(TVectorSingle vector)
+            where TVectorSingle : unmanaged, ISimdVector<TVectorSingle, float>
+            where TVectorDouble : unmanaged, ISimdVector<TVectorDouble, double>
+        {
+            Unsafe.SkipInit(out TVectorDouble result);
 
             if (typeof(TVectorSingle) == typeof(Vector<float>))
             {
                 Debug.Assert(typeof(TVectorDouble) == typeof(Vector<double>));
-                result = ((TVectorDouble, TVectorDouble))(object)Vector.Widen((Vector<float>)(object)vector);
+                result = (TVectorDouble)(object)Vector.WidenUpper((Vector<float>)(object)vector);
             }
             else if (typeof(TVectorSingle) == typeof(Vector64<float>))
             {
                 Debug.Assert(typeof(TVectorDouble) == typeof(Vector64<double>));
-                result = ((TVectorDouble, TVectorDouble))(object)Vector64.Widen((Vector64<float>)(object)vector);
+                result = (TVectorDouble)(object)Vector64.WidenUpper((Vector64<float>)(object)vector);
             }
             else if (typeof(TVectorSingle) == typeof(Vector128<float>))
             {
                 Debug.Assert(typeof(TVectorDouble) == typeof(Vector128<double>));
-                result = ((TVectorDouble, TVectorDouble))(object)Vector128.Widen((Vector128<float>)(object)vector);
+                result = (TVectorDouble)(object)Vector128.WidenUpper((Vector128<float>)(object)vector);
             }
             else if (typeof(TVectorSingle) == typeof(Vector256<float>))
             {
                 Debug.Assert(typeof(TVectorDouble) == typeof(Vector256<double>));
-                result = ((TVectorDouble, TVectorDouble))(object)Vector256.Widen((Vector256<float>)(object)vector);
+                result = (TVectorDouble)(object)Vector256.WidenUpper((Vector256<float>)(object)vector);
             }
             else if (typeof(TVectorSingle) == typeof(Vector512<float>))
             {
                 Debug.Assert(typeof(TVectorDouble) == typeof(Vector512<double>));
-                result = ((TVectorDouble, TVectorDouble))(object)Vector512.Widen((Vector512<float>)(object)vector);
+                result = (TVectorDouble)(object)Vector512.WidenUpper((Vector512<float>)(object)vector);
             }
             else
             {

From 006a24a6652867105c4669392b85f8cd45ebb243 Mon Sep 17 00:00:00 2001
From: Tanner Gooding <tagoo@outlook.com>
Date: Fri, 12 Jul 2024 16:48:38 -0700
Subject: [PATCH 02/13] Remove an unnecessary generic parameter from ExpDouble

---
 .../System.Private.CoreLib/src/System/Numerics/Vector.cs        | 2 +-
 .../src/System/Runtime/Intrinsics/Vector128.cs                  | 2 +-
 .../src/System/Runtime/Intrinsics/Vector256.cs                  | 2 +-
 .../src/System/Runtime/Intrinsics/Vector512.cs                  | 2 +-
 .../src/System/Runtime/Intrinsics/Vector64.cs                   | 2 +-
 .../src/System/Runtime/Intrinsics/VectorMath.cs                 | 2 +-
 6 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.cs
index c4fbc114dc988..c70de3e09cee0 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.cs
@@ -807,7 +807,7 @@ public static Vector<double> Exp(Vector<double> vector)
         {
             if (IsHardwareAccelerated)
             {
-                return VectorMath.ExpDouble<Vector<double>, Vector<long>, Vector<ulong>>(vector);
+                return VectorMath.ExpDouble<Vector<double>, Vector<ulong>>(vector);
             }
             else
             {
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs
index 9fc80d60741a7..aeb66aee91b4d 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs
@@ -1591,7 +1591,7 @@ public static Vector128<double> Exp(Vector128<double> vector)
         {
             if (IsHardwareAccelerated)
             {
-                return VectorMath.ExpDouble<Vector128<double>, Vector128<long>, Vector128<ulong>>(vector);
+                return VectorMath.ExpDouble<Vector128<double>, Vector128<ulong>>(vector);
             }
             else
             {
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs
index dcae9f6cbb9f0..f1eeb327d4887 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs
@@ -1509,7 +1509,7 @@ public static Vector256<double> Exp(Vector256<double> vector)
         {
             if (IsHardwareAccelerated)
             {
-                return VectorMath.ExpDouble<Vector256<double>, Vector256<long>, Vector256<ulong>>(vector);
+                return VectorMath.ExpDouble<Vector256<double>, Vector256<ulong>>(vector);
             }
             else
             {
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs
index 7a22ec7681824..a1cd63812ad19 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs
@@ -1572,7 +1572,7 @@ public static Vector512<double> Exp(Vector512<double> vector)
         {
             if (IsHardwareAccelerated)
             {
-                return VectorMath.ExpDouble<Vector512<double>, Vector512<long>, Vector512<ulong>>(vector);
+                return VectorMath.ExpDouble<Vector512<double>, Vector512<ulong>>(vector);
             }
             else
             {
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs
index e0e32aaefc5e2..1d2a61fe4284c 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs
@@ -1248,7 +1248,7 @@ public static Vector64<double> Exp(Vector64<double> vector)
         {
             if (IsHardwareAccelerated)
             {
-                return VectorMath.ExpDouble<Vector64<double>, Vector64<long>, Vector64<ulong>>(vector);
+                return VectorMath.ExpDouble<Vector64<double>, Vector64<ulong>>(vector);
             }
             else
             {
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/VectorMath.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/VectorMath.cs
index 95a8751ed1d8b..db2934b6bc595 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/VectorMath.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/VectorMath.cs
@@ -50,7 +50,7 @@ public static TVector DegreesToRadians<TVector, T>(TVector degrees)
             return (degrees * TVector.Create(T.Pi)) / TVector.Create(T.CreateTruncating(180));
         }
 
-        public static TVectorDouble ExpDouble<TVectorDouble, TVectorInt64, TVectorUInt64>(TVectorDouble x)
+        public static TVectorDouble ExpDouble<TVectorDouble, TVectorUInt64>(TVectorDouble x)
             where TVectorDouble : unmanaged, ISimdVector<TVectorDouble, double>
             where TVectorUInt64 : unmanaged, ISimdVector<TVectorUInt64, ulong>
         {

From 2e5d80afea6c0296c0e58cb9df1e92ebfbf9d6fa Mon Sep 17 00:00:00 2001
From: Tanner Gooding <tagoo@outlook.com>
Date: Fri, 12 Jul 2024 20:50:35 -0700
Subject: [PATCH 03/13] Expose the Sin, Cos, and SinCos methods on the Vector
 types

---
 .../ref/System.Numerics.Vectors.cs            |   15 +
 .../src/System/Numerics/Vector.cs             |  128 ++
 .../src/System/Numerics/Vector2.cs            |   22 +-
 .../src/System/Numerics/Vector3.cs            |   22 +-
 .../src/System/Numerics/Vector4.cs            |   16 +
 .../src/System/Numerics/VectorMath.cs         |  140 ---
 .../System/Runtime/Intrinsics/Vector128.cs    |  129 ++
 .../System/Runtime/Intrinsics/Vector256.cs    |  129 ++
 .../System/Runtime/Intrinsics/Vector512.cs    |  108 ++
 .../src/System/Runtime/Intrinsics/Vector64.cs |  161 +++
 .../System/Runtime/Intrinsics/VectorMath.cs   | 1096 +++++++++++++++++
 .../ref/System.Runtime.Intrinsics.cs          |   24 +
 12 files changed, 1844 insertions(+), 146 deletions(-)
 delete mode 100644 src/libraries/System.Private.CoreLib/src/System/Numerics/VectorMath.cs

diff --git a/src/libraries/System.Numerics.Vectors/ref/System.Numerics.Vectors.cs b/src/libraries/System.Numerics.Vectors/ref/System.Numerics.Vectors.cs
index fba0c8dbcd4be..bc5bbe4cdc102 100644
--- a/src/libraries/System.Numerics.Vectors/ref/System.Numerics.Vectors.cs
+++ b/src/libraries/System.Numerics.Vectors/ref/System.Numerics.Vectors.cs
@@ -262,6 +262,8 @@ public static partial class Vector
         [System.CLSCompliantAttribute(false)]
         public static System.Numerics.Vector<System.UInt64> ConvertToUInt64Native(System.Numerics.Vector<System.Double> value) { throw null; }
         public static System.Numerics.Vector<T> CopySign<T>(System.Numerics.Vector<T> value, System.Numerics.Vector<T> sign) { throw null; }
+        public static System.Numerics.Vector<double> Cos(System.Numerics.Vector<double> vector) { throw null; }
+        public static System.Numerics.Vector<float> Cos(System.Numerics.Vector<float> vector) { throw null; }
         public static System.Numerics.Vector<T> Create<T>(T value) { throw null; }
         public static System.Numerics.Vector<T> Create<T>(System.ReadOnlySpan<T> values) { throw null; }
         public static System.Numerics.Vector<T> CreateSequence<T>(T start, T step) { throw null; }
@@ -406,6 +408,10 @@ public static partial class Vector
         public static System.Numerics.Vector<System.UInt32> ShiftRightLogical(System.Numerics.Vector<System.UInt32> value, int shiftCount) { throw null; }
         [System.CLSCompliantAttribute(false)]
         public static System.Numerics.Vector<System.UInt64> ShiftRightLogical(System.Numerics.Vector<System.UInt64> value, int shiftCount) { throw null; }
+        public static System.Numerics.Vector<double> Sin(System.Numerics.Vector<double> vector) { throw null; }
+        public static System.Numerics.Vector<float> Sin(System.Numerics.Vector<float> vector) { throw null; }
+        public static (System.Numerics.Vector<double> Sin, System.Numerics.Vector<double> Cos) SinCos(System.Numerics.Vector<double> vector) { throw null; }
+        public static (System.Numerics.Vector<float> Sin, System.Numerics.Vector<float> Cos) SinCos(System.Numerics.Vector<float> vector) { throw null; }
         public static System.Numerics.Vector<T> SquareRoot<T>(System.Numerics.Vector<T> value) { throw null; }
 #pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T')
         [System.CLSCompliantAttribute(false)]
@@ -490,6 +496,7 @@ public partial struct Vector2 : System.IEquatable<System.Numerics.Vector2>, Syst
         public readonly void CopyTo(float[] array) { }
         public readonly void CopyTo(float[] array, int index) { }
         public readonly void CopyTo(System.Span<float> destination) { }
+        public static System.Numerics.Vector2 Cos(System.Numerics.Vector2 vector) { throw null; }
         public readonly bool TryCopyTo(System.Span<float> destination) { throw null; }
         public static System.Numerics.Vector2 DegreesToRadians(System.Numerics.Vector2 degrees) { throw null; }
         public static float Distance(System.Numerics.Vector2 value1, System.Numerics.Vector2 value2) { throw null; }
@@ -539,6 +546,8 @@ public readonly void CopyTo(System.Span<float> destination) { }
         public static System.Numerics.Vector2 Round(System.Numerics.Vector2 vector) { throw null; }
         public static System.Numerics.Vector2 Round(System.Numerics.Vector2 vector, System.MidpointRounding mode) { throw null; }
         public static System.Numerics.Vector2 Reflect(System.Numerics.Vector2 vector, System.Numerics.Vector2 normal) { throw null; }
+        public static System.Numerics.Vector2 Sin(System.Numerics.Vector2 vector) { throw null; }
+        public static (System.Numerics.Vector2 Sin, System.Numerics.Vector2 Cos) SinCos(System.Numerics.Vector2 vector) { throw null; }
         public static System.Numerics.Vector2 SquareRoot(System.Numerics.Vector2 value) { throw null; }
         public static System.Numerics.Vector2 Subtract(System.Numerics.Vector2 left, System.Numerics.Vector2 right) { throw null; }
         public override readonly string ToString() { throw null; }
@@ -586,6 +595,7 @@ public partial struct Vector3 : System.IEquatable<System.Numerics.Vector3>, Syst
         public readonly void CopyTo(float[] array) { }
         public readonly void CopyTo(float[] array, int index) { }
         public readonly void CopyTo(System.Span<float> destination) { }
+        public static System.Numerics.Vector3 Cos(System.Numerics.Vector3 vector) { throw null; }
         public readonly bool TryCopyTo(System.Span<float> destination) { throw null; }
         public static System.Numerics.Vector3 Cross(System.Numerics.Vector3 vector1, System.Numerics.Vector3 vector2) { throw null; }
         public static System.Numerics.Vector3 DegreesToRadians(System.Numerics.Vector3 degrees) { throw null; }
@@ -636,6 +646,8 @@ public readonly void CopyTo(System.Span<float> destination) { }
         public static System.Numerics.Vector3 Reflect(System.Numerics.Vector3 vector, System.Numerics.Vector3 normal) { throw null; }
         public static System.Numerics.Vector3 Round(System.Numerics.Vector3 vector) { throw null; }
         public static System.Numerics.Vector3 Round(System.Numerics.Vector3 vector, System.MidpointRounding mode) { throw null; }
+        public static System.Numerics.Vector3 Sin(System.Numerics.Vector3 vector) { throw null; }
+        public static (System.Numerics.Vector3 Sin, System.Numerics.Vector3 Cos) SinCos(System.Numerics.Vector3 vector) { throw null; }
         public static System.Numerics.Vector3 SquareRoot(System.Numerics.Vector3 value) { throw null; }
         public static System.Numerics.Vector3 Subtract(System.Numerics.Vector3 left, System.Numerics.Vector3 right) { throw null; }
         public override readonly string ToString() { throw null; }
@@ -685,6 +697,7 @@ public partial struct Vector4 : System.IEquatable<System.Numerics.Vector4>, Syst
         public readonly void CopyTo(float[] array) { }
         public readonly void CopyTo(float[] array, int index) { }
         public readonly void CopyTo(System.Span<float> destination) { }
+        public static System.Numerics.Vector4 Cos(System.Numerics.Vector4 vector) { throw null; }
         public readonly bool TryCopyTo(System.Span<float> destination) { throw null; }
         public static System.Numerics.Vector4 DegreesToRadians(System.Numerics.Vector4 degrees) { throw null; }
         public static float Distance(System.Numerics.Vector4 value1, System.Numerics.Vector4 value2) { throw null; }
@@ -733,6 +746,8 @@ public readonly void CopyTo(System.Span<float> destination) { }
         public static System.Numerics.Vector4 RadiansToDegrees(System.Numerics.Vector4 radians) { throw null; }
         public static System.Numerics.Vector4 Round(System.Numerics.Vector4 vector) { throw null; }
         public static System.Numerics.Vector4 Round(System.Numerics.Vector4 vector, System.MidpointRounding mode) { throw null; }
+        public static System.Numerics.Vector4 Sin(System.Numerics.Vector4 vector) { throw null; }
+        public static (System.Numerics.Vector4 Sin, System.Numerics.Vector4 Cos) SinCos(System.Numerics.Vector4 vector) { throw null; }
         public static System.Numerics.Vector4 SquareRoot(System.Numerics.Vector4 value) { throw null; }
         public static System.Numerics.Vector4 Subtract(System.Numerics.Vector4 left, System.Numerics.Vector4 right) { throw null; }
         public override readonly string ToString() { throw null; }
diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.cs
index c70de3e09cee0..46fb2d19dcff2 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.cs
@@ -537,6 +537,48 @@ public static Vector<ulong> ConvertToUInt64Native(Vector<double> value)
             return result;
         }
 
+        internal static Vector<T> Cos<T>(Vector<T> vector) // software fallback: applies T.Cos to one element at a time
+            where T : ITrigonometricFunctions<T>
+        {
+            Unsafe.SkipInit(out Vector<T> result); // deliberately left uninitialized; the loop below writes every index
+
+            for (int index = 0; index < Vector<T>.Count; index++)
+            {
+                T value = T.Cos(vector.GetElementUnsafe(index));
+                result.SetElementUnsafe(index, value);
+            }
+
+            return result;
+        }
+
+        /// <inheritdoc cref="Vector128.Cos(Vector128{double})" />
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector<double> Cos(Vector<double> vector)
+        {
+            if (IsHardwareAccelerated)
+            {
+                return VectorMath.CosDouble<Vector<double>, Vector<long>>(vector); // generic VectorMath routine shared by the vector types
+            }
+            else
+            {
+                return Cos<double>(vector); // element-by-element fallback through double.Cos
+            }
+        }
+
+        /// <inheritdoc cref="Vector128.Cos(Vector128{float})" />
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector<float> Cos(Vector<float> vector)
+        {
+            if (IsHardwareAccelerated) // accelerated path also passes Vector<double>/Vector<long>; presumably used for widened intermediates - confirm in VectorMath.CosSingle
+            {
+                return VectorMath.CosSingle<Vector<float>, Vector<int>, Vector<double>, Vector<long>>(vector);
+            }
+            else
+            {
+                return Cos<float>(vector); // element-by-element fallback through float.Cos
+            }
+        }
+
         /// <inheritdoc cref="ISimdVector{TSelf, T}.CopySign(TSelf, TSelf)" />
         [Intrinsic]
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
@@ -2425,6 +2467,92 @@ internal static Vector<T> Round<T>(Vector<T> vector)
         [CLSCompliant(false)]
         public static Vector<ulong> ShiftRightLogical(Vector<ulong> value, int shiftCount) => value >>> shiftCount;
 
+        internal static Vector<T> Sin<T>(Vector<T> vector) // software fallback: applies T.Sin to one element at a time
+            where T : ITrigonometricFunctions<T>
+        {
+            Unsafe.SkipInit(out Vector<T> result); // deliberately left uninitialized; the loop below writes every index
+
+            for (int index = 0; index < Vector<T>.Count; index++)
+            {
+                T value = T.Sin(vector.GetElementUnsafe(index));
+                result.SetElementUnsafe(index, value);
+            }
+
+            return result;
+        }
+
+        /// <inheritdoc cref="Vector128.Sin(Vector128{double})" />
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector<double> Sin(Vector<double> vector)
+        {
+            if (IsHardwareAccelerated)
+            {
+                return VectorMath.SinDouble<Vector<double>, Vector<long>>(vector); // generic VectorMath routine shared by the vector types
+            }
+            else
+            {
+                return Sin<double>(vector); // element-by-element fallback through double.Sin
+            }
+        }
+
+        /// <inheritdoc cref="Vector128.Sin(Vector128{float})" />
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector<float> Sin(Vector<float> vector)
+        {
+            if (IsHardwareAccelerated) // accelerated path also passes Vector<double>/Vector<long>; presumably used for widened intermediates - confirm in VectorMath.SinSingle
+            {
+                return VectorMath.SinSingle<Vector<float>, Vector<int>, Vector<double>, Vector<long>>(vector);
+            }
+            else
+            {
+                return Sin<float>(vector); // element-by-element fallback through float.Sin
+            }
+        }
+
+        internal static (Vector<T> Sin, Vector<T> Cos) SinCos<T>(Vector<T> vector) // software fallback: one T.SinCos call per element
+            where T : ITrigonometricFunctions<T>
+        {
+            Unsafe.SkipInit(out Vector<T> sinResult); // both outputs start uninitialized;
+            Unsafe.SkipInit(out Vector<T> cosResult); // the loop below writes every index of each
+
+            for (int index = 0; index < Vector<T>.Count; index++)
+            {
+                (T sinValue, T cosValue) = T.SinCos(vector.GetElementUnsafe(index)); // sine and cosine of the same element from a single call
+                sinResult.SetElementUnsafe(index, sinValue);
+                cosResult.SetElementUnsafe(index, cosValue);
+            }
+
+            return (sinResult, cosResult);
+        }
+
+        /// <inheritdoc cref="Vector128.SinCos(Vector128{double})" />
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static (Vector<double> Sin, Vector<double> Cos) SinCos(Vector<double> vector)
+        {
+            if (IsHardwareAccelerated)
+            {
+                return VectorMath.SinCosDouble<Vector<double>, Vector<long>>(vector); // generic VectorMath routine shared by the vector types
+            }
+            else
+            {
+                return SinCos<double>(vector); // element-by-element fallback through double.SinCos
+            }
+        }
+
+        /// <inheritdoc cref="Vector128.SinCos(Vector128{float})" />
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static (Vector<float> Sin, Vector<float> Cos) SinCos(Vector<float> vector)
+        {
+            if (IsHardwareAccelerated) // accelerated path also passes Vector<double>/Vector<long>; presumably used for widened intermediates - confirm in VectorMath.SinCosSingle
+            {
+                return VectorMath.SinCosSingle<Vector<float>, Vector<int>, Vector<double>, Vector<long>>(vector);
+            }
+            else
+            {
+                return SinCos<float>(vector); // element-by-element fallback through float.SinCos
+            }
+        }
+
         /// <summary>Computes the square root of a vector on a per-element basis.</summary>
         /// <param name="value">The vector whose square root is to be computed.</param>
         /// <typeparam name="T">The type of the elements in the vector.</typeparam>
diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector2.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector2.cs
index e101a75ea9154..e51501d5102a1 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector2.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector2.cs
@@ -280,6 +280,10 @@ readonly get
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector2 CopySign(Vector2 value, Vector2 sign) => Vector128.CopySign(value.AsVector128Unsafe(), sign.AsVector128Unsafe()).AsVector2();
 
+        /// <inheritdoc cref="Vector4.Cos(Vector4)" />
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector2 Cos(Vector2 vector) => Vector128.Cos(vector.AsVector128()).AsVector2(); // evaluated as a Vector128<float>, then narrowed back to Vector2
+
         /// <summary>Creates a new <see cref="Vector2" /> object whose two elements have the same value.</summary>
         /// <param name="value">The value to assign to all two elements.</param>
         /// <returns>A new <see cref="Vector2" /> whose two elements have the same value.</returns>
@@ -361,7 +365,7 @@ public static Vector2 Create(ReadOnlySpan<float> values)
 
         /// <inheritdoc cref="Vector4.Exp(Vector4)" />
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        public static Vector2 Exp(Vector2 vector) => Vector128.Exp(vector.AsVector128Unsafe()).AsVector2();
+        public static Vector2 Exp(Vector2 vector) => Vector128.Exp(vector.AsVector128()).AsVector2(); // AsVector128, not AsVector128Unsafe: presumably keeps the unused lanes well-defined - confirm
 
         /// <inheritdoc cref="Vector128.MultiplyAddEstimate(Vector128{float}, Vector128{float}, Vector128{float})" />
         [Intrinsic]
@@ -385,11 +389,11 @@ public static Vector2 Create(ReadOnlySpan<float> values)
 
         /// <inheritdoc cref="Vector4.Log2(Vector4)" />
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        public static Vector2 Log(Vector2 vector) => Vector128.Log(vector.AsVector128Unsafe()).AsVector2();
+        public static Vector2 Log(Vector2 vector) => Vector128.Log(Vector4.Create(vector, 1.0f, 1.0f).AsVector128()).AsVector2(); // the two lanes dropped by AsVector2 are filled with 1.0f before the log is taken
 
         /// <inheritdoc cref="Vector4.Log(Vector4)" />
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        public static Vector2 Log2(Vector2 vector) => Vector128.Log2(vector.AsVector128Unsafe()).AsVector2();
+        public static Vector2 Log2(Vector2 vector) => Vector128.Log2(Vector4.Create(vector, 1.0f, 1.0f).AsVector128()).AsVector2(); // the two lanes dropped by AsVector2 are filled with 1.0f before the log is taken
 
         /// <inheritdoc cref="ISimdVector{TSelf, T}.Max(TSelf, TSelf)" />
         [Intrinsic]
@@ -507,6 +511,18 @@ public static Vector2 Reflect(Vector2 vector, Vector2 normal)
         [Intrinsic]
         public static Vector2 Round(Vector2 vector, MidpointRounding mode) => Vector128.Round(vector.AsVector128Unsafe(), mode).AsVector2();
 
+        /// <inheritdoc cref="Vector4.Sin(Vector4)" />
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector2 Sin(Vector2 vector) => Vector128.Sin(vector.AsVector128()).AsVector2(); // evaluated as a Vector128<float>, then narrowed back to Vector2
+
+        /// <inheritdoc cref="Vector4.SinCos(Vector4)" />
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static (Vector2 Sin, Vector2 Cos) SinCos(Vector2 vector)
+        {
+            (Vector128<float> sin, Vector128<float> cos) = Vector128.SinCos(vector.AsVector128()); // one Vector128 call produces both results
+            return (sin.AsVector2(), cos.AsVector2()); // narrow each result back to Vector2
+        }
+
         /// <summary>Returns a vector whose elements are the square root of each of a specified vector's elements.</summary>
         /// <param name="value">A vector.</param>
         /// <returns>The square root vector.</returns>
diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector3.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector3.cs
index 8519f382655de..bc81131e20a49 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector3.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector3.cs
@@ -301,6 +301,10 @@ readonly get
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector3 CopySign(Vector3 value, Vector3 sign) => Vector128.CopySign(value.AsVector128Unsafe(), sign.AsVector128Unsafe()).AsVector3();
 
+        /// <inheritdoc cref="Vector4.Cos(Vector4)" />
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector3 Cos(Vector3 vector) => Vector128.Cos(vector.AsVector128()).AsVector3(); // evaluated as a Vector128<float>, then narrowed back to Vector3
+
         /// <summary>Creates a new <see cref="Vector3" /> object whose three elements have the same value.</summary>
         /// <param name="value">The value to assign to all three elements.</param>
         /// <returns>A new <see cref="Vector3" /> whose three elements have the same value.</returns>
@@ -418,7 +422,7 @@ public static Vector3 Cross(Vector3 vector1, Vector3 vector2)
 
         /// <inheritdoc cref="Vector4.Exp(Vector4)" />
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        public static Vector3 Exp(Vector3 vector) => Vector128.Exp(vector.AsVector128Unsafe()).AsVector3();
+        public static Vector3 Exp(Vector3 vector) => Vector128.Exp(vector.AsVector128()).AsVector3(); // AsVector128, not AsVector128Unsafe: presumably keeps the unused lane well-defined - confirm
 
         /// <inheritdoc cref="Vector128.MultiplyAddEstimate(Vector128{float}, Vector128{float}, Vector128{float})" />
         [Intrinsic]
@@ -442,11 +446,11 @@ public static Vector3 Cross(Vector3 vector1, Vector3 vector2)
 
         /// <inheritdoc cref="Vector4.Log2(Vector4)" />
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        public static Vector3 Log(Vector3 vector) => Vector128.Log(vector.AsVector128Unsafe()).AsVector3();
+        public static Vector3 Log(Vector3 vector) => Vector128.Log(Vector4.Create(vector, 1.0f).AsVector128()).AsVector3(); // the lane dropped by AsVector3 is filled with 1.0f before the log is taken
 
         /// <inheritdoc cref="Vector4.Log(Vector4)" />
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        public static Vector3 Log2(Vector3 vector) => Vector128.Log2(vector.AsVector128Unsafe()).AsVector3();
+        public static Vector3 Log2(Vector3 vector) => Vector128.Log2(Vector4.Create(vector, 1.0f).AsVector128()).AsVector3(); // the lane dropped by AsVector3 is filled with 1.0f before the log is taken
 
         /// <inheritdoc cref="ISimdVector{TSelf, T}.Max(TSelf, TSelf)" />
         [Intrinsic]
@@ -564,6 +568,18 @@ public static Vector3 Reflect(Vector3 vector, Vector3 normal)
         [Intrinsic]
         public static Vector3 Round(Vector3 vector, MidpointRounding mode) => Vector128.Round(vector.AsVector128Unsafe(), mode).AsVector3();
 
+        /// <inheritdoc cref="Vector4.Sin(Vector4)" />
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector3 Sin(Vector3 vector) => Vector128.Sin(vector.AsVector128()).AsVector3(); // evaluated as a Vector128<float>, then narrowed back to Vector3
+
+        /// <inheritdoc cref="Vector4.SinCos(Vector4)" />
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static (Vector3 Sin, Vector3 Cos) SinCos(Vector3 vector)
+        {
+            (Vector128<float> sin, Vector128<float> cos) = Vector128.SinCos(vector.AsVector128()); // one Vector128 call produces both results
+            return (sin.AsVector3(), cos.AsVector3()); // narrow each result back to Vector3
+        }
+
         /// <summary>Returns a vector whose elements are the square root of each of a specified vector's elements.</summary>
         /// <param name="value">A vector.</param>
         /// <returns>The square root vector.</returns>
diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector4.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector4.cs
index 9c7028520f762..8dbd1a07f674d 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector4.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector4.cs
@@ -320,6 +320,10 @@ public float this[int index]
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector4 CopySign(Vector4 value, Vector4 sign) => Vector128.CopySign(value.AsVector128(), sign.AsVector128()).AsVector4();
 
+        /// <inheritdoc cref="Vector128.Cos(Vector128{float})" />
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector4 Cos(Vector4 vector) => Vector128.Cos(vector.AsVector128()).AsVector4(); // thin wrapper: forwards to the Vector128<float> overload
+
         /// <summary>Creates a new <see cref="Vector4" /> object whose four elements have the same value.</summary>
         /// <param name="value">The value to assign to all four elements.</param>
         /// <returns>A new <see cref="Vector4" /> whose four elements have the same value.</returns>
@@ -557,6 +561,18 @@ public static Vector4 Create(Vector3 vector, float w)
         [Intrinsic]
         public static Vector4 Round(Vector4 vector, MidpointRounding mode) => Vector128.Round(vector.AsVector128(), mode).AsVector4();
 
+        /// <inheritdoc cref="Vector128.Sin(Vector128{float})" />
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector4 Sin(Vector4 vector) => Vector128.Sin(vector.AsVector128()).AsVector4(); // thin wrapper: forwards to the Vector128<float> overload
+
+        /// <inheritdoc cref="Vector128.SinCos(Vector128{float})" />
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static (Vector4 Sin, Vector4 Cos) SinCos(Vector4 vector)
+        {
+            (Vector128<float> sin, Vector128<float> cos) = Vector128.SinCos(vector.AsVector128()); // one Vector128 call produces both results
+            return (sin.AsVector4(), cos.AsVector4()); // convert each result back to Vector4
+        }
+
         /// <summary>Returns a vector whose elements are the square root of each of a specified vector's elements.</summary>
         /// <param name="value">A vector.</param>
         /// <returns>The square root vector.</returns>
diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/VectorMath.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/VectorMath.cs
deleted file mode 100644
index aea7b92581740..0000000000000
--- a/src/libraries/System.Private.CoreLib/src/System/Numerics/VectorMath.cs
+++ /dev/null
@@ -1,140 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-using System.Runtime.CompilerServices;
-using System.Runtime.Intrinsics;
-using System.Runtime.Intrinsics.Arm;
-using System.Runtime.Intrinsics.X86;
-
-namespace System.Numerics
-{
-    internal static class VectorMath
-    {
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        public static Vector128<float> ConditionalSelectBitwise(Vector128<float> selector, Vector128<float> ifTrue, Vector128<float> ifFalse)
-        {
-            // This implementation is based on the DirectX Math Library XMVector4NotEqual method
-            // https://github.com/microsoft/DirectXMath/blob/master/Inc/DirectXMathVector.inl
-
-            if (AdvSimd.IsSupported)
-            {
-                return AdvSimd.BitwiseSelect(selector, ifTrue, ifFalse);
-            }
-            else if (Sse.IsSupported)
-            {
-                return Sse.Or(Sse.And(ifTrue, selector), Sse.AndNot(selector, ifFalse));
-            }
-            else
-            {
-                // Redundant test so we won't prejit remainder of this method on platforms without AdvSimd.
-                throw new PlatformNotSupportedException();
-            }
-        }
-
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        public static Vector128<double> ConditionalSelectBitwise(Vector128<double> selector, Vector128<double> ifTrue, Vector128<double> ifFalse)
-        {
-            // This implementation is based on the DirectX Math Library XMVector4NotEqual method
-            // https://github.com/microsoft/DirectXMath/blob/master/Inc/DirectXMathVector.inl
-
-            if (AdvSimd.IsSupported)
-            {
-                return AdvSimd.BitwiseSelect(selector, ifTrue, ifFalse);
-            }
-            else if (Sse2.IsSupported)
-            {
-                return Sse2.Or(Sse2.And(ifTrue, selector), Sse2.AndNot(selector, ifFalse));
-            }
-            else
-            {
-                // Redundant test so we won't prejit remainder of this method on platforms without AdvSimd.
-                throw new PlatformNotSupportedException();
-            }
-        }
-
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        public static bool Equal(Vector128<float> vector1, Vector128<float> vector2)
-        {
-            // This implementation is based on the DirectX Math Library XMVector4Equal method
-            // https://github.com/microsoft/DirectXMath/blob/master/Inc/DirectXMathVector.inl
-
-            if (AdvSimd.Arm64.IsSupported)
-            {
-                Vector128<uint> vResult = AdvSimd.CompareEqual(vector1, vector2).AsUInt32();
-
-                Vector64<byte> vResult0 = vResult.GetLower().AsByte();
-                Vector64<byte> vResult1 = vResult.GetUpper().AsByte();
-
-                Vector64<byte> vTemp10 = AdvSimd.Arm64.ZipLow(vResult0, vResult1);
-                Vector64<byte> vTemp11 = AdvSimd.Arm64.ZipHigh(vResult0, vResult1);
-
-                Vector64<ushort> vTemp21 = AdvSimd.Arm64.ZipHigh(vTemp10.AsUInt16(), vTemp11.AsUInt16());
-                return vTemp21.AsUInt32().GetElement(1) == 0xFFFFFFFF;
-            }
-            else if (Sse.IsSupported)
-            {
-                return Sse.MoveMask(Sse.CompareNotEqual(vector1, vector2)) == 0;
-            }
-            else
-            {
-                // Redundant test so we won't prejit remainder of this method on platforms without AdvSimd.
-                throw new PlatformNotSupportedException();
-            }
-        }
-
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        public static Vector128<float> Lerp(Vector128<float> a, Vector128<float> b, Vector128<float> t)
-        {
-            // This implementation is based on the DirectX Math Library XMVectorLerp method
-            // https://github.com/microsoft/DirectXMath/blob/master/Inc/DirectXMathVector.inl
-
-            if (AdvSimd.IsSupported)
-            {
-                return AdvSimd.FusedMultiplyAdd(a, AdvSimd.Subtract(b, a), t);
-            }
-            else if (Fma.IsSupported)
-            {
-                return Fma.MultiplyAdd(Sse.Subtract(b, a), t, a);
-            }
-            else if (Sse.IsSupported)
-            {
-                return Sse.Add(Sse.Multiply(a, Sse.Subtract(Vector128.Create(1.0f), t)), Sse.Multiply(b, t));
-            }
-            else
-            {
-                // Redundant test so we won't prejit remainder of this method on platforms without AdvSimd.
-                throw new PlatformNotSupportedException();
-            }
-        }
-
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        public static bool NotEqual(Vector128<float> vector1, Vector128<float> vector2)
-        {
-            // This implementation is based on the DirectX Math Library XMVector4NotEqual method
-            // https://github.com/microsoft/DirectXMath/blob/master/Inc/DirectXMathVector.inl
-
-            if (AdvSimd.Arm64.IsSupported)
-            {
-                Vector128<uint> vResult = AdvSimd.CompareEqual(vector1, vector2).AsUInt32();
-
-                Vector64<byte> vResult0 = vResult.GetLower().AsByte();
-                Vector64<byte> vResult1 = vResult.GetUpper().AsByte();
-
-                Vector64<byte> vTemp10 = AdvSimd.Arm64.ZipLow(vResult0, vResult1);
-                Vector64<byte> vTemp11 = AdvSimd.Arm64.ZipHigh(vResult0, vResult1);
-
-                Vector64<ushort> vTemp21 = AdvSimd.Arm64.ZipHigh(vTemp10.AsUInt16(), vTemp11.AsUInt16());
-                return vTemp21.AsUInt32().GetElement(1) != 0xFFFFFFFF;
-            }
-            else if (Sse.IsSupported)
-            {
-                return Sse.MoveMask(Sse.CompareNotEqual(vector1, vector2)) != 0;
-            }
-            else
-            {
-                // Redundant test so we won't prejit remainder of this method on platforms without AdvSimd.
-                throw new PlatformNotSupportedException();
-            }
-        }
-    }
-}
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs
index aeb66aee91b4d..70c28d0924a83 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs
@@ -824,6 +824,47 @@ public static void CopyTo<T>(this Vector128<T> vector, Span<T> destination)
             Unsafe.WriteUnaligned(ref Unsafe.As<T, byte>(ref MemoryMarshal.GetReference(destination)), vector);
         }
 
+        /// <inheritdoc cref="Vector64.Cos(Vector64{double})" />
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector128<double> Cos(Vector128<double> vector)
+        {
+            if (!IsHardwareAccelerated)
+            {
+                // Software fallback: take the cosine of each 64-bit half, then recombine.
+                Vector64<double> lower = Vector64.Cos(vector._lower);
+                Vector64<double> upper = Vector64.Cos(vector._upper);
+
+                return Create(lower, upper);
+            }
+
+            // Accelerated path: hand the whole vector to the shared generic implementation.
+            return VectorMath.CosDouble<Vector128<double>, Vector128<long>>(vector);
+        }
+
+        /// <inheritdoc cref="Vector64.Cos(Vector64{float})" />
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector128<float> Cos(Vector128<float> vector)
+        {
+            if (!IsHardwareAccelerated)
+            {
+                // Software fallback: take the cosine of each 64-bit half, then recombine.
+                Vector64<float> lower = Vector64.Cos(vector._lower);
+                Vector64<float> upper = Vector64.Cos(vector._upper);
+
+                return Create(lower, upper);
+            }
+
+            // The shared implementation takes its double/long vector types as type arguments.
+            // Supply the 256-bit types when that width is accelerated as well.
+            if (Vector256.IsHardwareAccelerated)
+            {
+                return VectorMath.CosSingle<Vector128<float>, Vector128<int>, Vector256<double>, Vector256<long>>(vector);
+            }
+
+            // Otherwise stay with 128-bit double/long vectors.
+            return VectorMath.CosSingle<Vector128<float>, Vector128<int>, Vector128<double>, Vector128<long>>(vector);
+        }
+
         /// <summary>Creates a new <see cref="Vector128{T}" /> instance with all elements initialized to the specified value.</summary>
         /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <param name="value">The value that all elements will be initialized to.</param>
@@ -3160,6 +3201,94 @@ public static Vector128<double> Shuffle(Vector128<double> vector, Vector128<long
             return result;
         }
 
+        /// <inheritdoc cref="Vector64.Sin(Vector64{double})" />
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector128<double> Sin(Vector128<double> vector)
+        {
+            if (IsHardwareAccelerated)
+            {
+                return VectorMath.SinDouble<Vector128<double>, Vector128<long>>(vector);
+            }
+            else
+            {
+                return Create(
+                    Vector64.Sin(vector._lower),
+                    Vector64.Sin(vector._upper)
+                );
+            }
+        }
+
+        /// <inheritdoc cref="Vector64.Sin(Vector64{float})" />
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector128<float> Sin(Vector128<float> vector)
+        {
+            if (IsHardwareAccelerated)
+            {
+                if (Vector256.IsHardwareAccelerated)
+                {
+                    return VectorMath.SinSingle<Vector128<float>, Vector128<int>, Vector256<double>, Vector256<long>>(vector);
+                }
+                else
+                {
+                    return VectorMath.SinSingle<Vector128<float>, Vector128<int>, Vector128<double>, Vector128<long>>(vector);
+                }
+            }
+            else
+            {
+                return Create(
+                    Vector64.Sin(vector._lower),
+                    Vector64.Sin(vector._upper)
+                );
+            }
+        }
+
+        /// <inheritdoc cref="Vector64.SinCos(Vector64{double})" />
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static (Vector128<double> Sin, Vector128<double> Cos) SinCos(Vector128<double> vector)
+        {
+            if (IsHardwareAccelerated)
+            {
+                return VectorMath.SinCosDouble<Vector128<double>, Vector128<long>>(vector);
+            }
+            else
+            {
+                (Vector64<double> sinLower, Vector64<double> cosLower) = Vector64.SinCos(vector._lower);
+                (Vector64<double> sinUpper, Vector64<double> cosUpper) = Vector64.SinCos(vector._upper);
+
+                return (
+                    Create(sinLower, sinUpper),
+                    Create(cosLower, cosUpper)
+                );
+            }
+        }
+
+        /// <inheritdoc cref="Vector64.SinCos(Vector64{float})" />
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static (Vector128<float> Sin, Vector128<float> Cos) SinCos(Vector128<float> vector)
+        {
+            if (IsHardwareAccelerated)
+            {
+                if (Vector256.IsHardwareAccelerated)
+                {
+                    return VectorMath.SinCosSingle<Vector128<float>, Vector128<int>, Vector256<double>, Vector256<long>>(vector);
+                }
+                else
+                {
+                    return VectorMath.SinCosSingle<Vector128<float>, Vector128<int>, Vector128<double>, Vector128<long>>(vector);
+                }
+            }
+            else
+            {
+                (Vector64<float> sinLower, Vector64<float> cosLower) = Vector64.SinCos(vector._lower);
+                (Vector64<float> sinUpper, Vector64<float> cosUpper) = Vector64.SinCos(vector._upper);
+
+                return (
+                    Create(sinLower, sinUpper),
+                    Create(cosLower, cosUpper)
+                );
+            }
+        }
+
         /// <summary>Computes the square root of a vector on a per-element basis.</summary>
         /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <param name="vector">The vector whose square root is to be computed.</param>
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs
index f1eeb327d4887..c90524a56bdee 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs
@@ -663,6 +663,47 @@ public static void CopyTo<T>(this Vector256<T> vector, Span<T> destination)
             Unsafe.WriteUnaligned(ref Unsafe.As<T, byte>(ref MemoryMarshal.GetReference(destination)), vector);
         }
 
+        /// <inheritdoc cref="Vector128.Cos(Vector128{double})" />
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<double> Cos(Vector256<double> vector)
+        {
+            if (!IsHardwareAccelerated)
+            {
+                // Software fallback: take the cosine of each 128-bit half, then recombine.
+                Vector128<double> lower = Vector128.Cos(vector._lower);
+                Vector128<double> upper = Vector128.Cos(vector._upper);
+
+                return Create(lower, upper);
+            }
+
+            // Accelerated path: hand the whole vector to the shared generic implementation.
+            return VectorMath.CosDouble<Vector256<double>, Vector256<long>>(vector);
+        }
+
+        /// <inheritdoc cref="Vector128.Cos(Vector128{float})" />
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<float> Cos(Vector256<float> vector)
+        {
+            if (!IsHardwareAccelerated)
+            {
+                // Software fallback: take the cosine of each 128-bit half, then recombine.
+                Vector128<float> lower = Vector128.Cos(vector._lower);
+                Vector128<float> upper = Vector128.Cos(vector._upper);
+
+                return Create(lower, upper);
+            }
+
+            // The shared implementation takes its double/long vector types as type arguments.
+            // Supply the 512-bit types when that width is accelerated as well.
+            if (Vector512.IsHardwareAccelerated)
+            {
+                return VectorMath.CosSingle<Vector256<float>, Vector256<int>, Vector512<double>, Vector512<long>>(vector);
+            }
+
+            // Otherwise stay with 256-bit double/long vectors.
+            return VectorMath.CosSingle<Vector256<float>, Vector256<int>, Vector256<double>, Vector256<long>>(vector);
+        }
+
         /// <summary>Creates a new <see cref="Vector256{T}" /> instance with all elements initialized to the specified value.</summary>
         /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <param name="value">The value that all elements will be initialized to.</param>
@@ -3043,6 +3084,94 @@ public static Vector256<double> Shuffle(Vector256<double> vector, Vector256<long
             return result;
         }
 
+        /// <inheritdoc cref="Vector128.Sin(Vector128{double})" />
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<double> Sin(Vector256<double> vector)
+        {
+            if (IsHardwareAccelerated)
+            {
+                return VectorMath.SinDouble<Vector256<double>, Vector256<long>>(vector);
+            }
+            else
+            {
+                return Create(
+                    Vector128.Sin(vector._lower),
+                    Vector128.Sin(vector._upper)
+                );
+            }
+        }
+
+        /// <inheritdoc cref="Vector128.Sin(Vector128{float})" />
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<float> Sin(Vector256<float> vector)
+        {
+            if (IsHardwareAccelerated)
+            {
+                if (Vector512.IsHardwareAccelerated)
+                {
+                    return VectorMath.SinSingle<Vector256<float>, Vector256<int>, Vector512<double>, Vector512<long>>(vector);
+                }
+                else
+                {
+                    return VectorMath.SinSingle<Vector256<float>, Vector256<int>, Vector256<double>, Vector256<long>>(vector);
+                }
+            }
+            else
+            {
+                return Create(
+                    Vector128.Sin(vector._lower),
+                    Vector128.Sin(vector._upper)
+                );
+            }
+        }
+
+        /// <inheritdoc cref="Vector128.SinCos(Vector128{double})" />
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static (Vector256<double> Sin, Vector256<double> Cos) SinCos(Vector256<double> vector)
+        {
+            if (IsHardwareAccelerated)
+            {
+                return VectorMath.SinCosDouble<Vector256<double>, Vector256<long>>(vector);
+            }
+            else
+            {
+                (Vector128<double> sinLower, Vector128<double> cosLower) = Vector128.SinCos(vector._lower);
+                (Vector128<double> sinUpper, Vector128<double> cosUpper) = Vector128.SinCos(vector._upper);
+
+                return (
+                    Create(sinLower, sinUpper),
+                    Create(cosLower, cosUpper)
+                );
+            }
+        }
+
+        /// <inheritdoc cref="Vector128.SinCos(Vector128{float})" />
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static (Vector256<float> Sin, Vector256<float> Cos) SinCos(Vector256<float> vector)
+        {
+            if (IsHardwareAccelerated)
+            {
+                if (Vector512.IsHardwareAccelerated)
+                {
+                    return VectorMath.SinCosSingle<Vector256<float>, Vector256<int>, Vector512<double>, Vector512<long>>(vector);
+                }
+                else
+                {
+                    return VectorMath.SinCosSingle<Vector256<float>, Vector256<int>, Vector256<double>, Vector256<long>>(vector);
+                }
+            }
+            else
+            {
+                (Vector128<float> sinLower, Vector128<float> cosLower) = Vector128.SinCos(vector._lower);
+                (Vector128<float> sinUpper, Vector128<float> cosUpper) = Vector128.SinCos(vector._upper);
+
+                return (
+                    Create(sinLower, sinUpper),
+                    Create(cosLower, cosUpper)
+                );
+            }
+        }
+
         /// <summary>Computes the square root of a vector on a per-element basis.</summary>
         /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <param name="vector">The vector whose square root is to be computed.</param>
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs
index a1cd63812ad19..f5d3c0c1b9d6c 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs
@@ -589,6 +589,40 @@ public static void CopyTo<T>(this Vector512<T> vector, Span<T> destination)
             Unsafe.WriteUnaligned(ref Unsafe.As<T, byte>(ref MemoryMarshal.GetReference(destination)), vector);
         }
 
+        /// <inheritdoc cref="Vector256.Cos(Vector256{double})" />
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector512<double> Cos(Vector512<double> vector)
+        {
+            if (!IsHardwareAccelerated)
+            {
+                // Software fallback: take the cosine of each 256-bit half, then recombine.
+                Vector256<double> lower = Vector256.Cos(vector._lower);
+                Vector256<double> upper = Vector256.Cos(vector._upper);
+
+                return Create(lower, upper);
+            }
+
+            // Accelerated path: hand the whole vector to the shared generic implementation.
+            return VectorMath.CosDouble<Vector512<double>, Vector512<long>>(vector);
+        }
+
+        /// <inheritdoc cref="Vector256.Cos(Vector256{float})" />
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector512<float> Cos(Vector512<float> vector)
+        {
+            if (!IsHardwareAccelerated)
+            {
+                // Software fallback: take the cosine of each 256-bit half, then recombine.
+                Vector256<float> lower = Vector256.Cos(vector._lower);
+                Vector256<float> upper = Vector256.Cos(vector._upper);
+
+                return Create(lower, upper);
+            }
+
+            // Accelerated path: 512-bit types are supplied for the double/long type arguments too.
+            return VectorMath.CosSingle<Vector512<float>, Vector512<int>, Vector512<double>, Vector512<long>>(vector);
+        }
+
         /// <summary>Creates a new <see cref="Vector512{T}" /> instance with all elements initialized to the specified value.</summary>
         /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <param name="value">The value that all elements will be initialized to.</param>
@@ -3091,6 +3125,80 @@ public static Vector512<double> Shuffle(Vector512<double> vector, Vector512<long
             return result;
         }
 
+        /// <inheritdoc cref="Vector256.Sin(Vector256{double})" />
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector512<double> Sin(Vector512<double> vector)
+        {
+            if (IsHardwareAccelerated)
+            {
+                return VectorMath.SinDouble<Vector512<double>, Vector512<long>>(vector);
+            }
+            else
+            {
+                return Create(
+                    Vector256.Sin(vector._lower),
+                    Vector256.Sin(vector._upper)
+                );
+            }
+        }
+
+        /// <inheritdoc cref="Vector256.Sin(Vector256{float})" />
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector512<float> Sin(Vector512<float> vector)
+        {
+            if (IsHardwareAccelerated)
+            {
+                return VectorMath.SinSingle<Vector512<float>, Vector512<int>, Vector512<double>, Vector512<long>>(vector);
+            }
+            else
+            {
+                return Create(
+                    Vector256.Sin(vector._lower),
+                    Vector256.Sin(vector._upper)
+                );
+            }
+        }
+
+        /// <inheritdoc cref="Vector256.SinCos(Vector256{double})" />
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static (Vector512<double> Sin, Vector512<double> Cos) SinCos(Vector512<double> vector)
+        {
+            if (IsHardwareAccelerated)
+            {
+                return VectorMath.SinCosDouble<Vector512<double>, Vector512<long>>(vector);
+            }
+            else
+            {
+                (Vector256<double> sinLower, Vector256<double> cosLower) = Vector256.SinCos(vector._lower);
+                (Vector256<double> sinUpper, Vector256<double> cosUpper) = Vector256.SinCos(vector._upper);
+
+                return (
+                    Create(sinLower, sinUpper),
+                    Create(cosLower, cosUpper)
+                );
+            }
+        }
+
+        /// <inheritdoc cref="Vector256.SinCos(Vector256{float})" />
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static (Vector512<float> Sin, Vector512<float> Cos) SinCos(Vector512<float> vector)
+        {
+            if (IsHardwareAccelerated)
+            {
+                return VectorMath.SinCosSingle<Vector512<float>, Vector512<int>, Vector512<double>, Vector512<long>>(vector);
+            }
+            else
+            {
+                (Vector256<float> sinLower, Vector256<float> cosLower) = Vector256.SinCos(vector._lower);
+                (Vector256<float> sinUpper, Vector256<float> cosUpper) = Vector256.SinCos(vector._upper);
+
+                return (
+                    Create(sinLower, sinUpper),
+                    Create(cosLower, cosUpper)
+                );
+            }
+        }
+
         /// <summary>Computes the square root of a vector on a per-element basis.</summary>
         /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <param name="vector">The vector whose square root is to be computed.</param>
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs
index 1d2a61fe4284c..0d134862beb4b 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs
@@ -607,6 +607,59 @@ public static void CopyTo<T>(this Vector64<T> vector, Span<T> destination)
             Unsafe.WriteUnaligned(ref Unsafe.As<T, byte>(ref MemoryMarshal.GetReference(destination)), vector);
         }
 
+        // Scalar fallback: applies T.Cos to each element of the vector in index order.
+        internal static Vector64<T> Cos<T>(Vector64<T> vector)
+            where T : ITrigonometricFunctions<T>
+        {
+            // SkipInit is safe here because the loop below writes every element.
+            Unsafe.SkipInit(out Vector64<T> cosines);
+
+            for (int i = 0; i < Vector64<T>.Count; i++)
+            {
+                cosines.SetElementUnsafe(i, T.Cos(vector.GetElementUnsafe(i)));
+            }
+            return cosines;
+        }
+
+        /// <summary>Computes the cosine of each element in a vector.</summary>
+        /// <param name="vector">The vector whose elements are the inputs to the cosine computation.</param>
+        /// <returns>A vector whose elements are the cosines of the corresponding elements in <paramref name="vector" />.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector64<double> Cos(Vector64<double> vector)
+        {
+            if (!IsHardwareAccelerated)
+            {
+                // Software fallback: evaluate element by element.
+                return Cos<double>(vector);
+            }
+
+            // Accelerated path: hand the whole vector to the shared generic implementation.
+            return VectorMath.CosDouble<Vector64<double>, Vector64<long>>(vector);
+        }
+
+        /// <summary>Computes the cosine of each element in a vector.</summary>
+        /// <param name="vector">The vector whose elements are the inputs to the cosine computation.</param>
+        /// <returns>A vector whose elements are the cosines of the corresponding elements in <paramref name="vector" />.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector64<float> Cos(Vector64<float> vector)
+        {
+            if (!IsHardwareAccelerated)
+            {
+                // Software fallback: evaluate element by element.
+                return Cos<float>(vector);
+            }
+
+            // The shared implementation takes its double/long vector types as type arguments.
+            // Supply the 128-bit types when that width is accelerated as well.
+            if (Vector128.IsHardwareAccelerated)
+            {
+                return VectorMath.CosSingle<Vector64<float>, Vector64<int>, Vector128<double>, Vector128<long>>(vector);
+            }
+
+            // Otherwise stay with 64-bit double/long vectors.
+            return VectorMath.CosSingle<Vector64<float>, Vector64<int>, Vector64<double>, Vector64<long>>(vector);
+        }
+
         /// <summary>Creates a new <see cref="Vector64{T}" /> instance with all elements initialized to the specified value.</summary>
         /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <param name="value">The value that all elements will be initialized to.</param>
@@ -3018,6 +3071,114 @@ public static Vector64<float> Shuffle(Vector64<float> vector, Vector64<int> indi
             return result;
         }
 
+        // Scalar fallback: applies T.Sin to each element of the vector in index order.
+        internal static Vector64<T> Sin<T>(Vector64<T> vector)
+            where T : ITrigonometricFunctions<T>
+        {
+            // SkipInit is safe here because the loop below writes every element.
+            Unsafe.SkipInit(out Vector64<T> sines);
+
+            for (int i = 0; i < Vector64<T>.Count; i++)
+            {
+                sines.SetElementUnsafe(i, T.Sin(vector.GetElementUnsafe(i)));
+            }
+            return sines;
+        }
+
+        /// <summary>Computes the sine of each element in a vector.</summary>
+        /// <param name="vector">The vector whose elements are the inputs to the sine computation.</param>
+        /// <returns>A vector whose elements are the sines of the corresponding elements in <paramref name="vector" />.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector64<double> Sin(Vector64<double> vector)
+        {
+            if (!IsHardwareAccelerated)
+            {
+                // Software fallback: evaluate element by element.
+                return Sin<double>(vector);
+            }
+
+            // Accelerated path: hand the whole vector to the shared generic implementation.
+            return VectorMath.SinDouble<Vector64<double>, Vector64<long>>(vector);
+        }
+
+        /// <summary>Computes the sine of each element in a vector.</summary>
+        /// <param name="vector">The vector whose elements are the inputs to the sine computation.</param>
+        /// <returns>A vector whose elements are the sines of the corresponding elements in <paramref name="vector" />.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector64<float> Sin(Vector64<float> vector)
+        {
+            if (!IsHardwareAccelerated)
+            {
+                // Software fallback: evaluate element by element.
+                return Sin<float>(vector);
+            }
+
+            // The shared implementation takes its double/long vector types as type arguments.
+            // Supply the 128-bit types when that width is accelerated as well.
+            if (Vector128.IsHardwareAccelerated)
+            {
+                return VectorMath.SinSingle<Vector64<float>, Vector64<int>, Vector128<double>, Vector128<long>>(vector);
+            }
+
+            // Otherwise stay with 64-bit double/long vectors.
+            return VectorMath.SinSingle<Vector64<float>, Vector64<int>, Vector64<double>, Vector64<long>>(vector);
+        }
+
+        // Scalar fallback: calls T.SinCos once per element and stores the two results in separate vectors.
+        internal static (Vector64<T> Sin, Vector64<T> Cos) SinCos<T>(Vector64<T> vector)
+            where T : ITrigonometricFunctions<T>
+        {
+            Unsafe.SkipInit(out Vector64<T> sines);
+            Unsafe.SkipInit(out Vector64<T> cosines);
+
+            for (int i = 0; i < Vector64<T>.Count; i++)
+            {
+                (T Sin, T Cos) pair = T.SinCos(vector.GetElementUnsafe(i));
+                sines.SetElementUnsafe(i, pair.Sin);
+                cosines.SetElementUnsafe(i, pair.Cos);
+            }
+            return (sines, cosines);
+        }
+
+        /// <summary>Computes the sine and cosine of each element in a vector.</summary>
+        /// <param name="vector">The vector whose elements are the inputs to the computation.</param>
+        /// <returns>A tuple of two vectors: <c>Sin</c> holds the sine and <c>Cos</c> the cosine of each element in <paramref name="vector" />.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static (Vector64<double> Sin, Vector64<double> Cos) SinCos(Vector64<double> vector)
+        {
+            if (IsHardwareAccelerated)
+            {
+                return VectorMath.SinCosDouble<Vector64<double>, Vector64<long>>(vector);
+            }
+            else
+            {
+                return SinCos<double>(vector);
+            }
+        }
+
+        /// <summary>Computes the sine and cosine of each element in a vector.</summary>
+        /// <param name="vector">The vector whose elements are the inputs to the computation.</param>
+        /// <returns>A tuple of two vectors: <c>Sin</c> holds the sine and <c>Cos</c> the cosine of each element in <paramref name="vector" />.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static (Vector64<float> Sin, Vector64<float> Cos) SinCos(Vector64<float> vector)
+        {
+            if (IsHardwareAccelerated)
+            {
+                if (Vector128.IsHardwareAccelerated)
+                {
+                    return VectorMath.SinCosSingle<Vector64<float>, Vector64<int>, Vector128<double>, Vector128<long>>(vector);
+                }
+                else
+                {
+                    return VectorMath.SinCosSingle<Vector64<float>, Vector64<int>, Vector64<double>, Vector64<long>>(vector);
+                }
+            }
+            else
+            {
+                return SinCos<float>(vector);
+            }
+        }
+
         /// <summary>Computes the square root of a vector on a per-element basis.</summary>
         /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <param name="vector">The vector whose square root is to be computed.</param>
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/VectorMath.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/VectorMath.cs
index db2934b6bc595..066deab0dc410 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/VectorMath.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/VectorMath.cs
@@ -12,6 +12,300 @@ namespace System.Runtime.Intrinsics
 {
     internal static unsafe class VectorMath
     {
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static TVectorDouble CosDouble<TVectorDouble, TVectorInt64>(TVectorDouble x)
+            where TVectorDouble : unmanaged, ISimdVector<TVectorDouble, double>
+            where TVectorInt64 : unmanaged, ISimdVector<TVectorInt64, long>
+        {
+            // This code is based on `cos` from amd/aocl-libm-ose
+            // Copyright (C) 2008-2022 Advanced Micro Devices, Inc. All rights reserved.
+            //
+            // Licensed under the BSD 3-Clause "New" or "Revised" License
+            // See THIRD-PARTY-NOTICES.TXT for the full license text
+
+            // Implementation Notes
+            // ---------------------
+            // checks for special cases (upstream notes; the code below returns NaN for infinity and propagates NaN inputs)
+            // if ( ux = infinity) raise overflow exception and return x
+            // if x is NaN then raise invalid FP operation exception and return x.
+            //
+            // 1. Argument reduction
+            // if |x| > 5e5 then
+            //      __amd_remainder_piby2(x, &r, &rr, &region)
+            // else
+            //      Argument reduction
+            //      Let z = |x| * 2/pi
+            //      z = dn + r, where dn = round(z)
+            //      rhead =  dn * pi/2_head
+            //      rtail = dn * pi/2_tail
+            //      r = z - dn = |x| - rhead - rtail
+            //      expdiff = exp(dn) - exp(r)
+            //      if (expdiff > 15)
+            //      rtail = |x| - dn*pi/2_tail2
+            //      r = |x| -  dn*pi/2_head -  dn*pi/2_tail1 -  dn*pi/2_tail2  - (((rhead + rtail) - rhead) - rtail)
+            // rr = (|x| - rhead) - r + rtail
+            //
+            // 2. Polynomial approximation
+            // if(dn is even)
+            //       rr = rr * r;
+            //       x4 = x2 * x2;
+            //       s = 0.5 * x2;
+            //       t =  s - 1.0;
+            //       poly = x4 * (C1 + x2 * (C2 + x2 * (C3 + x2 * (C4 + x2 * (C5 + x2 * x6)))))
+            //       r = (((1.0 + t) - s) - rr) + poly - t
+            // else
+            //       x3 = x2 * r
+            //       poly = S2 + (r2 * (S3 + (r2 * (S4 + (r2 * (S5 + S6 * r2))))))
+            //       r = r - ((x2 * (0.5*rr - x3 * poly)) - rr) - S1 * x3
+            // if((sign + 1) & 2)
+            //       return r
+            // else
+            //       return -r;
+            //
+            // if |x| < pi/4 && |x| > 2.0^(-13)
+            //   cos(x) = 1.0 + x*x * (-0.5 + (C1*x*x + (C2*x*x + (C3*x*x
+            //                              + (C4*x*x + (C5*x*x + C6*x*x))))))
+            //
+            // if |x| < 2.0^(-13) && |x| > 2.0^(-27)
+            //   cos(x) = 1.0 - x*x*0.5;
+            //
+            // else return 1.0
+
+            const long ARG_LARGE = 0x3FE921FB54442D18;      // PI / 4
+            const long ARG_SMALL = 0x3F20000000000000;      // 2^-13
+            const long ARG_SMALLER = 0x3E40000000000000;    // 2^-27
+
+            TVectorDouble ax = TVectorDouble.Abs(x);
+            TVectorInt64 ux = Unsafe.BitCast<TVectorDouble, TVectorInt64>(ax);
+
+            TVectorDouble result;
+
+            if (TVectorInt64.LessThanOrEqualAll(ux, TVectorInt64.Create(ARG_LARGE)))
+            {
+                // We must be a finite value: (pi / 4) >= |x|
+                TVectorDouble x2 = x * x;
+
+                if (TVectorInt64.GreaterThanAny(ux, TVectorInt64.Create(ARG_SMALL - 1)))
+                {
+                    // at least one element is: |x| >= 2^-13
+                    result = TVectorDouble.MultiplyAddEstimate(
+                        TVectorDouble.MultiplyAddEstimate(
+                            CosDoublePoly(x),
+                            x2,
+                            TVectorDouble.Create(-0.5)),
+                        x2,
+                        TVectorDouble.One
+                    );
+                }
+                else
+                {
+                    result = TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(-0.5), x2, TVectorDouble.One);
+                }
+            }
+            else
+            {
+                // at least one element is: |x| > (pi / 4) -or- infinite -or- nan
+                (TVectorDouble r, TVectorDouble rr, TVectorInt64 region) = SinCosReduce<TVectorDouble, TVectorInt64>(ax);
+
+                TVectorDouble sin = SinDoubleLarge(r, rr);
+                TVectorDouble cos = CosDoubleLarge(r, rr);
+
+                result = TVectorDouble.ConditionalSelect(
+                    Unsafe.BitCast<TVectorInt64, TVectorDouble>(TVectorInt64.Equals(region & TVectorInt64.One, TVectorInt64.Zero)),
+                    cos,    // region 0 or 2
+                    sin     // region 1 or 3
+                );
+
+                result = TVectorDouble.ConditionalSelect(
+                    Unsafe.BitCast<TVectorInt64, TVectorDouble>(TVectorInt64.Equals((region + TVectorInt64.One) & TVectorInt64.Create(2), TVectorInt64.Zero)),
+                    +result,    // region 0 or 3
+                    -result     // region 1 or 2
+                );
+
+                // Propagate the NaN that was passed in
+                result = TVectorDouble.ConditionalSelect(
+                    TVectorDouble.IsNaN(x),
+                    x,
+                    result
+                );
+
+                // Return NaN for infinity
+                result = TVectorDouble.ConditionalSelect(
+                    TVectorDouble.IsPositiveInfinity(ax),
+                    TVectorDouble.Create(double.NaN),
+                    result
+                );
+            }
+
+            return TVectorDouble.ConditionalSelect(
+                Unsafe.BitCast<TVectorInt64, TVectorDouble>(TVectorInt64.GreaterThan(ux, TVectorInt64.Create(ARG_SMALLER - 1))),
+                result,             // for elements: |x| >= 2^-27, infinity, or NaN
+                TVectorDouble.One   // for elements: 2^-27 > |x|
+            );
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static TVectorSingle CosSingle<TVectorSingle, TVectorInt32, TVectorDouble, TVectorInt64>(TVectorSingle x)
+            where TVectorSingle : unmanaged, ISimdVector<TVectorSingle, float>
+            where TVectorInt32 : unmanaged, ISimdVector<TVectorInt32, int>
+            where TVectorDouble : unmanaged, ISimdVector<TVectorDouble, double>
+            where TVectorInt64 : unmanaged, ISimdVector<TVectorInt64, long>
+        {
+            // This code is based on `cosf` from amd/aocl-libm-ose
+            // Copyright (C) 2008-2022 Advanced Micro Devices, Inc. All rights reserved.
+            //
+            // Licensed under the BSD 3-Clause "New" or "Revised" License
+            // See THIRD-PARTY-NOTICES.TXT for the full license text
+
+            // Implementation Notes
+            // ---------------------
+            // Computes cos(x) per element; every path except the "all lanes below 2^-13" one evaluates in double and narrows.
+            // Special cases as implemented here: an infinite lane yields NaN and a NaN lane is returned as-is.
+            // When every lane has |x| <= pi/4, lanes with |x| < 2^-27 yield exactly 1.0.
+            //
+            // 1. Argument reduction
+            // if |x| > 5e5 then
+            //      __amd_remainder_piby2d2f((uint64_t)x, &r, &region)
+            // else
+            //      Argument reduction
+            //      Let z = |x| * 2/pi
+            //      z = dn + r, where dn = round(z)
+            //      rhead =  dn * pi/2_head
+            //      rtail = dn * pi/2_tail
+            //      r = z – dn = |x| - rhead – rtail
+            //      expdiff = exp(dn) – exp(r)
+            //      if(expdiff) > 15)
+            //      rtail = |x| - dn*pi/2_tail2
+            //      r = |x| -  dn*pi/2_head -  dn*pi/2_tail1
+            //          -  dn*pi/2_tail2  - (((rhead + rtail) – rhead )-rtail)
+            //
+            // 2. Polynomial approximation
+            // if(dn is even)
+            //       x4 = x2 * x2;
+            //       s = 0.5 * x2;
+            //       t =  1.0 - s;
+            //       poly = x4 * (C1 + x2 * (C2 + x2 * (C3 + x2 * C4 )))
+            //       r = t + poly
+            // else
+            //       x3 = x2 * r
+            //       poly = x3 * (S1 + x2 * (S2 + x2 * (S3 + x2 * S4)))
+            //       r = r + poly
+            // if(((region + 1) & 2) == 0)
+            //       return r
+            // else
+            //       return -r;
+            //
+            // if |x| < pi/4 && |x| > 2.0^(-13)
+            //   r = 0.5 * x2;
+            //   t = 1 - r;
+            //   cos(x) = t + ((1.0 - t) - r) + (x*x * (x*x * C1 + C2*x*x + C3*x*x
+            //             + C4*x*x +x*x*C5 + x*x*C6)))
+            //
+            // if |x| < 2.0^(-13) && |x| > 2.0^(-27)
+            //   cos(x) = 1.0 - x*x*0.5;;
+            //
+            // else return 1.0
+
+            const int ARG_LARGE = 0x3F490FDB;   // PI / 4
+            const int ARG_SMALL = 0x3C000000;   // 2^-13
+            const int ARG_SMALLER = 0x39000000; // 2^-27
+
+            TVectorSingle ax = TVectorSingle.Abs(x);
+            TVectorInt32 ux = Unsafe.BitCast<TVectorSingle, TVectorInt32>(ax); // bits of |x|, so a set sign bit cannot skew the integer compares
+
+            TVectorSingle result;
+
+            if (TVectorInt32.LessThanOrEqualAll(ux, TVectorInt32.Create(ARG_LARGE))) // ARG_* are bit patterns: compare as integers, not float values
+            {
+                // We must be a finite value: (pi / 4) >= |x|
+
+                if (TVectorInt32.GreaterThanAny(ux, TVectorInt32.Create(ARG_SMALL - 1)))
+                {
+                    // at least one element is: |x| >= 2^-13
+                    TVectorSingle x2 = x * x; // NOTE(review): SinCosSingle hands x (not x * x) to CosSingleSmall; confirm which one the helper expects
+
+                    if (TVectorSingle.Count == TVectorDouble.Count)
+                    {
+                        result = Narrow<TVectorDouble, TVectorSingle>(
+                            CosSingleSmall(Widen<TVectorSingle, TVectorDouble>(x2))
+                        );
+                    }
+                    else
+                    {
+                        result = Narrow<TVectorDouble, TVectorSingle>(
+                            CosSingleSmall(WidenLower<TVectorSingle, TVectorDouble>(x2)),
+                            CosSingleSmall(WidenUpper<TVectorSingle, TVectorDouble>(x2))
+                        );
+                    }
+                }
+                else
+                {
+                    // every element is: 2^-13 > |x|
+                    TVectorSingle x2 = x * x;
+                    result = TVectorSingle.MultiplyAddEstimate(TVectorSingle.Create(-0.5f), x2, TVectorSingle.One);
+                }
+            }
+            else
+            {
+                // at least one element is: |x| > (pi / 4) -or- infinite -or- nan
+
+                if (TVectorSingle.Count == TVectorDouble.Count)
+                {
+                    result = Narrow<TVectorDouble, TVectorSingle>(
+                        CoreImpl(Widen<TVectorSingle, TVectorDouble>(ax))
+                    );
+                }
+                else
+                {
+                    result = Narrow<TVectorDouble, TVectorSingle>(
+                        CoreImpl(WidenLower<TVectorSingle, TVectorDouble>(ax)),
+                        CoreImpl(WidenUpper<TVectorSingle, TVectorDouble>(ax))
+                    );
+                }
+
+                // Propagate the NaN that was passed in
+                result = TVectorSingle.ConditionalSelect(
+                    TVectorSingle.IsNaN(x),
+                    x,
+                    result
+                );
+
+                // Return NaN for infinity
+                return TVectorSingle.ConditionalSelect(
+                    TVectorSingle.IsPositiveInfinity(ax),
+                    TVectorSingle.Create(float.NaN),
+                    result
+                );
+            }
+
+            return TVectorSingle.ConditionalSelect(
+                Unsafe.BitCast<TVectorInt32, TVectorSingle>(TVectorInt32.GreaterThan(ux, TVectorInt32.Create(ARG_SMALLER - 1))),
+                result,             // for elements: |x| >= 2^-27
+                TVectorSingle.One   // for elements: 2^-27 > |x|
+            );
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            static TVectorDouble CoreImpl(TVectorDouble ax)
+            {
+                (TVectorDouble r, _, TVectorInt64 region) = SinCosReduce<TVectorDouble, TVectorInt64>(ax);
+
+                TVectorDouble sin = SinSinglePoly(r);
+                TVectorDouble cos = CosSingleLarge(r);
+
+                TVectorDouble result = TVectorDouble.ConditionalSelect(
+                    Unsafe.BitCast<TVectorInt64, TVectorDouble>(TVectorInt64.Equals(region & TVectorInt64.One, TVectorInt64.Zero)),
+                    cos,    // region 0 or 2
+                    sin     // region 1 or 3
+                );
+
+                return TVectorDouble.ConditionalSelect(
+                    Unsafe.BitCast<TVectorInt64, TVectorDouble>(TVectorInt64.Equals((region + TVectorInt64.One) & TVectorInt64.Create(2), TVectorInt64.Zero)),
+                    +result,    // region 0 or 3
+                    -result     // region 1 or 2
+                );
+            }
+        }
+
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static TVector CopySign<TVector, T>(TVector value, TVector sign)
             where TVector : unmanaged, ISimdVector<TVector, T>
@@ -1392,6 +1686,600 @@ public static TVectorSingle RoundSingle<TVectorSingle>(TVectorSingle vector, Mid
             }
         }
 
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static (TVectorDouble Sin, TVectorDouble Cos) SinCosDouble<TVectorDouble, TVectorInt64>(TVectorDouble x)
+            where TVectorDouble : unmanaged, ISimdVector<TVectorDouble, double>
+            where TVectorInt64 : unmanaged, ISimdVector<TVectorInt64, long>
+        {
+            // This code is based on `sin` and `cos` from amd/aocl-libm-ose
+            // Copyright (C) 2008-2022 Advanced Micro Devices, Inc. All rights reserved.
+            //
+            // Licensed under the BSD 3-Clause "New" or "Revised" License
+            // See THIRD-PARTY-NOTICES.TXT for the full license text
+
+            // Computes sin(x) and cos(x) per element in one pass; see SinDouble and CosDouble for implementation details
+
+            const long ARG_LARGE = 0x3FE921FB54442D18;      // PI / 4
+            const long ARG_SMALL = 0x3F20000000000000;      // 2^-13
+            const long ARG_SMALLER = 0x3E40000000000000;    // 2^-27
+
+            TVectorDouble ax = TVectorDouble.Abs(x);
+            TVectorInt64 ux = Unsafe.BitCast<TVectorDouble, TVectorInt64>(ax);
+
+            TVectorDouble sinResult, cosResult;
+
+            if (TVectorInt64.LessThanOrEqualAll(ux, TVectorInt64.Create(ARG_LARGE)))
+            {
+                // We must be a finite value: (pi / 4) >= |x|
+                TVectorDouble x2 = x * x;
+
+                if (TVectorInt64.GreaterThanAny(ux, TVectorInt64.Create(ARG_SMALL - 1)))
+                {
+                    // at least one element is: |x| >= 2^-13
+                    sinResult = SinDoublePoly(x);
+                    cosResult = TVectorDouble.MultiplyAddEstimate(
+                        TVectorDouble.MultiplyAddEstimate(
+                            CosDoublePoly(x),
+                            x2,
+                            TVectorDouble.Create(-0.5)),
+                        x2,
+                        TVectorDouble.One
+                    );
+                }
+                else
+                {
+                    // every element is: 2^-13 > |x|
+                    TVectorDouble x3 = x2 * x;
+                    sinResult = TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(-0.16666666666666666), x3, x);
+                    cosResult = TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(-0.5), x2, TVectorDouble.One);
+                }
+            }
+            else
+            {
+                // at least one element is: |x| > (pi / 4) -or- infinite -or- nan
+                (TVectorDouble r, TVectorDouble rr, TVectorInt64 region) = SinCosReduce<TVectorDouble, TVectorInt64>(ax);
+
+                TVectorDouble sin = SinDoubleLarge(r, rr);
+                TVectorDouble cos = CosDoubleLarge(r, rr);
+
+                TVectorDouble regionMask = Unsafe.BitCast<TVectorInt64, TVectorDouble>(TVectorInt64.Equals(region & TVectorInt64.One, TVectorInt64.Zero));
+
+                sinResult = TVectorDouble.ConditionalSelect(
+                    regionMask,
+                    sin,    // region 0 or 2
+                    cos     // region 1 or 3
+                );
+
+                cosResult = TVectorDouble.ConditionalSelect(
+                    regionMask,
+                    cos,    // region 0 or 2
+                    sin     // region 1 or 3
+                );
+
+                TVectorInt64 sign = Unsafe.BitCast<TVectorDouble, TVectorInt64>(x) >>> 63;
+
+                sinResult = TVectorDouble.ConditionalSelect( // the sine sign follows the upper region bit (region >> 1), not the whole quadrant number
+                    Unsafe.BitCast<TVectorInt64, TVectorDouble>(TVectorInt64.Equals(((sign & (region >>> 1)) | (~sign & ~(region >>> 1))) & TVectorInt64.One, TVectorInt64.Zero)),
+                    -sinResult, // (sign bit clear and region 2 or 3) -or- (sign bit set and region 0 or 1)
+                    +sinResult  // (sign bit clear and region 0 or 1) -or- (sign bit set and region 2 or 3)
+                );
+
+                cosResult = TVectorDouble.ConditionalSelect(
+                    Unsafe.BitCast<TVectorInt64, TVectorDouble>(TVectorInt64.Equals((region + TVectorInt64.One) & TVectorInt64.Create(2), TVectorInt64.Zero)),
+                    +cosResult, // region 0 or 3
+                    -cosResult  // region 1 or 2
+                );
+
+                // Propagate the NaN that was passed in
+                TVectorDouble nanMask = TVectorDouble.IsNaN(x);
+
+                sinResult = TVectorDouble.ConditionalSelect(
+                    nanMask,
+                    x,
+                    sinResult
+                );
+
+                cosResult = TVectorDouble.ConditionalSelect(
+                    nanMask,
+                    x,
+                    cosResult
+                );
+
+                // Return NaN for infinity
+                TVectorDouble infinityMask = TVectorDouble.IsPositiveInfinity(ax);
+
+                sinResult = TVectorDouble.ConditionalSelect(
+                    infinityMask,
+                    TVectorDouble.Create(double.NaN),
+                    sinResult
+                );
+
+                cosResult = TVectorDouble.ConditionalSelect(
+                    infinityMask,
+                    TVectorDouble.Create(double.NaN),
+                    cosResult
+                );
+            }
+
+            TVectorDouble argNotSmallerMask = Unsafe.BitCast<TVectorInt64, TVectorDouble>(TVectorInt64.GreaterThan(ux, TVectorInt64.Create(ARG_SMALLER - 1)));
+
+            sinResult = TVectorDouble.ConditionalSelect(
+                argNotSmallerMask,
+                sinResult,          // for elements: |x| >= 2^-27, infinity, or NaN
+                x                   // for elements: 2^-27 > |x|, where sin(x) rounds to x (keeps the sign of zero)
+            );
+
+            cosResult = TVectorDouble.ConditionalSelect(
+                argNotSmallerMask,
+                cosResult,          // for elements: |x| >= 2^-27, infinity, or NaN
+                TVectorDouble.One   // for elements: 2^-27 > |x|, where cos(x) rounds to 1
+            );
+
+            return (sinResult, cosResult);
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static (TVectorSingle Sin, TVectorSingle Cos) SinCosSingle<TVectorSingle, TVectorInt32, TVectorDouble, TVectorInt64>(TVectorSingle x)
+            where TVectorSingle : unmanaged, ISimdVector<TVectorSingle, float>
+            where TVectorInt32 : unmanaged, ISimdVector<TVectorInt32, int>
+            where TVectorDouble : unmanaged, ISimdVector<TVectorDouble, double>
+            where TVectorInt64 : unmanaged, ISimdVector<TVectorInt64, long>
+        {
+            // This code is based on `sinf` and `cosf` from amd/aocl-libm-ose
+            // Copyright (C) 2008-2022 Advanced Micro Devices, Inc. All rights reserved.
+            //
+            // Licensed under the BSD 3-Clause "New" or "Revised" License
+            // See THIRD-PARTY-NOTICES.TXT for the full license text
+
+            // Computes sin(x) and cos(x) per element in one pass; see SinSingle and CosSingle for implementation details
+
+            const int ARG_LARGE = 0x3F490FDB;   // PI / 4
+            const int ARG_SMALL = 0x3C000000;   // 2^-13
+            const int ARG_SMALLER = 0x39000000; // 2^-27
+
+            TVectorSingle ax = TVectorSingle.Abs(x);
+            TVectorInt32 ux = Unsafe.BitCast<TVectorSingle, TVectorInt32>(ax); // bits of |x|, so a set sign bit cannot skew the integer compares
+
+            TVectorSingle sinResult, cosResult;
+
+            if (TVectorInt32.LessThanOrEqualAll(ux, TVectorInt32.Create(ARG_LARGE))) // ARG_* are bit patterns: compare as integers, not float values
+            {
+                // We must be a finite value: (pi / 4) >= |x|
+
+                if (TVectorInt32.GreaterThanAny(ux, TVectorInt32.Create(ARG_SMALL - 1)))
+                {
+                    // at least one element is: |x| >= 2^-13
+
+                    if (TVectorSingle.Count == TVectorDouble.Count)
+                    {
+                        TVectorDouble dx = Widen<TVectorSingle, TVectorDouble>(x); // NOTE(review): CosSingle hands x * x to CosSingleSmall; confirm which is expected
+
+                        sinResult = Narrow<TVectorDouble, TVectorSingle>(
+                            SinSinglePoly(dx)
+                        );
+                        cosResult = Narrow<TVectorDouble, TVectorSingle>(
+                            CosSingleSmall(dx)
+                        );
+                    }
+                    else
+                    {
+                        TVectorDouble dxLo = WidenLower<TVectorSingle, TVectorDouble>(x);
+                        TVectorDouble dxHi = WidenUpper<TVectorSingle, TVectorDouble>(x);
+
+                        sinResult = Narrow<TVectorDouble, TVectorSingle>(
+                            SinSinglePoly(dxLo),
+                            SinSinglePoly(dxHi)
+                        );
+                        cosResult = Narrow<TVectorDouble, TVectorSingle>(
+                            CosSingleSmall(dxLo),
+                            CosSingleSmall(dxHi)
+                        );
+                    }
+                }
+                else
+                {
+                    // every element is: 2^-13 > |x|
+                    TVectorSingle x2 = x * x;
+                    TVectorSingle x3 = x2 * x;
+
+                    sinResult = TVectorSingle.MultiplyAddEstimate(TVectorSingle.Create(-0.16666667f), x3, x);
+                    cosResult = TVectorSingle.MultiplyAddEstimate(TVectorSingle.Create(-0.5f), x2, TVectorSingle.One);
+                }
+            }
+            else
+            {
+                // at least one element is: |x| > (pi / 4) -or- infinite -or- nan
+
+                if (TVectorSingle.Count == TVectorDouble.Count)
+                {
+                    (TVectorDouble sin, TVectorDouble cos) = CoreImpl(Widen<TVectorSingle, TVectorDouble>(x));
+
+                    sinResult = Narrow<TVectorDouble, TVectorSingle>(sin);
+                    cosResult = Narrow<TVectorDouble, TVectorSingle>(cos);
+                }
+                else
+                {
+                    (TVectorDouble sinLo, TVectorDouble cosLo) = CoreImpl(WidenLower<TVectorSingle, TVectorDouble>(x));
+                    (TVectorDouble sinHi, TVectorDouble cosHi) = CoreImpl(WidenUpper<TVectorSingle, TVectorDouble>(x));
+
+                    sinResult = Narrow<TVectorDouble, TVectorSingle>(sinLo, sinHi);
+                    cosResult = Narrow<TVectorDouble, TVectorSingle>(cosLo, cosHi);
+                }
+
+                // Propagate the NaN that was passed in
+                TVectorSingle nanMask = TVectorSingle.IsNaN(x);
+
+                sinResult = TVectorSingle.ConditionalSelect(
+                    nanMask,
+                    x,
+                    sinResult
+                );
+
+                cosResult = TVectorSingle.ConditionalSelect(
+                    nanMask,
+                    x,
+                    cosResult
+                );
+
+                // Return NaN for infinity
+                TVectorSingle infinityMask = TVectorSingle.IsPositiveInfinity(ax);
+
+                sinResult = TVectorSingle.ConditionalSelect(
+                    infinityMask,
+                    TVectorSingle.Create(float.NaN),
+                    sinResult
+                );
+
+                cosResult = TVectorSingle.ConditionalSelect(
+                    infinityMask,
+                    TVectorSingle.Create(float.NaN),
+                    cosResult
+                );
+
+                return (sinResult, cosResult);
+            }
+
+            TVectorSingle argNotSmallerMask = Unsafe.BitCast<TVectorInt32, TVectorSingle>(TVectorInt32.GreaterThan(ux, TVectorInt32.Create(ARG_SMALLER - 1)));
+
+            sinResult = TVectorSingle.ConditionalSelect(
+                argNotSmallerMask,
+                sinResult,          // for elements: |x| >= 2^-27
+                x                   // for elements: 2^-27 > |x|, where sin(x) rounds to x (keeps the sign of zero)
+            );
+
+            cosResult = TVectorSingle.ConditionalSelect(
+                argNotSmallerMask,
+                cosResult,          // for elements: |x| >= 2^-27
+                TVectorSingle.One   // for elements: 2^-27 > |x|, where cos(x) rounds to 1
+            );
+
+            return (sinResult, cosResult);
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            static (TVectorDouble Sin, TVectorDouble Cos) CoreImpl(TVectorDouble x)
+            {
+                TVectorDouble ax = TVectorDouble.Abs(x);
+                (TVectorDouble r, _, TVectorInt64 region) = SinCosReduce<TVectorDouble, TVectorInt64>(ax);
+
+                TVectorDouble sin = SinSinglePoly(r);
+                TVectorDouble cos = CosSingleLarge(r);
+
+                TVectorDouble regionMask = Unsafe.BitCast<TVectorInt64, TVectorDouble>(TVectorInt64.Equals(region & TVectorInt64.One, TVectorInt64.Zero));
+
+                TVectorDouble sinResult = TVectorDouble.ConditionalSelect(
+                    regionMask,
+                    sin,    // region 0 or 2
+                    cos     // region 1 or 3
+                );
+
+                TVectorDouble cosResult = TVectorDouble.ConditionalSelect(
+                    regionMask,
+                    cos,    // region 0 or 2
+                    sin     // region 1 or 3
+                );
+
+                TVectorInt64 sign = Unsafe.BitCast<TVectorDouble, TVectorInt64>(x) >>> 63;
+
+                sinResult = TVectorDouble.ConditionalSelect( // the sine sign follows the upper region bit (region >> 1), not the whole quadrant number
+                    Unsafe.BitCast<TVectorInt64, TVectorDouble>(TVectorInt64.Equals(((sign & (region >>> 1)) | (~sign & ~(region >>> 1))) & TVectorInt64.One, TVectorInt64.Zero)),
+                    -sinResult, // (sign bit clear and region 2 or 3) -or- (sign bit set and region 0 or 1)
+                    +sinResult  // (sign bit clear and region 0 or 1) -or- (sign bit set and region 2 or 3)
+                );
+
+                cosResult = TVectorDouble.ConditionalSelect(
+                    Unsafe.BitCast<TVectorInt64, TVectorDouble>(TVectorInt64.Equals((region + TVectorInt64.One) & TVectorInt64.Create(2), TVectorInt64.Zero)),
+                    +cosResult, // region 0 or 3
+                    -cosResult  // region 1 or 2
+                );
+
+                return (sinResult, cosResult);
+            }
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static TVectorDouble SinDouble<TVectorDouble, TVectorInt64>(TVectorDouble x)
+            where TVectorDouble : unmanaged, ISimdVector<TVectorDouble, double>
+            where TVectorInt64 : unmanaged, ISimdVector<TVectorInt64, long>
+        {
+            // This code is based on `sin` from amd/aocl-libm-ose
+            // Copyright (C) 2008-2022 Advanced Micro Devices, Inc. All rights reserved.
+            //
+            // Licensed under the BSD 3-Clause "New" or "Revised" License
+            // See THIRD-PARTY-NOTICES.TXT for the full license text
+
+            // Implementation Notes
+            // ---------------------
+            // Computes sin(x) per element.
+            // Special cases as implemented here: an infinite lane yields NaN and a NaN lane is returned as-is.
+            // Lanes with |x| < 2^-27 yield x itself, which also preserves the sign of zero.
+            //
+            // 1. Argument reduction
+            // if |x| > 5e5 then
+            //      __amd_remainder_piby2(x, &r, &rr, &region)
+            // else
+            //      Argument reduction
+            //      Let z = |x| * 2/pi
+            //      z = dn + r, where dn = round(z)
+            //      rhead =  dn * pi/2_head
+            //      rtail = dn * pi/2_tail
+            //      r = z – dn = |x| - rhead – rtail
+            //      expdiff = exp(dn) – exp(r)
+            //      if(expdiff) > 15)
+            //      rtail = |x| - dn*pi/2_tail2
+            //      r = |x| -  dn*pi/2_head -  dn*pi/2_tail1 -  dn*pi/2_tail2  - (((rhead + rtail) – rhead )-rtail)
+            // rr = (|x| – rhead) – r + rtail
+            //
+            // 2. Polynomial approximation
+            // if(dn is odd)
+            //       rr = rr * r;
+            //       x4 = x2 * x2;
+            //       s = 0.5 * x2;
+            //       t =  s - 1.0;
+            //       poly = x4 * (C1 + x2 * (C2 + x2 * (C3 + x2 * (C4 + x2 * (C5 + x2 * x6)))))
+            //       r = (((1.0 + t) - s) - rr) + poly – t
+            // else
+            //       x3 = x2 * r
+            //       poly = S2 + (r2 * (S3 + (r2 * (S4 + (r2 * (S5 + S6 * r2))))))
+            //       r = r - ((x2 * (0.5*rr - x3 * poly)) - rr) - S1 * x3
+            // if(((sign & (region >> 1)) | ((~sign) & (~(region >> 1)))) & 1)
+            //       return r
+            // else
+            //       return -r;
+            //
+            // if |x| < pi/4 && |x| > 2.0^(-13)
+            //   sin(x) = x + (x * (r2 * (S1 + r2 * (S2 + r2 * (S3 + r2 * (S4 + r2 * (S5 + r2 * S6)))))))
+            // if |x| < 2.0^(-13) && |x| > 2.0^(-27)
+            //   sin(x) = x - (x * x * x * (1/6));
+
+            const long ARG_LARGE = 0x3FE921FB54442D18;      // PI / 4
+            const long ARG_SMALL = 0x3F20000000000000;      // 2^-13
+            const long ARG_SMALLER = 0x3E40000000000000;    // 2^-27
+
+            TVectorDouble ax = TVectorDouble.Abs(x);
+            TVectorInt64 ux = Unsafe.BitCast<TVectorDouble, TVectorInt64>(ax);
+
+            TVectorDouble result;
+
+            if (TVectorInt64.LessThanOrEqualAll(ux, TVectorInt64.Create(ARG_LARGE)))
+            {
+                // We must be a finite value: (pi / 4) >= |x|
+                TVectorDouble x2 = x * x;
+
+                if (TVectorInt64.GreaterThanAny(ux, TVectorInt64.Create(ARG_SMALL - 1)))
+                {
+                    // at least one element is: |x| >= 2^-13
+                    result = SinDoublePoly(x);
+                }
+                else
+                {
+                    // every element is: 2^-13 > |x|
+                    TVectorDouble x3 = x2 * x;
+                    result = TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(-0.16666666666666666), x3, x);
+                }
+            }
+            else
+            {
+                // at least one element is: |x| > (pi / 4) -or- infinite -or- nan
+                (TVectorDouble r, TVectorDouble rr, TVectorInt64 region) = SinCosReduce<TVectorDouble, TVectorInt64>(ax);
+
+                TVectorDouble sin = SinDoubleLarge(r, rr);
+                TVectorDouble cos = CosDoubleLarge(r, rr);
+
+                result = TVectorDouble.ConditionalSelect(
+                    Unsafe.BitCast<TVectorInt64, TVectorDouble>(TVectorInt64.Equals(region & TVectorInt64.One, TVectorInt64.Zero)),
+                    sin,    // region 0 or 2
+                    cos     // region 1 or 3
+                );
+
+                TVectorInt64 sign = Unsafe.BitCast<TVectorDouble, TVectorInt64>(x) >>> 63;
+
+                result = TVectorDouble.ConditionalSelect( // the sine sign follows the upper region bit (region >> 1), not the whole quadrant number
+                    Unsafe.BitCast<TVectorInt64, TVectorDouble>(TVectorInt64.Equals(((sign & (region >>> 1)) | (~sign & ~(region >>> 1))) & TVectorInt64.One, TVectorInt64.Zero)),
+                    -result,    // (sign bit clear and region 2 or 3) -or- (sign bit set and region 0 or 1)
+                    +result     // (sign bit clear and region 0 or 1) -or- (sign bit set and region 2 or 3)
+                );
+
+                // Propagate the NaN that was passed in
+                result = TVectorDouble.ConditionalSelect(
+                    TVectorDouble.IsNaN(x),
+                    x,
+                    result
+                );
+
+                // Return NaN for infinity
+                result = TVectorDouble.ConditionalSelect(
+                    TVectorDouble.IsPositiveInfinity(ax),
+                    TVectorDouble.Create(double.NaN),
+                    result
+                );
+            }
+
+            return TVectorDouble.ConditionalSelect(
+                Unsafe.BitCast<TVectorInt64, TVectorDouble>(TVectorInt64.GreaterThan(ux, TVectorInt64.Create(ARG_SMALLER - 1))),
+                result,             // for elements: |x| >= 2^-27, infinity, or NaN
+                x                   // for elements: 2^-27 > |x|, where sin(x) rounds to x (keeps the sign of zero)
+            );
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static TVectorSingle SinSingle<TVectorSingle, TVectorInt32, TVectorDouble, TVectorInt64>(TVectorSingle x)
+            where TVectorSingle : unmanaged, ISimdVector<TVectorSingle, float>
+            where TVectorInt32 : unmanaged, ISimdVector<TVectorInt32, int>
+            where TVectorDouble : unmanaged, ISimdVector<TVectorDouble, double>
+            where TVectorInt64 : unmanaged, ISimdVector<TVectorInt64, long>
+        {
+            // This code is based on `sinf` from amd/aocl-libm-ose
+            // Copyright (C) 2008-2022 Advanced Micro Devices, Inc. All rights reserved.
+            //
+            // Licensed under the BSD 3-Clause "New" or "Revised" License
+            // See THIRD-PARTY-NOTICES.TXT for the full license text
+
+            // Implementation Notes
+            // ---------------------
+            // Computes sin(x) per element; every path except the "all lanes below 2^-13" one evaluates in double and narrows.
+            // Special cases as implemented here: an infinite lane yields NaN and a NaN lane is returned as-is.
+            // When every lane has |x| <= pi/4, lanes with |x| < 2^-27 yield x itself (preserving the sign of zero).
+            //
+            // 1. Argument reduction
+            // if |x| > 5e5 then
+            //      __amd_remainder_piby2(x, &r, &rr, &region)
+            // else
+            //      Argument reduction
+            //      Let z = |x| * 2/pi
+            //      z = dn + r, where dn = round(z)
+            //      rhead =  dn * pi/2_head
+            //      rtail = dn * pi/2_tail
+            //      r = z – dn = |x| - rhead – rtail
+            //      expdiff = exp(dn) – exp(r)
+            //      if(expdiff) > 15)
+            //      rtail = |x| - dn*pi/2_tail2
+            //      r = |x| -  dn*pi/2_head -  dn*pi/2_tail1 -  dn*pi/2_tail2  - (((rhead + rtail) – rhead )-rtail)
+            // rr = (|x| – rhead) – r + rtail
+            //
+            // 2. Polynomial approximation
+            // if(dn is odd)
+            //       rr = rr * r;
+            //       x4 = x2 * x2;
+            //       s = 0.5 * x2;
+            //       t =  s - 1.0;
+            //       poly = x4 * (C1 + x2 * (C2 + x2 * (C3 + x2 * (C4))))
+            //       r = (((1.0 + t) - s) - rr) + poly – t
+            // else
+            //       x3 = x2 * r
+            //       poly = S2 + (r2 * (S3 + (r2 * (S4))))
+            //       r = r - ((x2 * (0.5*rr - x3 * poly)) - rr) - S1 * x3
+            // if(((sign & (region >> 1)) | ((~sign) & (~(region >> 1)))) & 1)
+            //       return r
+            // else
+            //       return -r;
+            //
+            // if |x| < pi/4 && |x| > 2.0^(-13)
+            //   sin(x) = x + (x * (r2 * (S1 + r2 * (S2 + r2 * (S3 + r2 * (S4)))))
+            // if |x| < 2.0^(-13) && |x| > 2.0^(-27)
+            //   sin(x) = x - (x * x * x * (1/6));
+
+            const int ARG_LARGE = 0x3F490FDB;   // PI / 4
+            const int ARG_SMALL = 0x3C000000;   // 2^-13
+            const int ARG_SMALLER = 0x39000000; // 2^-27
+
+            TVectorSingle ax = TVectorSingle.Abs(x);
+            TVectorInt32 ux = Unsafe.BitCast<TVectorSingle, TVectorInt32>(ax); // bits of |x|, so a set sign bit cannot skew the integer compares
+
+            TVectorSingle result;
+
+            if (TVectorInt32.LessThanOrEqualAll(ux, TVectorInt32.Create(ARG_LARGE))) // ARG_* are bit patterns: compare as integers, not float values
+            {
+                // We must be a finite value: (pi / 4) >= |x|
+
+                if (TVectorInt32.GreaterThanAny(ux, TVectorInt32.Create(ARG_SMALL - 1)))
+                {
+                    // at least one element is: |x| >= 2^-13
+
+                    if (TVectorSingle.Count == TVectorDouble.Count)
+                    {
+                        result = Narrow<TVectorDouble, TVectorSingle>(
+                            SinSinglePoly(Widen<TVectorSingle, TVectorDouble>(x))
+                        );
+                    }
+                    else
+                    {
+                        result = Narrow<TVectorDouble, TVectorSingle>(
+                            SinSinglePoly(WidenLower<TVectorSingle, TVectorDouble>(x)),
+                            SinSinglePoly(WidenUpper<TVectorSingle, TVectorDouble>(x))
+                        );
+                    }
+                }
+                else
+                {
+                    // every element is: 2^-13 > |x|
+                    TVectorSingle x3 = (x * x) * x;
+                    result = TVectorSingle.MultiplyAddEstimate(TVectorSingle.Create(-0.16666667f), x3, x);
+                }
+            }
+            else
+            {
+                // at least one element is: |x| > (pi / 4) -or- infinite -or- nan
+
+                if (TVectorSingle.Count == TVectorDouble.Count)
+                {
+                    result = Narrow<TVectorDouble, TVectorSingle>(
+                        CoreImpl(Widen<TVectorSingle, TVectorDouble>(x))
+                    );
+                }
+                else
+                {
+                    result = Narrow<TVectorDouble, TVectorSingle>(
+                        CoreImpl(WidenLower<TVectorSingle, TVectorDouble>(x)),
+                        CoreImpl(WidenUpper<TVectorSingle, TVectorDouble>(x))
+                    );
+                }
+
+                // Propagate the NaN that was passed in
+                result = TVectorSingle.ConditionalSelect(
+                    TVectorSingle.IsNaN(x),
+                    x,
+                    result
+                );
+
+                // Return NaN for infinity
+                return TVectorSingle.ConditionalSelect(
+                    TVectorSingle.IsPositiveInfinity(ax),
+                    TVectorSingle.Create(float.NaN),
+                    result
+                );
+            }
+
+            return TVectorSingle.ConditionalSelect(
+                Unsafe.BitCast<TVectorInt32, TVectorSingle>(TVectorInt32.GreaterThan(ux, TVectorInt32.Create(ARG_SMALLER - 1))),
+                result,             // for elements: |x| >= 2^-27
+                x                   // for elements: 2^-27 > |x|, where sin(x) rounds to x (keeps the sign of zero)
+            );
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            static TVectorDouble CoreImpl(TVectorDouble x)
+            {
+                TVectorDouble ax = TVectorDouble.Abs(x);
+                (TVectorDouble r, _, TVectorInt64 region) = SinCosReduce<TVectorDouble, TVectorInt64>(ax);
+
+                TVectorDouble sin = SinSinglePoly(r);
+                TVectorDouble cos = CosSingleLarge(r);
+
+                TVectorDouble result = TVectorDouble.ConditionalSelect(
+                    Unsafe.BitCast<TVectorInt64, TVectorDouble>(TVectorInt64.Equals(region & TVectorInt64.One, TVectorInt64.Zero)),
+                    sin,    // region 0 or 2
+                    cos     // region 1 or 3
+                );
+
+                TVectorInt64 sign = Unsafe.BitCast<TVectorDouble, TVectorInt64>(x) >>> 63;
+
+                return TVectorDouble.ConditionalSelect( // the sine sign follows the upper region bit (region >> 1), not the whole quadrant number
+                    Unsafe.BitCast<TVectorInt64, TVectorDouble>(TVectorInt64.Equals(((sign & (region >>> 1)) | (~sign & ~(region >>> 1))) & TVectorInt64.One, TVectorInt64.Zero)),
+                    -result,    // (sign bit clear and region 2 or 3) -or- (sign bit set and region 0 or 1)
+                    +result     // (sign bit clear and region 0 or 1) -or- (sign bit set and region 2 or 3)
+                );
+            }
+        }
+
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         private static TVectorDouble ConvertToDouble<TVectorInt64, TVectorDouble>(TVectorInt64 vector)
             where TVectorDouble : unmanaged, ISimdVector<TVectorDouble, double>
@@ -1462,6 +2350,96 @@ private static TVectorSingle ConvertToSingle<TVectorInt32, TVectorSingle>(TVecto
             return result;
         }
 
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static TVectorDouble CosDoubleLarge<TVectorDouble>(TVectorDouble r, TVectorDouble rr)
+            where TVectorDouble : unmanaged, ISimdVector<TVectorDouble, double>
+        {   // Cosine kernel over r with the extra term rr (presumably the low-order tail of the reduced argument - TODO confirm against callers)
+            TVectorDouble r2 = r * r;
+            TVectorDouble r4 = r2 * r2;
+
+            TVectorDouble s = r2 * 0.5;                 // s = r^2 / 2
+            TVectorDouble t = s - TVectorDouble.One;    // t = s - 1, so the trailing "- t" contributes 1 - r^2 / 2
+
+            return TVectorDouble.MultiplyAddEstimate(
+                CosDoublePoly(r),
+                r4,
+                TVectorDouble.MultiplyAddEstimate(r, rr, ((TVectorDouble.One + t) - s))    // (1 + t) - s is algebraically zero; presumably kept to recover the rounding error made when forming t
+            ) - t;                                      // overall: CosDoublePoly(r) * r^4 + (r * rr + ((1 + t) - s)) - t
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static TVectorDouble CosDoublePoly<TVectorDouble>(TVectorDouble r)
+            where TVectorDouble : unmanaged, ISimdVector<TVectorDouble, double>
+        {   // Evaluates C1 + C2*r^2 + C3*r^4 + C4*r^6 + C5*r^8 + C6*r^10; CosDoubleLarge scales the result by r^4
+            const double C1 = +0.041666666666666664;    // close to +1 / 4!
+            const double C2 = -0.0013888888888887398;   // close to -1 / 6!
+            const double C3 = +2.4801587298767044E-05;  // close to +1 / 8!
+            const double C4 = -2.755731727234489E-07;   // close to -1 / 10!
+            const double C5 = +2.0876146382372144E-09;  // close to +1 / 12!
+            const double C6 = -1.138263981623609E-11;   // close to -1 / 14!
+
+            TVectorDouble r2 = r * r;
+            TVectorDouble r4 = r2 * r2;
+            TVectorDouble r8 = r4 * r4;
+
+            return TVectorDouble.MultiplyAddEstimate(
+                TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(C6), r2, TVectorDouble.Create(C5)),      // (C6*r^2 + C5), multiplied by r^8 below
+                r8,
+                TVectorDouble.MultiplyAddEstimate(
+                    TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(C4), r2, TVectorDouble.Create(C3)),  // (C4*r^2 + C3), multiplied by r^4 below
+                    r4,
+                    TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(C2), r2, TVectorDouble.Create(C1))   // (C2*r^2 + C1)
+                )
+            );
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static TVectorDouble CosSingleLarge<TVectorDouble>(TVectorDouble r)
+            where TVectorDouble : unmanaged, ISimdVector<TVectorDouble, double>
+        {   // Cosine kernel used by the single-precision path: 1 - r^2 / 2 + r^4 * CosSinglePoly(r)
+            TVectorDouble r2 = r * r;
+            TVectorDouble r4 = r2 * r2;
+
+            return TVectorDouble.MultiplyAddEstimate(
+                CosSinglePoly(r),
+                r4,
+                TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(-0.5), r2, TVectorDouble.One)    // 1 - r^2 / 2
+            );
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static TVectorDouble CosSinglePoly<TVectorDouble>(TVectorDouble r)
+            where TVectorDouble : unmanaged, ISimdVector<TVectorDouble, double>
+        {   // Evaluates C1 + C2*r^2 + C3*r^4 + C4*r^6; callers scale the result by the fourth power of the argument
+            const double C1 = +0.041666666666666664;    // close to +1 / 4!
+            const double C2 = -0.0013888888888887398;   // close to -1 / 6!
+            const double C3 = +2.4801587298767044E-05;  // close to +1 / 8!
+            const double C4 = -2.755731727234489E-07;   // close to -1 / 10!
+
+            TVectorDouble r2 = r * r;
+            TVectorDouble r4 = r2 * r2;
+
+            return TVectorDouble.MultiplyAddEstimate(
+                TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(C4), r2, TVectorDouble.Create(C3)),  // (C4*r^2 + C3), multiplied by r^4 below
+                r4,
+                TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(C2), r2, TVectorDouble.Create(C1))   // (C2*r^2 + C1)
+            );
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static TVectorDouble CosSingleSmall<TVectorDouble>(TVectorDouble x)
+            where TVectorDouble : unmanaged, ISimdVector<TVectorDouble, double>
+        {   // Cosine kernel applied directly to x (no region handling here): CosSinglePoly(x) * x^4 + s, where s ~= 1 - x^2 / 2
+            TVectorDouble x2 = x * x;
+            TVectorDouble x4 = x2 * x2;
+
+            TVectorDouble r = x2 * 0.5;                         // r = x^2 / 2
+            TVectorDouble t = TVectorDouble.One - r;            // t = 1 - r
+            TVectorDouble s = t + (TVectorDouble.One - t - r);  // (1 - t - r) is algebraically zero; presumably folds the rounding error of t back in
+
+            return TVectorDouble.MultiplyAddEstimate(CosSinglePoly(x), x4, s);
+        }
+
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         private static TVector Create<TVector, T>(double value)
             where TVector : unmanaged, ISimdVector<TVector, T>
@@ -1627,6 +2605,124 @@ private static TVectorSingle Narrow<TVectorDouble, TVectorSingle>(TVectorDouble
             return result;
         }
 
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static TVectorDouble SinDoubleLarge<TVectorDouble>(TVectorDouble r, TVectorDouble rr)
+            where TVectorDouble : unmanaged, ISimdVector<TVectorDouble, double>
+        {   // Sine kernel over r with the extra term rr (presumably the low-order tail of the reduced argument - TODO confirm against callers)
+            const double S1 = -0.16666666666666666;         // close to -1 / 3!
+            const double S2 = +0.00833333333333095;         // close to +1 / 5!
+            const double S3 = -0.00019841269836761127;      // close to -1 / 7!
+            const double S4 = +2.7557316103728802E-06;      // close to +1 / 9!
+            const double S5 = -2.5051132068021698E-08;      // close to -1 / 11!
+            const double S6 = +1.5918144304485914E-10;      // close to +1 / 13!
+
+            TVectorDouble r2 = r * r;
+            TVectorDouble r3 = r2 * r;
+            TVectorDouble r4 = r2 * r2;
+            TVectorDouble r8 = r4 * r4;
+
+            TVectorDouble sinPoly = TVectorDouble.MultiplyAddEstimate(     // S2 + S3*r^2 + S4*r^4 + S5*r^6 + S6*r^8
+                TVectorDouble.Create(S6),
+                r8,
+                TVectorDouble.MultiplyAddEstimate(
+                    TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(S5), r2, TVectorDouble.Create(S4)),
+                    r4,
+                    TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(S3), r2, TVectorDouble.Create(S2))
+                )
+            );
+
+            return r - TVectorDouble.MultiplyAddEstimate(                  // r - ((r^2 * (rr / 2 - r^3 * sinPoly) - rr) - S1 * r^3)
+                TVectorDouble.Create(-S1),
+                r3,
+                TVectorDouble.MultiplyAddEstimate(
+                    TVectorDouble.MultiplyAddEstimate(rr, TVectorDouble.Create(0.5), -(r3 * sinPoly)),  // rr / 2 - r^3 * sinPoly
+                    r2,
+                    -rr
+                )
+            );
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static TVectorDouble SinDoublePoly<TVectorDouble>(TVectorDouble r)
+            where TVectorDouble : unmanaged, ISimdVector<TVectorDouble, double>
+        {   // Evaluates r + r^3 * (S1 + S2*r^2 + S3*r^4 + S4*r^6 + S5*r^8 + S6*r^10)
+            const double S1 = -0.16666666666666666;         // close to -1 / 3!
+            const double S2 = +0.00833333333333095;         // close to +1 / 5!
+            const double S3 = -0.00019841269836761127;      // close to -1 / 7!
+            const double S4 = +2.7557316103728802E-06;      // close to +1 / 9!
+            const double S5 = -2.5051132068021698E-08;      // close to -1 / 11!
+            const double S6 = +1.5918144304485914E-10;      // close to +1 / 13!
+
+            TVectorDouble r2 = r * r;
+            TVectorDouble r3 = r2 * r;
+            TVectorDouble r4 = r2 * r2;
+            TVectorDouble r8 = r4 * r4;
+
+            TVectorDouble poly = TVectorDouble.MultiplyAddEstimate(
+                TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(S6), r2, TVectorDouble.Create(S5)),      // (S6*r^2 + S5), multiplied by r^8 below
+                r8,
+                TVectorDouble.MultiplyAddEstimate(
+                    TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(S4), r2, TVectorDouble.Create(S3)),  // (S4*r^2 + S3), multiplied by r^4 below
+                    r4,
+                    TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(S2), r2, TVectorDouble.Create(S1)))  // (S2*r^2 + S1)
+            );
+
+            return TVectorDouble.MultiplyAddEstimate(poly, r3, r);    // poly * r^3 + r
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static TVectorDouble SinSinglePoly<TVectorDouble>(TVectorDouble r)
+            where TVectorDouble : unmanaged, ISimdVector<TVectorDouble, double>
+        {   // Evaluates r + r^3 * (S1 + S2*r^2 + S3*r^4 + S4*r^6)
+            const double S1 = -0.16666666666666666;         // close to -1 / 3!
+            const double S2 = +0.00833333333333095;         // close to +1 / 5!
+            const double S3 = -0.00019841269836761127;      // close to -1 / 7!
+            const double S4 = +2.7557316103728802E-06;      // close to +1 / 9!
+
+            TVectorDouble r2 = r * r;
+            TVectorDouble r3 = r2 * r;
+            TVectorDouble r4 = r2 * r2;
+
+            return TVectorDouble.MultiplyAddEstimate(
+                TVectorDouble.MultiplyAddEstimate(
+                    TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(S4), r2, TVectorDouble.Create(S3)),  // (S4*r^2 + S3), multiplied by r^4 below
+                    r4,
+                    TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(S2), r2, TVectorDouble.Create(S1))), // (S2*r^2 + S1)
+                r3,
+                r
+            );
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static (TVectorDouble r, TVectorDouble rr, TVectorInt64 region) SinCosReduce<TVectorDouble, TVectorInt64>(TVectorDouble ax)
+            where TVectorDouble : unmanaged, ISimdVector<TVectorDouble, double>
+            where TVectorInt64 : unmanaged, ISimdVector<TVectorInt64, long>
+        {   // Returns the reduced value r, the residual rr = (rhead - r) - rtail, and region (raw bits of the shifted multiple of pi / 2)
+            // Reduce the argument to be in a range from (-pi / 4) to (+pi / 4) by subtracting multiples of (pi / 2)
+
+            const double V_ALM_SHIFT = 6755399441055744.0;  // 1.5 * 2^52
+            const double V_TWO_BY_PI = 0.6366197723675814;  // 2 / pi
+
+            const double V_PI_BY_TWO_1 = 1.5707963267341256;            // leading part of pi / 2
+            const double V_PI_BY_TWO_2 = 6.077100506303966E-11;         // (pi / 2) - V_PI_BY_TWO_1, to double precision
+            const double V_PI_BY_TWO_2_TAIL = 2.0222662487959506E-21;   // presumably the part of pi / 2 left over after the two terms above
+
+            // npi2 = (|x| * 2 / pi) + V_ALM_SHIFT; region keeps the raw bits of that sum (callers test its low bits), then the shift is removed again
+            TVectorDouble npi2 = TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(V_TWO_BY_PI), ax, TVectorDouble.Create(V_ALM_SHIFT));
+            TVectorInt64 region = Unsafe.BitCast<TVectorDouble, TVectorInt64>(npi2);
+            npi2 -= TVectorDouble.Create(V_ALM_SHIFT);
+
+            TVectorDouble rhead = TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(-V_PI_BY_TWO_1), npi2, ax);   // ax - npi2 * V_PI_BY_TWO_1
+            TVectorDouble rtail = npi2 * V_PI_BY_TWO_2;
+            TVectorDouble r = rhead - rtail;
+
+            rtail = TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(V_PI_BY_TWO_2_TAIL), npi2, -(rhead - r - rtail));   // next correction minus what the subtraction above lost
+            rhead = r;
+            r -= rtail;
+
+            return (r, (rhead - r) - rtail, region);
+        }
+
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         private static TVectorDouble Widen<TVectorSingle, TVectorDouble>(TVectorSingle vector)
             where TVectorSingle : unmanaged, ISimdVector<TVectorSingle, float>
diff --git a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs
index 221461a0c82a5..16a90de80ade9 100644
--- a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs
+++ b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs
@@ -71,6 +71,8 @@ public static partial class Vector128
         public static void CopyTo<T>(this System.Runtime.Intrinsics.Vector128<T> vector, System.Span<T> destination) { }
         public static void CopyTo<T>(this System.Runtime.Intrinsics.Vector128<T> vector, T[] destination) { }
         public static void CopyTo<T>(this System.Runtime.Intrinsics.Vector128<T> vector, T[] destination, int startIndex) { }
+        public static System.Runtime.Intrinsics.Vector128<double> Cos(System.Runtime.Intrinsics.Vector128<double> vector) { throw null; }
+        public static System.Runtime.Intrinsics.Vector128<float> Cos(System.Runtime.Intrinsics.Vector128<float> vector) { throw null; }
         public static System.Runtime.Intrinsics.Vector128<System.Byte> Create(byte value) { throw null; }
         public static System.Runtime.Intrinsics.Vector128<System.Byte> Create(byte e0, byte e1, byte e2, byte e3, byte e4, byte e5, byte e6, byte e7, byte e8, byte e9, byte e10, byte e11, byte e12, byte e13, byte e14, byte e15) { throw null; }
         public static System.Runtime.Intrinsics.Vector128<System.Double> Create(double value) { throw null; }
@@ -301,6 +303,10 @@ public static void CopyTo<T>(this System.Runtime.Intrinsics.Vector128<T> vector,
         [System.CLSCompliantAttribute(false)]
         public static System.Runtime.Intrinsics.Vector128<ulong> Shuffle(System.Runtime.Intrinsics.Vector128<ulong> vector, System.Runtime.Intrinsics.Vector128<ulong> indices) { throw null; }
         public static System.Runtime.Intrinsics.Vector128<double> Shuffle(System.Runtime.Intrinsics.Vector128<double> vector, System.Runtime.Intrinsics.Vector128<long> indices) { throw null; }
+        public static System.Runtime.Intrinsics.Vector128<double> Sin(System.Runtime.Intrinsics.Vector128<double> vector) { throw null; }
+        public static System.Runtime.Intrinsics.Vector128<float> Sin(System.Runtime.Intrinsics.Vector128<float> vector) { throw null; }
+        public static (System.Runtime.Intrinsics.Vector128<double> Sin, System.Runtime.Intrinsics.Vector128<double> Cos) SinCos(System.Runtime.Intrinsics.Vector128<double> vector) { throw null; }
+        public static (System.Runtime.Intrinsics.Vector128<float> Sin, System.Runtime.Intrinsics.Vector128<float> Cos) SinCos(System.Runtime.Intrinsics.Vector128<float> vector) { throw null; }
         public static System.Runtime.Intrinsics.Vector128<T> Sqrt<T>(System.Runtime.Intrinsics.Vector128<T> vector) { throw null; }
 #pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T')
         [System.CLSCompliantAttribute(false)]
@@ -447,6 +453,8 @@ public static partial class Vector256
         public static void CopyTo<T>(this System.Runtime.Intrinsics.Vector256<T> vector, System.Span<T> destination) { }
         public static void CopyTo<T>(this System.Runtime.Intrinsics.Vector256<T> vector, T[] destination) { }
         public static void CopyTo<T>(this System.Runtime.Intrinsics.Vector256<T> vector, T[] destination, int startIndex) { }
+        public static System.Runtime.Intrinsics.Vector256<double> Cos(System.Runtime.Intrinsics.Vector256<double> vector) { throw null; }
+        public static System.Runtime.Intrinsics.Vector256<float> Cos(System.Runtime.Intrinsics.Vector256<float> vector) { throw null; }
         public static System.Runtime.Intrinsics.Vector256<System.Byte> Create(byte value) { throw null; }
         public static System.Runtime.Intrinsics.Vector256<System.Byte> Create(byte e0, byte e1, byte e2, byte e3, byte e4, byte e5, byte e6, byte e7, byte e8, byte e9, byte e10, byte e11, byte e12, byte e13, byte e14, byte e15, byte e16, byte e17, byte e18, byte e19, byte e20, byte e21, byte e22, byte e23, byte e24, byte e25, byte e26, byte e27, byte e28, byte e29, byte e30, byte e31) { throw null; }
         public static System.Runtime.Intrinsics.Vector256<System.Double> Create(double value) { throw null; }
@@ -678,6 +686,10 @@ public static void CopyTo<T>(this System.Runtime.Intrinsics.Vector256<T> vector,
         [System.CLSCompliantAttribute(false)]
         public static System.Runtime.Intrinsics.Vector256<ulong> Shuffle(System.Runtime.Intrinsics.Vector256<ulong> vector, System.Runtime.Intrinsics.Vector256<ulong> indices) { throw null; }
         public static System.Runtime.Intrinsics.Vector256<double> Shuffle(System.Runtime.Intrinsics.Vector256<double> vector, System.Runtime.Intrinsics.Vector256<long> indices) { throw null; }
+        public static System.Runtime.Intrinsics.Vector256<double> Sin(System.Runtime.Intrinsics.Vector256<double> vector) { throw null; }
+        public static System.Runtime.Intrinsics.Vector256<float> Sin(System.Runtime.Intrinsics.Vector256<float> vector) { throw null; }
+        public static (System.Runtime.Intrinsics.Vector256<double> Sin, System.Runtime.Intrinsics.Vector256<double> Cos) SinCos(System.Runtime.Intrinsics.Vector256<double> vector) { throw null; }
+        public static (System.Runtime.Intrinsics.Vector256<float> Sin, System.Runtime.Intrinsics.Vector256<float> Cos) SinCos(System.Runtime.Intrinsics.Vector256<float> vector) { throw null; }
         public static System.Runtime.Intrinsics.Vector256<T> Sqrt<T>(System.Runtime.Intrinsics.Vector256<T> vector) { throw null; }
 #pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T')
         [System.CLSCompliantAttribute(false)]
@@ -824,6 +836,8 @@ public static partial class Vector512
         public static void CopyTo<T>(this System.Runtime.Intrinsics.Vector512<T> vector, System.Span<T> destination) { }
         public static void CopyTo<T>(this System.Runtime.Intrinsics.Vector512<T> vector, T[] destination) { }
         public static void CopyTo<T>(this System.Runtime.Intrinsics.Vector512<T> vector, T[] destination, int startIndex) { }
+        public static System.Runtime.Intrinsics.Vector512<double> Cos(System.Runtime.Intrinsics.Vector512<double> vector) { throw null; }
+        public static System.Runtime.Intrinsics.Vector512<float> Cos(System.Runtime.Intrinsics.Vector512<float> vector) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<System.Byte> Create(byte value) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<System.Byte> Create(byte e0,  byte e1,  byte e2,  byte e3,  byte e4,  byte e5,  byte e6,  byte e7,  byte e8,  byte e9,  byte e10, byte e11, byte e12, byte e13, byte e14, byte e15, byte e16, byte e17, byte e18, byte e19, byte e20, byte e21, byte e22, byte e23, byte e24, byte e25, byte e26, byte e27, byte e28, byte e29, byte e30, byte e31, byte e32, byte e33, byte e34, byte e35, byte e36, byte e37, byte e38, byte e39, byte e40, byte e41, byte e42, byte e43, byte e44, byte e45, byte e46, byte e47, byte e48, byte e49, byte e50, byte e51, byte e52, byte e53, byte e54, byte e55, byte e56, byte e57, byte e58, byte e59, byte e60, byte e61, byte e62, byte e63) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<System.Double> Create(double value) { throw null; }
@@ -1056,6 +1070,10 @@ public static void CopyTo<T>(this System.Runtime.Intrinsics.Vector512<T> vector,
         [System.CLSCompliantAttribute(false)]
         public static System.Runtime.Intrinsics.Vector512<ulong> Shuffle(System.Runtime.Intrinsics.Vector512<ulong> vector, System.Runtime.Intrinsics.Vector512<ulong> indices) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<double> Shuffle(System.Runtime.Intrinsics.Vector512<double> vector, System.Runtime.Intrinsics.Vector512<long> indices) { throw null; }
+        public static System.Runtime.Intrinsics.Vector512<double> Sin(System.Runtime.Intrinsics.Vector512<double> vector) { throw null; }
+        public static System.Runtime.Intrinsics.Vector512<float> Sin(System.Runtime.Intrinsics.Vector512<float> vector) { throw null; }
+        public static (System.Runtime.Intrinsics.Vector512<double> Sin, System.Runtime.Intrinsics.Vector512<double> Cos) SinCos(System.Runtime.Intrinsics.Vector512<double> vector) { throw null; }
+        public static (System.Runtime.Intrinsics.Vector512<float> Sin, System.Runtime.Intrinsics.Vector512<float> Cos) SinCos(System.Runtime.Intrinsics.Vector512<float> vector) { throw null; }
         public static System.Runtime.Intrinsics.Vector512<T> Sqrt<T>(System.Runtime.Intrinsics.Vector512<T> vector) { throw null; }
 #pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T')
         [System.CLSCompliantAttribute(false)]
@@ -1198,6 +1216,8 @@ public static partial class Vector64
         public static void CopyTo<T>(this System.Runtime.Intrinsics.Vector64<T> vector, System.Span<T> destination) { }
         public static void CopyTo<T>(this System.Runtime.Intrinsics.Vector64<T> vector, T[] destination) { }
         public static void CopyTo<T>(this System.Runtime.Intrinsics.Vector64<T> vector, T[] destination, int startIndex) { }
+        public static System.Runtime.Intrinsics.Vector64<double> Cos(System.Runtime.Intrinsics.Vector64<double> vector) { throw null; }
+        public static System.Runtime.Intrinsics.Vector64<float> Cos(System.Runtime.Intrinsics.Vector64<float> vector) { throw null; }
         public static System.Runtime.Intrinsics.Vector64<System.Byte> Create(byte value) { throw null; }
         public static System.Runtime.Intrinsics.Vector64<System.Byte> Create(byte e0, byte e1, byte e2, byte e3, byte e4, byte e5, byte e6, byte e7) { throw null; }
         public static System.Runtime.Intrinsics.Vector64<System.Double> Create(double value) { throw null; }
@@ -1399,6 +1419,10 @@ public static void CopyTo<T>(this System.Runtime.Intrinsics.Vector64<T> vector,
         [System.CLSCompliantAttribute(false)]
         public static System.Runtime.Intrinsics.Vector64<uint> Shuffle(System.Runtime.Intrinsics.Vector64<uint> vector, System.Runtime.Intrinsics.Vector64<uint> indices) { throw null; }
         public static System.Runtime.Intrinsics.Vector64<float> Shuffle(System.Runtime.Intrinsics.Vector64<float> vector, System.Runtime.Intrinsics.Vector64<int> indices) { throw null; }
+        public static System.Runtime.Intrinsics.Vector64<double> Sin(System.Runtime.Intrinsics.Vector64<double> vector) { throw null; }
+        public static System.Runtime.Intrinsics.Vector64<float> Sin(System.Runtime.Intrinsics.Vector64<float> vector) { throw null; }
+        public static (System.Runtime.Intrinsics.Vector64<double> Sin, System.Runtime.Intrinsics.Vector64<double> Cos) SinCos(System.Runtime.Intrinsics.Vector64<double> vector) { throw null; }
+        public static (System.Runtime.Intrinsics.Vector64<float> Sin, System.Runtime.Intrinsics.Vector64<float> Cos) SinCos(System.Runtime.Intrinsics.Vector64<float> vector) { throw null; }
         public static System.Runtime.Intrinsics.Vector64<T> Sqrt<T>(System.Runtime.Intrinsics.Vector64<T> vector) { throw null; }
 #pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T')
         [System.CLSCompliantAttribute(false)]

From 5e7012bf36b06916d17df44adb253fec57fa7044 Mon Sep 17 00:00:00 2001
From: Tanner Gooding <tagoo@outlook.com>
Date: Fri, 12 Jul 2024 21:06:55 -0700
Subject: [PATCH 04/13] Use the vector Sin, Cos, and SinCos methods where
 possible

---
 .../Tensors/netcore/TensorPrimitives.Cos.cs   |  41 ++++++-
 .../Tensors/netcore/TensorPrimitives.CosPi.cs |  12 +-
 .../netcore/TensorPrimitives.FloatHelpers.cs  |  74 ++++++++++++
 .../Tensors/netcore/TensorPrimitives.Sin.cs   |  41 ++++++-
 .../netcore/TensorPrimitives.SinCos.cs        | 106 +++++++++++++++++-
 .../netcore/TensorPrimitives.SinCosPi.cs      |  94 +++++++++++++++-
 .../Tensors/netcore/TensorPrimitives.SinPi.cs |  12 +-
 .../src/System/Numerics/Quaternion.cs         |  10 +-
 8 files changed, 373 insertions(+), 17 deletions(-)

diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Cos.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Cos.cs
index 36bdcc82e337b..815eb6b700edc 100644
--- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Cos.cs
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Cos.cs
@@ -60,12 +60,24 @@ public static void Cos<T>(ReadOnlySpan<T> x, Span<T> destination)
             // 3. Reconstruction
             //      Hence, cos(x) = sin(x + pi/2) = (-1)^N * sin(f)
 
-            public static bool Vectorizable => typeof(T) == typeof(float) || typeof(T) == typeof(double);
+            public static bool Vectorizable => (typeof(T) == typeof(float))
+                                            || (typeof(T) == typeof(double));
 
             public static T Invoke(T x) => T.Cos(x);
 
             public static Vector128<T> Invoke(Vector128<T> x)
             {
+#if NET9_0_OR_GREATER
+                if (typeof(T) == typeof(double))
+                {
+                    return Vector128.Cos(x.AsDouble()).As<double, T>();
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(float));
+                    return Vector128.Cos(x.AsSingle()).As<float, T>();
+                }
+#else
                 if (typeof(T) == typeof(float))
                 {
                     return CosOperatorSingle.Invoke(x.AsSingle()).As<float, T>();
@@ -75,10 +87,22 @@ public static Vector128<T> Invoke(Vector128<T> x)
                     Debug.Assert(typeof(T) == typeof(double));
                     return CosOperatorDouble.Invoke(x.AsDouble()).As<double, T>();
                 }
+#endif
             }
 
             public static Vector256<T> Invoke(Vector256<T> x)
             {
+#if NET9_0_OR_GREATER
+                if (typeof(T) == typeof(double))
+                {
+                    return Vector256.Cos(x.AsDouble()).As<double, T>();
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(float));
+                    return Vector256.Cos(x.AsSingle()).As<float, T>();
+                }
+#else
                 if (typeof(T) == typeof(float))
                 {
                     return CosOperatorSingle.Invoke(x.AsSingle()).As<float, T>();
@@ -88,10 +112,22 @@ public static Vector256<T> Invoke(Vector256<T> x)
                     Debug.Assert(typeof(T) == typeof(double));
                     return CosOperatorDouble.Invoke(x.AsDouble()).As<double, T>();
                 }
+#endif
             }
 
             public static Vector512<T> Invoke(Vector512<T> x)
             {
+#if NET9_0_OR_GREATER
+                if (typeof(T) == typeof(double))
+                {
+                    return Vector512.Cos(x.AsDouble()).As<double, T>();
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(float));
+                    return Vector512.Cos(x.AsSingle()).As<float, T>();
+                }
+#else
                 if (typeof(T) == typeof(float))
                 {
                     return CosOperatorSingle.Invoke(x.AsSingle()).As<float, T>();
@@ -101,9 +137,11 @@ public static Vector512<T> Invoke(Vector512<T> x)
                     Debug.Assert(typeof(T) == typeof(double));
                     return CosOperatorDouble.Invoke(x.AsDouble()).As<double, T>();
                 }
+#endif
             }
         }
 
+#if !NET9_0_OR_GREATER
         /// <summary>float.Cos(x)</summary>
         private readonly struct CosOperatorSingle : IUnaryOperator<float, float>
         {
@@ -347,5 +385,6 @@ public static Vector512<double> Invoke(Vector512<double> x)
                 return (poly.AsUInt64() ^ odd).AsDouble();
             }
         }
+#endif
     }
 }
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.CosPi.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.CosPi.cs
index b286a18d0f942..58dbe83dc050f 100644
--- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.CosPi.cs
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.CosPi.cs
@@ -33,13 +33,16 @@ public static void CosPi<T>(ReadOnlySpan<T> x, Span<T> destination)
         private readonly struct CosPiOperator<T> : IUnaryOperator<T, T>
             where T : ITrigonometricFunctions<T>
         {
-            public static bool Vectorizable => typeof(T) == typeof(float) || typeof(T) == typeof(double);
+            public static bool Vectorizable => (typeof(T) == typeof(float))
+                                            || (typeof(T) == typeof(double));
 
             public static T Invoke(T x) => T.CosPi(x);
 
             public static Vector128<T> Invoke(Vector128<T> x)
             {
                 Vector128<T> xpi = x * Vector128.Create(T.Pi);
+
+#if !NET9_0_OR_GREATER
                 if (typeof(T) == typeof(float))
                 {
                     if (Vector128.GreaterThanAny(xpi.AsUInt32() & Vector128.Create(CosOperatorSingle.SignMask), Vector128.Create(CosOperatorSingle.MaxVectorizedValue)))
@@ -55,6 +58,7 @@ public static Vector128<T> Invoke(Vector128<T> x)
                         return ApplyScalar<CosPiOperator<double>>(x.AsDouble()).As<double, T>();
                     }
                 }
+#endif
 
                 return CosOperator<T>.Invoke(xpi);
             }
@@ -62,6 +66,8 @@ public static Vector128<T> Invoke(Vector128<T> x)
             public static Vector256<T> Invoke(Vector256<T> x)
             {
                 Vector256<T> xpi = x * Vector256.Create(T.Pi);
+
+#if !NET9_0_OR_GREATER
                 if (typeof(T) == typeof(float))
                 {
                     if (Vector256.GreaterThanAny(xpi.AsUInt32() & Vector256.Create(CosOperatorSingle.SignMask), Vector256.Create(CosOperatorSingle.MaxVectorizedValue)))
@@ -77,6 +83,7 @@ public static Vector256<T> Invoke(Vector256<T> x)
                         return ApplyScalar<CosPiOperator<double>>(x.AsDouble()).As<double, T>();
                     }
                 }
+#endif
 
                 return CosOperator<T>.Invoke(xpi);
             }
@@ -84,6 +91,8 @@ public static Vector256<T> Invoke(Vector256<T> x)
             public static Vector512<T> Invoke(Vector512<T> x)
             {
                 Vector512<T> xpi = x * Vector512.Create(T.Pi);
+
+#if !NET9_0_OR_GREATER
                 if (typeof(T) == typeof(float))
                 {
                     if (Vector512.GreaterThanAny(xpi.AsUInt32() & Vector512.Create(CosOperatorSingle.SignMask), Vector512.Create(CosOperatorSingle.MaxVectorizedValue)))
@@ -99,6 +108,7 @@ public static Vector512<T> Invoke(Vector512<T> x)
                         return ApplyScalar<CosPiOperator<double>>(x.AsDouble()).As<double, T>();
                     }
                 }
+#endif
 
                 return CosOperator<T>.Invoke(xpi);
             }
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.FloatHelpers.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.FloatHelpers.cs
index ec97b9a61af9a..ecb869c814579 100644
--- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.FloatHelpers.cs
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.FloatHelpers.cs
@@ -24,5 +24,79 @@ private static Vector256<double> ApplyScalar<TOperator>(Vector256<double> double
 
         private static Vector512<double> ApplyScalar<TOperator>(Vector512<double> doubles) where TOperator : IUnaryOperator<double, double> =>
             Vector512.Create(ApplyScalar<TOperator>(doubles.GetLower()), ApplyScalar<TOperator>(doubles.GetUpper()));
+
+        private static (Vector128<float> First, Vector128<float> Second) Apply2xScalar<TOperator>(Vector128<float> floats)
+            where TOperator : IUnaryInputBinaryOutput<float>
+        {
+            (float firstRes0, float secondRes0) = TOperator.Invoke(floats[0]); // scalar two-result Invoke, called once per lane (4 lanes)
+            (float firstRes1, float secondRes1) = TOperator.Invoke(floats[1]);
+            (float firstRes2, float secondRes2) = TOperator.Invoke(floats[2]);
+            (float firstRes3, float secondRes3) = TOperator.Invoke(floats[3]);
+            // Repack in lane order: First collects each call's first result, Second each call's second result.
+            return (
+                Vector128.Create(firstRes0, firstRes1, firstRes2, firstRes3),
+                Vector128.Create(secondRes0, secondRes1, secondRes2, secondRes3)
+            );
+        }
+
+        private static (Vector256<float> First, Vector256<float> Second) Apply2xScalar<TOperator>(Vector256<float> floats)
+            where TOperator : IUnaryInputBinaryOutput<float>
+        {
+            (Vector128<float> firstLower, Vector128<float> secondLower) = Apply2xScalar<TOperator>(floats.GetLower()); // lower half via the Vector128<float> overload
+            (Vector128<float> firstUpper, Vector128<float> secondUpper) = Apply2xScalar<TOperator>(floats.GetUpper()); // upper half likewise
+            // Rejoin the halves so both outputs keep the input's lower/upper ordering.
+            return (
+                Vector256.Create(firstLower, firstUpper),
+                Vector256.Create(secondLower, secondUpper)
+            );
+        }
+
+        private static (Vector512<float> First, Vector512<float> Second) Apply2xScalar<TOperator>(Vector512<float> floats)
+            where TOperator : IUnaryInputBinaryOutput<float>
+        {
+            (Vector256<float> firstLower, Vector256<float> secondLower) = Apply2xScalar<TOperator>(floats.GetLower()); // lower half via the Vector256<float> overload
+            (Vector256<float> firstUpper, Vector256<float> secondUpper) = Apply2xScalar<TOperator>(floats.GetUpper()); // upper half likewise
+            // Rejoin the halves so both outputs keep the input's lower/upper ordering.
+            return (
+                Vector512.Create(firstLower, firstUpper),
+                Vector512.Create(secondLower, secondUpper)
+            );
+        }
+
+        private static (Vector128<double> First, Vector128<double> Second) Apply2xScalar<TOperator>(Vector128<double> doubles)
+            where TOperator : IUnaryInputBinaryOutput<double>
+        {
+            (double firstRes0, double secondRes0) = TOperator.Invoke(doubles[0]); // scalar two-result Invoke, called once per lane (2 lanes)
+            (double firstRes1, double secondRes1) = TOperator.Invoke(doubles[1]);
+            // Repack in lane order: First collects each call's first result, Second each call's second result.
+            return (
+                Vector128.Create(firstRes0, firstRes1),
+                Vector128.Create(secondRes0, secondRes1)
+            );
+        }
+
+        private static (Vector256<double> First, Vector256<double> Second) Apply2xScalar<TOperator>(Vector256<double> doubles)
+            where TOperator : IUnaryInputBinaryOutput<double>
+        {
+            (Vector128<double> firstLower, Vector128<double> secondLower) = Apply2xScalar<TOperator>(doubles.GetLower()); // lower half via the Vector128<double> overload
+            (Vector128<double> firstUpper, Vector128<double> secondUpper) = Apply2xScalar<TOperator>(doubles.GetUpper()); // upper half likewise
+            // Rejoin the halves so both outputs keep the input's lower/upper ordering.
+            return (
+                Vector256.Create(firstLower, firstUpper),
+                Vector256.Create(secondLower, secondUpper)
+            );
+        }
+
+        private static (Vector512<double> First, Vector512<double> Second) Apply2xScalar<TOperator>(Vector512<double> doubles)
+            where TOperator : IUnaryInputBinaryOutput<double>
+        {
+            (Vector256<double> firstLower, Vector256<double> secondLower) = Apply2xScalar<TOperator>(doubles.GetLower()); // lower half via the Vector256<double> overload
+            (Vector256<double> firstUpper, Vector256<double> secondUpper) = Apply2xScalar<TOperator>(doubles.GetUpper()); // upper half likewise
+            // Rejoin the halves so both outputs keep the input's lower/upper ordering.
+            return (
+                Vector512.Create(firstLower, firstUpper),
+                Vector512.Create(secondLower, secondUpper)
+            );
+        }
     }
 }
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Sin.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Sin.cs
index 6976a35b3d23a..8211528121d39 100644
--- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Sin.cs
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Sin.cs
@@ -50,12 +50,24 @@ public static void Sin<T>(ReadOnlySpan<T> x, Span<T> destination)
             //
             // The term sin(f) can be approximated by using a polynomial
 
-            public static bool Vectorizable => typeof(T) == typeof(float) || typeof(T) == typeof(double);
+            public static bool Vectorizable => (typeof(T) == typeof(float))
+                                            || (typeof(T) == typeof(double));
 
             public static T Invoke(T x) => T.Sin(x);
 
             public static Vector128<T> Invoke(Vector128<T> x)
             {
+#if NET9_0_OR_GREATER
+                if (typeof(T) == typeof(double))
+                {
+                    return Vector128.Sin(x.AsDouble()).As<double, T>();
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(float));
+                    return Vector128.Sin(x.AsSingle()).As<float, T>();
+                }
+#else
                 if (typeof(T) == typeof(float))
                 {
                     return SinOperatorSingle.Invoke(x.AsSingle()).As<float, T>();
@@ -65,10 +77,22 @@ public static Vector128<T> Invoke(Vector128<T> x)
                     Debug.Assert(typeof(T) == typeof(double));
                     return SinOperatorDouble.Invoke(x.AsDouble()).As<double, T>();
                 }
+#endif
             }
 
             public static Vector256<T> Invoke(Vector256<T> x)
             {
+#if NET9_0_OR_GREATER
+                if (typeof(T) == typeof(double))
+                {
+                    return Vector256.Sin(x.AsDouble()).As<double, T>();
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(float));
+                    return Vector256.Sin(x.AsSingle()).As<float, T>();
+                }
+#else
                 if (typeof(T) == typeof(float))
                 {
                     return SinOperatorSingle.Invoke(x.AsSingle()).As<float, T>();
@@ -78,10 +102,22 @@ public static Vector256<T> Invoke(Vector256<T> x)
                     Debug.Assert(typeof(T) == typeof(double));
                     return SinOperatorDouble.Invoke(x.AsDouble()).As<double, T>();
                 }
+#endif
             }
 
             public static Vector512<T> Invoke(Vector512<T> x)
             {
+#if NET9_0_OR_GREATER
+                if (typeof(T) == typeof(double))
+                {
+                    return Vector512.Sin(x.AsDouble()).As<double, T>();
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(float));
+                    return Vector512.Sin(x.AsSingle()).As<float, T>();
+                }
+#else
                 if (typeof(T) == typeof(float))
                 {
                     return SinOperatorSingle.Invoke(x.AsSingle()).As<float, T>();
@@ -91,9 +127,11 @@ public static Vector512<T> Invoke(Vector512<T> x)
                     Debug.Assert(typeof(T) == typeof(double));
                     return SinOperatorDouble.Invoke(x.AsDouble()).As<double, T>();
                 }
+#endif
             }
         }
 
+#if !NET9_0_OR_GREATER
         /// <summary>float.Sin(x)</summary>
         private readonly struct SinOperatorSingle : IUnaryOperator<float, float>
         {
@@ -334,5 +372,6 @@ public static Vector512<double> Invoke(Vector512<double> x)
                 return (poly.AsUInt64() ^ (x.AsUInt64() & Vector512.Create(~SignMask)) ^ odd).AsDouble();
             }
         }
+#endif
     }
 }
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.SinCos.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.SinCos.cs
index 766269957a2e7..2c823a6f9482f 100644
--- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.SinCos.cs
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.SinCos.cs
@@ -1,6 +1,7 @@
 ﻿// Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
+using System.Diagnostics;
 using System.Runtime.Intrinsics;
 
 namespace System.Numerics.Tensors
@@ -29,12 +30,109 @@ public static void SinCos<T>(ReadOnlySpan<T> x, Span<T> sinDestination, Span<T>
         /// <summary>T.SinCos(x)</summary>
         private readonly struct SinCosOperator<T> : IUnaryInputBinaryOutput<T> where T : ITrigonometricFunctions<T>
         {
-            public static bool Vectorizable => false; // TODO: vectorize
+            public static bool Vectorizable => (typeof(T) == typeof(float))
+                                            || (typeof(T) == typeof(double));
 
             public static (T, T) Invoke(T x) => T.SinCos(x);
-            public static (Vector128<T> First, Vector128<T> Second) Invoke(Vector128<T> x) => throw new NotSupportedException();
-            public static (Vector256<T> First, Vector256<T> Second) Invoke(Vector256<T> x) => throw new NotSupportedException();
-            public static (Vector512<T> First, Vector512<T> Second) Invoke(Vector512<T> x) => throw new NotSupportedException();
+
+            public static (Vector128<T> First, Vector128<T> Second) Invoke(Vector128<T> x)
+            {
+#if NET9_0_OR_GREATER
+                if (typeof(T) == typeof(double)) // NET9_0_OR_GREATER builds: a single Vector128.SinCos call yields both results
+                {
+                    (Vector128<double> sin, Vector128<double> cos) = Vector128.SinCos(x.AsDouble());
+                    return (sin.As<double, T>(), cos.As<double, T>());
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(float)); // Vectorizable admits only float and double
+                    (Vector128<float> sin, Vector128<float> cos) = Vector128.SinCos(x.AsSingle());
+                    return (sin.As<float, T>(), cos.As<float, T>());
+                }
+#else
+                if (typeof(T) == typeof(float)) // other builds: run the separate sine and cosine operators on the same input
+                {
+                    return (
+                        SinOperatorSingle.Invoke(x.AsSingle()).As<float, T>(), // First = sine
+                        CosOperatorSingle.Invoke(x.AsSingle()).As<float, T>() // Second = cosine
+                    );
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(double)); // Vectorizable admits only float and double
+                    return (
+                        SinOperatorDouble.Invoke(x.AsDouble()).As<double, T>(), // First = sine
+                        CosOperatorDouble.Invoke(x.AsDouble()).As<double, T>() // Second = cosine
+                    );
+                }
+#endif
+            }
+
+            public static (Vector256<T> First, Vector256<T> Second) Invoke(Vector256<T> x)
+            {
+#if NET9_0_OR_GREATER
+                if (typeof(T) == typeof(double)) // NET9_0_OR_GREATER builds: a single Vector256.SinCos call yields both results
+                {
+                    (Vector256<double> sin, Vector256<double> cos) = Vector256.SinCos(x.AsDouble());
+                    return (sin.As<double, T>(), cos.As<double, T>());
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(float)); // Vectorizable admits only float and double
+                    (Vector256<float> sin, Vector256<float> cos) = Vector256.SinCos(x.AsSingle());
+                    return (sin.As<float, T>(), cos.As<float, T>());
+                }
+#else
+                if (typeof(T) == typeof(float)) // other builds: run the separate sine and cosine operators on the same input
+                {
+                    return (
+                        SinOperatorSingle.Invoke(x.AsSingle()).As<float, T>(), // First = sine
+                        CosOperatorSingle.Invoke(x.AsSingle()).As<float, T>() // Second = cosine
+                    );
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(double)); // Vectorizable admits only float and double
+                    return (
+                        SinOperatorDouble.Invoke(x.AsDouble()).As<double, T>(), // First = sine
+                        CosOperatorDouble.Invoke(x.AsDouble()).As<double, T>() // Second = cosine
+                    );
+                }
+#endif
+            }
+
+            public static (Vector512<T> First, Vector512<T> Second) Invoke(Vector512<T> x)
+            {
+#if NET9_0_OR_GREATER
+                if (typeof(T) == typeof(double)) // NET9_0_OR_GREATER builds: a single Vector512.SinCos call yields both results
+                {
+                    (Vector512<double> sin, Vector512<double> cos) = Vector512.SinCos(x.AsDouble());
+                    return (sin.As<double, T>(), cos.As<double, T>());
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(float)); // Vectorizable admits only float and double
+                    (Vector512<float> sin, Vector512<float> cos) = Vector512.SinCos(x.AsSingle());
+                    return (sin.As<float, T>(), cos.As<float, T>());
+                }
+#else
+                if (typeof(T) == typeof(float)) // other builds: run the separate sine and cosine operators on the same input
+                {
+                    return (
+                        SinOperatorSingle.Invoke(x.AsSingle()).As<float, T>(), // First = sine
+                        CosOperatorSingle.Invoke(x.AsSingle()).As<float, T>() // Second = cosine
+                    );
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(double)); // Vectorizable admits only float and double
+                    return (
+                        SinOperatorDouble.Invoke(x.AsDouble()).As<double, T>(), // First = sine
+                        CosOperatorDouble.Invoke(x.AsDouble()).As<double, T>() // Second = cosine
+                    );
+                }
+#endif
+            }
         }
     }
 }
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.SinCosPi.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.SinCosPi.cs
index 574db7667be00..39366b9dfca29 100644
--- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.SinCosPi.cs
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.SinCosPi.cs
@@ -1,6 +1,7 @@
 ﻿// Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
+using System.Diagnostics;
 using System.Runtime.Intrinsics;
 
 namespace System.Numerics.Tensors
@@ -29,12 +30,97 @@ public static void SinCosPi<T>(ReadOnlySpan<T> x, Span<T> sinPiDestination, Span
         /// <summary>T.SinCosPi(x)</summary>
         private readonly struct SinCosPiOperator<T> : IUnaryInputBinaryOutput<T> where T : ITrigonometricFunctions<T>
         {
-            public static bool Vectorizable => false; // TODO: vectorize
+            public static bool Vectorizable => (typeof(T) == typeof(float))
+                                            || (typeof(T) == typeof(double));
 
             public static (T, T) Invoke(T x) => T.SinCosPi(x);
-            public static (Vector128<T> First, Vector128<T> Second) Invoke(Vector128<T> x) => throw new NotSupportedException();
-            public static (Vector256<T> First, Vector256<T> Second) Invoke(Vector256<T> x) => throw new NotSupportedException();
-            public static (Vector512<T> First, Vector512<T> Second) Invoke(Vector512<T> x) => throw new NotSupportedException();
+
+            public static (Vector128<T> First, Vector128<T> Second) Invoke(Vector128<T> x)
+            {
+                Vector128<T> xpi = x * Vector128.Create(T.Pi);
+                // Unless NET9_0_OR_GREATER is defined: if any lane of the masked xpi bits exceeds the sine or cosine MaxVectorizedValue, every lane is computed by scalar calls instead.
+#if !NET9_0_OR_GREATER
+                if (typeof(T) == typeof(float))
+                {
+                    if (Vector128.GreaterThanAny(xpi.AsUInt32() & Vector128.Create(SinOperatorSingle.SignMask), Vector128.Create(SinOperatorSingle.MaxVectorizedValue)) ||
+                        Vector128.GreaterThanAny(xpi.AsUInt32() & Vector128.Create(CosOperatorSingle.SignMask), Vector128.Create(CosOperatorSingle.MaxVectorizedValue)))
+                    {
+                        (Vector128<float> sin, Vector128<float> cos) = Apply2xScalar<SinCosPiOperator<float>>(x.AsSingle()); // scalar path is given x itself, not xpi
+                        return (sin.As<float, T>(), cos.As<float, T>());
+                    }
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(double)); // Vectorizable admits only float and double
+                    if (Vector128.GreaterThanAny(xpi.AsUInt64() & Vector128.Create(SinOperatorDouble.SignMask), Vector128.Create(SinOperatorDouble.MaxVectorizedValue)) ||
+                        Vector128.GreaterThanAny(xpi.AsUInt64() & Vector128.Create(CosOperatorDouble.SignMask), Vector128.Create(CosOperatorDouble.MaxVectorizedValue)))
+                    {
+                        (Vector128<double> sin, Vector128<double> cos) = Apply2xScalar<SinCosPiOperator<double>>(x.AsDouble()); // scalar path is given x itself, not xpi
+                        return (sin.As<double, T>(), cos.As<double, T>());
+                    }
+                }
+#endif
+                // Otherwise (and always when NET9_0_OR_GREATER is defined) evaluate SinCosOperator on the pre-scaled x * Pi.
+                return SinCosOperator<T>.Invoke(xpi);
+            }
+
+            public static (Vector256<T> First, Vector256<T> Second) Invoke(Vector256<T> x)
+            {
+                Vector256<T> xpi = x * Vector256.Create(T.Pi);
+                // Unless NET9_0_OR_GREATER is defined: if any lane of the masked xpi bits exceeds the sine or cosine MaxVectorizedValue, every lane is computed by scalar calls instead.
+#if !NET9_0_OR_GREATER
+                if (typeof(T) == typeof(float))
+                {
+                    if (Vector256.GreaterThanAny(xpi.AsUInt32() & Vector256.Create(SinOperatorSingle.SignMask), Vector256.Create(SinOperatorSingle.MaxVectorizedValue)) ||
+                        Vector256.GreaterThanAny(xpi.AsUInt32() & Vector256.Create(CosOperatorSingle.SignMask), Vector256.Create(CosOperatorSingle.MaxVectorizedValue)))
+                    {
+                        (Vector256<float> sin, Vector256<float> cos) = Apply2xScalar<SinCosPiOperator<float>>(x.AsSingle()); // scalar path is given x itself, not xpi
+                        return (sin.As<float, T>(), cos.As<float, T>());
+                    }
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(double)); // Vectorizable admits only float and double
+                    if (Vector256.GreaterThanAny(xpi.AsUInt64() & Vector256.Create(SinOperatorDouble.SignMask), Vector256.Create(SinOperatorDouble.MaxVectorizedValue)) ||
+                        Vector256.GreaterThanAny(xpi.AsUInt64() & Vector256.Create(CosOperatorDouble.SignMask), Vector256.Create(CosOperatorDouble.MaxVectorizedValue)))
+                    {
+                        (Vector256<double> sin, Vector256<double> cos) = Apply2xScalar<SinCosPiOperator<double>>(x.AsDouble()); // scalar path is given x itself, not xpi
+                        return (sin.As<double, T>(), cos.As<double, T>());
+                    }
+                }
+#endif
+                // Otherwise (and always when NET9_0_OR_GREATER is defined) evaluate SinCosOperator on the pre-scaled x * Pi.
+                return SinCosOperator<T>.Invoke(xpi);
+            }
+
+            public static (Vector512<T> First, Vector512<T> Second) Invoke(Vector512<T> x)
+            {
+                Vector512<T> xpi = x * Vector512.Create(T.Pi);
+                // Unless NET9_0_OR_GREATER is defined: if any lane of the masked xpi bits exceeds the sine or cosine MaxVectorizedValue, every lane is computed by scalar calls instead.
+#if !NET9_0_OR_GREATER
+                if (typeof(T) == typeof(float))
+                {
+                    if (Vector512.GreaterThanAny(xpi.AsUInt32() & Vector512.Create(SinOperatorSingle.SignMask), Vector512.Create(SinOperatorSingle.MaxVectorizedValue)) ||
+                        Vector512.GreaterThanAny(xpi.AsUInt32() & Vector512.Create(CosOperatorSingle.SignMask), Vector512.Create(CosOperatorSingle.MaxVectorizedValue)))
+                    {
+                        (Vector512<float> sin, Vector512<float> cos) = Apply2xScalar<SinCosPiOperator<float>>(x.AsSingle()); // scalar path is given x itself, not xpi
+                        return (sin.As<float, T>(), cos.As<float, T>());
+                    }
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(double)); // Vectorizable admits only float and double
+                    if (Vector512.GreaterThanAny(xpi.AsUInt64() & Vector512.Create(SinOperatorDouble.SignMask), Vector512.Create(SinOperatorDouble.MaxVectorizedValue)) ||
+                        Vector512.GreaterThanAny(xpi.AsUInt64() & Vector512.Create(CosOperatorDouble.SignMask), Vector512.Create(CosOperatorDouble.MaxVectorizedValue)))
+                    {
+                        (Vector512<double> sin, Vector512<double> cos) = Apply2xScalar<SinCosPiOperator<double>>(x.AsDouble()); // scalar path is given x itself, not xpi
+                        return (sin.As<double, T>(), cos.As<double, T>());
+                    }
+                }
+#endif
+                // Otherwise (and always when NET9_0_OR_GREATER is defined) evaluate SinCosOperator on the pre-scaled x * Pi.
+                return SinCosOperator<T>.Invoke(xpi);
+            }
         }
     }
 }
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.SinPi.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.SinPi.cs
index 3ee43ecd58c0a..360c542f3779a 100644
--- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.SinPi.cs
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.SinPi.cs
@@ -33,13 +33,16 @@ public static void SinPi<T>(ReadOnlySpan<T> x, Span<T> destination)
         private readonly struct SinPiOperator<T> : IUnaryOperator<T, T>
             where T : ITrigonometricFunctions<T>
         {
-            public static bool Vectorizable => typeof(T) == typeof(float) || typeof(T) == typeof(double);
+            public static bool Vectorizable => (typeof(T) == typeof(float))
+                                            || (typeof(T) == typeof(double));
 
             public static T Invoke(T x) => T.SinPi(x);
 
             public static Vector128<T> Invoke(Vector128<T> x)
             {
                 Vector128<T> xpi = x * Vector128.Create(T.Pi);
+
+#if !NET9_0_OR_GREATER
                 if (typeof(T) == typeof(float))
                 {
                     if (Vector128.GreaterThanAny(xpi.AsUInt32() & Vector128.Create(SinOperatorSingle.SignMask), Vector128.Create(SinOperatorSingle.MaxVectorizedValue)))
@@ -55,6 +58,7 @@ public static Vector128<T> Invoke(Vector128<T> x)
                         return ApplyScalar<SinPiOperator<double>>(x.AsDouble()).As<double, T>();
                     }
                 }
+#endif
 
                 return SinOperator<T>.Invoke(xpi);
             }
@@ -62,6 +66,8 @@ public static Vector128<T> Invoke(Vector128<T> x)
             public static Vector256<T> Invoke(Vector256<T> x)
             {
                 Vector256<T> xpi = x * Vector256.Create(T.Pi);
+
+#if !NET9_0_OR_GREATER
                 if (typeof(T) == typeof(float))
                 {
                     if (Vector256.GreaterThanAny(xpi.AsUInt32() & Vector256.Create(SinOperatorSingle.SignMask), Vector256.Create(SinOperatorSingle.MaxVectorizedValue)))
@@ -77,6 +83,7 @@ public static Vector256<T> Invoke(Vector256<T> x)
                         return ApplyScalar<SinPiOperator<double>>(x.AsDouble()).As<double, T>();
                     }
                 }
+#endif
 
                 return SinOperator<T>.Invoke(xpi);
             }
@@ -84,6 +91,8 @@ public static Vector256<T> Invoke(Vector256<T> x)
             public static Vector512<T> Invoke(Vector512<T> x)
             {
                 Vector512<T> xpi = x * Vector512.Create(T.Pi);
+
+#if !NET9_0_OR_GREATER
                 if (typeof(T) == typeof(float))
                 {
                     if (Vector512.GreaterThanAny(xpi.AsUInt32() & Vector512.Create(SinOperatorSingle.SignMask), Vector512.Create(SinOperatorSingle.MaxVectorizedValue)))
@@ -99,6 +108,7 @@ public static Vector512<T> Invoke(Vector512<T> x)
                         return ApplyScalar<SinPiOperator<double>>(x.AsDouble()).As<double, T>();
                     }
                 }
+#endif
 
                 return SinOperator<T>.Invoke(xpi);
             }
diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Quaternion.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/Quaternion.cs
index f52da0df94f0d..b54bb0c360940 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Numerics/Quaternion.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/Quaternion.cs
@@ -281,11 +281,11 @@ public static Quaternion CreateFromRotationMatrix(Matrix4x4 matrix)
         /// <returns>The resulting quaternion.</returns>
         public static Quaternion CreateFromYawPitchRoll(float yaw, float pitch, float roll)
         {
-            //  Roll first, about axis the object is facing, then
-            //  pitch upward, then yaw to face into the new heading
-            (float sr, float cr) = float.SinCos(roll * 0.5f);
-            (float sp, float cp) = float.SinCos(pitch * 0.5f);
-            (float sy, float cy) = float.SinCos(yaw * 0.5f);
+            (Vector3 sin, Vector3 cos) = Vector3.SinCos(Vector3.Create(roll, pitch, yaw) * 0.5f);
+
+            (float sr, float cr) = (sin.X, cos.X);
+            (float sp, float cp) = (sin.Y, cos.Y);
+            (float sy, float cy) = (sin.Z, cos.Z);
 
             Quaternion result;
 

From 19183c55f08179143dd1f0862f231bf9a77f11c7 Mon Sep 17 00:00:00 2001
From: Tanner Gooding <tagoo@outlook.com>
Date: Fri, 12 Jul 2024 22:15:47 -0700
Subject: [PATCH 05/13] Adding tests covering the vector Sin, Cos, and SinCos
 APIs

---
 .../tests/System/GenericMathTestMemberData.cs | 246 +++++++++++++++++-
 .../tests/GenericVectorTests.cs               |  50 ++++
 .../tests/Vector2Tests.cs                     |  25 ++
 .../tests/Vector3Tests.cs                     |  25 ++
 .../tests/Vector4Tests.cs                     |  25 ++
 .../tests/Vectors/Vector128Tests.cs           |  50 ++++
 .../tests/Vectors/Vector256Tests.cs           |  50 ++++
 .../tests/Vectors/Vector512Tests.cs           |  50 ++++
 .../tests/Vectors/Vector64Tests.cs            |  50 ++++
 .../System/Math.cs                            | 104 +-------
 .../System/MathF.cs                           | 104 +-------
 11 files changed, 576 insertions(+), 203 deletions(-)

diff --git a/src/libraries/Common/tests/System/GenericMathTestMemberData.cs b/src/libraries/Common/tests/System/GenericMathTestMemberData.cs
index 2e40a0940fc5a..0e19ac826dfdd 100644
--- a/src/libraries/Common/tests/System/GenericMathTestMemberData.cs
+++ b/src/libraries/Common/tests/System/GenericMathTestMemberData.cs
@@ -198,6 +198,86 @@ public static IEnumerable<object[]> CopySignSingle
             }
         }
 
+        public static IEnumerable<object[]> CosDouble
+        {
+            get // rows are { value, expected cosine of value, third column }; the third column is presumably the allowed variance — confirm against the consuming tests
+            {
+                yield return new object[] {  double.NegativeInfinity,  double.NaN,          0.0 }; // infinite input: NaN expected
+                yield return new object[] { -3.1415926535897932,      -1.0,                 DoubleCrossPlatformMachineEpsilon * 10 }; // value: -(pi)
+                yield return new object[] { -2.7182818284590452,      -0.91173391478696510, DoubleCrossPlatformMachineEpsilon };      // value: -(e)
+                yield return new object[] { -2.3025850929940457,      -0.66820151019031295, DoubleCrossPlatformMachineEpsilon };      // value: -(ln(10))
+                yield return new object[] { -1.5707963267948966,       0.0,                 DoubleCrossPlatformMachineEpsilon };      // value: -(pi / 2)
+                yield return new object[] { -1.4426950408889634,       0.12775121753523991, DoubleCrossPlatformMachineEpsilon };      // value: -(log2(e))
+                yield return new object[] { -1.4142135623730950,       0.15594369476537447, DoubleCrossPlatformMachineEpsilon };      // value: -(sqrt(2))
+                yield return new object[] { -1.1283791670955126,       0.42812514788535792, DoubleCrossPlatformMachineEpsilon };      // value: -(2 / sqrt(pi))
+                yield return new object[] { -1.0,                      0.54030230586813972, DoubleCrossPlatformMachineEpsilon };
+                yield return new object[] { -0.78539816339744831,      0.70710678118654752, DoubleCrossPlatformMachineEpsilon };      // value: -(pi / 4),        expected:  (1 / sqrt(2))
+                yield return new object[] { -0.70710678118654752,      0.76024459707563015, DoubleCrossPlatformMachineEpsilon };      // value: -(1 / sqrt(2))
+                yield return new object[] { -0.69314718055994531,      0.76923890136397213, DoubleCrossPlatformMachineEpsilon };      // value: -(ln(2))
+                yield return new object[] { -0.63661977236758134,      0.80410982822879171, DoubleCrossPlatformMachineEpsilon };      // value: -(2 / pi)
+                yield return new object[] { -0.43429448190325183,      0.90716712923909839, DoubleCrossPlatformMachineEpsilon };      // value: -(log10(e))
+                yield return new object[] { -0.31830988618379067,      0.94976571538163866, DoubleCrossPlatformMachineEpsilon };      // value: -(1 / pi)
+                yield return new object[] { -0.0,                      1.0,                 DoubleCrossPlatformMachineEpsilon * 10 };
+                yield return new object[] {  double.NaN,               double.NaN,          0.0 }; // NaN input: NaN expected
+                yield return new object[] {  0.0,                      1.0,                 DoubleCrossPlatformMachineEpsilon * 10 };
+                yield return new object[] {  0.31830988618379067,      0.94976571538163866, DoubleCrossPlatformMachineEpsilon };      // value:  (1 / pi)
+                yield return new object[] {  0.43429448190325183,      0.90716712923909839, DoubleCrossPlatformMachineEpsilon };      // value:  (log10(e))
+                yield return new object[] {  0.63661977236758134,      0.80410982822879171, DoubleCrossPlatformMachineEpsilon };      // value:  (2 / pi)
+                yield return new object[] {  0.69314718055994531,      0.76923890136397213, DoubleCrossPlatformMachineEpsilon };      // value:  (ln(2))
+                yield return new object[] {  0.70710678118654752,      0.76024459707563015, DoubleCrossPlatformMachineEpsilon };      // value:  (1 / sqrt(2))
+                yield return new object[] {  0.78539816339744831,      0.70710678118654752, DoubleCrossPlatformMachineEpsilon };      // value:  (pi / 4),        expected:  (1 / sqrt(2))
+                yield return new object[] {  1.0,                      0.54030230586813972, DoubleCrossPlatformMachineEpsilon };
+                yield return new object[] {  1.1283791670955126,       0.42812514788535792, DoubleCrossPlatformMachineEpsilon };      // value:  (2 / sqrt(pi))
+                yield return new object[] {  1.4142135623730950,       0.15594369476537447, DoubleCrossPlatformMachineEpsilon };      // value:  (sqrt(2))
+                yield return new object[] {  1.4426950408889634,       0.12775121753523991, DoubleCrossPlatformMachineEpsilon };      // value:  (log2(e))
+                yield return new object[] {  1.5707963267948966,       0.0,                 DoubleCrossPlatformMachineEpsilon };      // value:  (pi / 2)
+                yield return new object[] {  2.3025850929940457,      -0.66820151019031295, DoubleCrossPlatformMachineEpsilon };      // value:  (ln(10))
+                yield return new object[] {  2.7182818284590452,      -0.91173391478696510, DoubleCrossPlatformMachineEpsilon };      // value:  (e)
+                yield return new object[] {  3.1415926535897932,      -1.0,                 DoubleCrossPlatformMachineEpsilon * 10 }; // value:  (pi)
+                yield return new object[] {  double.PositiveInfinity,  double.NaN,          0.0 }; // infinite input: NaN expected
+            }
+        }
+
+        public static IEnumerable<object[]> CosSingle // rows: { value, expected cos(value), allowed variance } as float
+        {
+            get // rows run in ascending value order with NaN placed between -0.0 and 0.0; each -x row expects the same result as its +x row
+            {
+                yield return new object[] {  float.NegativeInfinity,  float.NaN,    0.0f };
+                yield return new object[] { -3.14159265f,            -1.0f,         SingleCrossPlatformMachineEpsilon * 10 }; // value: -(pi)
+                yield return new object[] { -2.71828183f,            -0.911733918f, SingleCrossPlatformMachineEpsilon };      // value: -(e)
+                yield return new object[] { -2.30258509f,            -0.668201510f, SingleCrossPlatformMachineEpsilon };      // value: -(ln(10))
+                yield return new object[] { -1.57079633f,             0.0f,         SingleCrossPlatformMachineEpsilon };      // value: -(pi / 2)
+                yield return new object[] { -1.44269504f,             0.127751218f, SingleCrossPlatformMachineEpsilon };      // value: -(log2(e))
+                yield return new object[] { -1.41421356f,             0.155943695f, SingleCrossPlatformMachineEpsilon };      // value: -(sqrt(2))
+                yield return new object[] { -1.12837917f,             0.428125148f, SingleCrossPlatformMachineEpsilon };      // value: -(2 / sqrt(pi))
+                yield return new object[] { -1.0f,                    0.540302306f, SingleCrossPlatformMachineEpsilon };
+                yield return new object[] { -0.785398163f,            0.707106781f, SingleCrossPlatformMachineEpsilon };      // value: -(pi / 4),        expected:  (1 / sqrt(2))
+                yield return new object[] { -0.707106781f,            0.760244597f, SingleCrossPlatformMachineEpsilon };      // value: -(1 / sqrt(2))
+                yield return new object[] { -0.693147181f,            0.769238901f, SingleCrossPlatformMachineEpsilon };      // value: -(ln(2))
+                yield return new object[] { -0.636619772f,            0.804109828f, SingleCrossPlatformMachineEpsilon };      // value: -(2 / pi)
+                yield return new object[] { -0.434294482f,            0.907167129f, SingleCrossPlatformMachineEpsilon };      // value: -(log10(e))
+                yield return new object[] { -0.318309886f,            0.949765715f, SingleCrossPlatformMachineEpsilon };      // value: -(1 / pi)
+                yield return new object[] { -0.0f,                    1.0f,         SingleCrossPlatformMachineEpsilon * 10 };
+                yield return new object[] {  float.NaN,               float.NaN,    0.0f };
+                yield return new object[] {  0.0f,                    1.0f,         SingleCrossPlatformMachineEpsilon * 10 };
+                yield return new object[] {  0.318309886f,            0.949765715f, SingleCrossPlatformMachineEpsilon };      // value:  (1 / pi)
+                yield return new object[] {  0.434294482f,            0.907167129f, SingleCrossPlatformMachineEpsilon };      // value:  (log10(e))
+                yield return new object[] {  0.636619772f,            0.804109828f, SingleCrossPlatformMachineEpsilon };      // value:  (2 / pi)
+                yield return new object[] {  0.693147181f,            0.769238901f, SingleCrossPlatformMachineEpsilon };      // value:  (ln(2))
+                yield return new object[] {  0.707106781f,            0.760244597f, SingleCrossPlatformMachineEpsilon };      // value:  (1 / sqrt(2))
+                yield return new object[] {  0.785398163f,            0.707106781f, SingleCrossPlatformMachineEpsilon };      // value:  (pi / 4),        expected:  (1 / sqrt(2))
+                yield return new object[] {  1.0f,                    0.540302306f, SingleCrossPlatformMachineEpsilon };
+                yield return new object[] {  1.12837917f,             0.428125148f, SingleCrossPlatformMachineEpsilon };      // value:  (2 / sqrt(pi))
+                yield return new object[] {  1.41421356f,             0.155943695f, SingleCrossPlatformMachineEpsilon };      // value:  (sqrt(2))
+                yield return new object[] {  1.44269504f,             0.127751218f, SingleCrossPlatformMachineEpsilon };      // value:  (log2(e))
+                yield return new object[] {  1.57079633f,             0.0f,         SingleCrossPlatformMachineEpsilon };      // value:  (pi / 2)
+                yield return new object[] {  2.30258509f,            -0.668201510f, SingleCrossPlatformMachineEpsilon };      // value:  (ln(10))
+                yield return new object[] {  2.71828183f,            -0.911733918f, SingleCrossPlatformMachineEpsilon };      // value:  (e)
+                yield return new object[] {  3.14159265f,            -1.0f,         SingleCrossPlatformMachineEpsilon * 10 }; // value:  (pi)
+                yield return new object[] {  float.PositiveInfinity,  float.NaN,    0.0f };
+            }
+        }
+
         public static IEnumerable<object[]> DegreesToRadiansDouble
         {
             get
@@ -981,7 +1061,7 @@ public static IEnumerable<object[]> Log2Double
                 yield return new object[] { 0.64321824193300488,     -0.63661977236758126,     DoubleCrossPlatformMachineEpsilon };         // expected: -(2 / pi)
                 yield return new object[] { 0.74005557395545179,     -0.43429448190325190,     DoubleCrossPlatformMachineEpsilon };         // expected: -(log10(e))
                 yield return new object[] { 0.80200887896145195,     -0.31830988618379073,     DoubleCrossPlatformMachineEpsilon };         // expected: -(1 / pi)
-                yield return new object[] { 1,                        0.0,                     0.0 };                        
+                yield return new object[] { 1,                        0.0,                     0.0 };
                 yield return new object[] { 1.2468689889006383,       0.31830988618379073,     DoubleCrossPlatformMachineEpsilon };         // expected:  (1 / pi)
                 yield return new object[] { 1.3512498725672678,       0.43429448190325226,     DoubleCrossPlatformMachineEpsilon };         // expected:  (log10(e))
                 yield return new object[] { 1.5546822754821001,       0.63661977236758126,     DoubleCrossPlatformMachineEpsilon };         // expected:  (2 / pi)
@@ -1664,6 +1744,170 @@ public static IEnumerable<object[]> RoundToEvenSingle
             }
         }
 
+        public static IEnumerable<object[]> SinDouble // rows: { value, expected sin(value), allowed variance } as double
+        {
+            get // rows run in ascending value order with NaN placed between -0.0 and 0.0; each -x row expects the negated result of its +x row
+            {
+                yield return new object[] {  double.NegativeInfinity,  double.NaN,          0.0 };
+                yield return new object[] { -3.1415926535897932,      -0.0,                 DoubleCrossPlatformMachineEpsilon };      // value: -(pi)
+                yield return new object[] { -2.7182818284590452,      -0.41078129050290870, DoubleCrossPlatformMachineEpsilon };      // value: -(e)
+                yield return new object[] { -2.3025850929940457,      -0.74398033695749319, DoubleCrossPlatformMachineEpsilon };      // value: -(ln(10))
+                yield return new object[] { -1.5707963267948966,      -1.0,                 DoubleCrossPlatformMachineEpsilon * 10 }; // value: -(pi / 2)
+                yield return new object[] { -1.4426950408889634,      -0.99180624439366372, DoubleCrossPlatformMachineEpsilon };      // value: -(log2(e))
+                yield return new object[] { -1.4142135623730950,      -0.98776594599273553, DoubleCrossPlatformMachineEpsilon };      // value: -(sqrt(2))
+                yield return new object[] { -1.1283791670955126,      -0.90371945743584630, DoubleCrossPlatformMachineEpsilon };      // value: -(2 / sqrt(pi))
+                yield return new object[] { -1.0,                     -0.84147098480789651, DoubleCrossPlatformMachineEpsilon };
+                yield return new object[] { -0.78539816339744831,     -0.70710678118654752, DoubleCrossPlatformMachineEpsilon };      // value: -(pi / 4),        expected: -(1 / sqrt(2))
+                yield return new object[] { -0.70710678118654752,     -0.64963693908006244, DoubleCrossPlatformMachineEpsilon };      // value: -(1 / sqrt(2))
+                yield return new object[] { -0.69314718055994531,     -0.63896127631363480, DoubleCrossPlatformMachineEpsilon };      // value: -(ln(2))
+                yield return new object[] { -0.63661977236758134,     -0.59448076852482208, DoubleCrossPlatformMachineEpsilon };      // value: -(2 / pi)
+                yield return new object[] { -0.43429448190325183,     -0.42077048331375735, DoubleCrossPlatformMachineEpsilon };      // value: -(log10(e))
+                yield return new object[] { -0.31830988618379067,     -0.31296179620778659, DoubleCrossPlatformMachineEpsilon };      // value: -(1 / pi)
+                yield return new object[] { -0.0,                     -0.0,                 0.0 };
+                yield return new object[] {  double.NaN,               double.NaN,          0.0 };
+                yield return new object[] {  0.0,                      0.0,                 0.0 };
+                yield return new object[] {  0.31830988618379067,      0.31296179620778659, DoubleCrossPlatformMachineEpsilon };      // value:  (1 / pi)
+                yield return new object[] {  0.43429448190325183,      0.42077048331375735, DoubleCrossPlatformMachineEpsilon };      // value:  (log10(e))
+                yield return new object[] {  0.63661977236758134,      0.59448076852482208, DoubleCrossPlatformMachineEpsilon };      // value:  (2 / pi)
+                yield return new object[] {  0.69314718055994531,      0.63896127631363480, DoubleCrossPlatformMachineEpsilon };      // value:  (ln(2))
+                yield return new object[] {  0.70710678118654752,      0.64963693908006244, DoubleCrossPlatformMachineEpsilon };      // value:  (1 / sqrt(2))
+                yield return new object[] {  0.78539816339744831,      0.70710678118654752, DoubleCrossPlatformMachineEpsilon };      // value:  (pi / 4),        expected:  (1 / sqrt(2))
+                yield return new object[] {  1.0,                      0.84147098480789651, DoubleCrossPlatformMachineEpsilon };
+                yield return new object[] {  1.1283791670955126,       0.90371945743584630, DoubleCrossPlatformMachineEpsilon };      // value:  (2 / sqrt(pi))
+                yield return new object[] {  1.4142135623730950,       0.98776594599273553, DoubleCrossPlatformMachineEpsilon };      // value:  (sqrt(2))
+                yield return new object[] {  1.4426950408889634,       0.99180624439366372, DoubleCrossPlatformMachineEpsilon };      // value:  (log2(e))
+                yield return new object[] {  1.5707963267948966,       1.0,                 DoubleCrossPlatformMachineEpsilon * 10 }; // value:  (pi / 2)
+                yield return new object[] {  2.3025850929940457,       0.74398033695749319, DoubleCrossPlatformMachineEpsilon };      // value:  (ln(10))
+                yield return new object[] {  2.7182818284590452,       0.41078129050290870, DoubleCrossPlatformMachineEpsilon };      // value:  (e)
+                yield return new object[] {  3.1415926535897932,       0.0,                 DoubleCrossPlatformMachineEpsilon };      // value:  (pi)
+                yield return new object[] {  double.PositiveInfinity,  double.NaN,          0.0 };
+            }
+        }
+
+        public static IEnumerable<object[]> SinSingle // rows: { value, expected sin(value), allowed variance } as float
+        {
+            get // rows run in ascending value order with NaN placed between -0.0 and 0.0; each -x row expects the negated result of its +x row
+            {
+                yield return new object[] {  float.NegativeInfinity,  float.NaN,    0.0f };
+                yield return new object[] { -3.14159265f,            -0.0f,         SingleCrossPlatformMachineEpsilon };      // value: -(pi)
+                yield return new object[] { -2.71828183f,            -0.410781291f, SingleCrossPlatformMachineEpsilon };      // value: -(e)
+                yield return new object[] { -2.30258509f,            -0.743980337f, SingleCrossPlatformMachineEpsilon };      // value: -(ln(10))
+                yield return new object[] { -1.57079633f,            -1.0f,         SingleCrossPlatformMachineEpsilon * 10 }; // value: -(pi / 2)
+                yield return new object[] { -1.44269504f,            -0.991806244f, SingleCrossPlatformMachineEpsilon };      // value: -(log2(e))
+                yield return new object[] { -1.41421356f,            -0.987765946f, SingleCrossPlatformMachineEpsilon };      // value: -(sqrt(2))
+                yield return new object[] { -1.12837917f,            -0.903719457f, SingleCrossPlatformMachineEpsilon };      // value: -(2 / sqrt(pi))
+                yield return new object[] { -1.0f,                   -0.841470985f, SingleCrossPlatformMachineEpsilon };
+                yield return new object[] { -0.785398163f,           -0.707106781f, SingleCrossPlatformMachineEpsilon };      // value: -(pi / 4),        expected: -(1 / sqrt(2))
+                yield return new object[] { -0.707106781f,           -0.649636939f, SingleCrossPlatformMachineEpsilon };      // value: -(1 / sqrt(2))
+                yield return new object[] { -0.693147181f,           -0.638961276f, SingleCrossPlatformMachineEpsilon };      // value: -(ln(2))
+                yield return new object[] { -0.636619772f,           -0.594480769f, SingleCrossPlatformMachineEpsilon };      // value: -(2 / pi)
+                yield return new object[] { -0.434294482f,           -0.420770483f, SingleCrossPlatformMachineEpsilon };      // value: -(log10(e))
+                yield return new object[] { -0.318309886f,           -0.312961796f, SingleCrossPlatformMachineEpsilon };      // value: -(1 / pi)
+                yield return new object[] { -0.0f,                   -0.0f,         0.0f };
+                yield return new object[] {  float.NaN,               float.NaN,    0.0f };
+                yield return new object[] {  0.0f,                    0.0f,         0.0f };
+                yield return new object[] {  0.318309886f,            0.312961796f, SingleCrossPlatformMachineEpsilon };      // value:  (1 / pi)
+                yield return new object[] {  0.434294482f,            0.420770483f, SingleCrossPlatformMachineEpsilon };      // value:  (log10(e))
+                yield return new object[] {  0.636619772f,            0.594480769f, SingleCrossPlatformMachineEpsilon };      // value:  (2 / pi)
+                yield return new object[] {  0.693147181f,            0.638961276f, SingleCrossPlatformMachineEpsilon };      // value:  (ln(2))
+                yield return new object[] {  0.707106781f,            0.649636939f, SingleCrossPlatformMachineEpsilon };      // value:  (1 / sqrt(2))
+                yield return new object[] {  0.785398163f,            0.707106781f, SingleCrossPlatformMachineEpsilon };      // value:  (pi / 4),        expected:  (1 / sqrt(2))
+                yield return new object[] {  1.0f,                    0.841470985f, SingleCrossPlatformMachineEpsilon };
+                yield return new object[] {  1.12837917f,             0.903719457f, SingleCrossPlatformMachineEpsilon };      // value:  (2 / sqrt(pi))
+                yield return new object[] {  1.41421356f,             0.987765946f, SingleCrossPlatformMachineEpsilon };      // value:  (sqrt(2))
+                yield return new object[] {  1.44269504f,             0.991806244f, SingleCrossPlatformMachineEpsilon };      // value:  (log2(e))
+                yield return new object[] {  1.57079633f,             1.0f,         SingleCrossPlatformMachineEpsilon * 10 }; // value:  (pi / 2)
+                yield return new object[] {  2.30258509f,             0.743980337f, SingleCrossPlatformMachineEpsilon };      // value:  (ln(10))
+                yield return new object[] {  2.71828183f,             0.410781291f, SingleCrossPlatformMachineEpsilon };      // value:  (e)
+                yield return new object[] {  3.14159265f,             0.0f,         SingleCrossPlatformMachineEpsilon };      // value:  (pi)
+                yield return new object[] {  float.PositiveInfinity,  float.NaN,    0.0f };
+            }
+        }
+
+        public static IEnumerable<object[]> SinCosDouble // rows: { value, expected sin, expected cos, allowed sin variance, allowed cos variance } as double
+        {
+            get // rows run in ascending value order with NaN placed between -0.0 and 0.0; the +/-1e18 rows use explicit, much looser variances (see linked issue)
+            {
+                yield return new object[] {  double.NegativeInfinity,  double.NaN,           double.NaN,          0.0,                                    0.0 };
+                yield return new object[] { -1e18,                     0.9929693207404051,   0.11837199021871073, 0.0002,                                 0.002 };                                  // https://github.com/dotnet/runtime/issues/98204
+                yield return new object[] { -3.1415926535897932,      -0.0,                 -1.0,                 DoubleCrossPlatformMachineEpsilon,      DoubleCrossPlatformMachineEpsilon * 10 }; // value: -(pi)
+                yield return new object[] { -2.7182818284590452,      -0.41078129050290870, -0.91173391478696510, DoubleCrossPlatformMachineEpsilon,      DoubleCrossPlatformMachineEpsilon };      // value: -(e)
+                yield return new object[] { -2.3025850929940457,      -0.74398033695749319, -0.66820151019031295, DoubleCrossPlatformMachineEpsilon,      DoubleCrossPlatformMachineEpsilon };      // value: -(ln(10))
+                yield return new object[] { -1.5707963267948966,      -1.0,                  0.0,                 DoubleCrossPlatformMachineEpsilon * 10, DoubleCrossPlatformMachineEpsilon };      // value: -(pi / 2)
+                yield return new object[] { -1.4426950408889634,      -0.99180624439366372,  0.12775121753523991, DoubleCrossPlatformMachineEpsilon,      DoubleCrossPlatformMachineEpsilon };      // value: -(log2(e))
+                yield return new object[] { -1.4142135623730950,      -0.98776594599273553,  0.15594369476537447, DoubleCrossPlatformMachineEpsilon,      DoubleCrossPlatformMachineEpsilon };      // value: -(sqrt(2))
+                yield return new object[] { -1.1283791670955126,      -0.90371945743584630,  0.42812514788535792, DoubleCrossPlatformMachineEpsilon,      DoubleCrossPlatformMachineEpsilon };      // value: -(2 / sqrt(pi))
+                yield return new object[] { -1.0,                     -0.84147098480789651,  0.54030230586813972, DoubleCrossPlatformMachineEpsilon,      DoubleCrossPlatformMachineEpsilon };
+                yield return new object[] { -0.78539816339744831,     -0.70710678118654752,  0.70710678118654752, DoubleCrossPlatformMachineEpsilon,      DoubleCrossPlatformMachineEpsilon };      // value: -(pi / 4),        expected_sin: -(1 / sqrt(2)),    expected_cos:  (1 / sqrt(2))
+                yield return new object[] { -0.70710678118654752,     -0.64963693908006244,  0.76024459707563015, DoubleCrossPlatformMachineEpsilon,      DoubleCrossPlatformMachineEpsilon };      // value: -(1 / sqrt(2))
+                yield return new object[] { -0.69314718055994531,     -0.63896127631363480,  0.76923890136397213, DoubleCrossPlatformMachineEpsilon,      DoubleCrossPlatformMachineEpsilon };      // value: -(ln(2))
+                yield return new object[] { -0.63661977236758134,     -0.59448076852482208,  0.80410982822879171, DoubleCrossPlatformMachineEpsilon,      DoubleCrossPlatformMachineEpsilon };      // value: -(2 / pi)
+                yield return new object[] { -0.43429448190325183,     -0.42077048331375735,  0.90716712923909839, DoubleCrossPlatformMachineEpsilon,      DoubleCrossPlatformMachineEpsilon };      // value: -(log10(e))
+                yield return new object[] { -0.31830988618379067,     -0.31296179620778659,  0.94976571538163866, DoubleCrossPlatformMachineEpsilon,      DoubleCrossPlatformMachineEpsilon };      // value: -(1 / pi)
+                yield return new object[] { -0.0,                     -0.0,                  1.0,                 0.0,                                    DoubleCrossPlatformMachineEpsilon * 10 };
+                yield return new object[] {  double.NaN,               double.NaN,           double.NaN,          0.0,                                    0.0 };
+                yield return new object[] {  0.0,                      0.0,                  1.0,                 0.0,                                    DoubleCrossPlatformMachineEpsilon * 10 };
+                yield return new object[] {  0.31830988618379067,      0.31296179620778659,  0.94976571538163866, DoubleCrossPlatformMachineEpsilon,      DoubleCrossPlatformMachineEpsilon };      // value:  (1 / pi)
+                yield return new object[] {  0.43429448190325183,      0.42077048331375735,  0.90716712923909839, DoubleCrossPlatformMachineEpsilon,      DoubleCrossPlatformMachineEpsilon };      // value:  (log10(e))
+                yield return new object[] {  0.63661977236758134,      0.59448076852482208,  0.80410982822879171, DoubleCrossPlatformMachineEpsilon,      DoubleCrossPlatformMachineEpsilon };      // value:  (2 / pi)
+                yield return new object[] {  0.69314718055994531,      0.63896127631363480,  0.76923890136397213, DoubleCrossPlatformMachineEpsilon,      DoubleCrossPlatformMachineEpsilon };      // value:  (ln(2))
+                yield return new object[] {  0.70710678118654752,      0.64963693908006244,  0.76024459707563015, DoubleCrossPlatformMachineEpsilon,      DoubleCrossPlatformMachineEpsilon };      // value:  (1 / sqrt(2))
+                yield return new object[] {  0.78539816339744831,      0.70710678118654752,  0.70710678118654752, DoubleCrossPlatformMachineEpsilon,      DoubleCrossPlatformMachineEpsilon };      // value:  (pi / 4),        expected_sin:  (1 / sqrt(2)),    expected_cos:  (1 / sqrt(2))
+                yield return new object[] {  1.0,                      0.84147098480789651,  0.54030230586813972, DoubleCrossPlatformMachineEpsilon,      DoubleCrossPlatformMachineEpsilon };
+                yield return new object[] {  1.1283791670955126,       0.90371945743584630,  0.42812514788535792, DoubleCrossPlatformMachineEpsilon,      DoubleCrossPlatformMachineEpsilon };      // value:  (2 / sqrt(pi))
+                yield return new object[] {  1.4142135623730950,       0.98776594599273553,  0.15594369476537447, DoubleCrossPlatformMachineEpsilon,      DoubleCrossPlatformMachineEpsilon };      // value:  (sqrt(2))
+                yield return new object[] {  1.4426950408889634,       0.99180624439366372,  0.12775121753523991, DoubleCrossPlatformMachineEpsilon,      DoubleCrossPlatformMachineEpsilon };      // value:  (log2(e))
+                yield return new object[] {  1.5707963267948966,       1.0,                  0.0,                 DoubleCrossPlatformMachineEpsilon * 10, DoubleCrossPlatformMachineEpsilon };      // value:  (pi / 2)
+                yield return new object[] {  2.3025850929940457,       0.74398033695749319, -0.66820151019031295, DoubleCrossPlatformMachineEpsilon,      DoubleCrossPlatformMachineEpsilon };      // value:  (ln(10))
+                yield return new object[] {  2.7182818284590452,       0.41078129050290870, -0.91173391478696510, DoubleCrossPlatformMachineEpsilon,      DoubleCrossPlatformMachineEpsilon };      // value:  (e)
+                yield return new object[] {  3.1415926535897932,       0.0,                 -1.0,                 DoubleCrossPlatformMachineEpsilon,      DoubleCrossPlatformMachineEpsilon * 10 }; // value:  (pi)
+                yield return new object[] {  1e18,                    -0.9929693207404051,   0.11837199021871073, 0.0002,                                 0.002 };                                  // https://github.com/dotnet/runtime/issues/98204
+                yield return new object[] {  double.PositiveInfinity,  double.NaN,           double.NaN,          0.0,                                    0.0 };
+            }
+        }
+
+        public static IEnumerable<object[]> SinCosSingle // rows: { value, expected sin, expected cos, allowed sin variance, allowed cos variance } as float
+        {
+            get // rows run in ascending value order with NaN placed between -0.0 and 0.0; every element is a float literal so it boxes as System.Single
+            {
+                yield return new object[] {  float.NegativeInfinity,  float.NaN,     float.NaN,    0.0f,                                   0.0f };
+                yield return new object[] { -1e8f,                   -0.931639f,    -0.36338508f,  SingleCrossPlatformMachineEpsilon,      SingleCrossPlatformMachineEpsilon };         // https://github.com/dotnet/runtime/issues/98204
+                yield return new object[] { -3.14159265f,            -0.0f,         -1.0f,         SingleCrossPlatformMachineEpsilon,      SingleCrossPlatformMachineEpsilon * 10 };    // value: -(pi)
+                yield return new object[] { -2.71828183f,            -0.410781291f, -0.911733918f, SingleCrossPlatformMachineEpsilon,      SingleCrossPlatformMachineEpsilon };         // value: -(e)
+                yield return new object[] { -2.30258509f,            -0.743980337f, -0.668201510f, SingleCrossPlatformMachineEpsilon,      SingleCrossPlatformMachineEpsilon };         // value: -(ln(10))
+                yield return new object[] { -1.57079633f,            -1.0f,          0.0f,         SingleCrossPlatformMachineEpsilon * 10, SingleCrossPlatformMachineEpsilon };         // value: -(pi / 2)
+                yield return new object[] { -1.44269504f,            -0.991806244f,  0.127751218f, SingleCrossPlatformMachineEpsilon,      SingleCrossPlatformMachineEpsilon };         // value: -(log2(e))
+                yield return new object[] { -1.41421356f,            -0.987765946f,  0.155943695f, SingleCrossPlatformMachineEpsilon,      SingleCrossPlatformMachineEpsilon };         // value: -(sqrt(2))
+                yield return new object[] { -1.12837917f,            -0.903719457f,  0.428125148f, SingleCrossPlatformMachineEpsilon,      SingleCrossPlatformMachineEpsilon };         // value: -(2 / sqrt(pi))
+                yield return new object[] { -1.0f,                   -0.841470985f,  0.540302306f, SingleCrossPlatformMachineEpsilon,      SingleCrossPlatformMachineEpsilon };
+                yield return new object[] { -0.785398163f,           -0.707106781f,  0.707106781f, SingleCrossPlatformMachineEpsilon,      SingleCrossPlatformMachineEpsilon };         // value: -(pi / 4),        expected_sin: -(1 / sqrt(2)),    expected_cos:  (1 / sqrt(2))
+                yield return new object[] { -0.707106781f,           -0.649636939f,  0.760244597f, SingleCrossPlatformMachineEpsilon,      SingleCrossPlatformMachineEpsilon };         // value: -(1 / sqrt(2))
+                yield return new object[] { -0.693147181f,           -0.638961276f,  0.769238901f, SingleCrossPlatformMachineEpsilon,      SingleCrossPlatformMachineEpsilon };         // value: -(ln(2))
+                yield return new object[] { -0.636619772f,           -0.594480769f,  0.804109828f, SingleCrossPlatformMachineEpsilon,      SingleCrossPlatformMachineEpsilon };         // value: -(2 / pi)
+                yield return new object[] { -0.434294482f,           -0.420770483f,  0.907167129f, SingleCrossPlatformMachineEpsilon,      SingleCrossPlatformMachineEpsilon };         // value: -(log10(e))
+                yield return new object[] { -0.318309886f,           -0.312961796f,  0.949765715f, SingleCrossPlatformMachineEpsilon,      SingleCrossPlatformMachineEpsilon };         // value: -(1 / pi)
+                yield return new object[] { -0.0f,                   -0.0f,          1.0f,         0.0f,                                   SingleCrossPlatformMachineEpsilon * 10 };
+                yield return new object[] {  float.NaN,               float.NaN,     float.NaN,    0.0f,                                   0.0f };
+                yield return new object[] {  0.0f,                    0.0f,          1.0f,         0.0f,                                   SingleCrossPlatformMachineEpsilon * 10 };
+                yield return new object[] {  0.318309886f,            0.312961796f,  0.949765715f, SingleCrossPlatformMachineEpsilon,      SingleCrossPlatformMachineEpsilon };         // value:  (1 / pi)
+                yield return new object[] {  0.434294482f,            0.420770483f,  0.907167129f, SingleCrossPlatformMachineEpsilon,      SingleCrossPlatformMachineEpsilon };         // value:  (log10(e))
+                yield return new object[] {  0.636619772f,            0.594480769f,  0.804109828f, SingleCrossPlatformMachineEpsilon,      SingleCrossPlatformMachineEpsilon };         // value:  (2 / pi)
+                yield return new object[] {  0.693147181f,            0.638961276f,  0.769238901f, SingleCrossPlatformMachineEpsilon,      SingleCrossPlatformMachineEpsilon };         // value:  (ln(2))
+                yield return new object[] {  0.707106781f,            0.649636939f,  0.760244597f, SingleCrossPlatformMachineEpsilon,      SingleCrossPlatformMachineEpsilon };         // value:  (1 / sqrt(2))
+                yield return new object[] {  0.785398163f,            0.707106781f,  0.707106781f, SingleCrossPlatformMachineEpsilon,      SingleCrossPlatformMachineEpsilon };         // value:  (pi / 4),        expected_sin:  (1 / sqrt(2)),    expected_cos:  (1 / sqrt(2))
+                yield return new object[] {  1.0f,                    0.841470985f,  0.540302306f, SingleCrossPlatformMachineEpsilon,      SingleCrossPlatformMachineEpsilon };
+                yield return new object[] {  1.12837917f,             0.903719457f,  0.428125148f, SingleCrossPlatformMachineEpsilon,      SingleCrossPlatformMachineEpsilon };         // value:  (2 / sqrt(pi))
+                yield return new object[] {  1.41421356f,             0.987765946f,  0.155943695f, SingleCrossPlatformMachineEpsilon,      SingleCrossPlatformMachineEpsilon };         // value:  (sqrt(2))
+                yield return new object[] {  1.44269504f,             0.991806244f,  0.127751218f, SingleCrossPlatformMachineEpsilon,      SingleCrossPlatformMachineEpsilon };         // value:  (log2(e))
+                yield return new object[] {  1.57079633f,             1.0f,          0.0f,         SingleCrossPlatformMachineEpsilon * 10, SingleCrossPlatformMachineEpsilon };         // value:  (pi / 2)
+                yield return new object[] {  2.30258509f,             0.743980337f, -0.668201510f, SingleCrossPlatformMachineEpsilon,      SingleCrossPlatformMachineEpsilon };         // value:  (ln(10))
+                yield return new object[] {  2.71828183f,             0.410781291f, -0.911733918f, SingleCrossPlatformMachineEpsilon,      SingleCrossPlatformMachineEpsilon };         // value:  (e)
+                yield return new object[] {  3.14159265f,             0.0f,         -1.0f,         SingleCrossPlatformMachineEpsilon,      SingleCrossPlatformMachineEpsilon * 10 };    // value:  (pi)
+                yield return new object[] {  1e8f,                    0.931639f,    -0.36338508f,  SingleCrossPlatformMachineEpsilon,      SingleCrossPlatformMachineEpsilon };         // https://github.com/dotnet/runtime/issues/98204
+                yield return new object[] {  float.PositiveInfinity,  float.NaN,     float.NaN,    0.0f,                                   0.0f };
+            }
+        }
+
         public static IEnumerable<object[]> TruncateDouble
         {
             get
diff --git a/src/libraries/System.Numerics.Vectors/tests/GenericVectorTests.cs b/src/libraries/System.Numerics.Vectors/tests/GenericVectorTests.cs
index 8caf901a258be..14b1a2ec62833 100644
--- a/src/libraries/System.Numerics.Vectors/tests/GenericVectorTests.cs
+++ b/src/libraries/System.Numerics.Vectors/tests/GenericVectorTests.cs
@@ -4536,6 +4536,22 @@ private static void TestCreateSequence<T>(T start, T step)
             }
         }
 
+        [Theory] // Checks Vector.Cos on a broadcast double against each CosDouble row, using that row's variance.
+        [MemberData(nameof(GenericMathTestMemberData.CosDouble), MemberType = typeof(GenericMathTestMemberData))]
+        public void CosDoubleTest(double value, double expectedResult, double variance)
+        {
+            Vector<double> input = Vector.Create(value); // same test value in every element
+            AssertEqual(Vector.Create(expectedResult), Vector.Cos(input), Vector.Create(variance));
+        }
+
+        [Theory] // Checks Vector.Cos on a broadcast float against each CosSingle row, using that row's variance.
+        [MemberData(nameof(GenericMathTestMemberData.CosSingle), MemberType = typeof(GenericMathTestMemberData))]
+        public void CosSingleTest(float value, float expectedResult, float variance) // was misspelled "CosCosgleTest"; renamed to match the <Op><Type>Test siblings
+        {
+            Vector<float> actualResult = Vector.Cos(Vector.Create(value)); // same test value in every element
+            AssertEqual(Vector.Create(expectedResult), actualResult, Vector.Create(variance));
+        }
+
         [Theory]
         [MemberData(nameof(GenericMathTestMemberData.ExpDouble), MemberType = typeof(GenericMathTestMemberData))]
         public void ExpDoubleTest(double value, double expectedResult, double variance)
@@ -4959,6 +4975,40 @@ public void RoundToEvenSingleTest(float value, float expectedResult)
             AssertEqual(Vector.Create(expectedResult), actualResult, Vector<float>.Zero);
         }
 
+        [Theory]
+        [MemberData(nameof(GenericMathTestMemberData.SinDouble), MemberType = typeof(GenericMathTestMemberData))]
+        public void SinDoubleTest(double value, double expectedResult, double variance)
+        {
+            // Every element holds the same scalar case, so all elements are checked against one expected value.
+            AssertEqual(Vector.Create(expectedResult), Vector.Sin(Vector.Create(value)), Vector.Create(variance));
+        }
+
+        [Theory]
+        [MemberData(nameof(GenericMathTestMemberData.SinSingle), MemberType = typeof(GenericMathTestMemberData))]
+        public void SinSingleTest(float value, float expectedResult, float variance)
+        {
+            // Every element holds the same scalar case, so all elements are checked against one expected value.
+            AssertEqual(Vector.Create(expectedResult), Vector.Sin(Vector.Create(value)), Vector.Create(variance));
+        }
+
+        [Theory]
+        [MemberData(nameof(GenericMathTestMemberData.SinCosDouble), MemberType = typeof(GenericMathTestMemberData))]
+        public void SinCosDoubleTest(double value, double expectedResultSin, double expectedResultCos, double allowedVarianceSin, double allowedVarianceCos)
+        {
+            (Vector<double> Sin, Vector<double> Cos) computed = Vector.SinCos(Vector.Create(value)); // single call produces both results
+            AssertEqual(Vector.Create(expectedResultSin), computed.Sin, Vector.Create(allowedVarianceSin)); // sine half
+            AssertEqual(Vector.Create(expectedResultCos), computed.Cos, Vector.Create(allowedVarianceCos)); // cosine half
+        }
+
+        [Theory]
+        [MemberData(nameof(GenericMathTestMemberData.SinCosSingle), MemberType = typeof(GenericMathTestMemberData))]
+        public void SinCosSingleTest(float value, float expectedResultSin, float expectedResultCos, float allowedVarianceSin, float allowedVarianceCos)
+        {
+            (Vector<float> Sin, Vector<float> Cos) computed = Vector.SinCos(Vector.Create(value)); // single call produces both results
+            AssertEqual(Vector.Create(expectedResultSin), computed.Sin, Vector.Create(allowedVarianceSin)); // sine half
+            AssertEqual(Vector.Create(expectedResultCos), computed.Cos, Vector.Create(allowedVarianceCos)); // cosine half
+        }
+
         [Theory]
         [MemberData(nameof(GenericMathTestMemberData.TruncateDouble), MemberType = typeof(GenericMathTestMemberData))]
         public void TruncateDoubleTest(double value, double expectedResult)
diff --git a/src/libraries/System.Numerics.Vectors/tests/Vector2Tests.cs b/src/libraries/System.Numerics.Vectors/tests/Vector2Tests.cs
index 3d130d2655609..f2b4a61bbd9bc 100644
--- a/src/libraries/System.Numerics.Vectors/tests/Vector2Tests.cs
+++ b/src/libraries/System.Numerics.Vectors/tests/Vector2Tests.cs
@@ -1299,6 +1299,14 @@ private class EmbeddedVectorObject
             public Vector2 FieldVector;
         }
 
+        [Theory]
+        [MemberData(nameof(GenericMathTestMemberData.CosSingle), MemberType = typeof(GenericMathTestMemberData))]
+        public void CosSingleTest(float value, float expectedResult, float variance)
+        {
+            // Both components hold the same scalar case, so each is checked against one expected value.
+            AssertEqual(Vector2.Create(expectedResult), Vector2.Cos(Vector2.Create(value)), Vector2.Create(variance));
+        }
+
         [Theory]
         [MemberData(nameof(GenericMathTestMemberData.ExpSingle), MemberType = typeof(GenericMathTestMemberData))]
         public void ExpSingleTest(float value, float expectedResult, float variance)
@@ -1474,6 +1482,23 @@ public void RoundToEvenSingleTest(float value, float expectedResult)
             AssertEqual(Vector2.Create(expectedResult), actualResult, Vector2.Zero);
         }
 
+        [Theory]
+        [MemberData(nameof(GenericMathTestMemberData.SinSingle), MemberType = typeof(GenericMathTestMemberData))]
+        public void SinSingleTest(float value, float expectedResult, float variance)
+        {
+            // Both components hold the same scalar case, so each is checked against one expected value.
+            AssertEqual(Vector2.Create(expectedResult), Vector2.Sin(Vector2.Create(value)), Vector2.Create(variance));
+        }
+
+        [Theory]
+        [MemberData(nameof(GenericMathTestMemberData.SinCosSingle), MemberType = typeof(GenericMathTestMemberData))]
+        public void SinCosSingleTest(float value, float expectedResultSin, float expectedResultCos, float allowedVarianceSin, float allowedVarianceCos)
+        {
+            (Vector2 Sin, Vector2 Cos) computed = Vector2.SinCos(Vector2.Create(value)); // single call produces both results
+            AssertEqual(Vector2.Create(expectedResultSin), computed.Sin, Vector2.Create(allowedVarianceSin)); // sine half
+            AssertEqual(Vector2.Create(expectedResultCos), computed.Cos, Vector2.Create(allowedVarianceCos)); // cosine half
+        }
+
         [Theory]
         [MemberData(nameof(GenericMathTestMemberData.TruncateSingle), MemberType = typeof(GenericMathTestMemberData))]
         public void TruncateSingleTest(float value, float expectedResult)
diff --git a/src/libraries/System.Numerics.Vectors/tests/Vector3Tests.cs b/src/libraries/System.Numerics.Vectors/tests/Vector3Tests.cs
index 9ec5e50423938..210caf826fb10 100644
--- a/src/libraries/System.Numerics.Vectors/tests/Vector3Tests.cs
+++ b/src/libraries/System.Numerics.Vectors/tests/Vector3Tests.cs
@@ -1349,6 +1349,14 @@ private class EmbeddedVectorObject
             public Vector3 FieldVector;
         }
 
+        [Theory]
+        [MemberData(nameof(GenericMathTestMemberData.CosSingle), MemberType = typeof(GenericMathTestMemberData))]
+        public void CosSingleTest(float value, float expectedResult, float variance)
+        {
+            // All three components hold the same scalar case, so each is checked against one expected value.
+            AssertEqual(Vector3.Create(expectedResult), Vector3.Cos(Vector3.Create(value)), Vector3.Create(variance));
+        }
+
         [Theory]
         [MemberData(nameof(GenericMathTestMemberData.ExpSingle), MemberType = typeof(GenericMathTestMemberData))]
         public void ExpSingleTest(float value, float expectedResult, float variance)
@@ -1524,6 +1532,23 @@ public void RoundToEvenSingleTest(float value, float expectedResult)
             AssertEqual(Vector3.Create(expectedResult), actualResult, Vector3.Zero);
         }
 
+        [Theory]
+        [MemberData(nameof(GenericMathTestMemberData.SinSingle), MemberType = typeof(GenericMathTestMemberData))]
+        public void SinSingleTest(float value, float expectedResult, float variance)
+        {
+            // All three components hold the same scalar case, so each is checked against one expected value.
+            AssertEqual(Vector3.Create(expectedResult), Vector3.Sin(Vector3.Create(value)), Vector3.Create(variance));
+        }
+
+        [Theory]
+        [MemberData(nameof(GenericMathTestMemberData.SinCosSingle), MemberType = typeof(GenericMathTestMemberData))]
+        public void SinCosSingleTest(float value, float expectedResultSin, float expectedResultCos, float allowedVarianceSin, float allowedVarianceCos)
+        {
+            (Vector3 Sin, Vector3 Cos) computed = Vector3.SinCos(Vector3.Create(value)); // single call produces both results
+            AssertEqual(Vector3.Create(expectedResultSin), computed.Sin, Vector3.Create(allowedVarianceSin)); // sine half
+            AssertEqual(Vector3.Create(expectedResultCos), computed.Cos, Vector3.Create(allowedVarianceCos)); // cosine half
+        }
+
         [Theory]
         [MemberData(nameof(GenericMathTestMemberData.TruncateSingle), MemberType = typeof(GenericMathTestMemberData))]
         public void TruncateSingleTest(float value, float expectedResult)
diff --git a/src/libraries/System.Numerics.Vectors/tests/Vector4Tests.cs b/src/libraries/System.Numerics.Vectors/tests/Vector4Tests.cs
index d1144cdf3e5cd..74ffeeba94c56 100644
--- a/src/libraries/System.Numerics.Vectors/tests/Vector4Tests.cs
+++ b/src/libraries/System.Numerics.Vectors/tests/Vector4Tests.cs
@@ -1724,6 +1724,14 @@ public struct Level7
         }
 #pragma warning restore 0169
 
+        [Theory]
+        [MemberData(nameof(GenericMathTestMemberData.CosSingle), MemberType = typeof(GenericMathTestMemberData))]
+        public void CosSingleTest(float value, float expectedResult, float variance)
+        {
+            // All four components hold the same scalar case, so each is checked against one expected value.
+            AssertEqual(Vector4.Create(expectedResult), Vector4.Cos(Vector4.Create(value)), Vector4.Create(variance));
+        }
+
         [Theory]
         [MemberData(nameof(GenericMathTestMemberData.ExpSingle), MemberType = typeof(GenericMathTestMemberData))]
         public void ExpSingleTest(float value, float expectedResult, float variance)
@@ -1899,6 +1907,23 @@ public void RoundToEvenSingleTest(float value, float expectedResult)
             AssertEqual(Vector4.Create(expectedResult), actualResult, Vector4.Zero);
         }
 
+        [Theory]
+        [MemberData(nameof(GenericMathTestMemberData.SinSingle), MemberType = typeof(GenericMathTestMemberData))]
+        public void SinSingleTest(float value, float expectedResult, float variance)
+        {
+            // All four components hold the same scalar case, so each is checked against one expected value.
+            AssertEqual(Vector4.Create(expectedResult), Vector4.Sin(Vector4.Create(value)), Vector4.Create(variance));
+        }
+
+        [Theory]
+        [MemberData(nameof(GenericMathTestMemberData.SinCosSingle), MemberType = typeof(GenericMathTestMemberData))]
+        public void SinCosSingleTest(float value, float expectedResultSin, float expectedResultCos, float allowedVarianceSin, float allowedVarianceCos)
+        {
+            (Vector4 Sin, Vector4 Cos) computed = Vector4.SinCos(Vector4.Create(value)); // single call produces both results
+            AssertEqual(Vector4.Create(expectedResultSin), computed.Sin, Vector4.Create(allowedVarianceSin)); // sine half
+            AssertEqual(Vector4.Create(expectedResultCos), computed.Cos, Vector4.Create(allowedVarianceCos)); // cosine half
+        }
+
         [Theory]
         [MemberData(nameof(GenericMathTestMemberData.TruncateSingle), MemberType = typeof(GenericMathTestMemberData))]
         public void TruncateSingleTest(float value, float expectedResult)
diff --git a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector128Tests.cs b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector128Tests.cs
index 5c32e8b991e7e..27955248daac8 100644
--- a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector128Tests.cs
+++ b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector128Tests.cs
@@ -4824,6 +4824,22 @@ private static void TestCreateSequence<T>(T start, T step)
             }
         }
 
+        [Theory]
+        [MemberData(nameof(GenericMathTestMemberData.CosDouble), MemberType = typeof(GenericMathTestMemberData))]
+        public void CosDoubleTest(double value, double expectedResult, double variance)
+        {
+            // Every element holds the same scalar case, so all elements are checked against one expected value.
+            AssertEqual(Vector128.Create(expectedResult), Vector128.Cos(Vector128.Create(value)), Vector128.Create(variance));
+        }
+
+        [Theory]
+        [MemberData(nameof(GenericMathTestMemberData.CosSingle), MemberType = typeof(GenericMathTestMemberData))]
+        public void CosSingleTest(float value, float expectedResult, float variance)
+        {
+            // Every element holds the same scalar case, so all elements are checked against one expected value.
+            AssertEqual(Vector128.Create(expectedResult), Vector128.Cos(Vector128.Create(value)), Vector128.Create(variance));
+        }
+
         [Theory]
         [MemberData(nameof(GenericMathTestMemberData.ExpDouble), MemberType = typeof(GenericMathTestMemberData))]
         public void ExpDoubleTest(double value, double expectedResult, double variance)
@@ -5329,6 +5345,40 @@ public void RoundToEvenSingleTest(float value, float expectedResult)
             AssertEqual(Vector128.Create(expectedResult), actualResult, Vector128<float>.Zero);
         }
 
+        [Theory]
+        [MemberData(nameof(GenericMathTestMemberData.SinDouble), MemberType = typeof(GenericMathTestMemberData))]
+        public void SinDoubleTest(double value, double expectedResult, double variance)
+        {
+            // Every element holds the same scalar case, so all elements are checked against one expected value.
+            AssertEqual(Vector128.Create(expectedResult), Vector128.Sin(Vector128.Create(value)), Vector128.Create(variance));
+        }
+
+        [Theory]
+        [MemberData(nameof(GenericMathTestMemberData.SinSingle), MemberType = typeof(GenericMathTestMemberData))]
+        public void SinSingleTest(float value, float expectedResult, float variance)
+        {
+            // Every element holds the same scalar case, so all elements are checked against one expected value.
+            AssertEqual(Vector128.Create(expectedResult), Vector128.Sin(Vector128.Create(value)), Vector128.Create(variance));
+        }
+
+        [Theory]
+        [MemberData(nameof(GenericMathTestMemberData.SinCosDouble), MemberType = typeof(GenericMathTestMemberData))]
+        public void SinCosDoubleTest(double value, double expectedResultSin, double expectedResultCos, double allowedVarianceSin, double allowedVarianceCos)
+        {
+            (Vector128<double> Sin, Vector128<double> Cos) computed = Vector128.SinCos(Vector128.Create(value)); // single call produces both results
+            AssertEqual(Vector128.Create(expectedResultSin), computed.Sin, Vector128.Create(allowedVarianceSin)); // sine half
+            AssertEqual(Vector128.Create(expectedResultCos), computed.Cos, Vector128.Create(allowedVarianceCos)); // cosine half
+        }
+
+        [Theory]
+        [MemberData(nameof(GenericMathTestMemberData.SinCosSingle), MemberType = typeof(GenericMathTestMemberData))]
+        public void SinCosSingleTest(float value, float expectedResultSin, float expectedResultCos, float allowedVarianceSin, float allowedVarianceCos)
+        {
+            (Vector128<float> Sin, Vector128<float> Cos) computed = Vector128.SinCos(Vector128.Create(value)); // single call produces both results
+            AssertEqual(Vector128.Create(expectedResultSin), computed.Sin, Vector128.Create(allowedVarianceSin)); // sine half
+            AssertEqual(Vector128.Create(expectedResultCos), computed.Cos, Vector128.Create(allowedVarianceCos)); // cosine half
+        }
+
         [Theory]
         [MemberData(nameof(GenericMathTestMemberData.TruncateDouble), MemberType = typeof(GenericMathTestMemberData))]
         public void TruncateDoubleTest(double value, double expectedResult)
diff --git a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector256Tests.cs b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector256Tests.cs
index 0c5cd3bdb2e82..d16e5eed48180 100644
--- a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector256Tests.cs
+++ b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector256Tests.cs
@@ -5840,6 +5840,22 @@ private static void TestCreateSequence<T>(T start, T step)
             }
         }
 
+        [Theory]
+        [MemberData(nameof(GenericMathTestMemberData.CosDouble), MemberType = typeof(GenericMathTestMemberData))]
+        public void CosDoubleTest(double value, double expectedResult, double variance)
+        {
+            // Every element holds the same scalar case, so all elements are checked against one expected value.
+            AssertEqual(Vector256.Create(expectedResult), Vector256.Cos(Vector256.Create(value)), Vector256.Create(variance));
+        }
+
+        [Theory]
+        [MemberData(nameof(GenericMathTestMemberData.CosSingle), MemberType = typeof(GenericMathTestMemberData))]
+        public void CosSingleTest(float value, float expectedResult, float variance)
+        {
+            // Every element holds the same scalar case, so all elements are checked against one expected value.
+            AssertEqual(Vector256.Create(expectedResult), Vector256.Cos(Vector256.Create(value)), Vector256.Create(variance));
+        }
+
         [Theory]
         [MemberData(nameof(GenericMathTestMemberData.ExpDouble), MemberType = typeof(GenericMathTestMemberData))]
         public void ExpDoubleTest(double value, double expectedResult, double variance)
@@ -6345,6 +6361,40 @@ public void RoundToEvenSingleTest(float value, float expectedResult)
             AssertEqual(Vector256.Create(expectedResult), actualResult, Vector256<float>.Zero);
         }
 
+        [Theory]
+        [MemberData(nameof(GenericMathTestMemberData.SinDouble), MemberType = typeof(GenericMathTestMemberData))]
+        public void SinDoubleTest(double value, double expectedResult, double variance)
+        {
+            // Every element holds the same scalar case, so all elements are checked against one expected value.
+            AssertEqual(Vector256.Create(expectedResult), Vector256.Sin(Vector256.Create(value)), Vector256.Create(variance));
+        }
+
+        [Theory]
+        [MemberData(nameof(GenericMathTestMemberData.SinSingle), MemberType = typeof(GenericMathTestMemberData))]
+        public void SinSingleTest(float value, float expectedResult, float variance)
+        {
+            // Every element holds the same scalar case, so all elements are checked against one expected value.
+            AssertEqual(Vector256.Create(expectedResult), Vector256.Sin(Vector256.Create(value)), Vector256.Create(variance));
+        }
+
+        [Theory]
+        [MemberData(nameof(GenericMathTestMemberData.SinCosDouble), MemberType = typeof(GenericMathTestMemberData))]
+        public void SinCosDoubleTest(double value, double expectedResultSin, double expectedResultCos, double allowedVarianceSin, double allowedVarianceCos)
+        {
+            (Vector256<double> Sin, Vector256<double> Cos) computed = Vector256.SinCos(Vector256.Create(value)); // single call produces both results
+            AssertEqual(Vector256.Create(expectedResultSin), computed.Sin, Vector256.Create(allowedVarianceSin)); // sine half
+            AssertEqual(Vector256.Create(expectedResultCos), computed.Cos, Vector256.Create(allowedVarianceCos)); // cosine half
+        }
+
+        [Theory]
+        [MemberData(nameof(GenericMathTestMemberData.SinCosSingle), MemberType = typeof(GenericMathTestMemberData))]
+        public void SinCosSingleTest(float value, float expectedResultSin, float expectedResultCos, float allowedVarianceSin, float allowedVarianceCos)
+        {
+            (Vector256<float> Sin, Vector256<float> Cos) computed = Vector256.SinCos(Vector256.Create(value)); // single call produces both results
+            AssertEqual(Vector256.Create(expectedResultSin), computed.Sin, Vector256.Create(allowedVarianceSin)); // sine half
+            AssertEqual(Vector256.Create(expectedResultCos), computed.Cos, Vector256.Create(allowedVarianceCos)); // cosine half
+        }
+
         [Theory]
         [MemberData(nameof(GenericMathTestMemberData.TruncateDouble), MemberType = typeof(GenericMathTestMemberData))]
         public void TruncateDoubleTest(double value, double expectedResult)
diff --git a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector512Tests.cs b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector512Tests.cs
index 450abc7f32b86..f996d82369615 100644
--- a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector512Tests.cs
+++ b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector512Tests.cs
@@ -5273,6 +5273,22 @@ private static void TestCreateSequence<T>(T start, T step)
             }
         }
 
+        [Theory]
+        [MemberData(nameof(GenericMathTestMemberData.CosDouble), MemberType = typeof(GenericMathTestMemberData))]
+        public void CosDoubleTest(double value, double expectedResult, double variance)
+        {
+            // Every element holds the same scalar case, so all elements are checked against one expected value.
+            AssertEqual(Vector512.Create(expectedResult), Vector512.Cos(Vector512.Create(value)), Vector512.Create(variance));
+        }
+
+        [Theory]
+        [MemberData(nameof(GenericMathTestMemberData.CosSingle), MemberType = typeof(GenericMathTestMemberData))]
+        public void CosSingleTest(float value, float expectedResult, float variance)
+        {
+            // Every element holds the same scalar case, so all elements are checked against one expected value.
+            AssertEqual(Vector512.Create(expectedResult), Vector512.Cos(Vector512.Create(value)), Vector512.Create(variance));
+        }
+
         [Theory]
         [MemberData(nameof(GenericMathTestMemberData.ExpDouble), MemberType = typeof(GenericMathTestMemberData))]
         public void ExpDoubleTest(double value, double expectedResult, double variance)
@@ -5778,6 +5794,40 @@ public void RoundToEvenSingleTest(float value, float expectedResult)
             AssertEqual(Vector512.Create(expectedResult), actualResult, Vector512<float>.Zero);
         }
 
+        [Theory]
+        [MemberData(nameof(GenericMathTestMemberData.SinDouble), MemberType = typeof(GenericMathTestMemberData))]
+        public void SinDoubleTest(double value, double expectedResult, double variance)
+        {
+            // Every element holds the same scalar case, so all elements are checked against one expected value.
+            AssertEqual(Vector512.Create(expectedResult), Vector512.Sin(Vector512.Create(value)), Vector512.Create(variance));
+        }
+
+        [Theory]
+        [MemberData(nameof(GenericMathTestMemberData.SinSingle), MemberType = typeof(GenericMathTestMemberData))]
+        public void SinSingleTest(float value, float expectedResult, float variance)
+        {
+            // Every element holds the same scalar case, so all elements are checked against one expected value.
+            AssertEqual(Vector512.Create(expectedResult), Vector512.Sin(Vector512.Create(value)), Vector512.Create(variance));
+        }
+
+        [Theory]
+        [MemberData(nameof(GenericMathTestMemberData.SinCosDouble), MemberType = typeof(GenericMathTestMemberData))]
+        public void SinCosDoubleTest(double value, double expectedResultSin, double expectedResultCos, double allowedVarianceSin, double allowedVarianceCos)
+        {
+            (Vector512<double> Sin, Vector512<double> Cos) computed = Vector512.SinCos(Vector512.Create(value)); // single call produces both results
+            AssertEqual(Vector512.Create(expectedResultSin), computed.Sin, Vector512.Create(allowedVarianceSin)); // sine half
+            AssertEqual(Vector512.Create(expectedResultCos), computed.Cos, Vector512.Create(allowedVarianceCos)); // cosine half
+        }
+
+        [Theory]
+        [MemberData(nameof(GenericMathTestMemberData.SinCosSingle), MemberType = typeof(GenericMathTestMemberData))]
+        public void SinCosSingleTest(float value, float expectedResultSin, float expectedResultCos, float allowedVarianceSin, float allowedVarianceCos)
+        {
+            (Vector512<float> Sin, Vector512<float> Cos) computed = Vector512.SinCos(Vector512.Create(value)); // single call produces both results
+            AssertEqual(Vector512.Create(expectedResultSin), computed.Sin, Vector512.Create(allowedVarianceSin)); // sine half
+            AssertEqual(Vector512.Create(expectedResultCos), computed.Cos, Vector512.Create(allowedVarianceCos)); // cosine half
+        }
+
         [Theory]
         [MemberData(nameof(GenericMathTestMemberData.TruncateDouble), MemberType = typeof(GenericMathTestMemberData))]
         public void TruncateDoubleTest(double value, double expectedResult)
diff --git a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector64Tests.cs b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector64Tests.cs
index ba7ee7c891787..c049c08515a08 100644
--- a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector64Tests.cs
+++ b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector64Tests.cs
@@ -4241,6 +4241,22 @@ private static void TestCreateSequence<T>(T start, T step)
             }
         }
 
+        [Theory]
+        [MemberData(nameof(GenericMathTestMemberData.CosDouble), MemberType = typeof(GenericMathTestMemberData))]
+        public void CosDoubleTest(double value, double expectedResult, double variance)
+        {
+            // Every element holds the same scalar case, so all elements are checked against one expected value.
+            AssertEqual(Vector64.Create(expectedResult), Vector64.Cos(Vector64.Create(value)), Vector64.Create(variance));
+        }
+
+        [Theory]
+        [MemberData(nameof(GenericMathTestMemberData.CosSingle), MemberType = typeof(GenericMathTestMemberData))]
+        public void CosSingleTest(float value, float expectedResult, float variance)
+        {
+            // Every element holds the same scalar case, so all elements are checked against one expected value.
+            AssertEqual(Vector64.Create(expectedResult), Vector64.Cos(Vector64.Create(value)), Vector64.Create(variance));
+        }
+
         [Theory]
         [MemberData(nameof(GenericMathTestMemberData.ExpDouble), MemberType = typeof(GenericMathTestMemberData))]
         public void ExpDoubleTest(double value, double expectedResult, double variance)
@@ -4736,6 +4752,40 @@ public void RoundToEvenSingleTest(float value, float expectedResult)
             AssertEqual(Vector64.Create(expectedResult), actualResult, Vector64<float>.Zero);
         }
 
+        [Theory]
+        [MemberData(nameof(GenericMathTestMemberData.SinDouble), MemberType = typeof(GenericMathTestMemberData))]
+        public void SinDoubleTest(double value, double expectedResult, double variance)
+        {
+            // Every element holds the same scalar case, so all elements are checked against one expected value.
+            AssertEqual(Vector64.Create(expectedResult), Vector64.Sin(Vector64.Create(value)), Vector64.Create(variance));
+        }
+
+        [Theory]
+        [MemberData(nameof(GenericMathTestMemberData.SinSingle), MemberType = typeof(GenericMathTestMemberData))]
+        public void SinSingleTest(float value, float expectedResult, float variance)
+        {
+            // Every element holds the same scalar case, so all elements are checked against one expected value.
+            AssertEqual(Vector64.Create(expectedResult), Vector64.Sin(Vector64.Create(value)), Vector64.Create(variance));
+        }
+
+        [Theory]
+        [MemberData(nameof(GenericMathTestMemberData.SinCosDouble), MemberType = typeof(GenericMathTestMemberData))]
+        public void SinCosDoubleTest(double value, double expectedResultSin, double expectedResultCos, double allowedVarianceSin, double allowedVarianceCos)
+        {
+            (Vector64<double> Sin, Vector64<double> Cos) computed = Vector64.SinCos(Vector64.Create(value)); // single call produces both results
+            AssertEqual(Vector64.Create(expectedResultSin), computed.Sin, Vector64.Create(allowedVarianceSin)); // sine half
+            AssertEqual(Vector64.Create(expectedResultCos), computed.Cos, Vector64.Create(allowedVarianceCos)); // cosine half
+        }
+
+        [Theory]
+        [MemberData(nameof(GenericMathTestMemberData.SinCosSingle), MemberType = typeof(GenericMathTestMemberData))]
+        public void SinCosSingleTest(float value, float expectedResultSin, float expectedResultCos, float allowedVarianceSin, float allowedVarianceCos)
+        {
+            (Vector64<float> Sin, Vector64<float> Cos) computed = Vector64.SinCos(Vector64.Create(value)); // single call produces both results
+            AssertEqual(Vector64.Create(expectedResultSin), computed.Sin, Vector64.Create(allowedVarianceSin)); // sine half
+            AssertEqual(Vector64.Create(expectedResultCos), computed.Cos, Vector64.Create(allowedVarianceCos)); // cosine half
+        }
+
         [Theory]
         [MemberData(nameof(GenericMathTestMemberData.TruncateDouble), MemberType = typeof(GenericMathTestMemberData))]
         public void TruncateDoubleTest(double value, double expectedResult)
diff --git a/src/libraries/System.Runtime/tests/System.Runtime.Extensions.Tests/System/Math.cs b/src/libraries/System.Runtime/tests/System.Runtime.Extensions.Tests/System/Math.cs
index 136e4acf1d373..2550b706b4106 100644
--- a/src/libraries/System.Runtime/tests/System.Runtime.Extensions.Tests/System/Math.cs
+++ b/src/libraries/System.Runtime/tests/System.Runtime.Extensions.Tests/System/Math.cs
@@ -473,39 +473,7 @@ public static void Ceiling_Double_IEEE(double value, double expectedResult, doub
         }
 
         [Theory]
-        [InlineData( double.NegativeInfinity,  double.NaN,          0.0)]
-        [InlineData(-3.1415926535897932,      -1.0,                 CrossPlatformMachineEpsilon * 10)]  // value: -(pi)
-        [InlineData(-2.7182818284590452,      -0.91173391478696510, CrossPlatformMachineEpsilon)]       // value: -(e)
-        [InlineData(-2.3025850929940457,      -0.66820151019031295, CrossPlatformMachineEpsilon)]       // value: -(ln(10))
-        [InlineData(-1.5707963267948966,       0.0,                 CrossPlatformMachineEpsilon)]       // value: -(pi / 2)
-        [InlineData(-1.4426950408889634,       0.12775121753523991, CrossPlatformMachineEpsilon)]       // value: -(log2(e))
-        [InlineData(-1.4142135623730950,       0.15594369476537447, CrossPlatformMachineEpsilon)]       // value: -(sqrt(2))
-        [InlineData(-1.1283791670955126,       0.42812514788535792, CrossPlatformMachineEpsilon)]       // value: -(2 / sqrt(pi))
-        [InlineData(-1.0,                      0.54030230586813972, CrossPlatformMachineEpsilon)]
-        [InlineData(-0.78539816339744831,      0.70710678118654752, CrossPlatformMachineEpsilon)]       // value: -(pi / 4),        expected:  (1 / sqrt(2))
-        [InlineData(-0.70710678118654752,      0.76024459707563015, CrossPlatformMachineEpsilon)]       // value: -(1 / sqrt(2))
-        [InlineData(-0.69314718055994531,      0.76923890136397213, CrossPlatformMachineEpsilon)]       // value: -(ln(2))
-        [InlineData(-0.63661977236758134,      0.80410982822879171, CrossPlatformMachineEpsilon)]       // value: -(2 / pi)
-        [InlineData(-0.43429448190325183,      0.90716712923909839, CrossPlatformMachineEpsilon)]       // value: -(log10(e))
-        [InlineData(-0.31830988618379067,      0.94976571538163866, CrossPlatformMachineEpsilon)]       // value: -(1 / pi)
-        [InlineData(-0.0,                      1.0,                 CrossPlatformMachineEpsilon * 10)]
-        [InlineData( double.NaN,               double.NaN,          0.0)]
-        [InlineData( 0.0,                      1.0,                 CrossPlatformMachineEpsilon * 10)]
-        [InlineData( 0.31830988618379067,      0.94976571538163866, CrossPlatformMachineEpsilon)]       // value:  (1 / pi)
-        [InlineData( 0.43429448190325183,      0.90716712923909839, CrossPlatformMachineEpsilon)]       // value:  (log10(e))
-        [InlineData( 0.63661977236758134,      0.80410982822879171, CrossPlatformMachineEpsilon)]       // value:  (2 / pi)
-        [InlineData( 0.69314718055994531,      0.76923890136397213, CrossPlatformMachineEpsilon)]       // value:  (ln(2))
-        [InlineData( 0.70710678118654752,      0.76024459707563015, CrossPlatformMachineEpsilon)]       // value:  (1 / sqrt(2))
-        [InlineData( 0.78539816339744831,      0.70710678118654752, CrossPlatformMachineEpsilon)]       // value:  (pi / 4),        expected:  (1 / sqrt(2))
-        [InlineData( 1.0,                      0.54030230586813972, CrossPlatformMachineEpsilon)]
-        [InlineData( 1.1283791670955126,       0.42812514788535792, CrossPlatformMachineEpsilon)]       // value:  (2 / sqrt(pi))
-        [InlineData( 1.4142135623730950,       0.15594369476537447, CrossPlatformMachineEpsilon)]       // value:  (sqrt(2))
-        [InlineData( 1.4426950408889634,       0.12775121753523991, CrossPlatformMachineEpsilon)]       // value:  (log2(e))
-        [InlineData( 1.5707963267948966,       0.0,                 CrossPlatformMachineEpsilon)]       // value:  (pi / 2)
-        [InlineData( 2.3025850929940457,      -0.66820151019031295, CrossPlatformMachineEpsilon)]       // value:  (ln(10))
-        [InlineData( 2.7182818284590452,      -0.91173391478696510, CrossPlatformMachineEpsilon)]       // value:  (e)
-        [InlineData( 3.1415926535897932,      -1.0,                 CrossPlatformMachineEpsilon * 10)]  // value:  (pi)
-        [InlineData( double.PositiveInfinity,  double.NaN,          0.0)]
+        [MemberData(nameof(GenericMathTestMemberData.CosDouble), MemberType = typeof(GenericMathTestMemberData))]
         public static void Cos(double value, double expectedResult, double allowedVariance)
         {
             AssertExtensions.Equal(expectedResult, Math.Cos(value), allowedVariance);
@@ -1354,80 +1322,14 @@ public static void Sign_Single()
         }
 
         [Theory]
-        [InlineData( double.NegativeInfinity,  double.NaN,          0.0)]
-        [InlineData(-3.1415926535897932,      -0.0,                 CrossPlatformMachineEpsilon)]       // value: -(pi)
-        [InlineData(-2.7182818284590452,      -0.41078129050290870, CrossPlatformMachineEpsilon)]       // value: -(e)
-        [InlineData(-2.3025850929940457,      -0.74398033695749319, CrossPlatformMachineEpsilon)]       // value: -(ln(10))
-        [InlineData(-1.5707963267948966,      -1.0,                 CrossPlatformMachineEpsilon * 10)]  // value: -(pi / 2)
-        [InlineData(-1.4426950408889634,      -0.99180624439366372, CrossPlatformMachineEpsilon)]       // value: -(log2(e))
-        [InlineData(-1.4142135623730950,      -0.98776594599273553, CrossPlatformMachineEpsilon)]       // value: -(sqrt(2))
-        [InlineData(-1.1283791670955126,      -0.90371945743584630, CrossPlatformMachineEpsilon)]       // value: -(2 / sqrt(pi))
-        [InlineData(-1.0,                     -0.84147098480789651, CrossPlatformMachineEpsilon)]
-        [InlineData(-0.78539816339744831,     -0.70710678118654752, CrossPlatformMachineEpsilon)]       // value: -(pi / 4),        expected: -(1 / sqrt(2))
-        [InlineData(-0.70710678118654752,     -0.64963693908006244, CrossPlatformMachineEpsilon)]       // value: -(1 / sqrt(2))
-        [InlineData(-0.69314718055994531,     -0.63896127631363480, CrossPlatformMachineEpsilon)]       // value: -(ln(2))
-        [InlineData(-0.63661977236758134,     -0.59448076852482208, CrossPlatformMachineEpsilon)]       // value: -(2 / pi)
-        [InlineData(-0.43429448190325183,     -0.42077048331375735, CrossPlatformMachineEpsilon)]       // value: -(log10(e))
-        [InlineData(-0.31830988618379067,     -0.31296179620778659, CrossPlatformMachineEpsilon)]       // value: -(1 / pi)
-        [InlineData(-0.0,                     -0.0,                 0.0)]
-        [InlineData( double.NaN,               double.NaN,          0.0)]
-        [InlineData( 0.0,                      0.0,                 0.0)]
-        [InlineData( 0.31830988618379067,      0.31296179620778659, CrossPlatformMachineEpsilon)]       // value:  (1 / pi)
-        [InlineData( 0.43429448190325183,      0.42077048331375735, CrossPlatformMachineEpsilon)]       // value:  (log10(e))
-        [InlineData( 0.63661977236758134,      0.59448076852482208, CrossPlatformMachineEpsilon)]       // value:  (2 / pi)
-        [InlineData( 0.69314718055994531,      0.63896127631363480, CrossPlatformMachineEpsilon)]       // value:  (ln(2))
-        [InlineData( 0.70710678118654752,      0.64963693908006244, CrossPlatformMachineEpsilon)]       // value:  (1 / sqrt(2))
-        [InlineData( 0.78539816339744831,      0.70710678118654752, CrossPlatformMachineEpsilon)]       // value:  (pi / 4),        expected:  (1 / sqrt(2))
-        [InlineData( 1.0,                      0.84147098480789651, CrossPlatformMachineEpsilon)]
-        [InlineData( 1.1283791670955126,       0.90371945743584630, CrossPlatformMachineEpsilon)]       // value:  (2 / sqrt(pi))
-        [InlineData( 1.4142135623730950,       0.98776594599273553, CrossPlatformMachineEpsilon)]       // value:  (sqrt(2))
-        [InlineData( 1.4426950408889634,       0.99180624439366372, CrossPlatformMachineEpsilon)]       // value:  (log2(e))
-        [InlineData( 1.5707963267948966,       1.0,                 CrossPlatformMachineEpsilon * 10)]  // value:  (pi / 2)
-        [InlineData( 2.3025850929940457,       0.74398033695749319, CrossPlatformMachineEpsilon)]       // value:  (ln(10))
-        [InlineData( 2.7182818284590452,       0.41078129050290870, CrossPlatformMachineEpsilon)]       // value:  (e)
-        [InlineData( 3.1415926535897932,       0.0,                 CrossPlatformMachineEpsilon)]       // value:  (pi)
-        [InlineData( double.PositiveInfinity,  double.NaN,          0.0)]
+        [MemberData(nameof(GenericMathTestMemberData.SinDouble), MemberType = typeof(GenericMathTestMemberData))]
         public static void Sin(double value, double expectedResult, double allowedVariance)
         {
             AssertExtensions.Equal(expectedResult, Math.Sin(value), allowedVariance);
         }
 
         [Theory]
-        [InlineData( double.NegativeInfinity,  double.NaN,           double.NaN,          0.0,                              0.0)]
-        [InlineData(-1e18,                     0.9929693207404051,   0.11837199021871073, 0.0002,                           0.002)]                             // https://github.com/dotnet/runtime/issues/98204
-        [InlineData(-3.1415926535897932,      -0.0,                 -1.0,                 CrossPlatformMachineEpsilon,      CrossPlatformMachineEpsilon * 10)]  // value: -(pi)
-        [InlineData(-2.7182818284590452,      -0.41078129050290870, -0.91173391478696510, CrossPlatformMachineEpsilon,      CrossPlatformMachineEpsilon)]       // value: -(e)
-        [InlineData(-2.3025850929940457,      -0.74398033695749319, -0.66820151019031295, CrossPlatformMachineEpsilon,      CrossPlatformMachineEpsilon)]       // value: -(ln(10))
-        [InlineData(-1.5707963267948966,      -1.0,                  0.0,                 CrossPlatformMachineEpsilon * 10, CrossPlatformMachineEpsilon)]       // value: -(pi / 2)
-        [InlineData(-1.4426950408889634,      -0.99180624439366372,  0.12775121753523991, CrossPlatformMachineEpsilon,      CrossPlatformMachineEpsilon)]       // value: -(log2(e))
-        [InlineData(-1.4142135623730950,      -0.98776594599273553,  0.15594369476537447, CrossPlatformMachineEpsilon,      CrossPlatformMachineEpsilon)]       // value: -(sqrt(2))
-        [InlineData(-1.1283791670955126,      -0.90371945743584630,  0.42812514788535792, CrossPlatformMachineEpsilon,      CrossPlatformMachineEpsilon)]       // value: -(2 / sqrt(pi))
-        [InlineData(-1.0,                     -0.84147098480789651,  0.54030230586813972, CrossPlatformMachineEpsilon,      CrossPlatformMachineEpsilon)]
-        [InlineData(-0.78539816339744831,     -0.70710678118654752,  0.70710678118654752, CrossPlatformMachineEpsilon,      CrossPlatformMachineEpsilon)]       // value: -(pi / 4),        expected_sin: -(1 / sqrt(2)),    expected_cos: 1
-        [InlineData(-0.70710678118654752,     -0.64963693908006244,  0.76024459707563015, CrossPlatformMachineEpsilon,      CrossPlatformMachineEpsilon)]       // value: -(1 / sqrt(2))
-        [InlineData(-0.69314718055994531,     -0.63896127631363480,  0.76923890136397213, CrossPlatformMachineEpsilon,      CrossPlatformMachineEpsilon)]       // value: -(ln(2))
-        [InlineData(-0.63661977236758134,     -0.59448076852482208,  0.80410982822879171, CrossPlatformMachineEpsilon,      CrossPlatformMachineEpsilon)]       // value: -(2 / pi)
-        [InlineData(-0.43429448190325183,     -0.42077048331375735,  0.90716712923909839, CrossPlatformMachineEpsilon,      CrossPlatformMachineEpsilon)]       // value: -(log10(e))
-        [InlineData(-0.31830988618379067,     -0.31296179620778659,  0.94976571538163866, CrossPlatformMachineEpsilon,      CrossPlatformMachineEpsilon)]       // value: -(1 / pi)
-        [InlineData(-0.0,                     -0.0,                  1.0,                 0.0,                              CrossPlatformMachineEpsilon * 10)]
-        [InlineData( double.NaN,               double.NaN,           double.NaN,          0.0,                              0.0)]
-        [InlineData( 0.0,                      0.0,                  1.0,                 0.0,                              CrossPlatformMachineEpsilon * 10)]
-        [InlineData( 0.31830988618379067,      0.31296179620778659,  0.94976571538163866, CrossPlatformMachineEpsilon,      CrossPlatformMachineEpsilon)]       // value:  (1 / pi)
-        [InlineData( 0.43429448190325183,      0.42077048331375735,  0.90716712923909839, CrossPlatformMachineEpsilon,      CrossPlatformMachineEpsilon)]       // value:  (log10(e))
-        [InlineData( 0.63661977236758134,      0.59448076852482208,  0.80410982822879171, CrossPlatformMachineEpsilon,      CrossPlatformMachineEpsilon)]       // value:  (2 / pi)
-        [InlineData( 0.69314718055994531,      0.63896127631363480,  0.76923890136397213, CrossPlatformMachineEpsilon,      CrossPlatformMachineEpsilon)]       // value:  (ln(2))
-        [InlineData( 0.70710678118654752,      0.64963693908006244,  0.76024459707563015, CrossPlatformMachineEpsilon,      CrossPlatformMachineEpsilon)]       // value:  (1 / sqrt(2))
-        [InlineData( 0.78539816339744831,      0.70710678118654752,  0.70710678118654752, CrossPlatformMachineEpsilon,      CrossPlatformMachineEpsilon)]       // value:  (pi / 4),        expected_sin:  (1 / sqrt(2)),    expected_cos: 1
-        [InlineData( 1.0,                      0.84147098480789651,  0.54030230586813972, CrossPlatformMachineEpsilon,      CrossPlatformMachineEpsilon)]
-        [InlineData( 1.1283791670955126,       0.90371945743584630,  0.42812514788535792, CrossPlatformMachineEpsilon,      CrossPlatformMachineEpsilon)]       // value:  (2 / sqrt(pi))
-        [InlineData( 1.4142135623730950,       0.98776594599273553,  0.15594369476537447, CrossPlatformMachineEpsilon,      CrossPlatformMachineEpsilon)]       // value:  (sqrt(2))
-        [InlineData( 1.4426950408889634,       0.99180624439366372,  0.12775121753523991, CrossPlatformMachineEpsilon,      CrossPlatformMachineEpsilon)]       // value:  (log2(e))
-        [InlineData( 1.5707963267948966,       1.0,                  0.0,                 CrossPlatformMachineEpsilon * 10, CrossPlatformMachineEpsilon)]       // value:  (pi / 2)
-        [InlineData( 2.3025850929940457,       0.74398033695749319, -0.66820151019031295, CrossPlatformMachineEpsilon,      CrossPlatformMachineEpsilon)]       // value:  (ln(10))
-        [InlineData( 2.7182818284590452,       0.41078129050290870, -0.91173391478696510, CrossPlatformMachineEpsilon,      CrossPlatformMachineEpsilon)]       // value:  (e)
-        [InlineData( 3.1415926535897932,       0.0,                 -1.0,                 CrossPlatformMachineEpsilon,      CrossPlatformMachineEpsilon * 10)]  // value:  (pi)
-        [InlineData( 1e18,                    -0.9929693207404051,   0.11837199021871073, 0.0002,                           0.002)]                             // https://github.com/dotnet/runtime/issues/98204
-        [InlineData( double.PositiveInfinity,  double.NaN,           double.NaN,          0.0,                              0.0)]
+        [MemberData(nameof(GenericMathTestMemberData.SinCosDouble), MemberType = typeof(GenericMathTestMemberData))]
         public static void SinCos(double value, double expectedResultSin, double expectedResultCos, double allowedVarianceSin, double allowedVarianceCos)
         {
             (double resultSin, double resultCos) = Math.SinCos(value);
diff --git a/src/libraries/System.Runtime/tests/System.Runtime.Extensions.Tests/System/MathF.cs b/src/libraries/System.Runtime/tests/System.Runtime.Extensions.Tests/System/MathF.cs
index 565c00a986b29..1363a2b407b01 100644
--- a/src/libraries/System.Runtime/tests/System.Runtime.Extensions.Tests/System/MathF.cs
+++ b/src/libraries/System.Runtime/tests/System.Runtime.Extensions.Tests/System/MathF.cs
@@ -584,39 +584,7 @@ public static void CopySign(float x, float y, float expectedResult)
         }
 
         [Theory]
-        [InlineData(float.NegativeInfinity, float.NaN, 0.0f)]
-        [InlineData(-3.14159265f, -1.0f, CrossPlatformMachineEpsilon * 10)]  // value: -(pi)
-        [InlineData(-2.71828183f, -0.911733918f, CrossPlatformMachineEpsilon)]       // value: -(e)
-        [InlineData(-2.30258509f, -0.668201510f, CrossPlatformMachineEpsilon)]       // value: -(ln(10))
-        [InlineData(-1.57079633f, 0.0f, CrossPlatformMachineEpsilon)]       // value: -(pi / 2)
-        [InlineData(-1.44269504f, 0.127751218f, CrossPlatformMachineEpsilon)]       // value: -(log2(e))
-        [InlineData(-1.41421356f, 0.155943695f, CrossPlatformMachineEpsilon)]       // value: -(sqrt(2))
-        [InlineData(-1.12837917f, 0.428125148f, CrossPlatformMachineEpsilon)]       // value: -(2 / sqrt(pi))
-        [InlineData(-1.0f, 0.540302306f, CrossPlatformMachineEpsilon)]
-        [InlineData(-0.785398163f, 0.707106781f, CrossPlatformMachineEpsilon)]       // value: -(pi / 4),        expected:  (1 / sqrt(2))
-        [InlineData(-0.707106781f, 0.760244597f, CrossPlatformMachineEpsilon)]       // value: -(1 / sqrt(2))
-        [InlineData(-0.693147181f, 0.769238901f, CrossPlatformMachineEpsilon)]       // value: -(ln(2))
-        [InlineData(-0.636619772f, 0.804109828f, CrossPlatformMachineEpsilon)]       // value: -(2 / pi)
-        [InlineData(-0.434294482f, 0.907167129f, CrossPlatformMachineEpsilon)]       // value: -(log10(e))
-        [InlineData(-0.318309886f, 0.949765715f, CrossPlatformMachineEpsilon)]       // value: -(1 / pi)
-        [InlineData(-0.0f, 1.0f, CrossPlatformMachineEpsilon * 10)]
-        [InlineData(float.NaN, float.NaN, 0.0f)]
-        [InlineData(0.0f, 1.0f, CrossPlatformMachineEpsilon * 10)]
-        [InlineData(0.318309886f, 0.949765715f, CrossPlatformMachineEpsilon)]       // value:  (1 / pi)
-        [InlineData(0.434294482f, 0.907167129f, CrossPlatformMachineEpsilon)]       // value:  (log10(e))
-        [InlineData(0.636619772f, 0.804109828f, CrossPlatformMachineEpsilon)]       // value:  (2 / pi)
-        [InlineData(0.693147181f, 0.769238901f, CrossPlatformMachineEpsilon)]       // value:  (ln(2))
-        [InlineData(0.707106781f, 0.760244597f, CrossPlatformMachineEpsilon)]       // value:  (1 / sqrt(2))
-        [InlineData(0.785398163f, 0.707106781f, CrossPlatformMachineEpsilon)]       // value:  (pi / 4),        expected:  (1 / sqrt(2))
-        [InlineData(1.0f, 0.540302306f, CrossPlatformMachineEpsilon)]
-        [InlineData(1.12837917f, 0.428125148f, CrossPlatformMachineEpsilon)]       // value:  (2 / sqrt(pi))
-        [InlineData(1.41421356f, 0.155943695f, CrossPlatformMachineEpsilon)]       // value:  (sqrt(2))
-        [InlineData(1.44269504f, 0.127751218f, CrossPlatformMachineEpsilon)]       // value:  (log2(e))
-        [InlineData(1.57079633f, 0.0f, CrossPlatformMachineEpsilon)]       // value:  (pi / 2)
-        [InlineData(2.30258509f, -0.668201510f, CrossPlatformMachineEpsilon)]       // value:  (ln(10))
-        [InlineData(2.71828183f, -0.911733918f, CrossPlatformMachineEpsilon)]       // value:  (e)
-        [InlineData(3.14159265f, -1.0f, CrossPlatformMachineEpsilon * 10)]  // value:  (pi)
-        [InlineData(float.PositiveInfinity, float.NaN, 0.0f)]
+        [MemberData(nameof(GenericMathTestMemberData.CosSingle), MemberType = typeof(GenericMathTestMemberData))]
         public static void Cos(float value, float expectedResult, float allowedVariance)
         {
             AssertExtensions.Equal(expectedResult, MathF.Cos(value), allowedVariance);
@@ -1349,80 +1317,14 @@ public static void Sign()
         }
 
         [Theory]
-        [InlineData(float.NegativeInfinity, float.NaN, 0.0f)]
-        [InlineData(-3.14159265f, -0.0f, CrossPlatformMachineEpsilon)]       // value: -(pi)
-        [InlineData(-2.71828183f, -0.410781291f, CrossPlatformMachineEpsilon)]       // value: -(e)
-        [InlineData(-2.30258509f, -0.743980337f, CrossPlatformMachineEpsilon)]       // value: -(ln(10))
-        [InlineData(-1.57079633f, -1.0f, CrossPlatformMachineEpsilon * 10)]  // value: -(pi / 2)
-        [InlineData(-1.44269504f, -0.991806244f, CrossPlatformMachineEpsilon)]       // value: -(log2(e))
-        [InlineData(-1.41421356f, -0.987765946f, CrossPlatformMachineEpsilon)]       // value: -(sqrt(2))
-        [InlineData(-1.12837917f, -0.903719457f, CrossPlatformMachineEpsilon)]       // value: -(2 / sqrt(pi))
-        [InlineData(-1.0f, -0.841470985f, CrossPlatformMachineEpsilon)]
-        [InlineData(-0.785398163f, -0.707106781f, CrossPlatformMachineEpsilon)]       // value: -(pi / 4),        expected: -(1 / sqrt(2))
-        [InlineData(-0.707106781f, -0.649636939f, CrossPlatformMachineEpsilon)]       // value: -(1 / sqrt(2))
-        [InlineData(-0.693147181f, -0.638961276f, CrossPlatformMachineEpsilon)]       // value: -(ln(2))
-        [InlineData(-0.636619772f, -0.594480769f, CrossPlatformMachineEpsilon)]       // value: -(2 / pi)
-        [InlineData(-0.434294482f, -0.420770483f, CrossPlatformMachineEpsilon)]       // value: -(log10(e))
-        [InlineData(-0.318309886f, -0.312961796f, CrossPlatformMachineEpsilon)]       // value: -(1 / pi)
-        [InlineData(-0.0f, -0.0f, 0.0f)]
-        [InlineData(float.NaN, float.NaN, 0.0f)]
-        [InlineData(0.0f, 0.0f, 0.0f)]
-        [InlineData(0.318309886f, 0.312961796f, CrossPlatformMachineEpsilon)]       // value:  (1 / pi)
-        [InlineData(0.434294482f, 0.420770483f, CrossPlatformMachineEpsilon)]       // value:  (log10(e))
-        [InlineData(0.636619772f, 0.594480769f, CrossPlatformMachineEpsilon)]       // value:  (2 / pi)
-        [InlineData(0.693147181f, 0.638961276f, CrossPlatformMachineEpsilon)]       // value:  (ln(2))
-        [InlineData(0.707106781f, 0.649636939f, CrossPlatformMachineEpsilon)]       // value:  (1 / sqrt(2))
-        [InlineData(0.785398163f, 0.707106781f, CrossPlatformMachineEpsilon)]       // value:  (pi / 4),        expected:  (1 / sqrt(2))
-        [InlineData(1.0f, 0.841470985f, CrossPlatformMachineEpsilon)]
-        [InlineData(1.12837917f, 0.903719457f, CrossPlatformMachineEpsilon)]       // value:  (2 / sqrt(pi))
-        [InlineData(1.41421356f, 0.987765946f, CrossPlatformMachineEpsilon)]       // value:  (sqrt(2))
-        [InlineData(1.44269504f, 0.991806244f, CrossPlatformMachineEpsilon)]       // value:  (log2(e))
-        [InlineData(1.57079633f, 1.0f, CrossPlatformMachineEpsilon * 10)]  // value:  (pi / 2)
-        [InlineData(2.30258509f, 0.743980337f, CrossPlatformMachineEpsilon)]       // value:  (ln(10))
-        [InlineData(2.71828183f, 0.410781291f, CrossPlatformMachineEpsilon)]       // value:  (e)
-        [InlineData(3.14159265f, 0.0f, CrossPlatformMachineEpsilon)]       // value:  (pi)
-        [InlineData(float.PositiveInfinity, float.NaN, 0.0f)]
+        [MemberData(nameof(GenericMathTestMemberData.SinSingle), MemberType = typeof(GenericMathTestMemberData))]
         public static void Sin(float value, float expectedResult, float allowedVariance)
         {
             AssertExtensions.Equal(expectedResult, MathF.Sin(value), allowedVariance);
         }
 
         [Theory]
-        [InlineData( float.NegativeInfinity,  float.NaN,     float.NaN,    0.0f,                             0.0f)]
-        [InlineData(-1e8f,                   -0.931639,     -0.36338508,   CrossPlatformMachineEpsilon,      CrossPlatformMachineEpsilon)]       // https://github.com/dotnet/runtime/issues/98204
-        [InlineData(-3.14159265f,            -0.0f,         -1.0f,         CrossPlatformMachineEpsilon,      CrossPlatformMachineEpsilon * 10)]  // value: -(pi)
-        [InlineData(-2.71828183f,            -0.410781291f, -0.911733918f, CrossPlatformMachineEpsilon,      CrossPlatformMachineEpsilon)]       // value: -(e)
-        [InlineData(-2.30258509f,            -0.743980337f, -0.668201510f, CrossPlatformMachineEpsilon,      CrossPlatformMachineEpsilon)]       // value: -(ln(10))
-        [InlineData(-1.57079633f,            -1.0f,          0.0f,         CrossPlatformMachineEpsilon * 10, CrossPlatformMachineEpsilon)]       // value: -(pi / 2)
-        [InlineData(-1.44269504f,            -0.991806244f,  0.127751218f, CrossPlatformMachineEpsilon,      CrossPlatformMachineEpsilon)]       // value: -(log2(e))
-        [InlineData(-1.41421356f,            -0.987765946f,  0.155943695f, CrossPlatformMachineEpsilon,      CrossPlatformMachineEpsilon)]       // value: -(sqrt(2))
-        [InlineData(-1.12837917f,            -0.903719457f,  0.428125148f, CrossPlatformMachineEpsilon,      CrossPlatformMachineEpsilon)]       // value: -(2 / sqrt(pi))
-        [InlineData(-1.0f,                   -0.841470985f,  0.540302306f, CrossPlatformMachineEpsilon,      CrossPlatformMachineEpsilon)]
-        [InlineData(-0.785398163f,           -0.707106781f,  0.707106781f, CrossPlatformMachineEpsilon,      CrossPlatformMachineEpsilon)]       // value: -(pi / 4),        expected_sin: -(1 / sqrt(2)),    expected_cos: 1
-        [InlineData(-0.707106781f,           -0.649636939f,  0.760244597f, CrossPlatformMachineEpsilon,      CrossPlatformMachineEpsilon)]       // value: -(1 / sqrt(2))
-        [InlineData(-0.693147181f,           -0.638961276f,  0.769238901f, CrossPlatformMachineEpsilon,      CrossPlatformMachineEpsilon)]       // value: -(ln(2))
-        [InlineData(-0.636619772f,           -0.594480769f,  0.804109828f, CrossPlatformMachineEpsilon,      CrossPlatformMachineEpsilon)]       // value: -(2 / pi)
-        [InlineData(-0.434294482f,           -0.420770483f,  0.907167129f, CrossPlatformMachineEpsilon,      CrossPlatformMachineEpsilon)]       // value: -(log10(e))
-        [InlineData(-0.318309886f,           -0.312961796f,  0.949765715f, CrossPlatformMachineEpsilon,      CrossPlatformMachineEpsilon)]       // value: -(1 / pi)
-        [InlineData(-0.0f,                   -0.0f,          1.0f,         0.0f,                             CrossPlatformMachineEpsilon * 10)]
-        [InlineData( float.NaN,               float.NaN,     float.NaN,    0.0f,                             0.0f)]
-        [InlineData( 0.0f,                    0.0f,          1.0f,         0.0f,                             CrossPlatformMachineEpsilon * 10)]
-        [InlineData( 0.318309886f,            0.312961796f,  0.949765715f, CrossPlatformMachineEpsilon,      CrossPlatformMachineEpsilon)]       // value:  (1 / pi)
-        [InlineData( 0.434294482f,            0.420770483f,  0.907167129f, CrossPlatformMachineEpsilon,      CrossPlatformMachineEpsilon)]       // value:  (log10(e))
-        [InlineData( 0.636619772f,            0.594480769f,  0.804109828f, CrossPlatformMachineEpsilon,      CrossPlatformMachineEpsilon)]       // value:  (2 / pi)
-        [InlineData( 0.693147181f,            0.638961276f,  0.769238901f, CrossPlatformMachineEpsilon,      CrossPlatformMachineEpsilon)]       // value:  (ln(2))
-        [InlineData( 0.707106781f,            0.649636939f,  0.760244597f, CrossPlatformMachineEpsilon,      CrossPlatformMachineEpsilon)]       // value:  (1 / sqrt(2))
-        [InlineData( 0.785398163f,            0.707106781f,  0.707106781f, CrossPlatformMachineEpsilon,      CrossPlatformMachineEpsilon)]       // value:  (pi / 4),        expected_sin:  (1 / sqrt(2)),    expected_cos: 1
-        [InlineData( 1.0f,                    0.841470985f,  0.540302306f, CrossPlatformMachineEpsilon,      CrossPlatformMachineEpsilon)]
-        [InlineData( 1.12837917f,             0.903719457f,  0.428125148f, CrossPlatformMachineEpsilon,      CrossPlatformMachineEpsilon)]       // value:  (2 / sqrt(pi))
-        [InlineData( 1.41421356f,             0.987765946f,  0.155943695f, CrossPlatformMachineEpsilon,      CrossPlatformMachineEpsilon)]       // value:  (sqrt(2))
-        [InlineData( 1.44269504f,             0.991806244f,  0.127751218f, CrossPlatformMachineEpsilon,      CrossPlatformMachineEpsilon)]       // value:  (log2(e))
-        [InlineData( 1.57079633f,             1.0f,          0.0f,         CrossPlatformMachineEpsilon * 10, CrossPlatformMachineEpsilon)]       // value:  (pi / 2)
-        [InlineData( 2.30258509f,             0.743980337f, -0.668201510f, CrossPlatformMachineEpsilon,      CrossPlatformMachineEpsilon)]       // value:  (ln(10))
-        [InlineData( 2.71828183f,             0.410781291f, -0.911733918f, CrossPlatformMachineEpsilon,      CrossPlatformMachineEpsilon)]       // value:  (e)
-        [InlineData( 3.14159265f,             0.0f,         -1.0f,         CrossPlatformMachineEpsilon,      CrossPlatformMachineEpsilon * 10)]  // value:  (pi)
-        [InlineData( 1e8f,                    0.931639,     -0.36338508,   CrossPlatformMachineEpsilon,      CrossPlatformMachineEpsilon)]       // https://github.com/dotnet/runtime/issues/98204
-        [InlineData( float.PositiveInfinity,  float.NaN,     float.NaN,    0.0f,                             0.0f)]
+        [MemberData(nameof(GenericMathTestMemberData.SinCosSingle), MemberType = typeof(GenericMathTestMemberData))]
         public static void SinCos(float value, float expectedResultSin, float expectedResultCos, float allowedVarianceSin, float allowedVarianceCos)
         {
             (float resultSin, float resultCos) = MathF.SinCos(value);

From 549ef80a3e46291a66fac6f7dd824d297ba55bb2 Mon Sep 17 00:00:00 2001
From: Tanner Gooding <tagoo@outlook.com>
Date: Sat, 13 Jul 2024 11:25:33 -0700
Subject: [PATCH 06/13] Fix some small bugs in the Sin, Cos, and SinCos impls

---
 .../tests/GenericVectorTests.cs               |  2 +-
 .../System/Runtime/Intrinsics/Vector128.cs    |  6 +--
 .../System/Runtime/Intrinsics/Vector256.cs    |  6 +--
 .../System/Runtime/Intrinsics/Vector512.cs    |  4 +-
 .../System/Runtime/Intrinsics/VectorMath.cs   | 49 +++++++++----------
 5 files changed, 33 insertions(+), 34 deletions(-)

diff --git a/src/libraries/System.Numerics.Vectors/tests/GenericVectorTests.cs b/src/libraries/System.Numerics.Vectors/tests/GenericVectorTests.cs
index 14b1a2ec62833..dc13ea9c1117b 100644
--- a/src/libraries/System.Numerics.Vectors/tests/GenericVectorTests.cs
+++ b/src/libraries/System.Numerics.Vectors/tests/GenericVectorTests.cs
@@ -4546,7 +4546,7 @@ public void CosDoubleTest(double value, double expectedResult, double variance)
 
         [Theory]
         [MemberData(nameof(GenericMathTestMemberData.CosSingle), MemberType = typeof(GenericMathTestMemberData))]
-        public void CosCosgleTest(float value, float expectedResult, float variance)
+        public void CosSingleTest(float value, float expectedResult, float variance)
         {
             Vector<float> actualResult = Vector.Cos(Vector.Create(value));
             AssertEqual(Vector.Create(expectedResult), actualResult, Vector.Create(variance));
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs
index 70c28d0924a83..27bd7c10fa3e0 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs
@@ -3207,7 +3207,7 @@ public static Vector128<double> Sin(Vector128<double> vector)
         {
             if (IsHardwareAccelerated)
             {
-                return VectorMath.CosDouble<Vector128<double>, Vector128<long>>(vector);
+                return VectorMath.SinDouble<Vector128<double>, Vector128<long>>(vector);
             }
             else
             {
@@ -3226,11 +3226,11 @@ public static Vector128<float> Sin(Vector128<float> vector)
             {
                 if (Vector256.IsHardwareAccelerated)
                 {
-                    return VectorMath.CosSingle<Vector128<float>, Vector128<int>, Vector256<double>, Vector256<long>>(vector);
+                    return VectorMath.SinSingle<Vector128<float>, Vector128<int>, Vector256<double>, Vector256<long>>(vector);
                 }
                 else
                 {
-                    return VectorMath.CosSingle<Vector128<float>, Vector128<int>, Vector128<double>, Vector128<long>>(vector);
+                    return VectorMath.SinSingle<Vector128<float>, Vector128<int>, Vector128<double>, Vector128<long>>(vector);
                 }
             }
             else
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs
index c90524a56bdee..eb2aa61fce4ed 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs
@@ -3090,7 +3090,7 @@ public static Vector256<double> Sin(Vector256<double> vector)
         {
             if (IsHardwareAccelerated)
             {
-                return VectorMath.CosDouble<Vector256<double>, Vector256<long>>(vector);
+                return VectorMath.SinDouble<Vector256<double>, Vector256<long>>(vector);
             }
             else
             {
@@ -3109,11 +3109,11 @@ public static Vector256<float> Sin(Vector256<float> vector)
             {
                 if (Vector512.IsHardwareAccelerated)
                 {
-                    return VectorMath.CosSingle<Vector256<float>, Vector256<int>, Vector512<double>, Vector512<long>>(vector);
+                    return VectorMath.SinSingle<Vector256<float>, Vector256<int>, Vector512<double>, Vector512<long>>(vector);
                 }
                 else
                 {
-                    return VectorMath.CosSingle<Vector256<float>, Vector256<int>, Vector256<double>, Vector256<long>>(vector);
+                    return VectorMath.SinSingle<Vector256<float>, Vector256<int>, Vector256<double>, Vector256<long>>(vector);
                 }
             }
             else
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs
index f5d3c0c1b9d6c..8f12942a7cfc5 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs
@@ -3131,7 +3131,7 @@ public static Vector512<double> Sin(Vector512<double> vector)
         {
             if (IsHardwareAccelerated)
             {
-                return VectorMath.CosDouble<Vector512<double>, Vector512<long>>(vector);
+                return VectorMath.SinDouble<Vector512<double>, Vector512<long>>(vector);
             }
             else
             {
@@ -3148,7 +3148,7 @@ public static Vector512<float> Sin(Vector512<float> vector)
         {
             if (IsHardwareAccelerated)
             {
-                return VectorMath.CosSingle<Vector512<float>, Vector512<int>, Vector512<double>, Vector512<long>>(vector);
+                return VectorMath.SinSingle<Vector512<float>, Vector512<int>, Vector512<double>, Vector512<long>>(vector);
             }
             else
             {
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/VectorMath.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/VectorMath.cs
index 066deab0dc410..cf8df6927558f 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/VectorMath.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/VectorMath.cs
@@ -211,30 +211,29 @@ public static TVectorSingle CosSingle<TVectorSingle, TVectorInt32, TVectorDouble
             const int ARG_SMALLER = 0x39000000; // 2^-27
 
             TVectorSingle ax = TVectorSingle.Abs(x);
-            TVectorInt32 ux = Unsafe.BitCast<TVectorSingle, TVectorInt32>(x);
+            TVectorInt32 ux = Unsafe.BitCast<TVectorSingle, TVectorInt32>(ax);
 
             TVectorSingle result;
 
-            if (TVectorSingle.LessThanOrEqualAll(ax, TVectorSingle.Create(ARG_LARGE)))
+            if (TVectorInt32.LessThanOrEqualAll(ux, TVectorInt32.Create(ARG_LARGE)))
             {
                 // We must be a finite value: (pi / 4) >= |x|
 
                 if (TVectorInt32.GreaterThanAny(ux, TVectorInt32.Create(ARG_SMALL - 1)))
                 {
                     // at least one element is: |x| >= 2^-13
-                    TVectorSingle x2 = x * x;
 
                     if (TVectorSingle.Count == TVectorDouble.Count)
                     {
                         result = Narrow<TVectorDouble, TVectorSingle>(
-                            CosSingleSmall(Widen<TVectorSingle, TVectorDouble>(x2))
+                            CosSingleSmall(Widen<TVectorSingle, TVectorDouble>(x))
                         );
                     }
                     else
                     {
                         result = Narrow<TVectorDouble, TVectorSingle>(
-                            CosSingleSmall(WidenLower<TVectorSingle, TVectorDouble>(x2)),
-                            CosSingleSmall(WidenUpper<TVectorSingle, TVectorDouble>(x2))
+                            CosSingleSmall(WidenLower<TVectorSingle, TVectorDouble>(x)),
+                            CosSingleSmall(WidenUpper<TVectorSingle, TVectorDouble>(x))
                         );
                     }
                 }
@@ -1760,8 +1759,8 @@ public static (TVectorDouble Sin, TVectorDouble Cos) SinCosDouble<TVectorDouble,
 
                 sinResult = TVectorDouble.ConditionalSelect(
                     Unsafe.BitCast<TVectorInt64, TVectorDouble>(TVectorInt64.Equals(((sign & region) | (~sign & ~region)) & TVectorInt64.One, TVectorInt64.Zero)),
-                    -sinResult, // negative in region 1 or 3, positive in region 0 or 2
-                    +sinResult  // negative in region 0 or 2, positive in region 1 or 3
+                    +sinResult, // negative in region 1 or 3, positive in region 0 or 2
+                    -sinResult  // negative in region 0 or 2, positive in region 1 or 3
                 );
 
                 cosResult = TVectorDouble.ConditionalSelect(
@@ -1806,12 +1805,12 @@ public static (TVectorDouble Sin, TVectorDouble Cos) SinCosDouble<TVectorDouble,
             sinResult = TVectorDouble.ConditionalSelect(
                 argNotSmallerMask,
                 sinResult,          // for elements: |x| >= 2^-27, infinity, or NaN
-                TVectorDouble.One   // for elements: 2^-27 > |x|
+                x                   // for elements: 2^-27 > |x|
             );
 
             cosResult = TVectorDouble.ConditionalSelect(
                 argNotSmallerMask,
-                cosResult,             // for elements: |x| >= 2^-27, infinity, or NaN
+                cosResult,          // for elements: |x| >= 2^-27, infinity, or NaN
                 TVectorDouble.One   // for elements: 2^-27 > |x|
             );
 
@@ -1838,11 +1837,11 @@ public static (TVectorSingle Sin, TVectorSingle Cos) SinCosSingle<TVectorSingle,
             const int ARG_SMALLER = 0x39000000; // 2^-27
 
             TVectorSingle ax = TVectorSingle.Abs(x);
-            TVectorInt32 ux = Unsafe.BitCast<TVectorSingle, TVectorInt32>(x);
+            TVectorInt32 ux = Unsafe.BitCast<TVectorSingle, TVectorInt32>(ax);
 
             TVectorSingle sinResult, cosResult;
 
-            if (TVectorSingle.LessThanOrEqualAll(ax, TVectorSingle.Create(ARG_LARGE)))
+            if (TVectorInt32.LessThanOrEqualAll(ux, TVectorInt32.Create(ARG_LARGE)))
             {
                 // We must be a finite value: (pi / 4) >= |x|
 
@@ -1945,7 +1944,7 @@ public static (TVectorSingle Sin, TVectorSingle Cos) SinCosSingle<TVectorSingle,
             sinResult = TVectorSingle.ConditionalSelect(
                 argNotSmallerMask,
                 sinResult,          // for elements: |x| >= 2^-27, infinity, or NaN
-                TVectorSingle.One   // for elements: 2^-27 > |x|
+                x                   // for elements: 2^-27 > |x|
             );
 
             cosResult = TVectorSingle.ConditionalSelect(
@@ -1982,8 +1981,8 @@ public static (TVectorSingle Sin, TVectorSingle Cos) SinCosSingle<TVectorSingle,
 
                 sinResult = TVectorDouble.ConditionalSelect(
                     Unsafe.BitCast<TVectorInt64, TVectorDouble>(TVectorInt64.Equals(((sign & region) | (~sign & ~region)) & TVectorInt64.One, TVectorInt64.Zero)),
-                    -sinResult, // negative in region 1 or 3, positive in region 0 or 2
-                    +sinResult  // negative in region 0 or 2, positive in region 1 or 3
+                    +sinResult, // negative in region 1 or 3, positive in region 0 or 2
+                    -sinResult  // negative in region 0 or 2, positive in region 1 or 3
                 );
 
                 cosResult = TVectorDouble.ConditionalSelect(
@@ -2095,8 +2094,8 @@ public static TVectorDouble SinDouble<TVectorDouble, TVectorInt64>(TVectorDouble
 
                 result = TVectorDouble.ConditionalSelect(
                     Unsafe.BitCast<TVectorInt64, TVectorDouble>(TVectorInt64.Equals(((sign & region) | (~sign & ~region)) & TVectorInt64.One, TVectorInt64.Zero)),
-                    -result,    // negative in region 1 or 3, positive in region 0 or 2
-                    +result     // negative in region 0 or 2, positive in region 1 or 3
+                    +result,    // negative in region 1 or 3, positive in region 0 or 2
+                    -result     // negative in region 0 or 2, positive in region 1 or 3
                 );
 
                 // Propagate the NaN that was passed in
@@ -2116,8 +2115,8 @@ public static TVectorDouble SinDouble<TVectorDouble, TVectorInt64>(TVectorDouble
 
             return TVectorDouble.ConditionalSelect(
                 Unsafe.BitCast<TVectorInt64, TVectorDouble>(TVectorInt64.GreaterThan(ux, TVectorInt64.Create(ARG_SMALLER - 1))),
-                result,             // for elements: |x| >= 2^-27, infinity, or NaN
-                TVectorDouble.One   // for elements: 2^-27 > |x|
+                result,     // for elements: |x| >= 2^-27, infinity, or NaN
+                x           // for elements: 2^-27 > |x|
             );
         }
 
@@ -2183,11 +2182,11 @@ public static TVectorSingle SinSingle<TVectorSingle, TVectorInt32, TVectorDouble
             const int ARG_SMALLER = 0x39000000; // 2^-27
 
             TVectorSingle ax = TVectorSingle.Abs(x);
-            TVectorInt32 ux = Unsafe.BitCast<TVectorSingle, TVectorInt32>(x);
+            TVectorInt32 ux = Unsafe.BitCast<TVectorSingle, TVectorInt32>(ax);
 
             TVectorSingle result;
 
-            if (TVectorSingle.LessThanOrEqualAll(ax, TVectorSingle.Create(ARG_LARGE)))
+            if (TVectorInt32.LessThanOrEqualAll(ux, TVectorInt32.Create(ARG_LARGE)))
             {
                 // We must be a finite value: (pi / 4) >= |x|
 
@@ -2251,8 +2250,8 @@ public static TVectorSingle SinSingle<TVectorSingle, TVectorInt32, TVectorDouble
 
             return TVectorSingle.ConditionalSelect(
                 Unsafe.BitCast<TVectorInt32, TVectorSingle>(TVectorInt32.GreaterThan(ux, TVectorInt32.Create(ARG_SMALLER - 1))),
-                result,             // for elements: |x| >= 2^-27, infinity, or NaN
-                TVectorSingle.One   // for elements: 2^-27 > |x|
+                result,     // for elements: |x| >= 2^-27, infinity, or NaN
+                x           // for elements: 2^-27 > |x|
             );
 
             [MethodImpl(MethodImplOptions.AggressiveInlining)]
@@ -2274,8 +2273,8 @@ static TVectorDouble CoreImpl(TVectorDouble x)
 
                 return TVectorDouble.ConditionalSelect(
                     Unsafe.BitCast<TVectorInt64, TVectorDouble>(TVectorInt64.Equals(((sign & region) | (~sign & ~region)) & TVectorInt64.One, TVectorInt64.Zero)),
-                    -result,    // negative in region 1 or 3, positive in region 0 or 2
-                    +result     // negative in region 0 or 2, positive in region 1 or 3
+                    +result,    // negative in region 1 or 3, positive in region 0 or 2
+                    -result     // negative in region 0 or 2, positive in region 1 or 3
                 );
             }
         }

From a1318a51ce74997e8438788c5876932ce73703e9 Mon Sep 17 00:00:00 2001
From: Tanner Gooding <tagoo@outlook.com>
Date: Mon, 15 Jul 2024 09:30:27 -0700
Subject: [PATCH 07/13] Ensure that very large inputs are handled

---
 .../System/Runtime/Intrinsics/VectorMath.cs   | 166 ++++++++++++++----
 1 file changed, 128 insertions(+), 38 deletions(-)

diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/VectorMath.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/VectorMath.cs
index cf8df6927558f..6a6bdbfb3d59f 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/VectorMath.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/VectorMath.cs
@@ -102,7 +102,7 @@ public static TVectorDouble CosDouble<TVectorDouble, TVectorInt64>(TVectorDouble
                     result = TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(-0.5), x2, TVectorDouble.One);
                 }
             }
-            else
+            else if (TVectorDouble.LessThanAll(ax, TVectorDouble.Create(5000000.0)))
             {
                 // at least one element is: |x| > (pi / 4) -or- infinite -or- nan
                 (TVectorDouble r, TVectorDouble rr, TVectorInt64 region) = SinCosReduce<TVectorDouble, TVectorInt64>(ax);
@@ -136,12 +136,29 @@ public static TVectorDouble CosDouble<TVectorDouble, TVectorInt64>(TVectorDouble
                     result
                 );
             }
+            else
+            {
+                return ScalarFallback(x);
+            }
 
             return TVectorDouble.ConditionalSelect(
                 Unsafe.BitCast<TVectorInt64, TVectorDouble>(TVectorInt64.GreaterThan(ux, TVectorInt64.Create(ARG_SMALLER - 1))),
                 result,             // for elements: |x| >= 2^-27, infinity, or NaN
                 TVectorDouble.One   // for elements: 2^-27 > |x|
             );
+
+            static TVectorDouble ScalarFallback(TVectorDouble x)
+            {
+                TVectorDouble result = TVectorDouble.Zero;
+
+                for (int i = 0; i < TVectorDouble.Count; i++)
+                {
+                    double scalar = double.Cos(x[i]);
+                    result = result.WithElement(i, scalar);
+                }
+
+                return result;
+            }
         }
 
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
@@ -244,7 +261,7 @@ public static TVectorSingle CosSingle<TVectorSingle, TVectorInt32, TVectorDouble
                     result = TVectorSingle.MultiplyAddEstimate(TVectorSingle.Create(-0.5f), x2, TVectorSingle.One);
                 }
             }
-            else
+            else if (TVectorSingle.LessThanAll(ax, TVectorSingle.Create(5000000.0f)))
             {
                 // at least one element is: |x| > (pi / 4) -or- infinite -or- nan
 
@@ -276,6 +293,10 @@ public static TVectorSingle CosSingle<TVectorSingle, TVectorInt32, TVectorDouble
                     result
                 );
             }
+            else
+            {
+                return ScalarFallback(x);
+            }
 
             return TVectorSingle.ConditionalSelect(
                 Unsafe.BitCast<TVectorInt32, TVectorSingle>(TVectorInt32.GreaterThan(ux, TVectorInt32.Create(ARG_SMALLER - 1))),
@@ -303,6 +324,19 @@ static TVectorDouble CoreImpl(TVectorDouble ax)
                     -result     // region 1 or 2
                 );
             }
+
+            static TVectorSingle ScalarFallback(TVectorSingle x)
+            {
+                TVectorSingle result = TVectorSingle.Zero;
+
+                for (int i = 0; i < TVectorSingle.Count; i++)
+                {
+                    float scalar = float.Cos(x[i]);
+                    result = result.WithElement(i, scalar);
+                }
+
+                return result;
+            }
         }
 
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
@@ -1733,7 +1767,7 @@ public static (TVectorDouble Sin, TVectorDouble Cos) SinCosDouble<TVectorDouble,
                     cosResult = TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(-0.5), x2, TVectorDouble.One);
                 }
             }
-            else
+            else if (TVectorDouble.LessThanAll(ax, TVectorDouble.Create(5000000.0)))
             {
                 // at least one element is: |x| > (pi / 4) -or- infinite -or- nan
                 (TVectorDouble r, TVectorDouble rr, TVectorInt64 region) = SinCosReduce<TVectorDouble, TVectorInt64>(ax);
@@ -1741,16 +1775,14 @@ public static (TVectorDouble Sin, TVectorDouble Cos) SinCosDouble<TVectorDouble,
                 TVectorDouble sin = SinDoubleLarge(r, rr);
                 TVectorDouble cos = CosDoubleLarge(r, rr);
 
-                TVectorDouble regionMask = Unsafe.BitCast<TVectorInt64, TVectorDouble>(TVectorInt64.Equals(region & TVectorInt64.One, TVectorInt64.Zero));
-
                 sinResult = TVectorDouble.ConditionalSelect(
-                    regionMask,
+                    Unsafe.BitCast<TVectorInt64, TVectorDouble>(TVectorInt64.Equals(region & TVectorInt64.One, TVectorInt64.Zero)),
                     sin,    // region 0 or 2
                     cos     // region 1 or 3
                 );
 
                 cosResult = TVectorDouble.ConditionalSelect(
-                    regionMask,
+                    Unsafe.BitCast<TVectorInt64, TVectorDouble>(TVectorInt64.Equals(region & TVectorInt64.One, TVectorInt64.Zero)),
                     cos,    // region 0 or 2
                     sin     // region 1 or 3
                 );
@@ -1770,51 +1802,64 @@ public static (TVectorDouble Sin, TVectorDouble Cos) SinCosDouble<TVectorDouble,
                 );
 
                 // Propagate the NaN that was passed in
-                TVectorDouble nanMask = TVectorDouble.IsNaN(x);
-
                 sinResult = TVectorDouble.ConditionalSelect(
-                    nanMask,
+                    TVectorDouble.IsNaN(x),
                     x,
                     sinResult
                 );
 
                 cosResult = TVectorDouble.ConditionalSelect(
-                    nanMask,
+                    TVectorDouble.IsNaN(x),
                     x,
                     cosResult
                 );
 
                 // Return NaN for infinity
-                TVectorDouble infinityMask = TVectorDouble.IsPositiveInfinity(ax);
-
                 sinResult = TVectorDouble.ConditionalSelect(
-                    infinityMask,
+                    TVectorDouble.IsPositiveInfinity(ax),
                     TVectorDouble.Create(double.NaN),
                     sinResult
                 );
 
                 cosResult = TVectorDouble.ConditionalSelect(
-                    infinityMask,
+                    TVectorDouble.IsPositiveInfinity(ax),
                     TVectorDouble.Create(double.NaN),
                     cosResult
                 );
             }
-
-            TVectorDouble argNotSmallerMask = Unsafe.BitCast<TVectorInt64, TVectorDouble>(TVectorInt64.GreaterThan(ux, TVectorInt64.Create(ARG_SMALLER - 1)));
+            else
+            {
+                return ScalarFallback(x);
+            }
 
             sinResult = TVectorDouble.ConditionalSelect(
-                argNotSmallerMask,
+                Unsafe.BitCast<TVectorInt64, TVectorDouble>(TVectorInt64.GreaterThan(ux, TVectorInt64.Create(ARG_SMALLER - 1))),
                 sinResult,          // for elements: |x| >= 2^-27, infinity, or NaN
                 x                   // for elements: 2^-27 > |x|
             );
 
             cosResult = TVectorDouble.ConditionalSelect(
-                argNotSmallerMask,
+                Unsafe.BitCast<TVectorInt64, TVectorDouble>(TVectorInt64.GreaterThan(ux, TVectorInt64.Create(ARG_SMALLER - 1))),
                 cosResult,          // for elements: |x| >= 2^-27, infinity, or NaN
                 TVectorDouble.One   // for elements: 2^-27 > |x|
             );
 
             return (sinResult, cosResult);
+
+            static (TVectorDouble Sin, TVectorDouble Cos) ScalarFallback(TVectorDouble x)
+            {
+                TVectorDouble sinResult = TVectorDouble.Zero;
+                TVectorDouble cosResult = TVectorDouble.Zero;
+
+                for (int i = 0; i < TVectorDouble.Count; i++)
+                {
+                    (double sinScalar, double cosScalar) = double.SinCos(x[i]);
+                    sinResult = sinResult.WithElement(i, sinScalar);
+                    cosResult = cosResult.WithElement(i, cosScalar);
+                }
+
+                return (sinResult, cosResult);
+            }
         }
 
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
@@ -1886,7 +1931,7 @@ public static (TVectorSingle Sin, TVectorSingle Cos) SinCosSingle<TVectorSingle,
                     cosResult = TVectorSingle.MultiplyAddEstimate(TVectorSingle.Create(-0.5f), x2, TVectorSingle.One);
                 }
             }
-            else
+            else if (TVectorSingle.LessThanAll(ax, TVectorSingle.Create(5000000.0f)))
             {
                 // at least one element is: |x| > (pi / 4) -or- infinite -or- nan
 
@@ -1907,48 +1952,46 @@ public static (TVectorSingle Sin, TVectorSingle Cos) SinCosSingle<TVectorSingle,
                 }
 
                 // Propagate the NaN that was passed in
-                TVectorSingle nanMask = TVectorSingle.IsNaN(x);
-
                 sinResult = TVectorSingle.ConditionalSelect(
-                    nanMask,
+                    TVectorSingle.IsNaN(x),
                     x,
                     sinResult
                 );
 
                 cosResult = TVectorSingle.ConditionalSelect(
-                    nanMask,
+                    TVectorSingle.IsNaN(x),
                     x,
                     cosResult
                 );
 
                 // Return NaN for infinity
-                TVectorSingle infinityMask = TVectorSingle.IsPositiveInfinity(ax);
-
                 sinResult = TVectorSingle.ConditionalSelect(
-                    infinityMask,
+                    TVectorSingle.IsPositiveInfinity(ax),
                     TVectorSingle.Create(float.NaN),
                     sinResult
                 );
 
                 cosResult = TVectorSingle.ConditionalSelect(
-                    infinityMask,
+                    TVectorSingle.IsPositiveInfinity(ax),
                     TVectorSingle.Create(float.NaN),
                     cosResult
                 );
 
                 return (sinResult, cosResult);
             }
-
-            TVectorSingle argNotSmallerMask = Unsafe.BitCast<TVectorInt32, TVectorSingle>(TVectorInt32.GreaterThan(ux, TVectorInt32.Create(ARG_SMALLER - 1)));
+            else
+            {
+                return ScalarFallback(x);
+            }
 
             sinResult = TVectorSingle.ConditionalSelect(
-                argNotSmallerMask,
+                Unsafe.BitCast<TVectorInt32, TVectorSingle>(TVectorInt32.GreaterThan(ux, TVectorInt32.Create(ARG_SMALLER - 1))),
                 sinResult,          // for elements: |x| >= 2^-27, infinity, or NaN
                 x                   // for elements: 2^-27 > |x|
             );
 
             cosResult = TVectorSingle.ConditionalSelect(
-                argNotSmallerMask,
+                Unsafe.BitCast<TVectorInt32, TVectorSingle>(TVectorInt32.GreaterThan(ux, TVectorInt32.Create(ARG_SMALLER - 1))),
                 cosResult,          // for elements: |x| >= 2^-27, infinity, or NaN
                 TVectorSingle.One   // for elements: 2^-27 > |x|
             );
@@ -1963,16 +2006,14 @@ public static (TVectorSingle Sin, TVectorSingle Cos) SinCosSingle<TVectorSingle,
                 TVectorDouble sin = SinSinglePoly(r);
                 TVectorDouble cos = CosSingleLarge(r);
 
-                TVectorDouble regionMask = Unsafe.BitCast<TVectorInt64, TVectorDouble>(TVectorInt64.Equals(region & TVectorInt64.One, TVectorInt64.Zero));
-
                 TVectorDouble sinResult = TVectorDouble.ConditionalSelect(
-                    regionMask,
+                    Unsafe.BitCast<TVectorInt64, TVectorDouble>(TVectorInt64.Equals(region & TVectorInt64.One, TVectorInt64.Zero)),
                     sin,    // region 0 or 2
                     cos     // region 1 or 3
                 );
 
                 TVectorDouble cosResult = TVectorDouble.ConditionalSelect(
-                    regionMask,
+                    Unsafe.BitCast<TVectorInt64, TVectorDouble>(TVectorInt64.Equals(region & TVectorInt64.One, TVectorInt64.Zero)),
                     cos,    // region 0 or 2
                     sin     // region 1 or 3
                 );
@@ -1993,6 +2034,21 @@ public static (TVectorSingle Sin, TVectorSingle Cos) SinCosSingle<TVectorSingle,
 
                 return (sinResult, cosResult);
             }
+
+            static (TVectorSingle Sin, TVectorSingle Cos) ScalarFallback(TVectorSingle x)
+            {
+                TVectorSingle sinResult = TVectorSingle.Zero;
+                TVectorSingle cosResult = TVectorSingle.Zero;
+
+                for (int i = 0; i < TVectorSingle.Count; i++)
+                {
+                    (float sinScalar, float cosScalar) = float.SinCos(x[i]);
+                    sinResult = sinResult.WithElement(i, sinScalar);
+                    cosResult = cosResult.WithElement(i, cosScalar);
+                }
+
+                return (sinResult, cosResult);
+            }
         }
 
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
@@ -2076,7 +2132,7 @@ public static TVectorDouble SinDouble<TVectorDouble, TVectorInt64>(TVectorDouble
                     result = TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(-0.16666666666666666), x3, x);
                 }
             }
-            else
+            else if (TVectorDouble.LessThanAll(ax, TVectorDouble.Create(5000000.0)))
             {
                 // at least one element is: |x| > (pi / 4) -or- infinite -or- nan
                 (TVectorDouble r, TVectorDouble rr, TVectorInt64 region) = SinCosReduce<TVectorDouble, TVectorInt64>(ax);
@@ -2112,12 +2168,29 @@ public static TVectorDouble SinDouble<TVectorDouble, TVectorInt64>(TVectorDouble
                     result
                 );
             }
+            else
+            {
+                return ScalarFallback(x);
+            }
 
             return TVectorDouble.ConditionalSelect(
                 Unsafe.BitCast<TVectorInt64, TVectorDouble>(TVectorInt64.GreaterThan(ux, TVectorInt64.Create(ARG_SMALLER - 1))),
                 result,     // for elements: |x| >= 2^-27, infinity, or NaN
                 x           // for elements: 2^-27 > |x|
             );
+
+            static TVectorDouble ScalarFallback(TVectorDouble x)
+            {
+                TVectorDouble result = TVectorDouble.Zero;
+
+                for (int i = 0; i < TVectorDouble.Count; i++)
+                {
+                    double scalar = double.Sin(x[i]);
+                    result = result.WithElement(i, scalar);
+                }
+
+                return result;
+            }
         }
 
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
@@ -2215,7 +2288,7 @@ public static TVectorSingle SinSingle<TVectorSingle, TVectorInt32, TVectorDouble
                     result = TVectorSingle.MultiplyAddEstimate(TVectorSingle.Create(-0.16666667f), x3, x);
                 }
             }
-            else
+            else if (TVectorSingle.LessThanAll(ax, TVectorSingle.Create(5000000.0f)))
             {
                 // at least one element is: |x| > (pi / 4) -or- infinite -or- nan
 
@@ -2247,6 +2320,10 @@ public static TVectorSingle SinSingle<TVectorSingle, TVectorInt32, TVectorDouble
                     result
                 );
             }
+            else
+            {
+                return ScalarFallback(x);
+            }
 
             return TVectorSingle.ConditionalSelect(
                 Unsafe.BitCast<TVectorInt32, TVectorSingle>(TVectorInt32.GreaterThan(ux, TVectorInt32.Create(ARG_SMALLER - 1))),
@@ -2277,6 +2354,19 @@ static TVectorDouble CoreImpl(TVectorDouble x)
                     -result     // negative in region 0 or 2, positive in region 1 or 3
                 );
             }
+
+            static TVectorSingle ScalarFallback(TVectorSingle x)
+            {
+                TVectorSingle result = TVectorSingle.Zero;
+
+                for (int i = 0; i < TVectorSingle.Count; i++)
+                {
+                    float scalar = float.Sin(x[i]);
+                    result = result.WithElement(i, scalar);
+                }
+
+                return result;
+            }
         }
 
         [MethodImpl(MethodImplOptions.AggressiveInlining)]

From d35f3acb360c8736161180a90a85392836bfd464 Mon Sep 17 00:00:00 2001
From: Tanner Gooding <tagoo@outlook.com>
Date: Mon, 15 Jul 2024 11:29:33 -0700
Subject: [PATCH 08/13] Ensure region is correctly adjusted when determining
 the sign of sin

---
 .../System/Runtime/Intrinsics/VectorMath.cs   | 152 +++---------------
 1 file changed, 24 insertions(+), 128 deletions(-)

diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/VectorMath.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/VectorMath.cs
index 6a6bdbfb3d59f..b122107238f36 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/VectorMath.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/VectorMath.cs
@@ -71,6 +71,7 @@ public static TVectorDouble CosDouble<TVectorDouble, TVectorInt64>(TVectorDouble
             //
             // else return 1.0
 
+            const long ARG_HUGE = 0x415312D000000000;       // 5e6
             const long ARG_LARGE = 0x3FE921FB54442D18;      // PI / 4
             const long ARG_SMALL = 0x3F20000000000000;      // 2^-13
             const long ARG_SMALLER = 0x3E40000000000000;    // 2^-27
@@ -102,7 +103,7 @@ public static TVectorDouble CosDouble<TVectorDouble, TVectorInt64>(TVectorDouble
                     result = TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(-0.5), x2, TVectorDouble.One);
                 }
             }
-            else if (TVectorDouble.LessThanAll(ax, TVectorDouble.Create(5000000.0)))
+            else if (TVectorInt64.LessThanAll(ux, TVectorInt64.Create(ARG_HUGE)))
             {
                 // at least one element is: |x| > (pi / 4) -or- infinite -or- nan
                 (TVectorDouble r, TVectorDouble rr, TVectorInt64 region) = SinCosReduce<TVectorDouble, TVectorInt64>(ax);
@@ -121,20 +122,6 @@ public static TVectorDouble CosDouble<TVectorDouble, TVectorInt64>(TVectorDouble
                     +result,    // region 0 or 3
                     -result     // region 1 or 2
                 );
-
-                // Propagate the NaN that was passed in
-                result = TVectorDouble.ConditionalSelect(
-                    TVectorDouble.IsNaN(x),
-                    x,
-                    result
-                );
-
-                // Return NaN for infinity
-                result = TVectorDouble.ConditionalSelect(
-                    TVectorDouble.IsPositiveInfinity(ax),
-                    TVectorDouble.Create(double.NaN),
-                    result
-                );
             }
             else
             {
@@ -223,6 +210,7 @@ public static TVectorSingle CosSingle<TVectorSingle, TVectorInt32, TVectorDouble
             //
             // else return 1.0
 
+            const int ARG_HUGE = 0x4A989680;    // 5e6
             const int ARG_LARGE = 0x3F490FDB;   // PI / 4
             const int ARG_SMALL = 0x3C000000;   // 2^-13
             const int ARG_SMALLER = 0x39000000; // 2^-27
@@ -261,7 +249,7 @@ public static TVectorSingle CosSingle<TVectorSingle, TVectorInt32, TVectorDouble
                     result = TVectorSingle.MultiplyAddEstimate(TVectorSingle.Create(-0.5f), x2, TVectorSingle.One);
                 }
             }
-            else if (TVectorSingle.LessThanAll(ax, TVectorSingle.Create(5000000.0f)))
+            else if (TVectorInt32.LessThanAll(ux, TVectorInt32.Create(ARG_HUGE)))
             {
                 // at least one element is: |x| > (pi / 4) -or- infinite -or- nan
 
@@ -278,20 +266,6 @@ public static TVectorSingle CosSingle<TVectorSingle, TVectorInt32, TVectorDouble
                         CoreImpl(WidenUpper<TVectorSingle, TVectorDouble>(ax))
                     );
                 }
-
-                // Propagate the NaN that was passed in
-                result = TVectorSingle.ConditionalSelect(
-                    TVectorSingle.IsNaN(x),
-                    x,
-                    result
-                );
-
-                // Return NaN for infinity
-                return TVectorSingle.ConditionalSelect(
-                    TVectorSingle.IsPositiveInfinity(ax),
-                    TVectorSingle.Create(float.NaN),
-                    result
-                );
             }
             else
             {
@@ -1732,6 +1706,7 @@ public static (TVectorDouble Sin, TVectorDouble Cos) SinCosDouble<TVectorDouble,
 
             // See SinDouble and CosDouble for implementation details
 
+            const long ARG_HUGE = 0x415312D000000000;       // 5e6
             const long ARG_LARGE = 0x3FE921FB54442D18;      // PI / 4
             const long ARG_SMALL = 0x3F20000000000000;      // 2^-13
             const long ARG_SMALLER = 0x3E40000000000000;    // 2^-27
@@ -1767,7 +1742,7 @@ public static (TVectorDouble Sin, TVectorDouble Cos) SinCosDouble<TVectorDouble,
                     cosResult = TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(-0.5), x2, TVectorDouble.One);
                 }
             }
-            else if (TVectorDouble.LessThanAll(ax, TVectorDouble.Create(5000000.0)))
+            else if (TVectorInt64.LessThanAll(ux, TVectorInt64.Create(ARG_HUGE)))
             {
                 // at least one element is: |x| > (pi / 4) -or- infinite -or- nan
                 (TVectorDouble r, TVectorDouble rr, TVectorInt64 region) = SinCosReduce<TVectorDouble, TVectorInt64>(ax);
@@ -1790,9 +1765,9 @@ public static (TVectorDouble Sin, TVectorDouble Cos) SinCosDouble<TVectorDouble,
                 TVectorInt64 sign = Unsafe.BitCast<TVectorDouble, TVectorInt64>(x) >>> 63;
 
                 sinResult = TVectorDouble.ConditionalSelect(
-                    Unsafe.BitCast<TVectorInt64, TVectorDouble>(TVectorInt64.Equals(((sign & region) | (~sign & ~region)) & TVectorInt64.One, TVectorInt64.Zero)),
-                    +sinResult, // negative in region 1 or 3, positive in region 0 or 2
-                    -sinResult  // negative in region 0 or 2, positive in region 1 or 3
+                    Unsafe.BitCast<TVectorInt64, TVectorDouble>(TVectorInt64.Equals(((sign & (region >> 1)) | (~sign & ~(region >> 1))) & TVectorInt64.One, TVectorInt64.Zero)),
+                    -sinResult, // negative in region 1 or 3, positive in region 0 or 2
+                    +sinResult  // negative in region 0 or 2, positive in region 1 or 3
                 );
 
                 cosResult = TVectorDouble.ConditionalSelect(
@@ -1800,32 +1775,6 @@ public static (TVectorDouble Sin, TVectorDouble Cos) SinCosDouble<TVectorDouble,
                     +cosResult, // region 0 or 3
                     -cosResult  // region 1 or 2
                 );
-
-                // Propagate the NaN that was passed in
-                sinResult = TVectorDouble.ConditionalSelect(
-                    TVectorDouble.IsNaN(x),
-                    x,
-                    sinResult
-                );
-
-                cosResult = TVectorDouble.ConditionalSelect(
-                    TVectorDouble.IsNaN(x),
-                    x,
-                    cosResult
-                );
-
-                // Return NaN for infinity
-                sinResult = TVectorDouble.ConditionalSelect(
-                    TVectorDouble.IsPositiveInfinity(ax),
-                    TVectorDouble.Create(double.NaN),
-                    sinResult
-                );
-
-                cosResult = TVectorDouble.ConditionalSelect(
-                    TVectorDouble.IsPositiveInfinity(ax),
-                    TVectorDouble.Create(double.NaN),
-                    cosResult
-                );
             }
             else
             {
@@ -1877,6 +1826,7 @@ public static (TVectorSingle Sin, TVectorSingle Cos) SinCosSingle<TVectorSingle,
 
             // See SinSingle and CosSingle for implementation details
 
+            const int ARG_HUGE = 0x4A989680;    // 5e6
             const int ARG_LARGE = 0x3F490FDB;   // PI / 4
             const int ARG_SMALL = 0x3C000000;   // 2^-13
             const int ARG_SMALLER = 0x39000000; // 2^-27
@@ -1931,7 +1881,7 @@ public static (TVectorSingle Sin, TVectorSingle Cos) SinCosSingle<TVectorSingle,
                     cosResult = TVectorSingle.MultiplyAddEstimate(TVectorSingle.Create(-0.5f), x2, TVectorSingle.One);
                 }
             }
-            else if (TVectorSingle.LessThanAll(ax, TVectorSingle.Create(5000000.0f)))
+            else if (TVectorInt32.LessThanAll(ux, TVectorInt32.Create(ARG_HUGE)))
             {
                 // at least one element is: |x| > (pi / 4) -or- infinite -or- nan
 
@@ -1950,34 +1900,6 @@ public static (TVectorSingle Sin, TVectorSingle Cos) SinCosSingle<TVectorSingle,
                     sinResult = Narrow<TVectorDouble, TVectorSingle>(sinLo, sinHi);
                     cosResult = Narrow<TVectorDouble, TVectorSingle>(cosLo, cosHi);
                 }
-
-                // Propagate the NaN that was passed in
-                sinResult = TVectorSingle.ConditionalSelect(
-                    TVectorSingle.IsNaN(x),
-                    x,
-                    sinResult
-                );
-
-                cosResult = TVectorSingle.ConditionalSelect(
-                    TVectorSingle.IsNaN(x),
-                    x,
-                    cosResult
-                );
-
-                // Return NaN for infinity
-                sinResult = TVectorSingle.ConditionalSelect(
-                    TVectorSingle.IsPositiveInfinity(ax),
-                    TVectorSingle.Create(float.NaN),
-                    sinResult
-                );
-
-                cosResult = TVectorSingle.ConditionalSelect(
-                    TVectorSingle.IsPositiveInfinity(ax),
-                    TVectorSingle.Create(float.NaN),
-                    cosResult
-                );
-
-                return (sinResult, cosResult);
             }
             else
             {
@@ -2021,9 +1943,9 @@ public static (TVectorSingle Sin, TVectorSingle Cos) SinCosSingle<TVectorSingle,
                 TVectorInt64 sign = Unsafe.BitCast<TVectorDouble, TVectorInt64>(x) >>> 63;
 
                 sinResult = TVectorDouble.ConditionalSelect(
-                    Unsafe.BitCast<TVectorInt64, TVectorDouble>(TVectorInt64.Equals(((sign & region) | (~sign & ~region)) & TVectorInt64.One, TVectorInt64.Zero)),
-                    +sinResult, // negative in region 1 or 3, positive in region 0 or 2
-                    -sinResult  // negative in region 0 or 2, positive in region 1 or 3
+                    Unsafe.BitCast<TVectorInt64, TVectorDouble>(TVectorInt64.Equals(((sign & (region >> 1)) | (~sign & ~(region >> 1))) & TVectorInt64.One, TVectorInt64.Zero)),
+                    -sinResult, // negative in region 1 or 3, positive in region 0 or 2
+                    +sinResult  // negative in region 0 or 2, positive in region 1 or 3
                 );
 
                 cosResult = TVectorDouble.ConditionalSelect(
@@ -2106,6 +2028,7 @@ public static TVectorDouble SinDouble<TVectorDouble, TVectorInt64>(TVectorDouble
             // if |x| < 2.0^(-13) && |x| > 2.0^(-27)
             //   sin(x) = x - (x * x * x * (1/6));
 
+            const long ARG_HUGE = 0x415312D000000000;       // 5e6
             const long ARG_LARGE = 0x3FE921FB54442D18;      // PI / 4
             const long ARG_SMALL = 0x3F20000000000000;      // 2^-13
             const long ARG_SMALLER = 0x3E40000000000000;    // 2^-27
@@ -2132,7 +2055,7 @@ public static TVectorDouble SinDouble<TVectorDouble, TVectorInt64>(TVectorDouble
                     result = TVectorDouble.MultiplyAddEstimate(TVectorDouble.Create(-0.16666666666666666), x3, x);
                 }
             }
-            else if (TVectorDouble.LessThanAll(ax, TVectorDouble.Create(5000000.0)))
+            else if (TVectorInt64.LessThanAll(ux, TVectorInt64.Create(ARG_HUGE)))
             {
                 // at least one element is: |x| > (pi / 4) -or- infinite -or- nan
                 (TVectorDouble r, TVectorDouble rr, TVectorInt64 region) = SinCosReduce<TVectorDouble, TVectorInt64>(ax);
@@ -2149,23 +2072,9 @@ public static TVectorDouble SinDouble<TVectorDouble, TVectorInt64>(TVectorDouble
                 TVectorInt64 sign = Unsafe.BitCast<TVectorDouble, TVectorInt64>(x) >>> 63;
 
                 result = TVectorDouble.ConditionalSelect(
-                    Unsafe.BitCast<TVectorInt64, TVectorDouble>(TVectorInt64.Equals(((sign & region) | (~sign & ~region)) & TVectorInt64.One, TVectorInt64.Zero)),
-                    +result,    // negative in region 1 or 3, positive in region 0 or 2
-                    -result     // negative in region 0 or 2, positive in region 1 or 3
-                );
-
-                // Propagate the NaN that was passed in
-                result = TVectorDouble.ConditionalSelect(
-                    TVectorDouble.IsNaN(x),
-                    x,
-                    result
-                );
-
-                // Return NaN for infinity
-                result = TVectorDouble.ConditionalSelect(
-                    TVectorDouble.IsPositiveInfinity(ax),
-                    TVectorDouble.Create(double.NaN),
-                    result
+                    Unsafe.BitCast<TVectorInt64, TVectorDouble>(TVectorInt64.Equals(((sign & (region >> 1)) | (~sign & ~(region >> 1))) & TVectorInt64.One, TVectorInt64.Zero)),
+                    -result,    // negative in region 1 or 3, positive in region 0 or 2
+                    +result     // negative in region 0 or 2, positive in region 1 or 3
                 );
             }
             else
@@ -2250,6 +2159,7 @@ public static TVectorSingle SinSingle<TVectorSingle, TVectorInt32, TVectorDouble
             // if |x| < 2.0^(-13) && |x| > 2.0^(-27)
             //   sin(x) = x - (x * x * x * (1/6));
 
+            const int ARG_HUGE = 0x4A989680;    // 5e6
             const int ARG_LARGE = 0x3F490FDB;   // PI / 4
             const int ARG_SMALL = 0x3C000000;   // 2^-13
             const int ARG_SMALLER = 0x39000000; // 2^-27
@@ -2288,7 +2198,7 @@ public static TVectorSingle SinSingle<TVectorSingle, TVectorInt32, TVectorDouble
                     result = TVectorSingle.MultiplyAddEstimate(TVectorSingle.Create(-0.16666667f), x3, x);
                 }
             }
-            else if (TVectorSingle.LessThanAll(ax, TVectorSingle.Create(5000000.0f)))
+            else if (TVectorInt32.LessThanAll(ux, TVectorInt32.Create(ARG_HUGE)))
             {
                 // at least one element is: |x| > (pi / 4) -or- infinite -or- nan
 
@@ -2305,20 +2215,6 @@ public static TVectorSingle SinSingle<TVectorSingle, TVectorInt32, TVectorDouble
                         CoreImpl(WidenUpper<TVectorSingle, TVectorDouble>(x))
                     );
                 }
-
-                // Propagate the NaN that was passed in
-                result = TVectorSingle.ConditionalSelect(
-                    TVectorSingle.IsNaN(x),
-                    x,
-                    result
-                );
-
-                // Return NaN for infinity
-                return TVectorSingle.ConditionalSelect(
-                    TVectorSingle.IsPositiveInfinity(ax),
-                    TVectorSingle.Create(float.NaN),
-                    result
-                );
             }
             else
             {
@@ -2349,9 +2245,9 @@ static TVectorDouble CoreImpl(TVectorDouble x)
                 TVectorInt64 sign = Unsafe.BitCast<TVectorDouble, TVectorInt64>(x) >>> 63;
 
                 return TVectorDouble.ConditionalSelect(
-                    Unsafe.BitCast<TVectorInt64, TVectorDouble>(TVectorInt64.Equals(((sign & region) | (~sign & ~region)) & TVectorInt64.One, TVectorInt64.Zero)),
-                    +result,    // negative in region 1 or 3, positive in region 0 or 2
-                    -result     // negative in region 0 or 2, positive in region 1 or 3
+                    Unsafe.BitCast<TVectorInt64, TVectorDouble>(TVectorInt64.Equals(((sign & (region >> 1)) | (~sign & ~(region >> 1))) & TVectorInt64.One, TVectorInt64.Zero)),
+                    -result,    // negative in region 1 or 3, positive in region 0 or 2
+                    +result     // negative in region 0 or 2, positive in region 1 or 3
                 );
             }
 

From a71604bd7d6172517a7cdc12e447a6e30f7d328a Mon Sep 17 00:00:00 2001
From: Tanner Gooding <tagoo@outlook.com>
Date: Mon, 15 Jul 2024 15:28:35 -0700
Subject: [PATCH 09/13] Ensure that TernaryLogic lowering accounts for AND_NOT
 since it is not commutative

---
 src/coreclr/jit/lowerxarch.cpp                | 95 +++++++++++++++++--
 .../System/Runtime/Intrinsics/VectorMath.cs   | 26 ++---
 2 files changed, 95 insertions(+), 26 deletions(-)

diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp
index d34595399a56f..6a0d84597b651 100644
--- a/src/coreclr/jit/lowerxarch.cpp
+++ b/src/coreclr/jit/lowerxarch.cpp
@@ -1487,24 +1487,55 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node)
                         BlockRange().Remove(node);
                         op3 = userIntrin->Op(2);
 
+                        // Tracks which two operands get used first
+                        TernaryLogicUseFlags firstOpUseFlags = TernaryLogicUseFlags::AB;
+
                         if (op3 == node)
                         {
-                            op3 = userIntrin->Op(1);
+                            if (userOper == GT_AND_NOT)
+                            {
+                                op3 = op2;
+                                op2 = op1;
+                                op1 = userIntrin->Op(1);
+
+                                // AND_NOT isn't commutative so we need to shift parameters down
+                                firstOpUseFlags = TernaryLogicUseFlags::BC;
+                            }
+                            else
+                            {
+                                op3 = userIntrin->Op(1);
+                            }
                         }
 
                         uint8_t controlByte = 0x00;
 
                         if ((userOper == GT_XOR) && op3->IsVectorAllBitsSet())
                         {
-                            // We're being used by what is actually GT_NOT, so we
-                            // need to shift parameters down so that A is unused
+                            // We have XOR(OP(A, B), AllBitsSet)
+                            //   A: op1
+                            //   B: op2
+                            //   C: op3 (AllBitsSet)
+                            //
+                            // We want A to be the unused parameter so swap it around
+                            //   A: op3 (AllBitsSet)
+                            //   B: op1
+                            //   C: op2
+                            //
+                            // This gives us NOT(OP(B, C))
+
+                            assert(firstOpUseFlags == TernaryLogicUseFlags::AB);
 
                             std::swap(op2, op3);
                             std::swap(op1, op2);
 
                             if (isOperNot)
                             {
-                                // We have what is actually a double not, so just return op2
+                                // We have NOT(XOR(B, AllBitsSet))
+                                //   A: op3 (AllBitsSet)
+                                //   B: op1
+                                //   C: op2 (AllBitsSet)
+                                //
+                                // This represents a double not, so just return op2
                                 // which is the only actual value now that the parameters
                                 // were shifted around
 
@@ -1538,20 +1569,64 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node)
                         }
                         else if (isOperNot)
                         {
-                            // A is unused, so we just want OP(NOT(B), C)
+                            if (firstOpUseFlags == TernaryLogicUseFlags::AB)
+                            {
+                                // We have OP(XOR(A, AllBitsSet), C)
+                                //   A: op1
+                                //   B: op2 (AllBitsSet)
+                                //   C: op3
+                                //
+                                // We want A to be the unused parameter so swap it around
+                                //   A: op2 (AllBitsSet)
+                                //   B: op1
+                                //   C: op3
+                                //
+                                // This gives us OP(NOT(B), C)
 
-                            assert(op2->IsVectorAllBitsSet());
-                            std::swap(op1, op2);
+                                assert(op2->IsVectorAllBitsSet());
+                                std::swap(op1, op2);
 
-                            controlByte = static_cast<uint8_t>(~B);
-                            controlByte = TernaryLogicInfo::GetTernaryControlByte(userOper, controlByte, C);
+                                controlByte = static_cast<uint8_t>(~B);
+                                controlByte = TernaryLogicInfo::GetTernaryControlByte(userOper, controlByte, C);
+                            }
+                            else
+                            {
+                                // We have OP(A, XOR(B, AllBitsSet))
+                                //   A: op1
+                                //   B: op2
+                                //   C: op3 (AllBitsSet)
+                                //
+                                // We want A to be the unused parameter so swap it around
+                                //   A: op3 (AllBitsSet)
+                                //   B: op1
+                                //   C: op2
+                                //
+                                // This gives us OP(B, NOT(C))
+
+                                assert(firstOpUseFlags == TernaryLogicUseFlags::BC);
+
+                                assert(op3->IsVectorAllBitsSet());
+                                std::swap(op2, op3);
+                                std::swap(op1, op2);
+
+                                controlByte = static_cast<uint8_t>(~C);
+                                controlByte = TernaryLogicInfo::GetTernaryControlByte(userOper, B, controlByte);
+                            }
                         }
-                        else
+                        else if (firstOpUseFlags == TernaryLogicUseFlags::AB)
                         {
                             // We have OP2(OP1(A, B), C)
                             controlByte = TernaryLogicInfo::GetTernaryControlByte(oper, A, B);
                             controlByte = TernaryLogicInfo::GetTernaryControlByte(userOper, controlByte, C);
                         }
+                        else
+                        {
+                            // We have OP2(A, OP1(B, C))
+                            assert(firstOpUseFlags == TernaryLogicUseFlags::BC);
+
+                            controlByte = TernaryLogicInfo::GetTernaryControlByte(oper, B, C);
+                            controlByte = TernaryLogicInfo::GetTernaryControlByte(userOper, A, controlByte);
+                        }
 
                         NamedIntrinsic ternaryLogicId = NI_AVX512F_TernaryLogic;
 
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/VectorMath.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/VectorMath.cs
index b122107238f36..bc0d1b7c82f84 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/VectorMath.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/VectorMath.cs
@@ -12,7 +12,6 @@ namespace System.Runtime.Intrinsics
 {
     internal static unsafe class VectorMath
     {
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static TVectorDouble CosDouble<TVectorDouble, TVectorInt64>(TVectorDouble x)
             where TVectorDouble : unmanaged, ISimdVector<TVectorDouble, double>
             where TVectorInt64 : unmanaged, ISimdVector<TVectorInt64, long>
@@ -81,7 +80,7 @@ public static TVectorDouble CosDouble<TVectorDouble, TVectorInt64>(TVectorDouble
 
             TVectorDouble result;
 
-            if (TVectorInt64.LessThanOrEqualAll(ux, TVectorInt64.Create(ARG_LARGE)))
+            if (TVectorInt64.LessThanAll(ux, TVectorInt64.Create(ARG_LARGE + 1)))
             {
                 // We must be a finite value: (pi / 4) >= |x|
                 TVectorDouble x2 = x * x;
@@ -148,7 +147,6 @@ static TVectorDouble ScalarFallback(TVectorDouble x)
             }
         }
 
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static TVectorSingle CosSingle<TVectorSingle, TVectorInt32, TVectorDouble, TVectorInt64>(TVectorSingle x)
             where TVectorSingle : unmanaged, ISimdVector<TVectorSingle, float>
             where TVectorInt32 : unmanaged, ISimdVector<TVectorInt32, int>
@@ -220,7 +218,7 @@ public static TVectorSingle CosSingle<TVectorSingle, TVectorInt32, TVectorDouble
 
             TVectorSingle result;
 
-            if (TVectorInt32.LessThanOrEqualAll(ux, TVectorInt32.Create(ARG_LARGE)))
+            if (TVectorInt32.LessThanAll(ux, TVectorInt32.Create(ARG_LARGE + 1)))
             {
                 // We must be a finite value: (pi / 4) >= |x|
 
@@ -1693,7 +1691,6 @@ public static TVectorSingle RoundSingle<TVectorSingle>(TVectorSingle vector, Mid
             }
         }
 
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static (TVectorDouble Sin, TVectorDouble Cos) SinCosDouble<TVectorDouble, TVectorInt64>(TVectorDouble x)
             where TVectorDouble : unmanaged, ISimdVector<TVectorDouble, double>
             where TVectorInt64 : unmanaged, ISimdVector<TVectorInt64, long>
@@ -1716,7 +1713,7 @@ public static (TVectorDouble Sin, TVectorDouble Cos) SinCosDouble<TVectorDouble,
 
             TVectorDouble sinResult, cosResult;
 
-            if (TVectorInt64.LessThanOrEqualAll(ux, TVectorInt64.Create(ARG_LARGE)))
+            if (TVectorInt64.LessThanAll(ux, TVectorInt64.Create(ARG_LARGE + 1)))
             {
                 // We must be a finite value: (pi / 4) >= |x|
                 TVectorDouble x2 = x * x;
@@ -1765,7 +1762,7 @@ public static (TVectorDouble Sin, TVectorDouble Cos) SinCosDouble<TVectorDouble,
                 TVectorInt64 sign = Unsafe.BitCast<TVectorDouble, TVectorInt64>(x) >>> 63;
 
                 sinResult = TVectorDouble.ConditionalSelect(
-                    Unsafe.BitCast<TVectorInt64, TVectorDouble>(TVectorInt64.Equals(((sign & (region >> 1)) | (~sign & ~(region >> 1))) & TVectorInt64.One, TVectorInt64.Zero)),
+                    Unsafe.BitCast<TVectorInt64, TVectorDouble>(TVectorInt64.Equals(((sign & (region >>> 1)) | (~sign & ~(region >>> 1))) & TVectorInt64.One, TVectorInt64.Zero)),
                     -sinResult, // negative in region 1 or 3, positive in region 0 or 2
                     +sinResult  // negative in region 0 or 2, positive in region 1 or 3
                 );
@@ -1811,7 +1808,6 @@ public static (TVectorDouble Sin, TVectorDouble Cos) SinCosDouble<TVectorDouble,
             }
         }
 
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static (TVectorSingle Sin, TVectorSingle Cos) SinCosSingle<TVectorSingle, TVectorInt32, TVectorDouble, TVectorInt64>(TVectorSingle x)
             where TVectorSingle : unmanaged, ISimdVector<TVectorSingle, float>
             where TVectorInt32 : unmanaged, ISimdVector<TVectorInt32, int>
@@ -1836,7 +1832,7 @@ public static (TVectorSingle Sin, TVectorSingle Cos) SinCosSingle<TVectorSingle,
 
             TVectorSingle sinResult, cosResult;
 
-            if (TVectorInt32.LessThanOrEqualAll(ux, TVectorInt32.Create(ARG_LARGE)))
+            if (TVectorInt32.LessThanAll(ux, TVectorInt32.Create(ARG_LARGE + 1)))
             {
                 // We must be a finite value: (pi / 4) >= |x|
 
@@ -1943,7 +1939,7 @@ public static (TVectorSingle Sin, TVectorSingle Cos) SinCosSingle<TVectorSingle,
                 TVectorInt64 sign = Unsafe.BitCast<TVectorDouble, TVectorInt64>(x) >>> 63;
 
                 sinResult = TVectorDouble.ConditionalSelect(
-                    Unsafe.BitCast<TVectorInt64, TVectorDouble>(TVectorInt64.Equals(((sign & (region >> 1)) | (~sign & ~(region >> 1))) & TVectorInt64.One, TVectorInt64.Zero)),
+                    Unsafe.BitCast<TVectorInt64, TVectorDouble>(TVectorInt64.Equals(((sign & (region >>> 1)) | (~sign & ~(region >>> 1))) & TVectorInt64.One, TVectorInt64.Zero)),
                     -sinResult, // negative in region 1 or 3, positive in region 0 or 2
                     +sinResult  // negative in region 0 or 2, positive in region 1 or 3
                 );
@@ -1973,7 +1969,6 @@ public static (TVectorSingle Sin, TVectorSingle Cos) SinCosSingle<TVectorSingle,
             }
         }
 
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static TVectorDouble SinDouble<TVectorDouble, TVectorInt64>(TVectorDouble x)
             where TVectorDouble : unmanaged, ISimdVector<TVectorDouble, double>
             where TVectorInt64 : unmanaged, ISimdVector<TVectorInt64, long>
@@ -2038,7 +2033,7 @@ public static TVectorDouble SinDouble<TVectorDouble, TVectorInt64>(TVectorDouble
 
             TVectorDouble result;
 
-            if (TVectorInt64.LessThanOrEqualAll(ux, TVectorInt64.Create(ARG_LARGE)))
+            if (TVectorInt64.LessThanAll(ux, TVectorInt64.Create(ARG_LARGE + 1)))
             {
                 // We must be a finite value: (pi / 4) >= |x|
                 TVectorDouble x2 = x * x;
@@ -2072,7 +2067,7 @@ public static TVectorDouble SinDouble<TVectorDouble, TVectorInt64>(TVectorDouble
                 TVectorInt64 sign = Unsafe.BitCast<TVectorDouble, TVectorInt64>(x) >>> 63;
 
                 result = TVectorDouble.ConditionalSelect(
-                    Unsafe.BitCast<TVectorInt64, TVectorDouble>(TVectorInt64.Equals(((sign & (region >> 1)) | (~sign & ~(region >> 1))) & TVectorInt64.One, TVectorInt64.Zero)),
+                    Unsafe.BitCast<TVectorInt64, TVectorDouble>(TVectorInt64.Equals(((sign & (region >>> 1)) | (~sign & ~(region >>> 1))) & TVectorInt64.One, TVectorInt64.Zero)),
                     -result,    // negative in region 1 or 3, positive in region 0 or 2
                     +result     // negative in region 0 or 2, positive in region 1 or 3
                 );
@@ -2102,7 +2097,6 @@ static TVectorDouble ScalarFallback(TVectorDouble x)
             }
         }
 
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static TVectorSingle SinSingle<TVectorSingle, TVectorInt32, TVectorDouble, TVectorInt64>(TVectorSingle x)
             where TVectorSingle : unmanaged, ISimdVector<TVectorSingle, float>
             where TVectorInt32 : unmanaged, ISimdVector<TVectorInt32, int>
@@ -2169,7 +2163,7 @@ public static TVectorSingle SinSingle<TVectorSingle, TVectorInt32, TVectorDouble
 
             TVectorSingle result;
 
-            if (TVectorInt32.LessThanOrEqualAll(ux, TVectorInt32.Create(ARG_LARGE)))
+            if (TVectorInt32.LessThanAll(ux, TVectorInt32.Create(ARG_LARGE + 1)))
             {
                 // We must be a finite value: (pi / 4) >= |x|
 
@@ -2245,7 +2239,7 @@ static TVectorDouble CoreImpl(TVectorDouble x)
                 TVectorInt64 sign = Unsafe.BitCast<TVectorDouble, TVectorInt64>(x) >>> 63;
 
                 return TVectorDouble.ConditionalSelect(
-                    Unsafe.BitCast<TVectorInt64, TVectorDouble>(TVectorInt64.Equals(((sign & (region >> 1)) | (~sign & ~(region >> 1))) & TVectorInt64.One, TVectorInt64.Zero)),
+                    Unsafe.BitCast<TVectorInt64, TVectorDouble>(TVectorInt64.Equals(((sign & (region >>> 1)) | (~sign & ~(region >>> 1))) & TVectorInt64.One, TVectorInt64.Zero)),
                     -result,    // negative in region 1 or 3, positive in region 0 or 2
                     +result     // negative in region 0 or 2, positive in region 1 or 3
                 );

From 1b0b07baaab29ff0fb361a16d4f111b4d794d7fb Mon Sep 17 00:00:00 2001
From: Tanner Gooding <tagoo@outlook.com>
Date: Mon, 15 Jul 2024 17:16:55 -0700
Subject: [PATCH 10/13] Don't vectorize too large SinPi or CosPi inputs for
 TensorPrimitives

---
 .../Tensors/netcore/TensorPrimitives.Cos.cs      | 16 +++++++++++++++-
 .../Tensors/netcore/TensorPrimitives.CosPi.cs    |  6 ------
 .../Tensors/netcore/TensorPrimitives.Sin.cs      | 16 +++++++++++++++-
 .../Tensors/netcore/TensorPrimitives.SinCosPi.cs |  6 ------
 .../Tensors/netcore/TensorPrimitives.SinPi.cs    |  6 ------
 5 files changed, 30 insertions(+), 20 deletions(-)

diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Cos.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Cos.cs
index 815eb6b700edc..ff706d02fd78e 100644
--- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Cos.cs
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Cos.cs
@@ -141,7 +141,21 @@ public static Vector512<T> Invoke(Vector512<T> x)
             }
         }
 
-#if !NET9_0_OR_GREATER
+#if NET9_0_OR_GREATER
+        // These are still used by CosPiOperator
+
+        private readonly struct CosOperatorSingle
+        {
+            internal const uint MaxVectorizedValue = 0x4A989680u;
+            internal const uint SignMask = 0x7FFFFFFFu;
+        }
+
+        private readonly struct CosOperatorDouble
+        {
+            internal const ulong SignMask = 0x7FFFFFFFFFFFFFFFul;
+            internal const ulong MaxVectorizedValue = 0x4160000000000000ul;
+        }
+#else
         /// <summary>float.Cos(x)</summary>
         private readonly struct CosOperatorSingle : IUnaryOperator<float, float>
         {
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.CosPi.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.CosPi.cs
index 58dbe83dc050f..2db142d2f7290 100644
--- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.CosPi.cs
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.CosPi.cs
@@ -42,7 +42,6 @@ public static Vector128<T> Invoke(Vector128<T> x)
             {
                 Vector128<T> xpi = x * Vector128.Create(T.Pi);
 
-#if !NET9_0_OR_GREATER
                 if (typeof(T) == typeof(float))
                 {
                     if (Vector128.GreaterThanAny(xpi.AsUInt32() & Vector128.Create(CosOperatorSingle.SignMask), Vector128.Create(CosOperatorSingle.MaxVectorizedValue)))
@@ -58,7 +57,6 @@ public static Vector128<T> Invoke(Vector128<T> x)
                         return ApplyScalar<CosPiOperator<double>>(x.AsDouble()).As<double, T>();
                     }
                 }
-#endif
 
                 return CosOperator<T>.Invoke(xpi);
             }
@@ -67,7 +65,6 @@ public static Vector256<T> Invoke(Vector256<T> x)
             {
                 Vector256<T> xpi = x * Vector256.Create(T.Pi);
 
-#if !NET9_0_OR_GREATER
                 if (typeof(T) == typeof(float))
                 {
                     if (Vector256.GreaterThanAny(xpi.AsUInt32() & Vector256.Create(CosOperatorSingle.SignMask), Vector256.Create(CosOperatorSingle.MaxVectorizedValue)))
@@ -83,7 +80,6 @@ public static Vector256<T> Invoke(Vector256<T> x)
                         return ApplyScalar<CosPiOperator<double>>(x.AsDouble()).As<double, T>();
                     }
                 }
-#endif
 
                 return CosOperator<T>.Invoke(xpi);
             }
@@ -92,7 +88,6 @@ public static Vector512<T> Invoke(Vector512<T> x)
             {
                 Vector512<T> xpi = x * Vector512.Create(T.Pi);
 
-#if !NET9_0_OR_GREATER
                 if (typeof(T) == typeof(float))
                 {
                     if (Vector512.GreaterThanAny(xpi.AsUInt32() & Vector512.Create(CosOperatorSingle.SignMask), Vector512.Create(CosOperatorSingle.MaxVectorizedValue)))
@@ -108,7 +103,6 @@ public static Vector512<T> Invoke(Vector512<T> x)
                         return ApplyScalar<CosPiOperator<double>>(x.AsDouble()).As<double, T>();
                     }
                 }
-#endif
 
                 return CosOperator<T>.Invoke(xpi);
             }
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Sin.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Sin.cs
index 8211528121d39..02c249f9437a4 100644
--- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Sin.cs
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Sin.cs
@@ -131,7 +131,21 @@ public static Vector512<T> Invoke(Vector512<T> x)
             }
         }
 
-#if !NET9_0_OR_GREATER
+#if NET9_0_OR_GREATER
+        // These are still used by SinPiOperator
+
+        private readonly struct SinOperatorSingle
+        {
+            internal const uint MaxVectorizedValue = 0x49800000u;
+            internal const uint SignMask = 0x7FFFFFFFu;
+        }
+
+        private readonly struct SinOperatorDouble
+        {
+            internal const ulong SignMask = 0x7FFFFFFFFFFFFFFFul;
+            internal const ulong MaxVectorizedValue = 0x4160000000000000ul;
+        }
+#else
         /// <summary>float.Sin(x)</summary>
         private readonly struct SinOperatorSingle : IUnaryOperator<float, float>
         {
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.SinCosPi.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.SinCosPi.cs
index 39366b9dfca29..5af943a275649 100644
--- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.SinCosPi.cs
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.SinCosPi.cs
@@ -39,7 +39,6 @@ public static (Vector128<T> First, Vector128<T> Second) Invoke(Vector128<T> x)
             {
                 Vector128<T> xpi = x * Vector128.Create(T.Pi);
 
-#if !NET9_0_OR_GREATER
                 if (typeof(T) == typeof(float))
                 {
                     if (Vector128.GreaterThanAny(xpi.AsUInt32() & Vector128.Create(SinOperatorSingle.SignMask), Vector128.Create(SinOperatorSingle.MaxVectorizedValue)) ||
@@ -59,7 +58,6 @@ public static (Vector128<T> First, Vector128<T> Second) Invoke(Vector128<T> x)
                         return (sin.As<double, T>(), cos.As<double, T>());
                     }
                 }
-#endif
 
                 return SinCosOperator<T>.Invoke(xpi);
             }
@@ -68,7 +66,6 @@ public static (Vector256<T> First, Vector256<T> Second) Invoke(Vector256<T> x)
             {
                 Vector256<T> xpi = x * Vector256.Create(T.Pi);
 
-#if !NET9_0_OR_GREATER
                 if (typeof(T) == typeof(float))
                 {
                     if (Vector256.GreaterThanAny(xpi.AsUInt32() & Vector256.Create(SinOperatorSingle.SignMask), Vector256.Create(SinOperatorSingle.MaxVectorizedValue)) ||
@@ -88,7 +85,6 @@ public static (Vector256<T> First, Vector256<T> Second) Invoke(Vector256<T> x)
                         return (sin.As<double, T>(), cos.As<double, T>());
                     }
                 }
-#endif
 
                 return SinCosOperator<T>.Invoke(xpi);
             }
@@ -97,7 +93,6 @@ public static (Vector512<T> First, Vector512<T> Second) Invoke(Vector512<T> x)
             {
                 Vector512<T> xpi = x * Vector512.Create(T.Pi);
 
-#if !NET9_0_OR_GREATER
                 if (typeof(T) == typeof(float))
                 {
                     if (Vector512.GreaterThanAny(xpi.AsUInt32() & Vector512.Create(SinOperatorSingle.SignMask), Vector512.Create(SinOperatorSingle.MaxVectorizedValue)) ||
@@ -117,7 +112,6 @@ public static (Vector512<T> First, Vector512<T> Second) Invoke(Vector512<T> x)
                         return (sin.As<double, T>(), cos.As<double, T>());
                     }
                 }
-#endif
 
                 return SinCosOperator<T>.Invoke(xpi);
             }
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.SinPi.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.SinPi.cs
index 360c542f3779a..2b6d86fa8caca 100644
--- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.SinPi.cs
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.SinPi.cs
@@ -42,7 +42,6 @@ public static Vector128<T> Invoke(Vector128<T> x)
             {
                 Vector128<T> xpi = x * Vector128.Create(T.Pi);
 
-#if !NET9_0_OR_GREATER
                 if (typeof(T) == typeof(float))
                 {
                     if (Vector128.GreaterThanAny(xpi.AsUInt32() & Vector128.Create(SinOperatorSingle.SignMask), Vector128.Create(SinOperatorSingle.MaxVectorizedValue)))
@@ -58,7 +57,6 @@ public static Vector128<T> Invoke(Vector128<T> x)
                         return ApplyScalar<SinPiOperator<double>>(x.AsDouble()).As<double, T>();
                     }
                 }
-#endif
 
                 return SinOperator<T>.Invoke(xpi);
             }
@@ -67,7 +65,6 @@ public static Vector256<T> Invoke(Vector256<T> x)
             {
                 Vector256<T> xpi = x * Vector256.Create(T.Pi);
 
-#if !NET9_0_OR_GREATER
                 if (typeof(T) == typeof(float))
                 {
                     if (Vector256.GreaterThanAny(xpi.AsUInt32() & Vector256.Create(SinOperatorSingle.SignMask), Vector256.Create(SinOperatorSingle.MaxVectorizedValue)))
@@ -83,7 +80,6 @@ public static Vector256<T> Invoke(Vector256<T> x)
                         return ApplyScalar<SinPiOperator<double>>(x.AsDouble()).As<double, T>();
                     }
                 }
-#endif
 
                 return SinOperator<T>.Invoke(xpi);
             }
@@ -92,7 +88,6 @@ public static Vector512<T> Invoke(Vector512<T> x)
             {
                 Vector512<T> xpi = x * Vector512.Create(T.Pi);
 
-#if !NET9_0_OR_GREATER
                 if (typeof(T) == typeof(float))
                 {
                     if (Vector512.GreaterThanAny(xpi.AsUInt32() & Vector512.Create(SinOperatorSingle.SignMask), Vector512.Create(SinOperatorSingle.MaxVectorizedValue)))
@@ -108,7 +103,6 @@ public static Vector512<T> Invoke(Vector512<T> x)
                         return ApplyScalar<SinPiOperator<double>>(x.AsDouble()).As<double, T>();
                     }
                 }
-#endif
 
                 return SinOperator<T>.Invoke(xpi);
             }

From d52bc9ba31c43cb111b13bd6cb30ec3ce4e839f2 Mon Sep 17 00:00:00 2001
From: Tanner Gooding <tagoo@outlook.com>
Date: Tue, 16 Jul 2024 07:50:23 -0700
Subject: [PATCH 11/13] Don't accelerate SinCosPi for the time being

---
 .../netcore/TensorPrimitives.SinCosPi.cs      | 88 +------------------
 1 file changed, 4 insertions(+), 84 deletions(-)

diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.SinCosPi.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.SinCosPi.cs
index 5af943a275649..574db7667be00 100644
--- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.SinCosPi.cs
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.SinCosPi.cs
@@ -1,7 +1,6 @@
 ﻿// Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
-using System.Diagnostics;
 using System.Runtime.Intrinsics;
 
 namespace System.Numerics.Tensors
@@ -30,91 +29,12 @@ public static void SinCosPi<T>(ReadOnlySpan<T> x, Span<T> sinPiDestination, Span
         /// <summary>T.SinCosPi(x)</summary>
         private readonly struct SinCosPiOperator<T> : IUnaryInputBinaryOutput<T> where T : ITrigonometricFunctions<T>
         {
-            public static bool Vectorizable => (typeof(T) == typeof(float))
-                                            || (typeof(T) == typeof(double));
+            public static bool Vectorizable => false; // TODO: vectorize
 
             public static (T, T) Invoke(T x) => T.SinCosPi(x);
-
-            public static (Vector128<T> First, Vector128<T> Second) Invoke(Vector128<T> x)
-            {
-                Vector128<T> xpi = x * Vector128.Create(T.Pi);
-
-                if (typeof(T) == typeof(float))
-                {
-                    if (Vector128.GreaterThanAny(xpi.AsUInt32() & Vector128.Create(SinOperatorSingle.SignMask), Vector128.Create(SinOperatorSingle.MaxVectorizedValue)) ||
-                        Vector128.GreaterThanAny(xpi.AsUInt32() & Vector128.Create(CosOperatorSingle.SignMask), Vector128.Create(CosOperatorSingle.MaxVectorizedValue)))
-                    {
-                        (Vector128<float> sin, Vector128<float> cos) = Apply2xScalar<SinCosPiOperator<float>>(x.AsSingle());
-                        return (sin.As<float, T>(), cos.As<float, T>());
-                    }
-                }
-                else
-                {
-                    Debug.Assert(typeof(T) == typeof(double));
-                    if (Vector128.GreaterThanAny(xpi.AsUInt64() & Vector128.Create(SinOperatorDouble.SignMask), Vector128.Create(SinOperatorDouble.MaxVectorizedValue)) ||
-                        Vector128.GreaterThanAny(xpi.AsUInt64() & Vector128.Create(CosOperatorDouble.SignMask), Vector128.Create(CosOperatorDouble.MaxVectorizedValue)))
-                    {
-                        (Vector128<double> sin, Vector128<double> cos) = Apply2xScalar<SinCosPiOperator<double>>(x.AsDouble());
-                        return (sin.As<double, T>(), cos.As<double, T>());
-                    }
-                }
-
-                return SinCosOperator<T>.Invoke(xpi);
-            }
-
-            public static (Vector256<T> First, Vector256<T> Second) Invoke(Vector256<T> x)
-            {
-                Vector256<T> xpi = x * Vector256.Create(T.Pi);
-
-                if (typeof(T) == typeof(float))
-                {
-                    if (Vector256.GreaterThanAny(xpi.AsUInt32() & Vector256.Create(SinOperatorSingle.SignMask), Vector256.Create(SinOperatorSingle.MaxVectorizedValue)) ||
-                        Vector256.GreaterThanAny(xpi.AsUInt32() & Vector256.Create(CosOperatorSingle.SignMask), Vector256.Create(CosOperatorSingle.MaxVectorizedValue)))
-                    {
-                        (Vector256<float> sin, Vector256<float> cos) = Apply2xScalar<SinCosPiOperator<float>>(x.AsSingle());
-                        return (sin.As<float, T>(), cos.As<float, T>());
-                    }
-                }
-                else
-                {
-                    Debug.Assert(typeof(T) == typeof(double));
-                    if (Vector256.GreaterThanAny(xpi.AsUInt64() & Vector256.Create(SinOperatorDouble.SignMask), Vector256.Create(SinOperatorDouble.MaxVectorizedValue)) ||
-                        Vector256.GreaterThanAny(xpi.AsUInt64() & Vector256.Create(CosOperatorDouble.SignMask), Vector256.Create(CosOperatorDouble.MaxVectorizedValue)))
-                    {
-                        (Vector256<double> sin, Vector256<double> cos) = Apply2xScalar<SinCosPiOperator<double>>(x.AsDouble());
-                        return (sin.As<double, T>(), cos.As<double, T>());
-                    }
-                }
-
-                return SinCosOperator<T>.Invoke(xpi);
-            }
-
-            public static (Vector512<T> First, Vector512<T> Second) Invoke(Vector512<T> x)
-            {
-                Vector512<T> xpi = x * Vector512.Create(T.Pi);
-
-                if (typeof(T) == typeof(float))
-                {
-                    if (Vector512.GreaterThanAny(xpi.AsUInt32() & Vector512.Create(SinOperatorSingle.SignMask), Vector512.Create(SinOperatorSingle.MaxVectorizedValue)) ||
-                        Vector512.GreaterThanAny(xpi.AsUInt32() & Vector512.Create(CosOperatorSingle.SignMask), Vector512.Create(CosOperatorSingle.MaxVectorizedValue)))
-                    {
-                        (Vector512<float> sin, Vector512<float> cos) = Apply2xScalar<SinCosPiOperator<float>>(x.AsSingle());
-                        return (sin.As<float, T>(), cos.As<float, T>());
-                    }
-                }
-                else
-                {
-                    Debug.Assert(typeof(T) == typeof(double));
-                    if (Vector512.GreaterThanAny(xpi.AsUInt64() & Vector512.Create(SinOperatorDouble.SignMask), Vector512.Create(SinOperatorDouble.MaxVectorizedValue)) ||
-                        Vector512.GreaterThanAny(xpi.AsUInt64() & Vector512.Create(CosOperatorDouble.SignMask), Vector512.Create(CosOperatorDouble.MaxVectorizedValue)))
-                    {
-                        (Vector512<double> sin, Vector512<double> cos) = Apply2xScalar<SinCosPiOperator<double>>(x.AsDouble());
-                        return (sin.As<double, T>(), cos.As<double, T>());
-                    }
-                }
-
-                return SinCosOperator<T>.Invoke(xpi);
-            }
+            public static (Vector128<T> First, Vector128<T> Second) Invoke(Vector128<T> x) => throw new NotSupportedException();
+            public static (Vector256<T> First, Vector256<T> Second) Invoke(Vector256<T> x) => throw new NotSupportedException();
+            public static (Vector512<T> First, Vector512<T> Second) Invoke(Vector512<T> x) => throw new NotSupportedException();
         }
     }
 }

From f225aa037e836941bf19d2e95ef759a237a922af Mon Sep 17 00:00:00 2001
From: Tanner Gooding <tagoo@outlook.com>
Date: Tue, 16 Jul 2024 10:39:24 -0700
Subject: [PATCH 12/13] Don't accelerate TensorPrimitives.SinCos for the time
 being

---
 .../netcore/TensorPrimitives.SinCos.cs        | 106 +-----------------
 1 file changed, 4 insertions(+), 102 deletions(-)

diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.SinCos.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.SinCos.cs
index 2c823a6f9482f..766269957a2e7 100644
--- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.SinCos.cs
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.SinCos.cs
@@ -1,7 +1,6 @@
 ﻿// Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
-using System.Diagnostics;
 using System.Runtime.Intrinsics;
 
 namespace System.Numerics.Tensors
@@ -30,109 +29,12 @@ public static void SinCos<T>(ReadOnlySpan<T> x, Span<T> sinDestination, Span<T>
         /// <summary>T.SinCos(x)</summary>
         private readonly struct SinCosOperator<T> : IUnaryInputBinaryOutput<T> where T : ITrigonometricFunctions<T>
         {
-            public static bool Vectorizable => (typeof(T) == typeof(float))
-                                            || (typeof(T) == typeof(double));
+            public static bool Vectorizable => false; // TODO: vectorize
 
             public static (T, T) Invoke(T x) => T.SinCos(x);
-
-            public static (Vector128<T> First, Vector128<T> Second) Invoke(Vector128<T> x)
-            {
-#if NET9_0_OR_GREATER
-                if (typeof(T) == typeof(double))
-                {
-                    (Vector128<double> sin, Vector128<double> cos) = Vector128.SinCos(x.AsDouble());
-                    return (sin.As<double, T>(), cos.As<double, T>());
-                }
-                else
-                {
-                    Debug.Assert(typeof(T) == typeof(float));
-                    (Vector128<float> sin, Vector128<float> cos) = Vector128.SinCos(x.AsSingle());
-                    return (sin.As<float, T>(), cos.As<float, T>());
-                }
-#else
-                if (typeof(T) == typeof(float))
-                {
-                    return (
-                        SinOperatorSingle.Invoke(x.AsSingle()).As<float, T>(),
-                        CosOperatorSingle.Invoke(x.AsSingle()).As<float, T>()
-                    );
-                }
-                else
-                {
-                    Debug.Assert(typeof(T) == typeof(double));
-                    return (
-                        SinOperatorDouble.Invoke(x.AsDouble()).As<double, T>(),
-                        CosOperatorDouble.Invoke(x.AsDouble()).As<double, T>()
-                    );
-                }
-#endif
-            }
-
-            public static (Vector256<T> First, Vector256<T> Second) Invoke(Vector256<T> x)
-            {
-#if NET9_0_OR_GREATER
-                if (typeof(T) == typeof(double))
-                {
-                    (Vector256<double> sin, Vector256<double> cos) = Vector256.SinCos(x.AsDouble());
-                    return (sin.As<double, T>(), cos.As<double, T>());
-                }
-                else
-                {
-                    Debug.Assert(typeof(T) == typeof(float));
-                    (Vector256<float> sin, Vector256<float> cos) = Vector256.SinCos(x.AsSingle());
-                    return (sin.As<float, T>(), cos.As<float, T>());
-                }
-#else
-                if (typeof(T) == typeof(float))
-                {
-                    return (
-                        SinOperatorSingle.Invoke(x.AsSingle()).As<float, T>(),
-                        CosOperatorSingle.Invoke(x.AsSingle()).As<float, T>()
-                    );
-                }
-                else
-                {
-                    Debug.Assert(typeof(T) == typeof(double));
-                    return (
-                        SinOperatorDouble.Invoke(x.AsDouble()).As<double, T>(),
-                        CosOperatorDouble.Invoke(x.AsDouble()).As<double, T>()
-                    );
-                }
-#endif
-            }
-
-            public static (Vector512<T> First, Vector512<T> Second) Invoke(Vector512<T> x)
-            {
-#if NET9_0_OR_GREATER
-                if (typeof(T) == typeof(double))
-                {
-                    (Vector512<double> sin, Vector512<double> cos) = Vector512.SinCos(x.AsDouble());
-                    return (sin.As<double, T>(), cos.As<double, T>());
-                }
-                else
-                {
-                    Debug.Assert(typeof(T) == typeof(float));
-                    (Vector512<float> sin, Vector512<float> cos) = Vector512.SinCos(x.AsSingle());
-                    return (sin.As<float, T>(), cos.As<float, T>());
-                }
-#else
-                if (typeof(T) == typeof(float))
-                {
-                    return (
-                        SinOperatorSingle.Invoke(x.AsSingle()).As<float, T>(),
-                        CosOperatorSingle.Invoke(x.AsSingle()).As<float, T>()
-                    );
-                }
-                else
-                {
-                    Debug.Assert(typeof(T) == typeof(double));
-                    return (
-                        SinOperatorDouble.Invoke(x.AsDouble()).As<double, T>(),
-                        CosOperatorDouble.Invoke(x.AsDouble()).As<double, T>()
-                    );
-                }
-#endif
-            }
+            public static (Vector128<T> First, Vector128<T> Second) Invoke(Vector128<T> x) => throw new NotSupportedException();
+            public static (Vector256<T> First, Vector256<T> Second) Invoke(Vector256<T> x) => throw new NotSupportedException();
+            public static (Vector512<T> First, Vector512<T> Second) Invoke(Vector512<T> x) => throw new NotSupportedException();
         }
     }
 }

From 3a27c845aded524149c7f5636ed487788a1a21ab Mon Sep 17 00:00:00 2001
From: Tanner Gooding <tagoo@outlook.com>
Date: Thu, 18 Jul 2024 10:11:28 -0700
Subject: [PATCH 13/13] Don't include JIT changes, they were extracted to their
 own PR

---
 src/coreclr/jit/lowerxarch.cpp | 95 ++++------------------------------
 1 file changed, 10 insertions(+), 85 deletions(-)

diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp
index 6a0d84597b651..d34595399a56f 100644
--- a/src/coreclr/jit/lowerxarch.cpp
+++ b/src/coreclr/jit/lowerxarch.cpp
@@ -1487,55 +1487,24 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node)
                         BlockRange().Remove(node);
                         op3 = userIntrin->Op(2);
 
-                        // Tracks which two operands get used first
-                        TernaryLogicUseFlags firstOpUseFlags = TernaryLogicUseFlags::AB;
-
                         if (op3 == node)
                         {
-                            if (userOper == GT_AND_NOT)
-                            {
-                                op3 = op2;
-                                op2 = op1;
-                                op1 = userIntrin->Op(1);
-
-                                // AND_NOT isn't commutative so we need to shift parameters down
-                                firstOpUseFlags = TernaryLogicUseFlags::BC;
-                            }
-                            else
-                            {
-                                op3 = userIntrin->Op(1);
-                            }
+                            op3 = userIntrin->Op(1);
                         }
 
                         uint8_t controlByte = 0x00;
 
                         if ((userOper == GT_XOR) && op3->IsVectorAllBitsSet())
                         {
-                            // We have XOR(OP(A, B), AllBitsSet)
-                            //   A: op1
-                            //   B: op2
-                            //   C: op3 (AllBitsSet)
-                            //
-                            // We want A to be the unused parameter so swap it around
-                            //   A: op3 (AllBitsSet)
-                            //   B: op1
-                            //   C: op2
-                            //
-                            // This gives us NOT(OP(B, C))
-
-                            assert(firstOpUseFlags == TernaryLogicUseFlags::AB);
+                            // We're being used by what is actually GT_NOT, so we
+                            // need to shift parameters down so that A is unused
 
                             std::swap(op2, op3);
                             std::swap(op1, op2);
 
                             if (isOperNot)
                             {
-                                // We have NOT(XOR(B, AllBitsSet))
-                                //   A: op3 (AllBitsSet)
-                                //   B: op1
-                                //   C: op2 (AllBitsSet)
-                                //
-                                // This represents a double not, so so just return op2
+                                // We have what is actually a double not, so just return op2
                                 // which is the only actual value now that the parameters
                                 // were shifted around
 
@@ -1569,64 +1538,20 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node)
                         }
                         else if (isOperNot)
                         {
-                            if (firstOpUseFlags == TernaryLogicUseFlags::AB)
-                            {
-                                // We have OP(XOR(A, AllBitsSet), C)
-                                //   A: op1
-                                //   B: op2 (AllBitsSet)
-                                //   C: op3
-                                //
-                                // We want A to be the unused parameter so swap it around
-                                //   A: op2 (AllBitsSet)
-                                //   B: op1
-                                //   C: op3
-                                //
-                                // This gives us OP(NOT(B), C)
+                            // A is unused, so we just want OP(NOT(B), C)
 
-                                assert(op2->IsVectorAllBitsSet());
-                                std::swap(op1, op2);
-
-                                controlByte = static_cast<uint8_t>(~B);
-                                controlByte = TernaryLogicInfo::GetTernaryControlByte(userOper, controlByte, C);
-                            }
-                            else
-                            {
-                                // We have OP(A, XOR(B, AllBitsSet))
-                                //   A: op1
-                                //   B: op2
-                                //   C: op3 (AllBitsSet)
-                                //
-                                // We want A to be the unused parameter so swap it around
-                                //   A: op3 (AllBitsSet)
-                                //   B: op1
-                                //   C: op2
-                                //
-                                // This gives us OP(B, NOT(C))
-
-                                assert(firstOpUseFlags == TernaryLogicUseFlags::BC);
-
-                                assert(op3->IsVectorAllBitsSet());
-                                std::swap(op2, op3);
-                                std::swap(op1, op2);
+                            assert(op2->IsVectorAllBitsSet());
+                            std::swap(op1, op2);
 
-                                controlByte = static_cast<uint8_t>(~C);
-                                controlByte = TernaryLogicInfo::GetTernaryControlByte(userOper, B, controlByte);
-                            }
+                            controlByte = static_cast<uint8_t>(~B);
+                            controlByte = TernaryLogicInfo::GetTernaryControlByte(userOper, controlByte, C);
                         }
-                        else if (firstOpUseFlags == TernaryLogicUseFlags::AB)
+                        else
                         {
                             // We have OP2(OP1(A, B), C)
                             controlByte = TernaryLogicInfo::GetTernaryControlByte(oper, A, B);
                             controlByte = TernaryLogicInfo::GetTernaryControlByte(userOper, controlByte, C);
                         }
-                        else
-                        {
-                            // We have OP2(A, OP1(B, C))
-                            assert(firstOpUseFlags == TernaryLogicUseFlags::BC);
-
-                            controlByte = TernaryLogicInfo::GetTernaryControlByte(oper, B, C);
-                            controlByte = TernaryLogicInfo::GetTernaryControlByte(userOper, A, controlByte);
-                        }
 
                         NamedIntrinsic ternaryLogicId = NI_AVX512F_TernaryLogic;