Skip to content

Commit 07b705b

Browse files
Improve precision of the vectorized trig functions (#99549)
* Improve precision of the vectorized trig functions * Ensure all trig algorithms use FMA where required. * Revert "Ensure all trig algorithms use FMA where required." This reverts commit c97605c. * Use factories in memberdata methods for improved type safety and inference. * Update tolerances for hardware that doesn't support FMA. * Increase tolerance for trig functions in non-FMA hardware.
1 parent a4cbe52 commit 07b705b

File tree

4 files changed

+203
-113
lines changed

4 files changed

+203
-113
lines changed

src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Cos.cs

+15-6
Original file line numberDiff line numberDiff line change
@@ -241,11 +241,14 @@ public static Vector128<double> Invoke(Vector128<double> x)
241241
return ApplyScalar<CosOperatorDouble>(x);
242242
}
243243

244+
// dn = int(x / pi + 1/2) - 1/2
244245
Vector128<double> almHuge = Vector128.Create(AlmHuge);
245-
Vector128<double> dn = (uxMasked * Vector128.Create(1 / double.Pi)) + Vector128.Create(double.Pi / 2) + almHuge;
246+
Vector128<double> half = Vector128.Create(0.5);
247+
Vector128<double> dn = (uxMasked * Vector128.Create(1 / double.Pi)) + half + almHuge;
246248
Vector128<ulong> odd = dn.AsUInt64() << 63;
247-
dn = dn - almHuge - Vector128.Create(0.5);
249+
dn = dn - almHuge - half;
248250

251+
// f = x - (n*pi)
249252
Vector128<double> f = uxMasked;
250253
f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector128.Create(-double.Pi), f);
251254
f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector128.Create(Pi_Tail2), f);
@@ -276,11 +279,14 @@ public static Vector256<double> Invoke(Vector256<double> x)
276279
return ApplyScalar<CosOperatorDouble>(x);
277280
}
278281

282+
// dn = int(x / pi + 1/2) - 1/2
279283
Vector256<double> almHuge = Vector256.Create(AlmHuge);
280-
Vector256<double> dn = (uxMasked * Vector256.Create(1 / double.Pi)) + Vector256.Create(double.Pi / 2) + almHuge;
284+
Vector256<double> half = Vector256.Create(0.5);
285+
Vector256<double> dn = (uxMasked * Vector256.Create(1 / double.Pi)) + half + almHuge;
281286
Vector256<ulong> odd = dn.AsUInt64() << 63;
282-
dn = dn - almHuge - Vector256.Create(0.5);
287+
dn = dn - almHuge - half;
283288

289+
// f = x - (n*pi)
284290
Vector256<double> f = uxMasked;
285291
f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector256.Create(-double.Pi), f);
286292
f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector256.Create(Pi_Tail2), f);
@@ -311,11 +317,14 @@ public static Vector512<double> Invoke(Vector512<double> x)
311317
return ApplyScalar<CosOperatorDouble>(x);
312318
}
313319

320+
// dn = int(x / pi + 1/2) - 1/2
314321
Vector512<double> almHuge = Vector512.Create(AlmHuge);
315-
Vector512<double> dn = (uxMasked * Vector512.Create(1 / double.Pi)) + Vector512.Create(double.Pi / 2) + almHuge;
322+
Vector512<double> half = Vector512.Create(0.5);
323+
Vector512<double> dn = (uxMasked * Vector512.Create(1 / double.Pi)) + half + almHuge;
316324
Vector512<ulong> odd = dn.AsUInt64() << 63;
317-
dn = dn - almHuge - Vector512.Create(0.5);
325+
dn = dn - almHuge - half;
318326

327+
// f = x - (n*pi)
319328
Vector512<double> f = uxMasked;
320329
f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector512.Create(-double.Pi), f);
321330
f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector512.Create(Pi_Tail2), f);

src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Sin.cs

+22-4
Original file line numberDiff line numberDiff line change
@@ -97,8 +97,8 @@ public static Vector512<T> Invoke(Vector512<T> x)
9797
/// <summary>float.Sin(x)</summary>
9898
private readonly struct SinOperatorSingle : IUnaryOperator<float, float>
9999
{
100-
internal const uint SignMask = 0x7FFFFFFFu;
101100
internal const uint MaxVectorizedValue = 0x49800000u;
101+
internal const uint SignMask = 0x7FFFFFFFu;
102102
private const float AlmHuge = 1.2582912e7f;
103103
private const float Pi_Tail1 = 8.742278e-8f;
104104
private const float Pi_Tail2 = 3.430249e-15f;
@@ -231,11 +231,17 @@ public static Vector128<double> Invoke(Vector128<double> x)
231231
return ApplyScalar<SinOperatorDouble>(x);
232232
}
233233

234+
// dn = |x| * (1 / π)
234235
Vector128<double> almHuge = Vector128.Create(AlmHuge);
235236
Vector128<double> dn = MultiplyAddEstimateOperator<double>.Invoke(uxMasked, Vector128.Create(1 / double.Pi), almHuge);
236237
Vector128<ulong> odd = dn.AsUInt64() << 63;
237238
dn -= almHuge;
238-
Vector128<double> f = uxMasked - (dn * Vector128.Create(double.Pi)) - (dn * Vector128.Create(Pi_Tail1)) - (dn * Vector128.Create(Pi_Tail2));
239+
240+
// f = |x| - (dn * π)
241+
Vector128<double> f = uxMasked;
242+
f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector128.Create(-double.Pi), f);
243+
f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector128.Create(-Pi_Tail1), f);
244+
f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector128.Create(-Pi_Tail2), f);
239245

240246
// POLY_EVAL_ODD_17
241247
Vector128<double> f2 = f * f;
@@ -262,11 +268,17 @@ public static Vector256<double> Invoke(Vector256<double> x)
262268
return ApplyScalar<SinOperatorDouble>(x);
263269
}
264270

271+
// dn = |x| * (1 / π)
265272
Vector256<double> almHuge = Vector256.Create(AlmHuge);
266273
Vector256<double> dn = MultiplyAddEstimateOperator<double>.Invoke(uxMasked, Vector256.Create(1 / double.Pi), almHuge);
267274
Vector256<ulong> odd = dn.AsUInt64() << 63;
268275
dn -= almHuge;
269-
Vector256<double> f = uxMasked - (dn * Vector256.Create(double.Pi)) - (dn * Vector256.Create(Pi_Tail1)) - (dn * Vector256.Create(Pi_Tail2));
276+
277+
// f = |x| - (dn * π)
278+
Vector256<double> f = uxMasked;
279+
f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector256.Create(-double.Pi), f);
280+
f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector256.Create(-Pi_Tail1), f);
281+
f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector256.Create(-Pi_Tail2), f);
270282

271283
// POLY_EVAL_ODD_17
272284
Vector256<double> f2 = f * f;
@@ -293,11 +305,17 @@ public static Vector512<double> Invoke(Vector512<double> x)
293305
return ApplyScalar<SinOperatorDouble>(x);
294306
}
295307

308+
// dn = |x| * (1 / π)
296309
Vector512<double> almHuge = Vector512.Create(AlmHuge);
297310
Vector512<double> dn = MultiplyAddEstimateOperator<double>.Invoke(uxMasked, Vector512.Create(1 / double.Pi), almHuge);
298311
Vector512<ulong> odd = dn.AsUInt64() << 63;
299312
dn -= almHuge;
300-
Vector512<double> f = uxMasked - (dn * Vector512.Create(double.Pi)) - (dn * Vector512.Create(Pi_Tail1)) - (dn * Vector512.Create(Pi_Tail2));
313+
314+
// f = |x| - (dn * π)
315+
Vector512<double> f = uxMasked;
316+
f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector512.Create(-double.Pi), f);
317+
f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector512.Create(-Pi_Tail1), f);
318+
f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector512.Create(-Pi_Tail2), f);
301319

302320
// POLY_EVAL_ODD_17
303321
Vector512<double> f2 = f * f;

src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Tan.cs

+21-3
Original file line numberDiff line numberDiff line change
@@ -259,10 +259,16 @@ public static Vector128<double> Invoke(Vector128<double> x)
259259
return ApplyScalar<TanOperatorDouble>(x);
260260
}
261261

262+
// dn = |x| * (2/π)
262263
Vector128<double> dn = MultiplyAddEstimateOperator<double>.Invoke(uxMasked, Vector128.Create(2 / double.Pi), Vector128.Create(AlmHuge));
263264
Vector128<ulong> odd = dn.AsUInt64() << 63;
264265
dn -= Vector128.Create(AlmHuge);
265-
Vector128<double> f = uxMasked.AsDouble() - (dn * (double.Pi / 2)) - (dn * HalfPi2) - (dn * HalfPi3);
266+
267+
// f = |x| - (dn * π/2)
268+
Vector128<double> f = uxMasked;
269+
f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector128.Create(-double.Pi / 2), f);
270+
f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector128.Create(-HalfPi2), f);
271+
f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector128.Create(-HalfPi3), f);
266272

267273
// POLY_EVAL_ODD_29
268274
Vector128<double> g = f * f;
@@ -300,10 +306,16 @@ public static Vector256<double> Invoke(Vector256<double> x)
300306
return ApplyScalar<TanOperatorDouble>(x);
301307
}
302308

309+
// dn = |x| * (2/π)
303310
Vector256<double> dn = MultiplyAddEstimateOperator<double>.Invoke(uxMasked, Vector256.Create(2 / double.Pi), Vector256.Create(AlmHuge));
304311
Vector256<ulong> odd = dn.AsUInt64() << 63;
305312
dn -= Vector256.Create(AlmHuge);
306-
Vector256<double> f = uxMasked.AsDouble() - (dn * (double.Pi / 2)) - (dn * HalfPi2) - (dn * HalfPi3);
313+
314+
// f = |x| - (dn * π/2)
315+
Vector256<double> f = uxMasked;
316+
f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector256.Create(-double.Pi / 2), f);
317+
f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector256.Create(-HalfPi2), f);
318+
f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector256.Create(-HalfPi3), f);
307319

308320
// POLY_EVAL_ODD_29
309321
Vector256<double> g = f * f;
@@ -341,10 +353,16 @@ public static Vector512<double> Invoke(Vector512<double> x)
341353
return ApplyScalar<TanOperatorDouble>(x);
342354
}
343355

356+
// dn = |x| * (2/π)
344357
Vector512<double> dn = MultiplyAddEstimateOperator<double>.Invoke(uxMasked, Vector512.Create(2 / double.Pi), Vector512.Create(AlmHuge));
345358
Vector512<ulong> odd = dn.AsUInt64() << 63;
346359
dn -= Vector512.Create(AlmHuge);
347-
Vector512<double> f = uxMasked.AsDouble() - (dn * (double.Pi / 2)) - (dn * HalfPi2) - (dn * HalfPi3);
360+
361+
// f = |x| - (dn * π/2)
362+
Vector512<double> f = uxMasked;
363+
f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector512.Create(-double.Pi / 2), f);
364+
f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector512.Create(-HalfPi2), f);
365+
f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector512.Create(-HalfPi3), f);
348366

349367
// POLY_EVAL_ODD_29
350368
Vector512<double> g = f * f;

0 commit comments

Comments
 (0)