Ensure that aggregation is consistent regardless of data alignment (#106166)

* Ensure that aggregation is consistent regardless of data alignment
* Ensure we handle for all aggregation helpers
* Ensure we don't process beg twice
* Ensure that we properly track in the case we can't align
* Add missing semicolon
* Fix the handling on .NET Framework
* Ensure yptr on .NET Framework is incremented as well
dotnet · Aug 13, 2024 · 05abb76 · 05abb76
1 parent bfb674e
commit 05abb76
Show file tree

Hide file tree

Showing 2 changed files with 116 additions and 54 deletions.
diff --git a/...nsors/src/System/Numerics/Tensors/netcore/Common/TensorPrimitives.IAggregationOperator.cs b/...nsors/src/System/Numerics/Tensors/netcore/Common/TensorPrimitives.IAggregationOperator.cs
@@ -141,9 +141,12 @@ static T Vectorized128(ref T xRef, nuint remainder)
 
  // We need to the ensure the underlying data can be aligned and only align
  // it if it can. It is possible we have an unaligned ref, in which case we
- // can never achieve the required SIMD alignment.
+ // can never achieve the required SIMD alignment. This cannot be done for
+ // float or double since that changes how results compound together.
 
- bool canAlign = ((nuint)xPtr % (nuint)sizeof(T)) == 0;
+ bool canAlign = (typeof(T) != typeof(float)) &&
+ (typeof(T) != typeof(double)) &&
+ ((nuint)xPtr % (nuint)sizeof(T)) == 0;
 
  if (canAlign)
  {
@@ -156,11 +159,20 @@ static T Vectorized128(ref T xRef, nuint remainder)
  misalignment = ((uint)sizeof(Vector128<T>) - ((nuint)xPtr % (uint)sizeof(Vector128<T>))) / (uint)sizeof(T);
 
  xPtr += misalignment;
-
  Debug.Assert(((nuint)xPtr % (uint)sizeof(Vector128<T>)) == 0);
 
  remainder -= misalignment;
  }
+ else
+ {
+ // We can't align, but this also means we're processing the full data from beg
+ // so account for that to ensure we don't double process and include them in the
+ // aggregate twice.
+
+ misalignment = (uint)Vector128<T>.Count;
+ xPtr += misalignment;
+ remainder -= misalignment;
+ }
 
  Vector128<T> vector1;
  Vector128<T> vector2;
@@ -310,9 +322,12 @@ static T Vectorized256(ref T xRef, nuint remainder)
 
  // We need to the ensure the underlying data can be aligned and only align
  // it if it can. It is possible we have an unaligned ref, in which case we
- // can never achieve the required SIMD alignment.
+ // can never achieve the required SIMD alignment. This cannot be done for
+ // float or double since that changes how results compound together.
 
- bool canAlign = ((nuint)xPtr % (nuint)sizeof(T)) == 0;
+ bool canAlign = (typeof(T) != typeof(float)) &&
+ (typeof(T) != typeof(double)) &&
+ ((nuint)xPtr % (nuint)sizeof(T)) == 0;
 
  if (canAlign)
  {
@@ -330,6 +345,16 @@ static T Vectorized256(ref T xRef, nuint remainder)
 
  remainder -= misalignment;
  }
+ else
+ {
+ // We can't align, but this also means we're processing the full data from beg
+ // so account for that to ensure we don't double process and include them in the
+ // aggregate twice.
+
+ misalignment = (uint)Vector256<T>.Count;
+ xPtr += misalignment;
+ remainder -= misalignment;
+ }
 
  Vector256<T> vector1;
  Vector256<T> vector2;
@@ -479,9 +504,12 @@ static T Vectorized512(ref T xRef, nuint remainder)
 
  // We need to the ensure the underlying data can be aligned and only align
  // it if it can. It is possible we have an unaligned ref, in which case we
- // can never achieve the required SIMD alignment.
+ // can never achieve the required SIMD alignment. This cannot be done for
+ // float or double since that changes how results compound together.
 
- bool canAlign = ((nuint)xPtr % (nuint)sizeof(T)) == 0;
+ bool canAlign = (typeof(T) != typeof(float)) &&
+ (typeof(T) != typeof(double)) &&
+ ((nuint)xPtr % (nuint)sizeof(T)) == 0;
 
  if (canAlign)
  {
@@ -499,6 +527,16 @@ static T Vectorized512(ref T xRef, nuint remainder)
 
  remainder -= misalignment;
  }
+ else
+ {
+ // We can't align, but this also means we're processing the full data from beg
+ // so account for that to ensure we don't double process and include them in the
+ // aggregate twice.
+
+ misalignment = (uint)Vector512<T>.Count;
+ xPtr += misalignment;
+ remainder -= misalignment;
+ }
 
  Vector512<T> vector1;
  Vector512<T> vector2;
@@ -1227,9 +1265,12 @@ static T Vectorized128(ref T xRef, ref T yRef, nuint remainder)
 
  // We need to the ensure the underlying data can be aligned and only align
  // it if it can. It is possible we have an unaligned ref, in which case we
- // can never achieve the required SIMD alignment.
+ // can never achieve the required SIMD alignment. This cannot be done for
+ // float or double since that changes how results compound together.
 
- bool canAlign = ((nuint)xPtr % (nuint)sizeof(T)) == 0;
+ bool canAlign = (typeof(T) != typeof(float)) &&
+ (typeof(T) != typeof(double)) &&
+ ((nuint)xPtr % (nuint)sizeof(T)) == 0;
 
  if (canAlign)
  {
@@ -1248,6 +1289,19 @@ static T Vectorized128(ref T xRef, ref T yRef, nuint remainder)
 
  remainder -= misalignment;
  }
+ else
+ {
+ // We can't align, but this also means we're processing the full data from beg
+ // so account for that to ensure we don't double process and include them in the
+ // aggregate twice.
+
+ misalignment = (uint)Vector128<T>.Count;
+
+ xPtr += misalignment;
+ yPtr += misalignment;
+
+ remainder -= misalignment;
+ }
 
  Vector128<T> vector1;
  Vector128<T> vector2;
@@ -1418,9 +1472,12 @@ static T Vectorized256(ref T xRef, ref T yRef, nuint remainder)
 
  // We need to the ensure the underlying data can be aligned and only align
  // it if it can. It is possible we have an unaligned ref, in which case we
- // can never achieve the required SIMD alignment.
+ // can never achieve the required SIMD alignment. This cannot be done for
+ // float or double since that changes how results compound together.
 
- bool canAlign = ((nuint)xPtr % (nuint)sizeof(T)) == 0;
+ bool canAlign = (typeof(T) != typeof(float)) &&
+ (typeof(T) != typeof(double)) &&
+ ((nuint)xPtr % (nuint)sizeof(T)) == 0;
 
  if (canAlign)
  {
@@ -1439,6 +1496,19 @@ static T Vectorized256(ref T xRef, ref T yRef, nuint remainder)
 
  remainder -= misalignment;
  }
+ else
+ {
+ // We can't align, but this also means we're processing the full data from beg
+ // so account for that to ensure we don't double process and include them in the
+ // aggregate twice.
+
+ misalignment = (uint)Vector256<T>.Count;
+
+ xPtr += misalignment;
+ yPtr += misalignment;
+
+ remainder -= misalignment;
+ }
 
  Vector256<T> vector1;
  Vector256<T> vector2;
@@ -1609,9 +1679,12 @@ static T Vectorized512(ref T xRef, ref T yRef, nuint remainder)
 
  // We need to the ensure the underlying data can be aligned and only align
  // it if it can. It is possible we have an unaligned ref, in which case we
- // can never achieve the required SIMD alignment.
+ // can never achieve the required SIMD alignment. This cannot be done for
+ // float or double since that changes how results compound together.
 
- bool canAlign = ((nuint)xPtr % (nuint)sizeof(T)) == 0;
+ bool canAlign = (typeof(T) != typeof(float)) &&
+ (typeof(T) != typeof(double)) &&
+ ((nuint)xPtr % (nuint)sizeof(T)) == 0;
 
  if (canAlign)
  {
@@ -1630,6 +1703,19 @@ static T Vectorized512(ref T xRef, ref T yRef, nuint remainder)
 
  remainder -= misalignment;
  }
+ else
+ {
+ // We can't align, but this also means we're processing the full data from beg
+ // so account for that to ensure we don't double process and include them in the
+ // aggregate twice.
+
+ misalignment = (uint)Vector512<T>.Count;
+
+ xPtr += misalignment;
+ yPtr += misalignment;
+
+ remainder -= misalignment;
+ }
 
  Vector512<T> vector1;
  Vector512<T> vector2;

diff --git a/...cs.Tensors/src/System/Numerics/Tensors/netstandard/TensorPrimitives.Single.netstandard.cs b/...cs.Tensors/src/System/Numerics/Tensors/netstandard/TensorPrimitives.Single.netstandard.cs
@@ -175,28 +175,15 @@ static float Vectorized(ref float xRef, nuint remainder, TTransformOperator tran
  {
  float* xPtr = px;
 
- // We need to the ensure the underlying data can be aligned and only align
- // it if it can. It is possible we have an unaligned ref, in which case we
- // can never achieve the required SIMD alignment.
+ // Unlike many other vectorization algorithms, we cannot align for aggregation
+ // because that changes how results compound together and can cause a significant
+ // difference in the output. This also means we're processing the full data from beg
+ // so account for that to ensure we don't double process and include them in the
+ // aggregate twice.
 
- bool canAlign = ((nuint)(xPtr) % sizeof(float)) == 0;
-
- if (canAlign)
- {
- // Compute by how many elements we're misaligned and adjust the pointers accordingly
- //
- // Noting that we are only actually aligning dPtr. This is because unaligned stores
- // are more expensive than unaligned loads and aligning both is significantly more
- // complex.
-
- misalignment = ((uint)(sizeof(Vector<float>)) - ((nuint)(xPtr) % (uint)(sizeof(Vector<float>)))) / sizeof(float);
-
- xPtr += misalignment;
-
- Debug.Assert(((nuint)(xPtr) % (uint)(sizeof(Vector<float>))) == 0);
-
- remainder -= misalignment;
- }
+ misalignment = (uint)Vector<float>.Count;
+ xPtr += misalignment;
+ remainder -= misalignment;
 
  Vector<float> vector1;
  Vector<float> vector2;
@@ -480,29 +467,18 @@ static float Vectorized(ref float xRef, ref float yRef, nuint remainder, TBinary
  float* xPtr = px;
  float* yPtr = py;
 
- // We need to the ensure the underlying data can be aligned and only align
- // it if it can. It is possible we have an unaligned ref, in which case we
- // can never achieve the required SIMD alignment.
-
- bool canAlign = ((nuint)(xPtr) % sizeof(float)) == 0;
-
- if (canAlign)
- {
- // Compute by how many elements we're misaligned and adjust the pointers accordingly
- //
- // Noting that we are only actually aligning dPtr. This is because unaligned stores
- // are more expensive than unaligned loads and aligning both is significantly more
- // complex.
-
- misalignment = ((uint)(sizeof(Vector<float>)) - ((nuint)(xPtr) % (uint)(sizeof(Vector<float>)))) / sizeof(float);
+ // Unlike many other vectorization algorithms, we cannot align for aggregation
+ // because that changes how results compound together and can cause a significant
+ // difference in the output. This also means we're processing the full data from beg
+ // so account for that to ensure we don't double process and include them in the
+ // aggregate twice.
 
- xPtr += misalignment;
- yPtr += misalignment;
+ misalignment = (uint)Vector<float>.Count;
 
- Debug.Assert(((nuint)(xPtr) % (uint)(sizeof(Vector<float>))) == 0);
+ xPtr += misalignment;
+ yPtr += misalignment;
 
- remainder -= misalignment;
- }
+ remainder -= misalignment;
 
  Vector<float> vector1;
  Vector<float> vector2;