From 05abb765e2740d7182c56e142fbe44d76fafedf4 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Mon, 12 Aug 2024 22:54:40 -0700 Subject: [PATCH] Ensure that aggregation is consistent regardless of data alignment (#106166) * Ensure that aggregation is consistent regardless of data alignment * Ensure we handle for all aggregation helpers * Ensure we don't process beg twice * Ensure that we properly track in the case we can't align * Add missing semicolon * Fix the handling on .NET Framework * Ensure yptr on .NET Framework is incremented as well --- .../TensorPrimitives.IAggregationOperator.cs | 112 ++++++++++++++++-- .../TensorPrimitives.Single.netstandard.cs | 58 +++------ 2 files changed, 116 insertions(+), 54 deletions(-) diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/Common/TensorPrimitives.IAggregationOperator.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/Common/TensorPrimitives.IAggregationOperator.cs index df03ad1634d28..1c8d215bd137d 100644 --- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/Common/TensorPrimitives.IAggregationOperator.cs +++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/Common/TensorPrimitives.IAggregationOperator.cs @@ -141,9 +141,12 @@ static T Vectorized128(ref T xRef, nuint remainder) // We need to the ensure the underlying data can be aligned and only align // it if it can. It is possible we have an unaligned ref, in which case we - // can never achieve the required SIMD alignment. + // can never achieve the required SIMD alignment. This cannot be done for + // float or double since that changes how results compound together. - bool canAlign = ((nuint)xPtr % (nuint)sizeof(T)) == 0; + bool canAlign = (typeof(T) != typeof(float)) && + (typeof(T) != typeof(double)) && + ((nuint)xPtr % (nuint)sizeof(T)) == 0; if (canAlign) { @@ -156,11 +159,20 @@ static T Vectorized128(ref T xRef, nuint remainder) misalignment = ((uint)sizeof(Vector128) - ((nuint)xPtr % (uint)sizeof(Vector128))) / (uint)sizeof(T); xPtr += misalignment; - Debug.Assert(((nuint)xPtr % (uint)sizeof(Vector128)) == 0); remainder -= misalignment; } + else + { + // We can't align, but this also means we're processing the full data from beg + // so account for that to ensure we don't double process and include them in the + // aggregate twice. + + misalignment = (uint)Vector128.Count; + xPtr += misalignment; + remainder -= misalignment; + } Vector128 vector1; Vector128 vector2; @@ -310,9 +322,12 @@ static T Vectorized256(ref T xRef, nuint remainder) // We need to the ensure the underlying data can be aligned and only align // it if it can. It is possible we have an unaligned ref, in which case we - // can never achieve the required SIMD alignment. + // can never achieve the required SIMD alignment. This cannot be done for + // float or double since that changes how results compound together. - bool canAlign = ((nuint)xPtr % (nuint)sizeof(T)) == 0; + bool canAlign = (typeof(T) != typeof(float)) && + (typeof(T) != typeof(double)) && + ((nuint)xPtr % (nuint)sizeof(T)) == 0; if (canAlign) { @@ -330,6 +345,16 @@ static T Vectorized256(ref T xRef, nuint remainder) remainder -= misalignment; } + else + { + // We can't align, but this also means we're processing the full data from beg + // so account for that to ensure we don't double process and include them in the + // aggregate twice. + + misalignment = (uint)Vector256.Count; + xPtr += misalignment; + remainder -= misalignment; + } Vector256 vector1; Vector256 vector2; @@ -479,9 +504,12 @@ static T Vectorized512(ref T xRef, nuint remainder) // We need to the ensure the underlying data can be aligned and only align // it if it can. It is possible we have an unaligned ref, in which case we - // can never achieve the required SIMD alignment. + // can never achieve the required SIMD alignment. This cannot be done for + // float or double since that changes how results compound together. - bool canAlign = ((nuint)xPtr % (nuint)sizeof(T)) == 0; + bool canAlign = (typeof(T) != typeof(float)) && + (typeof(T) != typeof(double)) && + ((nuint)xPtr % (nuint)sizeof(T)) == 0; if (canAlign) { @@ -499,6 +527,16 @@ static T Vectorized512(ref T xRef, nuint remainder) remainder -= misalignment; } + else + { + // We can't align, but this also means we're processing the full data from beg + // so account for that to ensure we don't double process and include them in the + // aggregate twice. + + misalignment = (uint)Vector512.Count; + xPtr += misalignment; + remainder -= misalignment; + } Vector512 vector1; Vector512 vector2; @@ -1227,9 +1265,12 @@ static T Vectorized128(ref T xRef, ref T yRef, nuint remainder) // We need to the ensure the underlying data can be aligned and only align // it if it can. It is possible we have an unaligned ref, in which case we - // can never achieve the required SIMD alignment. + // can never achieve the required SIMD alignment. This cannot be done for + // float or double since that changes how results compound together. - bool canAlign = ((nuint)xPtr % (nuint)sizeof(T)) == 0; + bool canAlign = (typeof(T) != typeof(float)) && + (typeof(T) != typeof(double)) && + ((nuint)xPtr % (nuint)sizeof(T)) == 0; if (canAlign) { @@ -1248,6 +1289,19 @@ static T Vectorized128(ref T xRef, ref T yRef, nuint remainder) remainder -= misalignment; } + else + { + // We can't align, but this also means we're processing the full data from beg + // so account for that to ensure we don't double process and include them in the + // aggregate twice. + + misalignment = (uint)Vector128.Count; + + xPtr += misalignment; + yPtr += misalignment; + + remainder -= misalignment; + } Vector128 vector1; Vector128 vector2; @@ -1418,9 +1472,12 @@ static T Vectorized256(ref T xRef, ref T yRef, nuint remainder) // We need to the ensure the underlying data can be aligned and only align // it if it can. It is possible we have an unaligned ref, in which case we - // can never achieve the required SIMD alignment. + // can never achieve the required SIMD alignment. This cannot be done for + // float or double since that changes how results compound together. - bool canAlign = ((nuint)xPtr % (nuint)sizeof(T)) == 0; + bool canAlign = (typeof(T) != typeof(float)) && + (typeof(T) != typeof(double)) && + ((nuint)xPtr % (nuint)sizeof(T)) == 0; if (canAlign) { @@ -1439,6 +1496,19 @@ static T Vectorized256(ref T xRef, ref T yRef, nuint remainder) remainder -= misalignment; } + else + { + // We can't align, but this also means we're processing the full data from beg + // so account for that to ensure we don't double process and include them in the + // aggregate twice. + + misalignment = (uint)Vector256.Count; + + xPtr += misalignment; + yPtr += misalignment; + + remainder -= misalignment; + } Vector256 vector1; Vector256 vector2; @@ -1609,9 +1679,12 @@ static T Vectorized512(ref T xRef, ref T yRef, nuint remainder) // We need to the ensure the underlying data can be aligned and only align // it if it can. It is possible we have an unaligned ref, in which case we - // can never achieve the required SIMD alignment. + // can never achieve the required SIMD alignment. This cannot be done for + // float or double since that changes how results compound together. - bool canAlign = ((nuint)xPtr % (nuint)sizeof(T)) == 0; + bool canAlign = (typeof(T) != typeof(float)) && + (typeof(T) != typeof(double)) && + ((nuint)xPtr % (nuint)sizeof(T)) == 0; if (canAlign) { @@ -1630,6 +1703,19 @@ static T Vectorized512(ref T xRef, ref T yRef, nuint remainder) remainder -= misalignment; } + else + { + // We can't align, but this also means we're processing the full data from beg + // so account for that to ensure we don't double process and include them in the + // aggregate twice. + + misalignment = (uint)Vector512.Count; + + xPtr += misalignment; + yPtr += misalignment; + + remainder -= misalignment; + } Vector512 vector1; Vector512 vector2; diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netstandard/TensorPrimitives.Single.netstandard.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netstandard/TensorPrimitives.Single.netstandard.cs index c9474cb470fd7..563080bf742c2 100644 --- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netstandard/TensorPrimitives.Single.netstandard.cs +++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netstandard/TensorPrimitives.Single.netstandard.cs @@ -175,28 +175,15 @@ static float Vectorized(ref float xRef, nuint remainder, TTransformOperator tran { float* xPtr = px; - // We need to the ensure the underlying data can be aligned and only align - // it if it can. It is possible we have an unaligned ref, in which case we - // can never achieve the required SIMD alignment. + // Unlike many other vectorization algorithms, we cannot align for aggregation + // because that changes how results compound together and can cause a significant + // difference in the output. This also means we're processing the full data from beg + // so account for that to ensure we don't double process and include them in the + // aggregate twice. - bool canAlign = ((nuint)(xPtr) % sizeof(float)) == 0; - - if (canAlign) - { - // Compute by how many elements we're misaligned and adjust the pointers accordingly - // - // Noting that we are only actually aligning dPtr. This is because unaligned stores - // are more expensive than unaligned loads and aligning both is significantly more - // complex. - - misalignment = ((uint)(sizeof(Vector)) - ((nuint)(xPtr) % (uint)(sizeof(Vector)))) / sizeof(float); - - xPtr += misalignment; - - Debug.Assert(((nuint)(xPtr) % (uint)(sizeof(Vector))) == 0); - - remainder -= misalignment; - } + misalignment = (uint)Vector.Count; + xPtr += misalignment; + remainder -= misalignment; Vector vector1; Vector vector2; @@ -480,29 +467,18 @@ static float Vectorized(ref float xRef, ref float yRef, nuint remainder, TBinary float* xPtr = px; float* yPtr = py; - // We need to the ensure the underlying data can be aligned and only align - // it if it can. It is possible we have an unaligned ref, in which case we - // can never achieve the required SIMD alignment. - - bool canAlign = ((nuint)(xPtr) % sizeof(float)) == 0; - - if (canAlign) - { - // Compute by how many elements we're misaligned and adjust the pointers accordingly - // - // Noting that we are only actually aligning dPtr. This is because unaligned stores - // are more expensive than unaligned loads and aligning both is significantly more - // complex. - - misalignment = ((uint)(sizeof(Vector)) - ((nuint)(xPtr) % (uint)(sizeof(Vector)))) / sizeof(float); + // Unlike many other vectorization algorithms, we cannot align for aggregation + // because that changes how results compound together and can cause a significant + // difference in the output. This also means we're processing the full data from beg + // so account for that to ensure we don't double process and include them in the + // aggregate twice. - xPtr += misalignment; - yPtr += misalignment; + misalignment = (uint)Vector.Count; - Debug.Assert(((nuint)(xPtr) % (uint)(sizeof(Vector))) == 0); + xPtr += misalignment; + yPtr += misalignment; - remainder -= misalignment; - } + remainder -= misalignment; Vector vector1; Vector vector2;