Skip to content

Commit

Permalink
Ensure that aggregation is consistent regardless of data alignment (#…
Browse files Browse the repository at this point in the history
…106166)

* Ensure that aggregation is consistent regardless of data alignment

* Ensure we handle for all aggregation helpers

* Ensure we don't process beg twice

* Ensure that we properly track in the case we can't align

* Add missing semicolon

* Fix the handling on .NET Framework

* Ensure yptr on .NET Framework is incremented as well
  • Loading branch information
tannergooding committed Aug 13, 2024
1 parent bfb674e commit 05abb76
Show file tree
Hide file tree
Showing 2 changed files with 116 additions and 54 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -141,9 +141,12 @@ static T Vectorized128(ref T xRef, nuint remainder)

// We need to ensure the underlying data can be aligned and only align
// it if it can. It is possible we have an unaligned ref, in which case we
// can never achieve the required SIMD alignment.
// can never achieve the required SIMD alignment. This cannot be done for
// float or double since that changes how results compound together.

bool canAlign = ((nuint)xPtr % (nuint)sizeof(T)) == 0;
bool canAlign = (typeof(T) != typeof(float)) &&
(typeof(T) != typeof(double)) &&
((nuint)xPtr % (nuint)sizeof(T)) == 0;

if (canAlign)
{
Expand All @@ -156,11 +159,20 @@ static T Vectorized128(ref T xRef, nuint remainder)
misalignment = ((uint)sizeof(Vector128<T>) - ((nuint)xPtr % (uint)sizeof(Vector128<T>))) / (uint)sizeof(T);

xPtr += misalignment;

Debug.Assert(((nuint)xPtr % (uint)sizeof(Vector128<T>)) == 0);

remainder -= misalignment;
}
else
{
// We can't align, but this also means we're processing the full data from beg,
// so account for that to ensure we don't double process those elements and
// include them in the aggregate twice.

misalignment = (uint)Vector128<T>.Count;
xPtr += misalignment;
remainder -= misalignment;
}

Vector128<T> vector1;
Vector128<T> vector2;
Expand Down Expand Up @@ -310,9 +322,12 @@ static T Vectorized256(ref T xRef, nuint remainder)

// We need to ensure the underlying data can be aligned and only align
// it if it can. It is possible we have an unaligned ref, in which case we
// can never achieve the required SIMD alignment.
// can never achieve the required SIMD alignment. This cannot be done for
// float or double since that changes how results compound together.

bool canAlign = ((nuint)xPtr % (nuint)sizeof(T)) == 0;
bool canAlign = (typeof(T) != typeof(float)) &&
(typeof(T) != typeof(double)) &&
((nuint)xPtr % (nuint)sizeof(T)) == 0;

if (canAlign)
{
Expand All @@ -330,6 +345,16 @@ static T Vectorized256(ref T xRef, nuint remainder)

remainder -= misalignment;
}
else
{
// We can't align, but this also means we're processing the full data from beg,
// so account for that to ensure we don't double process those elements and
// include them in the aggregate twice.

misalignment = (uint)Vector256<T>.Count;
xPtr += misalignment;
remainder -= misalignment;
}

Vector256<T> vector1;
Vector256<T> vector2;
Expand Down Expand Up @@ -479,9 +504,12 @@ static T Vectorized512(ref T xRef, nuint remainder)

// We need to ensure the underlying data can be aligned and only align
// it if it can. It is possible we have an unaligned ref, in which case we
// can never achieve the required SIMD alignment.
// can never achieve the required SIMD alignment. This cannot be done for
// float or double since that changes how results compound together.

bool canAlign = ((nuint)xPtr % (nuint)sizeof(T)) == 0;
bool canAlign = (typeof(T) != typeof(float)) &&
(typeof(T) != typeof(double)) &&
((nuint)xPtr % (nuint)sizeof(T)) == 0;

if (canAlign)
{
Expand All @@ -499,6 +527,16 @@ static T Vectorized512(ref T xRef, nuint remainder)

remainder -= misalignment;
}
else
{
// We can't align, but this also means we're processing the full data from beg,
// so account for that to ensure we don't double process those elements and
// include them in the aggregate twice.

misalignment = (uint)Vector512<T>.Count;
xPtr += misalignment;
remainder -= misalignment;
}

Vector512<T> vector1;
Vector512<T> vector2;
Expand Down Expand Up @@ -1227,9 +1265,12 @@ static T Vectorized128(ref T xRef, ref T yRef, nuint remainder)

// We need to ensure the underlying data can be aligned and only align
// it if it can. It is possible we have an unaligned ref, in which case we
// can never achieve the required SIMD alignment.
// can never achieve the required SIMD alignment. This cannot be done for
// float or double since that changes how results compound together.

bool canAlign = ((nuint)xPtr % (nuint)sizeof(T)) == 0;
bool canAlign = (typeof(T) != typeof(float)) &&
(typeof(T) != typeof(double)) &&
((nuint)xPtr % (nuint)sizeof(T)) == 0;

if (canAlign)
{
Expand All @@ -1248,6 +1289,19 @@ static T Vectorized128(ref T xRef, ref T yRef, nuint remainder)

remainder -= misalignment;
}
else
{
// We can't align, but this also means we're processing the full data from beg,
// so account for that to ensure we don't double process those elements and
// include them in the aggregate twice.

misalignment = (uint)Vector128<T>.Count;

xPtr += misalignment;
yPtr += misalignment;

remainder -= misalignment;
}

Vector128<T> vector1;
Vector128<T> vector2;
Expand Down Expand Up @@ -1418,9 +1472,12 @@ static T Vectorized256(ref T xRef, ref T yRef, nuint remainder)

// We need to ensure the underlying data can be aligned and only align
// it if it can. It is possible we have an unaligned ref, in which case we
// can never achieve the required SIMD alignment.
// can never achieve the required SIMD alignment. This cannot be done for
// float or double since that changes how results compound together.

bool canAlign = ((nuint)xPtr % (nuint)sizeof(T)) == 0;
bool canAlign = (typeof(T) != typeof(float)) &&
(typeof(T) != typeof(double)) &&
((nuint)xPtr % (nuint)sizeof(T)) == 0;

if (canAlign)
{
Expand All @@ -1439,6 +1496,19 @@ static T Vectorized256(ref T xRef, ref T yRef, nuint remainder)

remainder -= misalignment;
}
else
{
// We can't align, but this also means we're processing the full data from beg,
// so account for that to ensure we don't double process those elements and
// include them in the aggregate twice.

misalignment = (uint)Vector256<T>.Count;

xPtr += misalignment;
yPtr += misalignment;

remainder -= misalignment;
}

Vector256<T> vector1;
Vector256<T> vector2;
Expand Down Expand Up @@ -1609,9 +1679,12 @@ static T Vectorized512(ref T xRef, ref T yRef, nuint remainder)

// We need to ensure the underlying data can be aligned and only align
// it if it can. It is possible we have an unaligned ref, in which case we
// can never achieve the required SIMD alignment.
// can never achieve the required SIMD alignment. This cannot be done for
// float or double since that changes how results compound together.

bool canAlign = ((nuint)xPtr % (nuint)sizeof(T)) == 0;
bool canAlign = (typeof(T) != typeof(float)) &&
(typeof(T) != typeof(double)) &&
((nuint)xPtr % (nuint)sizeof(T)) == 0;

if (canAlign)
{
Expand All @@ -1630,6 +1703,19 @@ static T Vectorized512(ref T xRef, ref T yRef, nuint remainder)

remainder -= misalignment;
}
else
{
// We can't align, but this also means we're processing the full data from beg,
// so account for that to ensure we don't double process those elements and
// include them in the aggregate twice.

misalignment = (uint)Vector512<T>.Count;

xPtr += misalignment;
yPtr += misalignment;

remainder -= misalignment;
}

Vector512<T> vector1;
Vector512<T> vector2;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -175,28 +175,15 @@ static float Vectorized(ref float xRef, nuint remainder, TTransformOperator tran
{
float* xPtr = px;

// We need to the ensure the underlying data can be aligned and only align
// it if it can. It is possible we have an unaligned ref, in which case we
// can never achieve the required SIMD alignment.
// Unlike many other vectorization algorithms, we cannot align for aggregation
// because that changes how results compound together and can cause a significant
// difference in the output. This also means we're processing the full data from beg,
// so account for that to ensure we don't double process those elements and include
// them in the aggregate twice.

bool canAlign = ((nuint)(xPtr) % sizeof(float)) == 0;

if (canAlign)
{
// Compute by how many elements we're misaligned and adjust the pointers accordingly
//
// Noting that we are only actually aligning dPtr. This is because unaligned stores
// are more expensive than unaligned loads and aligning both is significantly more
// complex.

misalignment = ((uint)(sizeof(Vector<float>)) - ((nuint)(xPtr) % (uint)(sizeof(Vector<float>)))) / sizeof(float);

xPtr += misalignment;

Debug.Assert(((nuint)(xPtr) % (uint)(sizeof(Vector<float>))) == 0);

remainder -= misalignment;
}
misalignment = (uint)Vector<float>.Count;
xPtr += misalignment;
remainder -= misalignment;

Vector<float> vector1;
Vector<float> vector2;
Expand Down Expand Up @@ -480,29 +467,18 @@ static float Vectorized(ref float xRef, ref float yRef, nuint remainder, TBinary
float* xPtr = px;
float* yPtr = py;

// We need to the ensure the underlying data can be aligned and only align
// it if it can. It is possible we have an unaligned ref, in which case we
// can never achieve the required SIMD alignment.

bool canAlign = ((nuint)(xPtr) % sizeof(float)) == 0;

if (canAlign)
{
// Compute by how many elements we're misaligned and adjust the pointers accordingly
//
// Noting that we are only actually aligning dPtr. This is because unaligned stores
// are more expensive than unaligned loads and aligning both is significantly more
// complex.

misalignment = ((uint)(sizeof(Vector<float>)) - ((nuint)(xPtr) % (uint)(sizeof(Vector<float>)))) / sizeof(float);
// Unlike many other vectorization algorithms, we cannot align for aggregation
// because that changes how results compound together and can cause a significant
// difference in the output. This also means we're processing the full data from beg,
// so account for that to ensure we don't double process those elements and include
// them in the aggregate twice.

xPtr += misalignment;
yPtr += misalignment;
misalignment = (uint)Vector<float>.Count;

Debug.Assert(((nuint)(xPtr) % (uint)(sizeof(Vector<float>))) == 0);
xPtr += misalignment;
yPtr += misalignment;

remainder -= misalignment;
}
remainder -= misalignment;

Vector<float> vector1;
Vector<float> vector2;
Expand Down

0 comments on commit 05abb76

Please sign in to comment.