Skip to content

Commit

Permalink
port SpanHelpers.IndexOfAny(ref byte, byte, byte, int) to Vector128/256 (#73384)
Browse files Browse the repository at this point in the history
  • Loading branch information
adamsitnik authored Aug 5, 2022
1 parent 66c93ca commit 73cf30d
Showing 1 changed file with 21 additions and 76 deletions.
97 changes: 21 additions & 76 deletions src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs
Original file line number Diff line number Diff line change
Expand Up @@ -781,7 +781,7 @@ public static int IndexOfAny(ref byte searchSpace, byte value0, byte value1, int
nuint offset = 0; // Use nuint for arithmetic to avoid unnecessary 64->32->64 truncations
nuint lengthToExamine = (nuint)(uint)length;

if (Sse2.IsSupported || AdvSimd.Arm64.IsSupported)
if (Vector128.IsHardwareAccelerated)
{
// Avx2 branch also operates on Sse2 sizes, so check is combined.
nint vectorDiff = (nint)length - Vector128<byte>.Count;
Expand Down Expand Up @@ -897,10 +897,10 @@ public static int IndexOfAny(ref byte searchSpace, byte value0, byte value1, int
// the end and forwards, which may overlap on an earlier compare.

// We include the Supported check again here even though path will not be taken, so the asm isn't generated if not supported.
if (Sse2.IsSupported)
if (Vector128.IsHardwareAccelerated)
{
int matches;
if (Avx2.IsSupported)
uint matches;
if (Vector256.IsHardwareAccelerated)
{
Vector256<byte> search;
// Guard as we may only have a valid size for Vector128; when we will move to the Sse2
Expand All @@ -916,13 +916,10 @@ public static int IndexOfAny(ref byte searchSpace, byte value0, byte value1, int
// First time this checks again against 0, however we will move into final compare if it fails.
while (lengthToExamine > offset)
{
search = LoadVector256(ref searchSpace, offset);
search = Vector256.LoadUnsafe(ref searchSpace, offset);
// Bitwise Or to combine the flagged matches for the second value to our match flags
matches = Avx2.MoveMask(
Avx2.Or(
Avx2.CompareEqual(values0, search),
Avx2.CompareEqual(values1, search)));
// Note that MoveMask has converted the equal vector elements into a set of bit flags,
matches = (Vector256.Equals(values0, search) | Vector256.Equals(values1, search)).ExtractMostSignificantBits();
// Note that ExtractMostSignificantBits has converted the equal vector elements into a set of bit flags,
// So the bit position in 'matches' corresponds to the element offset.
if (matches == 0)
{
Expand All @@ -935,13 +932,10 @@ public static int IndexOfAny(ref byte searchSpace, byte value0, byte value1, int
}

// Move to Vector length from end for final compare
search = LoadVector256(ref searchSpace, lengthToExamine);
search = Vector256.LoadUnsafe(ref searchSpace, lengthToExamine);
offset = lengthToExamine;
// Same method as above
matches = Avx2.MoveMask(
Avx2.Or(
Avx2.CompareEqual(values0, search),
Avx2.CompareEqual(values1, search)));
matches = (Vector256.Equals(values0, search) | Vector256.Equals(values1, search)).ExtractMostSignificantBits();
if (matches == 0)
{
// None matched
Expand All @@ -953,6 +947,7 @@ public static int IndexOfAny(ref byte searchSpace, byte value0, byte value1, int
}

// Initial size check was done on method entry.
Vector128<byte> compareResult;
Debug.Assert(length >= Vector128<byte>.Count);
{
Vector128<byte> search;
Expand All @@ -961,90 +956,40 @@ public static int IndexOfAny(ref byte searchSpace, byte value0, byte value1, int
// First time this checks against 0 and we will move into final compare if it fails.
while (lengthToExamine > offset)
{
search = LoadVector128(ref searchSpace, offset);
search = Vector128.LoadUnsafe(ref searchSpace, offset);

matches = Sse2.MoveMask(
Sse2.Or(
Sse2.CompareEqual(values0, search),
Sse2.CompareEqual(values1, search))
.AsByte());
// Note that MoveMask has converted the equal vector elements into a set of bit flags,
// So the bit position in 'matches' corresponds to the element offset.
if (matches == 0)
compareResult = Vector128.Equals(values0, search) | Vector128.Equals(values1, search);

if (compareResult == Vector128<byte>.Zero)
{
// None matched
offset += (nuint)Vector128<byte>.Count;
continue;
}

matches = compareResult.ExtractMostSignificantBits();
goto IntrinsicsMatch;
}
// Move to Vector length from end for final compare
search = LoadVector128(ref searchSpace, lengthToExamine);
search = Vector128.LoadUnsafe(ref searchSpace, lengthToExamine);
offset = lengthToExamine;
// Same method as above
matches = Sse2.MoveMask(
Sse2.Or(
Sse2.CompareEqual(values0, search),
Sse2.CompareEqual(values1, search)));
if (matches == 0)
compareResult = Vector128.Equals(values0, search) | Vector128.Equals(values1, search);

if (compareResult == Vector128<byte>.Zero)
{
// None matched
goto NotFound;
}

matches = compareResult.ExtractMostSignificantBits();
}

IntrinsicsMatch:
// Find bitflag offset of first match and add to current offset
offset += (nuint)BitOperations.TrailingZeroCount(matches);
goto Found;
}
else if (AdvSimd.Arm64.IsSupported)
{
Vector128<byte> search;
Vector128<byte> matches;
Vector128<byte> values0 = Vector128.Create(value0);
Vector128<byte> values1 = Vector128.Create(value1);
// First time this checks against 0 and we will move into final compare if it fails.
while (lengthToExamine > offset)
{
search = LoadVector128(ref searchSpace, offset);

matches = AdvSimd.Or(
AdvSimd.CompareEqual(values0, search),
AdvSimd.CompareEqual(values1, search));

if (matches == Vector128<byte>.Zero)
{
offset += (nuint)Vector128<byte>.Count;
continue;
}

// Find bitflag offset of first match and add to current offset
offset += FindFirstMatchedLane(matches);

goto Found;
}

// Move to Vector length from end for final compare
search = LoadVector128(ref searchSpace, lengthToExamine);
offset = lengthToExamine;
// Same method as above
matches = AdvSimd.Or(
AdvSimd.CompareEqual(values0, search),
AdvSimd.CompareEqual(values1, search));

if (matches == Vector128<byte>.Zero)
{
// None matched
goto NotFound;
}

// Find bitflag offset of first match and add to current offset
offset += FindFirstMatchedLane(matches);

goto Found;
}
else if (Vector.IsHardwareAccelerated)
{
Vector<byte> values0 = new Vector<byte>(value0);
Expand Down

0 comments on commit 73cf30d

Please sign in to comment.