diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs index 8eedd2955eb55..29a5906643a2a 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs @@ -769,13 +769,13 @@ public static unsafe int IndexOfAny(ref char searchStart, char value0, char valu nuint offset = 0; // Use nuint for arithmetic to avoid unnecessary 64->32->64 truncations nuint lengthToExamine = (nuint)(uint)length; - if (Sse2.IsSupported) + if (Vector128.IsHardwareAccelerated) { // Calculate lengthToExamine here for test, rather than just testing as it used later, rather than doing it twice. nint vectorDiff = (nint)length - Vector128.Count; if (vectorDiff >= 0) { - // >= Sse2 intrinsics are supported and length is enough to use them, so use that path. + // >= Vector128 is accelerated and length is enough to use them, so use that path. // We jump forward to the intrinsics at the end of them method so a naive branch predict // will choose the non-intrinsic path so short lengths which don't gain anything aren't // overly disadvantaged by having to jump over a lot of code. Whereas the longer lengths @@ -845,10 +845,11 @@ public static unsafe int IndexOfAny(ref char searchStart, char value0, char valu // the end and forwards, which may overlap on an earlier compare. // We include the Supported check again here even though path will not be taken, so the asm isn't generated if not supported. - if (Sse2.IsSupported) + if (Vector128.IsHardwareAccelerated) { - int matches; - if (Avx2.IsSupported) + uint matches; + ref ushort ushortSearchStart = ref Unsafe.As(ref searchStart); + if (Vector256.IsHardwareAccelerated) { Vector256 search; // Guard as we may only have a valid size for Vector128; when we will move to the Sse2 @@ -864,14 +865,12 @@ public static unsafe int IndexOfAny(ref char searchStart, char value0, char valu // First time this checks again against 0, however we will move into final compare if it fails. while (lengthToExamine > offset) { - search = LoadVector256(ref searchStart, offset); + search = Vector256.LoadUnsafe(ref ushortSearchStart, offset); // Bitwise Or to combine the flagged matches for the second value to our match flags - matches = Avx2.MoveMask( - Avx2.Or( - Avx2.CompareEqual(values0, search), - Avx2.CompareEqual(values1, search)) - .AsByte()); - // Note that MoveMask has converted the equal vector elements into a set of bit flags, + matches = (Vector256.Equals(values0, search) | Vector256.Equals(values1, search)) + .AsByte().ExtractMostSignificantBits(); + + // Note that ExtractMostSignificantBits has converted the equal vector elements into a set of bit flags, // So the bit position in 'matches' corresponds to the element offset. if (matches == 0) { @@ -884,14 +883,11 @@ public static unsafe int IndexOfAny(ref char searchStart, char value0, char valu } // Move to Vector length from end for final compare - search = LoadVector256(ref searchStart, lengthToExamine); + search = Vector256.LoadUnsafe(ref ushortSearchStart, lengthToExamine); offset = lengthToExamine; // Same as method as above - matches = Avx2.MoveMask( - Avx2.Or( - Avx2.CompareEqual(values0, search), - Avx2.CompareEqual(values1, search)) - .AsByte()); + matches = (Vector256.Equals(values0, search) | Vector256.Equals(values1, search)) + .AsByte().ExtractMostSignificantBits(); if (matches == 0) { // None matched @@ -905,44 +901,38 @@ public static unsafe int IndexOfAny(ref char searchStart, char value0, char valu // Initial size check was done on method entry. Debug.Assert(length >= Vector128.Count); { - Vector128 search; + Vector128 search, compareResult; Vector128 values0 = Vector128.Create((ushort)value0); Vector128 values1 = Vector128.Create((ushort)value1); // First time this checks against 0 and we will move into final compare if it fails. while (lengthToExamine > offset) { - search = LoadVector128(ref searchStart, offset); - - matches = Sse2.MoveMask( - Sse2.Or( - Sse2.CompareEqual(values0, search), - Sse2.CompareEqual(values1, search)) - .AsByte()); - // Note that MoveMask has converted the equal vector elements into a set of bit flags, - // So the bit position in 'matches' corresponds to the element offset. - if (matches == 0) + search = Vector128.LoadUnsafe(ref ushortSearchStart, offset); + + compareResult = Vector128.Equals(values0, search) | Vector128.Equals(values1, search); + if (compareResult == Vector128.Zero) { // None matched offset += (nuint)Vector128.Count; continue; } + // Note that ExtractMostSignificantBits has converted the equal vector elements into a set of bit flags, + // So the bit position in 'matches' corresponds to the element offset. + matches = compareResult.AsByte().ExtractMostSignificantBits(); goto IntrinsicsMatch; } // Move to Vector length from end for final compare - search = LoadVector128(ref searchStart, lengthToExamine); + search = Vector128.LoadUnsafe(ref ushortSearchStart, lengthToExamine); offset = lengthToExamine; // Same as method as above - matches = Sse2.MoveMask( - Sse2.Or( - Sse2.CompareEqual(values0, search), - Sse2.CompareEqual(values1, search)) - .AsByte()); - if (matches == 0) + compareResult = Vector128.Equals(values0, search) | Vector128.Equals(values1, search); + if (compareResult == Vector128.Zero) { // None matched goto NotFound; } + matches = compareResult.AsByte().ExtractMostSignificantBits(); } IntrinsicsMatch: @@ -954,7 +944,7 @@ public static unsafe int IndexOfAny(ref char searchStart, char value0, char valu VectorCompare: // We include the Supported check again here even though path will not be taken, so the asm isn't generated if not supported. - if (!Sse2.IsSupported && Vector.IsHardwareAccelerated) + if (!Vector128.IsHardwareAccelerated && Vector.IsHardwareAccelerated) { Vector values0 = new Vector(value0); Vector values1 = new Vector(value1); @@ -1006,13 +996,13 @@ public static unsafe int IndexOfAny(ref char searchStart, char value0, char valu nuint offset = 0; // Use nuint for arithmetic to avoid unnecessary 64->32->64 truncations nuint lengthToExamine = (nuint)(uint)length; - if (Sse2.IsSupported) + if (Vector128.IsHardwareAccelerated) { // Calculate lengthToExamine here for test, rather than just testing as it used later, rather than doing it twice. nint vectorDiff = (nint)length - Vector128.Count; if (vectorDiff >= 0) { - // >= Sse2 intrinsics are supported and length is enough to use them, so use that path. + // >= Vector128 is accelerated and length is enough to use them, so use that path. // We jump forward to the intrinsics at the end of them method so a naive branch predict // will choose the non-intrinsic path so short lengths which don't gain anything aren't // overly disadvantaged by having to jump over a lot of code. Whereas the longer lengths @@ -1082,10 +1072,11 @@ public static unsafe int IndexOfAny(ref char searchStart, char value0, char valu // the end and forwards, which may overlap on an earlier compare. // We include the Supported check again here even though path will not be taken, so the asm isn't generated if not supported. - if (Sse2.IsSupported) + if (Vector128.IsHardwareAccelerated) { - int matches; - if (Avx2.IsSupported) + uint matches; + ref ushort ushortSearchStart = ref Unsafe.As(ref searchStart); + if (Vector256.IsHardwareAccelerated) { Vector256 search; // Guard as we may only have a valid size for Vector128; when we will move to the Sse2 @@ -1102,16 +1093,11 @@ public static unsafe int IndexOfAny(ref char searchStart, char value0, char valu // First time this checks again against 0, however we will move into final compare if it fails. while (lengthToExamine > offset) { - search = LoadVector256(ref searchStart, offset); + search = Vector256.LoadUnsafe(ref ushortSearchStart, offset); // Bitwise Or to combine the flagged matches for the second value to our match flags - matches = Avx2.MoveMask( - Avx2.Or( - Avx2.Or( - Avx2.CompareEqual(values0, search), - Avx2.CompareEqual(values1, search)), - Avx2.CompareEqual(values2, search)) - .AsByte()); - // Note that MoveMask has converted the equal vector elements into a set of bit flags, + matches = (Vector256.Equals(values0, search) | Vector256.Equals(values1, search) | Vector256.Equals(values2, search)) + .AsByte().ExtractMostSignificantBits(); + // Note that ExtractMostSignificantBits has converted the equal vector elements into a set of bit flags, // So the bit position in 'matches' corresponds to the element offset. if (matches == 0) { @@ -1124,16 +1110,11 @@ public static unsafe int IndexOfAny(ref char searchStart, char value0, char valu } // Move to Vector length from end for final compare - search = LoadVector256(ref searchStart, lengthToExamine); + search = Vector256.LoadUnsafe(ref ushortSearchStart, lengthToExamine); offset = lengthToExamine; // Same as method as above - matches = Avx2.MoveMask( - Avx2.Or( - Avx2.Or( - Avx2.CompareEqual(values0, search), - Avx2.CompareEqual(values1, search)), - Avx2.CompareEqual(values2, search)) - .AsByte()); + matches = (Vector256.Equals(values0, search) | Vector256.Equals(values1, search) | Vector256.Equals(values2, search)) + .AsByte().ExtractMostSignificantBits(); if (matches == 0) { // None matched @@ -1147,49 +1128,39 @@ public static unsafe int IndexOfAny(ref char searchStart, char value0, char valu // Initial size check was done on method entry. Debug.Assert(length >= Vector128.Count); { - Vector128 search; + Vector128 search, compareResult; Vector128 values0 = Vector128.Create((ushort)value0); Vector128 values1 = Vector128.Create((ushort)value1); Vector128 values2 = Vector128.Create((ushort)value2); // First time this checks against 0 and we will move into final compare if it fails. while (lengthToExamine > offset) { - search = LoadVector128(ref searchStart, offset); - - matches = Sse2.MoveMask( - Sse2.Or( - Sse2.Or( - Sse2.CompareEqual(values0, search), - Sse2.CompareEqual(values1, search)), - Sse2.CompareEqual(values2, search)) - .AsByte()); - // Note that MoveMask has converted the equal vector elements into a set of bit flags, - // So the bit position in 'matches' corresponds to the element offset. - if (matches == 0) + search = Vector128.LoadUnsafe(ref ushortSearchStart, offset); + + compareResult = Vector128.Equals(values0, search) | Vector128.Equals(values1, search) | Vector128.Equals(values2, search); + if (compareResult == Vector128.Zero) { // None matched offset += (nuint)Vector128.Count; continue; } + // Note that ExtractMostSignificantBits has converted the equal vector elements into a set of bit flags, + // So the bit position in 'matches' corresponds to the element offset. + matches = compareResult.AsByte().ExtractMostSignificantBits(); goto IntrinsicsMatch; } // Move to Vector length from end for final compare - search = LoadVector128(ref searchStart, lengthToExamine); + search = Vector128.LoadUnsafe(ref ushortSearchStart, lengthToExamine); offset = lengthToExamine; // Same as method as above - matches = Sse2.MoveMask( - Sse2.Or( - Sse2.Or( - Sse2.CompareEqual(values0, search), - Sse2.CompareEqual(values1, search)), - Sse2.CompareEqual(values2, search)) - .AsByte()); - if (matches == 0) + compareResult = Vector128.Equals(values0, search) | Vector128.Equals(values1, search) | Vector128.Equals(values2, search); + if (compareResult == Vector128.Zero) { // None matched goto NotFound; } + matches = compareResult.AsByte().ExtractMostSignificantBits(); } IntrinsicsMatch: @@ -1201,7 +1172,7 @@ public static unsafe int IndexOfAny(ref char searchStart, char value0, char valu VectorCompare: // We include the Supported check again here even though path will not be taken, so the asm isn't generated if not supported. - if (!Sse2.IsSupported && Vector.IsHardwareAccelerated) + if (!Vector128.IsHardwareAccelerated && Vector.IsHardwareAccelerated) { Vector values0 = new Vector(value0); Vector values1 = new Vector(value1); @@ -1258,13 +1229,13 @@ public static unsafe int IndexOfAny(ref char searchStart, char value0, char valu nuint offset = 0; // Use nuint for arithmetic to avoid unnecessary 64->32->64 truncations nuint lengthToExamine = (nuint)(uint)length; - if (Sse2.IsSupported) + if (Vector128.IsHardwareAccelerated) { // Calculate lengthToExamine here for test, rather than just testing as it used later, rather than doing it twice. nint vectorDiff = (nint)length - Vector128.Count; if (vectorDiff >= 0) { - // >= Sse2 intrinsics are supported and length is enough to use them, so use that path. + // >= Vector128 is accelerated and length is enough to use them, so use that path. // We jump forward to the intrinsics at the end of them method so a naive branch predict // will choose the non-intrinsic path so short lengths which don't gain anything aren't // overly disadvantaged by having to jump over a lot of code. Whereas the longer lengths @@ -1334,10 +1305,11 @@ public static unsafe int IndexOfAny(ref char searchStart, char value0, char valu // the end and forwards, which may overlap on an earlier compare. // We include the Supported check again here even though path will not be taken, so the asm isn't generated if not supported. - if (Sse2.IsSupported) + if (Vector128.IsHardwareAccelerated) { - int matches; - if (Avx2.IsSupported) + uint matches; + ref ushort ushortSearchStart = ref Unsafe.As(ref searchStart); + if (Vector256.IsHardwareAccelerated) { Vector256 search; // Guard as we may only have a valid size for Vector128; when we will move to the Sse2 @@ -1355,15 +1327,11 @@ public static unsafe int IndexOfAny(ref char searchStart, char value0, char valu // First time this checks again against 0, however we will move into final compare if it fails. while (lengthToExamine > offset) { - search = LoadVector256(ref searchStart, offset); - // We preform the Or at non-Vector level as we are using the maximum number of non-preserved registers, - // and more causes them first to be pushed to stack and then popped on exit to preseve their values. - matches = Avx2.MoveMask(Avx2.CompareEqual(values0, search).AsByte()); - // Bitwise Or to combine the flagged matches for the second, third and fourth values to our match flags - matches |= Avx2.MoveMask(Avx2.CompareEqual(values1, search).AsByte()); - matches |= Avx2.MoveMask(Avx2.CompareEqual(values2, search).AsByte()); - matches |= Avx2.MoveMask(Avx2.CompareEqual(values3, search).AsByte()); - // Note that MoveMask has converted the equal vector elements into a set of bit flags, + search = Vector256.LoadUnsafe(ref ushortSearchStart, offset); + matches = (Vector256.Equals(values0, search) | Vector256.Equals(values1, search) + | Vector256.Equals(values2, search) | Vector256.Equals(values3, search)) + .AsByte().ExtractMostSignificantBits(); + // Note that ExtractMostSignificantBits has converted the equal vector elements into a set of bit flags, // So the bit position in 'matches' corresponds to the element offset. if (matches == 0) { @@ -1376,14 +1344,12 @@ public static unsafe int IndexOfAny(ref char searchStart, char value0, char valu } // Move to Vector length from end for final compare - search = LoadVector256(ref searchStart, lengthToExamine); + search = Vector256.LoadUnsafe(ref ushortSearchStart, lengthToExamine); offset = lengthToExamine; // Same as method as above - matches = Avx2.MoveMask(Avx2.CompareEqual(values0, search).AsByte()); - // Bitwise Or to combine the flagged matches for the second, third and fourth values to our match flags - matches |= Avx2.MoveMask(Avx2.CompareEqual(values1, search).AsByte()); - matches |= Avx2.MoveMask(Avx2.CompareEqual(values2, search).AsByte()); - matches |= Avx2.MoveMask(Avx2.CompareEqual(values3, search).AsByte()); + matches = (Vector256.Equals(values0, search) | Vector256.Equals(values1, search) + | Vector256.Equals(values2, search) | Vector256.Equals(values3, search)) + .AsByte().ExtractMostSignificantBits(); if (matches == 0) { // None matched @@ -1397,7 +1363,7 @@ public static unsafe int IndexOfAny(ref char searchStart, char value0, char valu // Initial size check was done on method entry. Debug.Assert(length >= Vector128.Count); { - Vector128 search; + Vector128 search, compareResult; Vector128 values0 = Vector128.Create((ushort)value0); Vector128 values1 = Vector128.Create((ushort)value1); Vector128 values2 = Vector128.Create((ushort)value2); @@ -1405,36 +1371,34 @@ public static unsafe int IndexOfAny(ref char searchStart, char value0, char valu // First time this checks against 0 and we will move into final compare if it fails. while (lengthToExamine > offset) { - search = LoadVector128(ref searchStart, offset); + search = Vector128.LoadUnsafe(ref ushortSearchStart, offset); - matches = Sse2.MoveMask(Sse2.CompareEqual(values0, search).AsByte()); - matches |= Sse2.MoveMask(Sse2.CompareEqual(values1, search).AsByte()); - matches |= Sse2.MoveMask(Sse2.CompareEqual(values2, search).AsByte()); - matches |= Sse2.MoveMask(Sse2.CompareEqual(values3, search).AsByte()); - // Note that MoveMask has converted the equal vector elements into a set of bit flags, - // So the bit position in 'matches' corresponds to the element offset. - if (matches == 0) + compareResult = Vector128.Equals(values0, search) | Vector128.Equals(values1, search) + | Vector128.Equals(values2, search) | Vector128.Equals(values3, search); + if (compareResult == Vector128.Zero) { // None matched offset += (nuint)Vector128.Count; continue; } + // Note that ExtractMostSignificantBits has converted the equal vector elements into a set of bit flags, + // So the bit position in 'matches' corresponds to the element offset. + matches = compareResult.AsByte().ExtractMostSignificantBits(); goto IntrinsicsMatch; } // Move to Vector length from end for final compare - search = LoadVector128(ref searchStart, lengthToExamine); + search = Vector128.LoadUnsafe(ref ushortSearchStart, lengthToExamine); offset = lengthToExamine; // Same as method as above - matches = Sse2.MoveMask(Sse2.CompareEqual(values0, search).AsByte()); - matches |= Sse2.MoveMask(Sse2.CompareEqual(values1, search).AsByte()); - matches |= Sse2.MoveMask(Sse2.CompareEqual(values2, search).AsByte()); - matches |= Sse2.MoveMask(Sse2.CompareEqual(values3, search).AsByte()); - if (matches == 0) + compareResult = Vector128.Equals(values0, search) | Vector128.Equals(values1, search) + | Vector128.Equals(values2, search) | Vector128.Equals(values3, search); + if (compareResult == Vector128.Zero) { // None matched goto NotFound; } + matches = compareResult.AsByte().ExtractMostSignificantBits(); } IntrinsicsMatch: @@ -1446,7 +1410,7 @@ public static unsafe int IndexOfAny(ref char searchStart, char value0, char valu VectorCompare: // We include the Supported check again here even though path will not be taken, so the asm isn't generated if not supported. - if (!Sse2.IsSupported && Vector.IsHardwareAccelerated) + if (!Vector128.IsHardwareAccelerated && Vector.IsHardwareAccelerated) { Vector values0 = new Vector(value0); Vector values1 = new Vector(value1); @@ -1508,13 +1472,13 @@ public static unsafe int IndexOfAny(ref char searchStart, char value0, char valu nuint offset = 0; // Use nuint for arithmetic to avoid unnecessary 64->32->64 truncations nuint lengthToExamine = (nuint)(uint)length; - if (Sse2.IsSupported) + if (Vector128.IsHardwareAccelerated) { // Calculate lengthToExamine here for test, rather than just testing as it used later, rather than doing it twice. nint vectorDiff = (nint)length - Vector128.Count; if (vectorDiff >= 0) { - // >= Sse2 intrinsics are supported and length is enough to use them, so use that path. + // >= Vector128 is accelerated and length is enough to use them, so use that path. // We jump forward to the intrinsics at the end of them method so a naive branch predict // will choose the non-intrinsic path so short lengths which don't gain anything aren't // overly disadvantaged by having to jump over a lot of code. Whereas the longer lengths @@ -1584,10 +1548,11 @@ public static unsafe int IndexOfAny(ref char searchStart, char value0, char valu // the end and forwards, which may overlap on an earlier compare. // We include the Supported check again here even though path will not be taken, so the asm isn't generated if not supported. - if (Sse2.IsSupported) + if (Vector128.IsHardwareAccelerated) { - int matches; - if (Avx2.IsSupported) + uint matches; + ref ushort ushortSearchStart = ref Unsafe.As(ref searchStart); + if (Vector256.IsHardwareAccelerated) { Vector256 search; // Guard as we may only have a valid size for Vector128; when we will move to the Sse2 @@ -1606,16 +1571,11 @@ public static unsafe int IndexOfAny(ref char searchStart, char value0, char valu // First time this checks again against 0, however we will move into final compare if it fails. while (lengthToExamine > offset) { - search = LoadVector256(ref searchStart, offset); - // We preform the Or at non-Vector level as we are using the maximum number of non-preserved registers (+ 1), - // and more causes them first to be pushed to stack and then popped on exit to preseve their values. - matches = Avx2.MoveMask(Avx2.CompareEqual(values0, search).AsByte()); - // Bitwise Or to combine the flagged matches for the second, third and fourth values to our match flags - matches |= Avx2.MoveMask(Avx2.CompareEqual(values1, search).AsByte()); - matches |= Avx2.MoveMask(Avx2.CompareEqual(values2, search).AsByte()); - matches |= Avx2.MoveMask(Avx2.CompareEqual(values3, search).AsByte()); - matches |= Avx2.MoveMask(Avx2.CompareEqual(values4, search).AsByte()); - // Note that MoveMask has converted the equal vector elements into a set of bit flags, + search = Vector256.LoadUnsafe(ref ushortSearchStart, offset); + matches = (Vector256.Equals(values0, search) | Vector256.Equals(values1, search) | Vector256.Equals(values2, search) + | Vector256.Equals(values3, search) | Vector256.Equals(values4, search)) + .AsByte().ExtractMostSignificantBits(); + // Note that ExtractMostSignificantBits has converted the equal vector elements into a set of bit flags, // So the bit position in 'matches' corresponds to the element offset. if (matches == 0) { @@ -1628,15 +1588,12 @@ public static unsafe int IndexOfAny(ref char searchStart, char value0, char valu } // Move to Vector length from end for final compare - search = LoadVector256(ref searchStart, lengthToExamine); + search = Vector256.LoadUnsafe(ref ushortSearchStart, lengthToExamine); offset = lengthToExamine; // Same as method as above - matches = Avx2.MoveMask(Avx2.CompareEqual(values0, search).AsByte()); - // Bitwise Or to combine the flagged matches for the second, third and fourth values to our match flags - matches |= Avx2.MoveMask(Avx2.CompareEqual(values1, search).AsByte()); - matches |= Avx2.MoveMask(Avx2.CompareEqual(values2, search).AsByte()); - matches |= Avx2.MoveMask(Avx2.CompareEqual(values3, search).AsByte()); - matches |= Avx2.MoveMask(Avx2.CompareEqual(values4, search).AsByte()); + matches = (Vector256.Equals(values0, search) | Vector256.Equals(values1, search) | Vector256.Equals(values2, search) + | Vector256.Equals(values3, search) | Vector256.Equals(values4, search)) + .AsByte().ExtractMostSignificantBits(); if (matches == 0) { // None matched @@ -1650,7 +1607,7 @@ public static unsafe int IndexOfAny(ref char searchStart, char value0, char valu // Initial size check was done on method entry. Debug.Assert(length >= Vector128.Count); { - Vector128 search; + Vector128 search, compareResult; Vector128 values0 = Vector128.Create((ushort)value0); Vector128 values1 = Vector128.Create((ushort)value1); Vector128 values2 = Vector128.Create((ushort)value2); @@ -1659,38 +1616,34 @@ public static unsafe int IndexOfAny(ref char searchStart, char value0, char valu // First time this checks against 0 and we will move into final compare if it fails. while (lengthToExamine > offset) { - search = LoadVector128(ref searchStart, offset); - - matches = Sse2.MoveMask(Sse2.CompareEqual(values0, search).AsByte()); - matches |= Sse2.MoveMask(Sse2.CompareEqual(values1, search).AsByte()); - matches |= Sse2.MoveMask(Sse2.CompareEqual(values2, search).AsByte()); - matches |= Sse2.MoveMask(Sse2.CompareEqual(values3, search).AsByte()); - matches |= Sse2.MoveMask(Sse2.CompareEqual(values4, search).AsByte()); - // Note that MoveMask has converted the equal vector elements into a set of bit flags, - // So the bit position in 'matches' corresponds to the element offset. - if (matches == 0) + search = Vector128.LoadUnsafe(ref ushortSearchStart, offset); + + compareResult = Vector128.Equals(values0, search) | Vector128.Equals(values1, search) | Vector128.Equals(values2, search) + | Vector128.Equals(values3, search) | Vector128.Equals(values4, search); + if (compareResult == Vector128.Zero) { // None matched offset += (nuint)Vector128.Count; continue; } + // Note that ExtractMostSignificantBits has converted the equal vector elements into a set of bit flags, + // So the bit position in 'matches' corresponds to the element offset. + matches = compareResult.AsByte().ExtractMostSignificantBits(); goto IntrinsicsMatch; } // Move to Vector length from end for final compare - search = LoadVector128(ref searchStart, lengthToExamine); + search = Vector128.LoadUnsafe(ref ushortSearchStart, lengthToExamine); offset = lengthToExamine; // Same as method as above - matches = Sse2.MoveMask(Sse2.CompareEqual(values0, search).AsByte()); - matches |= Sse2.MoveMask(Sse2.CompareEqual(values1, search).AsByte()); - matches |= Sse2.MoveMask(Sse2.CompareEqual(values2, search).AsByte()); - matches |= Sse2.MoveMask(Sse2.CompareEqual(values3, search).AsByte()); - matches |= Sse2.MoveMask(Sse2.CompareEqual(values4, search).AsByte()); - if (matches == 0) + compareResult = Vector128.Equals(values0, search) | Vector128.Equals(values1, search) | Vector128.Equals(values2, search) + | Vector128.Equals(values3, search) | Vector128.Equals(values4, search); + if (compareResult == Vector128.Zero) { // None matched goto NotFound; } + matches = compareResult.AsByte().ExtractMostSignificantBits(); } IntrinsicsMatch: @@ -1702,7 +1655,7 @@ public static unsafe int IndexOfAny(ref char searchStart, char value0, char valu VectorCompare: // We include the Supported check again here even though path will not be taken, so the asm isn't generated if not supported. - if (!Sse2.IsSupported && Vector.IsHardwareAccelerated) + if (!Vector128.IsHardwareAccelerated && Vector.IsHardwareAccelerated) { Vector values0 = new Vector(value0); Vector values1 = new Vector(value1);