diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Validation.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Validation.cs index 33e5181363920..7730708030eba 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Validation.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Validation.cs @@ -4,6 +4,8 @@ using System.Diagnostics; using System.Numerics; using System.Runtime.CompilerServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.Arm; using System.Runtime.Intrinsics.X86; #if SYSTEM_PRIVATE_CORELIB @@ -117,22 +119,35 @@ internal static unsafe partial class Utf8Utility // the alignment check consumes at most a single DWORD.) byte* pInputBufferFinalPosAtWhichCanSafelyLoop = pFinalPosWhereCanReadDWordFromInputBuffer - 3 * sizeof(uint); // can safely read 4 DWORDs here - uint mask; + nuint trailingZeroCount; + + Vector128 bitMask128 = BitConverter.IsLittleEndian ? + Vector128.Create((ushort)0x1001).AsByte() : + Vector128.Create((ushort)0x0110).AsByte(); do { - if (Sse2.IsSupported) + // pInputBuffer is 32-bit aligned but not necessary 128-bit aligned, so we're + // going to perform an unaligned load. We don't necessarily care about aligning + // this because we pessimistically assume we'll encounter non-ASCII data at some + // point in the not-too-distant future (otherwise we would've stayed entirely + // within the all-ASCII vectorized code at the entry to this method). + if (AdvSimd.Arm64.IsSupported && BitConverter.IsLittleEndian) { - // pInputBuffer is 32-bit aligned but not necessary 128-bit aligned, so we're - // going to perform an unaligned load. We don't necessarily care about aligning - // this because we pessimistically assume we'll encounter non-ASCII data at some - // point in the not-too-distant future (otherwise we would've stayed entirely - // within the all-ASCII vectorized code at the entry to this method). - - mask = (uint)Sse2.MoveMask(Sse2.LoadVector128((byte*)pInputBuffer)); + ulong mask = GetNonAsciiBytes(AdvSimd.LoadVector128(pInputBuffer), bitMask128); + if (mask != 0) + { + trailingZeroCount = (nuint)BitOperations.TrailingZeroCount(mask) >> 2; + goto LoopTerminatedEarlyDueToNonAsciiData; + } + } + else if (Sse2.IsSupported) + { + uint mask = (uint)Sse2.MoveMask(Sse2.LoadVector128(pInputBuffer)); if (mask != 0) { - goto Sse2LoopTerminatedEarlyDueToNonAsciiData; + trailingZeroCount = (nuint)BitOperations.TrailingZeroCount(mask); + goto LoopTerminatedEarlyDueToNonAsciiData; } } else @@ -153,19 +168,20 @@ internal static unsafe partial class Utf8Utility continue; // need to perform a bounds check because we might be running out of data - Sse2LoopTerminatedEarlyDueToNonAsciiData: + LoopTerminatedEarlyDueToNonAsciiData: + // x86 can only be little endian, while ARM can be big or little endian + // so if we reached this label we need to check both combinations are supported + Debug.Assert((AdvSimd.Arm64.IsSupported && BitConverter.IsLittleEndian) || Sse2.IsSupported); - Debug.Assert(BitConverter.IsLittleEndian); - Debug.Assert(Sse2.IsSupported); // The 'mask' value will have a 0 bit for each ASCII byte we saw and a 1 bit - // for each non-ASCII byte we saw. We can count the number of ASCII bytes, + // for each non-ASCII byte we saw. trailingZeroCount will count the number of ASCII bytes, // bump our input counter by that amount, and resume processing from the // "the first byte is no longer ASCII" portion of the main loop. + // We should not expect a total number of zeroes equal or larger than 16. + Debug.Assert(trailingZeroCount < 16); - Debug.Assert(mask != 0); - - pInputBuffer += BitOperations.TrailingZeroCount(mask); + pInputBuffer += trailingZeroCount; if (pInputBuffer > pFinalPosWhereCanReadDWordFromInputBuffer) { goto ProcessRemainingBytesSlow; @@ -719,5 +735,19 @@ internal static unsafe partial class Utf8Utility scalarCountAdjustment = tempScalarCountAdjustment; return pInputBuffer; } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static ulong GetNonAsciiBytes(Vector128 value, Vector128 bitMask128) + { + if (!AdvSimd.Arm64.IsSupported || !BitConverter.IsLittleEndian) + { + throw new PlatformNotSupportedException(); + } + + Vector128 mostSignificantBitIsSet = AdvSimd.ShiftRightArithmetic(value.AsSByte(), 7).AsByte(); + Vector128 extractedBits = AdvSimd.And(mostSignificantBitIsSet, bitMask128); + extractedBits = AdvSimd.Arm64.AddPairwise(extractedBits, extractedBits); + return extractedBits.AsUInt64().ToScalar(); + } } } diff --git a/src/libraries/System.Utf8String.Experimental/src/System/Runtime/Intrinsics/Intrinsics.Shims.cs b/src/libraries/System.Utf8String.Experimental/src/System/Runtime/Intrinsics/Intrinsics.Shims.cs index e5f9284769d6b..c4c68e966eba2 100644 --- a/src/libraries/System.Utf8String.Experimental/src/System/Runtime/Intrinsics/Intrinsics.Shims.cs +++ b/src/libraries/System.Utf8String.Experimental/src/System/Runtime/Intrinsics/Intrinsics.Shims.cs @@ -16,11 +16,14 @@ internal readonly struct Vector64 internal static class Vector128 { + public static Vector128 Create(long value) => throw new PlatformNotSupportedException(); public static Vector128 Create(short value) => throw new PlatformNotSupportedException(); + public static Vector128 Create(ulong value) => throw new PlatformNotSupportedException(); public static Vector128 Create(ushort value) => throw new PlatformNotSupportedException(); public static Vector128 CreateScalarUnsafe(ulong value) => throw new PlatformNotSupportedException(); public static Vector128 AsByte(this Vector128 vector) where T : struct => throw new PlatformNotSupportedException(); public static Vector128 AsInt16(this Vector128 vector) where T : struct => throw new PlatformNotSupportedException(); + public static Vector128 AsSByte(this Vector128 vector) where T : struct => throw new PlatformNotSupportedException(); public static Vector128 AsUInt16(this Vector128 vector) where T : struct => throw new PlatformNotSupportedException(); public static Vector128 AsUInt32(this Vector128 vector) where T : struct => throw new PlatformNotSupportedException(); public static Vector128 AsUInt64(this Vector128 vector) where T : struct => throw new PlatformNotSupportedException();