From 415074e3fa35c8552474dae24c429deb051f27ad Mon Sep 17 00:00:00 2001 From: Carlos Sanchez Lopez <1175054+carlossanlop@users.noreply.github.com> Date: Thu, 16 Jul 2020 15:57:39 -0700 Subject: [PATCH] AdvSimd support for System.Text.Unicode.Utf16Utility.GetPointerToFirstInvalidChar (#39050) * AdvSimd support for System.Text.Unicode.Utf16Utility.GetPointerToFirstInvalidChar * Move using directive outside #if. Improve Arm64MoveMask. * Change overloads * UInt64 in Arm64MoveMask * Build error implicit conversion fix * Rename method and use simpler version * Use ShiftRightArithmetic instead of CompareEqual + And. * Remove unnecessary comment * Add missing shims causing Linux build to fail --- .../Text/Unicode/Utf16Utility.Validation.cs | 91 ++++++++++++++++--- .../Runtime/Intrinsics/Intrinsics.Shims.cs | 3 + 2 files changed, 81 insertions(+), 13 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf16Utility.Validation.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf16Utility.Validation.cs index 395a887f9b4230..cd4e6627d84455 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf16Utility.Validation.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf16Utility.Validation.cs @@ -4,6 +4,7 @@ using System.Diagnostics; using System.Runtime.CompilerServices; using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.Arm; using System.Runtime.Intrinsics.X86; using System.Numerics; @@ -78,7 +79,7 @@ static Utf16Utility() long tempUtf8CodeUnitCountAdjustment = 0; int tempScalarCountAdjustment = 0; - if (Sse2.IsSupported) + if ((AdvSimd.Arm64.IsSupported && BitConverter.IsLittleEndian) || Sse2.IsSupported) { if (inputLength >= Vector128.Count) { @@ -86,18 +87,30 @@ static Utf16Utility() Vector128 vectorA800 = Vector128.Create((ushort)0xA800); Vector128 vector8800 = Vector128.Create(unchecked((short)0x8800)); Vector128 vectorZero = Vector128.Zero; - do { - Vector128 
utf16Data = Sse2.LoadVector128((ushort*)pInputBuffer); // unaligned - uint mask; + Vector128 utf16Data; + if (AdvSimd.Arm64.IsSupported) + { + utf16Data = AdvSimd.LoadVector128((ushort*)pInputBuffer); // unaligned + } + else + { + utf16Data = Sse2.LoadVector128((ushort*)pInputBuffer); // unaligned + } Vector128 charIsNonAscii; - if (Sse41.IsSupported) + + if (AdvSimd.Arm64.IsSupported) + { + // Sets the 0x0080 bit of each element in 'charIsNonAscii' if the corresponding + // input was 0x0080 <= [value]. (i.e., [value] is non-ASCII.) + charIsNonAscii = AdvSimd.Min(utf16Data, vector0080); + } + else if (Sse41.IsSupported) { // Sets the 0x0080 bit of each element in 'charIsNonAscii' if the corresponding // input was 0x0080 <= [value]. (i.e., [value] is non-ASCII.) - charIsNonAscii = Sse41.Min(utf16Data, vector0080); } else @@ -111,16 +124,34 @@ static Utf16Utility() #if DEBUG // Quick check to ensure we didn't accidentally set the 0x8000 bit of any element. - uint debugMask = (uint)Sse2.MoveMask(charIsNonAscii.AsByte()); + uint debugMask; + if (AdvSimd.Arm64.IsSupported) + { + debugMask = GetNonAsciiBytes(charIsNonAscii.AsByte()); + } + else + { + debugMask = (uint)Sse2.MoveMask(charIsNonAscii.AsByte()); + } Debug.Assert((debugMask & 0b_1010_1010_1010_1010) == 0, "Shouldn't have set the 0x8000 bit of any element in 'charIsNonAscii'."); #endif // DEBUG // Sets the 0x8080 bits of each element in 'charIsNonAscii' if the corresponding // input was 0x0800 <= [value]. This also handles the missing range a few lines above. 
- Vector128 charIsThreeByteUtf8Encoded = Sse2.Subtract(vectorZero, Sse2.ShiftRightLogical(utf16Data, 11)); + Vector128 charIsThreeByteUtf8Encoded; + uint mask; - mask = (uint)Sse2.MoveMask(Sse2.Or(charIsNonAscii, charIsThreeByteUtf8Encoded).AsByte()); + if (AdvSimd.IsSupported) + { + charIsThreeByteUtf8Encoded = AdvSimd.Subtract(vectorZero, AdvSimd.ShiftRightLogical(utf16Data, 11)); + mask = GetNonAsciiBytes(AdvSimd.Or(charIsNonAscii, charIsThreeByteUtf8Encoded).AsByte()); + } + else + { + charIsThreeByteUtf8Encoded = Sse2.Subtract(vectorZero, Sse2.ShiftRightLogical(utf16Data, 11)); + mask = (uint)Sse2.MoveMask(Sse2.Or(charIsNonAscii, charIsThreeByteUtf8Encoded).AsByte()); + } // Each even bit of mask will be 1 only if the char was >= 0x0080, // and each odd bit of mask will be 1 only if the char was >= 0x0800. @@ -151,9 +182,16 @@ static Utf16Utility() // Surrogates need to be special-cased for two reasons: (a) we need // to account for the fact that we over-counted in the addition above; // and (b) they require separate validation. - - utf16Data = Sse2.Add(utf16Data, vectorA800); - mask = (uint)Sse2.MoveMask(Sse2.CompareLessThan(utf16Data.AsInt16(), vector8800).AsByte()); + if (AdvSimd.Arm64.IsSupported) + { + utf16Data = AdvSimd.Add(utf16Data, vectorA800); + mask = GetNonAsciiBytes(AdvSimd.CompareLessThan(utf16Data.AsInt16(), vector8800).AsByte()); + } + else + { + utf16Data = Sse2.Add(utf16Data, vectorA800); + mask = (uint)Sse2.MoveMask(Sse2.CompareLessThan(utf16Data.AsInt16(), vector8800).AsByte()); + } if (mask != 0) { @@ -178,7 +216,15 @@ static Utf16Utility() // Since 'mask' already has 00 in these positions (since the corresponding char // wasn't a surrogate), "mask AND mask2 == 00" holds for these positions. 
- uint mask2 = (uint)Sse2.MoveMask(Sse2.ShiftRightLogical(utf16Data, 3).AsByte()); + uint mask2; + if (AdvSimd.Arm64.IsSupported) + { + mask2 = GetNonAsciiBytes(AdvSimd.ShiftRightLogical(utf16Data, 3).AsByte()); + } + else + { + mask2 = (uint)Sse2.MoveMask(Sse2.ShiftRightLogical(utf16Data, 3).AsByte()); + } // 'lowSurrogatesMask' has its bits occur in pairs: // - 01 if the corresponding char was a low surrogate char, @@ -433,5 +479,24 @@ static Utf16Utility() scalarCountAdjustment = tempScalarCountAdjustment; return pInputBuffer; } + + private static readonly Vector128 s_bitMask128 = BitConverter.IsLittleEndian ? + Vector128.Create(0x80402010_08040201).AsByte() : + Vector128.Create(0x01020408_10204080).AsByte(); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static uint GetNonAsciiBytes(Vector128 value) + { + Debug.Assert(AdvSimd.Arm64.IsSupported); + + Vector128 mostSignificantBitIsSet = AdvSimd.ShiftRightArithmetic(value.AsSByte(), 7).AsByte(); + Vector128 extractedBits = AdvSimd.And(mostSignificantBitIsSet, s_bitMask128); + + // self-pairwise add until all flags have moved to the first two bytes of the vector + extractedBits = AdvSimd.Arm64.AddPairwise(extractedBits, extractedBits); + extractedBits = AdvSimd.Arm64.AddPairwise(extractedBits, extractedBits); + extractedBits = AdvSimd.Arm64.AddPairwise(extractedBits, extractedBits); + return extractedBits.AsUInt16().ToScalar(); + } } } diff --git a/src/libraries/System.Utf8String.Experimental/src/System/Runtime/Intrinsics/Intrinsics.Shims.cs b/src/libraries/System.Utf8String.Experimental/src/System/Runtime/Intrinsics/Intrinsics.Shims.cs index e5f9284769d6b5..c4c68e966eba22 100644 --- a/src/libraries/System.Utf8String.Experimental/src/System/Runtime/Intrinsics/Intrinsics.Shims.cs +++ b/src/libraries/System.Utf8String.Experimental/src/System/Runtime/Intrinsics/Intrinsics.Shims.cs @@ -16,11 +16,14 @@ internal readonly struct Vector64 internal static class Vector128 { + public static Vector128 
Create(long value) => throw new PlatformNotSupportedException(); public static Vector128 Create(short value) => throw new PlatformNotSupportedException(); + public static Vector128 Create(ulong value) => throw new PlatformNotSupportedException(); public static Vector128 Create(ushort value) => throw new PlatformNotSupportedException(); public static Vector128 CreateScalarUnsafe(ulong value) => throw new PlatformNotSupportedException(); public static Vector128 AsByte(this Vector128 vector) where T : struct => throw new PlatformNotSupportedException(); public static Vector128 AsInt16(this Vector128 vector) where T : struct => throw new PlatformNotSupportedException(); + public static Vector128 AsSByte(this Vector128 vector) where T : struct => throw new PlatformNotSupportedException(); public static Vector128 AsUInt16(this Vector128 vector) where T : struct => throw new PlatformNotSupportedException(); public static Vector128 AsUInt32(this Vector128 vector) where T : struct => throw new PlatformNotSupportedException(); public static Vector128 AsUInt64(this Vector128 vector) where T : struct => throw new PlatformNotSupportedException();