AdvSimd support for System.Text.Unicode.Utf16Utility.GetPointerToFirstInvalidChar (dotnet#39050)

* AdvSimd support for System.Text.Unicode.Utf16Utility.GetPointerToFirstInvalidChar
* Move using directive outside #if. Improve Arm64MoveMask.
* Change overloads
* UInt64 in Arm64MoveMask
* Build error implicit conversion fix
* Rename method and use simpler version
* Use ShiftRightArithmetic instead of CompareEqual + And.
* Remove unnecessary comment
* Add missing shims causing Linux build to fail
tannergooding · Jul 21, 2020 · 415074e · 415074e
1 parent 44446b1
commit 415074e
Show file tree

Hide file tree

Showing 2 changed files with 81 additions and 13 deletions.
diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf16Utility.Validation.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf16Utility.Validation.cs
@@ -4,6 +4,7 @@
 using System.Diagnostics;
 using System.Runtime.CompilerServices;
 using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.Arm;
 using System.Runtime.Intrinsics.X86;
 using System.Numerics;
 
@@ -78,26 +79,38 @@ static Utf16Utility()
             long tempUtf8CodeUnitCountAdjustment = 0;
             int tempScalarCountAdjustment = 0;
 
-            if (Sse2.IsSupported)
+            if ((AdvSimd.Arm64.IsSupported && BitConverter.IsLittleEndian) || Sse2.IsSupported)
             {
                 if (inputLength >= Vector128<ushort>.Count)
                 {
                     Vector128<ushort> vector0080 = Vector128.Create((ushort)0x80);
                     Vector128<ushort> vectorA800 = Vector128.Create((ushort)0xA800);
                     Vector128<short> vector8800 = Vector128.Create(unchecked((short)0x8800));
                     Vector128<ushort> vectorZero = Vector128<ushort>.Zero;
-
                     do
                     {
-                        Vector128<ushort> utf16Data = Sse2.LoadVector128((ushort*)pInputBuffer); // unaligned
-                        uint mask;
+                        Vector128<ushort> utf16Data;
+                        if (AdvSimd.Arm64.IsSupported)
+                        {
+                            utf16Data = AdvSimd.LoadVector128((ushort*)pInputBuffer); // unaligned
+                        }
+                        else
+                        {
+                            utf16Data = Sse2.LoadVector128((ushort*)pInputBuffer); // unaligned
+                        }
 
                         Vector128<ushort> charIsNonAscii;
-                        if (Sse41.IsSupported)
+
+                        if (AdvSimd.Arm64.IsSupported)
+                        {
+                            // Sets the 0x0080 bit of each element in 'charIsNonAscii' if the corresponding
+                            // input was 0x0080 <= [value]. (i.e., [value] is non-ASCII.)
+                            charIsNonAscii = AdvSimd.Min(utf16Data, vector0080);
+                        }
+                        else if (Sse41.IsSupported)
                         {
                             // Sets the 0x0080 bit of each element in 'charIsNonAscii' if the corresponding
                             // input was 0x0080 <= [value]. (i.e., [value] is non-ASCII.)
-
                             charIsNonAscii = Sse41.Min(utf16Data, vector0080);
                         }
                         else
@@ -111,16 +124,34 @@ static Utf16Utility()
 
 #if DEBUG
                         // Quick check to ensure we didn't accidentally set the 0x8000 bit of any element.
-                        uint debugMask = (uint)Sse2.MoveMask(charIsNonAscii.AsByte());
+                        uint debugMask;
+                        if (AdvSimd.Arm64.IsSupported)
+                        {
+                            debugMask = GetNonAsciiBytes(charIsNonAscii.AsByte());
+                        }
+                        else
+                        {
+                            debugMask = (uint)Sse2.MoveMask(charIsNonAscii.AsByte());
+                        }
                         Debug.Assert((debugMask & 0b_1010_1010_1010_1010) == 0, "Shouldn't have set the 0x8000 bit of any element in 'charIsNonAscii'.");
 #endif // DEBUG
 
                         // Sets the 0x8080 bits of each element in 'charIsNonAscii' if the corresponding
                         // input was 0x0800 <= [value]. This also handles the missing range a few lines above.
 
-                        Vector128<ushort> charIsThreeByteUtf8Encoded = Sse2.Subtract(vectorZero, Sse2.ShiftRightLogical(utf16Data, 11));
+                        Vector128<ushort> charIsThreeByteUtf8Encoded;
+                        uint mask;
 
-                        mask = (uint)Sse2.MoveMask(Sse2.Or(charIsNonAscii, charIsThreeByteUtf8Encoded).AsByte());
+                        if (AdvSimd.IsSupported)
+                        {
+                            charIsThreeByteUtf8Encoded = AdvSimd.Subtract(vectorZero, AdvSimd.ShiftRightLogical(utf16Data, 11));
+                            mask = GetNonAsciiBytes(AdvSimd.Or(charIsNonAscii, charIsThreeByteUtf8Encoded).AsByte());
+                        }
+                        else
+                        {
+                            charIsThreeByteUtf8Encoded = Sse2.Subtract(vectorZero, Sse2.ShiftRightLogical(utf16Data, 11));
+                            mask = (uint)Sse2.MoveMask(Sse2.Or(charIsNonAscii, charIsThreeByteUtf8Encoded).AsByte());
+                        }
 
                         // Each even bit of mask will be 1 only if the char was >= 0x0080,
                         // and each odd bit of mask will be 1 only if the char was >= 0x0800.
@@ -151,9 +182,16 @@ static Utf16Utility()
                         // Surrogates need to be special-cased for two reasons: (a) we need
                         // to account for the fact that we over-counted in the addition above;
                         // and (b) they require separate validation.
-
-                        utf16Data = Sse2.Add(utf16Data, vectorA800);
-                        mask = (uint)Sse2.MoveMask(Sse2.CompareLessThan(utf16Data.AsInt16(), vector8800).AsByte());
+                        if (AdvSimd.Arm64.IsSupported)
+                        {
+                            utf16Data = AdvSimd.Add(utf16Data, vectorA800);
+                            mask = GetNonAsciiBytes(AdvSimd.CompareLessThan(utf16Data.AsInt16(), vector8800).AsByte());
+                        }
+                        else
+                        {
+                            utf16Data = Sse2.Add(utf16Data, vectorA800);
+                            mask = (uint)Sse2.MoveMask(Sse2.CompareLessThan(utf16Data.AsInt16(), vector8800).AsByte());
+                        }
 
                         if (mask != 0)
                         {
@@ -178,7 +216,15 @@ static Utf16Utility()
                             //   Since 'mask' already has 00 in these positions (since the corresponding char
                             //   wasn't a surrogate), "mask AND mask2 == 00" holds for these positions.
 
-                            uint mask2 = (uint)Sse2.MoveMask(Sse2.ShiftRightLogical(utf16Data, 3).AsByte());
+                            uint mask2;
+                            if (AdvSimd.Arm64.IsSupported)
+                            {
+                                mask2 = GetNonAsciiBytes(AdvSimd.ShiftRightLogical(utf16Data, 3).AsByte());
+                            }
+                            else
+                            {
+                                mask2 = (uint)Sse2.MoveMask(Sse2.ShiftRightLogical(utf16Data, 3).AsByte());
+                            }
 
                             // 'lowSurrogatesMask' has its bits occur in pairs:
                             // - 01 if the corresponding char was a low surrogate char,
@@ -433,5 +479,24 @@ static Utf16Utility()
             scalarCountAdjustment = tempScalarCountAdjustment;
             return pInputBuffer;
         }
+
+        // Per-byte selector used by GetNonAsciiBytes: the 64-bit pattern is broadcast to both
+        // halves of the vector. Note that the first literal does not fit in a signed 64-bit
+        // integer and so binds to the ulong overload of Vector128.Create, while the second
+        // binds to the long overload; each branch is therefore converted with AsByte() on its own.
+        private static readonly Vector128<byte> s_bitMask128 =
+            BitConverter.IsLittleEndian
+                ? Vector128.Create(0x80402010_08040201).AsByte()
+                : Vector128.Create(0x01020408_10204080).AsByte();
+
+        /// <summary>
+        /// Condenses the most significant bit of every byte in <paramref name="value"/> into a
+        /// 16-bit mask. Callers in this file use it on the AdvSimd path in the places where the
+        /// SSE2 path calls <c>Sse2.MoveMask</c>.
+        /// </summary>
+        /// <param name="value">The 16 bytes whose top bits are to be collected.</param>
+        /// <returns>
+        /// The low 16 bits of the folded vector, zero-extended to <see cref="uint"/>.
+        /// </returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static uint GetNonAsciiBytes(Vector128<byte> value)
+        {
+            Debug.Assert(AdvSimd.Arm64.IsSupported);
+
+            // An arithmetic right shift by 7 replicates each byte's top bit across the
+            // whole byte: 0xFF where the bit was set, 0x00 where it was clear.
+            Vector128<sbyte> signFill = AdvSimd.ShiftRightArithmetic(value.AsSByte(), 7);
+
+            // Reduce every all-ones byte to the single flag bit selected by s_bitMask128.
+            Vector128<byte> flags = AdvSimd.And(signFill.AsByte(), s_bitMask128);
+
+            // Pairwise-add the vector with itself three times (16 -> 8 -> 4 -> 2 partial sums)
+            // so that all flags end up in the first two bytes. Within each 8-byte half the
+            // flag bits are distinct, so the additions never carry.
+            Vector128<byte> folded8 = AdvSimd.Arm64.AddPairwise(flags, flags);
+            Vector128<byte> folded4 = AdvSimd.Arm64.AddPairwise(folded8, folded8);
+            Vector128<byte> folded2 = AdvSimd.Arm64.AddPairwise(folded4, folded4);
+
+            return folded2.AsUInt16().ToScalar();
+        }
     }
 }
diff --git a/...ibraries/System.Utf8String.Experimental/src/System/Runtime/Intrinsics/Intrinsics.Shims.cs b/...ibraries/System.Utf8String.Experimental/src/System/Runtime/Intrinsics/Intrinsics.Shims.cs
@@ -16,11 +16,14 @@ internal readonly struct Vector64<T>
 
     internal static class Vector128
     {
+        public static Vector128<long> Create(long value) => throw new PlatformNotSupportedException();
         public static Vector128<short> Create(short value) => throw new PlatformNotSupportedException();
+        public static Vector128<ulong> Create(ulong value) => throw new PlatformNotSupportedException();
         public static Vector128<ushort> Create(ushort value) => throw new PlatformNotSupportedException();
         public static Vector128<ulong> CreateScalarUnsafe(ulong value) => throw new PlatformNotSupportedException();
         public static Vector128<byte> AsByte<T>(this Vector128<T> vector) where T : struct => throw new PlatformNotSupportedException();
         public static Vector128<short> AsInt16<T>(this Vector128<T> vector) where T : struct => throw new PlatformNotSupportedException();
+        public static Vector128<sbyte> AsSByte<T>(this Vector128<T> vector) where T : struct => throw new PlatformNotSupportedException();
         public static Vector128<ushort> AsUInt16<T>(this Vector128<T> vector) where T : struct => throw new PlatformNotSupportedException();
         public static Vector128<uint> AsUInt32<T>(this Vector128<T> vector) where T : struct => throw new PlatformNotSupportedException();
         public static Vector128<ulong> AsUInt64<T>(this Vector128<T> vector) where T : struct => throw new PlatformNotSupportedException();