Skip to content

Commit

Permalink
AdvSimd support for System.Text.Unicode.Utf16Utility.GetPointerToFirs…
Browse files Browse the repository at this point in the history
…tInvalidChar (dotnet#39050)

* AdvSimd support for System.Text.Unicode.Utf16Utility.GetPointerToFirstInvalidChar

* Move using directive outside #if.
Improve Arm64MoveMask.

* Change overloads

* UIn64 in Arm64MoveMask

* Build error implicit conversion fix

* Rename method and use simpler version

* Use ShiftRightArithmetic instead of CompareEqual + And.

* Remove unnecessary comment

* Add missing shims causing Linux build to fail
  • Loading branch information
carlossanlop authored and tannergooding committed Jul 21, 2020
1 parent 44446b1 commit 415074e
Show file tree
Hide file tree
Showing 2 changed files with 81 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.Arm;
using System.Runtime.Intrinsics.X86;
using System.Numerics;

Expand Down Expand Up @@ -78,26 +79,38 @@ static Utf16Utility()
long tempUtf8CodeUnitCountAdjustment = 0;
int tempScalarCountAdjustment = 0;

if (Sse2.IsSupported)
if ((AdvSimd.Arm64.IsSupported && BitConverter.IsLittleEndian) || Sse2.IsSupported)
{
if (inputLength >= Vector128<ushort>.Count)
{
Vector128<ushort> vector0080 = Vector128.Create((ushort)0x80);
Vector128<ushort> vectorA800 = Vector128.Create((ushort)0xA800);
Vector128<short> vector8800 = Vector128.Create(unchecked((short)0x8800));
Vector128<ushort> vectorZero = Vector128<ushort>.Zero;

do
{
Vector128<ushort> utf16Data = Sse2.LoadVector128((ushort*)pInputBuffer); // unaligned
uint mask;
Vector128<ushort> utf16Data;
if (AdvSimd.Arm64.IsSupported)
{
utf16Data = AdvSimd.LoadVector128((ushort*)pInputBuffer); // unaligned
}
else
{
utf16Data = Sse2.LoadVector128((ushort*)pInputBuffer); // unaligned
}

Vector128<ushort> charIsNonAscii;
if (Sse41.IsSupported)

if (AdvSimd.Arm64.IsSupported)
{
// Sets the 0x0080 bit of each element in 'charIsNonAscii' if the corresponding
// input was 0x0080 <= [value]. (i.e., [value] is non-ASCII.)
charIsNonAscii = AdvSimd.Min(utf16Data, vector0080);
}
else if (Sse41.IsSupported)
{
// Sets the 0x0080 bit of each element in 'charIsNonAscii' if the corresponding
// input was 0x0080 <= [value]. (i.e., [value] is non-ASCII.)

charIsNonAscii = Sse41.Min(utf16Data, vector0080);
}
else
Expand All @@ -111,16 +124,34 @@ static Utf16Utility()

#if DEBUG
// Quick check to ensure we didn't accidentally set the 0x8000 bit of any element.
uint debugMask = (uint)Sse2.MoveMask(charIsNonAscii.AsByte());
uint debugMask;
if (AdvSimd.Arm64.IsSupported)
{
debugMask = GetNonAsciiBytes(charIsNonAscii.AsByte());
}
else
{
debugMask = (uint)Sse2.MoveMask(charIsNonAscii.AsByte());
}
Debug.Assert((debugMask & 0b_1010_1010_1010_1010) == 0, "Shouldn't have set the 0x8000 bit of any element in 'charIsNonAscii'.");
#endif // DEBUG

// Sets the 0x8080 bits of each element in 'charIsNonAscii' if the corresponding
// input was 0x0800 <= [value]. This also handles the missing range a few lines above.

Vector128<ushort> charIsThreeByteUtf8Encoded = Sse2.Subtract(vectorZero, Sse2.ShiftRightLogical(utf16Data, 11));
Vector128<ushort> charIsThreeByteUtf8Encoded;
uint mask;

mask = (uint)Sse2.MoveMask(Sse2.Or(charIsNonAscii, charIsThreeByteUtf8Encoded).AsByte());
if (AdvSimd.IsSupported)
{
charIsThreeByteUtf8Encoded = AdvSimd.Subtract(vectorZero, AdvSimd.ShiftRightLogical(utf16Data, 11));
mask = GetNonAsciiBytes(AdvSimd.Or(charIsNonAscii, charIsThreeByteUtf8Encoded).AsByte());
}
else
{
charIsThreeByteUtf8Encoded = Sse2.Subtract(vectorZero, Sse2.ShiftRightLogical(utf16Data, 11));
mask = (uint)Sse2.MoveMask(Sse2.Or(charIsNonAscii, charIsThreeByteUtf8Encoded).AsByte());
}

// Each even bit of mask will be 1 only if the char was >= 0x0080,
// and each odd bit of mask will be 1 only if the char was >= 0x0800.
Expand Down Expand Up @@ -151,9 +182,16 @@ static Utf16Utility()
// Surrogates need to be special-cased for two reasons: (a) we need
// to account for the fact that we over-counted in the addition above;
// and (b) they require separate validation.

utf16Data = Sse2.Add(utf16Data, vectorA800);
mask = (uint)Sse2.MoveMask(Sse2.CompareLessThan(utf16Data.AsInt16(), vector8800).AsByte());
if (AdvSimd.Arm64.IsSupported)
{
utf16Data = AdvSimd.Add(utf16Data, vectorA800);
mask = GetNonAsciiBytes(AdvSimd.CompareLessThan(utf16Data.AsInt16(), vector8800).AsByte());
}
else
{
utf16Data = Sse2.Add(utf16Data, vectorA800);
mask = (uint)Sse2.MoveMask(Sse2.CompareLessThan(utf16Data.AsInt16(), vector8800).AsByte());
}

if (mask != 0)
{
Expand All @@ -178,7 +216,15 @@ static Utf16Utility()
// Since 'mask' already has 00 in these positions (since the corresponding char
// wasn't a surrogate), "mask AND mask2 == 00" holds for these positions.

uint mask2 = (uint)Sse2.MoveMask(Sse2.ShiftRightLogical(utf16Data, 3).AsByte());
uint mask2;
if (AdvSimd.Arm64.IsSupported)
{
mask2 = GetNonAsciiBytes(AdvSimd.ShiftRightLogical(utf16Data, 3).AsByte());
}
else
{
mask2 = (uint)Sse2.MoveMask(Sse2.ShiftRightLogical(utf16Data, 3).AsByte());
}

// 'lowSurrogatesMask' has its bits occur in pairs:
// - 01 if the corresponding char was a low surrogate char,
Expand Down Expand Up @@ -433,5 +479,24 @@ static Utf16Utility()
scalarCountAdjustment = tempScalarCountAdjustment;
return pInputBuffer;
}

private static readonly Vector128<byte> s_bitMask128 = BitConverter.IsLittleEndian ?
Vector128.Create(0x80402010_08040201).AsByte() :
Vector128.Create(0x01020408_10204080).AsByte();

[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static uint GetNonAsciiBytes(Vector128<byte> value)
{
Debug.Assert(AdvSimd.Arm64.IsSupported);

Vector128<byte> mostSignificantBitIsSet = AdvSimd.ShiftRightArithmetic(value.AsSByte(), 7).AsByte();
Vector128<byte> extractedBits = AdvSimd.And(mostSignificantBitIsSet, s_bitMask128);

// self-pairwise add until all flags have moved to the first two bytes of the vector
extractedBits = AdvSimd.Arm64.AddPairwise(extractedBits, extractedBits);
extractedBits = AdvSimd.Arm64.AddPairwise(extractedBits, extractedBits);
extractedBits = AdvSimd.Arm64.AddPairwise(extractedBits, extractedBits);
return extractedBits.AsUInt16().ToScalar();
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,14 @@ internal readonly struct Vector64<T>

internal static class Vector128
{
public static Vector128<long> Create(long value) => throw new PlatformNotSupportedException();
public static Vector128<short> Create(short value) => throw new PlatformNotSupportedException();
public static Vector128<ulong> Create(ulong value) => throw new PlatformNotSupportedException();
public static Vector128<ushort> Create(ushort value) => throw new PlatformNotSupportedException();
public static Vector128<ulong> CreateScalarUnsafe(ulong value) => throw new PlatformNotSupportedException();
public static Vector128<byte> AsByte<T>(this Vector128<T> vector) where T : struct => throw new PlatformNotSupportedException();
public static Vector128<short> AsInt16<T>(this Vector128<T> vector) where T : struct => throw new PlatformNotSupportedException();
public static Vector128<sbyte> AsSByte<T>(this Vector128<T> vector) where T : struct => throw new PlatformNotSupportedException();
public static Vector128<ushort> AsUInt16<T>(this Vector128<T> vector) where T : struct => throw new PlatformNotSupportedException();
public static Vector128<uint> AsUInt32<T>(this Vector128<T> vector) where T : struct => throw new PlatformNotSupportedException();
public static Vector128<ulong> AsUInt64<T>(this Vector128<T> vector) where T : struct => throw new PlatformNotSupportedException();
Expand Down

0 comments on commit 415074e

Please sign in to comment.