Skip to content

Commit

Permalink
AdvSimd support for System.Text.Unicode.Utf8Utility.GetPointerToFirst…
Browse files Browse the repository at this point in the history
…InvalidByte (dotnet#38653)

* AdvSimd support for System.Text.Unicode.Utf8Utility.GetPointerToFirstInvalidByte

* Move comment to the top, add shims.

* Little endian checks

* Use custom MoveMask method for AdvSimd

* Address suggestions to improve the AdvSimdMoveMask method

* Define initialMask outside MoveMask method

* UInt64 in Arm64MoveMask

* Add unit test case to verify intrinsics improvement

* Avoid casting to smaller integer type

* Typo and comment

* Use ShiftRightArithmetic instead of CompareEqual + And.
Remove test case causing other unit tests to fail.

* Use AddPairwise version of GetNotAsciiBytes

* Add missing shims causing Linux build to fail

* Simplify GetNonAsciiBytes to only one AddPairwise call, shorter bitmask

* Respect data type returned by masking method

* Address suggestions - assert trailingzerocount and bring back uint mask

* Trailing zeroes in AdvSimd need to be divided by 4, and total number should not be larger than 16

* Avoid declaring static field which causes PNSE in Utf8String.Experimental (S.P.Corelib code is used for being NetStandard)

* Prefer using nuint for BitOperations.TrailingZeroCount
  • Loading branch information
carlossanlop authored and Jacksondr5 committed Aug 10, 2020
1 parent 6097d8c commit ef91788
Showing 1 changed file with 47 additions and 17 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
using System.Diagnostics;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.Arm;
using System.Runtime.Intrinsics.X86;

#if SYSTEM_PRIVATE_CORELIB
Expand Down Expand Up @@ -117,22 +119,35 @@ internal static unsafe partial class Utf8Utility
// the alignment check consumes at most a single DWORD.)

byte* pInputBufferFinalPosAtWhichCanSafelyLoop = pFinalPosWhereCanReadDWordFromInputBuffer - 3 * sizeof(uint); // can safely read 4 DWORDs here
uint mask;
nuint trailingZeroCount;

Vector128<byte> bitMask128 = BitConverter.IsLittleEndian ?
Vector128.Create((ushort)0x1001).AsByte() :
Vector128.Create((ushort)0x0110).AsByte();

do
{
if (Sse2.IsSupported)
// pInputBuffer is 32-bit aligned but not necessarily 128-bit aligned, so we're
// going to perform an unaligned load. We don't necessarily care about aligning
// this because we pessimistically assume we'll encounter non-ASCII data at some
// point in the not-too-distant future (otherwise we would've stayed entirely
// within the all-ASCII vectorized code at the entry to this method).
if (AdvSimd.Arm64.IsSupported && BitConverter.IsLittleEndian)
{
// pInputBuffer is 32-bit aligned but not necessary 128-bit aligned, so we're
// going to perform an unaligned load. We don't necessarily care about aligning
// this because we pessimistically assume we'll encounter non-ASCII data at some
// point in the not-too-distant future (otherwise we would've stayed entirely
// within the all-ASCII vectorized code at the entry to this method).

mask = (uint)Sse2.MoveMask(Sse2.LoadVector128((byte*)pInputBuffer));
ulong mask = GetNonAsciiBytes(AdvSimd.LoadVector128(pInputBuffer), bitMask128);
if (mask != 0)
{
trailingZeroCount = (nuint)BitOperations.TrailingZeroCount(mask) >> 2;
goto LoopTerminatedEarlyDueToNonAsciiData;
}
}
else if (Sse2.IsSupported)
{
uint mask = (uint)Sse2.MoveMask(Sse2.LoadVector128(pInputBuffer));
if (mask != 0)
{
goto Sse2LoopTerminatedEarlyDueToNonAsciiData;
trailingZeroCount = (nuint)BitOperations.TrailingZeroCount(mask);
goto LoopTerminatedEarlyDueToNonAsciiData;
}
}
else
Expand All @@ -153,19 +168,20 @@ internal static unsafe partial class Utf8Utility

continue; // need to perform a bounds check because we might be running out of data

Sse2LoopTerminatedEarlyDueToNonAsciiData:
LoopTerminatedEarlyDueToNonAsciiData:
// x86 can only be little endian, while ARM can be big or little endian
// so if we reached this label we need to check both combinations are supported
Debug.Assert((AdvSimd.Arm64.IsSupported && BitConverter.IsLittleEndian) || Sse2.IsSupported);

Debug.Assert(BitConverter.IsLittleEndian);
Debug.Assert(Sse2.IsSupported);

// The 'mask' value will have a 0 bit for each ASCII byte we saw and a 1 bit
// for each non-ASCII byte we saw. We can count the number of ASCII bytes,
// for each non-ASCII byte we saw. trailingZeroCount will count the number of ASCII bytes,
// bump our input counter by that amount, and resume processing from the
// "the first byte is no longer ASCII" portion of the main loop.
// We should not expect a total number of zeroes equal to or larger than 16.
Debug.Assert(trailingZeroCount < 16);

Debug.Assert(mask != 0);

pInputBuffer += BitOperations.TrailingZeroCount(mask);
pInputBuffer += trailingZeroCount;
if (pInputBuffer > pFinalPosWhereCanReadDWordFromInputBuffer)
{
goto ProcessRemainingBytesSlow;
Expand Down Expand Up @@ -719,5 +735,19 @@ internal static unsafe partial class Utf8Utility
scalarCountAdjustment = tempScalarCountAdjustment;
return pInputBuffer;
}

/// <summary>
/// Condenses the "is non-ASCII" state of the 16 bytes in <paramref name="value"/> into a
/// single 64-bit scalar using AdvSimd (Arm64) instructions.
/// </summary>
/// <param name="value">The 16 input bytes to inspect.</param>
/// <param name="bitMask128">
/// Per-byte weights that are AND-ed with the all-ones/all-zeros lanes before adjacent bytes
/// are summed. When the bytes alternate 0x01, 0x10 (the pattern the caller in this file builds
/// on little-endian targets), each input byte ends up owning one 4-bit group of the result.
/// </param>
/// <returns>
/// The low 64 bits of the pairwise-added vector. It is zero when no input byte has its most
/// significant bit set. With the alternating 0x01/0x10 weights, input byte <c>i</c> sets bit
/// <c>4 * i</c>, which is why the caller shifts the trailing zero count right by 2.
/// </returns>
/// <exception cref="PlatformNotSupportedException">
/// Thrown when AdvSimd.Arm64 is not supported or the platform is not little endian.
/// </exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static ulong GetNonAsciiBytes(Vector128<byte> value, Vector128<byte> bitMask128)
{
    if (AdvSimd.Arm64.IsSupported && BitConverter.IsLittleEndian)
    {
        // An arithmetic shift by 7 smears each byte's sign bit across the whole lane:
        // 0xFF for bytes >= 0x80, 0x00 for ASCII bytes.
        Vector128<sbyte> signSmeared = AdvSimd.ShiftRightArithmetic(value.AsSByte(), 7);

        // Keep only the weight bits belonging to the lanes that were flagged.
        Vector128<byte> weightedLanes = AdvSimd.And(signSmeared.AsByte(), bitMask128);

        // Summing neighbouring bytes packs the 16 lanes into the lower 8 bytes
        // (the upper 8 bytes hold a duplicate because both operands are the same).
        Vector128<byte> packedPairs = AdvSimd.Arm64.AddPairwise(weightedLanes, weightedLanes);

        return packedPairs.AsUInt64().ToScalar();
    }

    throw new PlatformNotSupportedException();
}
}
}

0 comments on commit ef91788

Please sign in to comment.