Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use inline Vector128.Create for constants #33969

Merged
merged 2 commits into from
Jul 2, 2021
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 18 additions & 28 deletions src/Shared/ServerInfrastructure/StringUtilities.cs
Original file line number Diff line number Diff line change
Expand Up @@ -132,13 +132,13 @@ public static unsafe bool TryGetAsciiString(byte* input, char* output, int count
Debug.Assert((long)end >= Vector256<sbyte>.Count);

// PERF: so the JIT can reuse the zero from a register
Vector128<sbyte> zero = Vector128<sbyte>.Zero;
var zero = Vector128<sbyte>.Zero;

if (Sse2.IsSupported)
{
if (Avx2.IsSupported && input <= end - Vector256<sbyte>.Count)
{
Vector256<sbyte> avxZero = Vector256<sbyte>.Zero;
var avxZero = Vector256<sbyte>.Zero;

do
{
Expand Down Expand Up @@ -233,8 +233,8 @@ out Unsafe.AsRef<Vector<short>>(output),
// BMI2 could be used, but this variant is faster on both Intel and AMD.
if (Sse2.X64.IsSupported)
{
Vector128<sbyte> vecNarrow = Sse2.X64.ConvertScalarToVector128Int64(value).AsSByte();
Vector128<ulong> vecWide = Sse2.UnpackLow(vecNarrow, zero).AsUInt64();
var vecNarrow = Sse2.X64.ConvertScalarToVector128Int64(value).AsSByte();
var vecWide = Sse2.UnpackLow(vecNarrow, zero).AsUInt64();
Sse2.Store((ulong*)output, vecWide);
}
else
Expand Down Expand Up @@ -570,8 +570,8 @@ private static unsafe void WidenFourAsciiBytesToUtf16AndWriteToBuffer(char* outp
// BMI2 could be used, but this variant is faster on both Intel and AMD.
if (Sse2.X64.IsSupported)
{
Vector128<sbyte> vecNarrow = Sse2.ConvertScalarToVector128Int32(value).AsSByte();
Vector128<ulong> vecWide = Sse2.UnpackLow(vecNarrow, zero).AsUInt64();
var vecNarrow = Sse2.ConvertScalarToVector128Int32(value).AsSByte();
var vecWide = Sse2.UnpackLow(vecNarrow, zero).AsUInt64();
Unsafe.WriteUnaligned(output, Sse2.X64.ConvertToUInt64(vecWide));
}
else
Expand All @@ -598,8 +598,8 @@ private static bool WidenFourAsciiBytesToUtf16AndCompareToChars(ref char charSta
// BMI2 could be used, but this variant is faster on both Intel and AMD.
if (Sse2.X64.IsSupported)
{
Vector128<byte> vecNarrow = Sse2.ConvertScalarToVector128UInt32(value).AsByte();
Vector128<ulong> vecWide = Sse2.UnpackLow(vecNarrow, Vector128<byte>.Zero).AsUInt64();
var vecNarrow = Sse2.ConvertScalarToVector128UInt32(value).AsByte();
var vecWide = Sse2.UnpackLow(vecNarrow, Vector128<byte>.Zero).AsUInt64();
return Unsafe.ReadUnaligned<ulong>(ref Unsafe.As<char, byte>(ref charStart)) ==
Sse2.X64.ConvertToUInt64(vecWide);
}
Expand Down Expand Up @@ -637,8 +637,8 @@ private static bool WidenTwoAsciiBytesToUtf16AndCompareToChars(ref char charStar
// BMI2 could be used, but this variant is faster on both Intel and AMD.
if (Sse2.IsSupported)
{
Vector128<byte> vecNarrow = Sse2.ConvertScalarToVector128UInt32(value).AsByte();
Vector128<uint> vecWide = Sse2.UnpackLow(vecNarrow, Vector128<byte>.Zero).AsUInt32();
var vecNarrow = Sse2.ConvertScalarToVector128UInt32(value).AsByte();
var vecWide = Sse2.UnpackLow(vecNarrow, Vector128<byte>.Zero).AsUInt32();
return Unsafe.ReadUnaligned<uint>(ref Unsafe.As<char, byte>(ref charStart)) ==
Sse2.ConvertToUInt32(vecWide);
}
Expand Down Expand Up @@ -725,34 +725,24 @@ private static void PopulateSpanWithHexSuffix(Span<char> buffer, (string? str, c

if (Ssse3.IsSupported)
{
// These must be explicitly typed as ReadOnlySpan<byte>.
// They then become non-allocating mappings to the data section of the assembly.
// This uses the C# compiler's ability to refer to static data directly. For more information, see https://vcsjones.dev/2019/02/01/csharp-readonly-span-bytes-static
Tratcher marked this conversation as resolved.
Show resolved Hide resolved
ReadOnlySpan<byte> shuffleMaskData = new byte[16]
{
var lowNibbles = Ssse3.Shuffle(Vector128.CreateScalarUnsafe(tupleNumber).AsByte(), Vector128.Create(
0xF, 0xF, 3, 0xF,
0xF, 0xF, 2, 0xF,
0xF, 0xF, 1, 0xF,
0xF, 0xF, 0, 0xF
};
).AsByte());

ReadOnlySpan<byte> asciiUpperCaseData = new byte[16]
{
var highNibbles = Sse2.ShiftRightLogical(Sse2.ShiftRightLogical128BitLane(lowNibbles, 2).AsInt32(), 4).AsByte();
var indices = Sse2.And(Sse2.Or(lowNibbles, highNibbles), Vector128.Create((byte)0xF));

// Lookup the hex values at the positions of the indices
var hex = Ssse3.Shuffle(Vector128.Create(
(byte)'0', (byte)'1', (byte)'2', (byte)'3',
(byte)'4', (byte)'5', (byte)'6', (byte)'7',
(byte)'8', (byte)'9', (byte)'A', (byte)'B',
(byte)'C', (byte)'D', (byte)'E', (byte)'F'
};

// Load from data section memory into Vector128 registers
var shuffleMask = Unsafe.ReadUnaligned<Vector128<byte>>(ref MemoryMarshal.GetReference(shuffleMaskData));
var asciiUpperCase = Unsafe.ReadUnaligned<Vector128<byte>>(ref MemoryMarshal.GetReference(asciiUpperCaseData));
), indices);

var lowNibbles = Ssse3.Shuffle(Vector128.CreateScalarUnsafe(tupleNumber).AsByte(), shuffleMask);
var highNibbles = Sse2.ShiftRightLogical(Sse2.ShiftRightLogical128BitLane(lowNibbles, 2).AsInt32(), 4).AsByte();
var indices = Sse2.And(Sse2.Or(lowNibbles, highNibbles), Vector128.Create((byte)0xF));
// Lookup the hex values at the positions of the indices
var hex = Ssse3.Shuffle(asciiUpperCase, indices);
// The high bytes (0x00) of the chars have also been converted to ascii hex '0', so clear them out.
hex = Sse2.And(hex, Vector128.Create((ushort)0xFF).AsByte());

Expand Down