Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Compare最適化 #135

Merged
merged 5 commits into from
Sep 23, 2022
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 35 additions & 46 deletions Source/Utf8Utility/Utf8Array.Comparison.cs
Original file line number Diff line number Diff line change
Expand Up @@ -48,58 +48,44 @@ partial struct Utf8Array
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int Compare(Utf8Array x, Utf8Array y, StringComparison comparisonType)
{
nuint xIndex = 0;
nuint yIndex = 0;
if (x.IsEmpty || y.IsEmpty)
{
goto Count;
}

Span<char> xBuffer = stackalloc char[2];
Span<char> yBuffer = stackalloc char[2];

while ((int)xIndex < x.ByteCount && (int)yIndex < y.ByteCount)
{
ref var xValueStart = ref Unsafe.AddByteOffset(ref x.DangerousGetReference(), xIndex);
ref var yValueStart = ref Unsafe.AddByteOffset(ref y.DangerousGetReference(), yIndex);
ref var xBufferStart = ref MemoryMarshal.GetReference(xBuffer);
ref var yBufferStart = ref MemoryMarshal.GetReference(yBuffer);

// Ascii文字の場合のみ、処理を最適化する。
scoped ReadOnlySpan<char> xSpan;
if (UnicodeUtility.IsAsciiCodePoint(xValueStart))
{
var charXValue = (char)xValueStart;
scoped ref var charXValueStart = ref charXValue;
ref var xStart = ref x.DangerousGetReference();
ref var yStart = ref y.DangerousGetReference();

xSpan = GetAsciiSpan(charXValueStart);
xIndex++;
}
else
{
WriteUtf16Span(ref xValueStart, x.ByteCount - (int)xIndex, out var bytesConsumed, xBuffer, out var charsWritten);
xSpan = MemoryMarshal.CreateReadOnlySpan(ref MemoryMarshal.GetReference(xBuffer), charsWritten);
xIndex += (uint)bytesConsumed;
}
ref var xEnd = ref Unsafe.AddByteOffset(ref xStart, (nint)(uint)x.ByteCount);
ref var yEnd = ref Unsafe.AddByteOffset(ref yStart, (nint)(uint)y.ByteCount);

// Ascii文字の場合のみ、処理を最適化する。
scoped ReadOnlySpan<char> ySpan;
if (UnicodeUtility.IsAsciiCodePoint(yValueStart))
{
var charYValue = (char)yValueStart;
scoped ref var charYValueStart = ref charYValue;
do
{
WriteUtf16Span(ref xStart, out var xBytesConsumed, xBuffer, out var charsWritten);
var xSpan = MemoryMarshal.CreateReadOnlySpan(ref xBufferStart, charsWritten);

ySpan = GetAsciiSpan(charYValueStart);
yIndex++;
}
else
{
WriteUtf16Span(ref yValueStart, y.ByteCount - (int)yIndex, out var bytesConsumed, yBuffer, out var charsWritten);
ySpan = MemoryMarshal.CreateReadOnlySpan(ref MemoryMarshal.GetReference(yBuffer), charsWritten);
yIndex += (uint)bytesConsumed;
}
WriteUtf16Span(ref yStart, out var yBytesConsumed, yBuffer, out charsWritten);
var ySpan = MemoryMarshal.CreateReadOnlySpan(ref yBufferStart, charsWritten);

var result = xSpan.CompareTo(ySpan, comparisonType);

if (result != 0)
{
return result;
}

xStart = ref Unsafe.AddByteOffset(ref xStart, (nint)(uint)xBytesConsumed);
yStart = ref Unsafe.AddByteOffset(ref yStart, (nint)(uint)yBytesConsumed);
}
while (Unsafe.IsAddressLessThan(ref xStart, ref xEnd) && Unsafe.IsAddressLessThan(ref yStart, ref yEnd));

Count:

// 到達条件
// 1. 比較対象の片方または両方のUTF-8配列が空。
Expand All @@ -108,21 +94,24 @@ public static int Compare(Utf8Array x, Utf8Array y, StringComparison comparisonT
return x.ByteCount - y.ByteCount;

[MethodImpl(MethodImplOptions.AggressiveInlining)]
static ReadOnlySpan<char> GetAsciiSpan(in char valueStart)
#if NET7_0_OR_GREATER
=> new(valueStart);
#else
=> MemoryMarshal.CreateReadOnlySpan(ref Unsafe.AsRef(valueStart), 1);
#endif

[MethodImpl(MethodImplOptions.AggressiveInlining)]
static void WriteUtf16Span(ref byte valueStart, int length, out int bytesConsumed, Span<char> destination, out int charsWritten)
static void WriteUtf16Span(ref byte valueStart, out int bytesConsumed, Span<char> destination, out int charsWritten)
{
// Ascii文字の場合のみ、処理を最適化する。
if (UnicodeUtility.IsAsciiCodePoint(valueStart))
{
MemoryMarshal.GetReference(destination) = (char)valueStart;
bytesConsumed = 1;
charsWritten = 1;
return;
}

// UTF-8文字を1文字だけ取得する。
var length = UnicodeUtility.GetUtf8SequenceLength(valueStart);
var span = MemoryMarshal.CreateReadOnlySpan(ref valueStart, length);
Rune.DecodeFromUtf8(span, out var rune, out bytesConsumed);

// UTF-16にエンコードできないことはないはず。
// https://github.com/dotnet/runtime/blob/v6.0.0/src/libraries/System.Private.CoreLib/src/System/Text/Rune.cs#L997-L1039
// https://github.com/dotnet/runtime/blob/v7.0.0-rc.1.22426.10/src/libraries/System.Private.CoreLib/src/System/Text/Rune.cs#L992-L1034
rune.TryEncodeToUtf16(destination, out charsWritten);
}
}
Expand Down