From d7eae3b39fbf5647de4754967c2d8f4449401406 Mon Sep 17 00:00:00 2001
From: Levi Broderick
Date: Thu, 8 Apr 2021 18:07:35 -0700
Subject: [PATCH] Fast-track ASCII/UTF8 conversion in wasm

---
 .../Text/Unicode/Utf8Utility.SizeOpt.cs | 22 ++++++++++++-------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.SizeOpt.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.SizeOpt.cs
index 856d4d38d36bb..e808cab8dd6e1 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.SizeOpt.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.SizeOpt.cs
@@ -19,8 +19,10 @@ public static OperationStatus TranscodeToUtf16(byte* pInputBuffer, int inputLeng
             Debug.Assert(outputCharsRemaining >= 0, "Destination length must not be negative.");
             Debug.Assert(pOutputBuffer != null || outputCharsRemaining == 0, "Destination length must be zero if destination buffer pointer is null.");
 
-            var input = new ReadOnlySpan<byte>(pInputBuffer, inputLength);
-            var output = new Span<char>(pOutputBuffer, outputCharsRemaining);
+            // try fast-tracking ASCII first before falling back to the standard loop
+            int numAsciiBytesTranscoded = (int)ASCIIUtility.WidenAsciiToUtf16(pInputBuffer, pOutputBuffer, (uint)Math.Min(inputLength, outputCharsRemaining));
+            var input = new ReadOnlySpan<byte>(pInputBuffer, inputLength).Slice(numAsciiBytesTranscoded);
+            var output = new Span<char>(pOutputBuffer, outputCharsRemaining).Slice(numAsciiBytesTranscoded);
 
             OperationStatus opStatus = OperationStatus.Done;
             while (!input.IsEmpty)
@@ -49,9 +51,10 @@ public static OperationStatus TranscodeToUtf8(char* pInputBuffer, int inputLengt
             Debug.Assert(outputBytesRemaining >= 0, "Destination length must not be negative.");
             Debug.Assert(pOutputBuffer != null || outputBytesRemaining == 0, "Destination length must be zero if destination buffer pointer is null.");
 
-
-            var input = new ReadOnlySpan<char>(pInputBuffer, inputLength);
-            var output = new Span<byte>(pOutputBuffer, outputBytesRemaining);
+            // try fast-tracking ASCII first before falling back to the standard loop
+            int numAsciiCharsTranscoded = (int)ASCIIUtility.NarrowUtf16ToAscii(pInputBuffer, pOutputBuffer, (uint)Math.Min(inputLength, outputBytesRemaining));
+            var input = new ReadOnlySpan<char>(pInputBuffer, inputLength).Slice(numAsciiCharsTranscoded);
+            var output = new Span<byte>(pOutputBuffer, outputBytesRemaining).Slice(numAsciiCharsTranscoded);
 
             OperationStatus opStatus = OperationStatus.Done;
             while (!input.IsEmpty)
@@ -86,9 +89,12 @@ public static OperationStatus TranscodeToUtf8(char* pInputBuffer, int inputLengt
             Debug.Assert(inputLength >= 0, "Input length must not be negative.");
             Debug.Assert(pInputBuffer != null || inputLength == 0, "Input length must be zero if input buffer pointer is null.");
 
-            var input = new ReadOnlySpan<byte>(pInputBuffer, inputLength);
-            int cumulativeUtf16CodeUnitCount = 0;
-            int cumulativeScalarValueCount = 0;
+            // try fast-tracking ASCII first before falling back to the standard loop
+            int numLeadingAsciiBytes = (int)ASCIIUtility.GetIndexOfFirstNonAsciiByte(pInputBuffer, (uint)inputLength);
+            var input = new ReadOnlySpan<byte>(pInputBuffer, inputLength).Slice(numLeadingAsciiBytes);
+            int cumulativeUtf16CodeUnitCount = numLeadingAsciiBytes;
+            int cumulativeScalarValueCount = numLeadingAsciiBytes;
+
             while (!input.IsEmpty)
             {
                 if (Rune.DecodeFromUtf8(input, out Rune rune, out int bytesConsumed) != OperationStatus.Done)