From ceef11f36c80cf4340618129ae5f1f5b1e203fa9 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 17 Aug 2022 10:55:18 -0600 Subject: [PATCH] [release/7.0-rc1] Optimized string.Replace(char, char) (#74047) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Optimized string.Replace(char, char) vector code path * Optimized code pathes even further * Do vectorized operation at the end of the string only once When the remaining length is a multiple of the vector size, then the remainder is processed twice. This is redundant, and not needed. This commit changes that, so that the remainder is processed only once when the remaining elements match. * Don't use trick for collapsed epilogs Cf. https://github.com/dotnet/runtime/pull/67049#discussion_r833689895 * Handle remainder vectorized even if remainingLength <= Vector.Count and added tests for this * Introduce (internal) Vector.LoadUnsafe and Vector.StoreUnsafe and use it in string.Replace(char, char) * Avoid Unsafe.As reinterpret casts by introducing string.GetRawStringDataAsUshort() internal method * Fixed copy/paste error (from local dev to repo) * PR Feedback * Fixed bug and added tests for this * Make condition about lengthToExamine clearer as suggested Co-authored-by: Günther Foidl --- .../Common/tests/Tests/System/StringTests.cs | 12 +++- .../src/System/Numerics/Vector.cs | 16 +++++ .../src/System/String.Manipulation.cs | 67 ++++++++++++------- .../src/System/String.cs | 1 + 4 files changed, 71 insertions(+), 25 deletions(-) diff --git a/src/libraries/Common/tests/Tests/System/StringTests.cs b/src/libraries/Common/tests/Tests/System/StringTests.cs index 7d2558a959324..c16a31a1f6014 100644 --- a/src/libraries/Common/tests/Tests/System/StringTests.cs +++ b/src/libraries/Common/tests/Tests/System/StringTests.cs @@ -4697,14 +4697,22 @@ public static void Remove_Invalid() [InlineData("Aaaaaaaa", 'A', 'a', "aaaaaaaa")] // Single iteration of vectorised path; no remainders through non-vectorised path // Three leading 'a's before a match (copyLength > 0), Single iteration of vectorised path; no remainders through non-vectorised path [InlineData("aaaAaaaaaaa", 'A', 'a', "aaaaaaaaaaa")] - // Single iteration of vectorised path; 3 remainders through non-vectorised path + // Single iteration of vectorised path; 3 remainders handled by vectorized path [InlineData("AaaaaaaaaAa", 'A', 'a', "aaaaaaaaaaa")] + // Single iteration of vectorized path; 0 remainders handled by vectorized path + [InlineData("aaaaaaaaaAa", 'A', 'a', "aaaaaaaaaaa")] + // Eight chars before a match (copyLength > 0), single iteration of vectorized path for the remainder + [InlineData("12345678AAAAAAA", 'A', 'a', "12345678aaaaaaa")] // ------------------------- For Vector.Count == 16 (AVX2) ------------------------- [InlineData("AaaaaaaaAaaaaaaa", 'A', 'a', "aaaaaaaaaaaaaaaa")] // Single iteration of vectorised path; no remainders through non-vectorised path // Three leading 'a's before a match (copyLength > 0), Single iteration of vectorised path; no remainders through non-vectorised path [InlineData("aaaAaaaaaaaAaaaaaaa", 'A', 'a', "aaaaaaaaaaaaaaaaaaa")] - // Single iteration of vectorised path; 3 remainders through non-vectorised path + // Single iteration of vectorised path; 3 remainders handled by vectorized path [InlineData("AaaaaaaaAaaaaaaaaAa", 'A', 'a', "aaaaaaaaaaaaaaaaaaa")] + // Single iteration of vectorized path; 0 remainders handled by vectorized path + [InlineData("aaaaaaaaaaaaaaaaaAa", 'A', 'a', "aaaaaaaaaaaaaaaaaaa")] + // Sixteen chars before a match (copyLength > 0), single iteration of vectorized path for the remainder + [InlineData("1234567890123456AAAAAAAAAAAAAAA", 'A', 'a', "1234567890123456aaaaaaaaaaaaaaa")] // ----------------------------------- General test data ----------------------------------- [InlineData("Hello", 'l', '!', "He!!o")] // 2 match, non-vectorised path [InlineData("Hello", 'e', 'e', "Hello")] // oldChar and newChar are same; nothing to replace diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.cs index b9eb29598bb6e..5dee06aa6adf0 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.cs @@ -895,6 +895,14 @@ public static bool LessThanOrEqualAll(Vector left, Vector right) public static bool LessThanOrEqualAny(Vector left, Vector right) where T : struct => LessThanOrEqual(left, right).As() != Vector.Zero; + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static Vector LoadUnsafe(ref T source, nuint elementOffset) + where T : struct + { + source = ref Unsafe.Add(ref source, elementOffset); + return Unsafe.ReadUnaligned>(ref Unsafe.As(ref source)); + } + /// Computes the maximum of two vectors on a per-element basis. /// The vector to compare with . /// The vector to compare with . @@ -1658,6 +1666,14 @@ public static Vector SquareRoot(Vector value) return result; } + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static void StoreUnsafe(this Vector source, ref T destination, nuint elementOffset) + where T : struct + { + destination = ref Unsafe.Add(ref destination, elementOffset); + Unsafe.WriteUnaligned(ref Unsafe.As(ref destination), source); + } + /// Subtracts two vectors to compute their difference. /// The vector from which will be subtracted. /// The vector to subtract from . diff --git a/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs b/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs index e9df30a7d78ec..675a8e188477b 100644 --- a/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs +++ b/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs @@ -994,7 +994,7 @@ public string Replace(char oldChar, char newChar) if (firstIndex < 0) return this; - int remainingLength = Length - firstIndex; + nuint remainingLength = (uint)(Length - firstIndex); string result = FastAllocateString(Length); int copyLength = firstIndex; @@ -1006,35 +1006,56 @@ public string Replace(char oldChar, char newChar) } // Copy the remaining characters, doing the replacement as we go. - ref ushort pSrc = ref Unsafe.Add(ref Unsafe.As(ref _firstChar), copyLength); - ref ushort pDst = ref Unsafe.Add(ref Unsafe.As(ref result._firstChar), copyLength); + ref ushort pSrc = ref Unsafe.Add(ref GetRawStringDataAsUInt16(), (uint)copyLength); + ref ushort pDst = ref Unsafe.Add(ref result.GetRawStringDataAsUInt16(), (uint)copyLength); + nuint i = 0; - if (Vector.IsHardwareAccelerated && remainingLength >= Vector.Count) + if (Vector.IsHardwareAccelerated && Length >= Vector.Count) { - Vector oldChars = new Vector(oldChar); - Vector newChars = new Vector(newChar); + Vector oldChars = new(oldChar); + Vector newChars = new(newChar); - do + Vector original; + Vector equals; + Vector results; + + if (remainingLength > (nuint)Vector.Count) { - Vector original = Unsafe.ReadUnaligned>(ref Unsafe.As(ref pSrc)); - Vector equals = Vector.Equals(original, oldChars); - Vector results = Vector.ConditionalSelect(equals, newChars, original); - Unsafe.WriteUnaligned(ref Unsafe.As(ref pDst), results); - - pSrc = ref Unsafe.Add(ref pSrc, Vector.Count); - pDst = ref Unsafe.Add(ref pDst, Vector.Count); - remainingLength -= Vector.Count; + nuint lengthToExamine = remainingLength - (nuint)Vector.Count; + + do + { + original = Vector.LoadUnsafe(ref pSrc, i); + equals = Vector.Equals(original, oldChars); + results = Vector.ConditionalSelect(equals, newChars, original); + results.StoreUnsafe(ref pDst, i); + + i += (nuint)Vector.Count; + } + while (i < lengthToExamine); } - while (remainingLength >= Vector.Count); - } - for (; remainingLength > 0; remainingLength--) - { - ushort currentChar = pSrc; - pDst = currentChar == oldChar ? newChar : currentChar; + // There are [0, Vector.Count) elements remaining now. + // As the operation is idempotent, and we know that in total there are at least Vector.Count + // elements available, we read a vector from the very end of the string, perform the replace + // and write to the destination at the very end. + // Thus we can eliminate the scalar processing of the remaining elements. + // We perform this operation even if there are 0 elements remaining, as it is cheaper than the + // additional check which would introduce a branch here. - pSrc = ref Unsafe.Add(ref pSrc, 1); - pDst = ref Unsafe.Add(ref pDst, 1); + i = (uint)(Length - Vector.Count); + original = Vector.LoadUnsafe(ref GetRawStringDataAsUInt16(), i); + equals = Vector.Equals(original, oldChars); + results = Vector.ConditionalSelect(equals, newChars, original); + results.StoreUnsafe(ref result.GetRawStringDataAsUInt16(), i); + } + else + { + for (; i < remainingLength; ++i) + { + ushort currentChar = Unsafe.Add(ref pSrc, i); + Unsafe.Add(ref pDst, i) = currentChar == oldChar ? newChar : currentChar; + } } return result; diff --git a/src/libraries/System.Private.CoreLib/src/System/String.cs b/src/libraries/System.Private.CoreLib/src/System/String.cs index 77d2168b0d38b..6fdbdfdc449e5 100644 --- a/src/libraries/System.Private.CoreLib/src/System/String.cs +++ b/src/libraries/System.Private.CoreLib/src/System/String.cs @@ -508,6 +508,7 @@ public static bool IsNullOrWhiteSpace([NotNullWhen(false)] string? value) public ref readonly char GetPinnableReference() => ref _firstChar; internal ref char GetRawStringData() => ref _firstChar; + internal ref ushort GetRawStringDataAsUInt16() => ref Unsafe.As(ref _firstChar); // Helper for encodings so they can talk to our buffer directly // stringLength must be the exact size we'll expect