From 085963c52c95edcbf35150dcaacfb4196504ff83 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sat, 14 Nov 2020 21:24:03 +0300 Subject: [PATCH 01/19] Fix GetNonRandomizedHashCodeOrdinalIgnoreCase --- .../src/System/String.Comparison.cs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs b/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs index 44deea83bf005..0b439e293f22e 100644 --- a/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs +++ b/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs @@ -853,21 +853,21 @@ internal unsafe int GetNonRandomizedHashCodeOrdinalIgnoreCase() // a very wide net because it will change, e.g., '^' to '~'. But that should // be ok because we expect this to be very rare in practice. - const uint NormalizeToLowercase = 0x0020_0020u; // valid both for big-endian and for little-endian + uint normalizeToLowercase = BitConverter.IsLittleEndian ? 0x0000_0020u : 0x0020_0000u; while (length > 2) { length -= 4; // Where length is 4n-1 (e.g. 3,7,11,15,19) this additionally consumes the null terminator - hash1 = (BitOperations.RotateLeft(hash1, 5) + hash1) ^ (ptr[0] | NormalizeToLowercase); - hash2 = (BitOperations.RotateLeft(hash2, 5) + hash2) ^ (ptr[1] | NormalizeToLowercase); + hash1 = (BitOperations.RotateLeft(hash1, 5) + hash1) ^ (ptr[0] | normalizeToLowercase); + hash2 = (BitOperations.RotateLeft(hash2, 5) + hash2) ^ (ptr[1] | normalizeToLowercase); ptr += 2; } if (length > 0) { // Where length is 4n-3 (e.g. 1,5,9,13,17) this additionally consumes the null terminator - hash2 = (BitOperations.RotateLeft(hash2, 5) + hash2) ^ (ptr[0] | NormalizeToLowercase); + hash2 = (BitOperations.RotateLeft(hash2, 5) + hash2) ^ (ptr[0] | normalizeToLowercase); } return (int)(hash1 + (hash2 * 1566083941)); From dbf4227948fcf06cc21b07e4027efbaf3244f108 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sat, 14 Nov 2020 21:29:31 +0300 Subject: [PATCH 02/19] Add a test --- .../tests/Generic/Dictionary/Dictionary.Tests.cs | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/libraries/System.Collections/tests/Generic/Dictionary/Dictionary.Tests.cs b/src/libraries/System.Collections/tests/Generic/Dictionary/Dictionary.Tests.cs index ee9957b938448..e1848b6773534 100644 --- a/src/libraries/System.Collections/tests/Generic/Dictionary/Dictionary.Tests.cs +++ b/src/libraries/System.Collections/tests/Generic/Dictionary/Dictionary.Tests.cs @@ -337,6 +337,22 @@ public void CantAcceptDuplicateKeysFromSourceDictionary() AssertExtensions.Throws(null, () => new Dictionary(source, StringComparer.OrdinalIgnoreCase)); } + [Fact] + // https://github.com/dotnet/runtime/issues/44681 + public void DictionaryOrdinalIgnoreCaseCyrillicKeys() + { + const string Lower = "абвгдеёжзийклмнопрстуфхцчшщьыъэюя"; + const string Higher = "АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЬЫЪЭЮЯ"; + + var dictionary = new Dictionary(StringComparer.OrdinalIgnoreCase); + + for (int i = 0; i < Lower.Length; i++) + { + dictionary[Lower[i].ToString()] = i; + Assert.Equal(i, dictionary[Higher[i].ToString()]); + } + } + public static IEnumerable CopyConstructorStringComparerData { get From c4de34fd7d1813a7fb0c09375e2bb552a9a051fb Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sat, 14 Nov 2020 22:16:39 +0300 Subject: [PATCH 03/19] correct (but slow) fix --- .../src/System/String.Comparison.cs | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs b/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs index 0b439e293f22e..cde461bdef70c 100644 --- a/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs +++ b/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs @@ -857,6 +857,11 @@ internal unsafe int GetNonRandomizedHashCodeOrdinalIgnoreCase() while (length > 2) { + if (!Utf16Utility.AllCharsInUInt32AreAscii(ptr[0]) || !Utf16Utility.AllCharsInUInt32AreAscii(ptr[1])) + { + goto NotAscii; + } + length -= 4; // Where length is 4n-1 (e.g. 3,7,11,15,19) this additionally consumes the null terminator hash1 = (BitOperations.RotateLeft(hash1, 5) + hash1) ^ (ptr[0] | normalizeToLowercase); @@ -866,12 +871,19 @@ internal unsafe int GetNonRandomizedHashCodeOrdinalIgnoreCase() if (length > 0) { + if (!Utf16Utility.AllCharsInUInt32AreAscii(ptr[0])) + { + goto NotAscii; + } + // Where length is 4n-3 (e.g. 1,5,9,13,17) this additionally consumes the null terminator hash2 = (BitOperations.RotateLeft(hash2, 5) + hash2) ^ (ptr[0] | normalizeToLowercase); } return (int)(hash1 + (hash2 * 1566083941)); } + NotAscii: + return str.GetHashCodeOrdinalIgnoreCase(); } // Determines whether a specified string is a prefix of the current instance From adbb85a0a02ab156be0c9d5d047bfd478e5ed37c Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sat, 14 Nov 2020 22:19:43 +0300 Subject: [PATCH 04/19] clean up --- .../System.Private.CoreLib/src/System/String.Comparison.cs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs b/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs index cde461bdef70c..e4b1864e5abb0 100644 --- a/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs +++ b/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs @@ -853,7 +853,7 @@ internal unsafe int GetNonRandomizedHashCodeOrdinalIgnoreCase() // a very wide net because it will change, e.g., '^' to '~'. But that should // be ok because we expect this to be very rare in practice. - uint normalizeToLowercase = BitConverter.IsLittleEndian ? 0x0000_0020u : 0x0020_0000u; + const uint NormalizeToLowercase = 0x0020_0020u; // valid both for big-endian and for little-endian while (length > 2) { @@ -882,8 +882,10 @@ internal unsafe int GetNonRandomizedHashCodeOrdinalIgnoreCase() return (int)(hash1 + (hash2 * 1566083941)); } + + // Slow Non-ASCII fallback NotAscii: - return str.GetHashCodeOrdinalIgnoreCase(); + return Marvin.ComputeHash32OrdinalIgnoreCase(ref _firstChar, _stringLength, 0, 0); // zero seed to make non-random } // Determines whether a specified string is a prefix of the current instance From 093412d2768ef43d882c74724889d00d1f052a7a Mon Sep 17 00:00:00 2001 From: Egor Bogatov Date: Sat, 14 Nov 2020 22:20:28 +0300 Subject: [PATCH 05/19] Update String.Comparison.cs --- .../System.Private.CoreLib/src/System/String.Comparison.cs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs b/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs index e4b1864e5abb0..83863e4b47ad7 100644 --- a/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs +++ b/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs @@ -864,8 +864,8 @@ internal unsafe int GetNonRandomizedHashCodeOrdinalIgnoreCase() length -= 4; // Where length is 4n-1 (e.g. 3,7,11,15,19) this additionally consumes the null terminator - hash1 = (BitOperations.RotateLeft(hash1, 5) + hash1) ^ (ptr[0] | normalizeToLowercase); - hash2 = (BitOperations.RotateLeft(hash2, 5) + hash2) ^ (ptr[1] | normalizeToLowercase); + hash1 = (BitOperations.RotateLeft(hash1, 5) + hash1) ^ (ptr[0] | NormalizeToLowercase); + hash2 = (BitOperations.RotateLeft(hash2, 5) + hash2) ^ (ptr[1] | NormalizeToLowercase); ptr += 2; } @@ -877,7 +877,7 @@ internal unsafe int GetNonRandomizedHashCodeOrdinalIgnoreCase() } // Where length is 4n-3 (e.g. 1,5,9,13,17) this additionally consumes the null terminator - hash2 = (BitOperations.RotateLeft(hash2, 5) + hash2) ^ (ptr[0] | normalizeToLowercase); + hash2 = (BitOperations.RotateLeft(hash2, 5) + hash2) ^ (ptr[0] | NormalizeToLowercase); } return (int)(hash1 + (hash2 * 1566083941)); From a6458842f98d7e59a46eefcfbaebf8f72c78423f Mon Sep 17 00:00:00 2001 From: Egor Bogatov Date: Sat, 14 Nov 2020 22:21:01 +0300 Subject: [PATCH 06/19] Update String.Comparison.cs --- .../System.Private.CoreLib/src/System/String.Comparison.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs b/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs index 83863e4b47ad7..5f07388235d47 100644 --- a/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs +++ b/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs @@ -885,7 +885,7 @@ internal unsafe int GetNonRandomizedHashCodeOrdinalIgnoreCase() // Slow Non-ASCII fallback NotAscii: - return Marvin.ComputeHash32OrdinalIgnoreCase(ref _firstChar, _stringLength, 0, 0); // zero seed to make non-random + return Marvin.ComputeHash32OrdinalIgnoreCase(ref _firstChar, _stringLength, 0, 0); // zero seed to make it non-random } // Determines whether a specified string is a prefix of the current instance From 101ce22e65526a775c11dc3151d846f5b8df3862 Mon Sep 17 00:00:00 2001 From: Egor Bogatov Date: Sat, 14 Nov 2020 22:28:23 +0300 Subject: [PATCH 07/19] Update String.Comparison.cs --- .../System.Private.CoreLib/src/System/String.Comparison.cs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs b/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs index 5f07388235d47..46b3cf3f12ed0 100644 --- a/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs +++ b/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs @@ -6,6 +6,7 @@ using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; +using System.Text.Unicode; using Internal.Runtime.CompilerServices; From d9cd23c568662a23ff5e1d3f4c2fc70c8671f647 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sat, 14 Nov 2020 23:38:38 +0300 Subject: [PATCH 08/19] Address feedback and fix test --- .../HashCollisionScenarios/OutOfBoundsRegression.cs | 4 ++-- .../src/System/String.Comparison.cs | 8 +++++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/libraries/System.Collections/tests/Generic/Dictionary/HashCollisionScenarios/OutOfBoundsRegression.cs b/src/libraries/System.Collections/tests/Generic/Dictionary/HashCollisionScenarios/OutOfBoundsRegression.cs index 934e57aafe4cc..bd8289fa21a06 100644 --- a/src/libraries/System.Collections/tests/Generic/Dictionary/HashCollisionScenarios/OutOfBoundsRegression.cs +++ b/src/libraries/System.Collections/tests/Generic/Dictionary/HashCollisionScenarios/OutOfBoundsRegression.cs @@ -193,7 +193,7 @@ private static void RunCollectionTestCommon( { string newKey = GenerateCollidingString(i * Stride + StartOfRange); Assert.Equal(0, _lazyGetNonRandomizedHashCodeDel.Value(newKey)); // ensure has a zero hash code Ordinal - Assert.Equal(0x24716ca0, _lazyGetNonRandomizedOrdinalIgnoreCaseHashCodeDel.Value(newKey)); // ensure has a zero hash code OrdinalIgnoreCase + Assert.Equal(0x6C3F4A5C, _lazyGetNonRandomizedOrdinalIgnoreCaseHashCodeDel.Value(newKey)); // ensure has a zero hash code OrdinalIgnoreCase addKeyCallback(collection, newKey); allKeys.Add(newKey); @@ -212,7 +212,7 @@ private static void RunCollectionTestCommon( { string newKey = GenerateCollidingString(i * Stride + StartOfRange); Assert.Equal(0, _lazyGetNonRandomizedHashCodeDel.Value(newKey)); // ensure has a zero hash code Ordinal - Assert.Equal(0x24716ca0, _lazyGetNonRandomizedOrdinalIgnoreCaseHashCodeDel.Value(newKey)); // ensure has a zero hash code OrdinalIgnoreCase + Assert.Equal(0x6C3F4A5C, _lazyGetNonRandomizedOrdinalIgnoreCaseHashCodeDel.Value(newKey)); // ensure has a zero hash code OrdinalIgnoreCase addKeyCallback(collection, newKey); allKeys.Add(newKey); diff --git a/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs b/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs index 46b3cf3f12ed0..1c953265361f1 100644 --- a/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs +++ b/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs @@ -858,15 +858,17 @@ internal unsafe int GetNonRandomizedHashCodeOrdinalIgnoreCase() while (length > 2) { - if (!Utf16Utility.AllCharsInUInt32AreAscii(ptr[0]) || !Utf16Utility.AllCharsInUInt32AreAscii(ptr[1])) + uint p0 = ptr[0]; + uint p1 = ptr[1]; + if (!Utf16Utility.AllCharsInUInt32AreAscii(p0 | p1)) { goto NotAscii; } length -= 4; // Where length is 4n-1 (e.g. 3,7,11,15,19) this additionally consumes the null terminator - hash1 = (BitOperations.RotateLeft(hash1, 5) + hash1) ^ (ptr[0] | NormalizeToLowercase); - hash2 = (BitOperations.RotateLeft(hash2, 5) + hash2) ^ (ptr[1] | NormalizeToLowercase); + hash1 = (BitOperations.RotateLeft(hash1, 5) + hash1) ^ (p0 | NormalizeToLowercase); + hash2 = (BitOperations.RotateLeft(hash2, 5) + hash2) ^ (p1 | NormalizeToLowercase); ptr += 2; } From 641fcbfd7db7108b9068743620dd3fb0ee348bf8 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sun, 15 Nov 2020 01:04:08 +0300 Subject: [PATCH 09/19] undo change in tests --- .../HashCollisionScenarios/OutOfBoundsRegression.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/libraries/System.Collections/tests/Generic/Dictionary/HashCollisionScenarios/OutOfBoundsRegression.cs b/src/libraries/System.Collections/tests/Generic/Dictionary/HashCollisionScenarios/OutOfBoundsRegression.cs index bd8289fa21a06..934e57aafe4cc 100644 --- a/src/libraries/System.Collections/tests/Generic/Dictionary/HashCollisionScenarios/OutOfBoundsRegression.cs +++ b/src/libraries/System.Collections/tests/Generic/Dictionary/HashCollisionScenarios/OutOfBoundsRegression.cs @@ -193,7 +193,7 @@ private static void RunCollectionTestCommon( { string newKey = GenerateCollidingString(i * Stride + StartOfRange); Assert.Equal(0, _lazyGetNonRandomizedHashCodeDel.Value(newKey)); // ensure has a zero hash code Ordinal - Assert.Equal(0x6C3F4A5C, _lazyGetNonRandomizedOrdinalIgnoreCaseHashCodeDel.Value(newKey)); // ensure has a zero hash code OrdinalIgnoreCase + Assert.Equal(0x24716ca0, _lazyGetNonRandomizedOrdinalIgnoreCaseHashCodeDel.Value(newKey)); // ensure has a zero hash code OrdinalIgnoreCase addKeyCallback(collection, newKey); allKeys.Add(newKey); @@ -212,7 +212,7 @@ private static void RunCollectionTestCommon( { string newKey = GenerateCollidingString(i * Stride + StartOfRange); Assert.Equal(0, _lazyGetNonRandomizedHashCodeDel.Value(newKey)); // ensure has a zero hash code Ordinal - Assert.Equal(0x6C3F4A5C, _lazyGetNonRandomizedOrdinalIgnoreCaseHashCodeDel.Value(newKey)); // ensure has a zero hash code OrdinalIgnoreCase + Assert.Equal(0x24716ca0, _lazyGetNonRandomizedOrdinalIgnoreCaseHashCodeDel.Value(newKey)); // ensure has a zero hash code OrdinalIgnoreCase addKeyCallback(collection, newKey); allKeys.Add(newKey); From 842b9805f50c7055c4ee3e889d5e63dd88ffc9f2 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sun, 15 Nov 2020 02:08:22 +0300 Subject: [PATCH 10/19] Address feedback --- .../src/System/String.Comparison.cs | 95 +++++++++++-------- 1 file changed, 55 insertions(+), 40 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs b/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs index 1c953265361f1..b54894387d993 100644 --- a/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs +++ b/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs @@ -7,6 +7,7 @@ using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using System.Text.Unicode; +using System.Buffers; using Internal.Runtime.CompilerServices; @@ -835,60 +836,74 @@ internal unsafe int GetNonRandomizedHashCode() } } - // Use this if and only if 'Denial of Service' attacks are not a concern (i.e. never used for free-form user input), - // or are otherwise mitigated - internal unsafe int GetNonRandomizedHashCodeOrdinalIgnoreCase() + internal int GetNonRandomizedHashCodeOrdinalIgnoreCase() { - fixed (char* src = &_firstChar) - { - Debug.Assert(src[this.Length] == '\0', "src[this.Length] == '\\0'"); - Debug.Assert(((int)src) % 4 == 0, "Managed string should start at 4 bytes boundary"); + return GetNonRandomizedHashCodeOrdinalIgnoreCaseStatic(ref _firstChar, Length, true); + } - uint hash1 = (5381 << 16) + 5381; - uint hash2 = hash1; + private static int GetNonRandomizedHashCodeOrdinalIgnoreCaseStatic(ref char firstChar, int length, bool normalizeNonAscii) + { + uint hash1 = (5381 << 16) + 5381; + uint hash2 = hash1; - uint* ptr = (uint*)src; - int length = this.Length; + // We "normalize to lowercase" every char by ORing with 0x0020. This casts + // a very wide net because it will change, e.g., '^' to '~'. But that should + // be ok because we expect this to be very rare in practice. - // We "normalize to lowercase" every char by ORing with 0x0020. This casts - // a very wide net because it will change, e.g., '^' to '~'. But that should - // be ok because we expect this to be very rare in practice. + const uint NormalizeToLowercase = 0x0020_0020u; // valid both for big-endian and for little-endian - const uint NormalizeToLowercase = 0x0020_0020u; // valid both for big-endian and for little-endian + int i = 0; + int count = 0; - while (length > 2) + while (count > 2) + { + uint p0 = Unsafe.ReadUnaligned(ref Unsafe.Add(ref Unsafe.As(ref firstChar), i)); + uint p1 = Unsafe.ReadUnaligned(ref Unsafe.Add(ref Unsafe.As(ref firstChar), i + 4)); + if (normalizeNonAscii && !Utf16Utility.AllCharsInUInt32AreAscii(p0 | p1)) { - uint p0 = ptr[0]; - uint p1 = ptr[1]; - if (!Utf16Utility.AllCharsInUInt32AreAscii(p0 | p1)) - { - goto NotAscii; - } - - length -= 4; - // Where length is 4n-1 (e.g. 3,7,11,15,19) this additionally consumes the null terminator - hash1 = (BitOperations.RotateLeft(hash1, 5) + hash1) ^ (p0 | NormalizeToLowercase); - hash2 = (BitOperations.RotateLeft(hash2, 5) + hash2) ^ (p1 | NormalizeToLowercase); - ptr += 2; + goto NotAscii; } + count -= 4; + // Where count is 4n-1 (e.g. 3,7,11,15,19) this additionally consumes the null terminator + hash1 = (BitOperations.RotateLeft(hash1, 5) + hash1) ^ (p0 | NormalizeToLowercase); + hash2 = (BitOperations.RotateLeft(hash2, 5) + hash2) ^ (p1 | NormalizeToLowercase); + i += 8; + } - if (length > 0) + if (count > 0) + { + uint p0 = Unsafe.ReadUnaligned(ref Unsafe.Add(ref Unsafe.As(ref firstChar), i)); + if (normalizeNonAscii && !Utf16Utility.AllCharsInUInt32AreAscii(p0)) { - if (!Utf16Utility.AllCharsInUInt32AreAscii(ptr[0])) - { - goto NotAscii; - } - - // Where length is 4n-3 (e.g. 1,5,9,13,17) this additionally consumes the null terminator - hash2 = (BitOperations.RotateLeft(hash2, 5) + hash2) ^ (ptr[0] | NormalizeToLowercase); + goto NotAscii; } - return (int)(hash1 + (hash2 * 1566083941)); + // Where count is 4n-3 (e.g. 1,5,9,13,17) this additionally consumes the null terminator + hash2 = (BitOperations.RotateLeft(hash2, 5) + hash2) ^ (p0 | NormalizeToLowercase); } - // Slow Non-ASCII fallback - NotAscii: - return Marvin.ComputeHash32OrdinalIgnoreCase(ref _firstChar, _stringLength, 0, 0); // zero seed to make it non-random + return (int)(hash1 + (hash2 * 1566083941)); + + NotAscii: + return GetNonRandomizedHashCodeOrdinalIgnoreCaseSlow(ref firstChar, length); + + static int GetNonRandomizedHashCodeOrdinalIgnoreCaseSlow(ref char firstChar, int length) + { + char[]? borrowedArr = null; + Span scratch = (uint)length <= 64 ? stackalloc char[64] : (borrowedArr = ArrayPool.Shared.Rent(length)); + + int charsWritten = System.Globalization.Ordinal.ToUpperOrdinal( + MemoryMarshal.CreateReadOnlySpan(ref firstChar, length), scratch); + + ref char upperCase = ref MemoryMarshal.GetReference(scratch); + int hashCode = GetNonRandomizedHashCodeOrdinalIgnoreCaseStatic(ref upperCase, length, false); + + if (borrowedArr != null) + { + ArrayPool.Shared.Return(borrowedArr); + } + return hashCode; + } } // Determines whether a specified string is a prefix of the current instance From b0e7e86485f70b5f2b5ec833c9a2e10a87056003 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sun, 15 Nov 2020 02:21:57 +0300 Subject: [PATCH 11/19] Clean up --- .../src/System/String.Comparison.cs | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs b/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs index b54894387d993..839af993255d1 100644 --- a/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs +++ b/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs @@ -843,6 +843,8 @@ internal int GetNonRandomizedHashCodeOrdinalIgnoreCase() private static int GetNonRandomizedHashCodeOrdinalIgnoreCaseStatic(ref char firstChar, int length, bool normalizeNonAscii) { + Debug.Assert(Unsafe.Add(ref firstChar, length) == '\0', "*(&firstChar + length) == '\\0'"); + uint hash1 = (5381 << 16) + 5381; uint hash2 = hash1; @@ -857,8 +859,9 @@ private static int GetNonRandomizedHashCodeOrdinalIgnoreCaseStatic(ref char firs while (count > 2) { - uint p0 = Unsafe.ReadUnaligned(ref Unsafe.Add(ref Unsafe.As(ref firstChar), i)); - uint p1 = Unsafe.ReadUnaligned(ref Unsafe.Add(ref Unsafe.As(ref firstChar), i + 4)); + ref byte firstByte = ref Unsafe.As(ref firstChar); + uint p0 = Unsafe.ReadUnaligned(ref Unsafe.Add(ref firstByte, i)); + uint p1 = Unsafe.ReadUnaligned(ref Unsafe.Add(ref firstByte, i + 4)); if (normalizeNonAscii && !Utf16Utility.AllCharsInUInt32AreAscii(p0 | p1)) { goto NotAscii; @@ -872,7 +875,8 @@ private static int GetNonRandomizedHashCodeOrdinalIgnoreCaseStatic(ref char firs if (count > 0) { - uint p0 = Unsafe.ReadUnaligned(ref Unsafe.Add(ref Unsafe.As(ref firstChar), i)); + uint p0 = Unsafe.ReadUnaligned( + ref Unsafe.Add(ref Unsafe.As(ref firstChar), i)); if (normalizeNonAscii && !Utf16Utility.AllCharsInUInt32AreAscii(p0)) { goto NotAscii; @@ -890,13 +894,16 @@ private static int GetNonRandomizedHashCodeOrdinalIgnoreCaseStatic(ref char firs static int GetNonRandomizedHashCodeOrdinalIgnoreCaseSlow(ref char firstChar, int length) { char[]? borrowedArr = null; - Span scratch = (uint)length <= 64 ? stackalloc char[64] : (borrowedArr = ArrayPool.Shared.Rent(length)); + Span scratch = (uint)length <= 64 ? + stackalloc char[64] : (borrowedArr = ArrayPool.Shared.Rent(length)); int charsWritten = System.Globalization.Ordinal.ToUpperOrdinal( MemoryMarshal.CreateReadOnlySpan(ref firstChar, length), scratch); - ref char upperCase = ref MemoryMarshal.GetReference(scratch); - int hashCode = GetNonRandomizedHashCodeOrdinalIgnoreCaseStatic(ref upperCase, length, false); + Debug.Assert(charsWritten == length); + + int hashCode = GetNonRandomizedHashCodeOrdinalIgnoreCaseStatic( + ref MemoryMarshal.GetReference(scratch), length, false); if (borrowedArr != null) { From 7e007d6109ed9f156d045a61186263d1e8a280cb Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sun, 15 Nov 2020 02:42:10 +0300 Subject: [PATCH 12/19] Address feedback --- .../System.Private.CoreLib/src/System/String.Comparison.cs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs b/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs index 839af993255d1..04458909dd44e 100644 --- a/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs +++ b/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs @@ -855,7 +855,7 @@ private static int GetNonRandomizedHashCodeOrdinalIgnoreCaseStatic(ref char firs const uint NormalizeToLowercase = 0x0020_0020u; // valid both for big-endian and for little-endian int i = 0; - int count = 0; + int count = length; while (count > 2) { @@ -894,14 +894,15 @@ private static int GetNonRandomizedHashCodeOrdinalIgnoreCaseStatic(ref char firs static int GetNonRandomizedHashCodeOrdinalIgnoreCaseSlow(ref char firstChar, int length) { char[]? borrowedArr = null; - Span scratch = (uint)length <= 64 ? - stackalloc char[64] : (borrowedArr = ArrayPool.Shared.Rent(length)); + Span scratch = (uint)length < 64 ? + stackalloc char[64] : (borrowedArr = ArrayPool.Shared.Rent(length + 1)); int charsWritten = System.Globalization.Ordinal.ToUpperOrdinal( MemoryMarshal.CreateReadOnlySpan(ref firstChar, length), scratch); Debug.Assert(charsWritten == length); + scratch[length] = '\0'; int hashCode = GetNonRandomizedHashCodeOrdinalIgnoreCaseStatic( ref MemoryMarshal.GetReference(scratch), length, false); From 8004abd37d1d4bd10e4b824fda9fbdd7c28a5771 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sun, 15 Nov 2020 03:28:52 +0300 Subject: [PATCH 13/19] Address feedback --- .../System.Private.CoreLib/src/System/String.Comparison.cs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs b/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs index 04458909dd44e..82e83d630e6da 100644 --- a/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs +++ b/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs @@ -1,13 +1,13 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +using System.Buffers; using System.Diagnostics; using System.Globalization; using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using System.Text.Unicode; -using System.Buffers; using Internal.Runtime.CompilerServices; @@ -854,7 +854,7 @@ private static int GetNonRandomizedHashCodeOrdinalIgnoreCaseStatic(ref char firs const uint NormalizeToLowercase = 0x0020_0020u; // valid both for big-endian and for little-endian - int i = 0; + nint i = 0; int count = length; while (count > 2) @@ -889,11 +889,14 @@ private static int GetNonRandomizedHashCodeOrdinalIgnoreCaseStatic(ref char firs return (int)(hash1 + (hash2 * 1566083941)); NotAscii: + // Convert the string to upper case using Globalization + // and try again the same algorithm (without IsAscii validation this time) return GetNonRandomizedHashCodeOrdinalIgnoreCaseSlow(ref firstChar, length); static int GetNonRandomizedHashCodeOrdinalIgnoreCaseSlow(ref char firstChar, int length) { char[]? borrowedArr = null; + // Important: add an additional char for '\0' Span scratch = (uint)length < 64 ? stackalloc char[64] : (borrowedArr = ArrayPool.Shared.Rent(length + 1)); From 3f91d95c33c442d59e06ab7149809c28284d6f1d Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sun, 15 Nov 2020 17:57:36 +0300 Subject: [PATCH 14/19] Minor optimizations: manual loop unswitching --- .../src/System/String.Comparison.cs | 66 +++++++++++++------ 1 file changed, 45 insertions(+), 21 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs b/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs index 82e83d630e6da..94701e67decd6 100644 --- a/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs +++ b/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs @@ -857,33 +857,57 @@ private static int GetNonRandomizedHashCodeOrdinalIgnoreCaseStatic(ref char firs nint i = 0; int count = length; - while (count > 2) + if (normalizeNonAscii) { - ref byte firstByte = ref Unsafe.As(ref firstChar); - uint p0 = Unsafe.ReadUnaligned(ref Unsafe.Add(ref firstByte, i)); - uint p1 = Unsafe.ReadUnaligned(ref Unsafe.Add(ref firstByte, i + 4)); - if (normalizeNonAscii && !Utf16Utility.AllCharsInUInt32AreAscii(p0 | p1)) + while (count > 2) { - goto NotAscii; + ref byte firstByte = ref Unsafe.As(ref firstChar); + uint p0 = Unsafe.ReadUnaligned(ref Unsafe.Add(ref firstByte, i)); + uint p1 = Unsafe.ReadUnaligned(ref Unsafe.Add(ref firstByte, i + 4)); + if (!AllCharsInUInt32AreAscii(p0 | p1)) + { + goto NotAscii; + } + + // Where count is 4n-1 (e.g. 3,7,11,15,19) this additionally consumes the null terminator + hash1 = (BitOperations.RotateLeft(hash1, 5) + hash1) ^ (p0 | NormalizeToLowercase); + hash2 = (BitOperations.RotateLeft(hash2, 5) + hash2) ^ (p1 | NormalizeToLowercase); + + count -= 4; + i += 8; } - count -= 4; - // Where count is 4n-1 (e.g. 3,7,11,15,19) this additionally consumes the null terminator - hash1 = (BitOperations.RotateLeft(hash1, 5) + hash1) ^ (p0 | NormalizeToLowercase); - hash2 = (BitOperations.RotateLeft(hash2, 5) + hash2) ^ (p1 | NormalizeToLowercase); - i += 8; - } - if (count > 0) + if (count > 0) + { + uint p0 = Unsafe.ReadUnaligned(ref Unsafe.Add(ref Unsafe.As(ref firstChar), i)); + if (!AllCharsInUInt32AreAscii(p0)) + { + goto NotAscii; + } + + // Where count is 4n-3 (e.g. 1,5,9,13,17) this additionally consumes the null terminator + hash2 = (BitOperations.RotateLeft(hash2, 5) + hash2) ^ (p0 | NormalizeToLowercase); + } + } + else { - uint p0 = Unsafe.ReadUnaligned( - ref Unsafe.Add(ref Unsafe.As(ref firstChar), i)); - if (normalizeNonAscii && !Utf16Utility.AllCharsInUInt32AreAscii(p0)) + // The duplication can be removed once JIT gets "Loop Unswitching" optimization + // so the invariant "if (normalizeNonAscii)" check can be done inside the loop + while (count > 2) { - goto NotAscii; + ref byte firstByte = ref Unsafe.As(ref firstChar); + uint p0 = Unsafe.ReadUnaligned(ref Unsafe.Add(ref firstByte, i)); + uint p1 = Unsafe.ReadUnaligned(ref Unsafe.Add(ref firstByte, i + 4)); + count -= 4; + hash1 = (BitOperations.RotateLeft(hash1, 5) + hash1) ^ (p0 | NormalizeToLowercase); + hash2 = (BitOperations.RotateLeft(hash2, 5) + hash2) ^ (p1 | NormalizeToLowercase); + i += 8; + } + if (count > 0) + { + uint p0 = Unsafe.ReadUnaligned(ref Unsafe.Add(ref Unsafe.As(ref firstChar), i)); + hash2 = (BitOperations.RotateLeft(hash2, 5) + hash2) ^ (p0 | NormalizeToLowercase); } - - // Where count is 4n-3 (e.g. 1,5,9,13,17) this additionally consumes the null terminator - hash2 = (BitOperations.RotateLeft(hash2, 5) + hash2) ^ (p0 | NormalizeToLowercase); } return (int)(hash1 + (hash2 * 1566083941)); @@ -896,7 +920,7 @@ private static int GetNonRandomizedHashCodeOrdinalIgnoreCaseStatic(ref char firs static int GetNonRandomizedHashCodeOrdinalIgnoreCaseSlow(ref char firstChar, int length) { char[]? borrowedArr = null; - // Important: add an additional char for '\0' + // Important: leave an additional space for '\0' Span scratch = (uint)length < 64 ? stackalloc char[64] : (borrowedArr = ArrayPool.Shared.Rent(length + 1)); From 8e8d9005756e2435d69f7030a10ba2a71ab714ea Mon Sep 17 00:00:00 2001 From: Levi Broderick Date: Sat, 14 Nov 2020 17:27:29 -0800 Subject: [PATCH 15/19] Update internal comparers & out-of-bounds regression tests --- .../OutOfBoundsRegression.cs | 225 ++++++++++++------ .../RandomizedStringEqualityComparer.cs | 25 -- 2 files changed, 150 insertions(+), 100 deletions(-) diff --git a/src/libraries/System.Collections/tests/Generic/Dictionary/HashCollisionScenarios/OutOfBoundsRegression.cs b/src/libraries/System.Collections/tests/Generic/Dictionary/HashCollisionScenarios/OutOfBoundsRegression.cs index 934e57aafe4cc..eb7a7bf287542 100644 --- a/src/libraries/System.Collections/tests/Generic/Dictionary/HashCollisionScenarios/OutOfBoundsRegression.cs +++ b/src/libraries/System.Collections/tests/Generic/Dictionary/HashCollisionScenarios/OutOfBoundsRegression.cs @@ -54,56 +54,56 @@ public static void ComparerImplementations_Dictionary_WithWellKnownStringCompare RunDictionaryTest( equalityComparer: null, - expectedInternalComparerBeforeCollisionThreshold: nonRandomizedOrdinalComparerType, - expectedPublicComparerBeforeCollisionThreshold: EqualityComparer.Default.GetType(), - expectedComparerAfterCollisionThreshold: randomizedOrdinalComparerType); + expectedInternalComparerTypeBeforeCollisionThreshold: nonRandomizedOrdinalComparerType, + expectedPublicComparerBeforeCollisionThreshold: EqualityComparer.Default, + expectedInternalComparerTypeAfterCollisionThreshold: randomizedOrdinalComparerType); // EqualityComparer.Default comparer RunDictionaryTest( equalityComparer: EqualityComparer.Default, - expectedInternalComparerBeforeCollisionThreshold: nonRandomizedOrdinalComparerType, - expectedPublicComparerBeforeCollisionThreshold: EqualityComparer.Default.GetType(), - expectedComparerAfterCollisionThreshold: randomizedOrdinalComparerType); + expectedInternalComparerTypeBeforeCollisionThreshold: nonRandomizedOrdinalComparerType, + expectedPublicComparerBeforeCollisionThreshold: EqualityComparer.Default, + expectedInternalComparerTypeAfterCollisionThreshold: randomizedOrdinalComparerType); // Ordinal comparer RunDictionaryTest( equalityComparer: StringComparer.Ordinal, - expectedInternalComparerBeforeCollisionThreshold: nonRandomizedOrdinalComparerType, - expectedPublicComparerBeforeCollisionThreshold: StringComparer.Ordinal.GetType(), - expectedComparerAfterCollisionThreshold: randomizedOrdinalComparerType); + expectedInternalComparerTypeBeforeCollisionThreshold: nonRandomizedOrdinalComparerType, + expectedPublicComparerBeforeCollisionThreshold: StringComparer.Ordinal, + expectedInternalComparerTypeAfterCollisionThreshold: randomizedOrdinalComparerType); // OrdinalIgnoreCase comparer RunDictionaryTest( equalityComparer: StringComparer.OrdinalIgnoreCase, - expectedInternalComparerBeforeCollisionThreshold: nonRandomizedOrdinalIgnoreCaseComparerType, - expectedPublicComparerBeforeCollisionThreshold: StringComparer.OrdinalIgnoreCase.GetType(), - expectedComparerAfterCollisionThreshold: randomizedOrdinalIgnoreCaseComparerType); + expectedInternalComparerTypeBeforeCollisionThreshold: nonRandomizedOrdinalIgnoreCaseComparerType, + expectedPublicComparerBeforeCollisionThreshold: StringComparer.OrdinalIgnoreCase, + expectedInternalComparerTypeAfterCollisionThreshold: randomizedOrdinalIgnoreCaseComparerType); // linguistic comparer (not optimized) RunDictionaryTest( equalityComparer: StringComparer.InvariantCulture, - expectedInternalComparerBeforeCollisionThreshold: StringComparer.InvariantCulture.GetType(), - expectedPublicComparerBeforeCollisionThreshold: StringComparer.InvariantCulture.GetType(), - expectedComparerAfterCollisionThreshold: StringComparer.InvariantCulture.GetType()); + expectedInternalComparerTypeBeforeCollisionThreshold: StringComparer.InvariantCulture.GetType(), + expectedPublicComparerBeforeCollisionThreshold: StringComparer.InvariantCulture, + expectedInternalComparerTypeAfterCollisionThreshold: StringComparer.InvariantCulture.GetType()); static void RunDictionaryTest( IEqualityComparer equalityComparer, - Type expectedInternalComparerBeforeCollisionThreshold, - Type expectedPublicComparerBeforeCollisionThreshold, - Type expectedComparerAfterCollisionThreshold) + Type expectedInternalComparerTypeBeforeCollisionThreshold, + IEqualityComparer expectedPublicComparerBeforeCollisionThreshold, + Type expectedInternalComparerTypeAfterCollisionThreshold) { RunCollectionTestCommon( () => new Dictionary(equalityComparer), (dictionary, key) => dictionary.Add(key, null), (dictionary, key) => dictionary.ContainsKey(key), dictionary => dictionary.Comparer, - expectedInternalComparerBeforeCollisionThreshold, + expectedInternalComparerTypeBeforeCollisionThreshold, expectedPublicComparerBeforeCollisionThreshold, - expectedComparerAfterCollisionThreshold); + expectedInternalComparerTypeAfterCollisionThreshold); } } @@ -119,56 +119,56 @@ public static void ComparerImplementations_HashSet_WithWellKnownStringComparers( RunHashSetTest( equalityComparer: null, - expectedInternalComparerBeforeCollisionThreshold: nonRandomizedOrdinalComparerType, - expectedPublicComparerBeforeCollisionThreshold: EqualityComparer.Default.GetType(), - expectedComparerAfterCollisionThreshold: randomizedOrdinalComparerType); + expectedInternalComparerTypeBeforeCollisionThreshold: nonRandomizedOrdinalComparerType, + expectedPublicComparerBeforeCollisionThreshold: EqualityComparer.Default, + expectedInternalComparerTypeAfterCollisionThreshold: randomizedOrdinalComparerType); // EqualityComparer.Default comparer RunHashSetTest( equalityComparer: EqualityComparer.Default, - expectedInternalComparerBeforeCollisionThreshold: nonRandomizedOrdinalComparerType, - expectedPublicComparerBeforeCollisionThreshold: EqualityComparer.Default.GetType(), - expectedComparerAfterCollisionThreshold: randomizedOrdinalComparerType); + expectedInternalComparerTypeBeforeCollisionThreshold: nonRandomizedOrdinalComparerType, + expectedPublicComparerBeforeCollisionThreshold: EqualityComparer.Default, + expectedInternalComparerTypeAfterCollisionThreshold: randomizedOrdinalComparerType); // Ordinal comparer RunHashSetTest( equalityComparer: StringComparer.Ordinal, - expectedInternalComparerBeforeCollisionThreshold: nonRandomizedOrdinalComparerType, - expectedPublicComparerBeforeCollisionThreshold: StringComparer.Ordinal.GetType(), - expectedComparerAfterCollisionThreshold: randomizedOrdinalComparerType); + expectedInternalComparerTypeBeforeCollisionThreshold: nonRandomizedOrdinalComparerType, + expectedPublicComparerBeforeCollisionThreshold: StringComparer.Ordinal, + expectedInternalComparerTypeAfterCollisionThreshold: randomizedOrdinalComparerType); // OrdinalIgnoreCase comparer RunHashSetTest( equalityComparer: StringComparer.OrdinalIgnoreCase, - expectedInternalComparerBeforeCollisionThreshold: nonRandomizedOrdinalIgnoreCaseComparerType, - expectedPublicComparerBeforeCollisionThreshold: StringComparer.OrdinalIgnoreCase.GetType(), - expectedComparerAfterCollisionThreshold: randomizedOrdinalIgnoreCaseComparerType); + expectedInternalComparerTypeBeforeCollisionThreshold: nonRandomizedOrdinalIgnoreCaseComparerType, + expectedPublicComparerBeforeCollisionThreshold: StringComparer.OrdinalIgnoreCase, + expectedInternalComparerTypeAfterCollisionThreshold: randomizedOrdinalIgnoreCaseComparerType); // linguistic comparer (not optimized) RunHashSetTest( equalityComparer: StringComparer.InvariantCulture, - expectedInternalComparerBeforeCollisionThreshold: StringComparer.InvariantCulture.GetType(), - expectedPublicComparerBeforeCollisionThreshold: StringComparer.InvariantCulture.GetType(), - expectedComparerAfterCollisionThreshold: StringComparer.InvariantCulture.GetType()); + expectedInternalComparerTypeBeforeCollisionThreshold: StringComparer.InvariantCulture.GetType(), + expectedPublicComparerBeforeCollisionThreshold: StringComparer.InvariantCulture, + expectedInternalComparerTypeAfterCollisionThreshold: StringComparer.InvariantCulture.GetType()); static void RunHashSetTest( IEqualityComparer equalityComparer, - Type expectedInternalComparerBeforeCollisionThreshold, - Type expectedPublicComparerBeforeCollisionThreshold, - Type expectedComparerAfterCollisionThreshold) + Type expectedInternalComparerTypeBeforeCollisionThreshold, + IEqualityComparer expectedPublicComparerBeforeCollisionThreshold, + Type expectedInternalComparerTypeAfterCollisionThreshold) { RunCollectionTestCommon( () => new HashSet(equalityComparer), (set, key) => Assert.True(set.Add(key)), (set, key) => set.Contains(key), set => set.Comparer, - expectedInternalComparerBeforeCollisionThreshold, + expectedInternalComparerTypeBeforeCollisionThreshold, expectedPublicComparerBeforeCollisionThreshold, - expectedComparerAfterCollisionThreshold); + expectedInternalComparerTypeAfterCollisionThreshold); } } @@ -177,24 +177,18 @@ private static void RunCollectionTestCommon( Action addKeyCallback, Func containsKeyCallback, Func> getComparerCallback, - Type expectedInternalComparerBeforeCollisionThreshold, - Type expectedPublicComparerBeforeCollisionThreshold, - Type expectedComparerAfterCollisionThreshold) + Type expectedInternalComparerTypeBeforeCollisionThreshold, + IEqualityComparer expectedPublicComparerBeforeCollisionThreshold, + Type expectedInternalComparerTypeAfterCollisionThreshold) { TCollection collection = collectionFactory(); List allKeys = new List(); - const int StartOfRange = 0xE020; // use the Unicode Private Use range to avoid accidentally creating strings that really do compare as equal OrdinalIgnoreCase - const int Stride = 0x40; // to ensure we don't accidentally reset the 0x20 bit of the seed, which is used to negate OrdinalIgnoreCase effects - // First, go right up to the collision threshold, but don't exceed it. for (int i = 0; i < 100; i++) { - string newKey = GenerateCollidingString(i * Stride + StartOfRange); - Assert.Equal(0, _lazyGetNonRandomizedHashCodeDel.Value(newKey)); // ensure has a zero hash code Ordinal - Assert.Equal(0x24716ca0, _lazyGetNonRandomizedOrdinalIgnoreCaseHashCodeDel.Value(newKey)); // ensure has a zero hash code OrdinalIgnoreCase - + string newKey = _collidingStrings[i]; addKeyCallback(collection, newKey); allKeys.Add(newKey); } @@ -202,15 +196,18 @@ private static void RunCollectionTestCommon( FieldInfo internalComparerField = collection.GetType().GetField("_comparer", BindingFlags.NonPublic | BindingFlags.Instance); Assert.NotNull(internalComparerField); - Assert.Equal(expectedInternalComparerBeforeCollisionThreshold, internalComparerField.GetValue(collection)?.GetType()); - Assert.Equal(expectedPublicComparerBeforeCollisionThreshold, getComparerCallback(collection).GetType()); + IEqualityComparer actualInternalComparerBeforeCollisionThreshold = (IEqualityComparer)internalComparerField.GetValue(collection); + ValidateBehaviorOfInternalComparerVsPublicComparer(actualInternalComparerBeforeCollisionThreshold, expectedPublicComparerBeforeCollisionThreshold); + + Assert.Equal(expectedInternalComparerTypeBeforeCollisionThreshold, actualInternalComparerBeforeCollisionThreshold?.GetType()); + Assert.Equal(expectedPublicComparerBeforeCollisionThreshold, getComparerCallback(collection)); // Now exceed the collision threshold, which should rebucket entries. // Continue adding a few more entries to ensure we didn't corrupt internal state. for (int i = 100; i < 110; i++) { - string newKey = GenerateCollidingString(i * Stride + StartOfRange); + string newKey = _collidingStrings[i]; Assert.Equal(0, _lazyGetNonRandomizedHashCodeDel.Value(newKey)); // ensure has a zero hash code Ordinal Assert.Equal(0x24716ca0, _lazyGetNonRandomizedOrdinalIgnoreCaseHashCodeDel.Value(newKey)); // ensure has a zero hash code OrdinalIgnoreCase @@ -218,8 +215,11 @@ private static void RunCollectionTestCommon( allKeys.Add(newKey); } - Assert.Equal(expectedComparerAfterCollisionThreshold, internalComparerField.GetValue(collection)?.GetType()); - Assert.Equal(expectedPublicComparerBeforeCollisionThreshold, getComparerCallback(collection).GetType()); // shouldn't change this return value after collision threshold met + IEqualityComparer actualInternalComparerAfterCollisionThreshold = (IEqualityComparer)internalComparerField.GetValue(collection); + ValidateBehaviorOfInternalComparerVsPublicComparer(actualInternalComparerAfterCollisionThreshold, expectedPublicComparerBeforeCollisionThreshold); + + Assert.Equal(expectedInternalComparerTypeAfterCollisionThreshold, actualInternalComparerAfterCollisionThreshold?.GetType()); + Assert.Equal(expectedPublicComparerBeforeCollisionThreshold, getComparerCallback(collection)); // shouldn't change this return value after collision threshold met // And validate that all strings are present in the dictionary. @@ -235,7 +235,7 @@ private static void RunCollectionTestCommon( ((ISerializable)collection).GetObjectData(si, new StreamingContext()); object serializedComparer = si.GetValue("Comparer", typeof(IEqualityComparer)); - Assert.Equal(expectedPublicComparerBeforeCollisionThreshold, serializedComparer.GetType()); + Assert.Equal(expectedPublicComparerBeforeCollisionThreshold, serializedComparer); } private static Lazy> _lazyGetNonRandomizedHashCodeDel = new Lazy>( @@ -244,27 +244,63 @@ private static void RunCollectionTestCommon( private static Lazy> _lazyGetNonRandomizedOrdinalIgnoreCaseHashCodeDel = new Lazy>( () => GetStringHashCodeOpenDelegate("GetNonRandomizedHashCodeOrdinalIgnoreCase")); - // Generates a string with a well-known non-randomized hash code: - // - string.GetNonRandomizedHashCode returns 0. - // - string.GetNonRandomizedHashCodeOrdinalIgnoreCase returns 0x24716ca0. - // Provide a different seed to produce a different string. - private static string GenerateCollidingString(int seed) + // n.b., must be initialized *after* delegate fields above + private static readonly List _collidingStrings = GenerateCollidingStrings(110); + + private static List GenerateCollidingStrings(int count) { - return string.Create(8, seed, (span, seed) => + const int StartOfRange = 0xE020; // use the Unicode Private Use range to avoid accidentally creating strings that really do compare as equal OrdinalIgnoreCase + const int Stride = 0x40; // to ensure we don't accidentally reset the 0x20 bit of the seed, which is used to negate OrdinalIgnoreCase effects + + int currentSeed = StartOfRange; + + List collidingStrings = new List(count); + while (collidingStrings.Count < count) { - Span asBytes = MemoryMarshal.AsBytes(span); - - uint hash1 = (5381 << 16) + 5381; - uint hash2 = BitOperations.RotateLeft(hash1, 5) + hash1; - - MemoryMarshal.Write(asBytes, ref seed); - MemoryMarshal.Write(asBytes.Slice(4), ref hash2); // set hash2 := 0 (for Ordinal) - - hash1 = (BitOperations.RotateLeft(hash1, 5) + hash1) ^ (uint)seed; - hash1 = (BitOperations.RotateLeft(hash1, 5) + hash1); - - MemoryMarshal.Write(asBytes.Slice(8), ref hash1); // set hash1 := 0 (for Ordinal) - }); + if (currentSeed > ushort.MaxValue) + { + throw new Exception($"Couldn't create enough colliding strings? Created {collidingStrings.Count}, needed {count}."); + } + + string candidate = GenerateCollidingStringCandidate(currentSeed); + + int ordinalHashCode = _lazyGetNonRandomizedHashCodeDel.Value(candidate); + Assert.Equal(0, ordinalHashCode); // ensure has a zero hash code Ordinal + + int ordinalIgnoreCaseHashCode = _lazyGetNonRandomizedOrdinalIgnoreCaseHashCodeDel.Value(candidate); + if (ordinalIgnoreCaseHashCode == 0x24716ca0) // ensure has a zero hash code OrdinalIgnoreCase (might not have one) + { + collidingStrings.Add(candidate); // success! + } + + currentSeed += Stride; + } + + return collidingStrings; + + // Generates a possible string with a well-known non-randomized hash code: + // - string.GetNonRandomizedHashCode returns 0. + // - string.GetNonRandomizedHashCodeOrdinalIgnoreCase returns 0x24716ca0. + // Provide a different seed to produce a different string. + // Caller must check OrdinalIgnoreCase hash code to ensure correctness. + static string GenerateCollidingStringCandidate(int seed) + { + return string.Create(8, seed, (span, seed) => + { + Span asBytes = MemoryMarshal.AsBytes(span); + + uint hash1 = (5381 << 16) + 5381; + uint hash2 = BitOperations.RotateLeft(hash1, 5) + hash1; + + MemoryMarshal.Write(asBytes, ref seed); + MemoryMarshal.Write(asBytes.Slice(4), ref hash2); // set hash2 := 0 (for Ordinal) + + hash1 = (BitOperations.RotateLeft(hash1, 5) + hash1) ^ (uint)seed; + hash1 = (BitOperations.RotateLeft(hash1, 5) + hash1); + + MemoryMarshal.Write(asBytes.Slice(8), ref hash1); // set hash1 := 0 (for Ordinal) + }); + } } private static Func GetStringHashCodeOpenDelegate(string methodName) @@ -274,5 +310,44 @@ private static Func GetStringHashCodeOpenDelegate(string methodName return method.CreateDelegate>(target: null); // create open delegate unbound to 'this' } + + private static void ValidateBehaviorOfInternalComparerVsPublicComparer(IEqualityComparer internalComparer, IEqualityComparer publicComparer) + { + // This helper ensures that when we substitute one of our internal comparers + // in place of the expected public comparer, the internal comparer's Equals + // and GetHashCode behavior are consistent with the public comparer's. + + if (internalComparer is null) + { + internalComparer = EqualityComparer.Default; + } + if (publicComparer is null) + { + publicComparer = EqualityComparer.Default; + } + + foreach (var pair in new[] { + ("Hello", "Hello"), // exactly equal + ("Hello", "Goodbye"), // not equal at all + ("Hello", "hello"), // case-insensitive equal + ("Hello", "He\u200dllo"), // equal under linguistic comparer + ("Hello", "HE\u200dLLO"), // equal under case-insensitive linguistic comparer + ("абвгдеёжзийклмнопрстуфхцчшщьыъэюя", "АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЬЫЪЭЮЯ"), // Cyrillic, case-insensitive equal + }) + { + bool arePairElementsExpectedEqual = publicComparer.Equals(pair.Item1, pair.Item2); + Assert.Equal(arePairElementsExpectedEqual, internalComparer.Equals(pair.Item1, pair.Item2)); + + bool areInternalHashCodesEqual = internalComparer.GetHashCode(pair.Item1) == internalComparer.GetHashCode(pair.Item2); + if (arePairElementsExpectedEqual) + { + Assert.True(areInternalHashCodesEqual); + } + else if (!areInternalHashCodesEqual) + { + Assert.False(arePairElementsExpectedEqual); + } + } + } } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Collections/Generic/RandomizedStringEqualityComparer.cs b/src/libraries/System.Private.CoreLib/src/System/Collections/Generic/RandomizedStringEqualityComparer.cs index 168959d83386a..30db6049d22f7 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Collections/Generic/RandomizedStringEqualityComparer.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Collections/Generic/RandomizedStringEqualityComparer.cs @@ -80,31 +80,6 @@ internal OrdinalIgnoreCaseComparer(IEqualityComparer wrappedComparer) public override bool Equals(string? x, string? y) => string.EqualsOrdinalIgnoreCase(x, y); - public override int GetHashCode(string? obj) - { - if (obj is null) - { - return 0; - } - - // The Ordinal version of Marvin32 operates over bytes, so convert - // char count -> byte count. Guaranteed not to integer overflow. - return Marvin.ComputeHash32( - ref Unsafe.As(ref obj.GetRawStringData()), - (uint)obj.Length * sizeof(char), - _seed.p0, _seed.p1); - } - } - - private sealed class RandomizedOrdinalIgnoreCaseComparer : RandomizedStringEqualityComparer - { - internal RandomizedOrdinalIgnoreCaseComparer(IEqualityComparer underlyingComparer) - : base(underlyingComparer) - { - } - - public override bool Equals(string? x, string? y) => string.EqualsOrdinalIgnoreCase(x, y); - public override int GetHashCode(string? obj) { if (obj is null) From 970d2eb3f4c92bf0dd0e734f1f87a15c95328076 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sun, 15 Nov 2020 18:13:05 +0300 Subject: [PATCH 16/19] fix bad merge --- .../System.Private.CoreLib/src/System/String.Comparison.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs b/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs index 94701e67decd6..2bb50de952820 100644 --- a/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs +++ b/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs @@ -864,7 +864,7 @@ private static int GetNonRandomizedHashCodeOrdinalIgnoreCaseStatic(ref char firs ref byte firstByte = ref Unsafe.As(ref firstChar); uint p0 = Unsafe.ReadUnaligned(ref Unsafe.Add(ref firstByte, i)); uint p1 = Unsafe.ReadUnaligned(ref Unsafe.Add(ref firstByte, i + 4)); - if (!AllCharsInUInt32AreAscii(p0 | p1)) + if (!Utf16Utility.AllCharsInUInt32AreAscii(p0 | p1)) { goto NotAscii; } @@ -880,7 +880,7 @@ private static int GetNonRandomizedHashCodeOrdinalIgnoreCaseStatic(ref char firs if (count > 0) { uint p0 = Unsafe.ReadUnaligned(ref Unsafe.Add(ref Unsafe.As(ref firstChar), i)); - if (!AllCharsInUInt32AreAscii(p0)) + if (!Utf16Utility.AllCharsInUInt32AreAscii(p0)) { goto NotAscii; } From bef2782a5e761e1b698265fc37574afdb689a027 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Tue, 17 Nov 2020 13:52:48 -0800 Subject: [PATCH 17/19] Address Jan's feedback --- .../src/System/String.Comparison.cs | 105 ++++++++---------- 1 file changed, 47 insertions(+), 58 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs b/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs index 2bb50de952820..36f6d09540a4e 100644 --- a/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs +++ b/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs @@ -836,76 +836,49 @@ internal unsafe int GetNonRandomizedHashCode() } } - internal int GetNonRandomizedHashCodeOrdinalIgnoreCase() + internal unsafe int GetNonRandomizedHashCodeOrdinalIgnoreCase() { - return GetNonRandomizedHashCodeOrdinalIgnoreCaseStatic(ref _firstChar, Length, true); - } - - private static int GetNonRandomizedHashCodeOrdinalIgnoreCaseStatic(ref char firstChar, int length, bool normalizeNonAscii) - { - Debug.Assert(Unsafe.Add(ref firstChar, length) == '\0', "*(&firstChar + length) == '\\0'"); - uint hash1 = (5381 << 16) + 5381; uint hash2 = hash1; - // We "normalize to lowercase" every char by ORing with 0x0020. This casts - // a very wide net because it will change, e.g., '^' to '~'. But that should - // be ok because we expect this to be very rare in practice. + fixed (char* src = &_firstChar) + { + Debug.Assert(src[this.Length] == '\0', "src[this.Length] == '\\0'"); + Debug.Assert(((int) src) % 4 == 0, "Managed string should start at 4 bytes boundary"); - const uint NormalizeToLowercase = 0x0020_0020u; // valid both for big-endian and for little-endian + uint* ptr = (uint*) src; + int length = this.Length; - nint i = 0; - int count = length; + // We "normalize to lowercase" every char by ORing with 0x0020. This casts + // a very wide net because it will change, e.g., '^' to '~'. But that should + // be ok because we expect this to be very rare in practice. + const uint NormalizeToLowercase = 0x0020_0020u; // valid both for big-endian and for little-endian - if (normalizeNonAscii) - { - while (count > 2) + while (length > 2) { - ref byte firstByte = ref Unsafe.As(ref firstChar); - uint p0 = Unsafe.ReadUnaligned(ref Unsafe.Add(ref firstByte, i)); - uint p1 = Unsafe.ReadUnaligned(ref Unsafe.Add(ref firstByte, i + 4)); + uint p0 = ptr[0]; + uint p1 = ptr[1]; if (!Utf16Utility.AllCharsInUInt32AreAscii(p0 | p1)) { goto NotAscii; } - // Where count is 4n-1 (e.g. 3,7,11,15,19) this additionally consumes the null terminator + length -= 4; + // Where length is 4n-1 (e.g. 3,7,11,15,19) this additionally consumes the null terminator hash1 = (BitOperations.RotateLeft(hash1, 5) + hash1) ^ (p0 | NormalizeToLowercase); hash2 = (BitOperations.RotateLeft(hash2, 5) + hash2) ^ (p1 | NormalizeToLowercase); - - count -= 4; - i += 8; + ptr += 2; } - if (count > 0) + if (length > 0) { - uint p0 = Unsafe.ReadUnaligned(ref Unsafe.Add(ref Unsafe.As(ref firstChar), i)); + uint p0 = ptr[0]; if (!Utf16Utility.AllCharsInUInt32AreAscii(p0)) { goto NotAscii; } - // Where count is 4n-3 (e.g. 1,5,9,13,17) this additionally consumes the null terminator - hash2 = (BitOperations.RotateLeft(hash2, 5) + hash2) ^ (p0 | NormalizeToLowercase); - } - } - else - { - // The duplication can be removed once JIT gets "Loop Unswitching" optimization - // so the invariant "if (normalizeNonAscii)" check can be done inside the loop - while (count > 2) - { - ref byte firstByte = ref Unsafe.As(ref firstChar); - uint p0 = Unsafe.ReadUnaligned(ref Unsafe.Add(ref firstByte, i)); - uint p1 = Unsafe.ReadUnaligned(ref Unsafe.Add(ref firstByte, i + 4)); - count -= 4; - hash1 = (BitOperations.RotateLeft(hash1, 5) + hash1) ^ (p0 | NormalizeToLowercase); - hash2 = (BitOperations.RotateLeft(hash2, 5) + hash2) ^ (p1 | NormalizeToLowercase); - i += 8; - } - if (count > 0) - { - uint p0 = Unsafe.ReadUnaligned(ref Unsafe.Add(ref Unsafe.As(ref firstChar), i)); + // Where length is 4n-3 (e.g. 1,5,9,13,17) this additionally consumes the null terminator hash2 = (BitOperations.RotateLeft(hash2, 5) + hash2) ^ (p0 | NormalizeToLowercase); } } @@ -913,31 +886,47 @@ private static int GetNonRandomizedHashCodeOrdinalIgnoreCaseStatic(ref char firs return (int)(hash1 + (hash2 * 1566083941)); NotAscii: - // Convert the string to upper case using Globalization - // and try again the same algorithm (without IsAscii validation this time) - return GetNonRandomizedHashCodeOrdinalIgnoreCaseSlow(ref firstChar, length); + return GetNonRandomizedHashCodeOrdinalIgnoreCaseSlow(this, hash1, hash2); - static int GetNonRandomizedHashCodeOrdinalIgnoreCaseSlow(ref char firstChar, int length) + static int GetNonRandomizedHashCodeOrdinalIgnoreCaseSlow(string str, uint hash1, uint hash2) { + int length = str.Length; char[]? borrowedArr = null; // Important: leave an additional space for '\0' Span scratch = (uint)length < 64 ? stackalloc char[64] : (borrowedArr = ArrayPool.Shared.Rent(length + 1)); - int charsWritten = System.Globalization.Ordinal.ToUpperOrdinal( - MemoryMarshal.CreateReadOnlySpan(ref firstChar, length), scratch); - + int charsWritten = System.Globalization.Ordinal.ToUpperOrdinal(str, scratch); Debug.Assert(charsWritten == length); - scratch[length] = '\0'; - int hashCode = GetNonRandomizedHashCodeOrdinalIgnoreCaseStatic( - ref MemoryMarshal.GetReference(scratch), length, false); + + const uint NormalizeToLowercase = 0x0020_0020u; + + // Duplicate the main loop, can be removed once JIT gets "Loop Unswitching" optimization + fixed (char* src = scratch) + { + uint* ptr = (uint*)src; + while (length > 2) + { + length -= 4; + // Where length is 4n-1 (e.g. 3,7,11,15,19) this additionally consumes the null terminator + hash1 = (BitOperations.RotateLeft(hash1, 5) + hash1) ^ (ptr[0] | NormalizeToLowercase); + hash2 = (BitOperations.RotateLeft(hash2, 5) + hash2) ^ (ptr[1] | NormalizeToLowercase); + ptr += 2; + } + + if (length > 0) + { + // Where length is 4n-3 (e.g. 1,5,9,13,17) this additionally consumes the null terminator + hash2 = (BitOperations.RotateLeft(hash2, 5) + hash2) ^ (ptr[0] | NormalizeToLowercase); + } + } if (borrowedArr != null) { ArrayPool.Shared.Return(borrowedArr); } - return hashCode; + return (int)(hash1 + (hash2 * 1566083941)); } } From 561847fb1cb3854a01ceaca6f3a1c1c07ec8e6f9 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Wed, 18 Nov 2020 02:05:29 -0800 Subject: [PATCH 18/19] don't pass hash1 and hash2 --- .../src/System/String.Comparison.cs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs b/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs index 36f6d09540a4e..bab6ff424b5b1 100644 --- a/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs +++ b/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs @@ -886,9 +886,9 @@ internal unsafe int GetNonRandomizedHashCodeOrdinalIgnoreCase() return (int)(hash1 + (hash2 * 1566083941)); NotAscii: - return GetNonRandomizedHashCodeOrdinalIgnoreCaseSlow(this, hash1, hash2); + return GetNonRandomizedHashCodeOrdinalIgnoreCaseSlow(this); - static int GetNonRandomizedHashCodeOrdinalIgnoreCaseSlow(string str, uint hash1, uint hash2) + static int GetNonRandomizedHashCodeOrdinalIgnoreCaseSlow(string str) { int length = str.Length; char[]? borrowedArr = null; @@ -901,6 +901,8 @@ static int GetNonRandomizedHashCodeOrdinalIgnoreCaseSlow(string str, uint hash1, scratch[length] = '\0'; const uint NormalizeToLowercase = 0x0020_0020u; + uint hash1 = (5381 << 16) + 5381; + uint hash2 = hash1; // Duplicate the main loop, can be removed once JIT gets "Loop Unswitching" optimization fixed (char* src = scratch) @@ -909,7 +911,6 @@ static int GetNonRandomizedHashCodeOrdinalIgnoreCaseSlow(string str, uint hash1, while (length > 2) { length -= 4; - // Where length is 4n-1 (e.g. 3,7,11,15,19) this additionally consumes the null terminator hash1 = (BitOperations.RotateLeft(hash1, 5) + hash1) ^ (ptr[0] | NormalizeToLowercase); hash2 = (BitOperations.RotateLeft(hash2, 5) + hash2) ^ (ptr[1] | NormalizeToLowercase); ptr += 2; @@ -917,7 +918,6 @@ static int GetNonRandomizedHashCodeOrdinalIgnoreCaseSlow(string str, uint hash1, if (length > 0) { - // Where length is 4n-3 (e.g. 1,5,9,13,17) this additionally consumes the null terminator hash2 = (BitOperations.RotateLeft(hash2, 5) + hash2) ^ (ptr[0] | NormalizeToLowercase); } } From f699c0fa0b55ab523c484e560b4092a8727daa6c Mon Sep 17 00:00:00 2001 From: Egor Bogatov Date: Sat, 21 Nov 2020 02:56:08 +0300 Subject: [PATCH 19/19] Update Dictionary.Tests.cs --- .../tests/Generic/Dictionary/Dictionary.Tests.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libraries/System.Collections/tests/Generic/Dictionary/Dictionary.Tests.cs b/src/libraries/System.Collections/tests/Generic/Dictionary/Dictionary.Tests.cs index e1848b6773534..653f6c7dacd72 100644 --- a/src/libraries/System.Collections/tests/Generic/Dictionary/Dictionary.Tests.cs +++ b/src/libraries/System.Collections/tests/Generic/Dictionary/Dictionary.Tests.cs @@ -337,7 +337,7 @@ public void CantAcceptDuplicateKeysFromSourceDictionary() AssertExtensions.Throws(null, () => new Dictionary(source, StringComparer.OrdinalIgnoreCase)); } - [Fact] + [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsNotInvariantGlobalization))] // https://github.com/dotnet/runtime/issues/44681 public void DictionaryOrdinalIgnoreCaseCyrillicKeys() {