From bbb97a76942d9ba95e20e7006b21325266a1baa1 Mon Sep 17 00:00:00 2001 From: Andrew J Said Date: Thu, 15 Feb 2024 19:28:22 +0000 Subject: [PATCH] Reintroduce case sensitive comparison optimization for FrozenDictionary in some cases (#95232) * Reintroduce case sensitive comparison optimization for FrozenDictionary in some cases * Renamed a few symbols and added comments for additional clarity --------- Co-authored-by: Stephen Toub --- .../Collections/Frozen/String/KeyAnalyzer.cs | 50 ++++++++++++------- .../tests/Frozen/KeyAnalyzerTests.cs | 13 +++-- 2 files changed, 41 insertions(+), 22 deletions(-) diff --git a/src/libraries/System.Collections.Immutable/src/System/Collections/Frozen/String/KeyAnalyzer.cs b/src/libraries/System.Collections.Immutable/src/System/Collections/Frozen/String/KeyAnalyzer.cs index ddea9bfbe7699..9f6094edb9776 100644 --- a/src/libraries/System.Collections.Immutable/src/System/Collections/Frozen/String/KeyAnalyzer.cs +++ b/src/libraries/System.Collections.Immutable/src/System/Collections/Frozen/String/KeyAnalyzer.cs @@ -37,7 +37,7 @@ public static AnalysisResults Analyze( AnalysisResults results; if (minLength == 0 || !TryUseSubstring(uniqueStrings, ignoreCase, minLength, maxLength, out results)) { - results = CreateAnalysisResults(uniqueStrings, ignoreCase, minLength, maxLength, 0, 0, isSubstring: false, static (s, _, _) => s.AsSpan()); + results = CreateAnalysisResults(uniqueStrings, ignoreCase, minLength, maxLength, 0, 0, static (s, _, _) => s.AsSpan()); } return results; @@ -77,7 +77,7 @@ private static bool TryUseSubstring(ReadOnlySpan uniqueStrings, bool ign if (HasSufficientUniquenessFactor(set, uniqueStrings, acceptableNonUniqueCount)) { results = CreateAnalysisResults( - uniqueStrings, ignoreCase, minLength, maxLength, index, count, isSubstring: true, + uniqueStrings, ignoreCase, minLength, maxLength, index, count, static (string s, int index, int count) => s.AsSpan(index, count)); return true; } @@ -101,7 +101,7 @@ private static bool TryUseSubstring(ReadOnlySpan uniqueStrings, bool ign if (HasSufficientUniquenessFactor(set, uniqueStrings, acceptableNonUniqueCount)) { results = CreateAnalysisResults( - uniqueStrings, ignoreCase, minLength, maxLength, comparer.Index, count, isSubstring: true, + uniqueStrings, ignoreCase, minLength, maxLength, comparer.Index, count, static (string s, int index, int count) => s.AsSpan(s.Length + index, count)); return true; } @@ -115,7 +115,7 @@ private static bool TryUseSubstring(ReadOnlySpan uniqueStrings, bool ign } private static AnalysisResults CreateAnalysisResults( - ReadOnlySpan uniqueStrings, bool ignoreCase, int minLength, int maxLength, int index, int count, bool isSubstring, GetSpan getSubstringSpan) + ReadOnlySpan uniqueStrings, bool ignoreCase, int minLength, int maxLength, int index, int count, GetSpan getHashString) { // Start off by assuming all strings are ASCII bool allAsciiIfIgnoreCase = true; @@ -125,30 +125,42 @@ private static AnalysisResults CreateAnalysisResults( // substrings are ASCII, so we check each. if (ignoreCase) { - // Further, if the ASCII keys (in their entirety) don't contain any letters, then we can - // actually perform the comparison as case-sensitive even if case-insensitive - // was requested, as there's nothing that would compare equally to the substring - // other than the substring itself. - bool canSwitchIgnoreCaseHashToCaseSensitive = !isSubstring; + // Further, if the ASCII keys (in their entirety) don't contain any letters, + // then we can actually perform the comparison as case-sensitive even if + // case-insensitive was requested, as there's nothing that would compare + // equally to the key other than the key itself. + bool canSwitchIgnoreCaseHashToCaseSensitive = true; - foreach (string s in uniqueStrings) + foreach (string uniqueString in uniqueStrings) { - // Get the span for the substring. - ReadOnlySpan substring = getSubstringSpan(s, index, count); + // Get a span representing the slice of the uniqueString which will be hashed. + ReadOnlySpan hashString = getHashString(uniqueString, index, count); - // If the substring isn't ASCII, bail out to return the results. - if (!IsAllAscii(substring)) + // If the slice isn't ASCII, bail out to return the results. + if (!IsAllAscii(hashString)) { allAsciiIfIgnoreCase = false; canSwitchIgnoreCaseHashToCaseSensitive = false; break; } - // All substrings so far are still ASCII only. If this one contains any ASCII - // letters, mark that we can't switch to case-sensitive. - if (canSwitchIgnoreCaseHashToCaseSensitive && ContainsAnyLetters(substring)) + // The hash string is ASCII only. We disable the switch to + // case sensitive if by examining the entire uniqueString we + // find that it is not ASCII, or that it contains ASCII letters. + if (canSwitchIgnoreCaseHashToCaseSensitive) { - canSwitchIgnoreCaseHashToCaseSensitive = false; + // If count is 0 then uniqueString equals hashString, + // and as we have just checked that IsAllAscii(hashString) is true + // then we know IsAllAscii(uniqueString) must be true, + // so we can skip the check. + if (count > 0 && !IsAllAscii(uniqueString.AsSpan())) + { + canSwitchIgnoreCaseHashToCaseSensitive = false; + } + else if (ContainsAnyAsciiLetters(uniqueString.AsSpan())) + { + canSwitchIgnoreCaseHashToCaseSensitive = false; + } } } @@ -207,7 +219,7 @@ internal static unsafe bool IsAllAscii(ReadOnlySpan s) #if NET8_0_OR_GREATER private static readonly SearchValues s_asciiLetters = SearchValues.Create("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"); #endif - internal static bool ContainsAnyLetters(ReadOnlySpan s) + internal static bool ContainsAnyAsciiLetters(ReadOnlySpan s) { Debug.Assert(IsAllAscii(s)); diff --git a/src/libraries/System.Collections.Immutable/tests/Frozen/KeyAnalyzerTests.cs b/src/libraries/System.Collections.Immutable/tests/Frozen/KeyAnalyzerTests.cs index 31f6007b72442..00a16bf3cef54 100644 --- a/src/libraries/System.Collections.Immutable/tests/Frozen/KeyAnalyzerTests.cs +++ b/src/libraries/System.Collections.Immutable/tests/Frozen/KeyAnalyzerTests.cs @@ -107,6 +107,13 @@ public static void LeftHandCaseInsensitive() Assert.Equal(0, r.HashIndex); Assert.Equal(1, r.HashCount); + r = RunAnalysis(new[] { "0001", "0002", "0003", "0004", "0005", "0006" }, true); + Assert.False(r.RightJustifiedSubstring); + Assert.False(r.IgnoreCase); + Assert.True(r.AllAsciiIfIgnoreCase); + Assert.Equal(3, r.HashIndex); + Assert.Equal(1, r.HashCount); + } [Fact] @@ -226,9 +233,9 @@ public static void IsAllAscii() [Fact] public static void ContainsAnyLetters() { - Assert.True(KeyAnalyzer.ContainsAnyLetters("abc".AsSpan())); - Assert.True(KeyAnalyzer.ContainsAnyLetters("ABC".AsSpan())); - Assert.False(KeyAnalyzer.ContainsAnyLetters("123".AsSpan())); + Assert.True(KeyAnalyzer.ContainsAnyAsciiLetters("abc".AsSpan())); + Assert.True(KeyAnalyzer.ContainsAnyAsciiLetters("ABC".AsSpan())); + Assert.False(KeyAnalyzer.ContainsAnyAsciiLetters("123".AsSpan())); // note, must only pass ASCII to ContainsAnyLetters, anything else is a // Debug.Assert and would not have been called in the actual implementation }