Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reintroduce case sensitive comparison optimization for FrozenDictionary in some cases #95232

Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ public static AnalysisResults Analyze(
AnalysisResults results;
if (minLength == 0 || !TryUseSubstring(uniqueStrings, ignoreCase, minLength, maxLength, out results))
{
results = CreateAnalysisResults(uniqueStrings, ignoreCase, minLength, maxLength, 0, 0, isSubstring: false, static (s, _, _) => s.AsSpan());
results = CreateAnalysisResults(uniqueStrings, ignoreCase, minLength, maxLength, 0, 0, static (s, _, _) => s.AsSpan());
}

return results;
Expand Down Expand Up @@ -77,7 +77,7 @@ private static bool TryUseSubstring(ReadOnlySpan<string> uniqueStrings, bool ign
if (HasSufficientUniquenessFactor(set, uniqueStrings, acceptableNonUniqueCount))
{
results = CreateAnalysisResults(
uniqueStrings, ignoreCase, minLength, maxLength, index, count, isSubstring: true,
uniqueStrings, ignoreCase, minLength, maxLength, index, count,
static (string s, int index, int count) => s.AsSpan(index, count));
return true;
}
Expand All @@ -101,7 +101,7 @@ private static bool TryUseSubstring(ReadOnlySpan<string> uniqueStrings, bool ign
if (HasSufficientUniquenessFactor(set, uniqueStrings, acceptableNonUniqueCount))
{
results = CreateAnalysisResults(
uniqueStrings, ignoreCase, minLength, maxLength, comparer.Index, count, isSubstring: true,
uniqueStrings, ignoreCase, minLength, maxLength, comparer.Index, count,
static (string s, int index, int count) => s.AsSpan(s.Length + index, count));
return true;
}
Expand All @@ -115,7 +115,7 @@ private static bool TryUseSubstring(ReadOnlySpan<string> uniqueStrings, bool ign
}

private static AnalysisResults CreateAnalysisResults(
ReadOnlySpan<string> uniqueStrings, bool ignoreCase, int minLength, int maxLength, int index, int count, bool isSubstring, GetSpan getSubstringSpan)
ReadOnlySpan<string> uniqueStrings, bool ignoreCase, int minLength, int maxLength, int index, int count, GetSpan getHashString)
{
// Start off by assuming all strings are ASCII
bool allAsciiIfIgnoreCase = true;
Expand All @@ -125,30 +125,42 @@ private static AnalysisResults CreateAnalysisResults(
// substrings are ASCII, so we check each.
if (ignoreCase)
{
// Further, if the ASCII keys (in their entirety) don't contain any letters, then we can
// actually perform the comparison as case-sensitive even if case-insensitive
// was requested, as there's nothing that would compare equally to the substring
// other than the substring itself.
bool canSwitchIgnoreCaseHashToCaseSensitive = !isSubstring;
// Further, if the ASCII keys (in their entirety) don't contain any letters,
// then we can actually perform the comparison as case-sensitive even if
// case-insensitive was requested, as there's nothing that would compare
// equally to the key other than the key itself.
bool canSwitchIgnoreCaseHashToCaseSensitive = true;

foreach (string s in uniqueStrings)
foreach (string uniqueString in uniqueStrings)
{
// Get the span for the substring.
ReadOnlySpan<char> substring = getSubstringSpan(s, index, count);
// Get a span representing the slice of the uniqueString which will be hashed.
ReadOnlySpan<char> hashString = getHashString(uniqueString, index, count);

// If the substring isn't ASCII, bail out to return the results.
if (!IsAllAscii(substring))
// If the slice isn't ASCII, bail out to return the results.
if (!IsAllAscii(hashString))
{
allAsciiIfIgnoreCase = false;
canSwitchIgnoreCaseHashToCaseSensitive = false;
break;
}

// All substrings so far are still ASCII only. If this one contains any ASCII
// letters, mark that we can't switch to case-sensitive.
if (canSwitchIgnoreCaseHashToCaseSensitive && ContainsAnyLetters(substring))
// The hash string is ASCII only. We disable the switch to
// case sensitive if by examining the entire uniqueString we
// find that it is not ASCII, or that it contains ASCII letters.
if (canSwitchIgnoreCaseHashToCaseSensitive)
{
canSwitchIgnoreCaseHashToCaseSensitive = false;
// If count is 0 then uniqueString equals hashString,
// and as we have just checked that IsAllAscii(hashString) is true
// then we know IsAllAscii(uniqueString) must be true,
// so we can skip the check.
if (count > 0 && !IsAllAscii(uniqueString.AsSpan()))
{
canSwitchIgnoreCaseHashToCaseSensitive = false;
}
else if (ContainsAnyAsciiLetters(uniqueString.AsSpan()))
{
canSwitchIgnoreCaseHashToCaseSensitive = false;
}
}
}

Expand Down Expand Up @@ -207,7 +219,7 @@ internal static unsafe bool IsAllAscii(ReadOnlySpan<char> s)
#if NET8_0_OR_GREATER
private static readonly SearchValues<char> s_asciiLetters = SearchValues.Create("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz");
#endif
internal static bool ContainsAnyLetters(ReadOnlySpan<char> s)
internal static bool ContainsAnyAsciiLetters(ReadOnlySpan<char> s)
{
Debug.Assert(IsAllAscii(s));

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,13 @@ public static void LeftHandCaseInsensitive()
Assert.Equal(0, r.HashIndex);
Assert.Equal(1, r.HashCount);

r = RunAnalysis(new[] { "0001", "0002", "0003", "0004", "0005", "0006" }, true);
Assert.False(r.RightJustifiedSubstring);
Assert.False(r.IgnoreCase);
Assert.True(r.AllAsciiIfIgnoreCase);
Assert.Equal(3, r.HashIndex);
Assert.Equal(1, r.HashCount);

}

[Fact]
Expand Down Expand Up @@ -226,9 +233,9 @@ public static void IsAllAscii()
[Fact]
public static void ContainsAnyLetters()
{
Assert.True(KeyAnalyzer.ContainsAnyLetters("abc".AsSpan()));
Assert.True(KeyAnalyzer.ContainsAnyLetters("ABC".AsSpan()));
Assert.False(KeyAnalyzer.ContainsAnyLetters("123".AsSpan()));
Assert.True(KeyAnalyzer.ContainsAnyAsciiLetters("abc".AsSpan()));
Assert.True(KeyAnalyzer.ContainsAnyAsciiLetters("ABC".AsSpan()));
Assert.False(KeyAnalyzer.ContainsAnyAsciiLetters("123".AsSpan()));
// note, must only pass ASCII to ContainsAnyLetters, anything else is a
// Debug.Assert and would not have been called in the actual implementation
}
Expand Down