From 7ee4cbff19864a1e3e157f636420cc10ebafe20d Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Fri, 26 Apr 2024 10:47:31 -0400 Subject: [PATCH 1/2] Fix handling of case-sensitive set loops in RegexPrefixAnalyzer.FindPrefixes For an expression like `[Aa]{2}`, we were generating the strings "AA" and "aa" but not "Aa" or "aA". This code isn't exercised yet, as we're currently only using FindPrefixes for case-insensitive, but I'm trying to enable it for case-sensitive as well, and hit this. I'm not adding new tests here as plenty of existing tests catch it once it's enabled. --- .../RegularExpressions/RegexPrefixAnalyzer.cs | 29 ++++++++++--------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexPrefixAnalyzer.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexPrefixAnalyzer.cs index b46af7b1b9d06..3ff91d642f5a3 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexPrefixAnalyzer.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexPrefixAnalyzer.cs @@ -162,23 +162,26 @@ static bool FindPrefixesCore(RegexNode node, List results, bool i int reps = node.Kind is RegexNodeKind.Set ? 1 : Math.Min(node.M, MaxPrefixLength); if (!ignoreCase) { - int existingCount = results.Count; - - // Duplicate all of the existing strings for all of the new suffixes, other than the first. - foreach (char suffix in setChars.Slice(1, charCount - 1)) + for (int rep = 0; rep < reps; rep++) { - for (int existing = 0; existing < existingCount; existing++) + int existingCount = results.Count; + + // Duplicate all of the existing strings for all of the new suffixes, other than the first. + foreach (char suffix in setChars.Slice(1, charCount - 1)) { - StringBuilder newSb = new StringBuilder().Append(results[existing]); - newSb.Append(suffix, reps); - results.Add(newSb); + for (int existing = 0; existing < existingCount; existing++) + { + StringBuilder newSb = new StringBuilder().Append(results[existing]); + newSb.Append(suffix); + results.Add(newSb); + } } - } - // Then append the first suffix to all of the existing strings. - for (int existing = 0; existing < existingCount; existing++) - { - results[existing].Append(setChars[0], reps); + // Then append the first suffix to all of the existing strings. + for (int existing = 0; existing < existingCount; existing++) + { + results[existing].Append(setChars[0]); + } } } else From db16aa9abfebdb18e691f45b7bb4347f3a11f804 Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Fri, 26 Apr 2024 12:33:18 -0400 Subject: [PATCH 2/2] Also exit early as soon as we can detect too many possible prefixes --- .../Text/RegularExpressions/RegexPrefixAnalyzer.cs | 14 +++++++++++++- .../tests/FunctionalTests/Regex.Match.Tests.cs | 11 ++++++----- 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexPrefixAnalyzer.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexPrefixAnalyzer.cs index 3ff91d642f5a3..556a187203f03 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexPrefixAnalyzer.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexPrefixAnalyzer.cs @@ -59,9 +59,11 @@ static bool FindPrefixesCore(RegexNode node, List results, bool i // If we're too deep to analyze further, we can't trust what we've already computed, so stop iterating. // Also bail if any of our results is already hitting the threshold, or if this node is RTL, which is // not worth the complexity of handling. + // Or if we've already discovered more than the allowed number of prefixes. if (!StackHelper.TryEnsureSufficientExecutionStack() || !results.TrueForAll(sb => sb.Length < MaxPrefixLength) || - (node.Options & RegexOptions.RightToLeft) != 0) + (node.Options & RegexOptions.RightToLeft) != 0 || + results.Count > MaxPrefixes) { return false; } @@ -165,6 +167,10 @@ static bool FindPrefixesCore(RegexNode node, List results, bool i for (int rep = 0; rep < reps; rep++) { int existingCount = results.Count; + if (existingCount * charCount > MaxPrefixes) + { + return false; + } // Duplicate all of the existing strings for all of the new suffixes, other than the first. foreach (char suffix in setChars.Slice(1, charCount - 1)) @@ -251,6 +257,12 @@ static bool FindPrefixesCore(RegexNode node, List results, bool i { _ = FindPrefixesCore(node.Child(i), alternateBranchResults, ignoreCase); + // If we now have too many results, bail. + if ((allBranchResults?.Count ?? 0) + alternateBranchResults.Count > MaxPrefixes) + { + return false; + } + Debug.Assert(alternateBranchResults.Count > 0); foreach (StringBuilder sb in alternateBranchResults) { diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs index d58290174c222..9d3679be60b93 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs @@ -1172,6 +1172,7 @@ public async Task Match_VaryingLengthStrings_Huge(RegexEngine engine) public static IEnumerable Match_DeepNesting_MemberData() { + foreach (RegexOptions options in new[] { RegexOptions.None, RegexOptions.IgnoreCase }) foreach (RegexEngine engine in RegexHelpers.AvailableEngines) { if (RegexHelpers.IsNonBacktracking(engine)) @@ -1180,15 +1181,15 @@ public static IEnumerable Match_DeepNesting_MemberData() continue; } - yield return new object[] { engine, 1 }; - yield return new object[] { engine, 10 }; - yield return new object[] { engine, 100 }; + yield return new object[] { engine, options, 1 }; + yield return new object[] { engine, options, 10 }; + yield return new object[] { engine, options, 100 }; } } [Theory] [MemberData(nameof(Match_DeepNesting_MemberData))] - public async Task Match_DeepNesting(RegexEngine engine, int count) + public async Task Match_DeepNesting(RegexEngine engine, RegexOptions options, int count) { const string Start = @"((?>abc|(?:def[ghi]", End = @")))"; const string Match = "defg"; @@ -1196,7 +1197,7 @@ public async Task Match_DeepNesting(RegexEngine engine, int count) string pattern = string.Concat(Enumerable.Repeat(Start, count)) + string.Concat(Enumerable.Repeat(End, count)); string input = string.Concat(Enumerable.Repeat(Match, count)); - Regex r = await RegexHelpers.GetRegexAsync(engine, pattern); + Regex r = await RegexHelpers.GetRegexAsync(engine, pattern, options); Match m = r.Match(input); Assert.True(m.Success);