Skip to content

Commit 73a428d

Browse files
authored
Fix handling of case-sensitive set loops in RegexPrefixAnalyzer.FindPrefixes (#101608)
* Fix handling of case-sensitive set loops in RegexPrefixAnalyzer.FindPrefixes. For an expression like `[Aa]{2}`, we were generating the strings "AA" and "aa" but not "Aa" or "aA". This code isn't exercised yet, as we're currently only using FindPrefixes for case-insensitive matching, but I'm trying to enable it for case-sensitive matching as well, and hit this. I'm not adding new tests here, as plenty of existing tests catch it once it's enabled. * Also exit early as soon as we can detect too many possible prefixes.
1 parent b88785f commit 73a428d

File tree

2 files changed

+35
-19
lines changed

2 files changed

+35
-19
lines changed

src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexPrefixAnalyzer.cs

+29-14
Original file line numberDiff line numberDiff line change
@@ -59,9 +59,11 @@ static bool FindPrefixesCore(RegexNode node, List<StringBuilder> results, bool i
5959
// If we're too deep to analyze further, we can't trust what we've already computed, so stop iterating.
6060
// Also bail if any of our results is already hitting the threshold, or if this node is RTL, which is
6161
// not worth the complexity of handling.
62+
// Or if we've already discovered more than the allowed number of prefixes.
6263
if (!StackHelper.TryEnsureSufficientExecutionStack() ||
6364
!results.TrueForAll(sb => sb.Length < MaxPrefixLength) ||
64-
(node.Options & RegexOptions.RightToLeft) != 0)
65+
(node.Options & RegexOptions.RightToLeft) != 0 ||
66+
results.Count > MaxPrefixes)
6567
{
6668
return false;
6769
}
@@ -162,23 +164,30 @@ static bool FindPrefixesCore(RegexNode node, List<StringBuilder> results, bool i
162164
int reps = node.Kind is RegexNodeKind.Set ? 1 : Math.Min(node.M, MaxPrefixLength);
163165
if (!ignoreCase)
164166
{
165-
int existingCount = results.Count;
166-
167-
// Duplicate all of the existing strings for all of the new suffixes, other than the first.
168-
foreach (char suffix in setChars.Slice(1, charCount - 1))
167+
for (int rep = 0; rep < reps; rep++)
169168
{
170-
for (int existing = 0; existing < existingCount; existing++)
169+
int existingCount = results.Count;
170+
if (existingCount * charCount > MaxPrefixes)
171171
{
172-
StringBuilder newSb = new StringBuilder().Append(results[existing]);
173-
newSb.Append(suffix, reps);
174-
results.Add(newSb);
172+
return false;
175173
}
176-
}
177174

178-
// Then append the first suffix to all of the existing strings.
179-
for (int existing = 0; existing < existingCount; existing++)
180-
{
181-
results[existing].Append(setChars[0], reps);
175+
// Duplicate all of the existing strings for all of the new suffixes, other than the first.
176+
foreach (char suffix in setChars.Slice(1, charCount - 1))
177+
{
178+
for (int existing = 0; existing < existingCount; existing++)
179+
{
180+
StringBuilder newSb = new StringBuilder().Append(results[existing]);
181+
newSb.Append(suffix);
182+
results.Add(newSb);
183+
}
184+
}
185+
186+
// Then append the first suffix to all of the existing strings.
187+
for (int existing = 0; existing < existingCount; existing++)
188+
{
189+
results[existing].Append(setChars[0]);
190+
}
182191
}
183192
}
184193
else
@@ -248,6 +257,12 @@ static bool FindPrefixesCore(RegexNode node, List<StringBuilder> results, bool i
248257
{
249258
_ = FindPrefixesCore(node.Child(i), alternateBranchResults, ignoreCase);
250259

260+
// If we now have too many results, bail.
261+
if ((allBranchResults?.Count ?? 0) + alternateBranchResults.Count > MaxPrefixes)
262+
{
263+
return false;
264+
}
265+
251266
Debug.Assert(alternateBranchResults.Count > 0);
252267
foreach (StringBuilder sb in alternateBranchResults)
253268
{

src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs

+6-5
Original file line numberDiff line numberDiff line change
@@ -1172,6 +1172,7 @@ public async Task Match_VaryingLengthStrings_Huge(RegexEngine engine)
11721172

11731173
public static IEnumerable<object[]> Match_DeepNesting_MemberData()
11741174
{
1175+
foreach (RegexOptions options in new[] { RegexOptions.None, RegexOptions.IgnoreCase })
11751176
foreach (RegexEngine engine in RegexHelpers.AvailableEngines)
11761177
{
11771178
if (RegexHelpers.IsNonBacktracking(engine))
@@ -1180,23 +1181,23 @@ public static IEnumerable<object[]> Match_DeepNesting_MemberData()
11801181
continue;
11811182
}
11821183

1183-
yield return new object[] { engine, 1 };
1184-
yield return new object[] { engine, 10 };
1185-
yield return new object[] { engine, 100 };
1184+
yield return new object[] { engine, options, 1 };
1185+
yield return new object[] { engine, options, 10 };
1186+
yield return new object[] { engine, options, 100 };
11861187
}
11871188
}
11881189

11891190
[Theory]
11901191
[MemberData(nameof(Match_DeepNesting_MemberData))]
1191-
public async Task Match_DeepNesting(RegexEngine engine, int count)
1192+
public async Task Match_DeepNesting(RegexEngine engine, RegexOptions options, int count)
11921193
{
11931194
const string Start = @"((?>abc|(?:def[ghi]", End = @")))";
11941195
const string Match = "defg";
11951196

11961197
string pattern = string.Concat(Enumerable.Repeat(Start, count)) + string.Concat(Enumerable.Repeat(End, count));
11971198
string input = string.Concat(Enumerable.Repeat(Match, count));
11981199

1199-
Regex r = await RegexHelpers.GetRegexAsync(engine, pattern);
1200+
Regex r = await RegexHelpers.GetRegexAsync(engine, pattern, options);
12001201
Match m = r.Match(input);
12011202

12021203
Assert.True(m.Success);

0 commit comments

Comments
 (0)