diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs
index 88a4211c251872..10ec814bea2649 100644
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs
@@ -154,7 +154,15 @@ private void MakeRep(RegexNodeKind kind, int min, int max)
N = max;
}
- private void MakeLoopAtomic()
+ /// <summary>Converts this loop node to be atomic.</summary>
+ /// <param name="noBacktrackingByPosition">
+ /// If a loop is atomic by construction, e.g. it's at the end of the pattern
+ /// or its parent is an atomic group, there's no backtracking into it, which means it does its full
+ /// initial processing and then stops. For an eager loop, that means consuming as much as possible,
+ /// but for a lazy loop, that means consuming as little as possible. Thus, if this is true, a lazy
+ /// loop should lower its max iteration count to its min iteration count.
+ /// </param>
+ private void MakeLoopAtomic(bool noBacktrackingByPosition = false)
{
switch (Kind)
{
@@ -165,11 +173,15 @@ private void MakeLoopAtomic()
break;
case RegexNodeKind.Onelazy or RegexNodeKind.Notonelazy or RegexNodeKind.Setlazy:
- // For lazy, we not only change the Type, we also lower the max number of iterations
- // to the minimum number of iterations, creating a repeater, as they should end up
- // matching as little as possible.
+ // For lazy, we always change the Type. In addition, if nothing can backtrack into the loop
+ // because of its position (noBacktrackingByPosition is true), we lower the max number of iterations
+ // to the minimum number of iterations, creating a repeater, as it should match as little as possible.
Kind += RegexNodeKind.Oneloopatomic - RegexNodeKind.Onelazy;
- N = M;
+ if (noBacktrackingByPosition)
+ {
+ N = M;
+ }
+
if (N == 0)
{
// If moving the max to be the same as the min dropped it to 0, there's no
@@ -189,6 +201,34 @@ private void MakeLoopAtomic()
}
break;
+ case RegexNodeKind.Loop:
+ if (Parent is not { Kind: RegexNodeKind.Atomic })
+ {
+ RegexNode loopAsChild = new(RegexNodeKind.Loop, Options, M, N);
+ Kind = RegexNodeKind.Atomic;
+ M = N = 0;
+ loopAsChild.AddChild(Child(0));
+ ReplaceChild(0, loopAsChild);
+ }
+ break;
+
+ case RegexNodeKind.Lazyloop:
+ if (noBacktrackingByPosition)
+ {
+ N = M;
+ }
+
+ if (N != 0)
+ {
+ // A lazy loop that becomes atomic gets treated the same as a greedy loop,
+ // so we can share the same logic.
+ goto case RegexNodeKind.Loop;
+ }
+
+ Kind = RegexNodeKind.Empty;
+ Children = null;
+ break;
+
default:
Debug.Fail($"Unexpected type: {Kind}");
break;
@@ -435,7 +475,7 @@ private void EliminateEndingBacktracking()
// or even empty nodes.
case RegexNodeKind.Oneloop or RegexNodeKind.Notoneloop or RegexNodeKind.Setloop:
case RegexNodeKind.Onelazy or RegexNodeKind.Notonelazy or RegexNodeKind.Setlazy:
- node.MakeLoopAtomic();
+ node.MakeLoopAtomic(noBacktrackingByPosition: true);
break;
// Just because a particular node is atomic doesn't mean all its descendants are.
@@ -456,7 +496,7 @@ private void EliminateEndingBacktracking()
case RegexNodeKind.Concatenate when !rtl:
RegexNode existingChild = node.Child(node.ChildCount() - 1);
if ((existingChild.Kind is RegexNodeKind.Alternate or RegexNodeKind.BackreferenceConditional or RegexNodeKind.ExpressionConditional or RegexNodeKind.Loop or RegexNodeKind.Lazyloop) &&
- (node.Parent is null || node.Parent.Kind != RegexNodeKind.Atomic)) // validate grandparent isn't atomic
+ node.Parent is not { Kind: RegexNodeKind.Atomic }) // validate grandparent isn't atomic
{
var atomic = new RegexNode(RegexNodeKind.Atomic, existingChild.Options);
atomic.AddChild(existingChild);
@@ -494,26 +534,30 @@ private void EliminateEndingBacktracking()
// e.g. (?:abc*)* => (?:ab(?>c*))*
// e.g. (abc*?)+? => (ab){1}
case RegexNodeKind.Lazyloop:
- node.N = node.M;
- goto case RegexNodeKind.Loop;
case RegexNodeKind.Loop:
{
- if (node.N == 1)
+ // Make the loop atomic, if it isn't already. This entails changing node to instead be an Atomic node
+ // that has the {Lazy}Loop as its child. If the parent of the loop is already Atomic, this will be a nop.
+ node.MakeLoopAtomic(noBacktrackingByPosition: true);
+ Debug.Assert(node.Kind is RegexNodeKind.Atomic or RegexNodeKind.Empty or RegexNodeKind.Loop or RegexNodeKind.Lazyloop);
+
+ if (node.Kind is RegexNodeKind.Atomic)
{
- // If the loop has a max iteration count of 1 (e.g. it's an optional node),
- // there's no possibility for conflict between multiple iterations, so
- // we can process it.
node = node.Child(0);
- continue;
+ Debug.Assert(node.Kind is RegexNodeKind.Loop or RegexNodeKind.Lazyloop);
}
- if (!rtl)
+ if (node.Kind is RegexNodeKind.Loop or RegexNodeKind.Lazyloop)
{
- RegexNode? loopDescendent = node.FindLastExpressionInLoopForAutoAtomic();
- if (loopDescendent != null)
+ if (node.N == 1 || CanBeMadeAtomic(node.Child(0), node.Child(0), iterateNullableSubsequent: false, allowLazy: false))
{
- node = loopDescendent;
- continue; // loop around to process node
+ // If the loop has a max iteration count of 1 (e.g. it's an optional node),
+ // there's no possibility for conflict between multiple iterations, so
+ // we can process it. Or, if the node can be made atomic with itself as a subsequent
+ // node (which is logically what happens when there are multiple iterations), we can also
+ // recur into its child.
+ node = node.Child(0);
+ continue;
}
}
}
@@ -643,7 +687,7 @@ private RegexNode ReduceAtomic()
case RegexNodeKind.Onelazy:
case RegexNodeKind.Notonelazy:
case RegexNodeKind.Setlazy:
- child.MakeLoopAtomic();
+ child.MakeLoopAtomic(noBacktrackingByPosition: true);
return child;
// Alternations have a variety of possible optimizations that can be applied
@@ -759,7 +803,9 @@ private RegexNode ReduceAtomic()
// For everything else, try to reduce ending backtracking of the last contained expression.
default:
child.EliminateEndingBacktracking();
- return atomic;
+ return child.Kind == RegexNodeKind.Empty ?
+ child : // if the child became empty, then the atomic node isn't needed
+ atomic;
}
}
@@ -1850,31 +1896,9 @@ static void ProcessNode(RegexNode node, RegexNode subsequent)
}
// Skip down the node past irrelevant nodes.
- while (true)
+ while (node.Kind is RegexNodeKind.Capture or RegexNodeKind.Concatenate)
{
- // We can always recur into captures and into the last node of concatenations.
- if (node.Kind is RegexNodeKind.Capture or RegexNodeKind.Concatenate)
- {
- node = node.Child(node.ChildCount() - 1);
- continue;
- }
-
- // For loops with at least one guaranteed iteration, we can recur into them, but
- // we need to be careful not to just always do so; the ending node of a loop can only
- // be made atomic if what comes after the loop but also the beginning of the loop are
- // compatible for the optimization.
- if (node.Kind == RegexNodeKind.Loop)
- {
- RegexNode? loopDescendent = node.FindLastExpressionInLoopForAutoAtomic();
- if (loopDescendent != null)
- {
- node = loopDescendent;
- continue;
- }
- }
-
- // Can't skip any further.
- break;
+ node = node.Child(node.ChildCount() - 1);
}
// If the node can be changed to atomic based on what comes after it, do so.
@@ -1909,6 +1933,66 @@ static void ProcessNode(RegexNode node, RegexNode subsequent)
node.MakeLoopAtomic();
break;
+ case RegexNodeKind.Loop when CanBeMadeAtomic(node, subsequent, iterateNullableSubsequent: true, allowLazy: false):
+ case RegexNodeKind.Lazyloop when CanBeMadeAtomic(node, subsequent, iterateNullableSubsequent: false, allowLazy: true):
+ // General loops and lazy loops can also be made atomic, but we need to be very careful in doing so. Making such loops
+ // atomic means wrapping them in an atomic group, and children of these loops can look up through their ancestry, see
+ // such an atomic group, and then decide to alter their behavior because backtracking isn't possible. For example, if
+ // a developer writes the pattern (?>(abcd*?)+)e, it is safe for that inner lazy char loop to see that nothing can
+ // backtrack into it, such that the lazy loop can match the minimum possible, such that the loop evaporates entirely,
+ // and it becomes (?>(abc)+)e. Thus, given a pattern like (abcd*?)+e, even though the outer loop can be made atomic,
+ // because the beginning/end of the loop don't overlap with each other or with their successor, we can't just wrap it
+ // in an atomic block, because that would then trigger the nested loop to behave incorrectly. We can address this in
+ // multiple ways, such as by tagging Atomic nodes we introduce as being different from ones originally part of the pattern,
+ // and then having children treat them differently when looking at their ancestors, or we can address it by only introducing
+ // such an atomic node when we can see it's safe for the children. For now, this does the latter, and to be conservative,
+ // it allowlists a small known set of children types.
+ RegexNode loopChild = node.Child(0);
+ while (loopChild.Kind is RegexNodeKind.Capture or RegexNodeKind.Concatenate)
+ {
+ loopChild = loopChild.Child(loopChild.ChildCount() - 1);
+ }
+
+ if (loopChild.Kind is
+ RegexNodeKind.Boundary or RegexNodeKind.ECMABoundary or
+ RegexNodeKind.Multi or
+ RegexNodeKind.One or RegexNodeKind.Notone or RegexNodeKind.Set)
+ {
+ // For types on the allow list, we can make the loop itself atomic.
+ node.MakeLoopAtomic();
+ }
+ else if (node.Kind is RegexNodeKind.Loop or RegexNodeKind.Lazyloop)
+ {
+ // For everything else, we can't make the loop itself atomic, but we can
+ // possibly continue to make children of the loop atomic.
+ goto case RegexNodeKind.Loop;
+ }
+ break;
+
+ // For all other loops, we may not be able to make them atomic, but we might still be able to make a node
+ // they end with be atomic. If the loop has a max iteration count of 1, then we don't need to worry about it
+ // following itself and can simply examine its child. If it has a max iteration count greater than 1, then
+ // we can examine its child iff its child could be made atomic against itself.
+ case RegexNodeKind.Loop:
+ {
+ RegexNode child = node.Child(0);
+ if (node.N == 1 || CanBeMadeAtomic(child, child, iterateNullableSubsequent: false, allowLazy: false))
+ {
+ ProcessNode(child, subsequent);
+ }
+ }
+ break;
+
+ case RegexNodeKind.Lazyloop:
+ {
+ RegexNode child = node.Child(0);
+ if (node.N == 1 || CanBeMadeAtomic(child, child, iterateNullableSubsequent: false, allowLazy: true))
+ {
+ ProcessNode(child, subsequent);
+ }
+ }
+ break;
+
case RegexNodeKind.Alternate or RegexNodeKind.BackreferenceConditional or RegexNodeKind.ExpressionConditional:
// In the case of alternation, we can't change the alternation node itself
// based on what comes after it (at least not with more complicated analysis
@@ -1931,47 +2015,6 @@ static void ProcessNode(RegexNode node, RegexNode subsequent)
}
}
- ///
- /// Recurs into the last expression of a loop node, looking to see if it can find a node
- /// that could be made atomic _assuming_ the conditions exist for it with the loop's ancestors.
- ///
- /// The found node that should be explored further for auto-atomicity; null if it doesn't exist.
- private RegexNode? FindLastExpressionInLoopForAutoAtomic()
- {
- RegexNode node = this;
-
- Debug.Assert((node.Options & RegexOptions.RightToLeft) == 0, "Currently only implemented for left-to-right");
- Debug.Assert(node.Kind is RegexNodeKind.Loop or RegexNodeKind.Lazyloop);
-
- // Start by looking at the loop's sole child.
- node = node.Child(0);
-
- // Skip past captures.
- while (node.Kind == RegexNodeKind.Capture)
- {
- node = node.Child(0);
- }
-
- // If the loop's body is a concatenate, we can skip to its last child iff that
- // last child doesn't conflict with the first child, since this whole concatenation
- // could be repeated, such that the first node ends up following the last. For
- // example, in the expression (a+[def])*, the last child is [def] and the first is
- // a+, which can't possibly overlap with [def]. In contrast, if we had (a+[ade])*,
- // [ade] could potentially match the starting 'a'.
- if (node.Kind == RegexNodeKind.Concatenate)
- {
- int concatCount = node.ChildCount();
- RegexNode lastConcatChild = node.Child(concatCount - 1);
- if (CanBeMadeAtomic(lastConcatChild, node.Child(0), iterateNullableSubsequent: false, allowLazy: false))
- {
- return lastConcatChild;
- }
- }
-
- // Otherwise, the loop has nothing that can participate in auto-atomicity.
- return null;
- }
-
/// Optimizations for positive and negative lookaheads/behinds.
private RegexNode ReduceLookaround()
{
@@ -2065,6 +2108,12 @@ private static bool CanBeMadeAtomic(RegexNode node, RegexNode subsequent, bool i
return false;
}
+ // Skip down past irrelevant nodes.
+ while (node.Kind is RegexNodeKind.Capture or RegexNodeKind.Concatenate)
+ {
+ node = node.Child(node.ChildCount() - 1);
+ }
+
// In most case, we'll simply check the node against whatever subsequent is. However, in case
// subsequent ends up being a loop with a min bound of 0, we'll also need to evaluate the node
// against whatever comes after subsequent. In that case, we'll walk the tree to find the
@@ -2072,16 +2121,13 @@ private static bool CanBeMadeAtomic(RegexNode node, RegexNode subsequent, bool i
while (true)
{
// Skip the successor down to the closest node that's guaranteed to follow it.
- int childCount;
- while ((childCount = subsequent.ChildCount()) > 0)
+ while (true)
{
- Debug.Assert(subsequent.Kind != RegexNodeKind.Group);
switch (subsequent.Kind)
{
case RegexNodeKind.Concatenate:
case RegexNodeKind.Capture:
case RegexNodeKind.Atomic:
- case RegexNodeKind.PositiveLookaround when (subsequent.Options & RegexOptions.RightToLeft) == 0: // only lookaheads, not lookbehinds (represented as RTL PositiveLookaround nodes)
case RegexNodeKind.Loop or RegexNodeKind.Lazyloop when subsequent.M > 0:
subsequent = subsequent.Child(0);
continue;
@@ -2103,10 +2149,11 @@ private static bool CanBeMadeAtomic(RegexNode node, RegexNode subsequent, bool i
// only a yes branch, we'd need to also check whatever comes after the conditional). It doesn't apply to
// backreference conditionals, as the condition itself is unknown statically and could overlap with the
// loop being considered for atomicity.
+ int childCount = subsequent.ChildCount();
switch (subsequent.Kind)
{
case RegexNodeKind.Alternate:
- case RegexNodeKind.ExpressionConditional when childCount == 3: // condition, yes, and no branch
+ case RegexNodeKind.ExpressionConditional when childCount is 3: // condition, yes, and no branch
for (int i = 0; i < childCount; i++)
{
if (!CanBeMadeAtomic(node, subsequent.Child(i), iterateNullableSubsequent, allowLazy: false))
@@ -2198,6 +2245,53 @@ private static bool CanBeMadeAtomic(RegexNode node, RegexNode subsequent, bool i
}
break;
+ case RegexNodeKind.Loop:
+ case RegexNodeKind.Lazyloop when allowLazy:
+ // With single character loops (e.g. OneLoop, NotOneLoop, SetLoop), we only need to prove there's no overlap between
+ // what that single character could be and what comes next. For arbitrary loops, we have more to prove. First, we need
+ // to understand what the loop can possibly start with and what it can possibly end with (with a single character loop,
+ // those are the same things), and we need to ensure that there's no overlap between those two sets; otherwise, a second
+ // iteration of a loop could end up giving back characters that could be consumed by the previous iteration. Second, we need
+ // to ensure that neither the starting set nor the ending set overlaps with what could possibly come after it, for the same reason.
+ RegexNode loopChild = node.Child(0);
+ if (RegexPrefixAnalyzer.FindFirstCharClass(loopChild) is not string loopStartingSet ||
+ RegexPrefixAnalyzer.FindLastCharClass(loopChild) is not string loopEndingSet ||
+ (node.N > 1 && RegexCharClass.MayOverlap(loopStartingSet, loopEndingSet)))
+ {
+ return false;
+ }
+
+ bool CharInStartingOrEndingSet(char ch) =>
+ RegexCharClass.CharInClass(ch, loopStartingSet) || RegexCharClass.CharInClass(ch, loopEndingSet);
+
+ bool MayOverlapStartingOrEndingSet(string set) =>
+ RegexCharClass.MayOverlap(set, loopStartingSet) || RegexCharClass.MayOverlap(set, loopEndingSet);
+
+ switch (subsequent.Kind)
+ {
+ case RegexNodeKind.One when !CharInStartingOrEndingSet(subsequent.Ch):
+ case RegexNodeKind.Set when !MayOverlapStartingOrEndingSet(subsequent.Str!):
+ case RegexNodeKind.Onelazy or RegexNodeKind.Oneloop or RegexNodeKind.Oneloopatomic when subsequent.M > 0 && !CharInStartingOrEndingSet(subsequent.Ch):
+ case RegexNodeKind.Setlazy or RegexNodeKind.Setloop or RegexNodeKind.Setloopatomic when subsequent.M > 0 && !MayOverlapStartingOrEndingSet(subsequent.Str!):
+ case RegexNodeKind.Multi when !CharInStartingOrEndingSet(subsequent.Str![0]):
+ case RegexNodeKind.End:
+ case RegexNodeKind.EndZ or RegexNodeKind.Eol when !CharInStartingOrEndingSet('\n'):
+ return true;
+
+ case RegexNodeKind.Onelazy or RegexNodeKind.Oneloop or RegexNodeKind.Oneloopatomic when subsequent.M == 0 && !CharInStartingOrEndingSet(subsequent.Ch):
+ case RegexNodeKind.Setlazy or RegexNodeKind.Setloop or RegexNodeKind.Setloopatomic when subsequent.M == 0 && !MayOverlapStartingOrEndingSet(subsequent.Str!):
+ case RegexNodeKind.Boundary when node.M > 0 && RegexCharClass.IsKnownWordClassSubset(loopStartingSet) && RegexCharClass.IsKnownWordClassSubset(loopEndingSet):
+ case RegexNodeKind.NonBoundary when node.M > 0 && (loopStartingSet is RegexCharClass.NotWordClass or RegexCharClass.NotDigitClass) && (loopEndingSet is RegexCharClass.NotWordClass or RegexCharClass.NotDigitClass):
+ case RegexNodeKind.ECMABoundary when node.M > 0 && (loopStartingSet is RegexCharClass.ECMAWordClass or RegexCharClass.ECMADigitClass) && (loopEndingSet is RegexCharClass.ECMAWordClass or RegexCharClass.ECMADigitClass):
+ case RegexNodeKind.NonECMABoundary when node.M > 0 && (loopStartingSet is RegexCharClass.NotECMAWordClass or RegexCharClass.NotDigitClass) && (loopEndingSet is RegexCharClass.NotECMAWordClass or RegexCharClass.NotDigitClass):
+ // The loop can be made atomic based on this subsequent node, but we'll need to evaluate the next one as well.
+ break;
+
+ default:
+ return false;
+ }
+ break;
+
default:
return false;
}
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexPrefixAnalyzer.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexPrefixAnalyzer.cs
index 6be7fdda3fc948..93ab67a2c392a0 100644
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexPrefixAnalyzer.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexPrefixAnalyzer.cs
@@ -895,13 +895,22 @@ static int GetRangeLength((char LowInclusive, char HighInclusive) range, bool ne
});
/// <summary>
- /// Computes a character class for the first character in tree. This uses a more robust algorithm
- /// than is used by TryFindFixedLiterals and thus can find starting sets it couldn't. For example,
+ /// Computes a character class for the first character in the node.
+ /// </summary>
+ /// <remarks>
+ /// This uses a more robust algorithm than is used by TryFindFixedLiterals and thus can find starting sets it couldn't. For example,
/// fixed literals won't find the starting set for a*b, as the a isn't guaranteed and the b is at a
/// variable position, but this will find [ab] as it's instead looking for anything that under any
/// circumstance could possibly start a match.
+ /// </remarks>
+ public static string? FindFirstCharClass(RegexNode root) => FindFirstOrLastCharClass(root, findFirst: true);
+
+ /// <summary>
+ /// Computes a character class for the last character in the node.
/// </summary>
- public static string? FindFirstCharClass(RegexNode root)
+ public static string? FindLastCharClass(RegexNode root) => FindFirstOrLastCharClass(root, findFirst: false);
+
+ private static string? FindFirstOrLastCharClass(RegexNode root, bool findFirst)
{
// Explore the graph, adding found chars into a result set, which is lazily initialized so that
// we can initialize it to a parsed set if we discover one first (this is helpful not just for allocation
@@ -913,7 +922,7 @@ static int GetRangeLength((char LowInclusive, char HighInclusive) range, bool ne
// whole pattern was nullable such that it could match an empty string, in which case we
// can't make any statements about what begins a match.
RegexCharClass? cc = null;
- return TryFindFirstCharClass(root, ref cc) == true ?
+ return TryFindFirstOrLastCharClass(root, findFirst, ref cc) == true ?
cc!.ToStringClass() :
null;
@@ -930,7 +939,7 @@ static int GetRangeLength((char LowInclusive, char HighInclusive) range, bool ne
// it's zero-width (e.g. empty, a lookaround, an anchor, etc.) or it could be zero-width
// (e.g. a loop with a min bound of 0). A concatenation processing a child that returns
// null needs to keep processing the next child.
- static bool? TryFindFirstCharClass(RegexNode node, ref RegexCharClass? cc)
+ static bool? TryFindFirstOrLastCharClass(RegexNode node, bool findFirst, ref RegexCharClass? cc)
{
if (!StackHelper.TryEnsureSufficientExecutionStack())
{
@@ -992,7 +1001,8 @@ static int GetRangeLength((char LowInclusive, char HighInclusive) range, bool ne
if (cc is null || cc.CanMerge)
{
cc ??= new RegexCharClass();
- cc.AddChar(node.Str![(node.Options & RegexOptions.RightToLeft) != 0 ? node.Str.Length - 1 : 0]);
+ bool firstChar = findFirst == ((node.Options & RegexOptions.RightToLeft) == 0);
+ cc.AddChar(node.Str![firstChar ? 0 : node.Str.Length - 1]);
return true;
}
return false;
@@ -1019,14 +1029,14 @@ static int GetRangeLength((char LowInclusive, char HighInclusive) range, bool ne
// Groups. These don't contribute anything of their own, and are just pass-throughs to their children.
case RegexNodeKind.Atomic:
case RegexNodeKind.Capture:
- return TryFindFirstCharClass(node.Child(0), ref cc);
+ return TryFindFirstOrLastCharClass(node.Child(0), findFirst, ref cc);
// Loops. Like groups, these are mostly pass-through: if the child fails, then the whole operation needs
// to fail, and if the child is nullable, then the loop is as well. However, if the child succeeds but
// the loop has a lower bound of 0, then the loop is still nullable.
case RegexNodeKind.Loop:
case RegexNodeKind.Lazyloop:
- return TryFindFirstCharClass(node.Child(0), ref cc) switch
+ return TryFindFirstOrLastCharClass(node.Child(0), findFirst, ref cc) switch
{
false => false,
null => null,
@@ -1040,12 +1050,26 @@ static int GetRangeLength((char LowInclusive, char HighInclusive) range, bool ne
case RegexNodeKind.Concatenate:
{
int childCount = node.ChildCount();
- for (int i = 0; i < childCount; i++)
+ if (findFirst)
+ {
+ for (int i = 0; i < childCount; i++)
+ {
+ bool? childResult = TryFindFirstOrLastCharClass(node.Child(i), findFirst, ref cc);
+ if (childResult != null)
+ {
+ return childResult;
+ }
+ }
+ }
+ else
{
- bool? childResult = TryFindFirstCharClass(node.Child(i), ref cc);
- if (childResult != null)
+ for (int i = childCount - 1; i >= 0; i--)
{
- return childResult;
+ bool? childResult = TryFindFirstOrLastCharClass(node.Child(i), findFirst, ref cc);
+ if (childResult != null)
+ {
+ return childResult;
+ }
}
}
return null;
@@ -1060,7 +1084,7 @@ static int GetRangeLength((char LowInclusive, char HighInclusive) range, bool ne
bool anyChildWasNull = false;
for (int i = 0; i < childCount; i++)
{
- bool? childResult = TryFindFirstCharClass(node.Child(i), ref cc);
+ bool? childResult = TryFindFirstOrLastCharClass(node.Child(i), findFirst, ref cc);
if (childResult is null)
{
anyChildWasNull = true;
@@ -1078,7 +1102,7 @@ static int GetRangeLength((char LowInclusive, char HighInclusive) range, bool ne
case RegexNodeKind.BackreferenceConditional:
case RegexNodeKind.ExpressionConditional:
int branchStart = node.Kind is RegexNodeKind.BackreferenceConditional ? 0 : 1;
- return (TryFindFirstCharClass(node.Child(branchStart), ref cc), TryFindFirstCharClass(node.Child(branchStart + 1), ref cc)) switch
+ return (TryFindFirstOrLastCharClass(node.Child(branchStart), findFirst, ref cc), TryFindFirstOrLastCharClass(node.Child(branchStart + 1), findFirst, ref cc)) switch
{
(false, _) or (_, false) => false,
(null, _) or (_, null) => null,
diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs
index b7f48470256d02..b720936b59b995 100644
--- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs
+++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs
@@ -272,6 +272,12 @@ public static IEnumerable