diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs index 3dce343ea8867a..de8df332e36359 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs @@ -2096,8 +2096,10 @@ private RegexNode ReduceLookaround() Debug.Assert(ChildCount() == 1); // Captures inside of negative lookarounds are undone after the lookaround. Thus, if there's nothing - // inside of the negative lookaround that needs that capture group (namely a backreference), we can - // remove the capture. + // inside of the negative lookaround that relies on or impacts persisted state, we can remove the capture. + // This includes backreferences (because backreferences within the lookaround still need to refer to that + // capture group) and balancing groups (because they can impact and are impacted by capture stacks from + // captures outside of the lookaround). if (Kind is RegexNodeKind.NegativeLookaround && ContainsKind(Child(0), [RegexNodeKind.Backreference, RegexNodeKind.BackreferenceConditional]) is false) { if (RemoveCaptures(this, 0)) @@ -2111,7 +2113,9 @@ static bool RemoveCaptures(RegexNode parent, int nodeIndex) { RegexNode node = parent.Child(nodeIndex); - if (node.Kind is RegexNodeKind.Capture) + // Only remove captures that don't rely on or impact persisted state. + // Balancing groups (N != -1) impact capture stacks and must be preserved. + if (node is { Kind: RegexNodeKind.Capture, N: -1 }) { parent.ReplaceChild(nodeIndex, node.Child(0)); RemoveCaptures(parent, nodeIndex); diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Count.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Count.Tests.cs index d568aa2027406f..76fa7713faab68 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Count.Tests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Count.Tests.cs @@ -90,6 +90,9 @@ public static IEnumerable Count_ReturnsExpectedCount_TestData() yield return new object[] { engine, @"\b\w+\b", "abc def ghi jkl", 15, RegexOptions.RightToLeft, 4 }; yield return new object[] { RegexEngine.Interpreter, @"(?<=abc)\w", "abcxabcy", 8, RegexOptions.RightToLeft, 2 }; yield return new object[] { engine, @"(?<=abc)\w", "abcxabcy", 7, RegexOptions.RightToLeft, 1 }; + + // Balancing groups in negative lookarounds should not be removed + yield return new object[] { engine, @"()(?'-1')(?!(?'-1'))", "abc", 0, RegexOptions.None, 4 }; } } } diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs index 01553e0659b850..f0622cb83d360c 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs @@ -649,6 +649,11 @@ public static IEnumerable Match_MemberData() yield return (@"(?cat)\w+(?dog)", "cat_Hello_World_dog", RegexOptions.None, 0, 19, false, string.Empty); yield return (@"(.)(?'2-1'(?'-1'))", "cat", RegexOptions.None, 0, 3, false, string.Empty); yield return (@"(?'2-1'(.))", "cat", RegexOptions.None, 0, 3, true, "c"); + + // Balancing groups in negative lookarounds should not be removed + // The pattern captures group 1, uncaptures it, then checks the negative lookahead + // The negative lookahead contains a balancing group that should not be removed + yield return (@"()(?'-1')(?!(?'-1'))", "a", RegexOptions.None, 0, 1, true, string.Empty); } // Atomic Zero-Width Assertions \A \Z \z \b \B