From 8cebd6608592ec4d7efc01fd7791d026ab832064 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 17 Oct 2025 19:44:50 +0000 Subject: [PATCH 1/3] Initial plan From 807f5c63249ed3feee503f6799841f7f6fc87b5d Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 17 Oct 2025 20:23:46 +0000 Subject: [PATCH 2/3] Fix balancing groups incorrectly removed from negative lookarounds Co-authored-by: stephentoub <2642209+stephentoub@users.noreply.github.com> --- .../src/System/Text/RegularExpressions/RegexNode.cs | 10 +++++++--- .../tests/FunctionalTests/Regex.Count.Tests.cs | 3 +++ .../tests/FunctionalTests/Regex.Match.Tests.cs | 5 +++++ 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs index a632eaccbc8ae8..b4a083633bb054 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs @@ -2113,9 +2113,13 @@ static bool RemoveCaptures(RegexNode parent, int nodeIndex) if (node.Kind is RegexNodeKind.Capture) { - parent.ReplaceChild(nodeIndex, node.Child(0)); - RemoveCaptures(parent, nodeIndex); - return true; + // Don't remove balancing groups (N != -1 indicates a balancing group with an uncapture) + if (node.N == -1) + { + parent.ReplaceChild(nodeIndex, node.Child(0)); + RemoveCaptures(parent, nodeIndex); + return true; + } } bool changesMade = false; diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Count.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Count.Tests.cs index d568aa2027406f..76fa7713faab68 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Count.Tests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Count.Tests.cs @@ -90,6 +90,9 @@ public static IEnumerable Count_ReturnsExpectedCount_TestData() yield return new object[] { engine, @"\b\w+\b", "abc def ghi jkl", 15, RegexOptions.RightToLeft, 4 }; yield return new object[] { RegexEngine.Interpreter, @"(?<=abc)\w", "abcxabcy", 8, RegexOptions.RightToLeft, 2 }; yield return new object[] { engine, @"(?<=abc)\w", "abcxabcy", 7, RegexOptions.RightToLeft, 1 }; + + // Balancing groups in negative lookarounds should not be removed + yield return new object[] { engine, @"()(?'-1')(?!(?'-1'))", "abc", 0, RegexOptions.None, 4 }; } } } diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs index 13a7d4daaea6dc..accc57e9cd132b 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs @@ -655,6 +655,11 @@ public static IEnumerable Match_MemberData() yield return (@"(?cat)\w+(?dog)", "cat_Hello_World_dog", RegexOptions.None, 0, 19, false, string.Empty); yield return (@"(.)(?'2-1'(?'-1'))", "cat", RegexOptions.None, 0, 3, false, string.Empty); yield return (@"(?'2-1'(.))", "cat", RegexOptions.None, 0, 3, true, "c"); + + // Balancing groups in negative lookarounds should not be removed + // The pattern captures group 1, uncaptures it, then checks the negative lookahead + // The negative lookahead contains a balancing group that should not be removed + yield return (@"()(?'-1')(?!(?'-1'))", "a", RegexOptions.None, 0, 1, true, string.Empty); } // Atomic Zero-Width Assertions \A \Z \z \b \B From 575c5e825bd25e006fa09176f1435f38fdc03bb2 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 17 Oct 2025 21:01:43 +0000 Subject: [PATCH 3/3] Apply code review feedback: use pattern matching and improve comments Co-authored-by: stephentoub <2642209+stephentoub@users.noreply.github.com> --- .../Text/RegularExpressions/RegexNode.cs | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs index b4a083633bb054..37f7d4b820e636 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs @@ -2096,8 +2096,10 @@ private RegexNode ReduceLookaround() Debug.Assert(ChildCount() == 1); // Captures inside of negative lookarounds are undone after the lookaround. Thus, if there's nothing - // inside of the negative lookaround that needs that capture group (namely a backreference), we can - // remove the capture. + // inside of the negative lookaround that relies on or impacts persisted state, we can remove the capture. + // This includes backreferences (because backreferences within the lookaround still need to refer to that + // capture group) and balancing groups (because they can impact and are impacted by capture stacks from + // captures outside of the lookaround). if (Kind is RegexNodeKind.NegativeLookaround && ContainsKind(Child(0), [RegexNodeKind.Backreference, RegexNodeKind.BackreferenceConditional]) is false) { if (RemoveCaptures(this, 0)) @@ -2111,15 +2113,13 @@ static bool RemoveCaptures(RegexNode parent, int nodeIndex) { RegexNode node = parent.Child(nodeIndex); - if (node.Kind is RegexNodeKind.Capture) + // Only remove captures that don't rely on or impact persisted state. + // Balancing groups (N != -1) impact capture stacks and must be preserved. + if (node is { Kind: RegexNodeKind.Capture, N: -1 }) { - // Don't remove balancing groups (N != -1 indicates a balancing group with an uncapture) - if (node.N == -1) - { - parent.ReplaceChild(nodeIndex, node.Child(0)); - RemoveCaptures(parent, nodeIndex); - return true; - } + parent.ReplaceChild(nodeIndex, node.Child(0)); + RemoveCaptures(parent, nodeIndex); + return true; } bool changesMade = false;