diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.cs index 8ce8bcc203dab9..02367fa02aeab1 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.cs @@ -474,7 +474,9 @@ public SymbolicMatch FindMatch(RegexRunnerMode mode, ReadOnlySpan input, i } else { - Registers endRegisters = _containsAnyAnchor ? + // Use DefaultInputReader when pattern contains \Z anchor (needs special \n handling), + // otherwise use NoZAnchorOptimizedInputReader for correct capture tracking + Registers endRegisters = _containsEndZAnchor ? FindSubcaptures(input, matchStart, matchEnd, perThreadData) : FindSubcaptures(input, matchStart, matchEnd, perThreadData); return new SymbolicMatch(matchStart, matchEnd - matchStart, endRegisters.CaptureStarts, endRegisters.CaptureEnds); @@ -1481,9 +1483,9 @@ public static int GetPositionId(SymbolicRegexMatcher matcher, ReadOnlySpan { if ((uint)pos < (uint)input.Length) { - // Find the minterm, handling the special case for the last \n for states that start with a relevant anchor + // Find the minterm, handling the special case for the last \n for \Z anchor int c = input[pos]; - return c == '\n' && pos == input.Length - 1 ? + return c == '\n' && pos == input.Length - 1 && matcher._containsEndZAnchor ? matcher._minterms.Length : // mintermId = minterms.Length represents an \n at the very end of input matcher._mintermClassifier.GetMintermID(c); } diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Groups.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Groups.Tests.cs index f95299c430ee7e..6d7c7917f8e3ef 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Groups.Tests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Groups.Tests.cs @@ -461,6 +461,17 @@ public static IEnumerable Groups_MemberData() yield return (enUS, @"^(cat)\s+(dog)", "cat \n\n\n dog", RegexOptions.None, new string[] { "cat \n\n\n dog", "cat", "dog" }); yield return (enUS, @"^(cat)\s+(dog)", "cat \n\n\n dog", RegexOptions.Multiline, new string[] { "cat \n\n\n dog", "cat", "dog" }); yield return (enUS, @"(mouse)\s\n^(cat)\s+(dog)", "mouse\n\ncat \n\n\n dog", RegexOptions.Multiline, new string[] { "mouse\n\ncat \n\n\n dog", "mouse", "cat", "dog" }); + + // Regression test for https://github.com/dotnet/runtime/issues/120202 + // Capture groups with beginning anchor and newline at end should work correctly with different end anchors + yield return (enUS, @"^(A)(\s)", "A\n", RegexOptions.None, new string[] { "A\n", "A", "\n" }); + yield return (enUS, @"^(A)(\s)\z", "A\n", RegexOptions.None, new string[] { "A\n", "A", "\n" }); + yield return (enUS, @"^(A)(\s)$", "A\n", RegexOptions.None, new string[] { "A\n", "A", "\n" }); + yield return (enUS, @"^(A)(\s)\Z", "A\n", RegexOptions.None, new string[] { "A\n", "A", "\n" }); + yield return (enUS, @"(A)(\s)\z", "A\n", RegexOptions.None, new string[] { "A\n", "A", "\n" }); // without beginning anchor + yield return (enUS, @"^(A)(\s)", "A\n", RegexOptions.Multiline, new string[] { "A\n", "A", "\n" }); + yield return (enUS, @"^(A)(\s)", "A ", RegexOptions.None, new string[] { "A ", "A", " " }); + if (!RegexHelpers.IsNonBacktracking(engine)) // ECMAScript not supported { yield return (enUS, @"^cat\s+dog", "cat \n\n\n dog", RegexOptions.ECMAScript, new string[] { "cat \n\n\n dog" });