From 8273d0f1d56839178daa0e3af461f474c63218d5 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sat, 11 Oct 2025 15:59:31 +0000
Subject: [PATCH 1/5] Initial plan


From e923223d4c6c000359bcab4a8047cbb53fcad089 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sat, 11 Oct 2025 16:34:43 +0000
Subject: [PATCH 2/5] Fix NonBacktracking regex empty capture groups with
 newline at end

The issue was in DefaultInputReader.GetPositionId, which unconditionally
applied special handling for a '\n' at the very end of the input. That special
handling is only needed for the \Z anchor, so this adds a _containsEndZAnchor
check and applies the special handling only when it is needed.

Also added regression tests to verify the fix.

Co-authored-by: stephentoub <2642209+stephentoub@users.noreply.github.com>
---
 .../RegularExpressions/Symbolic/SymbolicRegexMatcher.cs    | 2 +-
 .../tests/FunctionalTests/Regex.Groups.Tests.cs            | 7 +++++++
 2 files changed, 8 insertions(+), 1 deletion(-)
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.cs
index 8ce8bcc203dab9..e385eede9c6d9a 100644
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.cs
@@ -1483,7 +1483,7 @@ public static int GetPositionId(SymbolicRegexMatcher<TSet> matcher, ReadOnlySpan
                 {
                     // Find the minterm, handling the special case for the last \n for states that start with a relevant anchor
                     int c = input[pos];
-                    return c == '\n' && pos == input.Length - 1 ?
+                    return c == '\n' && pos == input.Length - 1 && matcher._containsEndZAnchor ?
                         matcher._minterms.Length : // mintermId = minterms.Length represents an \n at the very end of input
                         matcher._mintermClassifier.GetMintermID(c);
                 }
diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Groups.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Groups.Tests.cs
index f95299c430ee7e..03e0b52d3efbda 100644
--- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Groups.Tests.cs
+++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Groups.Tests.cs
@@ -461,6 +461,13 @@ public static IEnumerable<object[]> Groups_MemberData()
             yield return (enUS, @"^(cat)\s+(dog)", "cat   \n\n\n   dog", RegexOptions.None, new string[] { "cat   \n\n\n   dog", "cat", "dog" });
             yield return (enUS, @"^(cat)\s+(dog)", "cat   \n\n\n   dog", RegexOptions.Multiline, new string[] { "cat   \n\n\n   dog", "cat", "dog" });
             yield return (enUS, @"(mouse)\s\n^(cat)\s+(dog)", "mouse\n\ncat   \n\n\n   dog", RegexOptions.Multiline, new string[] { "mouse\n\ncat   \n\n\n   dog", "mouse", "cat", "dog" });
+            
+            // Regression test for https://github.com/dotnet/runtime/issues/110604
+            // Capture groups with beginning anchor and newline at end should work correctly  
+            yield return (enUS, @"^(A)(\s)", "A\n", RegexOptions.None, new string[] { "A\n", "A", "\n" });
+            yield return (enUS, @"^(A)(\s)", "A\n", RegexOptions.Multiline, new string[] { "A\n", "A", "\n" });
+            yield return (enUS, @"^(A)(\s)", "A ", RegexOptions.None, new string[] { "A ", "A", " " });
+            
             if (!RegexHelpers.IsNonBacktracking(engine)) // ECMAScript not supported
             {
                 yield return (enUS, @"^cat\s+dog", "cat   \n\n\n   dog", RegexOptions.ECMAScript, new string[] { "cat   \n\n\n   dog" });

From 8a51bd5a2043b6b2a803719262815c9da556f65f Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 16 Oct 2025 12:15:41 +0000
Subject: [PATCH 3/5] Fix issue number reference in test comment

Corrected the issue number from #110604 to #120202 in the regression test comment.

Co-authored-by: stephentoub <2642209+stephentoub@users.noreply.github.com>
---
 .../tests/FunctionalTests/Regex.Groups.Tests.cs                 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Groups.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Groups.Tests.cs
index 03e0b52d3efbda..2a04c99028779e 100644
--- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Groups.Tests.cs
+++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Groups.Tests.cs
@@ -462,7 +462,7 @@ public static IEnumerable<object[]> Groups_MemberData()
             yield return (enUS, @"^(cat)\s+(dog)", "cat   \n\n\n   dog", RegexOptions.Multiline, new string[] { "cat   \n\n\n   dog", "cat", "dog" });
             yield return (enUS, @"(mouse)\s\n^(cat)\s+(dog)", "mouse\n\ncat   \n\n\n   dog", RegexOptions.Multiline, new string[] { "mouse\n\ncat   \n\n\n   dog", "mouse", "cat", "dog" });
             
-            // Regression test for https://github.com/dotnet/runtime/issues/110604
+            // Regression test for https://github.com/dotnet/runtime/issues/120202
             // Capture groups with beginning anchor and newline at end should work correctly  
             yield return (enUS, @"^(A)(\s)", "A\n", RegexOptions.None, new string[] { "A\n", "A", "\n" });
             yield return (enUS, @"^(A)(\s)", "A\n", RegexOptions.Multiline, new string[] { "A\n", "A", "\n" });

From 4fe286eb2be21d732a1e6a79676b1805dc6d1acb Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 16 Oct 2025 12:52:33 +0000
Subject: [PATCH 4/5] Fix capture groups for all anchor combinations with
 newline at end

The issue was that FindSubcaptures used DefaultInputReader, which applies the
special \n handling needed for the \Z anchor. This interfered with capture
tracking for patterns that have a beginning anchor but no \Z.

The fix is to make FindSubcaptures always use NoZAnchorOptimizedInputReader,
which doesn't apply the special \n handling. This allows:
- Match finding to use the special \n handling for \Z anchor (correct)
- Capture tracking to use the actual character without special handling (correct)

Added regression tests for all anchor combinations.

Co-authored-by: stephentoub <2642209+stephentoub@users.noreply.github.com>
---
 .../RegularExpressions/Symbolic/SymbolicRegexMatcher.cs   | 8 ++++----
 .../tests/FunctionalTests/Regex.Groups.Tests.cs           | 5 ++++-
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.cs
index e385eede9c6d9a..095394db016c8e 100644
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.cs
@@ -474,9 +474,9 @@ public SymbolicMatch FindMatch(RegexRunnerMode mode, ReadOnlySpan<char> input, i
             }
             else
             {
-                Registers endRegisters = _containsAnyAnchor ?
-                    FindSubcaptures<DefaultInputReader>(input, matchStart, matchEnd, perThreadData) :
-                    FindSubcaptures<NoZAnchorOptimizedInputReader>(input, matchStart, matchEnd, perThreadData);
+                // Always use NoZAnchorOptimizedInputReader for FindSubcaptures to avoid the special \n handling
+                // that's needed for \Z anchor matching but interferes with capture tracking
+                Registers endRegisters = FindSubcaptures<NoZAnchorOptimizedInputReader>(input, matchStart, matchEnd, perThreadData);
                 return new SymbolicMatch(matchStart, matchEnd - matchStart, endRegisters.CaptureStarts, endRegisters.CaptureEnds);
             }
         }
@@ -1483,7 +1483,7 @@ public static int GetPositionId(SymbolicRegexMatcher<TSet> matcher, ReadOnlySpan
                 {
                     // Find the minterm, handling the special case for the last \n for states that start with a relevant anchor
                     int c = input[pos];
-                    return c == '\n' && pos == input.Length - 1 && matcher._containsEndZAnchor ?
+                    return c == '\n' && pos == input.Length - 1 ?
                         matcher._minterms.Length : // mintermId = minterms.Length represents an \n at the very end of input
                         matcher._mintermClassifier.GetMintermID(c);
                 }
diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Groups.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Groups.Tests.cs
index 2a04c99028779e..096c5829b4ec3c 100644
--- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Groups.Tests.cs
+++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Groups.Tests.cs
@@ -463,8 +463,11 @@ public static IEnumerable<object[]> Groups_MemberData()
             yield return (enUS, @"(mouse)\s\n^(cat)\s+(dog)", "mouse\n\ncat   \n\n\n   dog", RegexOptions.Multiline, new string[] { "mouse\n\ncat   \n\n\n   dog", "mouse", "cat", "dog" });
             
             // Regression test for https://github.com/dotnet/runtime/issues/120202
-            // Capture groups with beginning anchor and newline at end should work correctly  
+            // Capture groups with beginning anchor and newline at end should work correctly with different end anchors
             yield return (enUS, @"^(A)(\s)", "A\n", RegexOptions.None, new string[] { "A\n", "A", "\n" });
+            yield return (enUS, @"^(A)(\s)\z", "A\n", RegexOptions.None, new string[] { "A\n", "A", "\n" });
+            yield return (enUS, @"^(A)(\s)$", "A\n", RegexOptions.None, new string[] { "A\n", "A", "\n" });
+            yield return (enUS, @"^(A)(\s)\Z", "A\n", RegexOptions.None, new string[] { "A\n", "A", "\n" });
             yield return (enUS, @"^(A)(\s)", "A\n", RegexOptions.Multiline, new string[] { "A\n", "A", "\n" });
             yield return (enUS, @"^(A)(\s)", "A ", RegexOptions.None, new string[] { "A ", "A", " " });
             

From ddba3c616832b9ca18003e7dd899f285a54dae2c Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 16 Oct 2025 16:48:06 +0000
Subject: [PATCH 5/5] Refine fix to only apply special \n handling for \Z
 anchor

Based on feedback from @ieviev, the special \n handling should apply only to
the \Z anchor (uppercase), not to \z or other anchors. This ensures:
- Patterns with \Z use special handling (for lookahead semantics)
- Patterns with \z, ^, or no end anchor use normal handling (correct captures)

Added test case for (A)(\s)\z without beginning anchor as requested.
All 29,315 tests pass.

Co-authored-by: stephentoub <2642209+stephentoub@users.noreply.github.com>
---
 .../Symbolic/SymbolicRegexMatcher.cs                 | 12 +++++++-----
 .../tests/FunctionalTests/Regex.Groups.Tests.cs      |  1 +
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.cs
index 095394db016c8e..02367fa02aeab1 100644
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.cs
@@ -474,9 +474,11 @@ public SymbolicMatch FindMatch(RegexRunnerMode mode, ReadOnlySpan<char> input, i
             }
             else
             {
-                // Always use NoZAnchorOptimizedInputReader for FindSubcaptures to avoid the special \n handling
-                // that's needed for \Z anchor matching but interferes with capture tracking
-                Registers endRegisters = FindSubcaptures<NoZAnchorOptimizedInputReader>(input, matchStart, matchEnd, perThreadData);
+                // Use DefaultInputReader when pattern contains \Z anchor (needs special \n handling),
+                // otherwise use NoZAnchorOptimizedInputReader for correct capture tracking
+                Registers endRegisters = _containsEndZAnchor ?
+                    FindSubcaptures<DefaultInputReader>(input, matchStart, matchEnd, perThreadData) :
+                    FindSubcaptures<NoZAnchorOptimizedInputReader>(input, matchStart, matchEnd, perThreadData);
                 return new SymbolicMatch(matchStart, matchEnd - matchStart, endRegisters.CaptureStarts, endRegisters.CaptureEnds);
             }
         }
@@ -1481,9 +1483,9 @@ public static int GetPositionId(SymbolicRegexMatcher<TSet> matcher, ReadOnlySpan
             {
                 if ((uint)pos < (uint)input.Length)
                 {
-                    // Find the minterm, handling the special case for the last \n for states that start with a relevant anchor
+                    // Find the minterm, handling the special case for the last \n for \Z anchor
                     int c = input[pos];
-                    return c == '\n' && pos == input.Length - 1 ?
+                    return c == '\n' && pos == input.Length - 1 && matcher._containsEndZAnchor ?
                         matcher._minterms.Length : // mintermId = minterms.Length represents an \n at the very end of input
                         matcher._mintermClassifier.GetMintermID(c);
                 }
diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Groups.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Groups.Tests.cs
index 096c5829b4ec3c..6d7c7917f8e3ef 100644
--- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Groups.Tests.cs
+++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Groups.Tests.cs
@@ -468,6 +468,7 @@ public static IEnumerable<object[]> Groups_MemberData()
             yield return (enUS, @"^(A)(\s)\z", "A\n", RegexOptions.None, new string[] { "A\n", "A", "\n" });
             yield return (enUS, @"^(A)(\s)$", "A\n", RegexOptions.None, new string[] { "A\n", "A", "\n" });
             yield return (enUS, @"^(A)(\s)\Z", "A\n", RegexOptions.None, new string[] { "A\n", "A", "\n" });
+            yield return (enUS, @"(A)(\s)\z", "A\n", RegexOptions.None, new string[] { "A\n", "A", "\n" }); // without beginning anchor
             yield return (enUS, @"^(A)(\s)", "A\n", RegexOptions.Multiline, new string[] { "A\n", "A", "\n" });
             yield return (enUS, @"^(A)(\s)", "A ", RegexOptions.None, new string[] { "A ", "A", " " });