Skip to content

Commit

Permalink
Add Regex span startat overloads (#71228)
Browse files Browse the repository at this point in the history
Also fixes the new Count methods to behave correctly for RightToLeft.
  • Loading branch information
stephentoub authored Jun 27, 2022
1 parent 872fef0 commit 0c473ab
Show file tree
Hide file tree
Showing 7 changed files with 110 additions and 32 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,7 @@ public static void CompileToAssembly(System.Text.RegularExpressions.RegexCompila
public static void CompileToAssembly(System.Text.RegularExpressions.RegexCompilationInfo[] regexinfos, System.Reflection.AssemblyName assemblyname, System.Reflection.Emit.CustomAttributeBuilder[]? attributes, string? resourceFile) { }
public int Count(string input) { throw null; }
public int Count(System.ReadOnlySpan<char> input) { throw null; }
public int Count(System.ReadOnlySpan<char> input, int startat) { throw null; }
public static int Count(string input, [System.Diagnostics.CodeAnalysis.StringSyntax(System.Diagnostics.CodeAnalysis.StringSyntaxAttribute.Regex)] string pattern) { throw null; }
public static int Count(string input, [System.Diagnostics.CodeAnalysis.StringSyntax(System.Diagnostics.CodeAnalysis.StringSyntaxAttribute.Regex, "options")] string pattern, System.Text.RegularExpressions.RegexOptions options) { throw null; }
public static int Count(string input, [System.Diagnostics.CodeAnalysis.StringSyntax(System.Diagnostics.CodeAnalysis.StringSyntaxAttribute.Regex, "options")] string pattern, System.Text.RegularExpressions.RegexOptions options, System.TimeSpan matchTimeout) { throw null; }
Expand All @@ -172,6 +173,7 @@ public static void CompileToAssembly(System.Text.RegularExpressions.RegexCompila
public static int Count(System.ReadOnlySpan<char> input, [System.Diagnostics.CodeAnalysis.StringSyntax(System.Diagnostics.CodeAnalysis.StringSyntaxAttribute.Regex, "options")] string pattern, System.Text.RegularExpressions.RegexOptions options, System.TimeSpan matchTimeout) { throw null; }
public static string Escape(string str) { throw null; }
public System.Text.RegularExpressions.Regex.ValueMatchEnumerator EnumerateMatches(System.ReadOnlySpan<char> input) { throw null; }
public System.Text.RegularExpressions.Regex.ValueMatchEnumerator EnumerateMatches(System.ReadOnlySpan<char> input, int startat) { throw null; }
public static System.Text.RegularExpressions.Regex.ValueMatchEnumerator EnumerateMatches(System.ReadOnlySpan<char> input, [System.Diagnostics.CodeAnalysis.StringSyntaxAttribute("Regex")] string pattern) { throw null; }
public static System.Text.RegularExpressions.Regex.ValueMatchEnumerator EnumerateMatches(System.ReadOnlySpan<char> input, [System.Diagnostics.CodeAnalysis.StringSyntaxAttribute("Regex", new object[]{ "options"})] string pattern, System.Text.RegularExpressions.RegexOptions options) { throw null; }
public static System.Text.RegularExpressions.Regex.ValueMatchEnumerator EnumerateMatches(System.ReadOnlySpan<char> input, [System.Diagnostics.CodeAnalysis.StringSyntaxAttribute("Regex", new object[]{ "options"})] string pattern, System.Text.RegularExpressions.RegexOptions options, System.TimeSpan matchTimeout) { throw null; }
Expand All @@ -181,6 +183,7 @@ public static void CompileToAssembly(System.Text.RegularExpressions.RegexCompila
public int GroupNumberFromName(string name) { throw null; }
protected void InitializeReferences() { }
public bool IsMatch(System.ReadOnlySpan<char> input) { throw null; }
public bool IsMatch(System.ReadOnlySpan<char> input, int startat) { throw null; }
public static bool IsMatch(System.ReadOnlySpan<char> input, [System.Diagnostics.CodeAnalysis.StringSyntaxAttribute("Regex")] string pattern) { throw null; }
public static bool IsMatch(System.ReadOnlySpan<char> input, [System.Diagnostics.CodeAnalysis.StringSyntaxAttribute("Regex", "options")] string pattern, System.Text.RegularExpressions.RegexOptions options) { throw null; }
public static bool IsMatch(System.ReadOnlySpan<char> input, [System.Diagnostics.CodeAnalysis.StringSyntaxAttribute("Regex", "options")] string pattern, System.Text.RegularExpressions.RegexOptions options, System.TimeSpan matchTimeout) { throw null; }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ public int Count(string input)

int count = 0;

RunAllMatchesWithCallback(input, 0, ref count, static (ref int count, Match match) =>
RunAllMatchesWithCallback(input, RightToLeft ? input.Length : 0, ref count, static (ref int count, Match match) =>
{
count++;
return true;
Expand All @@ -34,11 +34,20 @@ public int Count(string input)
/// </summary>
/// <param name="input">The span to search for a match.</param>
/// <returns>The number of matches.</returns>
public int Count(ReadOnlySpan<char> input)
public int Count(ReadOnlySpan<char> input)
{
    // Pick the default starting position: the end of the span for a
    // right-to-left regex, otherwise the beginning.
    int startat = RightToLeft ? input.Length : 0;

    // Defer to the overload that accepts an explicit starting position.
    return Count(input, startat);
}

/// <summary>
/// Searches an input span for all occurrences of a regular expression and returns the number of matches.
/// </summary>
/// <param name="input">The span to search for a match.</param>
/// <param name="startat">The zero-based character position at which to start the search.</param>
/// <returns>The number of matches.</returns>
public int Count(ReadOnlySpan<char> input, int startat)
{
int count = 0;

RunAllMatchesWithCallback(input, 0, ref count, static (ref int count, Match match) =>
RunAllMatchesWithCallback(input, startat, ref count, static (ref int count, Match match) =>
{
count++;
return true;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,21 @@ public static ValueMatchEnumerator EnumerateMatches(ReadOnlySpan<char> input, [S
public ValueMatchEnumerator EnumerateMatches(ReadOnlySpan<char> input)
{
    // Default starting position: the end of the span for a right-to-left
    // regex, otherwise index 0.
    int startat = RightToLeft ? input.Length : 0;

    return new ValueMatchEnumerator(this, input, startat);
}

/// <summary>
/// Searches an input span, beginning at the specified position, for all occurrences of a regular expression
/// and returns a <see cref="ValueMatchEnumerator"/> to iterate over the matches.
/// </summary>
/// <remarks>
/// Matching is lazy: no match is attempted until <see cref="ValueMatchEnumerator.MoveNext"/> is invoked, and each
/// <see cref="ValueMatchEnumerator.MoveNext"/> call performs exactly one match. Because of this, any change made to
/// the passed-in input between calls to <see cref="ValueMatchEnumerator.MoveNext"/> will affect the match results.
/// Both the enumerator returned by this method and the structs it yields to wrap each match are ref structs,
/// which makes this method amortized allocation-free.
/// </remarks>
/// <param name="input">The span to search for a match.</param>
/// <param name="startat">The zero-based character position at which to start the search.</param>
/// <returns>A <see cref="ValueMatchEnumerator"/> to iterate over the matches.</returns>
public ValueMatchEnumerator EnumerateMatches(ReadOnlySpan<char> input, int startat)
{
    // The caller-supplied position is handed to the enumerator unchanged.
    return new ValueMatchEnumerator(this, input, startat);
}

/// <summary>
/// Represents an enumerator containing the set of successful matches found by iteratively applying a regular expression pattern to the input span.
/// </summary>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,15 +80,6 @@ public bool IsMatch(string input)
return RunSingleMatch(RegexRunnerMode.ExistenceRequired, -1, input, 0, input.Length, RightToLeft ? input.Length : 0) is null;
}

/// <summary>
/// Determines whether the regular expression specified in the Regex constructor finds a match in the given input span.
/// </summary>
/// <param name="input">The span to search for a match.</param>
/// <returns><see langword="true"/> if the regular expression finds a match; otherwise, <see langword="false"/>.</returns>
/// <exception cref="RegexMatchTimeoutException">A time-out occurred.</exception>
public bool IsMatch(ReadOnlySpan<char> input)
{
    // Default starting position: the end of the span for a right-to-left
    // regex, otherwise index 0.
    int startat = RightToLeft ? input.Length : 0;

    return RunSingleMatch(RegexRunnerMode.ExistenceRequired, -1, input, startat).Success;
}

/// <summary>
/// Searches the input string for one or more matches using the previous pattern and options,
/// with a new starting position.
Expand All @@ -103,6 +94,25 @@ public bool IsMatch(string input, int startat)
return RunSingleMatch(RegexRunnerMode.ExistenceRequired, -1, input, 0, input.Length, startat) is null;
}

/// <summary>
/// Determines whether the regular expression specified in the Regex constructor finds a match in the given input span.
/// </summary>
/// <param name="input">The span to search for a match.</param>
/// <returns><see langword="true"/> if the regular expression finds a match; otherwise, <see langword="false"/>.</returns>
/// <exception cref="RegexMatchTimeoutException">A time-out occurred.</exception>
public bool IsMatch(ReadOnlySpan<char> input)
{
    // Default starting position: the end of the span for a right-to-left
    // regex, otherwise index 0.
    int startat = RightToLeft ? input.Length : 0;

    // Defer to the overload that accepts an explicit starting position.
    return IsMatch(input, startat);
}

/// <summary>
/// Determines whether the regular expression specified in the Regex constructor finds a match in the given input span,
/// with the search beginning at the specified position.
/// </summary>
/// <param name="input">The span to search for a match.</param>
/// <param name="startat">The zero-based character position at which to start the search.</param>
/// <returns><see langword="true"/> if the regular expression finds a match; otherwise, <see langword="false"/>.</returns>
/// <exception cref="RegexMatchTimeoutException">A time-out occurred.</exception>
public bool IsMatch(ReadOnlySpan<char> input, int startat)
{
    // Only the Success member of the single-match result is reported to the caller.
    return RunSingleMatch(RegexRunnerMode.ExistenceRequired, -1, input, startat).Success;
}

/// <summary>
/// Searches the input string for one or more occurrences of the text
/// supplied in the pattern parameter.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,19 @@ public partial class RegexCountTests
{
[Theory]
[MemberData(nameof(Count_ReturnsExpectedCount_TestData))]
public async Task Count_ReturnsExpectedCount(RegexEngine engine, string pattern, string input, RegexOptions options, int expectedCount)
public async Task Count_ReturnsExpectedCount(RegexEngine engine, string pattern, string input, int startat, RegexOptions options, int expectedCount)
{
Regex r = await RegexHelpers.GetRegexAsync(engine, pattern, options);

Assert.Equal(expectedCount, r.Count(input.AsSpan(), startat));
Assert.Equal(r.Count(input.AsSpan(), startat), r.Matches(input, startat).Count);

bool isDefaultStartAt = startat == ((options & RegexOptions.RightToLeft) != 0 ? input.Length : 0);
if (!isDefaultStartAt)
{
return;
}

Assert.Equal(expectedCount, r.Count(input));
Assert.Equal(expectedCount, r.Count(input.AsSpan()));
Assert.Equal(r.Count(input), r.Matches(input).Count);
Expand Down Expand Up @@ -44,22 +54,41 @@ public static IEnumerable<object[]> Count_ReturnsExpectedCount_TestData()
{
foreach (RegexEngine engine in RegexHelpers.AvailableEngines)
{
yield return new object[] { engine, @"", "", RegexOptions.None, 1 };
yield return new object[] { engine, @"", "a", RegexOptions.None, 2 };
yield return new object[] { engine, @"", "ab", RegexOptions.None, 3 };

yield return new object[] { engine, @"\w", "", RegexOptions.None, 0 };
yield return new object[] { engine, @"\w", "a", RegexOptions.None, 1 };
yield return new object[] { engine, @"\w", "ab", RegexOptions.None, 2 };

yield return new object[] { engine, @"\b\w+\b", "abc def ghi jkl", RegexOptions.None, 4 };

yield return new object[] { engine, @"A", "", RegexOptions.IgnoreCase, 0 };
yield return new object[] { engine, @"A", "a", RegexOptions.IgnoreCase, 1 };
yield return new object[] { engine, @"A", "aAaA", RegexOptions.IgnoreCase, 4 };

yield return new object[] { engine, @".", "\n\n\n", RegexOptions.None, 0 };
yield return new object[] { engine, @".", "\n\n\n", RegexOptions.Singleline, 3 };
yield return new object[] { engine, @"", "", 0, RegexOptions.None, 1 };
yield return new object[] { engine, @"", "a", 0, RegexOptions.None, 2 };
yield return new object[] { engine, @"", "ab", 0, RegexOptions.None, 3 };
yield return new object[] { engine, @"", "ab", 1, RegexOptions.None, 2 };

yield return new object[] { engine, @"\w", "", 0, RegexOptions.None, 0 };
yield return new object[] { engine, @"\w", "a", 0, RegexOptions.None, 1 };
yield return new object[] { engine, @"\w", "ab", 0, RegexOptions.None, 2 };
yield return new object[] { engine, @"\w", "ab", 1, RegexOptions.None, 1 };
yield return new object[] { engine, @"\w", "ab", 2, RegexOptions.None, 0 };

yield return new object[] { engine, @"\b\w+\b", "abc def ghi jkl", 0, RegexOptions.None, 4 };
yield return new object[] { engine, @"\b\w+\b", "abc def ghi jkl", 7, RegexOptions.None, 2 };

yield return new object[] { engine, @"A", "", 0, RegexOptions.IgnoreCase, 0 };
yield return new object[] { engine, @"A", "a", 0, RegexOptions.IgnoreCase, 1 };
yield return new object[] { engine, @"A", "aAaA", 0, RegexOptions.IgnoreCase, 4 };

yield return new object[] { engine, @".", "\n\n\n", 0, RegexOptions.None, 0 };
yield return new object[] { engine, @".", "\n\n\n", 0, RegexOptions.Singleline, 3 };

if (!RegexHelpers.IsNonBacktracking(engine))
{
    // These rows are only yielded for engines that RegexHelpers.IsNonBacktracking
    // reports as backtracking (presumably because the non-backtracking engine
    // does not support these constructs — confirm against RegexHelpers).
    // Row layout: { engine, pattern, input, startat, options, expectedCount }.

    // Lookbehinds
    yield return new object[] { engine, @"(?<=abc)\w", "abcxabcy", 7, RegexOptions.None, 1 };

    // Starting anchors
    yield return new object[] { engine, @"\Gdef", "abcdef", 0, RegexOptions.None, 0 };
    yield return new object[] { engine, @"\Gdef", "abcdef", 3, RegexOptions.None, 1 };

    // RightToLeft
    yield return new object[] { engine, @"\b\w+\b", "abc def ghi jkl", 15, RegexOptions.RightToLeft, 4 };
    // Use the loop's engine rather than a hard-coded RegexEngine.Interpreter, so this
    // case is exercised on every backtracking engine and is not duplicated per iteration.
    yield return new object[] { engine, @"(?<=abc)\w", "abcxabcy", 8, RegexOptions.RightToLeft, 2 };
    yield return new object[] { engine, @"(?<=abc)\w", "abcxabcy", 7, RegexOptions.RightToLeft, 1 };
}
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -151,16 +151,25 @@ public partial class RegexCountTests
{
[Theory]
[MemberData(nameof(Count_ReturnsExpectedCount_TestData))]
public void EnumerateMatches_ReturnsExpectedCount(RegexEngine engine, string pattern, string input, RegexOptions options, int expectedCount)
public void EnumerateMatches_ReturnsExpectedCount(RegexEngine engine, string pattern, string input, int startat, RegexOptions options, int expectedCount)
{
Regex r = RegexHelpers.GetRegexAsync(engine, pattern, options).GetAwaiter().GetResult();
int count = 0;
foreach (ValueMatch _ in r.EnumerateMatches(input))

int count;

count = 0;
foreach (ValueMatch _ in r.EnumerateMatches(input, startat))
{
count++;
}
Assert.Equal(expectedCount, count);

bool isDefaultStartAt = startat == ((options & RegexOptions.RightToLeft) != 0 ? input.Length : 0);
if (!isDefaultStartAt)
{
return;
}

if (options == RegexOptions.None && engine == RegexEngine.Interpreter)
{
count = 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1728,6 +1728,9 @@ public async Task Match_StartatDiffersFromBeginning(RegexEngine engine, string p
Regex r = await RegexHelpers.GetRegexAsync(engine, pattern, options);

Assert.Equal(expectedSuccessStartAt, r.IsMatch(input, startat));
#if NET7_0_OR_GREATER
Assert.Equal(expectedSuccessStartAt, r.IsMatch(input.AsSpan(), startat));
#endif

// Normal matching, but any match before startat is ignored.
Match match = r.Match(input, startat);
Expand Down

0 comments on commit 0c473ab

Please sign in to comment.