Skip to content

Commit

Permalink
[release/7.0] Fix RegexOptions.NonBacktracking matching end anchors a…
Browse files Browse the repository at this point in the history
…t timeout check boundaries (#75308)

* Test for exposing timeout check bug

* Fix timeout check bug

* Improve naming

* Simplify test

* Make test search range higher

* Simplify test for timeout check bug

Co-authored-by: Olli Saarikivi <olsaarik@microsoft.com>
  • Loading branch information
github-actions[bot] and olsaarik authored Sep 9, 2022
1 parent acf0dd2 commit d18ff87
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -448,13 +448,13 @@ private int FindEndPosition<TInputReader, TFindOptimizationsHandler, TNullabilit
// still check the timeout now and again to provide some semblance of the behavior a developer experiences with
// the backtracking engines. We can, however, choose a large number here, since it's not actually needed for security.
const int CharsPerTimeoutCheck = 1_000;
ReadOnlySpan<char> inputForInnerLoop = _checkTimeout && input.Length - pos > CharsPerTimeoutCheck ?
input.Slice(0, pos + CharsPerTimeoutCheck) :
input;
int innerLoopLength = _checkTimeout && input.Length - pos > CharsPerTimeoutCheck ?
pos + CharsPerTimeoutCheck :
input.Length;

bool done = currentState.NfaState is not null ?
FindEndPositionDeltas<NfaStateHandler, TInputReader, TFindOptimizationsHandler, TNullabilityHandler>(inputForInnerLoop, mode, ref pos, ref currentState, ref endPos, ref endStateId, ref initialStatePos, ref initialStatePosCandidate) :
FindEndPositionDeltas<DfaStateHandler, TInputReader, TFindOptimizationsHandler, TNullabilityHandler>(inputForInnerLoop, mode, ref pos, ref currentState, ref endPos, ref endStateId, ref initialStatePos, ref initialStatePosCandidate);
FindEndPositionDeltas<NfaStateHandler, TInputReader, TFindOptimizationsHandler, TNullabilityHandler>(input, innerLoopLength, mode, ref pos, ref currentState, ref endPos, ref endStateId, ref initialStatePos, ref initialStatePosCandidate) :
FindEndPositionDeltas<DfaStateHandler, TInputReader, TFindOptimizationsHandler, TNullabilityHandler>(input, innerLoopLength, mode, ref pos, ref currentState, ref endPos, ref endStateId, ref initialStatePos, ref initialStatePosCandidate);

// If the inner loop indicates that the search finished (for example due to reaching a deadend state) or
// there is no more input available, then the whole search is done.
Expand All @@ -466,7 +466,7 @@ private int FindEndPosition<TInputReader, TFindOptimizationsHandler, TNullabilit
// The search did not finish, so we either failed to transition (which should only happen if we were in DFA mode and
// need to switch over to NFA mode) or ran out of input in the inner loop. Check if the inner loop still had more
// input available.
if (pos < inputForInnerLoop.Length)
if (pos < innerLoopLength)
{
// Because there was still more input available, a failure to transition in DFA mode must be the cause
// of the early exit. Upgrade to NFA mode.
Expand Down Expand Up @@ -505,7 +505,7 @@ private int FindEndPosition<TInputReader, TFindOptimizationsHandler, TNullabilit
/// 0 if iteration completed because we reached an initial state.
/// A negative value if iteration completed because we ran out of input or we failed to transition.
/// </returns>
private bool FindEndPositionDeltas<TStateHandler, TInputReader, TFindOptimizationsHandler, TNullabilityHandler>(ReadOnlySpan<char> input, RegexRunnerMode mode,
private bool FindEndPositionDeltas<TStateHandler, TInputReader, TFindOptimizationsHandler, TNullabilityHandler>(ReadOnlySpan<char> input, int length, RegexRunnerMode mode,
ref int posRef, ref CurrentState state, ref int endPosRef, ref int endStateIdRef, ref int initialStatePosRef, ref int initialStatePosCandidateRef)
where TStateHandler : struct, IStateHandler
where TInputReader : struct, IInputReader
Expand Down Expand Up @@ -561,7 +561,7 @@ private bool FindEndPositionDeltas<TStateHandler, TInputReader, TFindOptimizatio
}

// If there is more input available try to transition with the next character.
if (!IsMintermId(positionId) || !TStateHandler.TryTakeTransition(this, ref state, positionId))
if (pos >= length || !TStateHandler.TryTakeTransition(this, ref state, positionId))
{
return false;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1278,6 +1278,22 @@ public void Match_CachedPattern_NewTimeoutApplies(RegexOptions options)
Assert.InRange(sw.Elapsed.TotalSeconds, 0, 10); // arbitrary upper bound that should be well above what's needed with a 1ms timeout
}

[ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsNetCore))]
public void NonBacktracking_NoEndAnchorMatchAtTimeoutCheck()
{
// This constant must be at least as large as the one in the implementation that sets the maximum number
// of innermost loop iterations between timeout checks.
const int CharsToTriggerTimeoutCheck = 10000;
// Check that it is indeed large enough to trigger timeouts. If this fails the constant above needs to be larger.
Assert.Throws<RegexMatchTimeoutException>(() => new Regex("a*", RegexHelpers.RegexOptionNonBacktracking, TimeSpan.FromTicks(1))
.Match(new string('a', CharsToTriggerTimeoutCheck)));

// The actual test: ^a*$ shouldn't match in a string ending in 'b'
Regex testPattern = new Regex("^a*$", RegexHelpers.RegexOptionNonBacktracking, TimeSpan.FromHours(1));
string input = string.Concat(new string('a', CharsToTriggerTimeoutCheck), 'b');
Assert.False(testPattern.IsMatch(input));
}

public static IEnumerable<object[]> Match_Advanced_TestData()
{
foreach (RegexEngine engine in RegexHelpers.AvailableEngines)
Expand Down

0 comments on commit d18ff87

Please sign in to comment.