Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[release/7.0] Fix RegexOptions.NonBacktracking matching end anchors at timeout check boundaries #75308

Merged
merged 6 commits into from
Sep 9, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -448,13 +448,13 @@ private int FindEndPosition<TInputReader, TFindOptimizationsHandler, TNullabilit
// still check the timeout now and again to provide some semblance of the behavior a developer experiences with
// the backtracking engines. We can, however, choose a large number here, since it's not actually needed for security.
const int CharsPerTimeoutCheck = 1_000;
ReadOnlySpan<char> inputForInnerLoop = _checkTimeout && input.Length - pos > CharsPerTimeoutCheck ?
input.Slice(0, pos + CharsPerTimeoutCheck) :
input;
int innerLoopLength = _checkTimeout && input.Length - pos > CharsPerTimeoutCheck ?
pos + CharsPerTimeoutCheck :
input.Length;

bool done = currentState.NfaState is not null ?
FindEndPositionDeltas<NfaStateHandler, TInputReader, TFindOptimizationsHandler, TNullabilityHandler>(inputForInnerLoop, mode, ref pos, ref currentState, ref endPos, ref endStateId, ref initialStatePos, ref initialStatePosCandidate) :
FindEndPositionDeltas<DfaStateHandler, TInputReader, TFindOptimizationsHandler, TNullabilityHandler>(inputForInnerLoop, mode, ref pos, ref currentState, ref endPos, ref endStateId, ref initialStatePos, ref initialStatePosCandidate);
FindEndPositionDeltas<NfaStateHandler, TInputReader, TFindOptimizationsHandler, TNullabilityHandler>(input, innerLoopLength, mode, ref pos, ref currentState, ref endPos, ref endStateId, ref initialStatePos, ref initialStatePosCandidate) :
FindEndPositionDeltas<DfaStateHandler, TInputReader, TFindOptimizationsHandler, TNullabilityHandler>(input, innerLoopLength, mode, ref pos, ref currentState, ref endPos, ref endStateId, ref initialStatePos, ref initialStatePosCandidate);

// If the inner loop indicates that the search finished (for example due to reaching a deadend state) or
// there is no more input available, then the whole search is done.
Expand All @@ -466,7 +466,7 @@ private int FindEndPosition<TInputReader, TFindOptimizationsHandler, TNullabilit
// The search did not finish, so we either failed to transition (which should only happen if we were in DFA mode and
// need to switch over to NFA mode) or ran out of input in the inner loop. Check if the inner loop still had more
// input available.
if (pos < inputForInnerLoop.Length)
if (pos < innerLoopLength)
{
// Because there was still more input available, a failure to transition in DFA mode must be the cause
// of the early exit. Upgrade to NFA mode.
Expand Down Expand Up @@ -505,7 +505,7 @@ private int FindEndPosition<TInputReader, TFindOptimizationsHandler, TNullabilit
/// 0 if iteration completed because we reached an initial state.
/// A negative value if iteration completed because we ran out of input or we failed to transition.
/// </returns>
private bool FindEndPositionDeltas<TStateHandler, TInputReader, TFindOptimizationsHandler, TNullabilityHandler>(ReadOnlySpan<char> input, RegexRunnerMode mode,
private bool FindEndPositionDeltas<TStateHandler, TInputReader, TFindOptimizationsHandler, TNullabilityHandler>(ReadOnlySpan<char> input, int length, RegexRunnerMode mode,
ref int posRef, ref CurrentState state, ref int endPosRef, ref int endStateIdRef, ref int initialStatePosRef, ref int initialStatePosCandidateRef)
where TStateHandler : struct, IStateHandler
where TInputReader : struct, IInputReader
Expand Down Expand Up @@ -561,7 +561,7 @@ private bool FindEndPositionDeltas<TStateHandler, TInputReader, TFindOptimizatio
}

// If there is more input available try to transition with the next character.
if (!IsMintermId(positionId) || !TStateHandler.TryTakeTransition(this, ref state, positionId))
if (pos >= length || !TStateHandler.TryTakeTransition(this, ref state, positionId))
{
return false;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1278,6 +1278,22 @@ public void Match_CachedPattern_NewTimeoutApplies(RegexOptions options)
Assert.InRange(sw.Elapsed.TotalSeconds, 0, 10); // arbitrary upper bound that should be well above what's needed with a 1ms timeout
}

[ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsNetCore))]
public void NonBacktracking_NoEndAnchorMatchAtTimeoutCheck()
{
// This constant must be at least as large as the one in the implementation that sets the maximum number
// of innermost loop iterations between timeout checks.
const int CharsToTriggerTimeoutCheck = 10000;
// Check that it is indeed large enough to trigger timeouts. If this fails the constant above needs to be larger.
Assert.Throws<RegexMatchTimeoutException>(() => new Regex("a*", RegexHelpers.RegexOptionNonBacktracking, TimeSpan.FromTicks(1))
.Match(new string('a', CharsToTriggerTimeoutCheck)));

// The actual test: ^a*$ shouldn't match in a string ending in 'b'
Regex testPattern = new Regex("^a*$", RegexHelpers.RegexOptionNonBacktracking, TimeSpan.FromHours(1));
string input = string.Concat(new string('a', CharsToTriggerTimeoutCheck), 'b');
Assert.False(testPattern.IsMatch(input));
}

public static IEnumerable<object[]> Match_Advanced_TestData()
{
foreach (RegexEngine engine in RegexHelpers.AvailableEngines)
Expand Down