Skip to content

Commit

Permalink
lock protect nullability cache of symbolic regex node (#60942)
Browse files Browse the repository at this point in the history
* fixed nullability checking to be threadsafe

* use volatile write for nullability cache

Co-authored-by: Stephen Toub <stoub@microsoft.com>

* made read of nullability cache volatile and fixed other PR comments

* made non-lock-protected reads from SymbolicRegexBuilder._delta volatile

* Apply suggestions from code review

Co-authored-by: Stephen Toub <stoub@microsoft.com>
  • Loading branch information
veanes and stephentoub authored Nov 3, 2021
1 parent f7be57f commit 1e48eca
Show file tree
Hide file tree
Showing 5 changed files with 121 additions and 72 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,12 @@ internal static class CharKind
/// <summary>Gets the next character kind from a context</summary>
internal static uint Next(uint context) => context >> 3;

/// <summary>Creates the context of the previous and the next character kinds.</summary>
/// <summary>Encodes the pair (prevKind, nextKind) using 6 bits</summary>
internal static uint Context(uint prevKind, uint nextKind) => (nextKind << 3) | prevKind;

/// <summary>Exclusive maximum context (limit) is 64 because a context uses bit-shifting where each kind needs 3 bits.</summary>
internal const int ContextLimit = 64;

internal static string DescribePrev(uint i) => i switch
{
StartStop => @"\A",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -359,9 +359,7 @@ public DfaMatchingState<TSetType> TakeTransition(
Debug.Assert(builder._delta is not null);

int offset = (currentState.Id << builder._mintermsCount) | mintermId;
return
builder._delta[offset] ??
matcher.CreateNewTransition(currentState, minterm, offset);
return Volatile.Read(ref builder._delta[offset]) ?? matcher.CreateNewTransition(currentState, minterm, offset);
}
}

Expand Down Expand Up @@ -391,7 +389,7 @@ public DfaMatchingState<TSetType> TakeTransition(
DfaMatchingState<TSetType> nextStates = builder.MkState(oneState, currentStates.PrevCharKind);

int offset = (nextStates.Id << builder._mintermsCount) | mintermId;
DfaMatchingState<TSetType> p = builder._delta[offset] ?? matcher.CreateNewTransition(nextStates, minterm, offset);
DfaMatchingState<TSetType> p = Volatile.Read(ref builder._delta[offset]) ?? matcher.CreateNewTransition(nextStates, minterm, offset);

// Observe that if p.Node is an Or it will be flattened.
union = builder.MkOr2(union, p.Node);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,12 @@ namespace System.Text.RegularExpressions.Symbolic
internal sealed class SymbolicRegexNode<S> where S : notnull
{
internal const string EmptyCharClass = "[]";
/// <summary>Some byte other than 0 to represent true</summary>
internal const byte TrueByte = 1;
/// <summary>Some byte other than 0 to represent false</summary>
internal const byte FalseByte = 2;
/// <summary>The undefined value is the default value 0</summary>
internal const byte UndefinedByte = 0;

internal readonly SymbolicRegexBuilder<S> _builder;
internal readonly SymbolicRegexKind _kind;
Expand All @@ -23,7 +29,11 @@ internal sealed class SymbolicRegexNode<S> where S : notnull
internal readonly SymbolicRegexNode<S>? _right;
internal readonly SymbolicRegexSet<S>? _alts;

private Dictionary<uint, bool>? _nullabilityCache;
/// <summary>
/// Caches nullability of this node for any given context (0 &lt;= context &lt; ContextLimit)
/// when _info.StartsWithSomeAnchor and _info.CanBeNullable are true. Otherwise the cache is null.
/// </summary>
private byte[]? _nullabilityCache;

private S _startSet;

Expand All @@ -50,6 +60,7 @@ private SymbolicRegexNode(SymbolicRegexBuilder<S> builder, SymbolicRegexKind kin
_info = info;
_hashcode = ComputeHashCode();
_startSet = ComputeStartSet();
_nullabilityCache = info.StartsWithSomeAnchor && info.CanBeNullable ? new byte[CharKind.ContextLimit] : null;
}

private bool _isInternalizedUnion;
Expand Down Expand Up @@ -162,92 +173,100 @@ static void AppendToList(SymbolicRegexNode<S> concat, List<SymbolicRegexNode<S>>
/// <param name="context">kind info for previous and next characters</param>
internal bool IsNullableFor(uint context)
{
if (!_info.StartsWithSomeAnchor)
return IsNullable;

if (!_info.CanBeNullable)
return false;
if (_nullabilityCache is null)
{
// _nullabilityCache is null only when the node does not start with an anchor or cannot be nullable,
// and in both cases IsNullable == CanBeNullable regardless of the context:
// IsNullable == true always implies CanBeNullable == true, and when the node does not
// start with an anchor, IsNullable == false implies CanBeNullable == false.
return _info.IsNullable;
}

if (!StackHelper.TryEnsureSufficientExecutionStack())
{
return StackHelper.CallOnEmptyStack(IsNullableFor, context);
}

// Initialize the nullability cache for this node.
_nullabilityCache ??= new Dictionary<uint, bool>();
Debug.Assert(context < CharKind.ContextLimit);

if (!_nullabilityCache.TryGetValue(context, out bool is_nullable))
// If nullability has already been computed for the given context then return it
byte b = Volatile.Read(ref _nullabilityCache[context]);
if (b != UndefinedByte)
{
switch (_kind)
{
case SymbolicRegexKind.Loop:
Debug.Assert(_left is not null);
is_nullable = _lower == 0 || _left.IsNullableFor(context);
break;
return b == TrueByte;
}

case SymbolicRegexKind.Concat:
Debug.Assert(_left is not null && _right is not null);
is_nullable = _left.IsNullableFor(context) && _right.IsNullableFor(context);
break;
// Otherwise compute the nullability recursively for the given context
bool is_nullable;
switch (_kind)
{
case SymbolicRegexKind.Loop:
Debug.Assert(_left is not null);
is_nullable = _lower == 0 || _left.IsNullableFor(context);
break;

case SymbolicRegexKind.Or:
case SymbolicRegexKind.And:
Debug.Assert(_alts is not null);
is_nullable = _alts.IsNullableFor(context);
break;
case SymbolicRegexKind.Concat:
Debug.Assert(_left is not null && _right is not null);
is_nullable = _left.IsNullableFor(context) && _right.IsNullableFor(context);
break;

case SymbolicRegexKind.Not:
Debug.Assert(_left is not null);
is_nullable = !_left.IsNullableFor(context);
break;
case SymbolicRegexKind.Or:
case SymbolicRegexKind.And:
Debug.Assert(_alts is not null);
is_nullable = _alts.IsNullableFor(context);
break;

case SymbolicRegexKind.StartAnchor:
is_nullable = CharKind.Prev(context) == CharKind.StartStop;
break;
case SymbolicRegexKind.Not:
Debug.Assert(_left is not null);
is_nullable = !_left.IsNullableFor(context);
break;

case SymbolicRegexKind.EndAnchor:
is_nullable = CharKind.Next(context) == CharKind.StartStop;
break;
case SymbolicRegexKind.StartAnchor:
is_nullable = CharKind.Prev(context) == CharKind.StartStop;
break;

case SymbolicRegexKind.BOLAnchor:
// Beg-Of-Line anchor is nullable when the previous character is Newline or Start
// note: at least one of the bits must be 1, but both could also be 1 in case of very first newline
is_nullable = (CharKind.Prev(context) & CharKind.NewLineS) != 0;
break;
case SymbolicRegexKind.EndAnchor:
is_nullable = CharKind.Next(context) == CharKind.StartStop;
break;

case SymbolicRegexKind.EOLAnchor:
// End-Of-Line anchor is nullable when the next character is Newline or Stop
// note: at least one of the bits must be 1, but both could also be 1 in case of \Z
is_nullable = (CharKind.Next(context) & CharKind.NewLineS) != 0;
break;
case SymbolicRegexKind.BOLAnchor:
// Beg-Of-Line anchor is nullable when the previous character is Newline or Start
// note: at least one of the bits must be 1, but both could also be 1 in case of very first newline
is_nullable = (CharKind.Prev(context) & CharKind.NewLineS) != 0;
break;

case SymbolicRegexKind.WBAnchor:
// test that prev char is word letter iff next is not word letter
is_nullable = ((CharKind.Prev(context) & CharKind.WordLetter) ^ (CharKind.Next(context) & CharKind.WordLetter)) != 0;
break;
case SymbolicRegexKind.EOLAnchor:
// End-Of-Line anchor is nullable when the next character is Newline or Stop
// note: at least one of the bits must be 1, but both could also be 1 in case of \Z
is_nullable = (CharKind.Next(context) & CharKind.NewLineS) != 0;
break;

case SymbolicRegexKind.NWBAnchor:
// test that prev char is word letter iff next is word letter
is_nullable = ((CharKind.Prev(context) & CharKind.WordLetter) ^ (CharKind.Next(context) & CharKind.WordLetter)) == 0;
break;
case SymbolicRegexKind.WBAnchor:
// test that prev char is word letter iff next is not word letter
is_nullable = ((CharKind.Prev(context) & CharKind.WordLetter) ^ (CharKind.Next(context) & CharKind.WordLetter)) != 0;
break;

case SymbolicRegexKind.EndAnchorZ:
// \Z anchor is nullable when the next character is either the last Newline or Stop
// note: CharKind.NewLineS == CharKind.Newline|CharKind.StartStop
is_nullable = (CharKind.Next(context) & CharKind.StartStop) != 0;
break;
case SymbolicRegexKind.NWBAnchor:
// test that prev char is word letter iff next is word letter
is_nullable = ((CharKind.Prev(context) & CharKind.WordLetter) ^ (CharKind.Next(context) & CharKind.WordLetter)) == 0;
break;

default: //SymbolicRegexKind.EndAnchorZRev:
// EndAnchorZRev (rev(\Z)) anchor is nullable when the prev character is either the first Newline or Start
// note: CharKind.NewLineS == CharKind.Newline|CharKind.StartStop
Debug.Assert(_kind == SymbolicRegexKind.EndAnchorZRev);
is_nullable = (CharKind.Prev(context) & CharKind.StartStop) != 0;
break;
}
case SymbolicRegexKind.EndAnchorZ:
// \Z anchor is nullable when the next character is either the last Newline or Stop
// note: CharKind.NewLineS == CharKind.Newline|CharKind.StartStop
is_nullable = (CharKind.Next(context) & CharKind.StartStop) != 0;
break;

_nullabilityCache[context] = is_nullable;
default: // SymbolicRegexKind.EndAnchorZRev:
// EndAnchorZRev (rev(\Z)) anchor is nullable when the prev character is either the first Newline or Start
// note: CharKind.NewLineS == CharKind.Newline|CharKind.StartStop
Debug.Assert(_kind == SymbolicRegexKind.EndAnchorZRev);
is_nullable = (CharKind.Prev(context) & CharKind.StartStop) != 0;
break;
}

Volatile.Write(ref _nullabilityCache[context], is_nullable ? TrueByte : FalseByte);

return is_nullable;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1827,5 +1827,33 @@ public async Task UseRegexConcurrently_ThreadSafe_Success(RegexEngine engine, Ti
}, CancellationToken.None, TaskCreationOptions.LongRunning, TaskScheduler.Default)).ToArray());
}
}

[Theory]
[MemberData(nameof(MatchWordsInAnchoredRegexes_TestData))]
public async Task MatchWordsInAnchoredRegexes(RegexEngine engine, RegexOptions options, string pattern, string input, (int, int)[] matches)
{
    // These tests exercise corner cases of matches that involve anchors.
    // For NonBacktracking they are meant to cover most of the contexts handled by
    // the per-context nullability cache (_nullabilityCache) in SymbolicRegexNode.
    // Each entry of 'matches' is the expected (index, length) of one match, in order.
    Regex r = await RegexHelpers.GetRegexAsync(engine, pattern, options);
    MatchCollection ms = r.Matches(input);

    Assert.Equal(matches.Length, ms.Count);
    for (int i = 0; i < matches.Length; i++)
    {
        // Assert.Equal takes (expected, actual); keeping that order makes a failure
        // report the test data as "Expected" and the regex result as "Actual".
        (int expectedIndex, int expectedLength) = matches[i];
        Assert.Equal(expectedIndex, ms[i].Index);
        Assert.Equal(expectedLength, ms[i].Length);
    }
}

public static IEnumerable<object[]> MatchWordsInAnchoredRegexes_TestData()
{
    // Builds one theory row: engine, options, pattern, input and the expected
    // (index, length) pairs of the matches, in the order they must be found.
    object[] Row(RegexEngine engine, RegexOptions options, string pattern, string input, params (int, int)[] expected) =>
        new object[] { engine, options, pattern, input, expected };

    // Every available engine gets the same four anchored patterns:
    // word boundaries (\b), beginning-of-line (^) and end-of-line ($) under Multiline.
    foreach (RegexEngine engine in RegexHelpers.AvailableEngines)
    {
        yield return Row(engine, RegexOptions.None, @"\b\w{10,}\b", "this is a complicated word in a\nnontrivial sentence", (10, 11), (32, 10));
        yield return Row(engine, RegexOptions.Multiline, @"^\w{10,}\b", "this is a\ncomplicated word in a\nnontrivial sentence", (10, 11), (32, 10));
        yield return Row(engine, RegexOptions.None, @"\b\d{1,2}\/\d{1,2}\/\d{2,4}\b", "date 10/12/1966 and 10/12/66 are the same", (5, 10), (20, 8));
        yield return Row(engine, RegexOptions.Multiline, @"\b\d{1,2}\/\d{1,2}\/\d{2,4}$", "date 10/12/1966\nand 10/12/66\nare the same", (5, 10), (20, 8));
    }
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -520,8 +520,9 @@ public void TestConjunctionOverCounting(string conjunct1, string conjunct2, stri
Assert.Contains("conditional", e.Message);
}
}
#endregion


#region Random input generation tests
public static IEnumerable<object[]> GenerateRandomMembers_TestData()
{
string[] patterns = new string[] { @"pa[5\$s]{2}w[o0]rd$", @"\w\d+", @"\d{10}" };
Expand All @@ -536,7 +537,7 @@ public static IEnumerable<object[]> GenerateRandomMembers_TestData()
{
foreach (string input in inputs)
{
yield return new object[] {engine, pattern, input, !negative };
yield return new object[] { engine, pattern, input, !negative };
}
}
}
Expand Down

0 comments on commit 1e48eca

Please sign in to comment.