Skip to content

Commit

Permalink
Fix a deadlock in EventSource and CounterGroup (#40259)
Browse files Browse the repository at this point in the history
* Fix a deadlock in CounterGroup and EventSource

* add more comments

* Add some more comments

* PR feedback

* Add comments
  • Loading branch information
sywhang authored Aug 15, 2020
1 parent 75495b7 commit c67684b
Showing 1 changed file with 46 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -200,22 +200,47 @@ private void ResetCounters()

private void OnTimer()
{
Debug.Assert(Monitor.IsEntered(s_counterGroupLock));
if (_eventSource.IsEnabled())
{
DateTime now = DateTime.UtcNow;
TimeSpan elapsed = now - _timeStampSinceCollectionStarted;
DateTime now;
TimeSpan elapsed;
int pollingIntervalInMilliseconds;
DiagnosticCounter[] counters;
lock (s_counterGroupLock)
{
now = DateTime.UtcNow;
elapsed = now - _timeStampSinceCollectionStarted;
pollingIntervalInMilliseconds = _pollingIntervalInMilliseconds;
counters = new DiagnosticCounter[_counters.Count];
_counters.CopyTo(counters);
}

foreach (DiagnosticCounter counter in _counters)
// MUST keep out of the scope of s_counterGroupLock because this will cause WritePayload
// callback can be re-entrant to CounterGroup (i.e. it's possible it calls back into EnableTimer()
// above, since WritePayload callback can contain user code that can invoke EventSource constructor
// and lead to a deadlock. (See https://github.com/dotnet/runtime/issues/40190 for details)
foreach (DiagnosticCounter counter in counters)
{
counter.WritePayload((float)elapsed.TotalSeconds, _pollingIntervalInMilliseconds);
// NOTE: It is still possible for a race condition to occur here. An example is if the session
// that subscribed to these batch of counters was disabled and it was immediately enabled in
// a different session, some of the counter data that was supposed to be written to the old
// session can now "overflow" into the new session.
// This problem pre-existed to this change (when we used to hold lock in the call to WritePayload):
// the only difference being the old behavior caused the entire batch of counters to be either
// written to the old session or the new session. The behavior change is not being treated as a
// significant problem to address for now, but we can come back and address it if it turns out to
// be an actual issue.
counter.WritePayload((float)elapsed.TotalSeconds, pollingIntervalInMilliseconds);
}
_timeStampSinceCollectionStarted = now;

do
lock (s_counterGroupLock)
{
_nextPollingTimeStamp += new TimeSpan(0, 0, 0, 0, _pollingIntervalInMilliseconds);
} while (_nextPollingTimeStamp <= now);
_timeStampSinceCollectionStarted = now;
do
{
_nextPollingTimeStamp += new TimeSpan(0, 0, 0, 0, _pollingIntervalInMilliseconds);
} while (_nextPollingTimeStamp <= now);
}
}
}

Expand All @@ -228,8 +253,15 @@ private void OnTimer()
private static void PollForValues()
{
AutoResetEvent? sleepEvent = null;

// Cache of onTimer callbacks for each CounterGroup.
// We cache these outside of the scope of s_counterGroupLock because
// calling into the callbacks can cause a re-entrancy into CounterGroup.Enable()
// and result in a deadlock. (See https://github.com/dotnet/runtime/issues/40190 for details)
List<Action> onTimers = new List<Action>();
while (true)
{
onTimers.Clear();
int sleepDurationInMilliseconds = int.MaxValue;
lock (s_counterGroupLock)
{
Expand All @@ -239,14 +271,18 @@ private static void PollForValues()
DateTime now = DateTime.UtcNow;
if (counterGroup._nextPollingTimeStamp < now + new TimeSpan(0, 0, 0, 0, 1))
{
counterGroup.OnTimer();
onTimers.Add(() => counterGroup.OnTimer());
}

int millisecondsTillNextPoll = (int)((counterGroup._nextPollingTimeStamp - now).TotalMilliseconds);
millisecondsTillNextPoll = Math.Max(1, millisecondsTillNextPoll);
sleepDurationInMilliseconds = Math.Min(sleepDurationInMilliseconds, millisecondsTillNextPoll);
}
}
foreach (Action onTimer in onTimers)
{
onTimer.Invoke();
}
if (sleepDurationInMilliseconds == int.MaxValue)
{
sleepDurationInMilliseconds = -1; // WaitOne uses -1 to mean infinite
Expand Down

0 comments on commit c67684b

Please sign in to comment.