From c67684bead32185f086c3d376c6914dfbd0d4b6a Mon Sep 17 00:00:00 2001 From: Sung Yoon Whang Date: Fri, 14 Aug 2020 17:27:34 -0700 Subject: [PATCH] Fix a deadlock in EventSource and CounterGroup (#40259) * Fix a deadlock in CounterGroup and EventSource * add more comments * Add some more comments * PR feedback * Add comments --- .../Diagnostics/Tracing/CounterGroup.cs | 56 +++++++++++++++---- 1 file changed, 46 insertions(+), 10 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Diagnostics/Tracing/CounterGroup.cs b/src/libraries/System.Private.CoreLib/src/System/Diagnostics/Tracing/CounterGroup.cs index 611c83547b39c..effc21fd61ad5 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Diagnostics/Tracing/CounterGroup.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Diagnostics/Tracing/CounterGroup.cs @@ -200,22 +200,47 @@ private void ResetCounters() private void OnTimer() { - Debug.Assert(Monitor.IsEntered(s_counterGroupLock)); if (_eventSource.IsEnabled()) { - DateTime now = DateTime.UtcNow; - TimeSpan elapsed = now - _timeStampSinceCollectionStarted; + DateTime now; + TimeSpan elapsed; + int pollingIntervalInMilliseconds; + DiagnosticCounter[] counters; + lock (s_counterGroupLock) + { + now = DateTime.UtcNow; + elapsed = now - _timeStampSinceCollectionStarted; + pollingIntervalInMilliseconds = _pollingIntervalInMilliseconds; + counters = new DiagnosticCounter[_counters.Count]; + _counters.CopyTo(counters); + } - foreach (DiagnosticCounter counter in _counters) + // MUST keep out of the scope of s_counterGroupLock because this will cause WritePayload + // callback can be re-entrant to CounterGroup (i.e. it's possible it calls back into EnableTimer() + // above, since WritePayload callback can contain user code that can invoke EventSource constructor + // and lead to a deadlock. (See https://github.com/dotnet/runtime/issues/40190 for details) + foreach (DiagnosticCounter counter in counters) { - counter.WritePayload((float)elapsed.TotalSeconds, _pollingIntervalInMilliseconds); + // NOTE: It is still possible for a race condition to occur here. An example is if the session + // that subscribed to these batch of counters was disabled and it was immediately enabled in + // a different session, some of the counter data that was supposed to be written to the old + // session can now "overflow" into the new session. + // This problem pre-existed to this change (when we used to hold lock in the call to WritePayload): + // the only difference being the old behavior caused the entire batch of counters to be either + // written to the old session or the new session. The behavior change is not being treated as a + // significant problem to address for now, but we can come back and address it if it turns out to + // be an actual issue. + counter.WritePayload((float)elapsed.TotalSeconds, pollingIntervalInMilliseconds); } - _timeStampSinceCollectionStarted = now; - do + lock (s_counterGroupLock) { - _nextPollingTimeStamp += new TimeSpan(0, 0, 0, 0, _pollingIntervalInMilliseconds); - } while (_nextPollingTimeStamp <= now); + _timeStampSinceCollectionStarted = now; + do + { + _nextPollingTimeStamp += new TimeSpan(0, 0, 0, 0, _pollingIntervalInMilliseconds); + } while (_nextPollingTimeStamp <= now); + } } } @@ -228,8 +253,15 @@ private void OnTimer() private static void PollForValues() { AutoResetEvent? sleepEvent = null; + + // Cache of onTimer callbacks for each CounterGroup. + // We cache these outside of the scope of s_counterGroupLock because + // calling into the callbacks can cause a re-entrancy into CounterGroup.Enable() + // and result in a deadlock. (See https://github.com/dotnet/runtime/issues/40190 for details) + List onTimers = new List(); while (true) { + onTimers.Clear(); int sleepDurationInMilliseconds = int.MaxValue; lock (s_counterGroupLock) { @@ -239,7 +271,7 @@ private static void PollForValues() DateTime now = DateTime.UtcNow; if (counterGroup._nextPollingTimeStamp < now + new TimeSpan(0, 0, 0, 0, 1)) { - counterGroup.OnTimer(); + onTimers.Add(() => counterGroup.OnTimer()); } int millisecondsTillNextPoll = (int)((counterGroup._nextPollingTimeStamp - now).TotalMilliseconds); @@ -247,6 +279,10 @@ private static void PollForValues() sleepDurationInMilliseconds = Math.Min(sleepDurationInMilliseconds, millisecondsTillNextPoll); } } + foreach (Action onTimer in onTimers) + { + onTimer.Invoke(); + } if (sleepDurationInMilliseconds == int.MaxValue) { sleepDurationInMilliseconds = -1; // WaitOne uses -1 to mean infinite