-
Notifications
You must be signed in to change notification settings - Fork 757
Implement A Thread Time View for Universal Traces #2320
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
f7cfa82
71d36a6
b4b0d5b
b99b446
44f5571
be8050e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -10,12 +10,14 @@ | |
| // using Microsoft.Diagnostics.Tracing.Parsers; | ||
| using Microsoft.Diagnostics.Tracing.Parsers.Clr; | ||
| using Microsoft.Diagnostics.Tracing.Parsers.Kernel; | ||
| using Microsoft.Diagnostics.Tracing.Parsers.Universal.Events; | ||
| using Microsoft.Diagnostics.Tracing.Session; | ||
| using Microsoft.Diagnostics.Tracing.Stacks; | ||
| using System; | ||
| using System.Collections.Generic; | ||
| using System.Diagnostics; | ||
| using System.Net; | ||
| using System.Threading; | ||
| using Address = System.UInt64; | ||
|
|
||
| namespace Microsoft.Diagnostics.Tracing | ||
|
|
@@ -38,11 +40,21 @@ public ThreadTimeStackComputer(TraceLog eventLog, SymbolReader symbolReader) | |
| m_symbolReader = symbolReader; | ||
|
|
||
| m_threadState = new ThreadState[eventLog.Threads.Count]; | ||
| for (int i = 0; i < m_threadState.Length; i++) | ||
| { | ||
| m_threadState[i] = new ThreadState(); | ||
| } | ||
|
|
||
| m_IRPToThread = new Dictionary<Address, TraceThread>(32); | ||
|
|
||
| // We assume to begin with that all processors are idle (it will fix itself shortly). | ||
| m_numIdleProcs = eventLog.NumberOfProcessors; | ||
| m_threadIDUsingProc = new int[m_numIdleProcs]; | ||
| m_threadIndexUsingProc = new ThreadIndex[m_numIdleProcs]; | ||
| for(int i=0; i< m_threadIndexUsingProc.Length; i++) | ||
| { | ||
| m_threadIndexUsingProc[i] = ThreadIndex.Invalid; | ||
| } | ||
|
|
||
| m_lastPacketForProcess = new NetworkInfo[eventLog.Processes.Count]; | ||
| for (int i = 0; i < m_lastPacketForProcess.Length; i++) | ||
|
|
@@ -51,6 +63,7 @@ public ThreadTimeStackComputer(TraceLog eventLog, SymbolReader symbolReader) | |
| } | ||
|
|
||
| MiniumReadiedTimeMSec = 0.5F; // We tend to only care about this if we are being starved. | ||
| InitializeForUniversal(); | ||
| } | ||
| /// <summary> | ||
| /// If set we compute thread time using Tasks | ||
|
|
@@ -114,6 +127,9 @@ public void GenerateThreadTimeStacks(MutableTraceEventStackSource outputStackSou | |
|
|
||
| TraceLogEventSource eventSource = traceEvents == null ? m_eventLog.Events.GetSource() : | ||
| traceEvents.GetSource(); | ||
|
|
||
| UniversalEventsTraceEventParser universalEventsParser = new UniversalEventsTraceEventParser(eventSource); | ||
|
|
||
| if (GroupByStartStopActivity) | ||
| { | ||
| UseTasks = true; | ||
|
|
@@ -170,6 +186,7 @@ public void GenerateThreadTimeStacks(MutableTraceEventStackSource outputStackSou | |
| if (!BlockedTimeOnly) | ||
| { | ||
| eventSource.Kernel.PerfInfoSample += OnSampledProfile; | ||
| universalEventsParser.cpu += OnUniversalCPUSample; | ||
|
|
||
| // WE add these too, because they are reasonably common so they can add a level of detail. | ||
| eventSource.Clr.GCAllocationTick += OnSampledProfile; | ||
|
|
@@ -315,6 +332,18 @@ public void GenerateThreadTimeStacks(MutableTraceEventStackSource outputStackSou | |
| { | ||
| eventSource.Dynamic.All += delegate (TraceEvent data) | ||
| { | ||
| // Ignore UniversalSystem. | ||
| if (data.ProviderGuid == UniversalSystemTraceEventParser.ProviderGuid) | ||
| { | ||
| return; | ||
| } | ||
|
|
||
| // Ignore UniversalEvents. | ||
| if (data.ProviderGuid == UniversalEventsTraceEventParser.ProviderGuid) | ||
| { | ||
| return; | ||
| } | ||
|
|
||
| // TODO decide what the correct heuristic is. | ||
| // Currently I only do this for things that might be an EventSoruce (uses the name->Guid hashing) | ||
| // Most importantly, it excludes the high volume CLR providers. | ||
|
|
@@ -376,6 +405,7 @@ public void GenerateThreadTimeStacks(MutableTraceEventStackSource outputStackSou | |
|
|
||
| // Add my own callbacks. | ||
| eventSource.Kernel.ThreadCSwitch += OnThreadCSwitch; | ||
| universalEventsParser.cswitch += OnUniversalCSwitch; | ||
| eventSource.Kernel.AddCallbackForEvents<DiskIOTraceData>(OnDiskIO); | ||
| eventSource.Kernel.AddCallbackForEvents<DiskIOInitTraceData>(OnDiskIOInit); | ||
| eventSource.Kernel.ThreadStart += OnThreadStart; | ||
|
|
@@ -423,6 +453,27 @@ public void GenerateThreadTimeStacks(MutableTraceEventStackSource outputStackSou | |
| m_threadState = null; | ||
| } | ||
|
|
||
| private void InitializeForUniversal() | ||
| { | ||
| if (m_eventLog.Stats.Contains(UniversalEventsTraceEventParser.ProviderGuid, "cswitch")) | ||
| { | ||
| // Set has cswitches, since we don't get thread start events, and the computer needs to know | ||
| // that there will be cswitch events. | ||
| m_traceHasCSwitches = true; | ||
|
|
||
| // Initialize all threads to blocked. For Windows traces, this happens when a thread start event is encountered. | ||
| // For Linux, we don't get these events, so we must pre-initialize them. | ||
|
|
||
| foreach (TraceThread thread in m_eventLog.Threads) | ||
| { | ||
| // Threads start off blocked. | ||
| Debug.Assert(m_threadState[(int)thread.ThreadIndex].ThreadUninitialized); | ||
| Debug.Assert(m_threadState[(int)thread.ThreadIndex].LastCPUStackRelativeMSec == 0); | ||
| m_threadState[(int)thread.ThreadIndex].LogBlockingStart(timeRelativeMSec: 0, processorNumber: 0, thread, this); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| private void Clr_GCAllocationTick(GCAllocationTickTraceData obj) | ||
| { | ||
| throw new NotImplementedException(); | ||
|
|
@@ -648,6 +699,87 @@ private void OnSampledProfile(TraceEvent data) | |
| } | ||
| } | ||
|
|
||
| private void OnUniversalCSwitch(SampleTraceData data) | ||
| { | ||
| // This event fires when a thread is switched back in. | ||
| // The timestamp is the time at which the thread was switched back in. | ||
| // The value is the amount of time that thread was switched out. | ||
|
|
||
| TraceThread thread = data.Thread(); | ||
| if (thread == null) | ||
| { | ||
| Debug.WriteLine("Warning, no thread at " + data.TimeStampRelativeMSec.ToString("f3")); | ||
| return; | ||
| } | ||
|
|
||
| // Get the blocking stack. | ||
marklio marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| StackSourceCallStackIndex stackIndex = GetCallStack(data, thread); | ||
|
|
||
| // If the thread wasn't previously marked as blocked, then mark it. | ||
marklio marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| if (!m_threadState[(int)thread.ThreadIndex].ThreadBlocked) | ||
| { | ||
| double startTimeStampRelMsec = m_eventLog.QPCTimeToRelMSec(data.TimeStampQPC - (long)data.Value); | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Below, you use data.TimeStampRelativeMSec. This is calculation different? The answer to this may answer my question above
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. TraceEvent does most of its calculations in milliseconds, though the data type is a double so we get fractional milliseconds. The reason this one is in QPC is because the entity that writes the event is writing it in QPC and not milliseconds.
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm sure I'm missing something. On line 731 (with the same data), it uses .TimeStampRelativeMSec. And further below on 754, which seems like the same "mark as blocked" scenario, it also uses .TimeStampRelativeMSec. What is different about THIS callsite that it does the calculation from QPC?
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think this should block this particular improvement. I'm more just curious in what our story is for reacting to customer pain with this tool. |
||
| m_threadState[(int)thread.ThreadIndex].LogBlockingStart( | ||
| startTimeStampRelMsec, | ||
| data.ProcessorNumber, | ||
| thread, | ||
| this); | ||
| } | ||
|
|
||
| // Mark that this thread is no longer blocked. | ||
| m_threadState[(int)thread.ThreadIndex].LogBlockingEnd( | ||
| data.TimeStampRelativeMSec, data.ProcessorNumber, stackIndex, thread, this); | ||
|
|
||
| // Specify the CPU that is being used by this thread. | ||
| m_threadIndexUsingProc[data.ProcessorNumber] = thread.ThreadIndex; | ||
| } | ||
|
|
||
| private void OnUniversalCPUSample(SampleTraceData data) | ||
| { | ||
| TraceThread thread = data.Thread(); | ||
| if (thread == null) | ||
| { | ||
| Debug.WriteLine("Warning, no thread at " + data.TimeStampRelativeMSec.ToString("f3")); | ||
| return; | ||
| } | ||
|
|
||
| ThreadIndex oldThreadIndex = m_threadIndexUsingProc[data.ProcessorNumber]; | ||
| if (oldThreadIndex != ThreadIndex.Invalid && oldThreadIndex != thread.ThreadIndex) | ||
| { | ||
| // Mark the old thread as blocked. | ||
| if (m_threadState[(int)oldThreadIndex].ThreadRunning) | ||
| { | ||
| TraceThread oldThread = m_eventLog.Threads[oldThreadIndex]; | ||
| m_threadState[(int)oldThreadIndex].LogBlockingStart( | ||
| data.TimeStampRelativeMSec, | ||
| data.ProcessorNumber, | ||
| oldThread, | ||
| this); | ||
| } | ||
|
|
||
| // Mark the new thread as unblocked. | ||
| if (m_threadState[(int)thread.ThreadIndex].ThreadBlocked) | ||
| { | ||
| m_threadState[(int)thread.ThreadIndex].LogBlockingEnd( | ||
| data.TimeStampRelativeMSec, | ||
| data.ProcessorNumber, | ||
| m_threadState[(int)thread.ThreadIndex].LastCPUCallStack, | ||
| thread, | ||
| this); | ||
| } | ||
| } | ||
|
|
||
| StackSourceCallStackIndex stackIndex = GetCallStack(data, thread); | ||
| m_threadState[(int)thread.ThreadIndex].LogCPUStack( | ||
| data.TimeStampRelativeMSec, | ||
| stackIndex, | ||
| thread, | ||
| this, | ||
| isCPUSample: true); | ||
|
|
||
| m_threadIndexUsingProc[data.ProcessorNumber] = thread.ThreadIndex; | ||
| } | ||
|
|
||
| // THis is for the TaskWaitEnd. We want to have a stack event if 'data' does not have one, we lose the fact that | ||
| // ANYTHING happened on this thread. Thus we log the stack of the activity so that data does not need a stack. | ||
| private void OnTaskUnblock(TraceEvent data) | ||
|
|
@@ -810,6 +942,12 @@ private Func<TraceThread, StackSourceCallStackIndex> GetTopFramesForActivityComp | |
| /// </summary> | ||
| private struct ThreadState | ||
| { | ||
| public ThreadState() | ||
| { | ||
| LastCPUCallStack = StackSourceCallStackIndex.Invalid; | ||
| ReadyThreadCallStack = CallStackIndex.Invalid; | ||
| } | ||
|
|
||
| public void LogCPUStack(double timeRelativeMSec, StackSourceCallStackIndex stackIndex, TraceThread thread, ThreadTimeStackComputer computer, bool isCPUSample) | ||
| { | ||
| // THere is a small amount of cleanup after logging thread death. We will ignore this. | ||
|
|
@@ -1045,7 +1183,7 @@ public void AddCPUSample(double timeRelativeMSec, TraceThread thread, ThreadTime | |
| public ushort ProcessorNumberWhereAwakened; | ||
|
|
||
| internal double LastCPUStackRelativeMSec; | ||
| private StackSourceCallStackIndex LastCPUCallStack; | ||
| internal StackSourceCallStackIndex LastCPUCallStack; | ||
|
|
||
| private double ReadyThreadRelativeMSec; | ||
| private CallStackIndex ReadyThreadCallStack; | ||
|
|
@@ -1326,6 +1464,12 @@ private struct AspNetRequestInfo | |
| /// </summary> | ||
| private int m_numIdleProcs; // Count of bits idle threads in m_threadIDUsingProc | ||
|
|
||
| /// <summary> | ||
| /// For universal traces, we need to keep track of the ThreadIndex because the context switch events only come in when a thread switches back in. | ||
| /// This allows us to keep track of what thread was using a processor when a different thread starts using a processor. | ||
| /// </summary> | ||
| private ThreadIndex[] m_threadIndexUsingProc; | ||
|
|
||
| private bool m_traceHasCSwitches; // Does the trace have CSwitches (decide what kind of view to create) | ||
|
|
||
| private StackSourceSample m_sample; // Reusable scratch space | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this hardening against a plausible scenario? Or is this representative of a "do not crash" principle in this codebase? That is, would a null thread here be indicative of a bug elsewhere that we are making hard to diagnose by simply writing to Debug?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For universal traces this would be a bug. We create all of the threads that we know about when opening the trace for the first time. It's important to check for null here since we don't want to break the tool if we introduce a bug elsewhere. That said, we could potentially do something better here. From a layering perspective, there's not a good log to write to, but that's something that we could fix going forward. Thoughts?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is a hard call when a tool is so widely used on a diversity of inputs we can't hope to reasonably cover in validation. Do we use Watson to look at crashes? If this were my tool, I'd want to be set up to get the very best diagnostic artifacts. What is the experience of this "log and ignore" approach? Is it actually less annoying than an actionable crash for the type of user that runs the tool? Would be sweet to have the UI be isolated from this processing in a way that allowed actionable diagnostics to flow and provide a good UI experience, but that's a lot of refactoring.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
On Watson - I don't usually look at it too much, but on occasion yes. I would say that yes, the current behavior is less annoying than the crash. I usually see this fall into one of two buckets:
In the first case, the user's analysis is almost always unaffected. In the second case, it's very obvious something went wrong and the user will re-collect and/or report the issue.
I do agree with you - there is a pattern missing here around how we egress diagnostics. Some of the very core pieces of TraceEvent have logging capabilities that PerfView just has to turn on. That pattern likely just needs to be extended here (and frankly to many other places).