Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 19 additions & 1 deletion src/PerfView/PerfViewData.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9479,6 +9479,7 @@ public partial class EventPipePerfViewData : PerfViewFile
public override bool SupportsProcesses => m_supportsProcesses;

private bool m_supportsProcesses;
private bool m_hasUniversal;

public override List<IProcess> GetProcesses(TextWriter log)
{
Expand Down Expand Up @@ -9532,6 +9533,7 @@ protected override Action<Action> OpenImpl(Window parentWindow, StatusBar worker
bool hasExceptions = false;
bool hasUniversalSystem = false;
bool hasUniversalCPU = false;
bool hasUniversalCSwitch = false;
if (m_traceLog != null)
{
foreach (TraceEventCounts eventStats in m_traceLog.Stats)
Expand Down Expand Up @@ -9591,6 +9593,7 @@ protected override Action<Action> OpenImpl(Window parentWindow, StatusBar worker
}
else if (eventStats.ProviderGuid == UniversalSystemTraceEventParser.ProviderGuid)
{
m_hasUniversal = true;
hasUniversalSystem = true;
m_supportsProcesses = true;
}
Expand All @@ -9599,6 +9602,11 @@ protected override Action<Action> OpenImpl(Window parentWindow, StatusBar worker
hasUniversalCPU = true;
m_supportsProcesses = true;
}
else if (eventStats.ProviderGuid == UniversalEventsTraceEventParser.ProviderGuid && eventStats.EventName.StartsWith("cswitch"))
{
hasUniversalCSwitch = true;
m_supportsProcesses = true;
}
}
}

Expand All @@ -9616,6 +9624,11 @@ protected override Action<Action> OpenImpl(Window parentWindow, StatusBar worker
{
m_Children.Add(new PerfViewStackSource(this, "CPU"));
}

if (hasUniversalCSwitch)
{
m_Children.Add(new PerfViewStackSource(this, "Thread Time"));
}
}
else // dotnet-trace
{
Expand Down Expand Up @@ -9908,6 +9921,11 @@ protected internal override StackSource OpenStackSourceImpl(string streamName, T

return stackSource;
}
case "Thread Time":
{
var eventLog = GetTraceLog(log);
return eventLog.ThreadTimeStacks();
}
default:
{
var eventLog = GetTraceLog(log);
Expand Down Expand Up @@ -10103,7 +10121,7 @@ protected internal override void ConfigureStackWindow(string stackSourceName, St
stackWindow.ExcludeRegExTextBox.Text = excludePat;
}

if (stackSourceName.Contains("Thread Time"))
if (!m_hasUniversal && stackSourceName.Contains("Thread Time"))
{
stackWindow.ScalingPolicy = ScalingPolicyKind.TimeMetric;
stackWindow.FoldRegExTextBox.Text += ";UNMANAGED_CODE_TIME;CPU";
Expand Down
146 changes: 145 additions & 1 deletion src/TraceEvent/Computers/ThreadTimeComputer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,14 @@
// using Microsoft.Diagnostics.Tracing.Parsers;
using Microsoft.Diagnostics.Tracing.Parsers.Clr;
using Microsoft.Diagnostics.Tracing.Parsers.Kernel;
using Microsoft.Diagnostics.Tracing.Parsers.Universal.Events;
using Microsoft.Diagnostics.Tracing.Session;
using Microsoft.Diagnostics.Tracing.Stacks;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Net;
using System.Threading;
using Address = System.UInt64;

namespace Microsoft.Diagnostics.Tracing
Expand All @@ -38,11 +40,21 @@ public ThreadTimeStackComputer(TraceLog eventLog, SymbolReader symbolReader)
m_symbolReader = symbolReader;

m_threadState = new ThreadState[eventLog.Threads.Count];
for (int i = 0; i < m_threadState.Length; i++)
{
m_threadState[i] = new ThreadState();
}

m_IRPToThread = new Dictionary<Address, TraceThread>(32);

// We assume to begin with that all processors are idle (it will fix itself shortly).
m_numIdleProcs = eventLog.NumberOfProcessors;
m_threadIDUsingProc = new int[m_numIdleProcs];
m_threadIndexUsingProc = new ThreadIndex[m_numIdleProcs];
for(int i=0; i< m_threadIndexUsingProc.Length; i++)
{
m_threadIndexUsingProc[i] = ThreadIndex.Invalid;
}

m_lastPacketForProcess = new NetworkInfo[eventLog.Processes.Count];
for (int i = 0; i < m_lastPacketForProcess.Length; i++)
Expand All @@ -51,6 +63,7 @@ public ThreadTimeStackComputer(TraceLog eventLog, SymbolReader symbolReader)
}

MiniumReadiedTimeMSec = 0.5F; // We tend to only care about this if we are being starved.
InitializeForUniversal();
}
/// <summary>
/// If set we compute thread time using Tasks
Expand Down Expand Up @@ -114,6 +127,9 @@ public void GenerateThreadTimeStacks(MutableTraceEventStackSource outputStackSou

TraceLogEventSource eventSource = traceEvents == null ? m_eventLog.Events.GetSource() :
traceEvents.GetSource();

UniversalEventsTraceEventParser universalEventsParser = new UniversalEventsTraceEventParser(eventSource);

if (GroupByStartStopActivity)
{
UseTasks = true;
Expand Down Expand Up @@ -170,6 +186,7 @@ public void GenerateThreadTimeStacks(MutableTraceEventStackSource outputStackSou
if (!BlockedTimeOnly)
{
eventSource.Kernel.PerfInfoSample += OnSampledProfile;
universalEventsParser.cpu += OnUniversalCPUSample;

// WE add these too, because they are reasonably common so they can add a level of detail.
eventSource.Clr.GCAllocationTick += OnSampledProfile;
Expand Down Expand Up @@ -315,6 +332,18 @@ public void GenerateThreadTimeStacks(MutableTraceEventStackSource outputStackSou
{
eventSource.Dynamic.All += delegate (TraceEvent data)
{
// Ignore UniversalSystem.
if (data.ProviderGuid == UniversalSystemTraceEventParser.ProviderGuid)
{
return;
}

// Ignore UniversalEvents.
if (data.ProviderGuid == UniversalEventsTraceEventParser.ProviderGuid)
{
return;
}

// TODO decide what the correct heuristic is.
// Currently I only do this for things that might be an EventSoruce (uses the name->Guid hashing)
// Most importantly, it excludes the high volume CLR providers.
Expand Down Expand Up @@ -376,6 +405,7 @@ public void GenerateThreadTimeStacks(MutableTraceEventStackSource outputStackSou

// Add my own callbacks.
eventSource.Kernel.ThreadCSwitch += OnThreadCSwitch;
universalEventsParser.cswitch += OnUniversalCSwitch;
eventSource.Kernel.AddCallbackForEvents<DiskIOTraceData>(OnDiskIO);
eventSource.Kernel.AddCallbackForEvents<DiskIOInitTraceData>(OnDiskIOInit);
eventSource.Kernel.ThreadStart += OnThreadStart;
Expand Down Expand Up @@ -423,6 +453,27 @@ public void GenerateThreadTimeStacks(MutableTraceEventStackSource outputStackSou
m_threadState = null;
}

private void InitializeForUniversal()
{
if (m_eventLog.Stats.Contains(UniversalEventsTraceEventParser.ProviderGuid, "cswitch"))
{
// Set has cswitches, since we don't get thread start events, and the computer needs to know
// that there will be cswitch events.
m_traceHasCSwitches = true;

// Initialize all threads to blocked. For Windows traces, this happens when a thread start event is encountered.
// For Linux, we don't get these events, so we must pre-initialize them.

foreach (TraceThread thread in m_eventLog.Threads)
{
// Threads start off blocked.
Debug.Assert(m_threadState[(int)thread.ThreadIndex].ThreadUninitialized);
Debug.Assert(m_threadState[(int)thread.ThreadIndex].LastCPUStackRelativeMSec == 0);
m_threadState[(int)thread.ThreadIndex].LogBlockingStart(timeRelativeMSec: 0, processorNumber: 0, thread, this);
}
}
}

private void Clr_GCAllocationTick(GCAllocationTickTraceData obj)
{
throw new NotImplementedException();
Expand Down Expand Up @@ -648,6 +699,87 @@ private void OnSampledProfile(TraceEvent data)
}
}

private void OnUniversalCSwitch(SampleTraceData data)
{
// This event fires when a thread is switched back in.
// The timestamp is the time at which the thread was switched back in.
// The value is the amount of time that thread was switched out.

TraceThread thread = data.Thread();
if (thread == null)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this hardening against a plausible scenario? Or is this representative of a "do not crash" principle in this codebase? That is, would a null thread here be indicative of a bug elsewhere that we are making hard to diagnose by simply writing to Debug?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For universal traces this would be a bug. We create all of the threads that we know about when opening the trace for the first time. It's important to check for null here since we don't want to break the tool if we introduce a bug elsewhere. That said, we could potentially do something better here. From a layering perspective, there's not a good log to write to, but that's something that we could fix going forward. Thoughts?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a hard call when a tool is so widely used on a diversity of inputs we can't hope to reasonably cover in validation. Do we use Watson to look at crashes? If this were my tool, I'd want to be set up to get the very best diagnostic artifacts. What is the experience of this "log and ignore" approach? Is it actually less annoying than an actionable crash for the type of user that runs the tool? Would be sweet to have the UI be isolated from this processing in a way that allowed actionable diagnostics to flow and provide a good UI experience, but that's a lot of refactoring.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On Watson - I don't usually look at it too much, but on occasion yes. I would say that yes, the current behavior is less annoying than the crash. I usually see this fall into one of two buckets:

  1. There is a single or very small number of events that get ignored because the thread for whatever reason didn't get created.
  2. We're missing something important in the trace and when you open the view it's effectively empty.

In the first case, the user's analysis is almost always unaffected. In the second case, it's very obvious something went wrong and the user will re-collect and/or report the issue.

I do agree with you - there is a pattern missing here around how we egress diagnostics. Some of the very core pieces of TraceEvent have logging capabilities that PerfView just has to turn on. That pattern likely just needs to be extended here (and frankly to many other places).

{
Debug.WriteLine("Warning, no thread at " + data.TimeStampRelativeMSec.ToString("f3"));
return;
}

// Get the blocking stack.
StackSourceCallStackIndex stackIndex = GetCallStack(data, thread);

// If the thread wasn't previously marked as blocked, then mark it.
if (!m_threadState[(int)thread.ThreadIndex].ThreadBlocked)
{
double startTimeStampRelMsec = m_eventLog.QPCTimeToRelMSec(data.TimeStampQPC - (long)data.Value);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Below, you use data.TimeStampRelativeMSec. This is calculation different? The answer to this may answer my question above

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TraceEvent does most of its calculations in milliseconds, though the data type is a double so we get fractional milliseconds. The reason this one is in QPC is because the entity that writes the event is writing it in QPC and not milliseconds.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm sure I'm missing something. On line 731 (with the same data), it uses .TimeStampRelativeMSec. And further below on 754, which seems like the same "mark as blocked" scenario, it also uses .TimeStampRelativeMSec. What is different about THIS callsite that it does the calculation from QPC?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think this should block this particular improvement. I'm more just curious in what our story is for reacting to customer pain with this tool.

m_threadState[(int)thread.ThreadIndex].LogBlockingStart(
startTimeStampRelMsec,
data.ProcessorNumber,
thread,
this);
}

// Mark that this thread is no longer blocked.
m_threadState[(int)thread.ThreadIndex].LogBlockingEnd(
data.TimeStampRelativeMSec, data.ProcessorNumber, stackIndex, thread, this);

// Specify the CPU that is being used by this thread.
m_threadIndexUsingProc[data.ProcessorNumber] = thread.ThreadIndex;
}

private void OnUniversalCPUSample(SampleTraceData data)
{
TraceThread thread = data.Thread();
if (thread == null)
{
Debug.WriteLine("Warning, no thread at " + data.TimeStampRelativeMSec.ToString("f3"));
return;
}

ThreadIndex oldThreadIndex = m_threadIndexUsingProc[data.ProcessorNumber];
if (oldThreadIndex != ThreadIndex.Invalid && oldThreadIndex != thread.ThreadIndex)
{
// Mark the old thread as blocked.
if (m_threadState[(int)oldThreadIndex].ThreadRunning)
{
TraceThread oldThread = m_eventLog.Threads[oldThreadIndex];
m_threadState[(int)oldThreadIndex].LogBlockingStart(
data.TimeStampRelativeMSec,
data.ProcessorNumber,
oldThread,
this);
}

// Mark the new thread as unblocked.
if (m_threadState[(int)thread.ThreadIndex].ThreadBlocked)
{
m_threadState[(int)thread.ThreadIndex].LogBlockingEnd(
data.TimeStampRelativeMSec,
data.ProcessorNumber,
m_threadState[(int)thread.ThreadIndex].LastCPUCallStack,
thread,
this);
}
}

StackSourceCallStackIndex stackIndex = GetCallStack(data, thread);
m_threadState[(int)thread.ThreadIndex].LogCPUStack(
data.TimeStampRelativeMSec,
stackIndex,
thread,
this,
isCPUSample: true);

m_threadIndexUsingProc[data.ProcessorNumber] = thread.ThreadIndex;
}

// THis is for the TaskWaitEnd. We want to have a stack event if 'data' does not have one, we lose the fact that
// ANYTHING happened on this thread. Thus we log the stack of the activity so that data does not need a stack.
private void OnTaskUnblock(TraceEvent data)
Expand Down Expand Up @@ -810,6 +942,12 @@ private Func<TraceThread, StackSourceCallStackIndex> GetTopFramesForActivityComp
/// </summary>
private struct ThreadState
{
public ThreadState()
{
LastCPUCallStack = StackSourceCallStackIndex.Invalid;
ReadyThreadCallStack = CallStackIndex.Invalid;
}

public void LogCPUStack(double timeRelativeMSec, StackSourceCallStackIndex stackIndex, TraceThread thread, ThreadTimeStackComputer computer, bool isCPUSample)
{
// THere is a small amount of cleanup after logging thread death. We will ignore this.
Expand Down Expand Up @@ -1045,7 +1183,7 @@ public void AddCPUSample(double timeRelativeMSec, TraceThread thread, ThreadTime
public ushort ProcessorNumberWhereAwakened;

internal double LastCPUStackRelativeMSec;
private StackSourceCallStackIndex LastCPUCallStack;
internal StackSourceCallStackIndex LastCPUCallStack;

private double ReadyThreadRelativeMSec;
private CallStackIndex ReadyThreadCallStack;
Expand Down Expand Up @@ -1326,6 +1464,12 @@ private struct AspNetRequestInfo
/// </summary>
private int m_numIdleProcs; // Count of bits idle threads in m_threadIDUsingProc

/// <summary>
/// For universal traces, we need to keep track of the ThreadIndex because the context switch events only come in when a thread switches back in.
/// This allows us to keep track of what thread was using a processor when a different thread starts using a processor.
/// </summary>
private ThreadIndex[] m_threadIndexUsingProc;

private bool m_traceHasCSwitches; // Does the trace have CSwitches (decide what kind of view to create)

private StackSourceSample m_sample; // Reusable scratch space
Expand Down
20 changes: 14 additions & 6 deletions src/TraceEvent/EventPipe/EventPipeMetadata.cs
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ internal unsafe class EventPipeMetadata
public static EventPipeMetadata ReadV5OrLower(ref SpanReader reader, int pointerSize, int processId, int fileFormatVersionNumber)
{
// Read in the header (The header does not include payload parameter information)
var metadata = new EventPipeMetadata(pointerSize, processId);
var metadata = new EventPipeMetadata(pointerSize, processId, EventPipeFieldLayoutVersion.FileFormatV5OrLess);
metadata.ParseHeader(ref reader, fileFormatVersionNumber);

// If the metadata contains no parameter metadata, don't attempt to read it.
Expand Down Expand Up @@ -83,7 +83,7 @@ public static EventPipeMetadata ReadV5OrLower(ref SpanReader reader, int pointer
/// </summary>
public static EventPipeMetadata ReadV6OrGreater(ref SpanReader reader, int pointerSize)
{
var metadata = new EventPipeMetadata(pointerSize);
var metadata = new EventPipeMetadata(pointerSize, EventPipeFieldLayoutVersion.FileFormatV6OrGreater);
int metadataLength = reader.ReadUInt16();
long offset = reader.StreamOffset;
SpanReader metadataReader = new SpanReader(reader.ReadBytes(metadataLength), offset);
Expand All @@ -94,16 +94,18 @@ public static EventPipeMetadata ReadV6OrGreater(ref SpanReader reader, int point
return metadata;
}

private EventPipeMetadata(int pointerSize)
: this(pointerSize, -1)
private EventPipeMetadata(int pointerSize, EventPipeFieldLayoutVersion fieldLayoutVersion)
: this(pointerSize, -1, fieldLayoutVersion)
{
}

/// <summary>
/// 'processID' is the process ID for the whole stream or -1 is this is a V6+ stream that can support multiple processes.
/// </summary>
private EventPipeMetadata(int pointerSize, int processId)
private EventPipeMetadata(int pointerSize, int processId, EventPipeFieldLayoutVersion fieldLayoutVersion)
{
_fieldLayoutVersion = fieldLayoutVersion;

// Get the event record and fill in fields that we can without deserializing anything.
_eventRecord = (TraceEventNativeMethods.EVENT_RECORD*)Marshal.AllocHGlobal(sizeof(TraceEventNativeMethods.EVENT_RECORD));
ClearMemory(_eventRecord, sizeof(TraceEventNativeMethods.EVENT_RECORD));
Expand Down Expand Up @@ -175,7 +177,12 @@ private void InitDefaultParameters()
// ID. We'll need to refactor up the stack if we want to expose a bigger ID.
_eventRecord->EventHeader.ThreadId = unchecked((int)eventData.ThreadId);
_eventRecord->EventHeader.ProcessId = unchecked((int)eventData.ProcessId);
if (eventData.ThreadIndexOrId == eventData.CaptureThreadIndexOrId && eventData.CaptureProcNumber != -1)

// Set the processor number for V6+ file formats. For V5 and below we only set it if the ThreadId == CaptureThreadId
// to ensure that we don't surface a thread ID as a processor ID.
if ((eventData.CaptureProcNumber != -1) &&
((_fieldLayoutVersion == EventPipeFieldLayoutVersion.FileFormatV5OrLess && eventData.ThreadIndexOrId == eventData.CaptureThreadIndexOrId) ||
(_fieldLayoutVersion == EventPipeFieldLayoutVersion.FileFormatV6OrGreater)))
{
// Its not clear how the caller is supposed to distinguish between events that we know were on
// processor 0 vs. lacking information about what processor number the thread is on and
Expand Down Expand Up @@ -959,6 +966,7 @@ private void StripStartStopInEventName()
}
}

private EventPipeFieldLayoutVersion _fieldLayoutVersion;
private TraceEventNativeMethods.EVENT_RECORD* _eventRecord;
private TraceEventNativeMethods.EVENT_HEADER_EXTENDED_DATA_ITEM* _extendedDataBuffer;
private Guid* _relatedActivityBuffer;
Expand Down
4 changes: 4 additions & 0 deletions src/TraceEvent/Parsers/UniversalSystemTraceEventParser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -480,6 +480,10 @@ internal sealed class ELFProcessMappingSymbolMetadata : ProcessMappingSymbolMeta
public string DebugLink { get; set; }
[JsonPropertyName("build_id")]
public string BuildId { get; set; }
[JsonPropertyName("p_vaddr")]
public ulong VirtualAddress { get; set; }
[JsonPropertyName("p_offset")]
public ulong FileOffset { get; set; }
}

internal enum ProcessMappingSymbolMetadataFileType
Expand Down
3 changes: 3 additions & 0 deletions src/TraceEvent/TraceEvent.Tests/Universal/UniversalTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,9 @@ public void ELFProcessMappingMetadata_Empty()
ELFProcessMappingSymbolMetadata peMetadata = (ELFProcessMappingSymbolMetadata)metadata;
Assert.Null(peMetadata.DebugLink);
Assert.Null(peMetadata.BuildId);
Assert.Equal((ulong)0, peMetadata.VirtualAddress);
Assert.Equal((ulong)0, peMetadata.FileOffset);

}
}
}
Loading