Skip to content

Commit

Permalink
Prevent ActivityId leak across threads
Browse files Browse the repository at this point in the history
Fixes dotnet#51608

Previously running an async task with two or more levels of nesting on the EventSource activity stack would leave the ActivityID set
on the old thread after being swapped out at a yield point. This meant that threadpool threads would often have stale ActivityIDs
present when they began a new work item. This in turn produced non-sensical logs where it appeared that unrelated work items were
nested within each other.

Most of this change is adding tests. The ActivityIdIsZeroedOnThreadSwitchOut test is the one being fixed. The remainder of the new
tests pass both before and after the change. I added those to better describe some of the existing behavior and add a little confidence
that the change didn't have unintended effects elsewhere. As best I can tell the Activity tracking feature didn't have any testing previously
and there is certainly still room for improvement on test coverage.
  • Loading branch information
noahfalk committed Jul 14, 2021
1 parent 21c2516 commit 4081e9e
Show file tree
Hide file tree
Showing 3 changed files with 181 additions and 4 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System;
using System.Collections.Generic;
using System.Diagnostics.Tracing;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Xunit;

namespace BasicEventSourceTests
{
public class ActivityTracking
{
[Fact]
public void StartStopCreatesActivity()
{
using ActivityEventListener l = new ActivityEventListener();

Assert.Equal(Guid.Empty, EventSource.CurrentThreadActivityId);
ActivityEventSource.Log.ExampleStart();
Assert.NotEqual(Guid.Empty, EventSource.CurrentThreadActivityId);
ActivityEventSource.Log.ExampleStop();
Assert.Equal(Guid.Empty, EventSource.CurrentThreadActivityId);
}

[Fact]
public async Task ActivityFlowsAsync()
{
using ActivityEventListener l = new ActivityEventListener();

Assert.Equal(Guid.Empty, EventSource.CurrentThreadActivityId);
ActivityEventSource.Log.ExampleStart();
Assert.NotEqual(Guid.Empty, EventSource.CurrentThreadActivityId);
await Task.Yield();
Assert.NotEqual(Guid.Empty, EventSource.CurrentThreadActivityId);
ActivityEventSource.Log.ExampleStop();
Assert.Equal(Guid.Empty, EventSource.CurrentThreadActivityId);
}

[Fact]
public async Task ActivityIdIsZeroedOnThreadSwitchOut()
{
using ActivityEventListener l = new ActivityEventListener();

// Run tasks on many threads. If an activity id leaks it is likely
// that the thread will be sheduled to run one of our other tasks
// and we can detect the non-zero id at the start of the task
List<Task> tasks = new List<Task>();
for (int i = 0; i < 100; i++)
{
tasks.Add(Task.Run(YieldTwoActivitiesDeep));
}

await Task.WhenAll(tasks.ToArray());
}

private async Task YieldTwoActivitiesDeep()
{
Assert.Equal(Guid.Empty, EventSource.CurrentThreadActivityId);
ActivityEventSource.Log.ExampleStart();
ActivityEventSource.Log.Example2Start();
await Task.Yield();
ActivityEventSource.Log.Example2Stop();
ActivityEventSource.Log.ExampleStop();
Assert.Equal(Guid.Empty, EventSource.CurrentThreadActivityId);
}

// I don't know if this behavior is by-design or accidental. For now
// I am attempting to preserve it to lower back compat risk, but in
// the future we might decide it wasn't even desirable to begin with.
// Compare with SetCurrentActivityIdAfterEventDoesNotFlowAsync below.
[Fact]
public async Task SetCurrentActivityIdBeforeEventFlowsAsync()
{
using ActivityEventListener l = new ActivityEventListener();
try
{
Guid g = Guid.NewGuid();
EventSource.SetCurrentThreadActivityId(g);
Assert.Equal(g, EventSource.CurrentThreadActivityId);
ActivityEventSource.Log.ExampleStart();
await Task.Yield();
ActivityEventSource.Log.ExampleStop();
Assert.Equal(g, EventSource.CurrentThreadActivityId);
}
finally
{
EventSource.SetCurrentThreadActivityId(Guid.Empty);
}
}

// I don't know if this behavior is by-design or accidental. For now
// I am attempting to preserve it to lower back compat risk, but in
// the future we might decide it wasn't even desirable to begin with.
// Compare with SetCurrentActivityIdBeforeEventFlowsAsync above.
[Fact]
public async Task SetCurrentActivityIdAfterEventDoesNotFlowAsync()
{
using ActivityEventListener l = new ActivityEventListener();
try
{
ActivityEventSource.Log.ExampleStart();
Guid g = Guid.NewGuid();
EventSource.SetCurrentThreadActivityId(g);
Assert.Equal(g, EventSource.CurrentThreadActivityId);
await Task.Yield();
Assert.NotEqual(g, EventSource.CurrentThreadActivityId);
ActivityEventSource.Log.ExampleStop();
}
finally
{
EventSource.SetCurrentThreadActivityId(Guid.Empty);
}
}
}

[EventSource(Name = "ActivityEventSource")]
class ActivityEventSource : EventSource
{
public static ActivityEventSource Log = new ActivityEventSource();

[Event(1)]
public void ExampleStart() => WriteEvent(1);

[Event(2)]
public void ExampleStop() => WriteEvent(2);

[Event(3)]
public void Example2Start() => WriteEvent(3);

[Event(4)]
public void Example2Stop() => WriteEvent(4);
}

class ActivityEventListener : EventListener
{
protected override void OnEventSourceCreated(EventSource eventSource)
{
if (eventSource.Name == "System.Threading.Tasks.TplEventSource")
{
EnableEvents(eventSource, EventLevel.LogAlways, (EventKeywords)0x80);
}
else if (eventSource.Name == "ActivityEventSource")
{
EnableEvents(eventSource, EventLevel.Informational);
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
<Compile Include="BasicEventSourceTest\TestsManifestGeneration.Etw.cs" />
</ItemGroup>
<ItemGroup>
<Compile Include="BasicEventSourceTest\ActivityTracking.cs" />
<Compile Include="BasicEventSourceTest\Harness\EventTestHarness.cs" />
<Compile Include="BasicEventSourceTest\FuzzyTests.cs" />
<Compile Include="BasicEventSourceTest\Harness\Listeners.cs" />
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -550,12 +550,36 @@ private static unsafe void WriteNibble(ref byte* ptr, byte* endPtr, uint value)
// This callback is used to initialize the m_current AsyncLocal Variable.
// Its job is to keep the ETW Activity ID (part of thread local storage) in sync
// with m_current.ActivityID
//
// WARNING: When mixing manual usage of EventSource.SetCurrentThreadActivityID
// and Start/Stop EventSource events I can't identify a clear design how this
// synchronization is intended to work. For example code that changes
// SetCurrentThreadActivityID after a FooStart() event will not flow the
// explicit ID with the async work, but if FooStart() event is called after
// SetCurrentThreadActivityID then the explicit ID change does flow.
// For now I've adopted the approach:
// Priority 1: Make the API predictable/sensible when only Start/Stop events
// are in use.
// Priority 2: If users aren't complaining and it doesn't interfere with
// goal #1, try to preserve the arbitrary/buggy? existing behavior
// for mixed usage of SetActivityID + events.
//
// For scenarios that only use start/stop events this is what we expect:
// calling start -> push new ID on stack and update thread-local to match new ID
// calling stop -> pop ID from stack and update thread-local to match new topmost
// still active ID. If there is none, set ID to zero
// thread swap -> update thread-local to match topmost active ID.
// If there is none, set ID to zero.
private void ActivityChanging(AsyncLocalValueChangedArgs<ActivityInfo?> args)
{
ActivityInfo? cur = args.CurrentValue;
ActivityInfo? prev = args.PreviousValue;

// Are we popping off a value? (we have a prev, and it creator is cur)
// Special case only relevant for mixed SetActivityID usage:
//
// Are we MAYBE popping off a value? (we have a prev, and it creator is cur)
// We can't be certain this is a pop because a thread swapping between two
// ExecutionContexts can also appear the same way.
// Then check if we should use the GUID at the time of the start event
if (prev != null && prev.m_creator == cur)
{
Expand All @@ -569,8 +593,7 @@ private void ActivityChanging(AsyncLocalValueChangedArgs<ActivityInfo?> args)
}
}

// OK we did not have an explicit SetActivityID set. Then we should be
// setting the activity to current ActivityInfo. However that activity
// Set the activity to current ActivityInfo. However that activity
// might be dead, in which case we should skip it, so we never set
// the ID to dead things.
while (cur != null)
Expand All @@ -583,8 +606,10 @@ private void ActivityChanging(AsyncLocalValueChangedArgs<ActivityInfo?> args)
}
cur = cur.m_creator;
}

// we can get here if there is no information on our activity stack (everything is dead)
// currently we do nothing, as that seems better than setting to Guid.Emtpy.
// Set ActivityID to zero
EventSource.SetCurrentThreadActivityId(Guid.Empty);
}

/// <summary>
Expand Down

0 comments on commit 4081e9e

Please sign in to comment.