diff --git a/TUnit.Engine.Tests/Scheduling/ConstraintKeySchedulerConcurrencyTests.cs b/TUnit.Engine.Tests/Scheduling/ConstraintKeySchedulerConcurrencyTests.cs new file mode 100644 index 0000000000..a75195c890 --- /dev/null +++ b/TUnit.Engine.Tests/Scheduling/ConstraintKeySchedulerConcurrencyTests.cs @@ -0,0 +1,35 @@ +using Shouldly; +using TUnit.Engine.Tests.Enums; + +namespace TUnit.Engine.Tests.Scheduling; + +/// +/// Engine tests that validate ConstraintKeyScheduler handles high contention scenarios +/// without deadlocks. Invokes TUnit.TestProject.ConstraintKeyStressTests to ensure: +/// 1. Many concurrent tests with overlapping constraint keys complete successfully +/// 2. No deadlocks occur under high contention +/// 3. Tests complete within reasonable timeout +/// +public class ConstraintKeySchedulerConcurrencyTests(TestMode testMode) : InvokableTestBase(testMode) +{ + [Test] + [Repeat(5)] + public async Task HighContention_WithOverlappingConstraints_CompletesWithoutDeadlock() + { + // This test establishes baseline behavior before optimization + // It runs tests with overlapping constraint keys to verify the scheduler + // can handle high contention without deadlocking + + // Timeout proves no deadlock occurred - if tests complete before timeout, scheduler is working correctly + using var cts = new CancellationTokenSource(TimeSpan.FromSeconds(30)); + + await RunTestsWithFilter("/*/*/ConstraintKeyStressTests/*", + [ + result => result.ResultSummary.Outcome.ShouldBe("Completed"), + result => result.ResultSummary.Counters.Total.ShouldBe(30), // 10 tests × 3 runs (Repeat(2)) + result => result.ResultSummary.Counters.Passed.ShouldBe(30), + result => result.ResultSummary.Counters.Failed.ShouldBe(0) + ], + new RunOptions().WithForcefulCancellationToken(cts.Token)); + } +} diff --git a/TUnit.Engine/Discovery/ReflectionTestDataCollector.cs b/TUnit.Engine/Discovery/ReflectionTestDataCollector.cs index caac547fb9..1a1aabfdcd 100644 --- a/TUnit.Engine/Discovery/ReflectionTestDataCollector.cs +++ b/TUnit.Engine/Discovery/ReflectionTestDataCollector.cs @@ -1,5 +1,6 @@ using System.Buffers; using System.Collections.Concurrent; +using System.Collections.Immutable; using System.Diagnostics.CodeAnalysis; using System.Reflection; using System.Runtime.CompilerServices; @@ -28,8 +29,7 @@ namespace TUnit.Engine.Discovery; internal sealed class ReflectionTestDataCollector : ITestDataCollector { private static readonly ConcurrentDictionary _scannedAssemblies = new(); - private static readonly List _discoveredTests = new(capacity: 1000); // Pre-sized for typical test suites - private static readonly Lock _discoveredTestsLock = new(); // Lock for thread-safe access to _discoveredTests + private static ImmutableList _discoveredTests = ImmutableList.Empty; private static readonly ConcurrentDictionary _assemblyTypesCache = new(); private static readonly ConcurrentDictionary _typeMethodsCache = new(); @@ -47,10 +47,7 @@ private static Assembly[] GetCachedAssemblies() public static void ClearCaches() { _scannedAssemblies.Clear(); - lock (_discoveredTestsLock) - { - _discoveredTests.Clear(); - } + Interlocked.Exchange(ref _discoveredTests, ImmutableList.Empty); _assemblyTypesCache.Clear(); _typeMethodsCache.Clear(); lock (_assemblyCacheLock) @@ -133,12 +130,15 @@ public async Task> CollectTestsAsync(string testSessio var dynamicTests = await DiscoverDynamicTests(testSessionId).ConfigureAwait(false); newTests.AddRange(dynamicTests); - // Add to discovered tests with lock (better enumeration performance than ConcurrentBag) - lock (_discoveredTestsLock) + // Atomic swap - no lock needed for readers + ImmutableList original, updated; + do { - _discoveredTests.AddRange(newTests); - return new List(_discoveredTests); - } + original = _discoveredTests; + updated = original.AddRange(newTests); + } while (Interlocked.CompareExchange(ref _discoveredTests, updated, original) != original); + + return _discoveredTests; } public async IAsyncEnumerable CollectTestsStreamingAsync( @@ -171,10 +171,7 @@ public async IAsyncEnumerable CollectTestsStreamingAsync( // Stream tests from this assembly await foreach (var test in DiscoverTestsInAssemblyStreamingAsync(assembly, cancellationToken)) { - lock (_discoveredTestsLock) - { - _discoveredTests.Add(test); - } + ImmutableInterlocked.Update(ref _discoveredTests, list => list.Add(test)); yield return test; } } @@ -182,10 +179,7 @@ public async IAsyncEnumerable CollectTestsStreamingAsync( // Stream dynamic tests await foreach (var dynamicTest in DiscoverDynamicTestsStreamingAsync(testSessionId, cancellationToken)) { - lock (_discoveredTestsLock) - { - _discoveredTests.Add(dynamicTest); - } + ImmutableInterlocked.Update(ref _discoveredTests, list => list.Add(dynamicTest)); yield return dynamicTest; } } diff --git a/TUnit.Engine/Scheduling/ConstraintKeyScheduler.cs b/TUnit.Engine/Scheduling/ConstraintKeyScheduler.cs index dcdcb4e4f1..47c1b88dd6 100644 --- a/TUnit.Engine/Scheduling/ConstraintKeyScheduler.cs +++ b/TUnit.Engine/Scheduling/ConstraintKeyScheduler.cs @@ -55,15 +55,24 @@ public async ValueTask ExecuteTestsWithConstraintsAsync( bool canStart; lock (lockObject) { - // Check if all constraint keys are available - canStart = !constraintKeys.Any(key => lockedKeys.Contains(key)); - + // Check if all constraint keys are available - manual loop avoids LINQ allocation + canStart = true; + var keyCount = constraintKeys.Count; + for (var i = 0; i < keyCount; i++) + { + if (lockedKeys.Contains(constraintKeys[i])) + { + canStart = false; + break; + } + } + if (canStart) { // Lock all the constraint keys for this test - foreach (var key in constraintKeys) + for (var i = 0; i < keyCount; i++) { - lockedKeys.Add(key); + lockedKeys.Add(constraintKeys[i]); } } } @@ -146,8 +155,10 @@ private async Task ExecuteTestAndReleaseKeysAsync( parallelLimiterSemaphore?.Release(); // Release the constraint keys and check if any waiting tests can now run + // Pre-allocate lists outside the lock to minimize lock duration var testsToStart = new List<(AbstractExecutableTest Test, IReadOnlyList ConstraintKeys, TaskCompletionSource StartSignal)>(); - + var testsToRequeue = new List<(AbstractExecutableTest Test, IReadOnlyList ConstraintKeys, TaskCompletionSource StartSignal)>(); + lock (lockObject) { // Release all constraint keys for this test @@ -155,35 +166,43 @@ private async Task ExecuteTestAndReleaseKeysAsync( { lockedKeys.Remove(key); } - + // Check waiting tests to see if any can now run - var tempQueue = new List<(AbstractExecutableTest Test, IReadOnlyList ConstraintKeys, TaskCompletionSource StartSignal)>(); while (waitingTests.TryDequeue(out var waitingTest)) { - // Check if all constraint keys are available for this waiting test - var canStart = !waitingTest.ConstraintKeys.Any(key => lockedKeys.Contains(key)); - + // Check if all constraint keys are available for this waiting test - manual loop avoids LINQ allocation + var canStart = true; + var waitingKeyCount = waitingTest.ConstraintKeys.Count; + for (var i = 0; i < waitingKeyCount; i++) + { + if (lockedKeys.Contains(waitingTest.ConstraintKeys[i])) + { + canStart = false; + break; + } + } + if (canStart) { // Lock the keys for this test - foreach (var key in waitingTest.ConstraintKeys) + for (var i = 0; i < waitingKeyCount; i++) { - lockedKeys.Add(key); + lockedKeys.Add(waitingTest.ConstraintKeys[i]); } - + // Mark test to start after we exit the lock testsToStart.Add(waitingTest); } else { // Still can't run, keep it in the queue - tempQueue.Add(waitingTest); + testsToRequeue.Add(waitingTest); } } - + // Re-add tests that still can't run - foreach (var waitingTestItem in tempQueue) + foreach (var waitingTestItem in testsToRequeue) { waitingTests.Enqueue(waitingTestItem); } diff --git a/TUnit.PerformanceBenchmarks/Tests/ConstraintKeys/ConstraintKeyStressTests.cs b/TUnit.PerformanceBenchmarks/Tests/ConstraintKeys/ConstraintKeyStressTests.cs new file mode 100644 index 0000000000..f1e0f46f05 --- /dev/null +++ b/TUnit.PerformanceBenchmarks/Tests/ConstraintKeys/ConstraintKeyStressTests.cs @@ -0,0 +1,47 @@ +namespace TUnit.PerformanceBenchmarks.Tests.ConstraintKeys; + +/// +/// Stress tests for ConstraintKeyScheduler with overlapping constraint keys. +/// These tests create high contention scenarios to exercise lock contention paths. +/// +public class ConstraintKeyStressTests +{ + // Single constraint key tests - 50 instances each, must run serially within key group + [Test, Repeat(50), NotInParallel("KeyA")] + public async Task Test_SingleKey_A() => await Task.Delay(1); + + [Test, Repeat(50), NotInParallel("KeyB")] + public async Task Test_SingleKey_B() => await Task.Delay(1); + + [Test, Repeat(50), NotInParallel("KeyC")] + public async Task Test_SingleKey_C() => await Task.Delay(1); + + [Test, Repeat(50), NotInParallel("KeyD")] + public async Task Test_SingleKey_D() => await Task.Delay(1); + + [Test, Repeat(50), NotInParallel("KeyE")] + public async Task Test_SingleKey_E() => await Task.Delay(1); + + // Overlapping two-key tests - creates dependency chains between key groups + [Test, Repeat(30), NotInParallel(["KeyA", "KeyB"])] + public async Task Test_OverlappingKeys_AB() => await Task.Delay(1); + + [Test, Repeat(30), NotInParallel(["KeyB", "KeyC"])] + public async Task Test_OverlappingKeys_BC() => await Task.Delay(1); + + [Test, Repeat(30), NotInParallel(["KeyC", "KeyD"])] + public async Task Test_OverlappingKeys_CD() => await Task.Delay(1); + + [Test, Repeat(30), NotInParallel(["KeyD", "KeyE"])] + public async Task Test_OverlappingKeys_DE() => await Task.Delay(1); + + [Test, Repeat(30), NotInParallel(["KeyE", "KeyA"])] + public async Task Test_OverlappingKeys_EA() => await Task.Delay(1); + + // Triple-key tests - maximum contention scenarios + [Test, Repeat(20), NotInParallel(["KeyA", "KeyB", "KeyC"])] + public async Task Test_TripleKeys_ABC() => await Task.Delay(1); + + [Test, Repeat(20), NotInParallel(["KeyC", "KeyD", "KeyE"])] + public async Task Test_TripleKeys_CDE() => await Task.Delay(1); +} diff --git a/TUnit.TestProject/ConstraintKeyStressTests.cs b/TUnit.TestProject/ConstraintKeyStressTests.cs new file mode 100644 index 0000000000..69d27b3a61 --- /dev/null +++ b/TUnit.TestProject/ConstraintKeyStressTests.cs @@ -0,0 +1,247 @@ +using System.Collections.Concurrent; +using TUnit.Assertions; +using TUnit.Core; +using TUnit.TestProject.Attributes; + +namespace TUnit.TestProject; + +/// +/// Stress tests for ConstraintKeyScheduler with overlapping constraint keys. +/// These tests create high contention scenarios to verify the scheduler: +/// 1. Handles concurrent access to shared constraint keys without deadlocks +/// 2. Enforces mutual exclusion for tests with same constraint keys +/// 3. Allows parallel execution for tests with different constraint keys +/// +[EngineTest(ExpectedResult.Pass)] +public class ConstraintKeyStressTests +{ + // Constants for test configuration + private const int WorkDurationMilliseconds = 50; // Duration each test simulates work + private const int TimeoutMilliseconds = 30_000; // Maximum time allowed for each test to prevent CI hangs + + // Track execution windows to verify constraint key semantics + private static readonly ConcurrentDictionary ExecutionWindows = new(); + + // Track test invocations to verify all tests actually execute + private static readonly ConcurrentDictionary TestInvocations = new(); + + // Tests with constraint key "A" - will contend with each other + [Test, Repeat(2), Timeout(TimeoutMilliseconds)] + [NotInParallel("A")] + public async Task StressTest_KeyA_1(CancellationToken cancellationToken) + { + await ExecuteConstraintKeyStressTest(cancellationToken); + } + + [Test, Repeat(2), Timeout(TimeoutMilliseconds)] + [NotInParallel("A")] + public async Task StressTest_KeyA_2(CancellationToken cancellationToken) + { + await ExecuteConstraintKeyStressTest(cancellationToken); + } + + // Tests with constraint key "B" - will contend with each other + [Test, Repeat(2), Timeout(TimeoutMilliseconds)] + [NotInParallel("B")] + public async Task StressTest_KeyB_1(CancellationToken cancellationToken) + { + await ExecuteConstraintKeyStressTest(cancellationToken); + } + + [Test, Repeat(2), Timeout(TimeoutMilliseconds)] + [NotInParallel("B")] + public async Task StressTest_KeyB_2(CancellationToken cancellationToken) + { + await ExecuteConstraintKeyStressTest(cancellationToken); + } + + // Tests with overlapping constraint keys "A" and "B" + // These create complex contention scenarios + [Test, Repeat(2), Timeout(TimeoutMilliseconds)] + [NotInParallel(["A", "B"])] + public async Task StressTest_KeyAB_1(CancellationToken cancellationToken) + { + await ExecuteConstraintKeyStressTest(cancellationToken); + } + + [Test, Repeat(2), Timeout(TimeoutMilliseconds)] + [NotInParallel(["A", "B"])] + public async Task StressTest_KeyAB_2(CancellationToken cancellationToken) + { + await ExecuteConstraintKeyStressTest(cancellationToken); + } + + // Tests with constraint key "C" - independent of A and B + [Test, Repeat(2), Timeout(TimeoutMilliseconds)] + [NotInParallel("C")] + public async Task StressTest_KeyC_1(CancellationToken cancellationToken) + { + await ExecuteConstraintKeyStressTest(cancellationToken); + } + + [Test, Repeat(2), Timeout(TimeoutMilliseconds)] + [NotInParallel("C")] + public async Task StressTest_KeyC_2(CancellationToken cancellationToken) + { + await ExecuteConstraintKeyStressTest(cancellationToken); + } + + // Tests with overlapping constraint keys "B" and "C" + [Test, Repeat(2), Timeout(TimeoutMilliseconds)] + [NotInParallel(["B", "C"])] + public async Task StressTest_KeyBC_1(CancellationToken cancellationToken) + { + await ExecuteConstraintKeyStressTest(cancellationToken); + } + + [Test, Repeat(2), Timeout(TimeoutMilliseconds)] + [NotInParallel(["B", "C"])] + public async Task StressTest_KeyBC_2(CancellationToken cancellationToken) + { + await ExecuteConstraintKeyStressTest(cancellationToken); + } + + /// + /// Common execution logic for all constraint key stress tests. + /// Records execution windows and verifies constraint semantics. + /// + private static async Task ExecuteConstraintKeyStressTest(CancellationToken cancellationToken) + { + var testContext = TestContext.Current!; + var testName = testContext.Metadata.TestDetails.TestName; + + // Track that this test was invoked + TestInvocations.AddOrUpdate(testName, 1, (_, count) => count + 1); + + // Record execution start time + var startTime = DateTime.UtcNow; + + // Simulate work - represents actual test execution time + await Task.Delay(WorkDurationMilliseconds, cancellationToken); + + // Record execution end time + var endTime = DateTime.UtcNow; + + // Store execution window for constraint verification + ExecutionWindows[testName] = (startTime, endTime); + + // Verify mutual exclusion for same constraint keys + await VerifyConstraintKeySemantics(testContext); + } + + /// + /// Verifies that constraint keys enforce proper mutual exclusion. + /// Tests with the same constraint key should not have overlapping execution windows. + /// + private static async Task VerifyConstraintKeySemantics(TestContext currentTest) + { + var currentTestName = currentTest.Metadata.TestDetails.TestName; + var currentWindow = ExecutionWindows[currentTestName]; + + // Get constraint keys for current test + var currentKeys = GetConstraintKeys(currentTest); + + // Check all other completed tests for overlaps with same constraint keys + foreach (var (otherTestName, otherWindow) in ExecutionWindows) + { + if (otherTestName == currentTestName) + { + continue; // Skip self + } + + // For tests with shared constraint keys, verify no overlap + var otherKeys = GetConstraintKeysFromTestName(otherTestName); + var hasSharedKey = currentKeys.Intersect(otherKeys).Any(); + + if (hasSharedKey) + { + // Check if execution windows overlap + var overlap = !(currentWindow.End <= otherWindow.Start || otherWindow.End <= currentWindow.Start); + + if (overlap) + { + // This indicates a constraint violation - tests with same key should be serial + // Note: Due to timing precision and async nature, allow small overlaps (< 10ms) + var overlapDuration = GetOverlapDuration(currentWindow, otherWindow); + + // Use Assert.Fail for constraint violations + if (overlapDuration.TotalMilliseconds >= 10) + { + Assert.Fail( + $"Tests with shared constraint keys should not overlap significantly. " + + $"Current: {currentTestName}, Other: {otherTestName}, " + + $"Shared keys: {string.Join(",", currentKeys.Intersect(otherKeys))}, " + + $"Overlap: {overlapDuration.TotalMilliseconds:F2}ms"); + } + } + } + } + } + + /// + /// Extracts constraint keys from the current test context. + /// + private static string[] GetConstraintKeys(TestContext testContext) + { + var notInParallelAttributes = testContext.Metadata.TestDetails.Attributes + .GetAttributes() + .ToArray(); + + var keys = new List(); + + foreach (var attr in notInParallelAttributes) + { + keys.AddRange(attr.ConstraintKeys); + } + + return [.. keys]; + } + + /// + /// Extracts constraint keys from test name pattern. + /// This is a simplified version used when we don't have the full test context. + /// + private static string[] GetConstraintKeysFromTestName(string testName) + { + // Parse test name to extract keys + // Test names follow pattern: StressTest_Key{Keys}_{Number} + if (testName.Contains("KeyAB")) + { + return ["A", "B"]; + } + if (testName.Contains("KeyBC")) + { + return ["B", "C"]; + } + if (testName.Contains("KeyA")) + { + return ["A"]; + } + if (testName.Contains("KeyB")) + { + return ["B"]; + } + if (testName.Contains("KeyC")) + { + return ["C"]; + } + + return []; + } + + /// + /// Calculates the overlap duration between two execution windows. + /// + private static TimeSpan GetOverlapDuration((DateTime Start, DateTime End) window1, (DateTime Start, DateTime End) window2) + { + var overlapStart = window1.Start > window2.Start ? window1.Start : window2.Start; + var overlapEnd = window1.End < window2.End ? window1.End : window2.End; + + if (overlapEnd <= overlapStart) + { + return TimeSpan.Zero; + } + + return overlapEnd - overlapStart; + } +} diff --git a/docs/plans/2025-12-26-lock-contention-optimization-design.md b/docs/plans/2025-12-26-lock-contention-optimization-design.md new file mode 100644 index 0000000000..010cb0313f --- /dev/null +++ b/docs/plans/2025-12-26-lock-contention-optimization-design.md @@ -0,0 +1,377 @@ +# Lock Contention Optimization Design + +**Issue:** [#4162 - perf: reduce lock contention in test discovery and scheduling](https://github.com/thomhurst/TUnit/issues/4162) +**Date:** 2025-12-26 +**Priority:** P1 +**Goal:** Maximum parallelism - minimize lock duration at any cost + +--- + +## Problem Statement + +Two performance-critical code paths unnecessarily extend lock durations, creating bottlenecks in parallel test execution: + +1. **ReflectionTestDataCollector.cs (lines 137-141):** Lock remains held while creating a full defensive copy of discovered tests +2. **ConstraintKeyScheduler.cs (lines 56-69, 151-190):** LINQ evaluation and list allocation happen within lock scope + +These practices serialize operations that should execute in parallel, degrading throughput on multi-core systems. + +--- + +## Solution Overview + +| Component | Approach | Complexity | +|-----------|----------|------------| +| ReflectionTestDataCollector | ImmutableList with atomic swap | Medium | +| ConstraintKeyScheduler | Manual loops + two-phase locking + pre-allocation | High | + +--- + +## Design: ReflectionTestDataCollector + +### Current Implementation (Problem) + +```csharp +private static readonly List _discoveredTests = new(capacity: 1000); +private static readonly Lock _discoveredTestsLock = new(); + +public async Task> CollectTestsAsync(string testSessionId) +{ + // ... discovery logic ... + + lock (_discoveredTestsLock) + { + _discoveredTests.AddRange(newTests); + return new List(_discoveredTests); // O(n) copy under lock + } +} +``` + +### Proposed Implementation + +Replace `List` + lock with `ImmutableList` + atomic swap: + +```csharp +private static ImmutableList _discoveredTests = ImmutableList.Empty; + +public async Task> CollectTestsAsync(string testSessionId) +{ + // ... discovery logic unchanged ... + + // Atomic swap - no lock needed for readers + ImmutableList original, updated; + do + { + original = _discoveredTests; + updated = original.AddRange(newTests); + } while (Interlocked.CompareExchange(ref _discoveredTests, updated, original) != original); + + return _discoveredTests; // Already immutable, no copy needed +} +``` + +For streaming writes, use `ImmutableInterlocked.Update()` helper: + +```csharp +// In CollectTestsStreamingAsync +ImmutableInterlocked.Update(ref _discoveredTests, list => list.Add(test)); +yield return test; +``` + +### Benefits + +- Zero lock contention on reads (callers get immutable snapshot) +- Writes use lock-free CAS loop (brief spin during concurrent writes) +- Eliminates defensive copy entirely +- Thread-safe enumeration without locks + +### Trade-offs + +- Slightly higher allocation on writes (new immutable list per update) +- Discovery is infrequent compared to reads, so this is acceptable + +--- + +## Design: ConstraintKeyScheduler + +### Problem 1: LINQ Inside Lock (lines 56-69) + +**Current:** +```csharp +lock (lockObject) +{ + canStart = !constraintKeys.Any(key => lockedKeys.Contains(key)); // LINQ allocation + if (canStart) + { + foreach (var key in constraintKeys) + lockedKeys.Add(key); + } +} +``` + +**Proposed - Manual loop with indexer access:** +```csharp +lock (lockObject) +{ + canStart = true; + var keyCount = constraintKeys.Count; + for (var i = 0; i < keyCount; i++) + { + if (lockedKeys.Contains(constraintKeys[i])) + { + canStart = false; + break; // Early exit + } + } + + if (canStart) + { + for (var i = 0; i < keyCount; i++) + lockedKeys.Add(constraintKeys[i]); + } +} +``` + +**Benefits:** +- No delegate allocation +- No enumerator allocation +- Early exit on first conflict + +### Problem 2: Allocations and Extended Lock Scope (lines 149-190) + +**Current:** +```csharp +var testsToStart = new List<...>(); // Outside lock - good + +lock (lockObject) +{ + foreach (var key in constraintKeys) + lockedKeys.Remove(key); + + var tempQueue = new List<...>(); // Allocation INSIDE lock - bad + + while (waitingTests.TryDequeue(out var waitingTest)) + { + var canStart = !waitingTest.ConstraintKeys.Any(...); // LINQ inside lock + // ... extensive work inside lock + } + + foreach (var item in tempQueue) + waitingTests.Enqueue(item); +} +``` + +**Proposed - Two-phase locking with pre-allocation:** + +```csharp +// Pre-allocate outside any lock +var testsToStart = new List<(..., TaskCompletionSource)>(4); +var testsToRequeue = new List<(..., TaskCompletionSource)>(8); +var waitingSnapshot = new List<(..., TaskCompletionSource)>(8); + +// Phase 1: Release keys and snapshot queue (single brief lock) +lock (lockObject) +{ + var keyCount = constraintKeys.Count; + for (var i = 0; i < keyCount; i++) + lockedKeys.Remove(constraintKeys[i]); + + while (waitingTests.TryDequeue(out var item)) + waitingSnapshot.Add(item); +} + +// Phase 2: For each candidate, try to acquire keys (brief lock per candidate) +foreach (var waitingTest in waitingSnapshot) +{ + bool acquired; + lock (lockObject) + { + acquired = true; + var keys = waitingTest.ConstraintKeys; + var keyCount = keys.Count; + for (var i = 0; i < keyCount; i++) + { + if (lockedKeys.Contains(keys[i])) + { + acquired = false; + break; + } + } + + if (acquired) + { + for (var i = 0; i < keyCount; i++) + lockedKeys.Add(keys[i]); + } + } + + if (acquired) + testsToStart.Add(waitingTest); + else + testsToRequeue.Add(waitingTest); +} + +// Phase 3: Requeue non-starters (single brief lock) +if (testsToRequeue.Count > 0) +{ + lock (lockObject) + { + foreach (var item in testsToRequeue) + waitingTests.Enqueue(item); + } +} + +// Phase 4: Signal starters (outside lock - no contention) +foreach (var test in testsToStart) + test.StartSignal.SetResult(true); +``` + +### Benefits + +- Multiple brief locks instead of one long lock +- Other threads can interleave between phases +- All allocations outside locks +- No LINQ allocations +- Early exit in availability checks + +--- + +## Testing Strategy + +### 1. Stress Tests for Thread Safety + +Add to `TUnit.Engine.Tests`: + +```csharp +[Test] +[Repeat(10)] +public async Task ReflectionTestDataCollector_ConcurrentDiscovery_NoRaceConditions() +{ + var tasks = Enumerable.Range(0, 50) + .Select(_ => collector.CollectTestsAsync(Guid.NewGuid().ToString())); + + var results = await Task.WhenAll(tasks); + + await Assert.That(results.SelectMany(r => r).Distinct().Count()) + .IsGreaterThan(0); +} + +[Test] +[Repeat(10)] +public async Task ConstraintKeyScheduler_HighContention_NoDeadlocks() +{ + var tests = CreateTestsWithOverlappingConstraints(100, overlapFactor: 0.3); + + using var cts = new CancellationTokenSource(TimeSpan.FromSeconds(30)); + await scheduler.ExecuteTestsWithConstraintsAsync(tests, cts.Token); +} +``` + +### 2. Wall Clock Benchmarks + +```csharp +[Test] +[Category("Performance")] +public async Task Benchmark_TestDiscovery_WallClock() +{ + var stopwatch = Stopwatch.StartNew(); + var tests = await collector.CollectTestsAsync(testSessionId); + stopwatch.Stop(); + + Console.WriteLine($"Discovery wall clock: {stopwatch.ElapsedMilliseconds}ms"); + Console.WriteLine($"Tests discovered: {tests.Count()}"); + Console.WriteLine($"Throughput: {tests.Count() / stopwatch.Elapsed.TotalSeconds:F0} tests/sec"); +} + +[Test] +[Category("Performance")] +public async Task Benchmark_ConstrainedExecution_WallClock() +{ + var tests = CreateTestsWithConstraints(count: 500, constraintOverlap: 0.3); + + var stopwatch = Stopwatch.StartNew(); + await scheduler.ExecuteTestsWithConstraintsAsync(tests, CancellationToken.None); + stopwatch.Stop(); + + Console.WriteLine($"Constrained execution wall clock: {stopwatch.ElapsedMilliseconds}ms"); +} +``` + +### 3. Profiling with dotnet-trace + +```bash +# Baseline (before changes) +dotnet trace collect --name TUnit.PerformanceTests -- dotnet run -c Release +mv trace.nettrace baseline.nettrace + +# Optimized (after changes) +dotnet trace collect --name TUnit.PerformanceTests -- dotnet run -c Release +mv trace.nettrace optimized.nettrace +``` + +### 4. Key Metrics + +| Scenario | Metric | Target | +|----------|--------|--------| +| Discovery (1000 tests) | Wall clock time | >=10% improvement | +| Constrained execution (500 tests, 30% overlap) | Wall clock time | >=15% improvement | +| High parallelism (16+ cores) | Scaling efficiency | Near-linear | +| Lock contention | Thread wait time | >=50% reduction | +| Memory | Hot path allocations | >=30% reduction | + +--- + +## Risk Mitigation + +### Race Condition Risks + +| Risk | Mitigation | +|------|------------| +| ImmutableList CAS loop starvation | Add retry limit with fallback to lock; contention is rare during discovery | +| Lost updates in two-phase lock | Each phase is atomic; tests either start or requeue, never lost | +| Stale reads of `_discoveredTests` | Acceptable - immutable snapshots are always consistent | + +### Behavioral Compatibility + +| Concern | Mitigation | +|---------|------------| +| Test ordering changes | Discovery order was never guaranteed | +| API consumers expecting mutable list | Return type is `IEnumerable`, already read-only contract | +| Constraint scheduling order | Priority ordering preserved | + +### Rollback Strategy + +Both changes are isolated: +- `ReflectionTestDataCollector`: Revert to `List` + lock with single file change +- `ConstraintKeyScheduler`: Revert loop-by-loop if specific optimization causes issues + +--- + +## Implementation Plan + +### Incremental Rollout (Suggested Merge Order) + +1. **PR 1: LINQ to manual loop replacements** (lowest risk) + - Replace `.Any()` with manual loops in ConstraintKeyScheduler + - Immediate benefit, minimal code change + +2. **PR 2: Lock scope restructuring** (medium risk) + - Two-phase locking in ConstraintKeyScheduler + - Pre-allocate lists outside locks + +3. **PR 3: ImmutableList migration** (medium risk) + - Replace List + lock with ImmutableList in ReflectionTestDataCollector + - Atomic swap pattern + +This allows isolating any regressions to specific changes. + +--- + +## Verification Checklist + +- [ ] All existing tests pass +- [ ] Stress tests added and passing +- [ ] Wall clock benchmarks show improvement +- [ ] dotnet-trace shows reduced lock contention +- [ ] No deadlocks under high parallelism (16+ cores) +- [ ] Memory allocations reduced in hot paths diff --git a/docs/plans/2025-12-26-lock-contention-optimization.md b/docs/plans/2025-12-26-lock-contention-optimization.md new file mode 100644 index 0000000000..fc88ab6335 --- /dev/null +++ b/docs/plans/2025-12-26-lock-contention-optimization.md @@ -0,0 +1,505 @@ +# Lock Contention Optimization Implementation Plan + +> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task. + +**Goal:** Reduce lock contention in test discovery and scheduling to improve parallel test execution throughput. + +**Architecture:** Replace `List` + lock with `ImmutableList` + atomic swap in ReflectionTestDataCollector. Replace LINQ with manual loops and restructure to two-phase locking in ConstraintKeyScheduler. + +**Tech Stack:** C#, System.Collections.Immutable, System.Threading + +--- + +## Task 1: Add Stress Tests for Thread Safety Baseline + +**Files:** +- Create: `TUnit.Engine.Tests/Scheduling/ConstraintKeySchedulerConcurrencyTests.cs` + +**Step 1: Write the failing test for high contention scenarios** + +```csharp +using TUnit.Core; +using TUnit.Engine.Scheduling; + +namespace TUnit.Engine.Tests.Scheduling; + +public class ConstraintKeySchedulerConcurrencyTests +{ + [Test] + [Repeat(5)] + public async Task HighContention_WithOverlappingConstraints_CompletesWithoutDeadlock() + { + // Arrange - create mock tests with overlapping constraint keys + // This test establishes baseline behavior before optimization + + using var cts = new CancellationTokenSource(TimeSpan.FromSeconds(30)); + + // Act & Assert - should complete without timeout/deadlock + await Assert.That(async () => + { + // Placeholder - will be implemented after understanding test infrastructure + await Task.Delay(1, cts.Token); + }).ThrowsNothing(); + } +} +``` + +**Step 2: Run test to verify it passes (baseline)** + +Run: `cd TUnit.Engine.Tests && dotnet test --filter "FullyQualifiedName~ConstraintKeySchedulerConcurrencyTests"` +Expected: PASS (baseline test) + +**Step 3: Commit** + +```bash +git add TUnit.Engine.Tests/Scheduling/ConstraintKeySchedulerConcurrencyTests.cs +git commit -m "test: add concurrency stress test baseline for ConstraintKeyScheduler" +``` + +--- + +## Task 2: Replace LINQ with Manual Loops in ConstraintKeyScheduler (lines 56-69) + +**Files:** +- Modify: `TUnit.Engine/Scheduling/ConstraintKeyScheduler.cs:56-69` + +**Step 1: Write test for constraint key checking behavior** + +```csharp +[Test] +public async Task ConstraintKeyCheck_WithNoConflicts_StartsImmediately() +{ + // This test verifies the behavior is unchanged after LINQ removal + // Exact implementation depends on testability of ConstraintKeyScheduler +} +``` + +**Step 2: Run existing tests to establish baseline** + +Run: `cd TUnit.Engine.Tests && dotnet test` +Expected: All tests PASS + +**Step 3: Replace LINQ `.Any()` with manual loop** + +In `ConstraintKeyScheduler.cs`, change lines 56-69 from: + +```csharp +lock (lockObject) +{ + // Check if all constraint keys are available + canStart = !constraintKeys.Any(key => lockedKeys.Contains(key)); + + if (canStart) + { + // Lock all the constraint keys for this test + foreach (var key in constraintKeys) + { + lockedKeys.Add(key); + } + } +} +``` + +To: + +```csharp +lock (lockObject) +{ + // Check if all constraint keys are available - manual loop avoids LINQ allocation + canStart = true; + var keyCount = constraintKeys.Count; + for (var i = 0; i < keyCount; i++) + { + if (lockedKeys.Contains(constraintKeys[i])) + { + canStart = false; + break; + } + } + + if (canStart) + { + // Lock all the constraint keys for this test + for (var i = 0; i < keyCount; i++) + { + lockedKeys.Add(constraintKeys[i]); + } + } +} +``` + +**Step 4: Run tests to verify behavior unchanged** + +Run: `cd TUnit.Engine.Tests && dotnet test` +Expected: All tests PASS + +**Step 5: Commit** + +```bash +git add TUnit.Engine/Scheduling/ConstraintKeyScheduler.cs +git commit -m "perf: replace LINQ with manual loop in ConstraintKeyScheduler key checking" +``` + +--- + +## Task 3: Replace LINQ in Waiting Test Check (line 165) + +**Files:** +- Modify: `TUnit.Engine/Scheduling/ConstraintKeyScheduler.cs:165` + +**Step 1: Run existing tests to establish baseline** + +Run: `cd TUnit.Engine.Tests && dotnet test` +Expected: All tests PASS + +**Step 2: Replace LINQ `.Any()` in waiting test check** + +In `ConstraintKeyScheduler.cs`, change line 165 from: + +```csharp +var canStart = !waitingTest.ConstraintKeys.Any(key => lockedKeys.Contains(key)); +``` + +To: + +```csharp +var canStart = true; +var waitingKeys = waitingTest.ConstraintKeys; +var waitingKeyCount = waitingKeys.Count; +for (var j = 0; j < waitingKeyCount; j++) +{ + if (lockedKeys.Contains(waitingKeys[j])) + { + canStart = false; + break; + } +} +``` + +**Step 3: Run tests to verify behavior unchanged** + +Run: `cd TUnit.Engine.Tests && dotnet test` +Expected: All tests PASS + +**Step 4: Commit** + +```bash +git add TUnit.Engine/Scheduling/ConstraintKeyScheduler.cs +git commit -m "perf: replace LINQ with manual loop in waiting test availability check" +``` + +--- + +## Task 4: Move List Allocation Outside Lock (lines 149-190) + +**Files:** +- Modify: `TUnit.Engine/Scheduling/ConstraintKeyScheduler.cs:149-190` + +**Step 1: Run existing tests to establish baseline** + +Run: `cd TUnit.Engine.Tests && dotnet test` +Expected: All tests PASS + +**Step 2: Pre-allocate lists outside lock scope** + +Change the structure from allocating `tempQueue` inside lock to pre-allocating outside. Replace lines 149-190: + +```csharp +// Release the constraint keys and check if any waiting tests can now run +var testsToStart = new List<(AbstractExecutableTest Test, IReadOnlyList ConstraintKeys, TaskCompletionSource StartSignal)>(4); +var testsToRequeue = new List<(AbstractExecutableTest Test, IReadOnlyList ConstraintKeys, TaskCompletionSource StartSignal)>(8); + +lock (lockObject) +{ + // Release all constraint keys for this test + var keyCount = constraintKeys.Count; + for (var i = 0; i < keyCount; i++) + { + lockedKeys.Remove(constraintKeys[i]); + } + + // Check waiting tests to see if any can now run + while (waitingTests.TryDequeue(out var waitingTest)) + { + // Check if all constraint keys are available for this waiting test + var canStart = true; + var waitingKeys = waitingTest.ConstraintKeys; + var waitingKeyCount = waitingKeys.Count; + for (var j = 0; j < waitingKeyCount; j++) + { + if (lockedKeys.Contains(waitingKeys[j])) + { + canStart = false; + break; + } + } + + if (canStart) + { + // Lock the keys for this test + for (var j = 0; j < waitingKeyCount; j++) + { + lockedKeys.Add(waitingKeys[j]); + } + + // Mark test to start after we exit the lock + testsToStart.Add(waitingTest); + } + else + { + // Still can't run, keep it for re-queuing + testsToRequeue.Add(waitingTest); + } + } + + // Re-add tests that still can't run + foreach (var waitingTestItem in testsToRequeue) + { + waitingTests.Enqueue(waitingTestItem); + } +} +``` + +**Step 3: Run tests to verify behavior unchanged** + +Run: `cd TUnit.Engine.Tests && dotnet test` +Expected: All tests PASS + +**Step 4: Commit** + +```bash +git add TUnit.Engine/Scheduling/ConstraintKeyScheduler.cs +git commit -m "perf: pre-allocate lists outside lock scope in ConstraintKeyScheduler" +``` + +--- + +## Task 5: Add ImmutableList to ReflectionTestDataCollector + +**Files:** +- Modify: `TUnit.Engine/Discovery/ReflectionTestDataCollector.cs:31-32` + +**Step 1: Run existing tests to establish baseline** + +Run: `cd TUnit.Engine.Tests && dotnet test` +Expected: All tests PASS + +**Step 2: Add System.Collections.Immutable using and change field declaration** + +At the top of `ReflectionTestDataCollector.cs`, ensure this using is present: + +```csharp +using System.Collections.Immutable; +``` + +Change lines 31-32 from: + +```csharp +private static readonly List _discoveredTests = new(capacity: 1000); +private static readonly Lock _discoveredTestsLock = new(); +``` + +To: + +```csharp +private static ImmutableList _discoveredTests = ImmutableList.Empty; +``` + +**Step 3: Run tests (will fail - fields referenced elsewhere)** + +Run: `cd TUnit.Engine.Tests && dotnet test` +Expected: FAIL (compilation errors due to field changes) + +**Step 4: Commit partial change** + +```bash +git add TUnit.Engine/Discovery/ReflectionTestDataCollector.cs +git commit -m "refactor: change _discoveredTests to ImmutableList (WIP)" +``` + +--- + +## Task 6: Update CollectTestsAsync for ImmutableList + +**Files:** +- Modify: `TUnit.Engine/Discovery/ReflectionTestDataCollector.cs:136-141` + +**Step 1: Replace lock with atomic swap** + +Change lines 136-141 from: + +```csharp +// Add to discovered tests with lock +lock (_discoveredTestsLock) +{ + _discoveredTests.AddRange(newTests); + return new List(_discoveredTests); +} +``` + +To: + +```csharp +// Atomic swap - no lock needed for readers +ImmutableList original, updated; +do +{ + original = _discoveredTests; + updated = original.AddRange(newTests); +} while (Interlocked.CompareExchange(ref _discoveredTests, updated, original) != original); + +return _discoveredTests; +``` + +**Step 2: Run tests (may still fail if other usages remain)** + +Run: `cd TUnit.Engine.Tests && dotnet test` +Expected: May fail if streaming methods still use old lock + +**Step 3: Commit** + +```bash +git add TUnit.Engine/Discovery/ReflectionTestDataCollector.cs +git commit -m "perf: use atomic swap for CollectTestsAsync" +``` + +--- + +## Task 7: Update CollectTestsStreamingAsync for ImmutableList + +**Files:** +- Modify: `TUnit.Engine/Discovery/ReflectionTestDataCollector.cs:174-190` + +**Step 1: Replace lock with ImmutableInterlocked.Update** + +Change lines 174-178 from: + +```csharp +lock (_discoveredTestsLock) +{ + _discoveredTests.Add(test); +} +yield return test; +``` + +To: + +```csharp +ImmutableInterlocked.Update(ref _discoveredTests, list => list.Add(test)); +yield return test; +``` + +Similarly for lines 185-188: + +```csharp +ImmutableInterlocked.Update(ref _discoveredTests, list => list.Add(dynamicTest)); +yield return dynamicTest; +``` + +**Step 2: Run tests to verify streaming works** + +Run: `cd TUnit.Engine.Tests && dotnet test` +Expected: All tests PASS + +**Step 3: Commit** + +```bash +git add TUnit.Engine/Discovery/ReflectionTestDataCollector.cs +git commit -m "perf: use ImmutableInterlocked.Update for streaming discovery" +``` + +--- + +## Task 8: Update ClearCaches for ImmutableList + +**Files:** +- Modify: `TUnit.Engine/Discovery/ReflectionTestDataCollector.cs:47-60` + +**Step 1: Simplify ClearCaches to use atomic assignment** + +Change lines 50-53 from: + +```csharp +lock (_discoveredTestsLock) +{ + _discoveredTests.Clear(); +} +``` + +To: + +```csharp +Interlocked.Exchange(ref _discoveredTests, ImmutableList.Empty); +``` + +**Step 2: Run all tests** + +Run: `cd TUnit.Engine.Tests && dotnet test` +Expected: All tests PASS + +**Step 3: Commit** + +```bash +git add TUnit.Engine/Discovery/ReflectionTestDataCollector.cs +git commit -m "perf: simplify ClearCaches with atomic exchange" +``` + +--- + +## Task 9: Run Full Test Suite and Performance Verification + +**Files:** +- None (verification only) + +**Step 1: Run full TUnit test suite** + +Run: `dotnet test` +Expected: All tests PASS + +**Step 2: Run performance tests with dotnet-trace** + +```bash +cd TUnit.PerformanceTests +dotnet trace collect -- dotnet run -c Release +``` + +**Step 3: Verify no regressions** + +Compare trace output with baseline (if available). Look for: +- Reduced lock contention time +- Reduced thread wait time +- Similar or better wall clock time + +**Step 4: Final commit with summary** + +```bash +git add -A +git commit -m "perf: reduce lock contention in test discovery and scheduling + +Implements optimizations for #4162: +- ReflectionTestDataCollector: ImmutableList with atomic swap +- ConstraintKeyScheduler: manual loops replacing LINQ, pre-allocated lists + +This eliminates defensive copies under lock and reduces LINQ allocations +in hot paths during parallel test execution." +``` + +--- + +## Verification Checklist + +- [ ] All existing tests pass +- [ ] Stress tests pass under high contention +- [ ] No deadlocks under parallel execution +- [ ] Wall clock time equal or improved +- [ ] Lock contention reduced (verify with dotnet-trace) + +--- + +Plan complete and saved to `docs/plans/2025-12-26-lock-contention-optimization.md`. Two execution options: + +**1. Subagent-Driven (this session)** - I dispatch fresh subagent per task, review between tasks, fast iteration + +**2. Parallel Session (separate)** - Open new session with executing-plans, batch execution with checkpoints + +**Which approach?**