Skip to content

Commit

Permalink
Add normalized equivalent of YieldProcessor, retune some spin loops
Browse files Browse the repository at this point in the history
Related to https://github.com/dotnet/coreclr/issues/13388:
- SpinWait - If SwitchToThread does not switch, Sleep(0) instead since otherwise the iteration is wasted
- Changed ManualResetEventSlim and Task to use SpinWait instead of their custom spinning that was very similar to SpinWait anyway
  - On a single-proc machine, YieldProcessor is ineffective and it will SwitchToThread instead
  - This also removes multiplying the number of YieldProcessor calls per iteration by the number of processors. The multiplication could cause artificially long delays on machines with many processors, and it would be better to yield/sleep instead. Based on this change, I have retuned the spin counts to values that I found to be appropriate from a basic microbenchmark.
  • Loading branch information
kouvel committed Aug 28, 2017
1 parent 1596d06 commit b5b64bf
Show file tree
Hide file tree
Showing 10 changed files with 287 additions and 142 deletions.
78 changes: 58 additions & 20 deletions src/mscorlib/shared/System/Threading/SpinWait.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@
// =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-

using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Security;
using Internal.Runtime.Augments;

namespace System.Threading
Expand Down Expand Up @@ -69,9 +72,9 @@ public struct SpinWait
// numbers may seem fairly arbitrary, but were derived with at least some
// thought in the design document. I fully expect they will need to change
// over time as we gain more experience with performance.
internal const int YIELD_THRESHOLD = 10; // When to switch over to a true yield.
internal const int SLEEP_0_EVERY_HOW_MANY_TIMES = 5; // After how many yields should we Sleep(0)?
internal const int SLEEP_1_EVERY_HOW_MANY_TIMES = 20; // After how many yields should we Sleep(1)?
internal const int YieldThreshold = 10; // When to switch over to a true yield.
private const int Sleep0EveryHowManyYields = 5; // After how many yields should we Sleep(0)?
private const int DefaultSleep1Threshold = 20; // After how many yields should we Sleep(1) frequently?

// The number of times we've spun already.
private int _count;
Expand All @@ -81,7 +84,8 @@ public struct SpinWait
/// </summary>
public int Count
{
get { return _count; }
get => _count;
internal set => _count = value;
}

/// <summary>
Expand All @@ -96,7 +100,24 @@ public int Count
/// </remarks>
public bool NextSpinWillYield
{
get { return _count > YIELD_THRESHOLD || PlatformHelper.IsSingleProcessor; }
get
{
// (_count - YieldThreshold) % 2 == 0: The purpose of this check is to interleave Thread.Yield/Sleep(0) with
// Thread.SpinWait. Otherwise, the following issues occur:
// - When there are no threads to switch to, Yield and Sleep(0) become no-op and it turns the spin loop into a
// busy-spin that may quickly reach the max spin count and cause the thread to enter a wait state, or may
// just busy-spin for longer than desired before a Sleep(1). Completing the spin loop too early can cause
// excessive context switching if a wait follows, and entering the Sleep(1) stage too early can cause
// excessive delays.
// - If there are multiple threads doing Yield and Sleep(0) (typically from the same spin loop due to
// contention), they may switch between one another, delaying work that can make progress from being done.
return
(
_count >= YieldThreshold &&
(_count >= DefaultSleep1Threshold || (_count - YieldThreshold) % 2 == 0)
) ||
PlatformHelper.IsSingleProcessor;
}
}

/// <summary>
Expand All @@ -108,6 +129,13 @@ public bool NextSpinWillYield
/// </remarks>
public void SpinOnce()
{
// Delegate to the threshold-taking overload, using DefaultSleep1Threshold as the spin count at which
// that overload starts calling Sleep(1).
SpinOnce(DefaultSleep1Threshold);
}

internal void SpinOnce(int sleep1Threshold)
{
Debug.Assert(sleep1Threshold >= DefaultSleep1Threshold); // so that NextSpinWillYield behaves as requested

if (NextSpinWillYield)
{
//
Expand All @@ -125,19 +153,21 @@ public void SpinOnce()
// configured to use the (default) coarse-grained system timer.
//

int yieldsSoFar = (_count >= YIELD_THRESHOLD ? _count - YIELD_THRESHOLD : _count);

if ((yieldsSoFar % SLEEP_1_EVERY_HOW_MANY_TIMES) == (SLEEP_1_EVERY_HOW_MANY_TIMES - 1))
{
RuntimeThread.Sleep(1);
}
else if ((yieldsSoFar % SLEEP_0_EVERY_HOW_MANY_TIMES) == (SLEEP_0_EVERY_HOW_MANY_TIMES - 1))
if (_count >= sleep1Threshold)
{
RuntimeThread.Sleep(0);
Thread.Sleep(1);
}
else
{
RuntimeThread.Yield();
int yieldsSoFar = _count >= YieldThreshold ? (_count - YieldThreshold) / 2 : _count;
if ((yieldsSoFar % Sleep0EveryHowManyYields) == (Sleep0EveryHowManyYields - 1))
{
RuntimeThread.Sleep(0);
}
else
{
RuntimeThread.Yield();
}
}
}
else
Expand All @@ -153,11 +183,21 @@ public void SpinOnce()
// number of spins we are willing to tolerate to reduce delay to the caller,
// since we expect most callers will eventually block anyway.
//
RuntimeThread.SpinWait(4 << _count);
// Also, cap the maximum spin count to a value such that many thousands of CPU cycles would not be wasted doing
// the equivalent of YieldProcessor(), as at that point SwitchToThread/Sleep(0) are more likely to be able to
// allow other useful work to run. Long YieldProcessor() loops can help to reduce contention, but Sleep(1) is
// usually better for that.
//
int n = RuntimeThread.OptimalMaxSpinWaitsPerSpinIteration;
if (_count <= 30 && (1 << _count) < n)
{
n = 1 << _count;
}
RuntimeThread.SpinWait(n);
}

// Finally, increment our spin counter.
_count = (_count == int.MaxValue ? YIELD_THRESHOLD : _count + 1);
_count = (_count == int.MaxValue ? YieldThreshold : _count + 1);
}

/// <summary>
Expand Down Expand Up @@ -299,9 +339,7 @@ internal static int ProcessorCount
/// <summary>
/// Gets whether the current machine has only a single processor.
/// </summary>
internal static bool IsSingleProcessor
{
get { return ProcessorCount == 1; }
}
/// <remarks>This typically does not change on a machine, so it's checked only once.</remarks>
internal static readonly bool IsSingleProcessor = ProcessorCount == 1;
}
}
26 changes: 26 additions & 0 deletions src/mscorlib/src/Internal/Runtime/Augments/RuntimeThread.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ namespace Internal.Runtime.Augments
{
public class RuntimeThread : CriticalFinalizerObject
{
private static int s_optimalMaxSpinWaitsPerSpinIteration;

internal RuntimeThread() { }

public static RuntimeThread Create(ThreadStart start) => new Thread(start);
Expand Down Expand Up @@ -186,6 +188,30 @@ public void DisableComObjectEagerCleanup()
private extern bool JoinInternal(int millisecondsTimeout);

// Thin forwarder: passes the timeout straight through to Thread.Sleep.
public static void Sleep(int millisecondsTimeout) => Thread.Sleep(millisecondsTimeout);

// Externally implemented (imported via JitHelpers.QCall). Its result is cached by
// OptimalMaxSpinWaitsPerSpinIteration, which asserts that the returned value is greater than zero.
[DllImport(JitHelpers.QCall)]
[SuppressUnmanagedCodeSecurity]
private static extern int GetOptimalMaxSpinWaitsPerSpinIterationInternal();

/// <summary>
/// Max value to be passed into <see cref="SpinWait(int)"/> for optimal delaying. This value is normalized to be
/// appropriate for the processor.
/// </summary>
/// <remarks>
/// The value is fetched from <see cref="GetOptimalMaxSpinWaitsPerSpinIterationInternal"/> on first use and then
/// cached in a static field. A cached value of zero means "not fetched yet".
/// </remarks>
internal static int OptimalMaxSpinWaitsPerSpinIteration
{
get
{
// Fast path: a nonzero cached value has already been fetched, so return it without another external call.
if (s_optimalMaxSpinWaitsPerSpinIteration != 0)
{
return s_optimalMaxSpinWaitsPerSpinIteration;
}

// No lock is taken around this initialization.
// NOTE(review): presumably concurrent first callers all fetch and store the same value, so the race is
// benign - confirm against the native implementation.
s_optimalMaxSpinWaitsPerSpinIteration = GetOptimalMaxSpinWaitsPerSpinIterationInternal();
// Zero is the "not cached" sentinel, so the fetched value is expected to be strictly positive.
Debug.Assert(s_optimalMaxSpinWaitsPerSpinIteration > 0);
return s_optimalMaxSpinWaitsPerSpinIteration;
}
}

// Thin forwarder: passes the iteration count straight through to Thread.SpinWait.
public static void SpinWait(int iterations) => Thread.SpinWait(iterations);
// Thin forwarder: calls Thread.Yield and returns its result unchanged.
public static bool Yield() => Thread.Yield();

Expand Down
45 changes: 9 additions & 36 deletions src/mscorlib/src/System/Threading/ManualResetEventSlim.cs
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,6 @@
//
// =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-

using System;
using System.Threading;
using System.Runtime.InteropServices;
using System.Diagnostics;
using System.Diagnostics.Contracts;

Expand Down Expand Up @@ -47,8 +44,9 @@ namespace System.Threading
public class ManualResetEventSlim : IDisposable
{
// These are the default spin counts we use on single-proc and MP machines.
private const int DEFAULT_SPIN_SP = 1;
private const int DEFAULT_SPIN_MP = SpinWait.YIELD_THRESHOLD;
internal const int DEFAULT_SPIN_SP = 1;
internal const int DEFAULT_SPIN_MP = 35;
internal const int SLEEP_1_THRESHOLD = 40; // should be greater than DEFAULT_SPIN_MP

private volatile object m_lock;
// A lock used for waiting and pulsing. Lazily initialized via EnsureLockObjectCreated()
Expand Down Expand Up @@ -563,44 +561,19 @@ public bool Wait(int millisecondsTimeout, CancellationToken cancellationToken)
bNeedTimeoutAdjustment = true;
}

//spin
int HOW_MANY_SPIN_BEFORE_YIELD = 10;
int HOW_MANY_YIELD_EVERY_SLEEP_0 = 5;
int HOW_MANY_YIELD_EVERY_SLEEP_1 = 20;

// Spin
int spinCount = SpinCount;
for (int i = 0; i < spinCount; i++)
var spinWaiter = new SpinWait();
while (spinWaiter.Count < spinCount)
{
spinWaiter.SpinOnce(SLEEP_1_THRESHOLD);

if (IsSet)
{
return true;
}

else if (i < HOW_MANY_SPIN_BEFORE_YIELD)
{
if (i == HOW_MANY_SPIN_BEFORE_YIELD / 2)
{
Thread.Yield();
}
else
{
Thread.SpinWait(4 << i);
}
}
else if (i % HOW_MANY_YIELD_EVERY_SLEEP_1 == 0)
{
Thread.Sleep(1);
}
else if (i % HOW_MANY_YIELD_EVERY_SLEEP_0 == 0)
{
Thread.Sleep(0);
}
else
{
Thread.Yield();
}

if (i >= 100 && i % 10 == 0) // check the cancellation token if the user passed a very large spin count
if (spinWaiter.Count >= 100 && spinWaiter.Count % 10 == 0) // check the cancellation token if the user passed a very large spin count
cancellationToken.ThrowIfCancellationRequested();
}

Expand Down
68 changes: 11 additions & 57 deletions src/mscorlib/src/System/Threading/SpinLock.cs
Original file line number Diff line number Diff line change
Expand Up @@ -65,16 +65,9 @@ public struct SpinLock

private volatile int m_owner;

// The multiplier factor for the each spinning iteration
// This number has been chosen after trying different numbers on different CPUs (4, 8 and 16 ) and this provided the best results
private const int SPINNING_FACTOR = 100;

// After how many yields, call Sleep(1)
private const int SLEEP_ONE_FREQUENCY = 40;

// After how many yields, call Sleep(0)
private const int SLEEP_ZERO_FREQUENCY = 10;

// After how many yields, check the timeout
private const int TIMEOUT_CHECK_FREQUENCY = 10;

Expand Down Expand Up @@ -347,48 +340,24 @@ private void ContinueTryEnter(int millisecondsTimeout, ref bool lockTaken)
else //failed to acquire the lock, then try to update the waiters. If the waiters count reached the maximum, just break the loop to avoid overflow
{
if ((observedOwner & WAITERS_MASK) != MAXIMUM_WAITERS)
{
// This can still overflow, but maybe there will never be that many waiters
turn = (Interlocked.Add(ref m_owner, 2) & WAITERS_MASK) >> 1;
}
}

//***Step 2. Spinning
//lock acquired failed and waiters updated
int processorCount = PlatformHelper.ProcessorCount;
if (turn < processorCount)
{
int processFactor = 1;
for (int i = 1; i <= turn * SPINNING_FACTOR; i++)
{
Thread.SpinWait((turn + i) * SPINNING_FACTOR * processFactor);
if (processFactor < processorCount)
processFactor++;
observedOwner = m_owner;
if ((observedOwner & LOCK_ANONYMOUS_OWNED) == LOCK_UNOWNED)
{
int newOwner = (observedOwner & WAITERS_MASK) == 0 ? // Gets the number of waiters, if zero
observedOwner | 1 // don't decrement it; just set the lock bit. It is zero because of a previous call to Exit(false), which corrupted the waiters
: (observedOwner - 2) | 1; // otherwise decrement the waiters and set the lock bit
Debug.Assert((newOwner & WAITERS_MASK) >= 0);

if (CompareExchange(ref m_owner, newOwner, observedOwner, ref lockTaken) == observedOwner)
{
return;
}
}
}

// Check the timeout.
if (millisecondsTimeout != Timeout.Infinite && TimeoutHelper.UpdateTimeOut(startTime, millisecondsTimeout) <= 0)
{
DecrementWaiters();
return;
}
//*** Step 2, Spinning and Yielding
var spinner = new SpinWait();
if (turn > PlatformHelper.ProcessorCount)
{
spinner.Count = SpinWait.YieldThreshold;
}

//*** Step 3, Yielding
//Sleep(1) every 50 yields
int yieldsoFar = 0;
while (true)
{
spinner.SpinOnce(SLEEP_ONE_FREQUENCY);

observedOwner = m_owner;
if ((observedOwner & LOCK_ANONYMOUS_OWNED) == LOCK_UNOWNED)
{
Expand All @@ -403,20 +372,7 @@ private void ContinueTryEnter(int millisecondsTimeout, ref bool lockTaken)
}
}

if (yieldsoFar % SLEEP_ONE_FREQUENCY == 0)
{
Thread.Sleep(1);
}
else if (yieldsoFar % SLEEP_ZERO_FREQUENCY == 0)
{
Thread.Sleep(0);
}
else
{
Thread.Yield();
}

if (yieldsoFar % TIMEOUT_CHECK_FREQUENCY == 0)
if (spinner.Count % TIMEOUT_CHECK_FREQUENCY == 0)
{
//Check the timeout.
if (millisecondsTimeout != Timeout.Infinite && TimeoutHelper.UpdateTimeOut(startTime, millisecondsTimeout) <= 0)
Expand All @@ -425,8 +381,6 @@ private void ContinueTryEnter(int millisecondsTimeout, ref bool lockTaken)
return;
}
}

yieldsoFar++;
}
}

Expand Down
Loading

0 comments on commit b5b64bf

Please sign in to comment.