From 0eb6b906df55525253ab896bd899ce9fd7c578ec Mon Sep 17 00:00:00 2001 From: TedHartMS <15467143+TedHartMS@users.noreply.github.com> Date: Fri, 12 Nov 2021 14:27:16 -0800 Subject: [PATCH 01/25] WIP on ManualFasterOperations --- cs/benchmark/Functions.cs | 4 + cs/src/core/Async/ReadAsync.cs | 6 +- cs/src/core/ClientSession/ClientSession.cs | 624 ++++------------ .../core/ClientSession/IFasterOperations.cs | 492 ++++++++++++ .../ClientSession/ManualFasterOperations.cs | 698 ++++++++++++++++++ cs/src/core/Epochs/LightEpoch.cs | 10 + cs/src/core/Index/Common/CompletedOutput.cs | 5 + cs/src/core/Index/Common/Contexts.cs | 10 +- cs/src/core/Index/Common/RecordInfo.cs | 73 +- cs/src/core/Index/FASTER/FASTER.cs | 96 ++- cs/src/core/Index/FASTER/FASTERImpl.cs | 194 ++++- cs/src/core/Index/FASTER/FASTERThread.cs | 3 +- cs/src/core/Index/FASTER/FasterSettings.cs | 16 + .../Index/FASTER/LogCompactionFunctions.cs | 2 + .../core/Index/FasterLog/FasterLogSettings.cs | 4 - cs/src/core/Index/Interfaces/FunctionsBase.cs | 4 + .../core/Index/Interfaces/IFasterSession.cs | 60 +- cs/src/core/Index/Interfaces/IFunctions.cs | 37 +- cs/src/core/Utilities/LockType.cs | 49 ++ cs/test/LockTests.cs | 2 +- cs/test/ManualOperationsTests.cs | 62 ++ cs/test/MiscFASTERTests.cs | 13 +- cs/test/TestUtils.cs | 1 + 23 files changed, 1864 insertions(+), 601 deletions(-) create mode 100644 cs/src/core/ClientSession/IFasterOperations.cs create mode 100644 cs/src/core/ClientSession/ManualFasterOperations.cs create mode 100644 cs/src/core/Index/FASTER/FasterSettings.cs create mode 100644 cs/src/core/Utilities/LockType.cs create mode 100644 cs/test/ManualOperationsTests.cs diff --git a/cs/benchmark/Functions.cs b/cs/benchmark/Functions.cs index b52bb9b87..1a1ee225a 100644 --- a/cs/benchmark/Functions.cs +++ b/cs/benchmark/Functions.cs @@ -121,5 +121,9 @@ public bool UnlockShared(ref RecordInfo recordInfo, ref Key key, ref Value value } public bool TryLockShared(ref RecordInfo recordInfo, ref Key key, ref 
Value value, ref long lockContext, int spinCount = 1) => recordInfo.TryLockShared(spinCount); + + public void LockExclusiveFromShared(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext) => recordInfo.LockExclusiveFromShared(); + + public bool TryLockExclusiveFromShared(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext, int spinCount = 1) => recordInfo.TryLockExclusiveFromShared(spinCount); } } diff --git a/cs/src/core/Async/ReadAsync.cs b/cs/src/core/Async/ReadAsync.cs index 63afbf913..4551bd6fe 100644 --- a/cs/src/core/Async/ReadAsync.cs +++ b/cs/src/core/Async/ReadAsync.cs @@ -163,8 +163,10 @@ internal ValueTask> ReadAsync /// /// - public sealed class ClientSession : IClientSession, IDisposable + public sealed class ClientSession : IClientSession, IFasterOperations, IDisposable where Functions : IFunctions { - private readonly FasterKV fht; + internal readonly FasterKV fht; internal readonly bool SupportAsync = false; internal readonly FasterKV.FasterExecutionContext ctx; @@ -156,15 +156,8 @@ public void Dispose() UnsafeSuspendThread(); } - /// - /// Read operation - /// - /// The key to look up - /// Input to help extract the retrieved value into - /// The location to place the retrieved value - /// User application context passed in case the read goes pending due to IO - /// The serial number of the operation (used in recovery) - /// is populated by the implementation + #region IFasterOperations + /// [MethodImpl(MethodImplOptions.AggressiveInlining)] public Status Read(ref Key key, ref Input input, ref Output output, Context userContext = default, long serialNo = 0) { @@ -179,15 +172,7 @@ public Status Read(ref Key key, ref Input input, ref Output output, Context user } } - /// - /// Read operation - /// - /// The key to look up - /// Input to help extract the retrieved value into - /// The location to place the retrieved value - /// User application context passed in case the read goes pending due to IO 
- /// The serial number of the operation (used in recovery) - /// is populated by the implementation + /// [MethodImpl(MethodImplOptions.AggressiveInlining)] public Status Read(Key key, Input input, out Output output, Context userContext = default, long serialNo = 0) { @@ -195,14 +180,7 @@ public Status Read(Key key, Input input, out Output output, Context userContext return Read(ref key, ref input, ref output, userContext, serialNo); } - /// - /// Read operation - /// - /// The key to look up - /// The location to place the retrieved value - /// User application context passed in case the read goes pending due to IO - /// The serial number of the operation (used in recovery) - /// is populated by the implementation + /// [MethodImpl(MethodImplOptions.AggressiveInlining)] public Status Read(ref Key key, ref Output output, Context userContext = default, long serialNo = 0) { @@ -210,14 +188,7 @@ public Status Read(ref Key key, ref Output output, Context userContext = default return Read(ref key, ref input, ref output, userContext, serialNo); } - /// - /// Read operation - /// - /// - /// - /// - /// - /// + /// [MethodImpl(MethodImplOptions.AggressiveInlining)] public Status Read(Key key, out Output output, Context userContext = default, long serialNo = 0) { @@ -226,13 +197,7 @@ public Status Read(Key key, out Output output, Context userContext = default, lo return Read(ref key, ref input, ref output, userContext, serialNo); } - /// - /// Read operation - /// - /// - /// - /// - /// + /// [MethodImpl(MethodImplOptions.AggressiveInlining)] public (Status status, Output output) Read(Key key, Context userContext = default, long serialNo = 0) { @@ -241,35 +206,15 @@ public Status Read(Key key, out Output output, Context userContext = default, lo return (Read(ref key, ref input, ref output, userContext, serialNo), output); } - /// - /// Read operation that accepts a ref argument to start the lookup at instead of starting at the hash table entry for , - /// and is updated 
with the address and record header for the found record. - /// - /// The key to look up - /// Input to help extract the retrieved value into - /// The location to place the retrieved value - /// On input contains the address to start at in ; if this is Constants.kInvalidAddress, the - /// search starts with the key as in other forms of Read. - /// On output, receives: - /// - ///
  • The address of the found record. This may be different from the passed on the call, due to - /// tracing back over hash collisions until we arrive at the key match
  • - ///
  • A copy of the record's header in ; can be passed - /// in a subsequent call, thereby enumerating all records in a key's hash chain.
  • - ///
    - ///
    - /// - /// Flags for controlling operations within the read, such as ReadCache interaction - /// User application context passed in case the read goes pending due to IO - /// The serial number of the operation (used in recovery) - /// is populated by the implementation + /// [MethodImpl(MethodImplOptions.AggressiveInlining)] public Status Read(ref Key key, ref Input input, ref Output output, ref RecordMetadata recordMetadata, ReadFlags readFlags = ReadFlags.None, Context userContext = default, long serialNo = 0) { if (SupportAsync) UnsafeResumeThread(); try { - return fht.ContextRead(ref key, ref input, ref output, ref recordMetadata, readFlags, userContext, FasterSession, serialNo, ctx); + LockOperation lockOp = default; + return fht.ContextRead(ref key, ref input, ref output, ref lockOp, ref recordMetadata, readFlags, userContext, FasterSession, serialNo, ctx); } finally { @@ -277,16 +222,7 @@ public Status Read(ref Key key, ref Input input, ref Output output, ref RecordMe } } - /// - /// Read operation that accepts an argument to lookup at, instead of a key. - /// - /// The address to look up - /// Input to help extract the retrieved value into - /// The location to place the retrieved value - /// Flags for controlling operations within the read, such as ReadCache interaction - /// User application context passed in case the read goes pending due to IO - /// The serial number of the operation (used in recovery) - /// is populated by the implementation; this should store the key if it needs it + /// [MethodImpl(MethodImplOptions.AggressiveInlining)] public Status ReadAtAddress(long address, ref Input input, ref Output output, ReadFlags readFlags = ReadFlags.None, Context userContext = default, long serialNo = 0) { @@ -301,22 +237,7 @@ public Status ReadAtAddress(long address, ref Input input, ref Output output, Re } } - /// - /// Async read operation. 
May return uncommitted results; to ensure reading of committed results, complete the read and then call WaitForCommitAsync. - /// - /// The key to look up - /// Input to help extract the retrieved value into output - /// User application context passed in case the read goes pending due to IO - /// The serial number of the operation (used in recovery) - /// Token to cancel the operation - /// wrapping - /// The caller must await the return value to obtain the result, then call one of - /// - /// result. - /// result. - /// - /// to complete the read operation and obtain the result status, the output that is populated by the - /// implementation, and optionally a copy of the header for the retrieved record + /// [MethodImpl(MethodImplOptions.AggressiveInlining)] public ValueTask.ReadAsyncResult> ReadAsync(ref Key key, ref Input input, Context userContext = default, long serialNo = 0, CancellationToken cancellationToken = default) { @@ -324,23 +245,7 @@ public ValueTask.ReadAsyncResult> R return fht.ReadAsync(this.FasterSession, this.ctx, ref key, ref input, Constants.kInvalidAddress, userContext, serialNo, cancellationToken); } - /// - /// Async read operation, may return uncommitted result - /// To ensure reading of committed result, complete the read and then call WaitForCommitAsync. - /// - /// - /// - /// - /// - /// - /// wrapping - /// The caller must await the return value to obtain the result, then call one of - /// - /// result. - /// result. 
- /// - /// to complete the read operation and obtain the result status, the output that is populated by the - /// implementation, and optionally a copy of the header for the retrieved record + /// [MethodImpl(MethodImplOptions.AggressiveInlining)] public ValueTask.ReadAsyncResult> ReadAsync(Key key, Input input, Context context = default, long serialNo = 0, CancellationToken token = default) { @@ -348,21 +253,7 @@ public ValueTask.ReadAsyncResult> R return fht.ReadAsync(this.FasterSession, this.ctx, ref key, ref input, Constants.kInvalidAddress, context, serialNo, token); } - /// - /// Async read operation. May return uncommitted results; to ensure reading of committed results, complete the read and then call WaitForCommitAsync. - /// - /// The key to look up - /// User application context passed in case the read goes pending due to IO - /// The serial number of the operation (used in recovery) - /// Token to cancel the operation - /// wrapping - /// The caller must await the return value to obtain the result, then call one of - /// - /// result. - /// result. - /// - /// to complete the read operation and obtain the result status, the output that is populated by the - /// implementation, and optionally a copy of the header for the retrieved record + /// [MethodImpl(MethodImplOptions.AggressiveInlining)] public ValueTask.ReadAsyncResult> ReadAsync(ref Key key, Context userContext = default, long serialNo = 0, CancellationToken token = default) { @@ -371,22 +262,7 @@ public ValueTask.ReadAsyncResult> R return fht.ReadAsync(this.FasterSession, this.ctx, ref key, ref input, Constants.kInvalidAddress, userContext, serialNo, token); } - /// - /// Async read operation, may return uncommitted result - /// To ensure reading of committed result, complete the read and then call WaitForCommitAsync. - /// - /// - /// - /// - /// - /// wrapping - /// The caller must await the return value to obtain the result, then call one of - /// - /// result. - /// result. 
- /// - /// to complete the read operation and obtain the result status, the output that is populated by the - /// implementation, and optionally a copy of the header for the retrieved record + /// [MethodImpl(MethodImplOptions.AggressiveInlining)] public ValueTask.ReadAsyncResult> ReadAsync(Key key, Context context = default, long serialNo = 0, CancellationToken token = default) { @@ -395,27 +271,7 @@ public ValueTask.ReadAsyncResult> R return fht.ReadAsync(this.FasterSession, this.ctx, ref key, ref input, Constants.kInvalidAddress, context, serialNo, token); } - /// - /// Async read operation that accepts a to start the lookup at instead of starting at the hash table entry for , - /// and returns the for the found record (which contains previous address in the hash chain for this key; this can - /// be used as in a subsequent call to iterate all records for ). - /// - /// The key to look up - /// Input to help extract the retrieved value into output - /// Start at this address rather than the address in the hash table for "/> - /// Flags for controlling operations within the read, such as ReadCache interaction - /// User application context passed in case the read goes pending due to IO - /// The serial number of the operation (used in recovery) - /// Token to cancel the operation - /// wrapping - /// The caller must await the return value to obtain the result, then call one of - /// - /// result. - /// result. 
- /// - /// to complete the read operation and obtain the result status, the output that is populated by the - /// implementation, and optionally a copy of the header for the retrieved record - /// + /// [MethodImpl(MethodImplOptions.AggressiveInlining)] public ValueTask.ReadAsyncResult> ReadAsync(ref Key key, ref Input input, long startAddress, ReadFlags readFlags = ReadFlags.None, Context userContext = default, long serialNo = 0, CancellationToken cancellationToken = default) @@ -425,23 +281,7 @@ public ValueTask.ReadAsyncResult> R return fht.ReadAsync(this.FasterSession, this.ctx, ref key, ref input, startAddress, userContext, serialNo, cancellationToken, operationFlags); } - /// - /// Async Read operation that accepts an argument to lookup at, instead of a key. - /// - /// The address to look up - /// Input to help extract the retrieved value into output - /// Flags for controlling operations within the read, such as ReadCache interaction - /// User application context passed in case the read goes pending due to IO - /// The serial number of the operation (used in recovery) - /// Token to cancel the operation - /// wrapping - /// The caller must await the return value to obtain the result, then call one of - /// - /// result. - /// result. 
- /// - /// to complete the read operation and obtain the result status, the output that is populated by the - /// implementation, and optionally a copy of the header for the retrieved record + /// [MethodImpl(MethodImplOptions.AggressiveInlining)] public ValueTask.ReadAsyncResult> ReadAtAddressAsync(long address, ref Input input, ReadFlags readFlags = ReadFlags.None, Context userContext = default, long serialNo = 0, CancellationToken cancellationToken = default) @@ -452,54 +292,39 @@ public ValueTask.ReadAsyncResult> R return fht.ReadAsync(this.FasterSession, this.ctx, ref key, ref input, address, userContext, serialNo, cancellationToken, operationFlags); } - /// - /// Upsert operation - /// - /// - /// - /// - /// - /// + /// [MethodImpl(MethodImplOptions.AggressiveInlining)] public Status Upsert(ref Key key, ref Value desiredValue, Context userContext = default, long serialNo = 0) { Input input = default; Output output = default; - return Upsert(ref key, ref input, ref desiredValue, ref output, out _, userContext, serialNo); + return Upsert(ref key, ref input, ref desiredValue, ref output, userContext, serialNo); } - /// - /// Upsert operation - /// - /// - /// - /// - /// - /// - /// - /// + /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - public Status Upsert(ref Key key, ref Input input, ref Value desiredValue, ref Output output, Context userContext = default, long serialNo = 0) - => Upsert(ref key, ref input, ref desiredValue, ref output, out _, userContext, serialNo); + public Status Upsert(ref Key key, ref Input input, ref Value desiredValue, ref Output output, Context userContext = default, long serialNo = 0) + { + if (SupportAsync) UnsafeResumeThread(); + try + { + return fht.ContextUpsert(ref key, ref input, ref desiredValue, ref output, userContext, FasterSession, serialNo, ctx); + } + finally + { + if (SupportAsync) UnsafeSuspendThread(); + } + } - /// - /// Upsert operation - /// - /// - /// - /// - /// - /// - /// - /// - /// + /// 
[MethodImpl(MethodImplOptions.AggressiveInlining)] public Status Upsert(ref Key key, ref Input input, ref Value desiredValue, ref Output output, out RecordMetadata recordMetadata, Context userContext = default, long serialNo = 0) { if (SupportAsync) UnsafeResumeThread(); try { - return fht.ContextUpsert(ref key, ref input, ref desiredValue, ref output, out recordMetadata, userContext, FasterSession, serialNo, ctx); + LockOperation lockOp = default; + return fht.ContextUpsert(ref key, ref input, ref desiredValue, ref output, ref lockOp, out recordMetadata, userContext, FasterSession, serialNo, ctx); } finally { @@ -507,48 +332,17 @@ public Status Upsert(ref Key key, ref Input input, ref Value desiredValue, ref O } } - /// - /// Upsert operation - /// - /// - /// - /// - /// - /// + /// [MethodImpl(MethodImplOptions.AggressiveInlining)] public Status Upsert(Key key, Value desiredValue, Context userContext = default, long serialNo = 0) => Upsert(ref key, ref desiredValue, userContext, serialNo); - /// - /// Upsert operation - /// - /// - /// - /// - /// - /// - /// - /// + /// [MethodImpl(MethodImplOptions.AggressiveInlining)] public Status Upsert(Key key, Input input, Value desiredValue, ref Output output, Context userContext = default, long serialNo = 0) - => Upsert(ref key, ref input, ref desiredValue, ref output, out _, userContext, serialNo); + => Upsert(ref key, ref input, ref desiredValue, ref output, userContext, serialNo); - /// - /// Async Upsert operation - /// Await operation in session before issuing next one - /// - /// - /// - /// - /// - /// - /// ValueTask wrapping - /// The caller must await the return value to obtain the result, then call one of - /// - /// result. - /// result = await result. while result.Status == - /// - /// to complete the Upsert operation. Failure to complete the operation will result in leaked allocations. 
+ /// [MethodImpl(MethodImplOptions.AggressiveInlining)] public ValueTask.UpsertAsyncResult> UpsertAsync(ref Key key, ref Value desiredValue, Context userContext = default, long serialNo = 0, CancellationToken token = default) { @@ -556,23 +350,7 @@ public ValueTask.UpsertAsyncResult> return UpsertAsync(ref key, ref input, ref desiredValue, userContext, serialNo, token); } - /// - /// Async Upsert operation - /// Await operation in session before issuing next one - /// - /// - /// - /// - /// - /// - /// - /// ValueTask wrapping - /// The caller must await the return value to obtain the result, then call one of - /// - /// result. - /// result = await result. while result.Status == - /// - /// to complete the Upsert operation. Failure to complete the operation will result in leaked allocations. + /// [MethodImpl(MethodImplOptions.AggressiveInlining)] public ValueTask.UpsertAsyncResult> UpsertAsync(ref Key key, ref Input input, ref Value desiredValue, Context userContext = default, long serialNo = 0, CancellationToken token = default) { @@ -580,72 +358,24 @@ public ValueTask.UpsertAsyncResult> return fht.UpsertAsync(this.FasterSession, this.ctx, ref key, ref input, ref desiredValue, userContext, serialNo, token); } - /// - /// Async Upsert operation - /// Await operation in session before issuing next one - /// - /// - /// - /// - /// - /// - /// ValueTask wrapping the asyncResult of the operation - /// The caller must await the return value to obtain the result, then call one of - /// - /// result. - /// result = await result. while result.Status == - /// - /// to complete the Upsert operation. Failure to complete the operation will result in leaked allocations. 
+ /// [MethodImpl(MethodImplOptions.AggressiveInlining)] public ValueTask.UpsertAsyncResult> UpsertAsync(Key key, Value desiredValue, Context userContext = default, long serialNo = 0, CancellationToken token = default) { return UpsertAsync(ref key, ref desiredValue, userContext, serialNo, token); } - /// - /// Async Upsert operation - /// Await operation in session before issuing next one - /// - /// - /// - /// - /// - /// - /// - /// ValueTask wrapping the asyncResult of the operation - /// The caller must await the return value to obtain the result, then call one of - /// - /// result. - /// result = await result. while result.Status == - /// - /// to complete the Upsert operation. Failure to complete the operation will result in leaked allocations. + /// [MethodImpl(MethodImplOptions.AggressiveInlining)] public ValueTask.UpsertAsyncResult> UpsertAsync(Key key, Input input, Value desiredValue, Context userContext = default, long serialNo = 0, CancellationToken token = default) => UpsertAsync(ref key, ref input, ref desiredValue, userContext, serialNo, token); - /// - /// RMW operation - /// - /// - /// - /// - /// - /// - /// + /// [MethodImpl(MethodImplOptions.AggressiveInlining)] public Status RMW(ref Key key, ref Input input, ref Output output, Context userContext = default, long serialNo = 0) => RMW(ref key, ref input, ref output, out _, userContext, serialNo); - /// - /// RMW operation - /// - /// - /// - /// - /// - /// - /// - /// + /// [MethodImpl(MethodImplOptions.AggressiveInlining)] public Status RMW(ref Key key, ref Input input, ref Output output, out RecordMetadata recordMetadata, Context userContext = default, long serialNo = 0) { @@ -660,15 +390,7 @@ public Status RMW(ref Key key, ref Input input, ref Output output, out RecordMet } } - /// - /// RMW operation - /// - /// - /// - /// - /// - /// - /// + /// [MethodImpl(MethodImplOptions.AggressiveInlining)] public Status RMW(Key key, Input input, out Output output, Context userContext = default, 
long serialNo = 0) { @@ -676,14 +398,7 @@ public Status RMW(Key key, Input input, out Output output, Context userContext = return RMW(ref key, ref input, ref output, userContext, serialNo); } - /// - /// RMW operation - /// - /// - /// - /// - /// - /// + /// [MethodImpl(MethodImplOptions.AggressiveInlining)] public Status RMW(ref Key key, ref Input input, Context userContext = default, long serialNo = 0) { @@ -691,14 +406,7 @@ public Status RMW(ref Key key, ref Input input, Context userContext = default, l return RMW(ref key, ref input, ref output, userContext, serialNo); } - /// - /// RMW operation - /// - /// - /// - /// - /// - /// + /// [MethodImpl(MethodImplOptions.AggressiveInlining)] public Status RMW(Key key, Input input, Context userContext = default, long serialNo = 0) { @@ -706,21 +414,7 @@ public Status RMW(Key key, Input input, Context userContext = default, long seri return RMW(ref key, ref input, ref output, userContext, serialNo); } - /// - /// Async RMW operation - /// Await operation in session before issuing next one - /// - /// - /// - /// - /// - /// - /// The caller must await the return value to obtain the result, then call one of - /// - /// result. - /// result = await result. while result.Status == - /// - /// to complete the Upsert operation. Failure to complete the operation will result in leaked allocations. + /// [MethodImpl(MethodImplOptions.AggressiveInlining)] public ValueTask.RmwAsyncResult> RMWAsync(ref Key key, ref Input input, Context context = default, long serialNo = 0, CancellationToken token = default) { @@ -728,32 +422,12 @@ public ValueTask.RmwAsyncResult> RM return fht.RmwAsync(this.FasterSession, this.ctx, ref key, ref input, context, serialNo, token); } - /// - /// Async RMW operation - /// Await operation in session before issuing next one - /// - /// - /// - /// - /// - /// - /// The caller must await the return value to obtain the result, then call one of - /// - /// result. - /// result = await result. 
while result.Status == - /// - /// to complete the Upsert operation. Failure to complete the operation will result in leaked allocations. + /// [MethodImpl(MethodImplOptions.AggressiveInlining)] public ValueTask.RmwAsyncResult> RMWAsync(Key key, Input input, Context context = default, long serialNo = 0, CancellationToken token = default) => RMWAsync(ref key, ref input, context, serialNo, token); - /// - /// Delete operation - /// - /// - /// - /// - /// + /// [MethodImpl(MethodImplOptions.AggressiveInlining)] public Status Delete(ref Key key, Context userContext = default, long serialNo = 0) { @@ -768,31 +442,12 @@ public Status Delete(ref Key key, Context userContext = default, long serialNo = } } - /// - /// Delete operation - /// - /// - /// - /// - /// + /// [MethodImpl(MethodImplOptions.AggressiveInlining)] public Status Delete(Key key, Context userContext = default, long serialNo = 0) => Delete(ref key, userContext, serialNo); - /// - /// Async Delete operation - /// Await operation in session before issuing next one - /// - /// - /// - /// - /// - /// The caller must await the return value to obtain the result, then call one of - /// - /// result. - /// result = await result. while result.Status == - /// - /// to complete the Upsert operation. Failure to complete the operation will result in leaked allocations. + /// [MethodImpl(MethodImplOptions.AggressiveInlining)] public ValueTask.DeleteAsyncResult> DeleteAsync(ref Key key, Context userContext = default, long serialNo = 0, CancellationToken token = default) { @@ -800,47 +455,23 @@ public ValueTask.DeleteAsyncResult> return fht.DeleteAsync(this.FasterSession, this.ctx, ref key, userContext, serialNo, token); } - /// - /// Async Delete operation - /// Await operation in session before issuing next one - /// - /// - /// - /// - /// - /// The caller must await the return value to obtain the result, then call one of - /// - /// result. - /// result = await result. 
while result.Status == - /// - /// to complete the Upsert operation. Failure to complete the operation will result in leaked allocations. + /// [MethodImpl(MethodImplOptions.AggressiveInlining)] public ValueTask.DeleteAsyncResult> DeleteAsync(Key key, Context userContext = default, long serialNo = 0, CancellationToken token = default) => DeleteAsync(ref key, userContext, serialNo, token); - /// - /// Experimental feature - /// Checks whether specified record is present in memory - /// (between HeadAddress and tail, or between fromAddress - /// and tail), including tombstones. - /// - /// Key of the record. - /// Logical address of record, if found - /// Look until this address - /// Status - internal Status ContainsKeyInMemory(ref Key key, out long logicalAddress, long fromAddress = -1) + /// + public void Refresh() { if (SupportAsync) UnsafeResumeThread(); - try - { - return fht.InternalContainsKeyInMemory(ref key, ctx, FasterSession, out logicalAddress, fromAddress); - } - finally - { - if (SupportAsync) UnsafeSuspendThread(); - } + fht.InternalRefresh(ctx, FasterSession); + if (SupportAsync) UnsafeSuspendThread(); } + #endregion IFasterOperations + + #region Pending Operations + /// /// Get list of pending requests (for current session) /// @@ -860,17 +491,6 @@ public IEnumerable GetPendingRequests() yield return val.serialNum; } - /// - /// Refresh session epoch and handle checkpointing phases. Used only - /// in case of thread-affinitized sessions (async support is disabled). - /// - public void Refresh() - { - if (SupportAsync) UnsafeResumeThread(); - fht.InternalRefresh(ctx, FasterSession); - if (SupportAsync) UnsafeSuspendThread(); - } - /// /// Synchronously complete outstanding pending synchronous operations. /// Async operations must be completed individually. 
@@ -987,6 +607,10 @@ public async ValueTask ReadyToCompletePendingAsync(CancellationToken token = def await fht.ReadyToCompletePendingAsync(this.ctx, token).ConfigureAwait(false); } + #endregion Pending Operations + + #region Other Operations + /// /// Wait for commit of all operations completed until the current point in session. /// Does not itself issue checkpoint/commits. @@ -1019,6 +643,29 @@ public async ValueTask WaitForCommitAsync(CancellationToken token = default) } } + /// + /// Experimental feature + /// Checks whether specified record is present in memory + /// (between HeadAddress and tail, or between fromAddress + /// and tail), including tombstones. + /// + /// Key of the record. + /// Logical address of record, if found + /// Look until this address + /// Status + internal Status ContainsKeyInMemory(ref Key key, out long logicalAddress, long fromAddress = -1) + { + if (SupportAsync) UnsafeResumeThread(); + try + { + return fht.InternalContainsKeyInMemory(ref key, ctx, FasterSession, out logicalAddress, fromAddress); + } + finally + { + if (SupportAsync) UnsafeSuspendThread(); + } + } + /// /// Compact the log until specified address using current session, moving active records to the tail of the log. /// @@ -1089,7 +736,7 @@ public IFasterScanIterator Iterate(long untilAddress = -1) } /// - /// Resume session on current thread + /// Resume session on current thread. IMPORTANT: Call SuspendThread before any async op. /// Call SuspendThread before any async op /// [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -1099,6 +746,17 @@ internal void UnsafeResumeThread() fht.InternalRefresh(ctx, FasterSession); } + /// + /// Resume session on current thread. IMPORTANT: Call SuspendThread before any async op. 
+ /// + /// Epoch that session resumes on; can be saved to see if epoch has changed + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal void UnsafeResumeThread(out int resumeEpoch) + { + fht.epoch.Resume(out resumeEpoch); + fht.InternalRefresh(ctx, FasterSession); + } + /// /// Suspend session on current thread /// @@ -1113,6 +771,10 @@ void IClientSession.AtomicSwitch(int version) fht.AtomicSwitch(ctx, ctx.prevCtx, version, fht._hybridLogCheckpoint.info.checkpointTokens); } + #endregion Other Operations + + #region IFasterSession + // This is a struct to allow JIT to inline calls (and bypass default interface call mechanism) internal readonly struct InternalFasterSession : IFasterSession { @@ -1124,17 +786,17 @@ public InternalFasterSession(ClientSession _clientSession.functions.SupportsLocking; + public bool SupportsLocking => _clientSession.fht.SupportsLocking; public bool SupportsPostOperations => _clientSession.functions.SupportsPostOperations; #endregion IFunctions - Optional features supported #region IFunctions - Reads - public bool SingleReader(ref Key key, ref Input input, ref Value value, ref Output dst, ref RecordInfo recordInfo, long address) + public bool SingleReader(ref Key key, ref Input input, ref Value value, ref Output dst, ref LockOperation lockOp, ref RecordInfo recordInfo, long address) => _clientSession.functions.SingleReader(ref key, ref input, ref value, ref dst, ref recordInfo, address); [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool ConcurrentReader(ref Key key, ref Input input, ref Value value, ref Output dst, ref RecordInfo recordInfo, long address) + public bool ConcurrentReader(ref Key key, ref Input input, ref Value value, ref Output dst, ref LockOperation lockOp, ref RecordInfo recordInfo, long address) => !this.SupportsLocking ? 
_clientSession.functions.ConcurrentReader(ref key, ref input, ref value, ref dst, ref recordInfo, address) : ConcurrentReaderLock(ref key, ref input, ref value, ref dst, ref recordInfo, address); @@ -1164,46 +826,42 @@ public void ReadCompletionCallback(ref Key key, ref Input input, ref Output outp #endregion IFunctions - Reads - #region IFunctions - Upserts - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public void SingleWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address) - => _clientSession.functions.SingleWriter(ref key, ref input, ref src, ref dst, ref output, ref recordInfo, address); + // Except for readcache/copy-to-tail usage of SingleWriter, all operations that append a record must lock in the () call and unlock + // in the Post call; otherwise another session can try to access the record as soon as it's CAS'd and before Post is called. + #region IFunctions - Upserts [MethodImpl(MethodImplOptions.AggressiveInlining)] - public void SingleWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address, out long lockContext) + public void SingleWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref LockOperation lockOp, ref RecordInfo recordInfo, long address) { - lockContext = 0; - this.SingleWriter(ref key, ref input, ref src, ref dst, ref output, ref recordInfo, address); - if (this.SupportsPostOperations) + recordInfo.Version = _clientSession.ctx.version; + _clientSession.functions.SingleWriter(ref key, ref input, ref src, ref dst, ref output, ref recordInfo, address); + + if (this.SupportsPostOperations && this.SupportsLocking) { // Lock must be taken after the value is initialized. Unlocked in PostSingleWriterLock. 
- this.LockExclusive(ref recordInfo, ref key, ref dst, ref lockContext); + this.LockExclusive(ref recordInfo, ref key, ref dst, ref lockOp.LockContext); } } - public void PostSingleWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address) - => throw new FasterException("The lockContext form of PostSingleWriter should always be called"); - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public void PostSingleWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address, long lockContext) + public void PostSingleWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref LockOperation lockOp, ref RecordInfo recordInfo, long address) { if (!this.SupportsPostOperations) return; if (!this.SupportsLocking) PostSingleWriterNoLock(ref key, ref input, ref src, ref dst, ref output, ref recordInfo, address); else - PostSingleWriterLock(ref key, ref input, ref src, ref dst, ref output, ref recordInfo, address, lockContext); + PostSingleWriterLock(ref key, ref input, ref src, ref dst, ref output, ref lockOp, ref recordInfo, address); } [MethodImpl(MethodImplOptions.AggressiveInlining)] private void PostSingleWriterNoLock(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address) { - recordInfo.Version = _clientSession.ctx.version; _clientSession.functions.PostSingleWriter(ref key, ref input, ref src, ref dst, ref output, ref recordInfo, address); } [MethodImpl(MethodImplOptions.AggressiveInlining)] - private void PostSingleWriterLock(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address, long lockContext) + private void PostSingleWriterLock(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref LockOperation lockOp, ref RecordInfo recordInfo, long address) { // 
Lock was taken in SingleWriterLock try @@ -1212,12 +870,12 @@ private void PostSingleWriterLock(ref Key key, ref Input input, ref Value src, r } finally { - this.UnlockExclusive(ref recordInfo, ref key, ref dst, lockContext); + this.UnlockExclusive(ref recordInfo, ref key, ref dst, lockOp.LockContext); } } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool ConcurrentWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address) + public bool ConcurrentWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref LockOperation lockOp, ref RecordInfo recordInfo, long address) => !this.SupportsLocking ? ConcurrentWriterNoLock(ref key, ref input, ref src, ref dst, ref output, ref recordInfo, address) : ConcurrentWriterLock(ref key, ref input, ref src, ref dst, ref output, ref recordInfo, address); @@ -1255,24 +913,20 @@ public void UpsertCompletionCallback(ref Key key, ref Input input, ref Value val public bool NeedInitialUpdate(ref Key key, ref Input input, ref Output output) => _clientSession.functions.NeedInitialUpdate(ref key, ref input, ref output); - public void InitialUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, long address) - => throw new FasterException("The lockContext form of InitialUpdater should always be called"); - [MethodImpl(MethodImplOptions.AggressiveInlining)] public void InitialUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, long address, out long lockContext) { lockContext = 0; + recordInfo.Version = _clientSession.ctx.version; _clientSession.functions.InitialUpdater(ref key, ref input, ref value, ref output, ref recordInfo, address); - if (this.SupportsPostOperations) + + if (this.SupportsPostOperations && this.SupportsLocking) { // Lock must be taken after the value is initialized. Unlocked in PostInitialUpdaterLock. 
this.LockExclusive(ref recordInfo, ref key, ref value, ref lockContext); } } - public void PostInitialUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, long address) - => throw new FasterException("The lockContext form of PostInitialUpdater should always be called"); - [MethodImpl(MethodImplOptions.AggressiveInlining)] public void PostInitialUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, long address, long lockContext) { @@ -1287,7 +941,6 @@ public void PostInitialUpdater(ref Key key, ref Input input, ref Value value, re [MethodImpl(MethodImplOptions.AggressiveInlining)] private void PostInitialUpdaterNoLock(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, long address) { - recordInfo.Version = _clientSession.ctx.version; _clientSession.functions.PostInitialUpdater(ref key, ref input, ref value, ref output, ref recordInfo, address); } @@ -1311,25 +964,20 @@ private void PostInitialUpdaterLock(ref Key key, ref Input input, ref Value valu public bool NeedCopyUpdate(ref Key key, ref Input input, ref Value oldValue, ref Output output) => _clientSession.functions.NeedCopyUpdate(ref key, ref input, ref oldValue, ref output); - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public void CopyUpdater(ref Key key, ref Input input, ref Value oldValue, ref Value newValue, ref Output output, ref RecordInfo recordInfo, long address) - => throw new FasterException("The lockContext form of CopyUpdater should always be called"); - [MethodImpl(MethodImplOptions.AggressiveInlining)] public void CopyUpdater(ref Key key, ref Input input, ref Value oldValue, ref Value newValue, ref Output output, ref RecordInfo recordInfo, long address, out long lockContext) { lockContext = 0; + recordInfo.Version = _clientSession.ctx.version; _clientSession.functions.CopyUpdater(ref key, ref input, ref oldValue, ref newValue, ref output, ref 
recordInfo, address); - if (this.SupportsPostOperations) + + if (this.SupportsPostOperations && this.SupportsLocking) { // Lock must be taken after the value is initialized. Unlocked in PostInitialUpdaterLock. this.LockExclusive(ref recordInfo, ref key, ref newValue, ref lockContext); } } - public bool PostCopyUpdater(ref Key key, ref Input input, ref Value oldValue, ref Value newValue, ref Output output, ref RecordInfo recordInfo, long address) - => throw new FasterException("The lockContext form of PostCopyUpdater should always be called"); - [MethodImpl(MethodImplOptions.AggressiveInlining)] public bool PostCopyUpdater(ref Key key, ref Input input, ref Value oldValue, ref Value newValue, ref Output output, ref RecordInfo recordInfo, long address, long lockContext) { @@ -1343,7 +991,6 @@ public bool PostCopyUpdater(ref Key key, ref Input input, ref Value oldValue, re [MethodImpl(MethodImplOptions.AggressiveInlining)] private bool PostCopyUpdaterNoLock(ref Key key, ref Input input, ref Output output, ref Value oldValue, ref Value newValue, ref RecordInfo recordInfo, long address) { - recordInfo.Version = _clientSession.ctx.version; return _clientSession.functions.PostCopyUpdater(ref key, ref input, ref oldValue, ref newValue, ref output, ref recordInfo, address); } @@ -1402,12 +1049,11 @@ public void RMWCompletionCallback(ref Key key, ref Input input, ref Output outpu [MethodImpl(MethodImplOptions.AggressiveInlining)] public void PostSingleDeleter(ref Key key, ref RecordInfo recordInfo, long address) { - if (!this.SupportsPostOperations) - return; - // There is no value to lock here, so we take a RecordInfo lock in InternalDelete and release it here. 
recordInfo.Version = _clientSession.ctx.version; - _clientSession.functions.PostSingleDeleter(ref key, ref recordInfo, address); + + if (this.SupportsPostOperations) + _clientSession.functions.PostSingleDeleter(ref key, ref recordInfo, address); if (this.SupportsLocking) recordInfo.UnlockExclusive(); } @@ -1464,6 +1110,12 @@ public bool UnlockShared(ref RecordInfo recordInfo, ref Key key, ref Value value public bool TryLockShared(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext, int spinCount = 1) => _clientSession.functions.TryLockShared(ref recordInfo, ref key, ref value, ref lockContext, spinCount); + + public void LockExclusiveFromShared(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext) + => _clientSession.functions.LockExclusiveFromShared(ref recordInfo, ref key, ref value, ref lockContext); + + public bool TryLockExclusiveFromShared(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext, int spinCount = 1) + => _clientSession.functions.TryLockExclusiveFromShared(ref recordInfo, ref key, ref value, ref lockContext, spinCount); #endregion IFunctions - Locking #region IFunctions - Checkpointing @@ -1496,5 +1148,7 @@ public bool CompletePendingWithOutputs(out CompletedOutputIterator _clientSession.CompletePendingWithOutputs(out completedOutputs, wait, spinWaitForCommit); #endregion Internal utilities } + + #endregion IFasterSession } } diff --git a/cs/src/core/ClientSession/IFasterOperations.cs b/cs/src/core/ClientSession/IFasterOperations.cs new file mode 100644 index 000000000..9dc3de8b5 --- /dev/null +++ b/cs/src/core/ClientSession/IFasterOperations.cs @@ -0,0 +1,492 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. 
+ +using System.Threading; +using System.Threading.Tasks; + +namespace FASTER.core +{ + /// + /// Interface for FASTER operations + /// + public interface IFasterOperations + { + /// + /// Read operation + /// + /// The key to look up + /// Input to help extract the retrieved value into + /// The location to place the retrieved value + /// User application context passed in case the read goes pending due to IO + /// The serial number of the operation (used in recovery) + /// is populated by the implementation + Status Read(ref Key key, ref Input input, ref Output output, Context userContext = default, long serialNo = 0); + + /// + /// Read operation + /// + /// The key to look up + /// Input to help extract the retrieved value into + /// The location to place the retrieved value + /// User application context passed in case the read goes pending due to IO + /// The serial number of the operation (used in recovery) + /// is populated by the implementation + Status Read(Key key, Input input, out Output output, Context userContext = default, long serialNo = 0); + + /// + /// Read operation + /// + /// The key to look up + /// The location to place the retrieved value + /// User application context passed in case the read goes pending due to IO + /// The serial number of the operation (used in recovery) + /// is populated by the implementation + Status Read(ref Key key, ref Output output, Context userContext = default, long serialNo = 0); + + /// + /// Read operation + /// + /// + /// + /// + /// + /// + Status Read(Key key, out Output output, Context userContext = default, long serialNo = 0); + + /// + /// Read operation + /// + /// + /// + /// + /// + public (Status status, Output output) Read(Key key, Context userContext = default, long serialNo = 0); + + /// + /// Read operation that accepts a ref argument to start the lookup at instead of starting at the hash table entry for , + /// and is updated with the address and record header for the found record. 
+ /// + /// The key to look up + /// Input to help extract the retrieved value into + /// The location to place the retrieved value + /// On input contains the address to start at in ; if this is Constants.kInvalidAddress, the + /// search starts with the key as in other forms of Read. + /// On output, receives: + /// + ///
  • The address of the found record. This may be different from the passed on the call, due to + /// tracing back over hash collisions until we arrive at the key match
  • + ///
  • A copy of the record's header in ; can be passed + /// in a subsequent call, thereby enumerating all records in a key's hash chain.
  • + ///
    + ///
    + /// + /// Flags for controlling operations within the read, such as ReadCache interaction + /// User application context passed in case the read goes pending due to IO + /// The serial number of the operation (used in recovery) + /// is populated by the implementation + Status Read(ref Key key, ref Input input, ref Output output, ref RecordMetadata recordMetadata, ReadFlags readFlags = ReadFlags.None, Context userContext = default, long serialNo = 0); + + /// + /// Read operation that accepts an argument to lookup at, instead of a key. + /// + /// The address to look up + /// Input to help extract the retrieved value into + /// The location to place the retrieved value + /// Flags for controlling operations within the read, such as ReadCache interaction + /// User application context passed in case the read goes pending due to IO + /// The serial number of the operation (used in recovery) + /// is populated by the implementation; this should store the key if it needs it + Status ReadAtAddress(long address, ref Input input, ref Output output, ReadFlags readFlags = ReadFlags.None, Context userContext = default, long serialNo = 0); + + /// + /// Async read operation. May return uncommitted results; to ensure reading of committed results, complete the read and then call WaitForCommitAsync. + /// + /// The key to look up + /// Input to help extract the retrieved value into output + /// User application context passed in case the read goes pending due to IO + /// The serial number of the operation (used in recovery) + /// Token to cancel the operation + /// wrapping + /// The caller must await the return value to obtain the result, then call one of + /// + /// result. + /// result. 
+ /// + /// to complete the read operation and obtain the result status, the output that is populated by the + /// implementation, and optionally a copy of the header for the retrieved record + ValueTask.ReadAsyncResult> ReadAsync(ref Key key, ref Input input, Context userContext = default, long serialNo = 0, CancellationToken cancellationToken = default); + + /// + /// Async read operation, may return uncommitted result + /// To ensure reading of committed result, complete the read and then call WaitForCommitAsync. + /// + /// + /// + /// + /// + /// + /// wrapping + /// The caller must await the return value to obtain the result, then call one of + /// + /// result. + /// result. + /// + /// to complete the read operation and obtain the result status, the output that is populated by the + /// implementation, and optionally a copy of the header for the retrieved record + ValueTask.ReadAsyncResult> ReadAsync(Key key, Input input, Context context = default, long serialNo = 0, CancellationToken token = default); + + /// + /// Async read operation. May return uncommitted results; to ensure reading of committed results, complete the read and then call WaitForCommitAsync. + /// + /// The key to look up + /// User application context passed in case the read goes pending due to IO + /// The serial number of the operation (used in recovery) + /// Token to cancel the operation + /// wrapping + /// The caller must await the return value to obtain the result, then call one of + /// + /// result. + /// result. 
+ /// + /// to complete the read operation and obtain the result status, the output that is populated by the + /// implementation, and optionally a copy of the header for the retrieved record + ValueTask.ReadAsyncResult> ReadAsync(ref Key key, Context userContext = default, long serialNo = 0, CancellationToken token = default); + + /// + /// Async read operation, may return uncommitted result + /// To ensure reading of committed result, complete the read and then call WaitForCommitAsync. + /// + /// + /// + /// + /// + /// wrapping + /// The caller must await the return value to obtain the result, then call one of + /// + /// result. + /// result. + /// + /// to complete the read operation and obtain the result status, the output that is populated by the + /// implementation, and optionally a copy of the header for the retrieved record + ValueTask.ReadAsyncResult> ReadAsync(Key key, Context context = default, long serialNo = 0, CancellationToken token = default); + + /// + /// Async read operation that accepts a to start the lookup at instead of starting at the hash table entry for , + /// and returns the for the found record (which contains previous address in the hash chain for this key; this can + /// be used as in a subsequent call to iterate all records for ). + /// + /// The key to look up + /// Input to help extract the retrieved value into output + /// Start at this address rather than the address in the hash table for "/> + /// Flags for controlling operations within the read, such as ReadCache interaction + /// User application context passed in case the read goes pending due to IO + /// The serial number of the operation (used in recovery) + /// Token to cancel the operation + /// wrapping + /// The caller must await the return value to obtain the result, then call one of + /// + /// result. + /// result. 
+ /// + /// to complete the read operation and obtain the result status, the output that is populated by the + /// implementation, and optionally a copy of the header for the retrieved record + /// + ValueTask.ReadAsyncResult> ReadAsync(ref Key key, ref Input input, long startAddress, ReadFlags readFlags = ReadFlags.None, + Context userContext = default, long serialNo = 0, CancellationToken cancellationToken = default); + + /// + /// Async Read operation that accepts an argument to lookup at, instead of a key. + /// + /// The address to look up + /// Input to help extract the retrieved value into output + /// Flags for controlling operations within the read, such as ReadCache interaction + /// User application context passed in case the read goes pending due to IO + /// The serial number of the operation (used in recovery) + /// Token to cancel the operation + /// wrapping + /// The caller must await the return value to obtain the result, then call one of + /// + /// result. + /// result. 
+ /// + /// to complete the read operation and obtain the result status, the output that is populated by the + /// implementation, and optionally a copy of the header for the retrieved record + ValueTask.ReadAsyncResult> ReadAtAddressAsync(long address, ref Input input, ReadFlags readFlags = ReadFlags.None, + Context userContext = default, long serialNo = 0, CancellationToken cancellationToken = default); + + /// + /// Upsert operation + /// + /// + /// + /// + /// + /// + Status Upsert(ref Key key, ref Value desiredValue, Context userContext = default, long serialNo = 0); + + /// + /// Upsert operation + /// + /// + /// + /// + /// + /// + /// + /// + Status Upsert(ref Key key, ref Input input, ref Value desiredValue, ref Output output, Context userContext = default, long serialNo = 0); + + /// + /// Upsert operation + /// + /// + /// + /// + /// + /// + /// + /// + /// + Status Upsert(ref Key key, ref Input input, ref Value desiredValue, ref Output output, out RecordMetadata recordMetadata, Context userContext = default, long serialNo = 0); + + /// + /// Upsert operation + /// + /// + /// + /// + /// + /// + Status Upsert(Key key, Value desiredValue, Context userContext = default, long serialNo = 0); + + /// + /// Upsert operation + /// + /// + /// + /// + /// + /// + /// + /// + Status Upsert(Key key, Input input, Value desiredValue, ref Output output, Context userContext = default, long serialNo = 0); + + /// + /// Async Upsert operation + /// Await operation in session before issuing next one + /// + /// + /// + /// + /// + /// + /// ValueTask wrapping + /// The caller must await the return value to obtain the result, then call one of + /// + /// result. + /// result = await result. while result.Status == + /// + /// to complete the Upsert operation. Failure to complete the operation will result in leaked allocations. 
+ ValueTask.UpsertAsyncResult> UpsertAsync(ref Key key, ref Value desiredValue, Context userContext = default, long serialNo = 0, CancellationToken token = default); + + /// + /// Async Upsert operation + /// Await operation in session before issuing next one + /// + /// + /// + /// + /// + /// + /// + /// ValueTask wrapping + /// The caller must await the return value to obtain the result, then call one of + /// + /// result. + /// result = await result. while result.Status == + /// + /// to complete the Upsert operation. Failure to complete the operation will result in leaked allocations. + ValueTask.UpsertAsyncResult> UpsertAsync(ref Key key, ref Input input, ref Value desiredValue, Context userContext = default, long serialNo = 0, CancellationToken token = default); + + /// + /// Async Upsert operation + /// Await operation in session before issuing next one + /// + /// + /// + /// + /// + /// + /// ValueTask wrapping the asyncResult of the operation + /// The caller must await the return value to obtain the result, then call one of + /// + /// result. + /// result = await result. while result.Status == + /// + /// to complete the Upsert operation. Failure to complete the operation will result in leaked allocations. + ValueTask.UpsertAsyncResult> UpsertAsync(Key key, Value desiredValue, Context userContext = default, long serialNo = 0, CancellationToken token = default); + + /// + /// Async Upsert operation + /// Await operation in session before issuing next one + /// + /// + /// + /// + /// + /// + /// + /// ValueTask wrapping the asyncResult of the operation + /// The caller must await the return value to obtain the result, then call one of + /// + /// result. + /// result = await result. while result.Status == + /// + /// to complete the Upsert operation. Failure to complete the operation will result in leaked allocations. 
+ ValueTask.UpsertAsyncResult> UpsertAsync(Key key, Input input, Value desiredValue, Context userContext = default, long serialNo = 0, CancellationToken token = default); + + /// + /// RMW operation + /// + /// + /// + /// + /// + /// + /// + Status RMW(ref Key key, ref Input input, ref Output output, Context userContext = default, long serialNo = 0); + + /// + /// RMW operation + /// + /// + /// + /// + /// + /// + /// + /// + Status RMW(ref Key key, ref Input input, ref Output output, out RecordMetadata recordMetadata, Context userContext = default, long serialNo = 0); + + /// + /// RMW operation + /// + /// + /// + /// + /// + /// + /// + Status RMW(Key key, Input input, out Output output, Context userContext = default, long serialNo = 0); + + /// + /// RMW operation + /// + /// + /// + /// + /// + /// + Status RMW(ref Key key, ref Input input, Context userContext = default, long serialNo = 0); + + /// + /// RMW operation + /// + /// + /// + /// + /// + /// + Status RMW(Key key, Input input, Context userContext = default, long serialNo = 0); + + /// + /// Async RMW operation + /// Await operation in session before issuing next one + /// + /// + /// + /// + /// + /// + /// The caller must await the return value to obtain the result, then call one of + /// + /// result. + /// result = await result. while result.Status == + /// + /// to complete the Upsert operation. Failure to complete the operation will result in leaked allocations. + ValueTask.RmwAsyncResult> RMWAsync(ref Key key, ref Input input, Context context = default, long serialNo = 0, CancellationToken token = default); + + /// + /// Async RMW operation + /// Await operation in session before issuing next one + /// + /// + /// + /// + /// + /// + /// The caller must await the return value to obtain the result, then call one of + /// + /// result. + /// result = await result. while result.Status == + /// + /// to complete the Upsert operation. 
Failure to complete the operation will result in leaked allocations. + ValueTask.RmwAsyncResult> RMWAsync(Key key, Input input, Context context = default, long serialNo = 0, CancellationToken token = default); + + /// + /// Delete operation + /// + /// + /// + /// + /// + Status Delete(ref Key key, Context userContext = default, long serialNo = 0); + + /// + /// Delete operation + /// + /// + /// + /// + /// + Status Delete(Key key, Context userContext = default, long serialNo = 0); + + /// + /// Async Delete operation + /// Await operation in session before issuing next one + /// + /// + /// + /// + /// + /// The caller must await the return value to obtain the result, then call one of + /// + /// result. + /// result = await result. while result.Status == + /// + /// to complete the Upsert operation. Failure to complete the operation will result in leaked allocations. + ValueTask.DeleteAsyncResult> DeleteAsync(ref Key key, Context userContext = default, long serialNo = 0, CancellationToken token = default); + + /// + /// Async Delete operation + /// Await operation in session before issuing next one + /// + /// + /// + /// + /// + /// The caller must await the return value to obtain the result, then call one of + /// + /// result. + /// result = await result. while result.Status == + /// + /// to complete the Upsert operation. Failure to complete the operation will result in leaked allocations. + ValueTask.DeleteAsyncResult> DeleteAsync(Key key, Context userContext = default, long serialNo = 0, CancellationToken token = default); + + /// + /// Refresh session epoch and handle checkpointing phases. Used only + /// in case of thread-affinitized sessions (async support is disabled). 
+ /// + public void Refresh(); + } +} diff --git a/cs/src/core/ClientSession/ManualFasterOperations.cs b/cs/src/core/ClientSession/ManualFasterOperations.cs new file mode 100644 index 000000000..0b942c216 --- /dev/null +++ b/cs/src/core/ClientSession/ManualFasterOperations.cs @@ -0,0 +1,698 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +using System; +using System.Diagnostics; +using System.Runtime.CompilerServices; +using System.Threading; +using System.Threading.Tasks; + +namespace FASTER.core +{ + /// + /// Faster Operations implementation that allows manual control of record locking and epoch management. For advanced use only. + /// + public sealed class ManualFasterOperations : IFasterOperations, IDisposable + where Functions : IFunctions + { + readonly ClientSession clientSession; + + internal readonly InternalFasterSession FasterSession; + + internal ManualFasterOperations(ClientSession clientSession) + { + this.clientSession = clientSession; + FasterSession = new InternalFasterSession(clientSession); + } + + /// + /// Resume session on current thread. IMPORTANT: Call SuspendThread before any async op. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void UnsafeResumeThread() => clientSession.UnsafeResumeThread(); + + /// + /// Resume session on current thread. IMPORTANT: Call SuspendThread before any async op. 
+ /// + /// Epoch that session resumes on; can be saved to see if epoch has changed + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void UnsafeResumeThread(out int resumeEpoch) => clientSession.UnsafeResumeThread(out resumeEpoch); + + /// + /// Suspend session on current thread + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void UnsafeSuspendThread() => clientSession.UnsafeSuspendThread(); + + /// + /// Does not actually dispose of anything; asserts the epoch has been suspended + /// + public void Dispose() + { + Debug.Assert(!LightEpoch.AnyInstanceProtected()); + } + + #region Key Locking + + /// + /// Lock the key with the specified , waiting until it is acquired + /// + /// The key to lock + /// The type of lock to take + /// Whether to retrieve data (and copy to the tail of the log) if the key is not in the mutable region + /// Context-specific information; will be passed to + /// The address of the record. May be checked against to check if the lock remains valid + public unsafe void Lock(ref Key key, LockType lockType, bool retrieveData, out long lockContext, out long address) + { + LockOperation lockOp = new(LockOperationType.Lock, lockType); + + lockContext = default; + Input input = default; + Output output = default; + RecordMetadata recordMetadata = default; + + // Note: this does not use RMW because that would complicate the RMW process: + // - InternalRMW would have to know whether we are doing retrieveData + // - this.CopyUpdater would have to call SingleWriter to simply copy the data over unchanged + // The assumption is that if retrieveData is true, there is an expectation the key already exists, so only ContextRead would be called. 
+ + bool success = false; + if (retrieveData) + { + var status = clientSession.fht.ContextRead(ref key, ref input, ref output, ref lockOp, ref recordMetadata, ReadFlags.CopyToTail, context: default, FasterSession, serialNo: 0, clientSession.ctx); + success = status == Status.OK; + if (success) + { + lockContext = lockOp.LockContext; + } + else if (status == Status.PENDING) + { + UnsafeSuspendThread(); + clientSession.CompletePendingWithOutputs(out var completedOutputs, wait: true); + recordMetadata = completedOutputs.Current.RecordMetadata; + lockContext = completedOutputs.Current.LockContext; + completedOutputs.Dispose(); + success = true; + UnsafeResumeThread(); + } + } + + if (!success) + { + Value value = default; + var status = clientSession.fht.ContextUpsert(ref key, ref input, ref value, ref output, ref lockOp, out recordMetadata, context: default, FasterSession, serialNo: 0, clientSession.ctx); + Debug.Assert(status == Status.OK); + } + + address = recordMetadata.Address; + } + + /// + /// Lock the key with the specified + /// + /// The key to lock + /// The type of lock to take + /// Context-specific information; was returned by + public void Unlock(ref Key key, LockType lockType, long lockContext) + { + LockOperation lockOp = new(LockOperationType.Unlock, lockType); + + Input input = default; + Output output = default; + RecordMetadata recordMetadata = default; + + var status = clientSession.fht.ContextRead(ref key, ref input, ref output, ref lockOp, ref recordMetadata, ReadFlags.None, context: default, FasterSession, serialNo: 0, clientSession.ctx); + if (status == Status.PENDING) + { + // Do nothing here, as a lock that goes into the immutable region is considered unlocked. + UnsafeSuspendThread(); + clientSession.CompletePending(wait: true); + UnsafeResumeThread(); + } + } + + /// + /// The minimum valid address for a locked record (includes copies to tail). 
+ /// + public long MinimumValidLockAddress => clientSession.fht.Log.ReadOnlyAddress; + + #endregion Key Locking + + #region IFasterOperations + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public Status Read(ref Key key, ref Input input, ref Output output, Context userContext = default, long serialNo = 0) + { + Debug.Assert(clientSession.fht.epoch.ThisInstanceProtected()); + return clientSession.fht.ContextRead(ref key, ref input, ref output, userContext, FasterSession, serialNo, clientSession.ctx); + } + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public Status Read(Key key, Input input, out Output output, Context userContext = default, long serialNo = 0) + { + output = default; + return Read(ref key, ref input, ref output, userContext, serialNo); + } + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public Status Read(ref Key key, ref Output output, Context userContext = default, long serialNo = 0) + { + Input input = default; + return Read(ref key, ref input, ref output, userContext, serialNo); + } + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public Status Read(Key key, out Output output, Context userContext = default, long serialNo = 0) + { + Input input = default; + output = default; + return Read(ref key, ref input, ref output, userContext, serialNo); + } + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public (Status status, Output output) Read(Key key, Context userContext = default, long serialNo = 0) + { + Input input = default; + Output output = default; + return (Read(ref key, ref input, ref output, userContext, serialNo), output); + } + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public Status Read(ref Key key, ref Input input, ref Output output, ref RecordMetadata recordMetadata, ReadFlags readFlags = ReadFlags.None, Context userContext = default, long serialNo = 0) + { + Debug.Assert(clientSession.fht.epoch.ThisInstanceProtected()); + LockOperation lockOp = 
default; + return clientSession.fht.ContextRead(ref key, ref input, ref output, ref lockOp, ref recordMetadata, readFlags, userContext, FasterSession, serialNo, clientSession.ctx); + } + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public Status ReadAtAddress(long address, ref Input input, ref Output output, ReadFlags readFlags = ReadFlags.None, Context userContext = default, long serialNo = 0) + { + Debug.Assert(clientSession.fht.epoch.ThisInstanceProtected()); + return clientSession.fht.ContextReadAtAddress(address, ref input, ref output, readFlags, userContext, FasterSession, serialNo, clientSession.ctx); + } + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public ValueTask.ReadAsyncResult> ReadAsync(ref Key key, ref Input input, Context userContext = default, long serialNo = 0, CancellationToken cancellationToken = default) + { + Debug.Assert(!LightEpoch.AnyInstanceProtected()); + return clientSession.fht.ReadAsync(FasterSession, clientSession.ctx, ref key, ref input, Constants.kInvalidAddress, userContext, serialNo, cancellationToken); + } + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public ValueTask.ReadAsyncResult> ReadAsync(Key key, Input input, Context context = default, long serialNo = 0, CancellationToken token = default) + { + Debug.Assert(!LightEpoch.AnyInstanceProtected()); + return clientSession.fht.ReadAsync(FasterSession, clientSession.ctx, ref key, ref input, Constants.kInvalidAddress, context, serialNo, token); + } + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public ValueTask.ReadAsyncResult> ReadAsync(ref Key key, Context userContext = default, long serialNo = 0, CancellationToken token = default) + { + Debug.Assert(!LightEpoch.AnyInstanceProtected()); + Input input = default; + return clientSession.fht.ReadAsync(FasterSession, clientSession.ctx, ref key, ref input, Constants.kInvalidAddress, userContext, serialNo, token); + } + + /// + 
[MethodImpl(MethodImplOptions.AggressiveInlining)] + public ValueTask.ReadAsyncResult> ReadAsync(Key key, Context context = default, long serialNo = 0, CancellationToken token = default) + { + Debug.Assert(!LightEpoch.AnyInstanceProtected()); + Input input = default; + return clientSession.fht.ReadAsync(FasterSession, clientSession.ctx, ref key, ref input, Constants.kInvalidAddress, context, serialNo, token); + } + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public ValueTask.ReadAsyncResult> ReadAsync(ref Key key, ref Input input, long startAddress, ReadFlags readFlags = ReadFlags.None, + Context userContext = default, long serialNo = 0, CancellationToken cancellationToken = default) + { + Debug.Assert(!LightEpoch.AnyInstanceProtected()); + var operationFlags = FasterKV.PendingContext.GetOperationFlags(readFlags); + return clientSession.fht.ReadAsync(FasterSession, clientSession.ctx, ref key, ref input, startAddress, userContext, serialNo, cancellationToken, operationFlags); + } + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public ValueTask.ReadAsyncResult> ReadAtAddressAsync(long address, ref Input input, ReadFlags readFlags = ReadFlags.None, + Context userContext = default, long serialNo = 0, CancellationToken cancellationToken = default) + { + Debug.Assert(!LightEpoch.AnyInstanceProtected()); + Key key = default; + var operationFlags = FasterKV.PendingContext.GetOperationFlags(readFlags, noKey: true); + return clientSession.fht.ReadAsync(FasterSession, clientSession.ctx, ref key, ref input, address, userContext, serialNo, cancellationToken, operationFlags); + } + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public Status Upsert(ref Key key, ref Value desiredValue, Context userContext = default, long serialNo = 0) + { + Debug.Assert(clientSession.fht.epoch.ThisInstanceProtected()); + Input input = default; + Output output = default; + return Upsert(ref key, ref input, ref desiredValue, ref output, out _, 
userContext, serialNo); + } + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public Status Upsert(ref Key key, ref Input input, ref Value desiredValue, ref Output output, Context userContext = default, long serialNo = 0) + { + Debug.Assert(clientSession.fht.epoch.ThisInstanceProtected()); + return clientSession.fht.ContextUpsert(ref key, ref input, ref desiredValue, ref output, userContext, FasterSession, serialNo, clientSession.ctx); + } + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public Status Upsert(ref Key key, ref Input input, ref Value desiredValue, ref Output output, out RecordMetadata recordMetadata, Context userContext = default, long serialNo = 0) + { + Debug.Assert(clientSession.fht.epoch.ThisInstanceProtected()); + LockOperation lockOp = default; + return clientSession.fht.ContextUpsert(ref key, ref input, ref desiredValue, ref output, ref lockOp, out recordMetadata, userContext, FasterSession, serialNo, clientSession.ctx); + } + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public Status Upsert(Key key, Value desiredValue, Context userContext = default, long serialNo = 0) + => Upsert(ref key, ref desiredValue, userContext, serialNo); + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public Status Upsert(Key key, Input input, Value desiredValue, ref Output output, Context userContext = default, long serialNo = 0) + => Upsert(ref key, ref input, ref desiredValue, ref output, userContext, serialNo); + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public ValueTask.UpsertAsyncResult> UpsertAsync(ref Key key, ref Value desiredValue, Context userContext = default, long serialNo = 0, CancellationToken token = default) + { + Input input = default; + return UpsertAsync(ref key, ref input, ref desiredValue, userContext, serialNo, token); + } + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public ValueTask.UpsertAsyncResult> UpsertAsync(ref Key key, ref Input input, ref 
Value desiredValue, Context userContext = default, long serialNo = 0, CancellationToken token = default) + { + Debug.Assert(!LightEpoch.AnyInstanceProtected()); + return clientSession.fht.UpsertAsync(FasterSession, clientSession.ctx, ref key, ref input, ref desiredValue, userContext, serialNo, token); + } + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public ValueTask.UpsertAsyncResult> UpsertAsync(Key key, Value desiredValue, Context userContext = default, long serialNo = 0, CancellationToken token = default) + => UpsertAsync(ref key, ref desiredValue, userContext, serialNo, token); + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public ValueTask.UpsertAsyncResult> UpsertAsync(Key key, Input input, Value desiredValue, Context userContext = default, long serialNo = 0, CancellationToken token = default) + => UpsertAsync(ref key, ref input, ref desiredValue, userContext, serialNo, token); + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public Status RMW(ref Key key, ref Input input, ref Output output, Context userContext = default, long serialNo = 0) + => RMW(ref key, ref input, ref output, out _, userContext, serialNo); + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public Status RMW(ref Key key, ref Input input, ref Output output, out RecordMetadata recordMetadata, Context userContext = default, long serialNo = 0) + { + Debug.Assert(clientSession.fht.epoch.ThisInstanceProtected()); + return clientSession.fht.ContextRMW(ref key, ref input, ref output, out recordMetadata, userContext, FasterSession, serialNo, clientSession.ctx); + } + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public Status RMW(Key key, Input input, out Output output, Context userContext = default, long serialNo = 0) + { + output = default; + return RMW(ref key, ref input, ref output, userContext, serialNo); + } + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public Status RMW(ref Key key, ref Input input, 
Context userContext = default, long serialNo = 0) + { + Output output = default; + return RMW(ref key, ref input, ref output, userContext, serialNo); + } + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public Status RMW(Key key, Input input, Context userContext = default, long serialNo = 0) + { + Output output = default; + return RMW(ref key, ref input, ref output, userContext, serialNo); + } + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public ValueTask.RmwAsyncResult> RMWAsync(ref Key key, ref Input input, Context context = default, long serialNo = 0, CancellationToken token = default) + { + Debug.Assert(!LightEpoch.AnyInstanceProtected()); + return clientSession.fht.RmwAsync(FasterSession, clientSession.ctx, ref key, ref input, context, serialNo, token); + } + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public ValueTask.RmwAsyncResult> RMWAsync(Key key, Input input, Context context = default, long serialNo = 0, CancellationToken token = default) + => RMWAsync(ref key, ref input, context, serialNo, token); + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public Status Delete(ref Key key, Context userContext = default, long serialNo = 0) + { + Debug.Assert(clientSession.fht.epoch.ThisInstanceProtected()); + return clientSession.fht.ContextDelete(ref key, userContext, FasterSession, serialNo, clientSession.ctx); + } + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public Status Delete(Key key, Context userContext = default, long serialNo = 0) + => Delete(ref key, userContext, serialNo); + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public ValueTask.DeleteAsyncResult> DeleteAsync(ref Key key, Context userContext = default, long serialNo = 0, CancellationToken token = default) + { + Debug.Assert(!LightEpoch.AnyInstanceProtected()); + return clientSession.fht.DeleteAsync(FasterSession, clientSession.ctx, ref key, userContext, serialNo, token); + } + + /// + 
[MethodImpl(MethodImplOptions.AggressiveInlining)] + public ValueTask.DeleteAsyncResult> DeleteAsync(Key key, Context userContext = default, long serialNo = 0, CancellationToken token = default) + => DeleteAsync(ref key, userContext, serialNo, token); + + /// + public void Refresh() + { + Debug.Assert(!LightEpoch.AnyInstanceProtected()); + clientSession.fht.InternalRefresh(clientSession.ctx, FasterSession); + } + + #endregion IFasterOperations + + #region IFasterSession + + // This is a struct to allow JIT to inline calls (and bypass default interface call mechanism) + internal readonly struct InternalFasterSession : IFasterSession + { + private readonly ClientSession _clientSession; + + public InternalFasterSession(ClientSession clientSession) + { + _clientSession = clientSession; + } + + #region IFunctions - Optional features supported + public bool SupportsLocking => true; // Check user's setting in FasterKV to know whose lock scheme to use + + public bool SupportsPostOperations => true; // We need this for user record locking, but check for user's setting before calling user code + #endregion IFunctions - Optional features supported + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + void HandleLockOperation(ref RecordInfo recordInfo, ref Key key, ref Value value, ref LockOperation lockOp, out bool isLock) + { + isLock = false; + if (lockOp.LockOperationType == LockOperationType.Unlock) + { + if (recordInfo.Stub) + { + recordInfo.Stub = false; + recordInfo.Invalid = true; + } + if (lockOp.LockType == LockType.Shared) + this.UnlockShared(ref recordInfo, ref key, ref value, lockOp.LockContext); + else if (lockOp.LockType == LockType.Exclusive) + this.UnlockExclusive(ref recordInfo, ref key, ref value, lockOp.LockContext); + else + Debug.Fail($"Unexpected LockType: {lockOp.LockType}"); + return; + } + isLock = true; + if (lockOp.LockType == LockType.Shared) + this.LockShared(ref recordInfo, ref key, ref value, ref lockOp.LockContext); + else if 
(lockOp.LockType == LockType.Exclusive) + this.LockExclusive(ref recordInfo, ref key, ref value, ref lockOp.LockContext); + else if (lockOp.LockType == LockType.ExclusiveFromShared) + this.LockExclusiveFromShared(ref recordInfo, ref key, ref value, ref lockOp.LockContext); + else + Debug.Fail($"Unexpected LockType: {lockOp.LockType}"); + } + + #region IFunctions - Reads + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public bool SingleReader(ref Key key, ref Input input, ref Value value, ref Output dst, ref LockOperation lockOp, ref RecordInfo recordInfo, long address) + { + if (lockOp.IsSet) + { + // No value is returned to the client through the lock sequence; for consistency all key locks must be acquired before their values are read. + HandleLockOperation(ref recordInfo, ref key, ref value, ref lockOp, out _); + return true; + } + return _clientSession.functions.SingleReader(ref key, ref input, ref value, ref dst, ref recordInfo, address); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public bool ConcurrentReader(ref Key key, ref Input input, ref Value value, ref Output dst, ref LockOperation lockOp, ref RecordInfo recordInfo, long address) + { + if (lockOp.IsSet) + { + // No value is returned to the client through the lock sequence; for consistency all key locks must be acquired before their values are read. + HandleLockOperation(ref recordInfo, ref key, ref value, ref lockOp, out _); + return true; + } + return _clientSession.functions.ConcurrentReader(ref key, ref input, ref value, ref dst, ref recordInfo, address); + } + + public void ReadCompletionCallback(ref Key key, ref Input input, ref Output output, Context ctx, Status status, RecordMetadata recordMetadata) + => _clientSession.functions.ReadCompletionCallback(ref key, ref input, ref output, ctx, status, recordMetadata); + + #endregion IFunctions - Reads + + // Our general locking rule in this "session" is: we don't lock unless explicitly requested via lockOp.IsSet. 
If it is requested, then as with + // ClientSession, except for readcache usage of SingleWriter, all operations that append a record must lock in the () call. Unlike + // ClientSession, we do *not* unlock in the Post call; instead we wait for explicit client user unlock. + + #region IFunctions - Upserts + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void SingleWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref LockOperation lockOp, ref RecordInfo recordInfo, long address) + { + recordInfo.Version = _clientSession.ctx.version; + _clientSession.functions.SingleWriter(ref key, ref input, ref src, ref dst, ref output, ref recordInfo, address); + + // Lock (or unlock) here, and do not unlock in PostSingleWriter; wait for the user to explicitly unlock + if (lockOp.IsSet) + HandleLockOperation(ref recordInfo, ref key, ref dst, ref lockOp, out _); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void PostSingleWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref LockOperation lockOp, ref RecordInfo recordInfo, long address) + { + if (_clientSession.functions.SupportsPostOperations) + _clientSession.functions.PostSingleWriter(ref key, ref input, ref src, ref dst, ref output, ref recordInfo, address); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public bool ConcurrentWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref LockOperation lockOp, ref RecordInfo recordInfo, long address) + { + recordInfo.Version = _clientSession.ctx.version; + + if (lockOp.IsSet) + { + // All lock operations in ConcurrentWriter can return immediately. + HandleLockOperation(ref recordInfo, ref key, ref dst, ref lockOp, out _); + return true; + } + + // Note: KeyIndexes do not need notification of in-place updates because the key does not change. 
+ return _clientSession.functions.ConcurrentWriter(ref key, ref input, ref src, ref dst, ref output, ref recordInfo, address); + } + + public void UpsertCompletionCallback(ref Key key, ref Input input, ref Value value, Context ctx) + => _clientSession.functions.UpsertCompletionCallback(ref key, ref input, ref value, ctx); +#endregion IFunctions - Upserts + +#region IFunctions - RMWs +#region InitialUpdater + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public bool NeedInitialUpdate(ref Key key, ref Input input, ref Output output) + => _clientSession.functions.NeedInitialUpdate(ref key, ref input, ref output); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void InitialUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, long address, out long lockContext) + { + lockContext = default; + recordInfo.Version = _clientSession.ctx.version; + _clientSession.functions.InitialUpdater(ref key, ref input, ref value, ref output, ref recordInfo, address); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void PostInitialUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, long address, long lockContext) + { + if (_clientSession.functions.SupportsPostOperations) + _clientSession.functions.PostInitialUpdater(ref key, ref input, ref value, ref output, ref recordInfo, address); + } +#endregion InitialUpdater + +#region CopyUpdater + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public bool NeedCopyUpdate(ref Key key, ref Input input, ref Value oldValue, ref Output output) + => _clientSession.functions.NeedCopyUpdate(ref key, ref input, ref oldValue, ref output); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void CopyUpdater(ref Key key, ref Input input, ref Value oldValue, ref Value newValue, ref Output output, ref RecordInfo recordInfo, long address, out long lockContext) + { + lockContext = 0; + recordInfo.Version = 
_clientSession.ctx.version; + _clientSession.functions.CopyUpdater(ref key, ref input, ref oldValue, ref newValue, ref output, ref recordInfo, address); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public bool PostCopyUpdater(ref Key key, ref Input input, ref Value oldValue, ref Value newValue, ref Output output, ref RecordInfo recordInfo, long address, long lockContext) + { + return !_clientSession.functions.SupportsPostOperations + || _clientSession.functions.PostCopyUpdater(ref key, ref input, ref oldValue, ref newValue, ref output, ref recordInfo, address); + } +#endregion CopyUpdater + +#region InPlaceUpdater + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public bool InPlaceUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, long address) + { + recordInfo.Version = _clientSession.ctx.version; + + // Note: KeyIndexes do not need notification of in-place updates because the key does not change. + return _clientSession.functions.InPlaceUpdater(ref key, ref input, ref value, ref output, ref recordInfo, address); + } + + public void RMWCompletionCallback(ref Key key, ref Input input, ref Output output, Context ctx, Status status, RecordMetadata recordMetadata) + => _clientSession.functions.RMWCompletionCallback(ref key, ref input, ref output, ctx, status, recordMetadata); + +#endregion InPlaceUpdater +#endregion IFunctions - RMWs + +#region IFunctions - Deletes + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void PostSingleDeleter(ref Key key, ref RecordInfo recordInfo, long address) + { + // There is no value to lock here, so we take a RecordInfo lock in InternalDelete and release it here. 
+ recordInfo.Version = _clientSession.ctx.version; + if (_clientSession.functions.SupportsPostOperations) + _clientSession.functions.PostSingleDeleter(ref key, ref recordInfo, address); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public bool ConcurrentDeleter(ref Key key, ref Value value, ref RecordInfo recordInfo, long address) + { + recordInfo.Version = _clientSession.ctx.version; + recordInfo.Tombstone = true; + return _clientSession.functions.ConcurrentDeleter(ref key, ref value, ref recordInfo, address); + } + + public void DeleteCompletionCallback(ref Key key, Context ctx) + => _clientSession.functions.DeleteCompletionCallback(ref key, ctx); +#endregion IFunctions - Deletes + +#region IFunctions - Locking + + public void LockExclusive(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext) + { + if (_clientSession.fht.SupportsLocking) + _clientSession.functions.LockExclusive(ref recordInfo, ref key, ref value, ref lockContext); + else + recordInfo.LockExclusive(); + } + + public void UnlockExclusive(ref RecordInfo recordInfo, ref Key key, ref Value value, long lockContext) + { + if (_clientSession.fht.SupportsLocking) + _clientSession.functions.UnlockExclusive(ref recordInfo, ref key, ref value, lockContext); + else + recordInfo.UnlockExclusive(); + } + + public bool TryLockExclusive(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext, int spinCount = 1) + => _clientSession.fht.SupportsLocking + ? 
_clientSession.functions.TryLockExclusive(ref recordInfo, ref key, ref value, ref lockContext, spinCount) + : recordInfo.TryLockExclusive(spinCount); + + public void LockShared(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext) + { + if (_clientSession.fht.SupportsLocking) + _clientSession.functions.LockShared(ref recordInfo, ref key, ref value, ref lockContext); + else + recordInfo.LockShared(); + } + + public bool UnlockShared(ref RecordInfo recordInfo, ref Key key, ref Value value, long lockContext) + { + if (_clientSession.fht.SupportsLocking) + return _clientSession.functions.UnlockShared(ref recordInfo, ref key, ref value, lockContext); + recordInfo.UnlockShared(); + return true; + } + + public bool TryLockShared(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext, int spinCount = 1) + => _clientSession.fht.SupportsLocking + ? _clientSession.functions.TryLockShared(ref recordInfo, ref key, ref value, ref lockContext, spinCount) + : recordInfo.TryLockShared(spinCount); + + public void LockExclusiveFromShared(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext) + { + if (_clientSession.fht.SupportsLocking) + _clientSession.functions.LockExclusiveFromShared(ref recordInfo, ref key, ref value, ref lockContext); + else + recordInfo.LockExclusiveFromShared(); + } + + public bool TryLockExclusiveFromShared(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext, int spinCount = 1) + => _clientSession.fht.SupportsLocking + ? 
_clientSession.functions.TryLockExclusiveFromShared(ref recordInfo, ref key, ref value, ref lockContext, spinCount) + : recordInfo.TryLockExclusiveFromShared(spinCount); + #endregion IFunctions - Locking + + #region IFunctions - Checkpointing + public void CheckpointCompletionCallback(string guid, CommitPoint commitPoint) + { + _clientSession.functions.CheckpointCompletionCallback(guid, commitPoint); + _clientSession.LatestCommitPoint = commitPoint; + } +#endregion IFunctions - Checkpointing + +#region Internal utilities + public int GetInitialLength(ref Input input) + => _clientSession.variableLengthStruct.GetInitialLength(ref input); + + public int GetLength(ref Value t, ref Input input) + => _clientSession.variableLengthStruct.GetLength(ref t, ref input); + + public IHeapContainer GetHeapContainer(ref Input input) + { + if (_clientSession.inputVariableLengthStruct == default) + return new StandardHeapContainer(ref input); + return new VarLenHeapContainer(ref input, _clientSession.inputVariableLengthStruct, _clientSession.fht.hlog.bufferPool); + } + + public void UnsafeResumeThread() => _clientSession.UnsafeResumeThread(); + + public void UnsafeSuspendThread() => _clientSession.UnsafeSuspendThread(); + + public bool CompletePendingWithOutputs(out CompletedOutputIterator completedOutputs, bool wait = false, bool spinWaitForCommit = false) + => _clientSession.CompletePendingWithOutputs(out completedOutputs, wait, spinWaitForCommit); +#endregion Internal utilities + } + +#endregion IFasterSession + } +} diff --git a/cs/src/core/Epochs/LightEpoch.cs b/cs/src/core/Epochs/LightEpoch.cs index d856a0530..dd4d40179 100644 --- a/cs/src/core/Epochs/LightEpoch.cs +++ b/cs/src/core/Epochs/LightEpoch.cs @@ -285,6 +285,16 @@ public void Resume() ProtectAndDrain(); } + /// + /// Thread resumes its epoch entry + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void Resume(out int resumeEpoch) + { + Acquire(); + resumeEpoch = ProtectAndDrain(); + } + /// /// 
Increment global current epoch /// diff --git a/cs/src/core/Index/Common/CompletedOutput.cs b/cs/src/core/Index/Common/CompletedOutput.cs index 401cb7c20..03fba8aa6 100644 --- a/cs/src/core/Index/Common/CompletedOutput.cs +++ b/cs/src/core/Index/Common/CompletedOutput.cs @@ -112,6 +112,11 @@ public struct CompletedOutput ///
    public Status Status; + /// + /// The lock context for + /// + public long LockContext; + internal void Set(ref FasterKV.PendingContext pendingContext, Status status) { this.keyContainer = pendingContext.key; diff --git a/cs/src/core/Index/Common/Contexts.cs b/cs/src/core/Index/Common/Contexts.cs index 37e9cde55..1bdd8f905 100644 --- a/cs/src/core/Index/Common/Contexts.cs +++ b/cs/src/core/Index/Common/Contexts.cs @@ -87,10 +87,12 @@ internal struct PendingContext internal byte operationFlags; internal RecordInfo recordInfo; internal long minAddress; + internal LockOperation lockOperation; // Note: Must be kept in sync with corresponding ReadFlags enum values internal const byte kSkipReadCache = 0x01; internal const byte kMinAddress = 0x02; + internal const byte kCopyReadsToTail = 0x04; internal const byte kNoKey = 0x10; internal const byte kSkipCopyReadsToTail = 0x20; @@ -117,7 +119,7 @@ internal static byte GetOperationFlags(ReadFlags readFlags, bool noKey = false) { Debug.Assert((byte)ReadFlags.SkipReadCache == kSkipReadCache); Debug.Assert((byte)ReadFlags.MinAddress == kMinAddress); - byte flags = (byte)(readFlags & (ReadFlags.SkipReadCache | ReadFlags.MinAddress)); + byte flags = (byte)(readFlags & (ReadFlags.SkipReadCache | ReadFlags.MinAddress | ReadFlags.CopyToTail)); if (noKey) flags |= kNoKey; // This is always set true for the Read overloads (Reads by address) that call this method. @@ -155,6 +157,12 @@ internal bool HasMinAddress set => operationFlags = value ? (byte)(operationFlags | kMinAddress) : (byte)(operationFlags & ~kMinAddress); } + internal bool CopyReadsToTail + { + get => (operationFlags & kCopyReadsToTail) != 0; + set => operationFlags = value ? 
(byte)(operationFlags | kCopyReadsToTail) : (byte)(operationFlags & ~kCopyReadsToTail); + } + internal bool SkipCopyReadsToTail { get => (operationFlags & kSkipCopyReadsToTail) != 0; diff --git a/cs/src/core/Index/Common/RecordInfo.cs b/cs/src/core/Index/Common/RecordInfo.cs index c8a3173e7..7eea6c22a 100644 --- a/cs/src/core/Index/Common/RecordInfo.cs +++ b/cs/src/core/Index/Common/RecordInfo.cs @@ -3,6 +3,7 @@ #pragma warning disable 1591 +using System.Diagnostics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using System.Threading; @@ -76,29 +77,17 @@ public static void WriteInfo(ref RecordInfo info, int checkpointVersion, bool to /// Take exclusive (write) lock on RecordInfo ///
    [MethodImpl(MethodImplOptions.AggressiveInlining)] - public void LockExclusive() - { - // Acquire exclusive lock (readers may still be present) - while (true) - { - long expected_word = word; - if ((expected_word & kExclusiveLockBitMask) == 0) - { - if (expected_word == Interlocked.CompareExchange(ref word, expected_word | kExclusiveLockBitMask, expected_word)) - break; - } - Thread.Yield(); - } - - // Wait for readers to drain - while ((word & kSharedLockMaskInWord) != 0) Thread.Yield(); - } + public void LockExclusive() => TryLockExclusive(spinCount: -1); /// /// Unlock RecordInfo that was previously locked for exclusive access, via /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - public void UnlockExclusive() => word &= ~kExclusiveLockBitMask; // Safe because there should be no other threads (e.g., readers) updating the word at this point + public void UnlockExclusive() + { + Debug.Assert((word & kExclusiveLockBitMask) != 0); + word &= ~kExclusiveLockBitMask; // Safe because there should be no other threads (e.g., readers) updating the word at this point + } /// /// Try to take an exclusive (write) lock on RecordInfo @@ -108,7 +97,7 @@ public void LockExclusive() [MethodImpl(MethodImplOptions.AggressiveInlining)] public bool TryLockExclusive(int spinCount = 1) { - // Acquire exclusive lock (readers may still be present) + // Acquire exclusive lock (readers may still be present; we'll drain them later) while (true) { long expected_word = word; @@ -117,7 +106,7 @@ public bool TryLockExclusive(int spinCount = 1) if (expected_word == Interlocked.CompareExchange(ref word, expected_word | kExclusiveLockBitMask, expected_word)) break; } - if (--spinCount <= 0) return false; + if (spinCount > 0 && --spinCount <= 0) return false; Thread.Yield(); } @@ -130,7 +119,25 @@ public bool TryLockExclusive(int spinCount = 1) /// Take shared (read) lock on RecordInfo /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - public void LockShared() + public void 
LockShared() => TryLockShared(spinCount: -1); + + /// + /// Unlock RecordInfo that was previously locked for shared access, via + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void UnlockShared() + { + Debug.Assert((word & kSharedLockMaskInWord) != 0); + Interlocked.Add(ref word, -kSharedLockIncrement); + } + + /// + /// Take shared (read) lock on RecordInfo + /// + /// Number of attempts before giving up + /// Whether lock was acquired successfully + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public bool TryLockShared(int spinCount = 1) { // Acquire shared lock while (true) @@ -142,35 +149,41 @@ public void LockShared() if (expected_word == Interlocked.CompareExchange(ref word, expected_word + kSharedLockIncrement, expected_word)) break; } + if (spinCount > 0 && --spinCount <= 0) return false; Thread.Yield(); } + return true; } /// - /// Unlock RecordInfo that was previously locked for shared access, via + /// Take shared (read) lock on RecordInfo /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - public void UnlockShared() => Interlocked.Add(ref word, -kSharedLockIncrement); + public void LockExclusiveFromShared() => TryLockExclusiveFromShared(spinCount: -1); /// - /// Take shared (read) lock on RecordInfo + /// Promote a shared (read) lock on RecordInfo to exclusive /// /// Number of attempts before giving up /// Whether lock was acquired successfully [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool TryLockShared(int spinCount = 1) + public bool TryLockExclusiveFromShared(int spinCount = 1) { // Acquire shared lock while (true) { long expected_word = word; - if (((expected_word & kExclusiveLockBitMask) == 0) // not exclusively locked - && (expected_word & kSharedLockMaskInWord) != kSharedLockMaskInWord) // shared lock is not full + if ((expected_word & kExclusiveLockBitMask) == 0) // not exclusively locked { - if (expected_word == Interlocked.CompareExchange(ref word, expected_word + kSharedLockIncrement, 
expected_word)) + var new_word = expected_word | kExclusiveLockBitMask; + if ((expected_word & kSharedLockMaskInWord) != 0) // shared lock is not empty + new_word -= kSharedLockIncrement; + else + Debug.Fail($"SharedLock count should not be 0"); + if (expected_word == Interlocked.CompareExchange(ref word, new_word, expected_word)) break; } - if (--spinCount <= 0) return false; + if (spinCount > 0 && --spinCount <= 0) return false; Thread.Yield(); } return true; @@ -200,7 +213,7 @@ public bool Valid public bool Stub { - get => (word & kTombstoneBitMask) > 0; + get => (word & kStubBitMask) > 0; set { if (value) word |= kStubBitMask; diff --git a/cs/src/core/Index/FASTER/FASTER.cs b/cs/src/core/Index/FASTER/FASTER.cs index afff26c1a..1fd959bc0 100644 --- a/cs/src/core/Index/FASTER/FASTER.cs +++ b/cs/src/core/Index/FASTER/FASTER.cs @@ -20,14 +20,27 @@ namespace FASTER.core [Flags] public enum ReadFlags { - /// Default read operation + /// + /// Default read operation + /// None = 0, - /// Skip the ReadCache when reading, including not inserting to ReadCache when pending reads are complete + /// + /// Skip the ReadCache when reading, including not inserting to ReadCache when pending reads are complete + /// SkipReadCache = 0x00000001, - /// The minimum address at which to resolve the Key; return if the key is not found at this address or higher + /// + /// The minimum address at which to resolve the Key; return if the key is not found at this address or higher + /// MinAddress = 0x00000002, + + /// + /// Force a copy to tail if we read from immutable or on-disk. If this and ReadCache are both specified, ReadCache wins. + /// This avoids log pollution for read-mostly workloads. Used mostly in conjunction with + /// locking. 
+ /// + CopyToTail = 0x00000004, } public partial class FasterKV : FasterBase, @@ -88,6 +101,8 @@ public partial class FasterKV : FasterBase, internal ConcurrentDictionary _recoveredSessions; + internal bool SupportsLocking; + /// /// Create FASTER instance /// @@ -97,10 +112,11 @@ public partial class FasterKV : FasterBase, /// Serializer settings /// FASTER equality comparer for key /// + /// FASTER settings public FasterKV(long size, LogSettings logSettings, CheckpointSettings checkpointSettings = null, SerializerSettings serializerSettings = null, IFasterEqualityComparer comparer = null, - VariableLengthStructSettings variableLengthStructSettings = null) + VariableLengthStructSettings variableLengthStructSettings = null, FasterSettings fasterSettings = null) { if (comparer != null) this.comparer = comparer; @@ -108,7 +124,7 @@ public FasterKV(long size, LogSettings logSettings, { if (typeof(IFasterEqualityComparer).IsAssignableFrom(typeof(Key))) { - if (default(Key) != null) + if (default(Key) is not null) { this.comparer = default(Key) as IFasterEqualityComparer; } @@ -123,7 +139,12 @@ public FasterKV(long size, LogSettings logSettings, } } - if (checkpointSettings == null) + if (fasterSettings is not null) + { + this.SupportsLocking = fasterSettings.SupportsLocking; + } + + if (checkpointSettings is null) checkpointSettings = new CheckpointSettings(); if (checkpointSettings.CheckpointDir != null && checkpointSettings.CheckpointManager != null) @@ -146,13 +167,13 @@ public FasterKV(long size, LogSettings logSettings, new DirectoryInfo(checkpointSettings.CheckpointDir ?? 
".").FullName), removeOutdated: checkpointSettings.RemoveOutdated); } - if (checkpointSettings.CheckpointManager == null) + if (checkpointSettings.CheckpointManager is null) disposeCheckpointManager = true; UseFoldOverCheckpoint = checkpointSettings.CheckPointType == core.CheckpointType.FoldOver; CopyReadsToTail = logSettings.CopyReadsToTail; - if (logSettings.ReadCacheSettings != null) + if (logSettings.ReadCacheSettings is not null) { CopyReadsToTail = CopyReadsToTail.None; UseReadCache = true; @@ -160,8 +181,8 @@ public FasterKV(long size, LogSettings logSettings, UpdateVarLen(ref variableLengthStructSettings); - if ((!Utility.IsBlittable() && variableLengthStructSettings?.keyLength == null) || - (!Utility.IsBlittable() && variableLengthStructSettings?.valueLength == null)) + if ((!Utility.IsBlittable() && variableLengthStructSettings?.keyLength is null) || + (!Utility.IsBlittable() && variableLengthStructSettings?.valueLength is null)) { WriteDefaultOnDelete = true; @@ -541,8 +562,10 @@ internal Status ContextRead(ref Key key, where FasterSession : IFasterSession { var pcontext = default(PendingContext); - var internalStatus = InternalRead(ref key, ref input, ref output, Constants.kInvalidAddress, ref context, ref pcontext, fasterSession, sessionCtx, serialNo); - Debug.Assert(internalStatus != OperationStatus.RETRY_NOW); + OperationStatus internalStatus; + do + internalStatus = InternalRead(ref key, ref input, ref output, Constants.kInvalidAddress, ref context, ref pcontext, fasterSession, sessionCtx, serialNo); + while (internalStatus == OperationStatus.RETRY_NOW); Status status; if (internalStatus == OperationStatus.SUCCESS || internalStatus == OperationStatus.NOTFOUND) @@ -560,24 +583,29 @@ internal Status ContextRead(ref Key key, } [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal Status ContextRead(ref Key key, ref Input input, ref Output output, ref RecordMetadata recordMetadata, ReadFlags readFlags, Context context, + internal Status 
ContextRead(ref Key key, ref Input input, ref Output output, ref LockOperation lockOp, ref RecordMetadata recordMetadata, ReadFlags readFlags, Context context, FasterSession fasterSession, long serialNo, FasterExecutionContext sessionCtx) where FasterSession : IFasterSession { var pcontext = default(PendingContext); pcontext.SetOperationFlags(readFlags, recordMetadata.RecordInfo.PreviousAddress); - var internalStatus = InternalRead(ref key, ref input, ref output, recordMetadata.RecordInfo.PreviousAddress, ref context, ref pcontext, fasterSession, sessionCtx, serialNo); - Debug.Assert(internalStatus != OperationStatus.RETRY_NOW); + pcontext.lockOperation = lockOp; + OperationStatus internalStatus; + do + internalStatus = InternalRead(ref key, ref input, ref output, recordMetadata.RecordInfo.PreviousAddress, ref context, ref pcontext, fasterSession, sessionCtx, serialNo); + while (internalStatus == OperationStatus.RETRY_NOW); Status status; if (internalStatus == OperationStatus.SUCCESS || internalStatus == OperationStatus.NOTFOUND) { recordMetadata = new(pcontext.recordInfo, pcontext.logicalAddress); + lockOp.LockContext = pcontext.lockOperation.LockContext; status = (Status)internalStatus; } else { recordMetadata = default; + lockOp.LockContext = default; status = HandleOperationStatus(sessionCtx, sessionCtx, ref pcontext, fasterSession, internalStatus, false, out _); } @@ -594,8 +622,37 @@ internal Status ContextReadAtAddress(long var pcontext = default(PendingContext); pcontext.SetOperationFlags(readFlags, address, noKey: true); Key key = default; - var internalStatus = InternalRead(ref key, ref input, ref output, address, ref context, ref pcontext, fasterSession, sessionCtx, serialNo); - Debug.Assert(internalStatus != OperationStatus.RETRY_NOW); + OperationStatus internalStatus; + do + internalStatus = InternalRead(ref key, ref input, ref output, address, ref context, ref pcontext, fasterSession, sessionCtx, serialNo); + while (internalStatus == 
OperationStatus.RETRY_NOW); + + Status status; + if (internalStatus == OperationStatus.SUCCESS || internalStatus == OperationStatus.NOTFOUND) + { + status = (Status)internalStatus; + } + else + { + status = HandleOperationStatus(sessionCtx, sessionCtx, ref pcontext, fasterSession, internalStatus, false, out _); + } + + Debug.Assert(serialNo >= sessionCtx.serialNum, "Operation serial numbers must be non-decreasing"); + sessionCtx.serialNum = serialNo; + return status; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal Status ContextUpsert(ref Key key, ref Input input, ref Value value, ref Output output, + Context context, FasterSession fasterSession, long serialNo, FasterExecutionContext sessionCtx) + where FasterSession : IFasterSession + { + var pcontext = default(PendingContext); + OperationStatus internalStatus; + + do + internalStatus = InternalUpsert(ref key, ref input, ref value, ref output, ref context, ref pcontext, fasterSession, sessionCtx, serialNo); + while (internalStatus == OperationStatus.RETRY_NOW); Status status; if (internalStatus == OperationStatus.SUCCESS || internalStatus == OperationStatus.NOTFOUND) @@ -613,11 +670,12 @@ internal Status ContextReadAtAddress(long } [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal Status ContextUpsert(ref Key key, ref Input input, ref Value value, ref Output output, out RecordMetadata recordMetadata, + internal Status ContextUpsert(ref Key key, ref Input input, ref Value value, ref Output output, ref LockOperation lockOp, out RecordMetadata recordMetadata, Context context, FasterSession fasterSession, long serialNo, FasterExecutionContext sessionCtx) where FasterSession : IFasterSession { var pcontext = default(PendingContext); + pcontext.lockOperation = lockOp; OperationStatus internalStatus; do @@ -628,11 +686,13 @@ internal Status ContextUpsert(ref Key key if (internalStatus == OperationStatus.SUCCESS || internalStatus == OperationStatus.NOTFOUND) { recordMetadata = 
new(pcontext.recordInfo, pcontext.logicalAddress); + lockOp.LockContext = pcontext.lockOperation.LockContext; status = (Status)internalStatus; } else { recordMetadata = default; + lockOp.LockContext = default; status = HandleOperationStatus(sessionCtx, sessionCtx, ref pcontext, fasterSession, internalStatus, false, out _); } diff --git a/cs/src/core/Index/FASTER/FASTERImpl.cs b/cs/src/core/Index/FASTER/FASTERImpl.cs index c5e5cc123..0f35857bb 100644 --- a/cs/src/core/Index/FASTER/FASTERImpl.cs +++ b/cs/src/core/Index/FASTER/FASTERImpl.cs @@ -154,7 +154,7 @@ internal OperationStatus InternalRead( // This is not called when looking up by address, so we do not set pendingContext.recordInfo. // ReadCache addresses are not valid for indexing etc. so pass kInvalidAddress. - return fasterSession.SingleReader(ref key, ref input, ref readcache.GetValue(physicalAddress), ref output, ref readcache.GetInfo(physicalAddress), Constants.kInvalidAddress) + return fasterSession.SingleReader(ref key, ref input, ref readcache.GetValue(physicalAddress), ref output, ref pendingContext.lockOperation, ref readcache.GetInfo(physicalAddress), Constants.kInvalidAddress) ? OperationStatus.SUCCESS : OperationStatus.NOTFOUND; } } @@ -202,8 +202,17 @@ internal OperationStatus InternalRead( ref RecordInfo recordInfo = ref hlog.GetInfo(physicalAddress); pendingContext.recordInfo = recordInfo; pendingContext.logicalAddress = logicalAddress; - return !pendingContext.recordInfo.Tombstone - && fasterSession.ConcurrentReader(ref key, ref input, ref hlog.GetValue(physicalAddress), ref output, ref recordInfo, logicalAddress) + ref Value recordValue = ref hlog.GetValue(physicalAddress); + if (recordInfo.Sealed || recordInfo.Stub) + { + // These are "intermediate" states that will be replaced by either a Delete or an Upsert of an actual value. Acquire the lock, then release and retry immediately. 
+ long lockContext = default; + fasterSession.LockShared(ref recordInfo, ref key, ref recordValue, ref lockContext); + fasterSession.UnlockShared(ref recordInfo, ref key, ref recordValue, lockContext); + return OperationStatus.RETRY_NOW; + } + return !recordInfo.Tombstone + && fasterSession.ConcurrentReader(ref key, ref input, ref recordValue, ref output, ref pendingContext.lockOperation, ref recordInfo, logicalAddress) ? OperationStatus.SUCCESS : OperationStatus.NOTFOUND; } @@ -213,13 +222,18 @@ internal OperationStatus InternalRead( { pendingContext.recordInfo = hlog.GetInfo(physicalAddress); pendingContext.logicalAddress = logicalAddress; + if (pendingContext.recordInfo.Sealed || pendingContext.recordInfo.Stub) + { + // These are "intermediate" states that will be replaced by either a Delete or an Upsert of an actual value. We can't lock immutable records, so retry immediately. + return OperationStatus.RETRY_NOW; + } if (!pendingContext.recordInfo.Tombstone - && fasterSession.SingleReader(ref key, ref input, ref hlog.GetValue(physicalAddress), ref output, ref pendingContext.recordInfo, logicalAddress)) + && fasterSession.SingleReader(ref key, ref input, ref hlog.GetValue(physicalAddress), ref output, ref pendingContext.lockOperation, ref pendingContext.recordInfo, logicalAddress)) { - if (CopyReadsToTail == CopyReadsToTail.FromReadOnly && !pendingContext.SkipCopyReadsToTail) + if ((CopyReadsToTail == CopyReadsToTail.FromReadOnly && !pendingContext.SkipCopyReadsToTail) || pendingContext.CopyReadsToTail) { var container = hlog.GetValueContainer(ref hlog.GetValue(physicalAddress)); - InternalTryCopyToTail(ref key, ref input, ref container.Get(), ref output, logicalAddress, fasterSession, sessionCtx); + InternalTryCopyToTail(ref key, ref input, ref container.Get(), ref output, ref pendingContext.lockOperation, logicalAddress, fasterSession, sessionCtx); container.Dispose(); } return OperationStatus.SUCCESS; @@ -385,16 +399,46 @@ internal OperationStatus 
InternalUpsert( #endregion // Optimization for the most common case + long stubOrSealedPhysicalAddress = Constants.kInvalidAddress; + long sealedLockContext = default; if (sessionCtx.phase == Phase.REST && logicalAddress >= hlog.ReadOnlyAddress) { ref RecordInfo recordInfo = ref hlog.GetInfo(physicalAddress); - if (!recordInfo.Tombstone - && fasterSession.ConcurrentWriter(ref key, ref input, ref value, ref hlog.GetValue(physicalAddress), ref output, ref recordInfo, logicalAddress)) + if (recordInfo.Stub) { - hlog.MarkPage(logicalAddress, sessionCtx.version); - pendingContext.recordInfo = recordInfo; - pendingContext.logicalAddress = logicalAddress; - return OperationStatus.SUCCESS; + // This is only for ManualFasterOperations, and we assume we hold this lock. We will need to transfer the lock + // to the updated record. + stubOrSealedPhysicalAddress = physicalAddress; + goto CreateNewRecord; + } + ref Value recordValue = ref hlog.GetValue(physicalAddress); + if (recordInfo.Sealed) + { + // This is an "intermediate" state from a different session that will be replaced by an Upsert of an updated value. Acquire the lock, then release and retry immediately. + long lockContext = default; + fasterSession.LockShared(ref recordInfo, ref key, ref recordValue, ref lockContext); + fasterSession.UnlockShared(ref recordInfo, ref key, ref recordValue, lockContext); + return OperationStatus.RETRY_NOW; + } + if (!recordInfo.Tombstone) + { + if (fasterSession.ConcurrentWriter(ref key, ref input, ref value, ref hlog.GetValue(physicalAddress), ref output, ref pendingContext.lockOperation, ref recordInfo, logicalAddress)) + { + hlog.MarkPage(logicalAddress, sessionCtx.version); + pendingContext.recordInfo = recordInfo; + pendingContext.logicalAddress = logicalAddress; + return OperationStatus.SUCCESS; + } + + // ConcurrentWriter failed (e.g. insufficient space). Another thread may come along to do this update in-place; Seal it to prevent that. 
+ stubOrSealedPhysicalAddress = physicalAddress; + fasterSession.LockExclusive(ref recordInfo, ref key, ref recordValue, ref sealedLockContext); + if (recordInfo.Sealed || recordInfo.Invalid || recordInfo.Tombstone) + { + // Another thread was doing the same thing; unlock and retry. + fasterSession.UnlockExclusive(ref recordInfo, ref key, ref recordValue, sealedLockContext); + return OperationStatus.RETRY_NOW; + } } goto CreateNewRecord; } @@ -416,7 +460,7 @@ internal OperationStatus InternalUpsert( { ref RecordInfo recordInfo = ref hlog.GetInfo(physicalAddress); if (!recordInfo.Tombstone - && fasterSession.ConcurrentWriter(ref key, ref input, ref value, ref hlog.GetValue(physicalAddress), ref output, ref recordInfo, logicalAddress)) + && fasterSession.ConcurrentWriter(ref key, ref input, ref value, ref hlog.GetValue(physicalAddress), ref output, ref pendingContext.lockOperation, ref recordInfo, logicalAddress)) { if (sessionCtx.phase == Phase.REST) hlog.MarkPage(logicalAddress, sessionCtx.version); else hlog.MarkPageAtomic(logicalAddress, sessionCtx.version); @@ -438,7 +482,18 @@ internal OperationStatus InternalUpsert( // Immutable region or new record status = CreateNewRecordUpsert(ref key, ref input, ref value, ref output, ref pendingContext, fasterSession, sessionCtx, bucket, slot, tag, entry, latestLogicalAddress); if (status != OperationStatus.ALLOCATE_FAILED) + { + if (stubOrSealedPhysicalAddress != Constants.kInvalidAddress) + { + // Mark the *old* record as Invalid and unlock it--thereby "transferring" the lock to the new record. 
+ ref RecordInfo recordInfo = ref hlog.GetInfo(stubOrSealedPhysicalAddress); + recordInfo.Stub = false; + recordInfo.Sealed = false; + recordInfo.Invalid = true; + fasterSession.UnlockExclusive(ref recordInfo, ref key, ref hlog.GetValue(physicalAddress), sealedLockContext); + } goto LatchRelease; + } latchDestination = LatchDestination.CreatePendingContext; } #endregion @@ -576,7 +631,7 @@ private OperationStatus CreateNewRecordUpsert( out physicalAddress); } } -#endregion + #endregion // Optimization for the most common case + long sealedPhysicalAddress = Constants.kInvalidAddress; + long sealedLockContext = default; if (sessionCtx.phase == Phase.REST && logicalAddress >= hlog.ReadOnlyAddress) { ref RecordInfo recordInfo = ref hlog.GetInfo(physicalAddress); - if (!recordInfo.Tombstone - && fasterSession.InPlaceUpdater(ref key, ref input, ref hlog.GetValue(physicalAddress), ref output, ref recordInfo, logicalAddress)) + + ref Value recordValue = ref hlog.GetValue(physicalAddress); + if (recordInfo.Sealed || recordInfo.Stub) { - hlog.MarkPage(logicalAddress, sessionCtx.version); - pendingContext.recordInfo = recordInfo; - pendingContext.logicalAddress = logicalAddress; - return OperationStatus.SUCCESS; + // These are "intermediate" states that will be replaced by either a Delete or an Upsert of an actual value. Acquire the lock, then release and retry immediately. 
+ long lockContext = default; + fasterSession.LockShared(ref recordInfo, ref key, ref recordValue, ref lockContext); + fasterSession.UnlockShared(ref recordInfo, ref key, ref recordValue, lockContext); + return OperationStatus.RETRY_NOW; + } + + if (!recordInfo.Tombstone) + { + if (fasterSession.InPlaceUpdater(ref key, ref input, ref hlog.GetValue(physicalAddress), ref output, ref recordInfo, logicalAddress)) + { + hlog.MarkPage(logicalAddress, sessionCtx.version); + pendingContext.recordInfo = recordInfo; + pendingContext.logicalAddress = logicalAddress; + return OperationStatus.SUCCESS; + } + + // ConcurrentWriter failed (e.g. insufficient space). Another thread may come along to do this update in-place; Seal it to prevent that. + sealedPhysicalAddress = physicalAddress; + fasterSession.LockExclusive(ref recordInfo, ref key, ref recordValue, ref sealedLockContext); + if (recordInfo.Sealed || recordInfo.Invalid || recordInfo.Tombstone) + { + // Another thread was doing the same thing; unlock and retry. + fasterSession.UnlockExclusive(ref recordInfo, ref key, ref recordValue, sealedLockContext); + return OperationStatus.RETRY_NOW; + } } goto CreateNewRecord; } @@ -725,7 +803,6 @@ internal OperationStatus InternalRMW( } #endregion - #region Normal processing // Mutable Region: Update the record in-place @@ -805,7 +882,19 @@ internal OperationStatus InternalRMW( { status = CreateNewRecordRMW(ref key, ref input, ref output, ref pendingContext, fasterSession, sessionCtx, bucket, slot, logicalAddress, physicalAddress, tag, entry, latestLogicalAddress); if (status != OperationStatus.ALLOCATE_FAILED) + { + if (sealedPhysicalAddress != Constants.kInvalidAddress) + { + // Mark the *old* record as Invalid and unlock it--thereby "transferring" the lock to the new record. 
+ ref RecordInfo recordInfo = ref hlog.GetInfo(sealedPhysicalAddress); + recordInfo.Stub = false; + recordInfo.Sealed = false; + recordInfo.Invalid = true; + fasterSession.UnlockExclusive(ref recordInfo, ref key, ref hlog.GetValue(physicalAddress), sealedLockContext); + } goto LatchRelease; + + } latchDestination = LatchDestination.CreatePendingContext; } #endregion @@ -1024,8 +1113,6 @@ ref hlog.GetValue(newPhysicalAddress), else { // CAS failed - if (fasterSession.SupportsLocking) - fasterSession.UnlockExclusive(ref recordInfo, ref key, ref hlog.GetValue(newPhysicalAddress), lockContext); hlog.GetInfo(newPhysicalAddress).Invalid = true; } status = OperationStatus.RETRY_NOW; @@ -1195,6 +1282,17 @@ internal OperationStatus InternalDelete( if (logicalAddress >= hlog.ReadOnlyAddress) { ref RecordInfo recordInfo = ref hlog.GetInfo(physicalAddress); + + if (recordInfo.Sealed || recordInfo.Stub) + { + // These are "intermediate" states that will be replaced by either a Delete or an Upsert of an actual value. Acquire the lock, then release and retry immediately. + ref Value recordValue = ref hlog.GetValue(physicalAddress); + long lockContext = default; + fasterSession.LockShared(ref recordInfo, ref key, ref recordValue, ref lockContext); + fasterSession.UnlockShared(ref recordInfo, ref key, ref recordValue, lockContext); + return OperationStatus.RETRY_NOW; + } + ref Value value = ref hlog.GetValue(physicalAddress); // The concurrent delete may fail if the record is sealed @@ -1251,7 +1349,8 @@ internal OperationStatus InternalDelete( hlog.Serialize(ref key, newPhysicalAddress); // There is no Value to lock, so we lock the RecordInfo directly. 
TODO: Updaters must honor this lock as well - recordInfo.LockExclusive(); + if (fasterSession.SupportsLocking) + recordInfo.LockExclusive(); var updatedEntry = default(HashBucketEntry); updatedEntry.Tag = tag; @@ -1269,7 +1368,6 @@ internal OperationStatus InternalDelete( // Note that this is the new logicalAddress; we have not retrieved the old one if it was below HeadAddress, and thus // we do not know whether 'logicalAddress' belongs to 'key' or is a collision. fasterSession.PostSingleDeleter(ref key, ref recordInfo, newLogicalAddress); - recordInfo.UnlockExclusive(); pendingContext.recordInfo = recordInfo; pendingContext.logicalAddress = newLogicalAddress; status = OperationStatus.SUCCESS; @@ -1277,7 +1375,6 @@ internal OperationStatus InternalDelete( } else { - recordInfo.UnlockExclusive(); recordInfo.Invalid = true; status = OperationStatus.RETRY_NOW; goto LatchRelease; @@ -1426,6 +1523,12 @@ internal OperationStatus InternalContinuePendingRead @@ -1705,7 +1808,8 @@ internal Status HandleOperationStatus( SynchronizeEpoch(opCtx, currentCtx, ref pendingContext, fasterSession); } - if (status == OperationStatus.CPR_SHIFT_DETECTED || ((asyncOp || RelaxedCPR) && status == OperationStatus.RETRY_LATER)) + Debug.Assert(status != OperationStatus.RETRY_NOW || pendingContext.type == OperationType.READ); + if (status == OperationStatus.CPR_SHIFT_DETECTED || status == OperationStatus.RETRY_NOW || ((asyncOp || RelaxedCPR) && status == OperationStatus.RETRY_LATER)) { #region Retry as (v+1) Operation var internalStatus = default(OperationStatus); @@ -1951,8 +2055,9 @@ internal OperationStatus InternalCopyToTail { OperationStatus internalStatus; + LockOperation dummyLockOperation = default; do - internalStatus = InternalTryCopyToTail(currentCtx, ref key, ref input, ref value, ref output, expectedLogicalAddress, fasterSession, currentCtx, noReadCache); + internalStatus = InternalTryCopyToTail(currentCtx, ref key, ref input, ref value, ref output, ref dummyLockOperation, 
expectedLogicalAddress, fasterSession, currentCtx, noReadCache); while (internalStatus == OperationStatus.RETRY_NOW); return internalStatus; } @@ -1960,13 +2065,13 @@ internal OperationStatus InternalCopyToTail( - ref Key key, ref Input input, ref Value value, ref Output output, + ref Key key, ref Input input, ref Value value, ref Output output, ref LockOperation lockOperation, long foundLogicalAddress, FasterSession fasterSession, FasterExecutionContext currentCtx, bool noReadCache = false) where FasterSession : IFasterSession - => InternalTryCopyToTail(currentCtx, ref key, ref input, ref value, ref output, foundLogicalAddress, fasterSession, currentCtx, noReadCache); + => InternalTryCopyToTail(currentCtx, ref key, ref input, ref value, ref output, ref lockOperation, foundLogicalAddress, fasterSession, currentCtx, noReadCache); /// /// Helper function for trying to copy existing immutable records (at foundLogicalAddress) to the tail, @@ -1988,6 +2093,7 @@ internal OperationStatus InternalTryCopyToTail /// /// + /// /// /// The expected address of the record being copied. /// @@ -2005,7 +2111,7 @@ internal OperationStatus InternalTryCopyToTail internal OperationStatus InternalTryCopyToTail( FasterExecutionContext opCtx, - ref Key key, ref Input input, ref Value value, ref Output output, + ref Key key, ref Input input, ref Value value, ref Output output, ref LockOperation lockOperation, long expectedLogicalAddress, FasterSession fasterSession, FasterExecutionContext currentCtx, @@ -2064,7 +2170,7 @@ internal OperationStatus InternalTryCopyToTail + /// Settings for the store + /// + public class FasterSettings + { + /// + /// Whether this FasterKV instance supports locking. Iff so, FASTER will call the locking methods of . 
+ /// + public bool SupportsLocking { get; set; } + } +} diff --git a/cs/src/core/Index/FASTER/LogCompactionFunctions.cs b/cs/src/core/Index/FASTER/LogCompactionFunctions.cs index 23490a735..65ef7d217 100644 --- a/cs/src/core/Index/FASTER/LogCompactionFunctions.cs +++ b/cs/src/core/Index/FASTER/LogCompactionFunctions.cs @@ -76,5 +76,7 @@ public void UnlockExclusive(ref RecordInfo recordInfo, ref Key key, ref Value va public void LockShared(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext) { } public bool UnlockShared(ref RecordInfo recordInfo, ref Key key, ref Value value, long lockContext) => true; public bool TryLockShared(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext, int spinCount = 1) => true; + public void LockExclusiveFromShared(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext) { } + public bool TryLockExclusiveFromShared(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext, int spinCount = 1) => true; } } \ No newline at end of file diff --git a/cs/src/core/Index/FasterLog/FasterLogSettings.cs b/cs/src/core/Index/FasterLog/FasterLogSettings.cs index 9fe7cc8e5..a44e96308 100644 --- a/cs/src/core/Index/FasterLog/FasterLogSettings.cs +++ b/cs/src/core/Index/FasterLog/FasterLogSettings.cs @@ -1,10 +1,6 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT license. 
-#pragma warning disable 0162 - -using System; - namespace FASTER.core { /// diff --git a/cs/src/core/Index/Interfaces/FunctionsBase.cs b/cs/src/core/Index/Interfaces/FunctionsBase.cs index 29508f1fd..7bdd2fa70 100644 --- a/cs/src/core/Index/Interfaces/FunctionsBase.cs +++ b/cs/src/core/Index/Interfaces/FunctionsBase.cs @@ -95,6 +95,10 @@ public virtual bool UnlockShared(ref RecordInfo recordInfo, ref Key key, ref Val /// public virtual bool TryLockShared(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext, int spinCount = 1) => recordInfo.TryLockShared(spinCount); + + public virtual void LockExclusiveFromShared(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext) => recordInfo.LockExclusiveFromShared(); + + public virtual bool TryLockExclusiveFromShared(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext, int spinCount = 1) => recordInfo.TryLockExclusiveFromShared(spinCount); } /// diff --git a/cs/src/core/Index/Interfaces/IFasterSession.cs b/cs/src/core/Index/Interfaces/IFasterSession.cs index 33df4c8c1..d7bda3cd0 100644 --- a/cs/src/core/Index/Interfaces/IFasterSession.cs +++ b/cs/src/core/Index/Interfaces/IFasterSession.cs @@ -15,23 +15,71 @@ internal interface IFasterSession } /// - /// Provides thread management and all callbacks. + /// Provides thread management and all callbacks. A wrapper for IFunctions and additional methods called by FasterImpl; the wrapped + /// IFunctions methods provide additional parameters to support the wrapper functionality, then call through to the user implementations. /// /// /// /// /// /// - internal interface IFasterSession : IFunctions, IFasterSession, IVariableLengthStruct + internal interface IFasterSession : IFasterSession, IVariableLengthStruct { - // Overloads for locking. 
Except for readcache/copy-to-tail usage of SingleWriter, all operations that append a record must lock in the () call and unlock - // in the Post call; otherwise another session can try to access the record as soon as it's CAS'd and before Post is called. - void SingleWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address, out long lockContext); - void PostSingleWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address, long lockContext); + #region Optional features supported by this implementation + bool SupportsLocking { get; } + + bool SupportsPostOperations { get; } + #endregion Optional features supported by this implementation + + #region Reads + bool SingleReader(ref Key key, ref Input input, ref Value value, ref Output dst, ref LockOperation lockOp, ref RecordInfo recordInfo, long address); + bool ConcurrentReader(ref Key key, ref Input input, ref Value value, ref Output dst, ref LockOperation lockOp, ref RecordInfo recordInfo, long address); + void ReadCompletionCallback(ref Key key, ref Input input, ref Output output, Context ctx, Status status, RecordMetadata recordMetadata); + #endregion reads + + #region Upserts + void SingleWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref LockOperation lockOp, ref RecordInfo recordInfo, long address); + void PostSingleWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref LockOperation lockOp, ref RecordInfo recordInfo, long address); + bool ConcurrentWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref LockOperation lockOp, ref RecordInfo recordInfo, long address); + void UpsertCompletionCallback(ref Key key, ref Input input, ref Value value, Context ctx); + #endregion Upserts + + #region RMWs + #region InitialUpdater + bool NeedInitialUpdate(ref Key key, ref Input 
input, ref Output output); void InitialUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, long address, out long lockContext); void PostInitialUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, long address, long lockContext); + #endregion InitialUpdater + + #region CopyUpdater + bool NeedCopyUpdate(ref Key key, ref Input input, ref Value oldValue, ref Output output); void CopyUpdater(ref Key key, ref Input input, ref Value oldValue, ref Value newValue, ref Output output, ref RecordInfo recordInfo, long address, out long lockContext); bool PostCopyUpdater(ref Key key, ref Input input, ref Value oldValue, ref Value newValue, ref Output output, ref RecordInfo recordInfo, long address, long lockContext); + #endregion CopyUpdater + + #region InPlaceUpdater + bool InPlaceUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, long address); + #endregion InPlaceUpdater + + void RMWCompletionCallback(ref Key key, ref Input input, ref Output output, Context ctx, Status status, RecordMetadata recordMetadata); + #endregion RMWs + + #region Deletes + void PostSingleDeleter(ref Key key, ref RecordInfo recordInfo, long address); + bool ConcurrentDeleter(ref Key key, ref Value value, ref RecordInfo recordInfo, long address); + void DeleteCompletionCallback(ref Key key, Context ctx); + #endregion Deletes + + #region Locking + void LockExclusive(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext); + void UnlockExclusive(ref RecordInfo recordInfo, ref Key key, ref Value value, long lockContext); + bool TryLockExclusive(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext, int spinCount = 1); + void LockShared(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext); + bool UnlockShared(ref RecordInfo recordInfo, ref Key key, ref Value value, long lockContext); + bool 
TryLockShared(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext, int spinCount = 1); + void LockExclusiveFromShared(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext); + bool TryLockExclusiveFromShared(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext, int spinCount = 1); + #endregion Locking bool CompletePendingWithOutputs(out CompletedOutputIterator completedOutputs, bool wait = false, bool spinWaitForCommit = false); diff --git a/cs/src/core/Index/Interfaces/IFunctions.cs b/cs/src/core/Index/Interfaces/IFunctions.cs index 4d9ab1b07..80f80a5e5 100644 --- a/cs/src/core/Index/Interfaces/IFunctions.cs +++ b/cs/src/core/Index/Interfaces/IFunctions.cs @@ -14,19 +14,12 @@ namespace FASTER.core public interface IFunctions { #region Optional features supported by this implementation - /// - /// Whether this Functions instance supports locking. Iff so, FASTER will call - /// or to lock the record as appropriate, and - /// or to match. - /// - bool SupportsLocking { get; } - /// /// Whether this Functions instance supports operations on records after they have been successfully appended to the log. For example, /// after copies a list, /// can add items to it. /// - /// Once the record has been appended it is visible to other sessions, so locking will be done per + /// Once the record has been appended it is visible to other sessions, so locking will be done per bool SupportsPostOperations { get; } #endregion Optional features supported by this implementation @@ -325,6 +318,34 @@ public interface IFunctions /// True if the lock was acquired, else false. /// bool TryLockShared(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext, int spinCount = 1); + + /// + /// User-provided lock promotion call, converting a shared lock into an exclusive lock, defaulting to no-op. A default implementation is available via . 
+ /// + /// The header for the current record + /// The key for the current record + /// The value for the current record + /// Context-specific information; will be passed to + /// + /// This is called only for records guaranteed to be in the mutable range. + /// + void LockExclusiveFromShared(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext); + + /// + /// User-provided lock promotion call, converting a shared lock into an exclusive lock, defaulting to no-op. A default implementation is available via . + /// + /// The header for the current record + /// The key for the current record + /// The value for the current record + /// Context-specific information; will be passed to + /// The number of times to spin in a try/yield loop until giving up; default is once + /// + /// This is called only for records guaranteed to be in the mutable range. + /// + /// + /// True if the lock was acquired, else false. + /// + bool TryLockExclusiveFromShared(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext, int spinCount = 1); #endregion Locking #region Checkpointing diff --git a/cs/src/core/Utilities/LockType.cs b/cs/src/core/Utilities/LockType.cs new file mode 100644 index 000000000..a643252ca --- /dev/null +++ b/cs/src/core/Utilities/LockType.cs @@ -0,0 +1,49 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. 
+ +namespace FASTER.core +{ + /// + /// Type of lock taken by FASTER on Read, Upsert, RMW, or Delete operations, either directly or within concurrent callback operations + /// + public enum LockType : byte + { + /// + /// Shared lock, taken on Read + /// + Shared, + + /// + /// Exclusive lock, taken on Upsert, RMW, or Delete + /// + Exclusive, + + /// + /// Promote a Shared lock to an Exclusive lock + /// + ExclusiveFromShared + } + + internal enum LockOperationType : byte + { + None, + Lock, + Unlock + } + + internal struct LockOperation + { + internal LockOperationType LockOperationType; + internal LockType LockType; + internal long LockContext; + + internal bool IsSet => LockOperationType != LockOperationType.None; + + internal LockOperation(LockOperationType opType, LockType lockType) + { + this.LockOperationType = opType; + this.LockType = lockType; + this.LockContext = default; + } + } +} diff --git a/cs/test/LockTests.cs b/cs/test/LockTests.cs index 39122cd8d..e95cdd74c 100644 --- a/cs/test/LockTests.cs +++ b/cs/test/LockTests.cs @@ -39,7 +39,7 @@ public void Setup() { TestUtils.DeleteDirectory(TestUtils.MethodTestDir, wait: true); log = Devices.CreateLogDevice(TestUtils.MethodTestDir + "/GenericStringTests.log", deleteOnClose: true); - fkv = new FasterKV(1L << 20, new LogSettings { LogDevice = log, ObjectLogDevice = null }); + fkv = new FasterKV(1L << 20, new LogSettings { LogDevice = log, ObjectLogDevice = null }, fasterSettings: new FasterSettings { SupportsLocking = true }); session = fkv.For(new Functions()).NewSession(); } diff --git a/cs/test/ManualOperationsTests.cs b/cs/test/ManualOperationsTests.cs new file mode 100644 index 000000000..6994b29d6 --- /dev/null +++ b/cs/test/ManualOperationsTests.cs @@ -0,0 +1,62 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. 
+ +using System.IO; +using System.Threading; +using FASTER.core; +using NUnit.Framework; + +namespace FASTER.test +{ + [TestFixture] + class ManualOperationsTests + { + const int numRecords = 1000; + const int valueMult = 1_000_000; + const int numThreads = 12; + + private FasterKV fkv; + private ClientSession> session; + private IDevice log; + + [SetUp] + public void Setup() + { + TestUtils.DeleteDirectory(TestUtils.MethodTestDir, wait: true); + + log = Devices.CreateLogDevice(Path.Combine(TestUtils.MethodTestDir, "test.log"), deleteOnClose: true); + fkv = new FasterKV(1L << 20, new LogSettings { LogDevice = log, ObjectLogDevice = null, PageSizeBits = 12, MemorySizeBits = 22 }); + session = fkv.For(new SimpleFunctions()).NewSession>(); + } + + [TearDown] + public void TearDown() + { + session?.Dispose(); + session = null; + fkv?.Dispose(); + fkv = null; + log?.Dispose(); + log = null; + + // Clean up log + TestUtils.DeleteDirectory(TestUtils.MethodTestDir); + } + + void Populate() + { + for (int key = 0; key < numRecords; key++) + { + Assert.AreNotEqual(Status.PENDING, session.Upsert(key, key * valueMult)); + } + } + + [Test] + [Category(TestUtils.ManualOpsTestCategory)] + [Category(TestUtils.SmokeTestCategory)] + public void InMemoryLockTest() + { + Populate(); + } + } +} diff --git a/cs/test/MiscFASTERTests.cs b/cs/test/MiscFASTERTests.cs index b9eb27c38..4cc6799ae 100644 --- a/cs/test/MiscFASTERTests.cs +++ b/cs/test/MiscFASTERTests.cs @@ -121,26 +121,31 @@ public void ShouldCreateNewRecordIfConcurrentWriterReturnsFalse() var key = default(KeyStruct); var value = default(ValueStruct); + var input = default(InputStruct); + var output = default(OutputStruct); key = new KeyStruct() { kfield1 = 1, kfield2 = 2 }; value = new ValueStruct() { vfield1 = 1000, vfield2 = 2000 }; - session.Upsert(ref key, ref value, Empty.Default, 0); + session.Upsert(ref key, ref input, ref value, ref output, out RecordMetadata recordMetadata1); value = new ValueStruct() { vfield1 = 
1001, vfield2 = 2002 }; - session.Upsert(ref key, ref value, Empty.Default, 0); + session.Upsert(ref key, ref input, ref value, ref output, out RecordMetadata recordMetadata2); + + Assert.Greater(recordMetadata2.Address, recordMetadata1.Address); var recordCount = 0; using (var iterator = fht.Log.Scan(fht.Log.BeginAddress, fht.Log.TailAddress)) { + // We now seal before copying and unseal/set to Invalid after copying, so we only get one record. while (iterator.GetNext(out var info)) { recordCount++; } } - Assert.AreEqual(1, copyOnWrite.ConcurrentWriterCallCount, 2); - Assert.AreEqual(2, recordCount); + Assert.AreEqual(1, copyOnWrite.ConcurrentWriterCallCount); + Assert.AreEqual(1, recordCount); } finally { diff --git a/cs/test/TestUtils.cs b/cs/test/TestUtils.cs index 98a2a8472..7debff6a7 100644 --- a/cs/test/TestUtils.cs +++ b/cs/test/TestUtils.cs @@ -17,6 +17,7 @@ internal static class TestUtils // Various categories used to group tests internal const string SmokeTestCategory = "Smoke"; internal const string FasterKVTestCategory = "FasterKV"; + internal const string ManualOpsTestCategory = "ManualOps"; /// /// Delete a directory recursively From 0d152dc181689e8c47497c9c01bbed0c602ff0e6 Mon Sep 17 00:00:00 2001 From: TedHartMS <15467143+TedHartMS@users.noreply.github.com> Date: Fri, 12 Nov 2021 14:53:07 -0800 Subject: [PATCH 02/25] fixes for v2 merge --- cs/src/core/ClientSession/ClientSession.cs | 3 --- cs/src/core/ClientSession/ManualFasterOperations.cs | 11 +---------- cs/src/core/Index/FASTER/FASTERImpl.cs | 4 ++-- 3 files changed, 3 insertions(+), 15 deletions(-) diff --git a/cs/src/core/ClientSession/ClientSession.cs b/cs/src/core/ClientSession/ClientSession.cs index 63ba6d274..9c2ed1c92 100644 --- a/cs/src/core/ClientSession/ClientSession.cs +++ b/cs/src/core/ClientSession/ClientSession.cs @@ -833,7 +833,6 @@ public void ReadCompletionCallback(ref Key key, ref Input input, ref Output outp [MethodImpl(MethodImplOptions.AggressiveInlining)] public void 
SingleWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref LockOperation lockOp, ref RecordInfo recordInfo, long address) { - recordInfo.Version = _clientSession.ctx.version; _clientSession.functions.SingleWriter(ref key, ref input, ref src, ref dst, ref output, ref recordInfo, address); if (this.SupportsPostOperations && this.SupportsLocking) @@ -917,7 +916,6 @@ public bool NeedInitialUpdate(ref Key key, ref Input input, ref Output output) public void InitialUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, long address, out long lockContext) { lockContext = 0; - recordInfo.Version = _clientSession.ctx.version; _clientSession.functions.InitialUpdater(ref key, ref input, ref value, ref output, ref recordInfo, address); if (this.SupportsPostOperations && this.SupportsLocking) @@ -968,7 +966,6 @@ public bool NeedCopyUpdate(ref Key key, ref Input input, ref Value oldValue, ref public void CopyUpdater(ref Key key, ref Input input, ref Value oldValue, ref Value newValue, ref Output output, ref RecordInfo recordInfo, long address, out long lockContext) { lockContext = 0; - recordInfo.Version = _clientSession.ctx.version; _clientSession.functions.CopyUpdater(ref key, ref input, ref oldValue, ref newValue, ref output, ref recordInfo, address); if (this.SupportsPostOperations && this.SupportsLocking) diff --git a/cs/src/core/ClientSession/ManualFasterOperations.cs b/cs/src/core/ClientSession/ManualFasterOperations.cs index 0b942c216..6955e78f3 100644 --- a/cs/src/core/ClientSession/ManualFasterOperations.cs +++ b/cs/src/core/ClientSession/ManualFasterOperations.cs @@ -429,7 +429,7 @@ void HandleLockOperation(ref RecordInfo recordInfo, ref Key key, ref Value value if (recordInfo.Stub) { recordInfo.Stub = false; - recordInfo.Invalid = true; + recordInfo.SetInvalid(); } if (lockOp.LockType == LockType.Shared) this.UnlockShared(ref recordInfo, ref key, ref value, lockOp.LockContext); @@ 
-488,7 +488,6 @@ public void ReadCompletionCallback(ref Key key, ref Input input, ref Output outp [MethodImpl(MethodImplOptions.AggressiveInlining)] public void SingleWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref LockOperation lockOp, ref RecordInfo recordInfo, long address) { - recordInfo.Version = _clientSession.ctx.version; _clientSession.functions.SingleWriter(ref key, ref input, ref src, ref dst, ref output, ref recordInfo, address); // Lock (or unlock) here, and do not unlock in PostSingleWriter; wait for the user to explicitly unlock @@ -506,8 +505,6 @@ public void PostSingleWriter(ref Key key, ref Input input, ref Value src, ref Va [MethodImpl(MethodImplOptions.AggressiveInlining)] public bool ConcurrentWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref LockOperation lockOp, ref RecordInfo recordInfo, long address) { - recordInfo.Version = _clientSession.ctx.version; - if (lockOp.IsSet) { // All lock operations in ConcurrentWriter can return immediately. 
@@ -533,7 +530,6 @@ public bool NeedInitialUpdate(ref Key key, ref Input input, ref Output output) public void InitialUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, long address, out long lockContext) { lockContext = default; - recordInfo.Version = _clientSession.ctx.version; _clientSession.functions.InitialUpdater(ref key, ref input, ref value, ref output, ref recordInfo, address); } @@ -554,7 +550,6 @@ public bool NeedCopyUpdate(ref Key key, ref Input input, ref Value oldValue, ref public void CopyUpdater(ref Key key, ref Input input, ref Value oldValue, ref Value newValue, ref Output output, ref RecordInfo recordInfo, long address, out long lockContext) { lockContext = 0; - recordInfo.Version = _clientSession.ctx.version; _clientSession.functions.CopyUpdater(ref key, ref input, ref oldValue, ref newValue, ref output, ref recordInfo, address); } @@ -570,8 +565,6 @@ public bool PostCopyUpdater(ref Key key, ref Input input, ref Value oldValue, re [MethodImpl(MethodImplOptions.AggressiveInlining)] public bool InPlaceUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, long address) { - recordInfo.Version = _clientSession.ctx.version; - // Note: KeyIndexes do not need notification of in-place updates because the key does not change. return _clientSession.functions.InPlaceUpdater(ref key, ref input, ref value, ref output, ref recordInfo, address); } @@ -587,7 +580,6 @@ public void RMWCompletionCallback(ref Key key, ref Input input, ref Output outpu public void PostSingleDeleter(ref Key key, ref RecordInfo recordInfo, long address) { // There is no value to lock here, so we take a RecordInfo lock in InternalDelete and release it here. 
- recordInfo.Version = _clientSession.ctx.version; if (_clientSession.functions.SupportsPostOperations) _clientSession.functions.PostSingleDeleter(ref key, ref recordInfo, address); } @@ -595,7 +587,6 @@ public void PostSingleDeleter(ref Key key, ref RecordInfo recordInfo, long addre [MethodImpl(MethodImplOptions.AggressiveInlining)] public bool ConcurrentDeleter(ref Key key, ref Value value, ref RecordInfo recordInfo, long address) { - recordInfo.Version = _clientSession.ctx.version; recordInfo.Tombstone = true; return _clientSession.functions.ConcurrentDeleter(ref key, ref value, ref recordInfo, address); } diff --git a/cs/src/core/Index/FASTER/FASTERImpl.cs b/cs/src/core/Index/FASTER/FASTERImpl.cs index 9f7a5bf1c..e2d96fb7f 100644 --- a/cs/src/core/Index/FASTER/FASTERImpl.cs +++ b/cs/src/core/Index/FASTER/FASTERImpl.cs @@ -492,7 +492,7 @@ internal OperationStatus InternalUpsert( ref RecordInfo recordInfo = ref hlog.GetInfo(stubOrSealedPhysicalAddress); recordInfo.Stub = false; recordInfo.Sealed = false; - recordInfo.Invalid = true; + recordInfo.SetInvalid(); fasterSession.UnlockExclusive(ref recordInfo, ref key, ref hlog.GetValue(physicalAddress), sealedLockContext); } goto LatchRelease; @@ -874,7 +874,7 @@ internal OperationStatus InternalRMW( ref RecordInfo recordInfo = ref hlog.GetInfo(sealedPhysicalAddress); recordInfo.Stub = false; recordInfo.Sealed = false; - recordInfo.Invalid = true; + recordInfo.SetInvalid(); fasterSession.UnlockExclusive(ref recordInfo, ref key, ref hlog.GetValue(physicalAddress), sealedLockContext); } goto LatchRelease; From 48e10631870e0bd40cd84edb43992e5dd868b313 Mon Sep 17 00:00:00 2001 From: TedHartMS <15467143+TedHartMS@users.noreply.github.com> Date: Mon, 29 Nov 2021 03:06:47 -0800 Subject: [PATCH 03/25] Updates to locking (still in progress) - Remove User-defined locks - Move SupportsLocking up to FasterKV level - Skip Stub and Sealed in iterators - Change PendingContext.OperationFlags from byte to ushort - Add 
ReadFlags.SkipCopyToTail - Add IsLocked*, TranferLocks methods to RecordInfo - Add UnsafeCompletePending* to ManualOps - Track lock counts in ManualOps and throw if Dispose() is called with locks or epoch held - Add LockInfo for ManualOps.*Lock - Implementation and testing of Faster operations with ManualOps locks - Fix ReadAddressTests to reflect improved consistency of not returning readcache addresses --- cs/benchmark/Functions.cs | 22 - cs/remote/samples/FixedLenServer/Program.cs | 2 +- cs/remote/samples/FixedLenServer/Types.cs | 10 - .../src/FASTER.server/ServerKVFunctions.cs | 25 - .../FASTER.server/Servers/FixedLenServer.cs | 5 +- .../FASTER.server/Servers/GenericServer.cs | 6 +- .../src/FASTER.server/Servers/VarLenServer.cs | 2 +- .../test/FASTER.remote.test/TestUtils.cs | 2 +- .../core/Allocator/BlittableScanIterator.cs | 2 +- cs/src/core/Allocator/GenericScanIterator.cs | 4 +- .../Allocator/VarLenBlittableScanIterator.cs | 2 +- cs/src/core/Async/ReadAsync.cs | 2 +- cs/src/core/ClientSession/ClientSession.cs | 177 +++--- .../core/ClientSession/FASTERClientSession.cs | 2 +- .../ClientSession/ManualFasterOperations.cs | 291 ++++++---- cs/src/core/Index/Common/CompletedOutput.cs | 5 - cs/src/core/Index/Common/Contexts.cs | 31 +- cs/src/core/Index/Common/RecordInfo.cs | 20 +- cs/src/core/Index/FASTER/FASTER.cs | 17 +- cs/src/core/Index/FASTER/FASTERImpl.cs | 514 ++++++++++++------ .../Index/FASTER/LogCompactionFunctions.cs | 10 - cs/src/core/Index/Interfaces/FunctionsBase.cs | 29 - .../core/Index/Interfaces/IFasterSession.cs | 30 +- cs/src/core/Index/Interfaces/IFunctions.cs | 114 ---- cs/src/core/Utilities/LockType.cs | 32 +- cs/src/core/VarLen/SpanByteFunctions.cs | 52 +- cs/test/ManualOperationsTests.cs | 428 ++++++++++++++- cs/test/ReadAddressTests.cs | 182 +++++-- 28 files changed, 1308 insertions(+), 710 deletions(-) diff --git a/cs/benchmark/Functions.cs b/cs/benchmark/Functions.cs index 1a1ee225a..52cfd27b9 100644 --- a/cs/benchmark/Functions.cs +++ 
b/cs/benchmark/Functions.cs @@ -103,27 +103,5 @@ public void PostInitialUpdater(ref Key key, ref Input input, ref Value value, re public void PostSingleDeleter(ref Key key, ref RecordInfo recordInfo, long address) { } public void PostSingleWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address) { } - - public bool SupportsLocking => locking; - - public void LockExclusive(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext) => recordInfo.LockExclusive(); - - public void UnlockExclusive(ref RecordInfo recordInfo, ref Key key, ref Value value, long lockContext) => recordInfo.UnlockExclusive(); - - public bool TryLockExclusive(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext, int spinCount = 1) => recordInfo.TryLockExclusive(spinCount); - - public void LockShared(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext) => recordInfo.LockShared(); - - public bool UnlockShared(ref RecordInfo recordInfo, ref Key key, ref Value value, long lockContext) - { - recordInfo.UnlockShared(); - return true; - } - - public bool TryLockShared(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext, int spinCount = 1) => recordInfo.TryLockShared(spinCount); - - public void LockExclusiveFromShared(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext) => recordInfo.LockExclusiveFromShared(); - - public bool TryLockExclusiveFromShared(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext, int spinCount = 1) => recordInfo.TryLockExclusiveFromShared(spinCount); } } diff --git a/cs/remote/samples/FixedLenServer/Program.cs b/cs/remote/samples/FixedLenServer/Program.cs index f28ee1aa6..03bc6ab0e 100644 --- a/cs/remote/samples/FixedLenServer/Program.cs +++ b/cs/remote/samples/FixedLenServer/Program.cs @@ -27,7 +27,7 @@ static void Main(string[] args) if (result.Tag == 
ParserResultType.NotParsed) return; var opts = result.MapResult(o => o, xs => new Options()); - using var server = new FixedLenServer(opts.GetServerOptions(), e => new Functions()); + using var server = new FixedLenServer(opts.GetServerOptions(), e => new Functions(), supportsLocking: false); server.Start(); Console.WriteLine("Started server"); diff --git a/cs/remote/samples/FixedLenServer/Types.cs b/cs/remote/samples/FixedLenServer/Types.cs index c9e26f025..815b81857 100644 --- a/cs/remote/samples/FixedLenServer/Types.cs +++ b/cs/remote/samples/FixedLenServer/Types.cs @@ -55,9 +55,6 @@ public struct Output public struct Functions : IFunctions { - // No locking needed for atomic types such as Value - public bool SupportsLocking => false; - // Callbacks public void RMWCompletionCallback(ref Key key, ref Input input, ref Output output, long ctx, Status status, RecordMetadata recordMetadata) { } @@ -134,13 +131,6 @@ public void PostSingleDeleter(ref Key key, ref RecordInfo recordInfo, long addre public void PostSingleWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address) { } - public void LockExclusive(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext) { } - public void UnlockExclusive(ref RecordInfo recordInfo, ref Key key, ref Value value, long lockContext) { } - public bool TryLockExclusive(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext, int spinCount = 1) => true; - public void LockShared(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext) { } - public bool UnlockShared(ref RecordInfo recordInfo, ref Key key, ref Value value, long lockContext) => true; - public bool TryLockShared(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext, int spinCount = 1) => true; - public bool ConcurrentDeleter(ref Key key, ref Value value, ref RecordInfo recordInfo, long address) => true; } } diff --git 
a/cs/remote/src/FASTER.server/ServerKVFunctions.cs b/cs/remote/src/FASTER.server/ServerKVFunctions.cs index fc22ae638..6f07f0d1b 100644 --- a/cs/remote/src/FASTER.server/ServerKVFunctions.cs +++ b/cs/remote/src/FASTER.server/ServerKVFunctions.cs @@ -11,8 +11,6 @@ internal struct ServerKVFunctions : IFunct private readonly Functions functions; private readonly FasterKVServerSessionBase serverNetworkSession; - public bool SupportsLocking => functions.SupportsLocking; - public bool SupportsPostOperations => true; public ServerKVFunctions(Functions functions, FasterKVServerSessionBase serverNetworkSession) @@ -80,28 +78,5 @@ public void PostSingleWriter(ref Key key, ref Input input, ref Value src, ref Va public void UpsertCompletionCallback(ref Key key, ref Input input, ref Value value, long ctx) => functions.UpsertCompletionCallback(ref key, ref input, ref value, ctx); - - public void LockExclusive(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext) - => functions.LockExclusive(ref recordInfo, ref key, ref value, ref lockContext); - - /// - public void UnlockExclusive(ref RecordInfo recordInfo, ref Key key, ref Value value, long lockContext) - => functions.UnlockExclusive(ref recordInfo, ref key, ref value, lockContext); - - /// - public bool TryLockExclusive(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext, int spinCount = 1) - => functions.TryLockExclusive(ref recordInfo, ref key, ref value, ref lockContext, spinCount); - - /// - public void LockShared(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext) - => functions.LockShared(ref recordInfo, ref key, ref value, ref lockContext); - - /// - public bool UnlockShared(ref RecordInfo recordInfo, ref Key key, ref Value value, long lockContext) - => functions.UnlockShared(ref recordInfo, ref key, ref value, lockContext); - - /// - public bool TryLockShared(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext, int 
spinCount = 1) - => functions.TryLockShared(ref recordInfo, ref key, ref value, ref lockContext, spinCount); } } diff --git a/cs/remote/src/FASTER.server/Servers/FixedLenServer.cs b/cs/remote/src/FASTER.server/Servers/FixedLenServer.cs index d222e75b0..241d4b962 100644 --- a/cs/remote/src/FASTER.server/Servers/FixedLenServer.cs +++ b/cs/remote/src/FASTER.server/Servers/FixedLenServer.cs @@ -22,9 +22,10 @@ public sealed class FixedLenServer : Gener /// /// /// + /// /// - public FixedLenServer(ServerOptions opts, Func functionsGen, MaxSizeSettings maxSizeSettings = default) - : base(opts, functionsGen, new FixedLenSerializer(), new FixedLenKeySerializer(), maxSizeSettings) + public FixedLenServer(ServerOptions opts, Func functionsGen, bool supportsLocking, MaxSizeSettings maxSizeSettings = default) + : base(opts, functionsGen, new FixedLenSerializer(), new FixedLenKeySerializer(), supportsLocking, maxSizeSettings) { } } diff --git a/cs/remote/src/FASTER.server/Servers/GenericServer.cs b/cs/remote/src/FASTER.server/Servers/GenericServer.cs index 8f20aba26..7c7f38871 100644 --- a/cs/remote/src/FASTER.server/Servers/GenericServer.cs +++ b/cs/remote/src/FASTER.server/Servers/GenericServer.cs @@ -29,8 +29,10 @@ public class GenericServer /// /// + /// /// - public GenericServer(ServerOptions opts, Func functionsGen, ParameterSerializer serializer, IKeyInputSerializer keyInputSerializer, MaxSizeSettings maxSizeSettings = default) + public GenericServer(ServerOptions opts, Func functionsGen, ParameterSerializer serializer, IKeyInputSerializer keyInputSerializer, + bool supportsLocking, MaxSizeSettings maxSizeSettings = default) { this.opts = opts; @@ -41,7 +43,7 @@ public GenericServer(ServerOptions opts, Func functionsGe Directory.CreateDirectory(opts.CheckpointDir); opts.GetSettings(out var logSettings, out var checkpointSettings, out var indexSize); - store = new FasterKV(indexSize, logSettings, checkpointSettings); + store = new FasterKV(indexSize, logSettings, 
checkpointSettings, fasterSettings: new FasterSettings { SupportsLocking = supportsLocking }); if (opts.Recover) { diff --git a/cs/remote/src/FASTER.server/Servers/VarLenServer.cs b/cs/remote/src/FASTER.server/Servers/VarLenServer.cs index 6ece966a4..efd28b107 100644 --- a/cs/remote/src/FASTER.server/Servers/VarLenServer.cs +++ b/cs/remote/src/FASTER.server/Servers/VarLenServer.cs @@ -36,7 +36,7 @@ public VarLenServer(ServerOptions opts) Directory.CreateDirectory(opts.CheckpointDir); opts.GetSettings(out logSettings, out var checkpointSettings, out var indexSize); - store = new FasterKV(indexSize, logSettings, checkpointSettings); + store = new FasterKV(indexSize, logSettings, checkpointSettings, fasterSettings: new FasterSettings { SupportsLocking = true } ); if (opts.EnablePubSub) { diff --git a/cs/remote/test/FASTER.remote.test/TestUtils.cs b/cs/remote/test/FASTER.remote.test/TestUtils.cs index 49e941c12..99c7bf1fe 100644 --- a/cs/remote/test/FASTER.remote.test/TestUtils.cs +++ b/cs/remote/test/FASTER.remote.test/TestUtils.cs @@ -33,7 +33,7 @@ public static FixedLenServer>(opts, e => new SimpleFunctions(merger)); + return new FixedLenServer>(opts, e => new SimpleFunctions(merger), supportsLocking: false); } /// diff --git a/cs/src/core/Allocator/BlittableScanIterator.cs b/cs/src/core/Allocator/BlittableScanIterator.cs index 9af1d31ee..bb1c08e1e 100644 --- a/cs/src/core/Allocator/BlittableScanIterator.cs +++ b/cs/src/core/Allocator/BlittableScanIterator.cs @@ -124,7 +124,7 @@ public bool GetNext(out RecordInfo recordInfo) nextAddress = currentAddress + recordSize; ref var info = ref hlog.GetInfo(physicalAddress); - if (info.Invalid || info.IsNull()) + if (info.SkipOnScan || info.IsNull()) { epoch?.Suspend(); continue; diff --git a/cs/src/core/Allocator/GenericScanIterator.cs b/cs/src/core/Allocator/GenericScanIterator.cs index 4f475a198..ee8a57d52 100644 --- a/cs/src/core/Allocator/GenericScanIterator.cs +++ b/cs/src/core/Allocator/GenericScanIterator.cs @@ 
-111,7 +111,7 @@ public bool GetNext(out RecordInfo recordInfo) // Read record from cached page memory var page = currentPage % hlog.BufferSize; - if (hlog.values[page][offset].info.Invalid) + if (hlog.values[page][offset].info.SkipOnScan) { epoch?.Suspend(); continue; @@ -126,7 +126,7 @@ public bool GetNext(out RecordInfo recordInfo) var currentFrame = currentPage % frameSize; - if (frame.GetInfo(currentFrame, offset).Invalid) + if (frame.GetInfo(currentFrame, offset).SkipOnScan) { epoch?.Suspend(); continue; diff --git a/cs/src/core/Allocator/VarLenBlittableScanIterator.cs b/cs/src/core/Allocator/VarLenBlittableScanIterator.cs index e22f76eaf..943b3a8d8 100644 --- a/cs/src/core/Allocator/VarLenBlittableScanIterator.cs +++ b/cs/src/core/Allocator/VarLenBlittableScanIterator.cs @@ -114,7 +114,7 @@ public unsafe bool GetNext(out RecordInfo recordInfo) nextAddress = currentAddress + recordSize; ref var info = ref hlog.GetInfo(physicalAddress); - if (info.Invalid || info.IsNull()) + if (info.SkipOnScan || info.IsNull()) { epoch?.Suspend(); continue; diff --git a/cs/src/core/Async/ReadAsync.cs b/cs/src/core/Async/ReadAsync.cs index 4551bd6fe..8f96c3fb2 100644 --- a/cs/src/core/Async/ReadAsync.cs +++ b/cs/src/core/Async/ReadAsync.cs @@ -153,7 +153,7 @@ internal ReadAsyncResult( [MethodImpl(MethodImplOptions.AggressiveInlining)] internal ValueTask> ReadAsync(IFasterSession fasterSession, FasterExecutionContext currentCtx, - ref Key key, ref Input input, long startAddress, Context context, long serialNo, CancellationToken token, byte operationFlags = 0) + ref Key key, ref Input input, long startAddress, Context context, long serialNo, CancellationToken token, ushort operationFlags = 0) { var pcontext = default(PendingContext); pcontext.SetOperationFlags(operationFlags, startAddress); diff --git a/cs/src/core/ClientSession/ClientSession.cs b/cs/src/core/ClientSession/ClientSession.cs index 9c2ed1c92..a1cd60327 100644 --- a/cs/src/core/ClientSession/ClientSession.cs +++ 
b/cs/src/core/ClientSession/ClientSession.cs @@ -36,6 +36,8 @@ public sealed class ClientSession internal readonly InternalFasterSession FasterSession; + ManualFasterOperations manualOperations; + internal const string NotAsyncSessionErr = "Session does not support async operations"; internal ClientSession( @@ -156,6 +158,16 @@ public void Dispose() UnsafeSuspendThread(); } + /// + /// Return a new interface to Faster operations that supports manual locking and epoch control. + /// + public ManualFasterOperations GetManualOperations() + { + this.manualOperations ??= new ManualFasterOperations(this); + this.manualOperations.Acquire(); + return this.manualOperations; + } + #region IFasterOperations /// [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -517,7 +529,20 @@ public bool CompletePendingWithOutputs(out CompletedOutputIterator + /// Synchronously complete outstanding pending synchronous operations, returning outputs for the completed operations. + /// Assumes epoch protection is managed by user. Async operations must be completed individually. + /// + internal bool UnsafeCompletePendingWithOutputs(FasterSession fasterSession, out CompletedOutputIterator completedOutputs, bool wait = false, bool spinWaitForCommit = false) + where FasterSession : IFasterSession + { + InitializeCompletedOutputs(); + var result = UnsafeCompletePending(fasterSession, true, wait, spinWaitForCommit); + completedOutputs = this.completedOutputs; + return result; + } + + private void InitializeCompletedOutputs() { if (this.completedOutputs is null) this.completedOutputs = new CompletedOutputIterator(); @@ -525,30 +550,12 @@ void InitializeCompletedOutputs() this.completedOutputs.Dispose(); } - private bool CompletePending(bool getOutputs, bool wait, bool spinWaitForCommit) + internal bool CompletePending(bool getOutputs, bool wait, bool spinWaitForCommit) { if (SupportAsync) UnsafeResumeThread(); try { - var requestedOutputs = getOutputs ? 
this.completedOutputs : default; - var result = fht.InternalCompletePending(ctx, FasterSession, wait, requestedOutputs); - if (spinWaitForCommit) - { - if (wait != true) - { - throw new FasterException("Can spin-wait for commit (checkpoint completion) only if wait is true"); - } - do - { - fht.InternalCompletePending(ctx, FasterSession, wait, requestedOutputs); - if (fht.InRestPhase()) - { - fht.InternalCompletePending(ctx, FasterSession, wait, requestedOutputs); - return true; - } - } while (wait); - } - return result; + return UnsafeCompletePending(FasterSession, getOutputs, wait, spinWaitForCommit); } finally { @@ -556,6 +563,30 @@ private bool CompletePending(bool getOutputs, bool wait, bool spinWaitForCommit) } } + internal bool UnsafeCompletePending(FasterSession fasterSession, bool getOutputs, bool wait, bool spinWaitForCommit) + where FasterSession : IFasterSession + { + var requestedOutputs = getOutputs ? this.completedOutputs : default; + var result = fht.InternalCompletePending(ctx, fasterSession, wait, requestedOutputs); + if (spinWaitForCommit) + { + if (wait != true) + { + throw new FasterException("Can spin-wait for commit (checkpoint completion) only if wait is true"); + } + do + { + fht.InternalCompletePending(ctx, fasterSession, wait, requestedOutputs); + if (fht.InRestPhase()) + { + fht.InternalCompletePending(ctx, fasterSession, wait, requestedOutputs); + return true; + } + } while (wait); + } + return result; + } + /// /// Complete all pending synchronous FASTER operations. /// Async operations must be completed individually. 
@@ -789,6 +820,8 @@ public InternalFasterSession(ClientSession _clientSession.fht.SupportsLocking; public bool SupportsPostOperations => _clientSession.functions.SupportsPostOperations; + + public bool IsManualOperations => false; #endregion IFunctions - Optional features supported #region IFunctions - Reads @@ -803,22 +836,15 @@ public bool ConcurrentReader(ref Key key, ref Input input, ref Value value, ref public bool ConcurrentReaderLock(ref Key key, ref Input input, ref Value value, ref Output dst, ref RecordInfo recordInfo, long address) { - bool success = false; - for (bool retry = true; retry; /* updated in loop */) + this.LockShared(ref recordInfo); + try { - success = false; - long context = 0; - this.LockShared(ref recordInfo, ref key, ref value, ref context); - try - { - success = _clientSession.functions.ConcurrentReader(ref key, ref input, ref value, ref dst, ref recordInfo, address); - } - finally - { - retry = !this.UnlockShared(ref recordInfo, ref key, ref value, context); - } + return _clientSession.functions.ConcurrentReader(ref key, ref input, ref value, ref dst, ref recordInfo, address); + } + finally + { + this.UnlockShared(ref recordInfo); } - return success; } public void ReadCompletionCallback(ref Key key, ref Input input, ref Output output, Context ctx, Status status, RecordMetadata recordMetadata) @@ -838,7 +864,7 @@ public void SingleWriter(ref Key key, ref Input input, ref Value src, ref Value if (this.SupportsPostOperations && this.SupportsLocking) { // Lock must be taken after the value is initialized. Unlocked in PostSingleWriterLock. 
- this.LockExclusive(ref recordInfo, ref key, ref dst, ref lockOp.LockContext); + this.LockExclusive(ref recordInfo); } } @@ -850,7 +876,7 @@ public void PostSingleWriter(ref Key key, ref Input input, ref Value src, ref Va if (!this.SupportsLocking) PostSingleWriterNoLock(ref key, ref input, ref src, ref dst, ref output, ref recordInfo, address); else - PostSingleWriterLock(ref key, ref input, ref src, ref dst, ref output, ref lockOp, ref recordInfo, address); + PostSingleWriterLock(ref key, ref input, ref src, ref dst, ref output, ref recordInfo, address); } [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -860,7 +886,7 @@ private void PostSingleWriterNoLock(ref Key key, ref Input input, ref Value src, } [MethodImpl(MethodImplOptions.AggressiveInlining)] - private void PostSingleWriterLock(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref LockOperation lockOp, ref RecordInfo recordInfo, long address) + private void PostSingleWriterLock(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address) { // Lock was taken in SingleWriterLock try @@ -869,7 +895,7 @@ private void PostSingleWriterLock(ref Key key, ref Input input, ref Value src, r } finally { - this.UnlockExclusive(ref recordInfo, ref key, ref dst, lockOp.LockContext); + this.UnlockExclusive(ref recordInfo); } } @@ -890,15 +916,14 @@ private bool ConcurrentWriterNoLock(ref Key key, ref Input input, ref Value src, [MethodImpl(MethodImplOptions.AggressiveInlining)] private bool ConcurrentWriterLock(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address) { - long context = 0; - this.LockExclusive(ref recordInfo, ref key, ref dst, ref context); + this.LockExclusive(ref recordInfo); try { return !recordInfo.Tombstone && ConcurrentWriterNoLock(ref key, ref input, ref src, ref dst, ref output, ref recordInfo, address); } finally { - 
this.UnlockExclusive(ref recordInfo, ref key, ref dst, context); + this.UnlockExclusive(ref recordInfo); } } @@ -913,27 +938,26 @@ public bool NeedInitialUpdate(ref Key key, ref Input input, ref Output output) => _clientSession.functions.NeedInitialUpdate(ref key, ref input, ref output); [MethodImpl(MethodImplOptions.AggressiveInlining)] - public void InitialUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, long address, out long lockContext) + public void InitialUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref LockOperation lockOp, ref RecordInfo recordInfo, long address) { - lockContext = 0; _clientSession.functions.InitialUpdater(ref key, ref input, ref value, ref output, ref recordInfo, address); if (this.SupportsPostOperations && this.SupportsLocking) { // Lock must be taken after the value is initialized. Unlocked in PostInitialUpdaterLock. - this.LockExclusive(ref recordInfo, ref key, ref value, ref lockContext); + this.LockExclusive(ref recordInfo); } } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public void PostInitialUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, long address, long lockContext) + public void PostInitialUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, long address) { if (!this.SupportsPostOperations) return; if (!this.SupportsLocking) PostInitialUpdaterNoLock(ref key, ref input, ref value, ref output, ref recordInfo, address); else - PostInitialUpdaterLock(ref key, ref input, ref value, ref output, ref recordInfo, address, lockContext); + PostInitialUpdaterLock(ref key, ref input, ref value, ref output, ref recordInfo, address); } [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -943,7 +967,7 @@ private void PostInitialUpdaterNoLock(ref Key key, ref Input input, ref Value va } [MethodImpl(MethodImplOptions.AggressiveInlining)] - private void 
PostInitialUpdaterLock(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, long address, long lockContext) + private void PostInitialUpdaterLock(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, long address) { // Lock was taken in InitialUpdaterLock try @@ -952,7 +976,7 @@ private void PostInitialUpdaterLock(ref Key key, ref Input input, ref Value valu } finally { - this.UnlockExclusive(ref recordInfo, ref key, ref value, lockContext); + this.UnlockExclusive(ref recordInfo); } } #endregion InitialUpdater @@ -963,26 +987,25 @@ public bool NeedCopyUpdate(ref Key key, ref Input input, ref Value oldValue, ref => _clientSession.functions.NeedCopyUpdate(ref key, ref input, ref oldValue, ref output); [MethodImpl(MethodImplOptions.AggressiveInlining)] - public void CopyUpdater(ref Key key, ref Input input, ref Value oldValue, ref Value newValue, ref Output output, ref RecordInfo recordInfo, long address, out long lockContext) + public void CopyUpdater(ref Key key, ref Input input, ref Value oldValue, ref Value newValue, ref Output output, ref RecordInfo recordInfo, long address) { - lockContext = 0; _clientSession.functions.CopyUpdater(ref key, ref input, ref oldValue, ref newValue, ref output, ref recordInfo, address); if (this.SupportsPostOperations && this.SupportsLocking) { // Lock must be taken after the value is initialized. Unlocked in PostInitialUpdaterLock. 
- this.LockExclusive(ref recordInfo, ref key, ref newValue, ref lockContext); + this.LockExclusive(ref recordInfo); } } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool PostCopyUpdater(ref Key key, ref Input input, ref Value oldValue, ref Value newValue, ref Output output, ref RecordInfo recordInfo, long address, long lockContext) + public bool PostCopyUpdater(ref Key key, ref Input input, ref Value oldValue, ref Value newValue, ref Output output, ref RecordInfo recordInfo, long address) { if (!this.SupportsPostOperations) return true; return !this.SupportsLocking ? PostCopyUpdaterNoLock(ref key, ref input, ref output, ref oldValue, ref newValue, ref recordInfo, address) - : PostCopyUpdaterLock(ref key, ref input, ref output, ref oldValue, ref newValue, ref recordInfo, address, lockContext); + : PostCopyUpdaterLock(ref key, ref input, ref output, ref oldValue, ref newValue, ref recordInfo, address); } [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -992,7 +1015,7 @@ private bool PostCopyUpdaterNoLock(ref Key key, ref Input input, ref Output outp } [MethodImpl(MethodImplOptions.AggressiveInlining)] - private bool PostCopyUpdaterLock(ref Key key, ref Input input, ref Output output, ref Value oldValue, ref Value newValue, ref RecordInfo recordInfo, long address, long lockContext) + private bool PostCopyUpdaterLock(ref Key key, ref Input input, ref Output output, ref Value oldValue, ref Value newValue, ref RecordInfo recordInfo, long address) { // Lock was taken in CopyUpdaterLock try @@ -1002,7 +1025,7 @@ private bool PostCopyUpdaterLock(ref Key key, ref Input input, ref Output output } finally { - this.UnlockExclusive(ref recordInfo, ref key, ref newValue, lockContext); + this.UnlockExclusive(ref recordInfo); } } #endregion CopyUpdater @@ -1024,15 +1047,14 @@ private bool InPlaceUpdaterNoLock(ref Key key, ref Input input, ref Output outpu private bool InPlaceUpdaterLock(ref Key key, ref Input input, ref Output output, ref Value value, ref 
RecordInfo recordInfo, long address) { - long context = 0; - this.LockExclusive(ref recordInfo, ref key, ref value, ref context); + this.LockExclusive(ref recordInfo); try { return !recordInfo.Tombstone && InPlaceUpdaterNoLock(ref key, ref input, ref output, ref value, ref recordInfo, address); } finally { - this.UnlockExclusive(ref recordInfo, ref key, ref value, context); + this.UnlockExclusive(ref recordInfo); } } @@ -1072,15 +1094,14 @@ private bool ConcurrentDeleterNoLock(ref Key key, ref Value value, ref RecordInf [MethodImpl(MethodImplOptions.AggressiveInlining)] private bool ConcurrentDeleterLock(ref Key key, ref Value value, ref RecordInfo recordInfo, long address) { - long context = 0; - this.LockExclusive(ref recordInfo, ref key, ref value, ref context); + this.LockExclusive(ref recordInfo); try { return ConcurrentDeleterNoLock(ref key, ref value, ref recordInfo, address); } finally { - this.UnlockExclusive(ref recordInfo, ref key, ref value, context); + this.UnlockExclusive(ref recordInfo); } } @@ -1090,29 +1111,29 @@ public void DeleteCompletionCallback(ref Key key, Context ctx) #region IFunctions - Locking - public void LockExclusive(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext) - => _clientSession.functions.LockExclusive(ref recordInfo, ref key, ref value, ref lockContext); + public void LockExclusive(ref RecordInfo recordInfo) => recordInfo.LockExclusive(); + + public void UnlockExclusive(ref RecordInfo recordInfo) => recordInfo.UnlockExclusive(); + + public bool TryLockExclusive(ref RecordInfo recordInfo, int spinCount = 1) => recordInfo.TryLockExclusive(spinCount); + + public void LockShared(ref RecordInfo recordInfo) => recordInfo.LockShared(); + + public void UnlockShared(ref RecordInfo recordInfo) => recordInfo.UnlockShared(); - public void UnlockExclusive(ref RecordInfo recordInfo, ref Key key, ref Value value, long lockContext) - => _clientSession.functions.UnlockExclusive(ref recordInfo, ref key, ref value, 
lockContext); + public bool TryLockShared(ref RecordInfo recordInfo, int spinCount = 1) => recordInfo.TryLockShared(spinCount); - public bool TryLockExclusive(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext, int spinCount = 1) - => _clientSession.functions.TryLockExclusive(ref recordInfo, ref key, ref value, ref lockContext, spinCount); + public void LockExclusiveFromShared(ref RecordInfo recordInfo) => recordInfo.LockExclusiveFromShared(); - public void LockShared(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext) - => _clientSession.functions.LockShared(ref recordInfo, ref key, ref value, ref lockContext); + public bool TryLockExclusiveFromShared(ref RecordInfo recordInfo, int spinCount = 1) => recordInfo.TryLockExclusiveFromShared(spinCount); - public bool UnlockShared(ref RecordInfo recordInfo, ref Key key, ref Value value, long lockContext) - => _clientSession.functions.UnlockShared(ref recordInfo, ref key, ref value, lockContext); + public bool IsLocked(ref RecordInfo recordInfo) => recordInfo.IsLocked; - public bool TryLockShared(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext, int spinCount = 1) - => _clientSession.functions.TryLockShared(ref recordInfo, ref key, ref value, ref lockContext, spinCount); + public bool IsLockedExclusive(ref RecordInfo recordInfo) => recordInfo.IsLockedExclusive; - public void LockExclusiveFromShared(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext) - => _clientSession.functions.LockExclusiveFromShared(ref recordInfo, ref key, ref value, ref lockContext); + public bool IsLockedShared(ref RecordInfo recordInfo) => recordInfo.IsLockedShared; - public bool TryLockExclusiveFromShared(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext, int spinCount = 1) - => _clientSession.functions.TryLockExclusiveFromShared(ref recordInfo, ref key, ref value, ref lockContext, spinCount); + public void 
TransferLocks(ref RecordInfo oldRecordInfo, ref RecordInfo newRecordInfo) => newRecordInfo.TransferLocksFrom(ref oldRecordInfo); #endregion IFunctions - Locking #region IFunctions - Checkpointing diff --git a/cs/src/core/ClientSession/FASTERClientSession.cs b/cs/src/core/ClientSession/FASTERClientSession.cs index 5d29cdd8c..0a3c322cd 100644 --- a/cs/src/core/ClientSession/FASTERClientSession.cs +++ b/cs/src/core/ClientSession/FASTERClientSession.cs @@ -116,7 +116,7 @@ public ClientSessionBuilder For( /// /// Start a new client session with FASTER. - /// For performance reasons, please use FasterKV<Key, Value>.For(functions).NewSession<Functions>(...) instead of this overload. + /// For performance reasons, please use instead of this overload. /// /// Callback functions /// ID/name of session (auto-generated if not provided) diff --git a/cs/src/core/ClientSession/ManualFasterOperations.cs b/cs/src/core/ClientSession/ManualFasterOperations.cs index 6955e78f3..b64d3de5a 100644 --- a/cs/src/core/ClientSession/ManualFasterOperations.cs +++ b/cs/src/core/ClientSession/ManualFasterOperations.cs @@ -18,6 +18,17 @@ public sealed class ManualFasterOperations clientSession; internal readonly InternalFasterSession FasterSession; + bool isAcquired; + + ulong TotalLockCount => sharedLockCount + exclusiveLockCount; + internal ulong sharedLockCount; + internal ulong exclusiveLockCount; + + void CheckAcquired() + { + if (!isAcquired) + throw new FasterException("Method call on not-acquired ManualFasterOperations"); + } internal ManualFasterOperations(ClientSession clientSession) { @@ -44,13 +55,46 @@ internal ManualFasterOperations(ClientSession clientSession.UnsafeSuspendThread(); + /// + /// Synchronously complete outstanding pending synchronous operations. + /// Async operations must be completed individually. 
+ /// + /// Wait for all pending operations on session to complete + /// Spin-wait until ongoing commit/checkpoint, if any, completes + /// True if all pending operations have completed, false otherwise + public bool UnsafeCompletePending(bool wait = false, bool spinWaitForCommit = false) + => this.clientSession.UnsafeCompletePending(this.FasterSession, false, wait, spinWaitForCommit); + + /// + /// Synchronously complete outstanding pending synchronous operations, returning outputs for the completed operations. + /// Assumes epoch protection is managed by user. Async operations must be completed individually. + /// + /// Outputs completed by this operation + /// Wait for all pending operations on session to complete + /// Spin-wait until ongoing commit/checkpoint, if any, completes + /// True if all pending operations have completed, false otherwise + public bool UnsafeCompletePendingWithOutputs(out CompletedOutputIterator completedOutputs, bool wait = false, bool spinWaitForCommit = false) + => this.clientSession.UnsafeCompletePendingWithOutputs(this.FasterSession, out completedOutputs, wait, spinWaitForCommit); + + #region Acquire and Dispose + internal void Acquire() + { + if (this.isAcquired) + throw new FasterException("Trying to acquire an already-acquired ManualFasterOperations"); + this.isAcquired = true; + } + /// /// Does not actually dispose of anything; asserts the epoch has been suspended /// public void Dispose() { - Debug.Assert(!LightEpoch.AnyInstanceProtected()); + if (LightEpoch.AnyInstanceProtected()) + throw new FasterException("Disposing ManualFasterOperations with a protected epoch; must call UnsafeSuspendThread"); + if (TotalLockCount > 0) + throw new FasterException($"Disposing ManualFasterOperations with locks held: {sharedLockCount} shared locks, {exclusiveLockCount} exclusive locks"); } + #endregion Acquire and Dispose #region Key Locking @@ -60,13 +104,12 @@ public void Dispose() /// The key to lock /// The type of lock to take /// 
Whether to retrieve data (and copy to the tail of the log) if the key is not in the mutable region - /// Context-specific information; will be passed to - /// The address of the record. May be checked against to check if the lock remains valid - public unsafe void Lock(ref Key key, LockType lockType, bool retrieveData, out long lockContext, out long address) + /// Information about the acquired lock + public unsafe void Lock(ref Key key, LockType lockType, bool retrieveData, ref LockInfo lockInfo) { - LockOperation lockOp = new(LockOperationType.Lock, lockType); + CheckAcquired(); + LockOperation lockOp = new(LockOperationType.LockRead, lockType); - lockContext = default; Input input = default; Output output = default; RecordMetadata recordMetadata = default; @@ -81,30 +124,65 @@ public unsafe void Lock(ref Key key, LockType lockType, bool retrieveData, out l { var status = clientSession.fht.ContextRead(ref key, ref input, ref output, ref lockOp, ref recordMetadata, ReadFlags.CopyToTail, context: default, FasterSession, serialNo: 0, clientSession.ctx); success = status == Status.OK; - if (success) - { - lockContext = lockOp.LockContext; - } - else if (status == Status.PENDING) + if (status == Status.PENDING) { - UnsafeSuspendThread(); - clientSession.CompletePendingWithOutputs(out var completedOutputs, wait: true); + // This bottoms out in WaitPending which assumes the epoch is protected, and releases it. So we don't release it here. 
+ this.UnsafeCompletePendingWithOutputs(out var completedOutputs, wait: true); + completedOutputs.Next(); recordMetadata = completedOutputs.Current.RecordMetadata; - lockContext = completedOutputs.Current.LockContext; completedOutputs.Dispose(); success = true; - UnsafeResumeThread(); } } if (!success) { + lockOp.LockOperationType = LockOperationType.LockUpsert; Value value = default; var status = clientSession.fht.ContextUpsert(ref key, ref input, ref value, ref output, ref lockOp, out recordMetadata, context: default, FasterSession, serialNo: 0, clientSession.ctx); Debug.Assert(status == Status.OK); } - address = recordMetadata.Address; + lockInfo.LockType = lockType == LockType.ExclusiveFromShared ? LockType.Exclusive : lockType; + lockInfo.Address = recordMetadata.Address; + if (lockInfo.LockType == LockType.Exclusive) + ++this.exclusiveLockCount; + else + ++this.sharedLockCount; + } + + /// + /// Lock the key with the specified , waiting until it is acquired + /// + /// The key to lock + /// The type of lock to take + /// Whether to retrieve data (and copy to the tail of the log) if the key is not in the mutable region + /// Information about the acquired lock + public unsafe void Lock(Key key, LockType lockType, bool retrieveData, ref LockInfo lockInfo) + => Lock(ref key, lockType, retrieveData, ref lockInfo); + + /// + /// Lock the key with the specified , waiting until it is acquired + /// + /// The key to lock + /// The type of lock to take + /// Whether to retrieve data (and copy to the tail of the log) if the key is not in the mutable region + public unsafe void Lock(ref Key key, LockType lockType, bool retrieveData) + { + LockInfo lockInfo = default; + Lock(ref key, lockType, retrieveData, ref lockInfo); + } + + /// + /// Lock the key with the specified , waiting until it is acquired + /// + /// The key to lock + /// The type of lock to take + /// Whether to retrieve data (and copy to the tail of the log) if the key is not in the mutable region + public 
unsafe void Lock(Key key, LockType lockType, bool retrieveData) + { + LockInfo lockInfo = default; + Lock(ref key, lockType, retrieveData, ref lockInfo); } /// @@ -112,9 +190,10 @@ public unsafe void Lock(ref Key key, LockType lockType, bool retrieveData, out l /// /// The key to lock /// The type of lock to take - /// Context-specific information; was returned by - public void Unlock(ref Key key, LockType lockType, long lockContext) + /// Information about the acquired lock + public void Unlock(ref Key key, LockType lockType, ref LockInfo lockInfo) { + CheckAcquired(); LockOperation lockOp = new(LockOperationType.Unlock, lockType); Input input = default; @@ -124,17 +203,35 @@ public void Unlock(ref Key key, LockType lockType, long lockContext) var status = clientSession.fht.ContextRead(ref key, ref input, ref output, ref lockOp, ref recordMetadata, ReadFlags.None, context: default, FasterSession, serialNo: 0, clientSession.ctx); if (status == Status.PENDING) { - // Do nothing here, as a lock that goes into the immutable region is considered unlocked. - UnsafeSuspendThread(); - clientSession.CompletePending(wait: true); - UnsafeResumeThread(); + // Do nothing here, as a lock that goes into the on-disk region is considered unlocked--we will not allow that anyway. + // This bottoms out in WaitPending which assumes the epoch is protected, and releases it. So we don't release it here. + this.UnsafeCompletePending(wait: true); } + + if (lockInfo.LockType == LockType.Exclusive) + --this.exclusiveLockCount; + else + --this.sharedLockCount; } /// - /// The minimum valid address for a locked record (includes copies to tail). + /// Lock the key with the specified lock type. 
+ /// + /// The key to lock + /// Information about the acquired lock + public void Unlock(Key key, ref LockInfo lockInfo) + => Unlock(ref key, lockInfo.LockType, ref lockInfo); + + /// + /// Lock the key with the specified /// - public long MinimumValidLockAddress => clientSession.fht.Log.ReadOnlyAddress; + /// The key to lock + /// The type of lock to take + public void Unlock(Key key, LockType lockType) + { + LockInfo lockInfo = default; + Unlock(ref key, lockType, ref lockInfo); + } #endregion Key Locking @@ -415,13 +512,15 @@ public InternalFasterSession(ClientSession true; // Check user's setting in FasterKV to know whose lock scheme to use + public bool SupportsLocking => false; // We only lock explicitly in Lock/Unlock, which are longer-duration locks. public bool SupportsPostOperations => true; // We need this for user record locking, but check for user's setting before calling user code + + public bool IsManualOperations => true; #endregion IFunctions - Optional features supported [MethodImpl(MethodImplOptions.AggressiveInlining)] - void HandleLockOperation(ref RecordInfo recordInfo, ref Key key, ref Value value, ref LockOperation lockOp, out bool isLock) + void HandleLockOperation(ref RecordInfo recordInfo, ref LockOperation lockOp, out bool isLock) { isLock = false; if (lockOp.LockOperationType == LockOperationType.Unlock) @@ -432,20 +531,20 @@ void HandleLockOperation(ref RecordInfo recordInfo, ref Key key, ref Value value recordInfo.SetInvalid(); } if (lockOp.LockType == LockType.Shared) - this.UnlockShared(ref recordInfo, ref key, ref value, lockOp.LockContext); + this.UnlockShared(ref recordInfo); else if (lockOp.LockType == LockType.Exclusive) - this.UnlockExclusive(ref recordInfo, ref key, ref value, lockOp.LockContext); + this.UnlockExclusive(ref recordInfo); else Debug.Fail($"Unexpected LockType: {lockOp.LockType}"); return; } isLock = true; if (lockOp.LockType == LockType.Shared) - this.LockShared(ref recordInfo, ref key, ref value, ref 
lockOp.LockContext); + this.LockShared(ref recordInfo); else if (lockOp.LockType == LockType.Exclusive) - this.LockExclusive(ref recordInfo, ref key, ref value, ref lockOp.LockContext); + this.LockExclusive(ref recordInfo); else if (lockOp.LockType == LockType.ExclusiveFromShared) - this.LockExclusiveFromShared(ref recordInfo, ref key, ref value, ref lockOp.LockContext); + this.LockExclusiveFromShared(ref recordInfo); else Debug.Fail($"Unexpected LockType: {lockOp.LockType}"); } @@ -457,7 +556,7 @@ public bool SingleReader(ref Key key, ref Input input, ref Value value, ref Outp if (lockOp.IsSet) { // No value is returned to the client through the lock sequence; for consistency all key locks must be acquired before their values are read. - HandleLockOperation(ref recordInfo, ref key, ref value, ref lockOp, out _); + HandleLockOperation(ref recordInfo, ref lockOp, out _); return true; } return _clientSession.functions.SingleReader(ref key, ref input, ref value, ref dst, ref recordInfo, address); @@ -469,7 +568,7 @@ public bool ConcurrentReader(ref Key key, ref Input input, ref Value value, ref if (lockOp.IsSet) { // No value is returned to the client through the lock sequence; for consistency all key locks must be acquired before their values are read. 
- HandleLockOperation(ref recordInfo, ref key, ref value, ref lockOp, out _); + HandleLockOperation(ref recordInfo, ref lockOp, out _); return true; } return _clientSession.functions.ConcurrentReader(ref key, ref input, ref value, ref dst, ref recordInfo, address); @@ -490,9 +589,20 @@ public void SingleWriter(ref Key key, ref Input input, ref Value src, ref Value { _clientSession.functions.SingleWriter(ref key, ref input, ref src, ref dst, ref output, ref recordInfo, address); - // Lock (or unlock) here, and do not unlock in PostSingleWriter; wait for the user to explicitly unlock + // Lock here, and do not unlock in PostSingleWriter; wait for the user to explicitly unlock if (lockOp.IsSet) - HandleLockOperation(ref recordInfo, ref key, ref dst, ref lockOp, out _); + { + Debug.Assert(lockOp.LockOperationType != LockOperationType.Unlock); // Should have caught this in InternalUpsert + HandleLockOperation(ref recordInfo, ref lockOp, out _); + + // If this is a lock for upsert, then we've failed to find an in-memory record for this key, and we're creating a stub with a default value. + if (lockOp.LockOperationType == LockOperationType.LockUpsert) + recordInfo.Stub = true; + } + else if (lockOp.IsStubPromotion) + { + this.LockExclusive(ref recordInfo); + } } [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -508,7 +618,7 @@ public bool ConcurrentWriter(ref Key key, ref Input input, ref Value src, ref Va if (lockOp.IsSet) { // All lock operations in ConcurrentWriter can return immediately. 
- HandleLockOperation(ref recordInfo, ref key, ref dst, ref lockOp, out _); + HandleLockOperation(ref recordInfo, ref lockOp, out _); return true; } @@ -520,48 +630,48 @@ public void UpsertCompletionCallback(ref Key key, ref Input input, ref Value val => _clientSession.functions.UpsertCompletionCallback(ref key, ref input, ref value, ctx); #endregion IFunctions - Upserts -#region IFunctions - RMWs -#region InitialUpdater + #region IFunctions - RMWs + #region InitialUpdater [MethodImpl(MethodImplOptions.AggressiveInlining)] public bool NeedInitialUpdate(ref Key key, ref Input input, ref Output output) => _clientSession.functions.NeedInitialUpdate(ref key, ref input, ref output); [MethodImpl(MethodImplOptions.AggressiveInlining)] - public void InitialUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, long address, out long lockContext) + public void InitialUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref LockOperation lockOp, ref RecordInfo recordInfo, long address) { - lockContext = default; _clientSession.functions.InitialUpdater(ref key, ref input, ref value, ref output, ref recordInfo, address); + if (lockOp.IsStubPromotion) + { + this.LockExclusive(ref recordInfo); + } } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public void PostInitialUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, long address, long lockContext) + public void PostInitialUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, long address) { if (_clientSession.functions.SupportsPostOperations) _clientSession.functions.PostInitialUpdater(ref key, ref input, ref value, ref output, ref recordInfo, address); } -#endregion InitialUpdater + #endregion InitialUpdater -#region CopyUpdater + #region CopyUpdater [MethodImpl(MethodImplOptions.AggressiveInlining)] public bool NeedCopyUpdate(ref Key key, ref Input input, 
ref Value oldValue, ref Output output) => _clientSession.functions.NeedCopyUpdate(ref key, ref input, ref oldValue, ref output); [MethodImpl(MethodImplOptions.AggressiveInlining)] - public void CopyUpdater(ref Key key, ref Input input, ref Value oldValue, ref Value newValue, ref Output output, ref RecordInfo recordInfo, long address, out long lockContext) - { - lockContext = 0; - _clientSession.functions.CopyUpdater(ref key, ref input, ref oldValue, ref newValue, ref output, ref recordInfo, address); - } + public void CopyUpdater(ref Key key, ref Input input, ref Value oldValue, ref Value newValue, ref Output output, ref RecordInfo recordInfo, long address) + => _clientSession.functions.CopyUpdater(ref key, ref input, ref oldValue, ref newValue, ref output, ref recordInfo, address); [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool PostCopyUpdater(ref Key key, ref Input input, ref Value oldValue, ref Value newValue, ref Output output, ref RecordInfo recordInfo, long address, long lockContext) + public bool PostCopyUpdater(ref Key key, ref Input input, ref Value oldValue, ref Value newValue, ref Output output, ref RecordInfo recordInfo, long address) { return !_clientSession.functions.SupportsPostOperations || _clientSession.functions.PostCopyUpdater(ref key, ref input, ref oldValue, ref newValue, ref output, ref recordInfo, address); } -#endregion CopyUpdater + #endregion CopyUpdater -#region InPlaceUpdater + #region InPlaceUpdater [MethodImpl(MethodImplOptions.AggressiveInlining)] public bool InPlaceUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, long address) { @@ -572,14 +682,13 @@ public bool InPlaceUpdater(ref Key key, ref Input input, ref Value value, ref Ou public void RMWCompletionCallback(ref Key key, ref Input input, ref Output output, Context ctx, Status status, RecordMetadata recordMetadata) => _clientSession.functions.RMWCompletionCallback(ref key, ref input, ref output, ctx, status, 
recordMetadata); -#endregion InPlaceUpdater -#endregion IFunctions - RMWs + #endregion InPlaceUpdater + #endregion IFunctions - RMWs -#region IFunctions - Deletes + #region IFunctions - Deletes [MethodImpl(MethodImplOptions.AggressiveInlining)] public void PostSingleDeleter(ref Key key, ref RecordInfo recordInfo, long address) { - // There is no value to lock here, so we take a RecordInfo lock in InternalDelete and release it here. if (_clientSession.functions.SupportsPostOperations) _clientSession.functions.PostSingleDeleter(ref key, ref recordInfo, address); } @@ -593,64 +702,33 @@ public bool ConcurrentDeleter(ref Key key, ref Value value, ref RecordInfo recor public void DeleteCompletionCallback(ref Key key, Context ctx) => _clientSession.functions.DeleteCompletionCallback(ref key, ctx); -#endregion IFunctions - Deletes + #endregion IFunctions - Deletes -#region IFunctions - Locking + #region IFunctions - Locking - public void LockExclusive(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext) - { - if (_clientSession.fht.SupportsLocking) - _clientSession.functions.LockExclusive(ref recordInfo, ref key, ref value, ref lockContext); - else - recordInfo.LockExclusive(); - } + public void LockExclusive(ref RecordInfo recordInfo) => recordInfo.LockExclusive(); - public void UnlockExclusive(ref RecordInfo recordInfo, ref Key key, ref Value value, long lockContext) - { - if (_clientSession.fht.SupportsLocking) - _clientSession.functions.UnlockExclusive(ref recordInfo, ref key, ref value, lockContext); - else - recordInfo.UnlockExclusive(); - } + public void UnlockExclusive(ref RecordInfo recordInfo) => recordInfo.UnlockExclusive(); - public bool TryLockExclusive(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext, int spinCount = 1) - => _clientSession.fht.SupportsLocking - ? 
_clientSession.functions.TryLockExclusive(ref recordInfo, ref key, ref value, ref lockContext, spinCount) - : recordInfo.TryLockExclusive(spinCount); + public bool TryLockExclusive(ref RecordInfo recordInfo, int spinCount = 1) => recordInfo.TryLockExclusive(spinCount); - public void LockShared(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext) - { - if (_clientSession.fht.SupportsLocking) - _clientSession.functions.LockShared(ref recordInfo, ref key, ref value, ref lockContext); - else - recordInfo.LockShared(); - } + public void LockShared(ref RecordInfo recordInfo) => recordInfo.LockShared(); - public bool UnlockShared(ref RecordInfo recordInfo, ref Key key, ref Value value, long lockContext) - { - if (_clientSession.fht.SupportsLocking) - return _clientSession.functions.UnlockShared(ref recordInfo, ref key, ref value, lockContext); - recordInfo.UnlockShared(); - return true; - } + public void UnlockShared(ref RecordInfo recordInfo) => recordInfo.UnlockShared(); - public bool TryLockShared(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext, int spinCount = 1) - => _clientSession.fht.SupportsLocking - ? 
_clientSession.functions.TryLockShared(ref recordInfo, ref key, ref value, ref lockContext, spinCount) - : recordInfo.TryLockShared(spinCount); + public bool TryLockShared(ref RecordInfo recordInfo, int spinCount = 1) => recordInfo.TryLockShared(spinCount); - public void LockExclusiveFromShared(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext) - { - if (_clientSession.fht.SupportsLocking) - _clientSession.functions.LockExclusiveFromShared(ref recordInfo, ref key, ref value, ref lockContext); - else - recordInfo.LockExclusiveFromShared(); - } + public void LockExclusiveFromShared(ref RecordInfo recordInfo) => recordInfo.LockExclusiveFromShared(); + + public bool TryLockExclusiveFromShared(ref RecordInfo recordInfo, int spinCount = 1) => recordInfo.TryLockExclusiveFromShared(spinCount); - public bool TryLockExclusiveFromShared(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext, int spinCount = 1) - => _clientSession.fht.SupportsLocking - ? 
_clientSession.functions.TryLockExclusiveFromShared(ref recordInfo, ref key, ref value, ref lockContext, spinCount) - : recordInfo.TryLockExclusiveFromShared(spinCount); + public bool IsLocked(ref RecordInfo recordInfo) => recordInfo.IsLocked; + + public bool IsLockedExclusive(ref RecordInfo recordInfo) => recordInfo.IsLockedExclusive; + + public bool IsLockedShared(ref RecordInfo recordInfo) => recordInfo.IsLockedShared; + + public void TransferLocks(ref RecordInfo oldRecordInfo, ref RecordInfo newRecordInfo) => newRecordInfo.TransferLocksFrom(ref oldRecordInfo); #endregion IFunctions - Locking #region IFunctions - Checkpointing @@ -661,7 +739,7 @@ public void CheckpointCompletionCallback(string guid, CommitPoint commitPoint) } #endregion IFunctions - Checkpointing -#region Internal utilities + #region Internal utilities public int GetInitialLength(ref Input input) => _clientSession.variableLengthStruct.GetInitialLength(ref input); @@ -681,9 +759,8 @@ public IHeapContainer GetHeapContainer(ref Input input) public bool CompletePendingWithOutputs(out CompletedOutputIterator completedOutputs, bool wait = false, bool spinWaitForCommit = false) => _clientSession.CompletePendingWithOutputs(out completedOutputs, wait, spinWaitForCommit); -#endregion Internal utilities + #endregion Internal utilities } - #endregion IFasterSession } } diff --git a/cs/src/core/Index/Common/CompletedOutput.cs b/cs/src/core/Index/Common/CompletedOutput.cs index 03fba8aa6..401cb7c20 100644 --- a/cs/src/core/Index/Common/CompletedOutput.cs +++ b/cs/src/core/Index/Common/CompletedOutput.cs @@ -112,11 +112,6 @@ public struct CompletedOutput /// public Status Status; - /// - /// The lock context for - /// - public long LockContext; - internal void Set(ref FasterKV.PendingContext pendingContext, Status status) { this.keyContainer = pendingContext.key; diff --git a/cs/src/core/Index/Common/Contexts.cs b/cs/src/core/Index/Common/Contexts.cs index b7f080c5d..320684f00 100644 --- 
a/cs/src/core/Index/Common/Contexts.cs +++ b/cs/src/core/Index/Common/Contexts.cs @@ -84,19 +84,20 @@ internal struct PendingContext internal HashBucketEntry entry; internal LatchOperation heldLatch; - internal byte operationFlags; + internal ushort operationFlags; internal RecordInfo recordInfo; internal long minAddress; internal LockOperation lockOperation; // Note: Must be kept in sync with corresponding ReadFlags enum values - internal const byte kSkipReadCache = 0x01; - internal const byte kMinAddress = 0x02; - internal const byte kCopyReadsToTail = 0x04; + internal const ushort kSkipReadCache = 0x0001; + internal const ushort kMinAddress = 0x0002; + internal const ushort kCopyReadsToTail = 0x0004; + internal const ushort kSkipCopyReadsToTail = 0x0008; - internal const byte kNoKey = 0x10; - internal const byte kSkipCopyReadsToTail = 0x20; - internal const byte kIsAsync = 0x40; + internal const ushort kNoKey = 0x0100; + internal const ushort kIsAsync = 0x0200; + internal const ushort kIsReadingAtAddress = 0x0400; [MethodImpl(MethodImplOptions.AggressiveInlining)] internal IHeapContainer DetachKey() @@ -115,11 +116,11 @@ internal IHeapContainer DetachInput() } [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static byte GetOperationFlags(ReadFlags readFlags, bool noKey = false) + internal static ushort GetOperationFlags(ReadFlags readFlags, bool noKey = false) { - Debug.Assert((byte)ReadFlags.SkipReadCache == kSkipReadCache); - Debug.Assert((byte)ReadFlags.MinAddress == kMinAddress); - byte flags = (byte)(readFlags & (ReadFlags.SkipReadCache | ReadFlags.MinAddress | ReadFlags.CopyToTail)); + Debug.Assert((ushort)ReadFlags.SkipReadCache == kSkipReadCache); + Debug.Assert((ushort)ReadFlags.MinAddress == kMinAddress); + ushort flags = (ushort)(readFlags & (ReadFlags.SkipReadCache | ReadFlags.MinAddress | ReadFlags.CopyToTail | ReadFlags.SkipCopyToTail)); if (noKey) flags |= kNoKey; // This is always set true for the Read overloads (Reads by address) 
that call this method. @@ -132,7 +133,7 @@ internal void SetOperationFlags(ReadFlags readFlags, long address, bool noKey = => this.SetOperationFlags(GetOperationFlags(readFlags, noKey), address); [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal void SetOperationFlags(byte flags, long address) + internal void SetOperationFlags(ushort flags, long address) { this.operationFlags = flags; if (this.HasMinAddress) @@ -175,6 +176,12 @@ internal bool IsAsync set => operationFlags = value ? (byte)(operationFlags | kIsAsync) : (byte)(operationFlags & ~kIsAsync); } + internal bool IsReadingAtAddress + { + get => (operationFlags & kIsReadingAtAddress) != 0; + set => operationFlags = value ? (ushort)(operationFlags | kIsReadingAtAddress) : (ushort)(operationFlags & ~kIsReadingAtAddress); + } + public void Dispose() { key?.Dispose(); diff --git a/cs/src/core/Index/Common/RecordInfo.cs b/cs/src/core/Index/Common/RecordInfo.cs index 1211051ae..2193ca022 100644 --- a/cs/src/core/Index/Common/RecordInfo.cs +++ b/cs/src/core/Index/Common/RecordInfo.cs @@ -68,6 +68,14 @@ public static void WriteInfo(ref RecordInfo info, bool inNewVersion, bool tombst info.InNewVersion = inNewVersion; } + public bool IsLocked => (word & (kExclusiveLockBitMask | kSharedLockMaskInWord)) != 0; + + public bool IsLockedExclusive => (word & kExclusiveLockBitMask) != 0; + + public bool IsLockedShared => (word & kSharedLockMaskInWord) != 0; + + public bool IsIntermediate => (word & (kStubBitMask | kSealedBitMask)) != 0; + /// /// Take exclusive (write) lock on RecordInfo /// @@ -80,7 +88,7 @@ public static void WriteInfo(ref RecordInfo info, bool inNewVersion, bool tombst [MethodImpl(MethodImplOptions.AggressiveInlining)] public void UnlockExclusive() { - Debug.Assert((word & kExclusiveLockBitMask) != 0); + Debug.Assert(IsLockedExclusive); word &= ~kExclusiveLockBitMask; // Safe because there should be no other threads (e.g., readers) updating the word at this point } @@ -184,6 +192,14 @@ public 
bool TryLockExclusiveFromShared(int spinCount = 1) return true; } + public void TransferLocksFrom(ref RecordInfo other) + { + // We should only be calling this when the record is sealed, to avoid an attempt to do a lock operation on the old record during this. + Debug.Assert(other.Sealed); + word &= ~(kExclusiveLockBitMask | kSharedLockMaskInWord); + word |= (other.word & (kExclusiveLockBitMask | kSharedLockMaskInWord)); + } + public bool IsNull() => word == 0; public bool Tombstone @@ -278,6 +294,8 @@ public bool InNewVersion public bool Invalid => (word & kValidBitMask) == 0; + public bool SkipOnScan => Invalid || (word & (kSealedBitMask | kStubBitMask)) != 0; + public long PreviousAddress { get => word & kPreviousAddressMaskInWord; diff --git a/cs/src/core/Index/FASTER/FASTER.cs b/cs/src/core/Index/FASTER/FASTER.cs index ad602bf58..4ed1de0cd 100644 --- a/cs/src/core/Index/FASTER/FASTER.cs +++ b/cs/src/core/Index/FASTER/FASTER.cs @@ -26,7 +26,8 @@ public enum ReadFlags None = 0, /// - /// Skip the ReadCache when reading, including not inserting to ReadCache when pending reads are complete + /// Skip the ReadCache when reading, including not inserting to ReadCache when pending reads are complete. + /// May be used with ReadAtAddress, to avoid copying earlier versions. /// SkipReadCache = 0x00000001, @@ -41,6 +42,16 @@ public enum ReadFlags /// locking. /// CopyToTail = 0x00000004, + + /// + /// Skip copying to tail even if the FasterKV constructor specified it. May be used with ReadAtAddress, to avoid copying earlier versions. + /// + SkipCopyToTail = 0x00000008, + + /// + /// Utility to combine these flags. May be used with ReadAtAddress, to avoid copying earlier versions. 
+ /// + SkipCopyReads = SkipReadCache | SkipCopyToTail, } public partial class FasterKV : FasterBase, @@ -597,13 +608,11 @@ internal Status ContextRead(ref Key key, if (internalStatus == OperationStatus.SUCCESS || internalStatus == OperationStatus.NOTFOUND) { recordMetadata = new(pcontext.recordInfo, pcontext.logicalAddress); - lockOp.LockContext = pcontext.lockOperation.LockContext; status = (Status)internalStatus; } else { recordMetadata = default; - lockOp.LockContext = default; status = HandleOperationStatus(sessionCtx, sessionCtx, ref pcontext, fasterSession, internalStatus, false, out _); } @@ -684,13 +693,11 @@ internal Status ContextUpsert(ref Key key if (internalStatus == OperationStatus.SUCCESS || internalStatus == OperationStatus.NOTFOUND) { recordMetadata = new(pcontext.recordInfo, pcontext.logicalAddress); - lockOp.LockContext = pcontext.lockOperation.LockContext; status = (Status)internalStatus; } else { recordMetadata = default; - lockOp.LockContext = default; status = HandleOperationStatus(sessionCtx, sessionCtx, ref pcontext, fasterSession, internalStatus, false, out _); } diff --git a/cs/src/core/Index/FASTER/FASTERImpl.cs b/cs/src/core/Index/FASTER/FASTERImpl.cs index e2d96fb7f..35665b16e 100644 --- a/cs/src/core/Index/FASTER/FASTERImpl.cs +++ b/cs/src/core/Index/FASTER/FASTERImpl.cs @@ -61,6 +61,48 @@ internal enum LatchOperation : byte Exclusive } + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static OperationStatus RetryOnIntermediateRecord(ref RecordInfo recordInfo, ref Key key, ref Value value, FasterSession fasterSession) + where FasterSession : IFasterSession + { + // These are "intermediate" states that will be replaced by either a Delete or an Upsert of an actual value. + // - If the record is Sealed, we do not acquire the lock, because that would be a problem if we were in the middle of transferring the locks from one record to another. + // Sealed is very short-duration, so we just yield and then RETRY_NOW. 
+ // - A Stub lock is held longer, but still likely only for the duration of one or more operations on multiple records, e.g. "acquire a lock on 3 source records and 1 + // destination record, then do an operation on the source values and put them into the destination value." So for this case we lock, then immediately unlock and RETRY_NOW. + if (!recordInfo.Sealed && fasterSession.SupportsLocking) + { + fasterSession.LockShared(ref recordInfo); + fasterSession.UnlockShared(ref recordInfo); + } + else + Thread.Yield(); + return OperationStatus.RETRY_NOW; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static bool SealRecord(ref RecordInfo recordInfo, ref Key key, ref Value value, FasterSession fasterSession) + where FasterSession : IFasterSession + { + // This is the only time we do a Sealed-related lock, and it's just to know whether we are the thread that set it to Sealed. + if (fasterSession.SupportsLocking) + fasterSession.LockExclusive(ref recordInfo); + if (recordInfo.Sealed || recordInfo.Invalid || recordInfo.Tombstone) + { + // Another thread was doing the same thing; unlock and retry. + if (fasterSession.SupportsLocking) + fasterSession.UnlockExclusive(ref recordInfo); + return false; + } + + // We were the sealer, so continue the current operation. Note: the caller here does not unseal; the record must remain sealed to avoid a race condition + // when the CAS of the new record has not yet been done. 
+ recordInfo.Sealed = true; + return true; + } + + internal static bool IsIntermediate(ref RecordInfo recordInfo, bool isReadingAtAddress = false) => recordInfo.Stub || (recordInfo.Sealed && !isReadingAtAddress); + #region Read Operation /// @@ -98,17 +140,17 @@ internal enum LatchOperation : byte /// /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal OperationStatus InternalRead( + internal OperationStatus InternalRead( ref Key key, ref Input input, ref Output output, long startAddress, ref Context userContext, ref PendingContext pendingContext, - Functions fasterSession, + FasterSession fasterSession, FasterExecutionContext sessionCtx, long lsn) - where Functions : IFasterSession + where FasterSession : IFasterSession { var bucket = default(HashBucket*); var slot = default(int); @@ -157,7 +199,9 @@ internal OperationStatus InternalRead( // This is not called when looking up by address, so we do not set pendingContext.recordInfo. // ReadCache addresses are not valid for indexing etc. so pass kInvalidAddress. - return fasterSession.SingleReader(ref key, ref input, ref readcache.GetValue(physicalAddress), ref output, ref pendingContext.lockOperation, ref readcache.GetInfo(physicalAddress), Constants.kInvalidAddress) + ref RecordInfo recordInfo = ref readcache.GetInfo(physicalAddress); + pendingContext.recordInfo = recordInfo; + return fasterSession.SingleReader(ref key, ref input, ref readcache.GetValue(physicalAddress), ref output, ref pendingContext.lockOperation, ref recordInfo, Constants.kInvalidAddress) ? 
OperationStatus.SUCCESS : OperationStatus.NOTFOUND; } } @@ -206,37 +250,43 @@ internal OperationStatus InternalRead( pendingContext.recordInfo = recordInfo; pendingContext.logicalAddress = logicalAddress; ref Value recordValue = ref hlog.GetValue(physicalAddress); - if (recordInfo.Sealed || recordInfo.Stub) + + if (recordInfo.Tombstone) { - // These are "intermediate" states that will be replaced by either a Delete or an Upsert of an actual value. Acquire the lock, then release and retry immediately. - long lockContext = default; - fasterSession.LockShared(ref recordInfo, ref key, ref recordValue, ref lockContext); - fasterSession.UnlockShared(ref recordInfo, ref key, ref recordValue, lockContext); - return OperationStatus.RETRY_NOW; + if (fasterSession.IsManualOperations && pendingContext.lockOperation.IsSet) + { + fasterSession.ConcurrentReader(ref key, ref input, ref recordValue, ref output, ref pendingContext.lockOperation, ref recordInfo, logicalAddress); + return OperationStatus.SUCCESS; + } } - return !recordInfo.Tombstone - && fasterSession.ConcurrentReader(ref key, ref input, ref recordValue, ref output, ref pendingContext.lockOperation, ref recordInfo, logicalAddress) - ? 
OperationStatus.SUCCESS - : OperationStatus.NOTFOUND; + else if (fasterSession.ConcurrentReader(ref key, ref input, ref recordValue, ref output, ref pendingContext.lockOperation, ref recordInfo, logicalAddress)) + return OperationStatus.SUCCESS; + return OperationStatus.NOTFOUND; } // Immutable region else if (logicalAddress >= hlog.HeadAddress) { - pendingContext.recordInfo = hlog.GetInfo(physicalAddress); + ref RecordInfo recordInfo = ref hlog.GetInfo(physicalAddress); + pendingContext.recordInfo = recordInfo; pendingContext.logicalAddress = logicalAddress; - if (pendingContext.recordInfo.Sealed || pendingContext.recordInfo.Stub) + if (IsIntermediate(ref pendingContext.recordInfo, useStartAddress)) + return RetryOnIntermediateRecord(ref recordInfo, ref key, ref hlog.GetValue(physicalAddress), fasterSession); + + if (recordInfo.Tombstone) { - // These are "intermediate" states that will be replaced by either a Delete or an Upsert of an actual value. We can't lock immutable records, so retry immediately. 
- return OperationStatus.RETRY_NOW; + if (fasterSession.IsManualOperations && pendingContext.lockOperation.IsSet) + { + fasterSession.SingleReader(ref key, ref input, ref hlog.GetValue(physicalAddress), ref output, ref pendingContext.lockOperation, ref recordInfo, logicalAddress); + return OperationStatus.SUCCESS; + } } - if (!pendingContext.recordInfo.Tombstone - && fasterSession.SingleReader(ref key, ref input, ref hlog.GetValue(physicalAddress), ref output, ref pendingContext.lockOperation, ref pendingContext.recordInfo, logicalAddress)) + else if (fasterSession.SingleReader(ref key, ref input, ref hlog.GetValue(physicalAddress), ref output, ref pendingContext.lockOperation, ref recordInfo, logicalAddress)) { - if ((CopyReadsToTail == CopyReadsToTail.FromReadOnly && !pendingContext.SkipCopyReadsToTail) || pendingContext.CopyReadsToTail) + if (CopyReadsToTail == CopyReadsToTail.FromReadOnly && !pendingContext.SkipCopyReadsToTail) { var container = hlog.GetValueContainer(ref hlog.GetValue(physicalAddress)); - InternalTryCopyToTail(ref key, ref input, ref container.Get(), ref output, ref pendingContext.lockOperation, logicalAddress, fasterSession, sessionCtx); + InternalTryCopyToTail(ref pendingContext, ref key, ref input, ref container.Get(), ref output, ref pendingContext.lockOperation, logicalAddress, fasterSession, sessionCtx); container.Dispose(); } return OperationStatus.SUCCESS; @@ -305,6 +355,7 @@ internal OperationStatus InternalRead( pendingContext.serialNum = lsn; pendingContext.heldLatch = heldOperation; pendingContext.recordInfo.PreviousAddress = startAddress; + pendingContext.IsReadingAtAddress = useStartAddress; } #endregion @@ -402,59 +453,56 @@ internal OperationStatus InternalUpsert( #endregion // Optimization for the most common case - long stubOrSealedPhysicalAddress = Constants.kInvalidAddress; - long sealedLockContext = default; - if (sessionCtx.phase == Phase.REST && logicalAddress >= hlog.ReadOnlyAddress) + long stubPhysicalAddress = 
Constants.kInvalidAddress; + if (sessionCtx.phase == Phase.REST) { - ref RecordInfo recordInfo = ref hlog.GetInfo(physicalAddress); - if (recordInfo.Stub) - { - // This is only for ManualFasterOperations, and we assume we hold this lock. We will need to transfer the lock - // to the updated record. - stubOrSealedPhysicalAddress = physicalAddress; - goto CreateNewRecord; - } - ref Value recordValue = ref hlog.GetValue(physicalAddress); - if (recordInfo.Sealed) - { - // This is an "intermediate" state from a different session that will be replaced by an Upsert of an updated value. Acquire the lock, then release and retry immediately. - long lockContext = default; - fasterSession.LockShared(ref recordInfo, ref key, ref recordValue, ref lockContext); - fasterSession.UnlockShared(ref recordInfo, ref key, ref recordValue, lockContext); - return OperationStatus.RETRY_NOW; - } - if (!recordInfo.Tombstone) + if (logicalAddress >= hlog.ReadOnlyAddress) { - if (fasterSession.ConcurrentWriter(ref key, ref input, ref value, ref hlog.GetValue(physicalAddress), ref output, ref pendingContext.lockOperation, ref recordInfo, logicalAddress)) + ref RecordInfo recordInfo = ref hlog.GetInfo(physicalAddress); + if (fasterSession.IsManualOperations && recordInfo.Stub) { - hlog.MarkPage(logicalAddress, sessionCtx.version); - pendingContext.recordInfo = recordInfo; - pendingContext.logicalAddress = logicalAddress; - return OperationStatus.SUCCESS; + // This is only for ManualFasterOperations, and we assume we hold this lock. We will need to transfer the lock to the updated record. + stubPhysicalAddress = physicalAddress; + goto CreateNewRecord; } + ref Value recordValue = ref hlog.GetValue(physicalAddress); + if (IsIntermediate(ref recordInfo)) + return RetryOnIntermediateRecord(ref recordInfo, ref key, ref recordValue, fasterSession); - // ConcurrentWriter failed (e.g. insufficient space). Another thread may come along to do this update in-place; Seal it to prevent that. 
- stubOrSealedPhysicalAddress = physicalAddress; - fasterSession.LockExclusive(ref recordInfo, ref key, ref recordValue, ref sealedLockContext); - if (recordInfo.Sealed || recordInfo.Invalid || recordInfo.Tombstone) + if (recordInfo.Tombstone) { - // Another thread was doing the same thing; unlock and retry. - fasterSession.UnlockExclusive(ref recordInfo, ref key, ref recordValue, sealedLockContext); - return OperationStatus.RETRY_NOW; + if (fasterSession.IsManualOperations && pendingContext.lockOperation.IsSet) + { + fasterSession.ConcurrentWriter(ref key, ref input, ref value, ref hlog.GetValue(physicalAddress), ref output, ref pendingContext.lockOperation, ref recordInfo, logicalAddress); + return OperationStatus.SUCCESS; + } } + else + { + if (fasterSession.ConcurrentWriter(ref key, ref input, ref value, ref hlog.GetValue(physicalAddress), ref output, ref pendingContext.lockOperation, ref recordInfo, logicalAddress)) + { + hlog.MarkPage(logicalAddress, sessionCtx.version); + pendingContext.recordInfo = recordInfo; + pendingContext.logicalAddress = logicalAddress; + return OperationStatus.SUCCESS; + } + + // ConcurrentWriter failed (e.g. insufficient space). Another thread may come along to do this update in-place; Seal it to prevent that. 
+ if (!SealRecord(ref recordInfo, ref key, ref recordValue, fasterSession)) + return OperationStatus.RETRY_NOW; + } + goto CreateNewRecord; } - goto CreateNewRecord; } - #region Entry latch operation +#region Entry latch operation if (sessionCtx.phase != Phase.REST) { latchDestination = AcquireLatchUpsert(sessionCtx, bucket, ref status, ref latchOperation, ref entry, logicalAddress); } - #endregion - +#endregion - #region Normal processing +#region Normal processing // Mutable Region: Update the record in-place if (latchDestination == LatchDestination.NormalProcessing) @@ -462,14 +510,74 @@ internal OperationStatus InternalUpsert( if (logicalAddress >= hlog.ReadOnlyAddress) { ref RecordInfo recordInfo = ref hlog.GetInfo(physicalAddress); - if (!recordInfo.Tombstone - && fasterSession.ConcurrentWriter(ref key, ref input, ref value, ref hlog.GetValue(physicalAddress), ref output, ref pendingContext.lockOperation, ref recordInfo, logicalAddress)) + if (fasterSession.IsManualOperations && recordInfo.Stub) + { + // This is only for ManualFasterOperations, and we assume we hold this lock. We will need to transfer the lock to the updated record. 
+ stubPhysicalAddress = physicalAddress; + status = OperationStatus.SUCCESS; + goto CreateNewRecord; + } + ref Value recordValue = ref hlog.GetValue(physicalAddress); + if (IsIntermediate(ref recordInfo)) + { + status = RetryOnIntermediateRecord(ref recordInfo, ref key, ref recordValue, fasterSession); + goto LatchRelease; // Release shared latch (if acquired) + } + + if (recordInfo.Tombstone) { - if (sessionCtx.phase == Phase.REST) hlog.MarkPage(logicalAddress, sessionCtx.version); - else hlog.MarkPageAtomic(logicalAddress, sessionCtx.version); + if (fasterSession.IsManualOperations && pendingContext.lockOperation.IsSet) + { + fasterSession.ConcurrentWriter(ref key, ref input, ref value, ref recordValue, ref output, ref pendingContext.lockOperation, ref recordInfo, logicalAddress); + status = OperationStatus.SUCCESS; + goto LatchRelease; // Release shared latch (if acquired) + } + } + else + { + if (fasterSession.ConcurrentWriter(ref key, ref input, ref value, ref recordValue, ref output, ref pendingContext.lockOperation, ref recordInfo, logicalAddress)) + { + if (sessionCtx.phase == Phase.REST) + hlog.MarkPage(logicalAddress, sessionCtx.version); + else + hlog.MarkPageAtomic(logicalAddress, sessionCtx.version); + pendingContext.recordInfo = recordInfo; + pendingContext.logicalAddress = logicalAddress; + status = OperationStatus.SUCCESS; + goto LatchRelease; // Release shared latch (if acquired) + } + + // ConcurrentWriter failed (e.g. insufficient space). Another thread may come along to do this update in-place; Seal it to prevent that. 
+ if (!SealRecord(ref recordInfo, ref key, ref recordValue, fasterSession)) + return OperationStatus.RETRY_NOW; + } + } + else if (fasterSession.IsManualOperations) + { + if (logicalAddress >= hlog.HeadAddress) + { + physicalAddress = hlog.GetPhysicalAddress(logicalAddress); + ref RecordInfo recordInfo = ref hlog.GetInfo(physicalAddress); + ref Value recordValue = ref hlog.GetValue(physicalAddress); pendingContext.recordInfo = recordInfo; pendingContext.logicalAddress = logicalAddress; status = OperationStatus.SUCCESS; + + if (pendingContext.lockOperation.IsSet) + { + fasterSession.ConcurrentWriter(ref key, ref input, ref value, ref recordValue, ref output, ref pendingContext.lockOperation, ref recordInfo, logicalAddress); + goto LatchRelease; // Release shared latch (if acquired) + } + + // This ManualOps instance already owns this lock, or we wouldn't be here. Create a new record and transfer the lock + if (recordInfo.Stub || recordInfo.IsLocked) + stubPhysicalAddress = physicalAddress; + goto CreateNewRecord; + } + else if (pendingContext.lockOperation.LockOperationType == LockOperationType.Unlock) + { + Debug.Fail("Trying to unlock a non-existent value"); + status = OperationStatus.SUCCESS; goto LatchRelease; // Release shared latch (if acquired) } } @@ -483,25 +591,23 @@ internal OperationStatus InternalUpsert( if (latchDestination != LatchDestination.CreatePendingContext) { // Immutable region or new record - status = CreateNewRecordUpsert(ref key, ref input, ref value, ref output, ref pendingContext, fasterSession, sessionCtx, bucket, slot, tag, entry, latestLogicalAddress); + status = CreateNewRecordUpsert(ref key, ref input, ref value, ref output, ref pendingContext, fasterSession, sessionCtx, bucket, slot, tag, entry, latestLogicalAddress, + stubPhysicalAddress != Constants.kInvalidAddress); if (status != OperationStatus.ALLOCATE_FAILED) { - if (stubOrSealedPhysicalAddress != Constants.kInvalidAddress) + if (fasterSession.IsManualOperations && 
stubPhysicalAddress != Constants.kInvalidAddress) { - // Mark the *old* record as Invalid and unlock it--thereby "transferring" the lock to the new record. - ref RecordInfo recordInfo = ref hlog.GetInfo(stubOrSealedPhysicalAddress); - recordInfo.Stub = false; - recordInfo.Sealed = false; - recordInfo.SetInvalid(); - fasterSession.UnlockExclusive(ref recordInfo, ref key, ref hlog.GetValue(physicalAddress), sealedLockContext); + // Unlock the *old* record--thereby "transferring" the lock to the new record. + ref RecordInfo recordInfo = ref hlog.GetInfo(stubPhysicalAddress); + fasterSession.UnlockExclusive(ref recordInfo); } goto LatchRelease; } latchDestination = LatchDestination.CreatePendingContext; } - #endregion +#endregion - #region Create pending context +#region Create pending context Debug.Assert(latchDestination == LatchDestination.CreatePendingContext, $"Upsert CreatePendingContext encountered latchDest == {latchDestination}"); { pendingContext.type = OperationType.UPSERT; @@ -603,7 +709,7 @@ private LatchDestination AcquireLatchUpsert(FasterExecut private OperationStatus CreateNewRecordUpsert(ref Key key, ref Input input, ref Value value, ref Output output, ref PendingContext pendingContext, FasterSession fasterSession, FasterExecutionContext sessionCtx, HashBucket* bucket, int slot, ushort tag, HashBucketEntry entry, - long latestLogicalAddress) + long latestLogicalAddress, bool isStubPromotion) where FasterSession : IFasterSession { var (actualSize, allocateSize) = hlog.GetRecordSize(ref key, ref value); @@ -618,7 +724,10 @@ private OperationStatus CreateNewRecordUpsert /// Read-Modify-Write Operation. Updates value of 'key' using 'input' and current value. 
@@ -741,24 +850,23 @@ internal OperationStatus InternalRMW( out physicalAddress); } } - #endregion +#endregion // Optimization for the most common case - long sealedPhysicalAddress = Constants.kInvalidAddress; - long sealedLockContext = default; + long stubPhysicalAddress = Constants.kInvalidAddress; if (sessionCtx.phase == Phase.REST && logicalAddress >= hlog.ReadOnlyAddress) { ref RecordInfo recordInfo = ref hlog.GetInfo(physicalAddress); - - ref Value recordValue = ref hlog.GetValue(physicalAddress); - if (recordInfo.Sealed || recordInfo.Stub) + if (fasterSession.IsManualOperations && recordInfo.Stub) { - // These are "intermediate" states that will be replaced by either a Delete or an Upsert of an actual value. Acquire the lock, then release and retry immediately. - long lockContext = default; - fasterSession.LockShared(ref recordInfo, ref key, ref recordValue, ref lockContext); - fasterSession.UnlockShared(ref recordInfo, ref key, ref recordValue, lockContext); - return OperationStatus.RETRY_NOW; + // This is only for ManualFasterOperations, and we assume we hold this lock. We will need to transfer the lock to the updated record. + stubPhysicalAddress = physicalAddress; + status = OperationStatus.SUCCESS; + goto CreateNewRecord; } + ref Value recordValue = ref hlog.GetValue(physicalAddress); + if (IsIntermediate(ref recordInfo)) + return RetryOnIntermediateRecord(ref recordInfo, ref key, ref recordValue, fasterSession); if (!recordInfo.Tombstone) { @@ -771,14 +879,8 @@ internal OperationStatus InternalRMW( } // ConcurrentWriter failed (e.g. insufficient space). Another thread may come along to do this update in-place; Seal it to prevent that. - sealedPhysicalAddress = physicalAddress; - fasterSession.LockExclusive(ref recordInfo, ref key, ref recordValue, ref sealedLockContext); - if (recordInfo.Sealed || recordInfo.Invalid || recordInfo.Tombstone) - { - // Another thread was doing the same thing; unlock and retry. 
- fasterSession.UnlockExclusive(ref recordInfo, ref key, ref recordValue, sealedLockContext); + if (!SealRecord(ref recordInfo, ref key, ref recordValue, fasterSession)) return OperationStatus.RETRY_NOW; - } } goto CreateNewRecord; } @@ -788,9 +890,9 @@ internal OperationStatus InternalRMW( { latchDestination = AcquireLatchRMW(pendingContext, sessionCtx, bucket, ref status, ref latchOperation, ref entry, logicalAddress); } - #endregion +#endregion - #region Normal processing +#region Normal processing // Mutable Region: Update the record in-place if (latchDestination == LatchDestination.NormalProcessing) { if (logicalAddress >= hlog.ReadOnlyAddress) { ref RecordInfo recordInfo = ref hlog.GetInfo(physicalAddress); + if (fasterSession.IsManualOperations && recordInfo.Stub) + { + // This is only for ManualFasterOperations, and we assume we hold this lock. We will need to transfer the lock to the updated record. + stubPhysicalAddress = physicalAddress; + status = OperationStatus.SUCCESS; + goto CreateNewRecord; + } + ref Value recordValue = ref hlog.GetValue(physicalAddress); + if (IsIntermediate(ref recordInfo)) + return RetryOnIntermediateRecord(ref recordInfo, ref key, ref recordValue, fasterSession); + if (!recordInfo.Tombstone) { - if (fasterSession.InPlaceUpdater(ref key, ref input, ref hlog.GetValue(physicalAddress), ref output, ref recordInfo, logicalAddress)) + if (fasterSession.InPlaceUpdater(ref key, ref input, ref recordValue, ref output, ref recordInfo, logicalAddress)) { if (sessionCtx.phase == Phase.REST) hlog.MarkPage(logicalAddress, sessionCtx.version); else hlog.MarkPageAtomic(logicalAddress, sessionCtx.version); @@ -809,11 +922,15 @@ internal OperationStatus InternalRMW( status = OperationStatus.SUCCESS; goto LatchRelease; // Release shared latch (if acquired) } + + // InPlaceUpdater failed (e.g. insufficient space). 
Another thread may come along to do this update in-place; Seal it to prevent that. + if (!SealRecord(ref recordInfo, ref key, ref recordValue, fasterSession)) + return OperationStatus.RETRY_NOW; } } // Fuzzy Region: Must go pending due to lost-update anomaly - else if (logicalAddress >= hlog.SafeReadOnlyAddress && !hlog.GetInfo(physicalAddress).Tombstone) + else if (!fasterSession.IsManualOperations && (logicalAddress >= hlog.SafeReadOnlyAddress && !hlog.GetInfo(physicalAddress).Tombstone)) // TODO replace with Sealed { status = OperationStatus.RETRY_LATER; // Do not retain latch for pendings ops in relaxed CPR @@ -832,6 +949,24 @@ internal OperationStatus InternalRMW( // Safe Read-Only Region: Create a record in the mutable region else if (logicalAddress >= hlog.HeadAddress) { + ref RecordInfo recordInfo = ref hlog.GetInfo(physicalAddress); + if (fasterSession.IsManualOperations && recordInfo.Stub) + { + // This is only for ManualFasterOperations, and we assume we hold this lock. We will need to transfer the lock to the updated record. + stubPhysicalAddress = physicalAddress; + status = OperationStatus.SUCCESS; + goto CreateNewRecord; + } + ref Value recordValue = ref hlog.GetValue(physicalAddress); + if (IsIntermediate(ref recordInfo)) + { + status = RetryOnIntermediateRecord(ref recordInfo, ref key, ref recordValue, fasterSession); + goto LatchRelease; // Release shared latch (if acquired) + } + + // This ManualOps instance already owns this lock, or we wouldn't be here. 
Create a new record and transfer the lock + if (fasterSession.IsManualOperations && recordInfo.IsLocked) + stubPhysicalAddress = physicalAddress; goto CreateNewRecord; } @@ -859,32 +994,29 @@ internal OperationStatus InternalRMW( } } - #endregion +#endregion - #region Create new record +#region Create new record CreateNewRecord: if (latchDestination != LatchDestination.CreatePendingContext) { - status = CreateNewRecordRMW(ref key, ref input, ref output, ref pendingContext, fasterSession, sessionCtx, bucket, slot, logicalAddress, physicalAddress, tag, entry, latestLogicalAddress); + status = CreateNewRecordRMW(ref key, ref input, ref output, ref pendingContext, fasterSession, sessionCtx, bucket, slot, logicalAddress, physicalAddress, tag, entry, + latestLogicalAddress, stubPhysicalAddress != Constants.kInvalidAddress); if (status != OperationStatus.ALLOCATE_FAILED) { - if (sealedPhysicalAddress != Constants.kInvalidAddress) + if (fasterSession.IsManualOperations && stubPhysicalAddress != Constants.kInvalidAddress) { - // Mark the *old* record as Invalid and unlock it--thereby "transferring" the lock to the new record. - ref RecordInfo recordInfo = ref hlog.GetInfo(sealedPhysicalAddress); - recordInfo.Stub = false; - recordInfo.Sealed = false; - recordInfo.SetInvalid(); - fasterSession.UnlockExclusive(ref recordInfo, ref key, ref hlog.GetValue(physicalAddress), sealedLockContext); + // Unlock the *old* record--thereby "transferring" the lock to the new record. 
+ ref RecordInfo recordInfo = ref hlog.GetInfo(stubPhysicalAddress); + fasterSession.UnlockExclusive(ref recordInfo); } goto LatchRelease; - } latchDestination = LatchDestination.CreatePendingContext; } - #endregion +#endregion - #region Create failure context +#region Create failure context Debug.Assert(latchDestination == LatchDestination.CreatePendingContext, $"RMW CreatePendingContext encountered latchDest == {latchDestination}"); { pendingContext.type = OperationType.RMW; @@ -986,7 +1118,7 @@ private LatchDestination AcquireLatchRMW(PendingContext< private OperationStatus CreateNewRecordRMW(ref Key key, ref Input input, ref Output output, ref PendingContext pendingContext, FasterSession fasterSession, FasterExecutionContext sessionCtx, HashBucket* bucket, int slot, long logicalAddress, - long physicalAddress, ushort tag, HashBucketEntry entry, long latestLogicalAddress) + long physicalAddress, ushort tag, HashBucketEntry entry, long latestLogicalAddress, bool isStubPromotion) where FasterSession : IFasterSession { // Determine if we should allocate a new record @@ -1018,26 +1150,30 @@ private OperationStatus CreateNewRecordRMW= hlog.HeadAddress) { if (hlog.GetInfo(physicalAddress).Tombstone) { - fasterSession.InitialUpdater(ref key, ref input, ref hlog.GetValue(newPhysicalAddress, newPhysicalAddress + actualSize), ref output, ref recordInfo, newLogicalAddress, out lockContext); + fasterSession.InitialUpdater(ref key, ref input, ref hlog.GetValue(newPhysicalAddress, newPhysicalAddress + actualSize), ref output, + ref pendingContext.lockOperation, ref recordInfo, newLogicalAddress); status = OperationStatus.NOTFOUND; } else { fasterSession.CopyUpdater(ref key, ref input, ref hlog.GetValue(physicalAddress), ref hlog.GetValue(newPhysicalAddress, newPhysicalAddress + actualSize), - ref output, ref recordInfo, newLogicalAddress, out lockContext); + ref output, ref recordInfo, newLogicalAddress); status = OperationStatus.SUCCESS; } + 
pendingContext.lockOperation.IsStubPromotion = false; } else { @@ -1062,7 +1198,7 @@ ref hlog.GetValue(newPhysicalAddress, newPhysicalAddress + actualSize), Debug.Assert(OperationStatus.NOTFOUND == status); fasterSession.PostInitialUpdater(ref key, ref input, ref hlog.GetValue(newPhysicalAddress), - ref output, ref recordInfo, newLogicalAddress, lockContext); + ref output, ref recordInfo, newLogicalAddress); pendingContext.recordInfo = recordInfo; pendingContext.logicalAddress = newLogicalAddress; return status; @@ -1072,7 +1208,7 @@ ref hlog.GetValue(newPhysicalAddress, newPhysicalAddress + actualSize), if (fasterSession.PostCopyUpdater(ref key, ref input, ref hlog.GetValue(physicalAddress), ref hlog.GetValue(newPhysicalAddress), - ref output, ref recordInfo, newLogicalAddress, lockContext)) + ref output, ref recordInfo, newLogicalAddress)) { pendingContext.recordInfo = recordInfo; pendingContext.logicalAddress = newLogicalAddress; @@ -1141,6 +1277,7 @@ internal OperationStatus InternalDelete( var hash = comparer.GetHashCode64(ref key); var tag = (ushort)((ulong)hash >> Constants.kHashTagShift); + long stubPhysicalAddress = Constants.kInvalidAddress; if (sessionCtx.phase != Phase.REST) HeavyEnter(hash, sessionCtx, fasterSession); @@ -1235,30 +1372,34 @@ internal OperationStatus InternalDelete( if (logicalAddress >= hlog.ReadOnlyAddress) { ref RecordInfo recordInfo = ref hlog.GetInfo(physicalAddress); + ref Value recordValue = ref hlog.GetValue(physicalAddress); + if (fasterSession.IsManualOperations && recordInfo.Stub) + { + // This is only for ManualFasterOperations, and we assume we hold this lock. We can Tombstone the record directly. Caller must still unlock. 
+ fasterSession.ConcurrentDeleter(ref hlog.GetKey(physicalAddress), ref recordValue, ref recordInfo, logicalAddress); + recordInfo.Stub = false; + status = OperationStatus.SUCCESS; + goto LatchRelease; // Release shared latch (if acquired) + } - if (recordInfo.Sealed || recordInfo.Stub) + if (IsIntermediate(ref recordInfo)) { - // These are "intermediate" states that will be replaced by either a Delete or an Upsert of an actual value. Acquire the lock, then release and retry immediately. - ref Value recordValue = ref hlog.GetValue(physicalAddress); - long lockContext = default; - fasterSession.LockShared(ref recordInfo, ref key, ref recordValue, ref lockContext); - fasterSession.UnlockShared(ref recordInfo, ref key, ref recordValue, lockContext); - return OperationStatus.RETRY_NOW; + status = RetryOnIntermediateRecord(ref recordInfo, ref key, ref recordValue, fasterSession); + goto LatchRelease; // Release shared latch (if acquired) } - ref Value value = ref hlog.GetValue(physicalAddress); - - // The concurrent delete may fail if the record is sealed - if (!fasterSession.ConcurrentDeleter(ref hlog.GetKey(physicalAddress), ref value, ref recordInfo, logicalAddress)) + if (!fasterSession.ConcurrentDeleter(ref hlog.GetKey(physicalAddress), ref recordValue, ref recordInfo, logicalAddress)) goto CreateNewRecord; - if (sessionCtx.phase == Phase.REST) hlog.MarkPage(logicalAddress, sessionCtx.version); - else hlog.MarkPageAtomic(logicalAddress, sessionCtx.version); + if (sessionCtx.phase == Phase.REST) + hlog.MarkPage(logicalAddress, sessionCtx.version); + else + hlog.MarkPageAtomic(logicalAddress, sessionCtx.version); if (WriteDefaultOnDelete) - value = default; + recordValue = default; // Try to update hash chain and completely elide record only if previous address points to invalid address - if (entry.Address == logicalAddress && recordInfo.PreviousAddress < hlog.BeginAddress) + if (!recordInfo.IsLocked && entry.Address == logicalAddress && recordInfo.PreviousAddress < 
hlog.BeginAddress) { var updatedEntry = default(HashBucketEntry); updatedEntry.Tag = 0; @@ -1277,6 +1418,24 @@ internal OperationStatus InternalDelete( status = OperationStatus.SUCCESS; goto LatchRelease; // Release shared latch (if acquired) } + else if (fasterSession.IsManualOperations) + { + if (logicalAddress >= hlog.HeadAddress) + { + physicalAddress = hlog.GetPhysicalAddress(logicalAddress); + ref RecordInfo recordInfo = ref hlog.GetInfo(physicalAddress); + + // This ManualOps instance already owns this lock, or we wouldn't be here. Create a new record and transfer the lock + if (recordInfo.Stub || recordInfo.IsLocked) + stubPhysicalAddress = physicalAddress; + goto CreateNewRecord; + } + else if (pendingContext.lockOperation.LockOperationType == LockOperationType.Unlock) + { + Debug.Fail("Trying to unlock a non-existent value"); + return OperationStatus.SUCCESS; + } + } // All other regions: Create a record in the mutable region #endregion @@ -1302,9 +1461,11 @@ internal OperationStatus InternalDelete( latestLogicalAddress); hlog.Serialize(ref key, newPhysicalAddress); - // There is no Value to lock, so we lock the RecordInfo directly. TODO: Updaters must honor this lock as well - if (fasterSession.SupportsLocking) - recordInfo.LockExclusive(); + // Nobody does anything to this record because it is Tombstoned, so we do not lock it unless we are transferring from a stub. + if (fasterSession.IsManualOperations && stubPhysicalAddress != Constants.kInvalidAddress) + { + fasterSession.LockExclusive(ref recordInfo); + } var updatedEntry = default(HashBucketEntry); updatedEntry.Tag = tag; @@ -1319,6 +1480,13 @@ internal OperationStatus InternalDelete( if (foundEntry.word == entry.word) { + if (fasterSession.IsManualOperations && stubPhysicalAddress != Constants.kInvalidAddress) + { + // Unlock the *old* record--thereby "transferring" the lock to the new record. 
+ ref RecordInfo stubRecordInfo = ref hlog.GetInfo(stubPhysicalAddress); + fasterSession.UnlockExclusive(ref stubRecordInfo); + } + // Note that this is the new logicalAddress; we have not retrieved the old one if it was below HeadAddress, and thus // we do not know whether 'logicalAddress' belongs to 'key' or is a collision. fasterSession.PostSingleDeleter(ref key, ref recordInfo, newLogicalAddress); @@ -1476,11 +1644,8 @@ internal OperationStatus InternalContinuePendingRead= hlog.BeginAddress) && !hlog.GetInfoFromBytePointer(request.record.GetValidPointer()).Tombstone) @@ -1649,13 +1814,14 @@ internal OperationStatus InternalContinuePendingRMW pendingContext = default; do - internalStatus = InternalTryCopyToTail(currentCtx, ref key, ref input, ref value, ref output, ref dummyLockOperation, expectedLogicalAddress, fasterSession, currentCtx, noReadCache); + internalStatus = InternalTryCopyToTail(currentCtx, ref pendingContext, ref key, ref input, ref value, ref output, ref dummyLockOperation, expectedLogicalAddress, fasterSession, currentCtx, noReadCache); while (internalStatus == OperationStatus.RETRY_NOW); return internalStatus; } [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal OperationStatus InternalTryCopyToTail( + internal OperationStatus InternalTryCopyToTail(ref PendingContext pendingContext, ref Key key, ref Input input, ref Value value, ref Output output, ref LockOperation lockOperation, long foundLogicalAddress, FasterSession fasterSession, FasterExecutionContext currentCtx, bool noReadCache = false) where FasterSession : IFasterSession - => InternalTryCopyToTail(currentCtx, ref key, ref input, ref value, ref output, ref lockOperation, foundLogicalAddress, fasterSession, currentCtx, noReadCache); + => InternalTryCopyToTail(currentCtx, ref pendingContext, ref key, ref input, ref value, ref output, ref lockOperation, foundLogicalAddress, fasterSession, currentCtx, noReadCache); /// /// Helper function for trying to copy existing immutable 
records (at foundLogicalAddress) to the tail, @@ -2043,6 +2210,7 @@ internal OperationStatus InternalTryCopyToTail + /// /// /// /// @@ -2064,7 +2232,7 @@ internal OperationStatus InternalTryCopyToTail internal OperationStatus InternalTryCopyToTail( - FasterExecutionContext opCtx, + FasterExecutionContext opCtx, ref PendingContext pendingContext, ref Key key, ref Input input, ref Value value, ref Output output, ref LockOperation lockOperation, long expectedLogicalAddress, FasterSession fasterSession, @@ -2073,6 +2241,7 @@ internal OperationStatus InternalTryCopyToTail { Debug.Assert(expectedLogicalAddress >= hlog.BeginAddress); + var bucket = default(HashBucket*); var slot = default(int); @@ -2104,11 +2273,12 @@ internal OperationStatus InternalTryCopyToTail expectedLogicalAddress || logicalAddress < hlog.BeginAddress) { // We give up early. - // Note: In Compact, expectedLogicalAddress may not exactly match the source of this copy operation, - // but instead only an upper bound. + // Note1: In Compact, expectedLogicalAddress may not exactly match the source of this copy operation, but instead only an upper bound. + // Note2: In the case of ReadAtAddress, we will bail here by design; we assume anything in the readcache is the latest version. + // Any loop to retrieve prior versions should set ReadFlags.SkipReadCache; see ReadAddressTests. return OperationStatus.NOTFOUND; } - #region Create new copy in mutable region +#region Create new copy in mutable region var (actualSize, allocatedSize) = hlog.GetRecordSize(ref key, ref value); long newLogicalAddress, newPhysicalAddress; @@ -2165,17 +2335,18 @@ ref hlog.GetValue(newPhysicalAddress, newPhysicalAddress + actualSize), ref outp else { var log = copyToReadCache ? readcache : hlog; - LockOperation dummyLockOp = default; - ref LockOperation lockOp = ref (copyToReadCache ? 
ref dummyLockOp : ref lockOperation); + ref RecordInfo recordInfo = ref log.GetInfo(newPhysicalAddress); + pendingContext.recordInfo = recordInfo; + pendingContext.logicalAddress = copyToReadCache ? Constants.kInvalidAddress /* We do not expose readcache addresses */ : newLogicalAddress; fasterSession.PostSingleWriter(ref key, ref input, ref value, ref log.GetValue(newPhysicalAddress, newPhysicalAddress + actualSize), ref output, - ref lockOp, ref log.GetInfo(newPhysicalAddress), newLogicalAddress); + ref lockOperation, ref recordInfo, pendingContext.logicalAddress); return OperationStatus.SUCCESS; } - #endregion +#endregion } - #endregion +#endregion #region Split Index private void SplitBuckets(long hash) @@ -2410,6 +2581,7 @@ private long TraceBackForOtherChainStart(long logicalAddress, int bit) #region Read Cache private bool ReadFromCache(ref Key key, ref long logicalAddress, ref long physicalAddress) { + // logicalAddress is retrieved from the main FKV's hash table. HashBucketEntry entry = default; entry.word = logicalAddress; if (!entry.ReadCache) return false; @@ -2422,6 +2594,7 @@ private bool ReadFromCache(ref Key key, ref long logicalAddress, ref long physic { if ((logicalAddress & ~Constants.kReadCacheBitMask) >= readcache.SafeReadOnlyAddress) { + // This is a valid readcache record. return true; } Debug.Assert((logicalAddress & ~Constants.kReadCacheBitMask) >= readcache.SafeHeadAddress); @@ -2434,6 +2607,8 @@ private bool ReadFromCache(ref Key key, ref long logicalAddress, ref long physic if (!entry.ReadCache) break; physicalAddress = readcache.GetPhysicalAddress(logicalAddress & ~Constants.kReadCacheBitMask); } + + // Not found in read cache. physicalAddress = 0; return false; } @@ -2455,6 +2630,7 @@ private void SkipReadCache(ref long logicalAddress) } } + // Skip over all records in this key's chain in the readcache (advancing logicalAddress to the first non-readcache record we encounter). 
private void SkipReadCacheBucket(HashBucket* bucket) { for (int index = 0; index < Constants.kOverflowBucketIndex; ++index) @@ -2477,6 +2653,8 @@ private void SkipReadCacheBucket(HashBucket* bucket) } } + // Skip over all records in this key's chain in the readcache (advancing logicalAddress to the first non-readcache record we encounter). + // Invalidate each record we skip over. private void SkipAndInvalidateReadCache(ref long logicalAddress, ref Key key) { HashBucketEntry entry = default; @@ -2502,6 +2680,7 @@ private void SkipAndInvalidateReadCache(ref long logicalAddress, ref Key key) private void ReadCacheEvict(long fromHeadAddress, long toHeadAddress) { + // fromHeadAddress and toHeadAddress are in the readCache var bucket = default(HashBucket*); var slot = default(int); var logicalAddress = Constants.kInvalidAddress; @@ -2510,6 +2689,7 @@ private void ReadCacheEvict(long fromHeadAddress, long toHeadAddress) HashBucketEntry entry = default; logicalAddress = fromHeadAddress; + // Remove readcache entries from the main FKV that are in the fromHeadAddress/toHeadAddress range in the readcache. while (logicalAddress < toHeadAddress) { physicalAddress = readcache.GetPhysicalAddress(logicalAddress); @@ -2518,14 +2698,24 @@ private void ReadCacheEvict(long fromHeadAddress, long toHeadAddress) if (!info.Invalid) { ref Key key = ref readcache.GetKey(physicalAddress); + + // If this to-be-evicted readcache record's prevAddress points to a record in the main FKV... entry.word = info.PreviousAddress; if (!entry.ReadCache) { + // Find the index entry for the key in the main FKV. 
var hash = comparer.GetHashCode64(ref key); var tag = (ushort)((ulong)hash >> Constants.kHashTagShift); entry = default; var tagExists = FindTag(hash, tag, ref bucket, ref slot, ref entry); + + // Because we call SkipReadCache on upserts, if we have a readcache entry for this hash, it will be pointed to by + // the hashtable; there may be other readcache entries as well, before one that is a non-readcache entry. + // That is, if there is a readcache entry for this hash, the chain will always be of the form: + // hashtable -> zero or more readcache entries -> main FKV entry. + // Remove the readcache entry for this hash from the main FKV, unless some other thread has done it for us. + // Note that this removes the entire leading readcache-entry set of records from the hash table pointer. while (tagExists && entry.ReadCache) { var updatedEntry = default(HashBucketEntry); diff --git a/cs/src/core/Index/FASTER/LogCompactionFunctions.cs b/cs/src/core/Index/FASTER/LogCompactionFunctions.cs index 65ef7d217..21e1c910e 100644 --- a/cs/src/core/Index/FASTER/LogCompactionFunctions.cs +++ b/cs/src/core/Index/FASTER/LogCompactionFunctions.cs @@ -68,15 +68,5 @@ public void SingleWriter(ref Key key, ref Input input, ref Value src, ref Value public void PostSingleWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address) { } public void UpsertCompletionCallback(ref Key key, ref Input input, ref Value value, Context ctx) { } - - public bool SupportsLocking => false; - public void LockExclusive(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext) { } - public void UnlockExclusive(ref RecordInfo recordInfo, ref Key key, ref Value value, long lockContext) { } - public bool TryLockExclusive(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext, int spinCount = 1) => true; - public void LockShared(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long 
lockContext) { } - public bool UnlockShared(ref RecordInfo recordInfo, ref Key key, ref Value value, long lockContext) => true; - public bool TryLockShared(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext, int spinCount = 1) => true; - public void LockExclusiveFromShared(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext) { } - public bool TryLockExclusiveFromShared(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext, int spinCount = 1) => true; } } \ No newline at end of file diff --git a/cs/src/core/Index/Interfaces/FunctionsBase.cs b/cs/src/core/Index/Interfaces/FunctionsBase.cs index 7bdd2fa70..01d3fc398 100644 --- a/cs/src/core/Index/Interfaces/FunctionsBase.cs +++ b/cs/src/core/Index/Interfaces/FunctionsBase.cs @@ -70,35 +70,6 @@ public virtual void UpsertCompletionCallback(ref Key key, ref Input input, ref V public virtual void DeleteCompletionCallback(ref Key key, Context ctx) { } /// public virtual void CheckpointCompletionCallback(string sessionId, CommitPoint commitPoint) { } - - /// - public virtual bool SupportsLocking => locking; - - /// - public virtual void LockExclusive(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext) => recordInfo.LockExclusive(); - - /// - public virtual void UnlockExclusive(ref RecordInfo recordInfo, ref Key key, ref Value value, long lockContext) => recordInfo.UnlockExclusive(); - - /// - public virtual bool TryLockExclusive(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext, int spinCount = 1) => recordInfo.TryLockExclusive(spinCount); - - /// - public virtual void LockShared(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext) => recordInfo.LockShared(); - - /// - public virtual bool UnlockShared(ref RecordInfo recordInfo, ref Key key, ref Value value, long lockContext) - { - recordInfo.UnlockShared(); - return true; - } - - /// - public virtual bool 
TryLockShared(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext, int spinCount = 1) => recordInfo.TryLockShared(spinCount); - - public virtual void LockExclusiveFromShared(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext) => recordInfo.LockExclusiveFromShared(); - - public virtual bool TryLockExclusiveFromShared(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext, int spinCount = 1) => recordInfo.TryLockExclusiveFromShared(spinCount); } /// diff --git a/cs/src/core/Index/Interfaces/IFasterSession.cs b/cs/src/core/Index/Interfaces/IFasterSession.cs index d7bda3cd0..327d6cdfd 100644 --- a/cs/src/core/Index/Interfaces/IFasterSession.cs +++ b/cs/src/core/Index/Interfaces/IFasterSession.cs @@ -29,6 +29,8 @@ internal interface IFasterSession : IFasterS bool SupportsLocking { get; } bool SupportsPostOperations { get; } + + bool IsManualOperations { get; } #endregion Optional features supported by this implementation #region Reads @@ -47,14 +49,14 @@ internal interface IFasterSession : IFasterS #region RMWs #region InitialUpdater bool NeedInitialUpdate(ref Key key, ref Input input, ref Output output); - void InitialUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, long address, out long lockContext); - void PostInitialUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, long address, long lockContext); + void InitialUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref LockOperation lockOp, ref RecordInfo recordInfo, long address); + void PostInitialUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, long address); #endregion InitialUpdater #region CopyUpdater bool NeedCopyUpdate(ref Key key, ref Input input, ref Value oldValue, ref Output output); - void CopyUpdater(ref Key key, ref Input input, ref Value oldValue, ref 
Value newValue, ref Output output, ref RecordInfo recordInfo, long address, out long lockContext); - bool PostCopyUpdater(ref Key key, ref Input input, ref Value oldValue, ref Value newValue, ref Output output, ref RecordInfo recordInfo, long address, long lockContext); + void CopyUpdater(ref Key key, ref Input input, ref Value oldValue, ref Value newValue, ref Output output, ref RecordInfo recordInfo, long address); + bool PostCopyUpdater(ref Key key, ref Input input, ref Value oldValue, ref Value newValue, ref Output output, ref RecordInfo recordInfo, long address); #endregion CopyUpdater #region InPlaceUpdater @@ -71,14 +73,18 @@ internal interface IFasterSession : IFasterS #endregion Deletes #region Locking - void LockExclusive(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext); - void UnlockExclusive(ref RecordInfo recordInfo, ref Key key, ref Value value, long lockContext); - bool TryLockExclusive(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext, int spinCount = 1); - void LockShared(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext); - bool UnlockShared(ref RecordInfo recordInfo, ref Key key, ref Value value, long lockContext); - bool TryLockShared(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext, int spinCount = 1); - void LockExclusiveFromShared(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext); - bool TryLockExclusiveFromShared(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext, int spinCount = 1); + void LockExclusive(ref RecordInfo recordInfo); + void UnlockExclusive(ref RecordInfo recordInfo); + bool TryLockExclusive(ref RecordInfo recordInfo, int spinCount = 1); + void LockShared(ref RecordInfo recordInfo); + void UnlockShared(ref RecordInfo recordInfo); + bool TryLockShared(ref RecordInfo recordInfo, int spinCount = 1); + void LockExclusiveFromShared(ref RecordInfo recordInfo); + 
bool TryLockExclusiveFromShared(ref RecordInfo recordInfo, int spinCount = 1); + bool IsLocked(ref RecordInfo recordInfo); + bool IsLockedExclusive(ref RecordInfo recordInfo); + bool IsLockedShared(ref RecordInfo recordInfo); + void TransferLocks(ref RecordInfo fromRecordInfo, ref RecordInfo toRecordInfo); #endregion Locking bool CompletePendingWithOutputs(out CompletedOutputIterator completedOutputs, bool wait = false, bool spinWaitForCommit = false); diff --git a/cs/src/core/Index/Interfaces/IFunctions.cs b/cs/src/core/Index/Interfaces/IFunctions.cs index 80f80a5e5..52162a29e 100644 --- a/cs/src/core/Index/Interfaces/IFunctions.cs +++ b/cs/src/core/Index/Interfaces/IFunctions.cs @@ -234,120 +234,6 @@ public interface IFunctions void DeleteCompletionCallback(ref Key key, Context ctx); #endregion Deletes - #region Locking - /// - /// User-provided exclusive-lock call, defaulting to no-op. A default implementation is available via . - /// - /// The header for the current record - /// The key for the current record - /// The value for the current record - /// Context-specific information; will be passed to - /// - /// This is called only for records guaranteed to be in the mutable range. - /// - void LockExclusive(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext); - - /// - /// User-provided exclusive unlock call, defaulting to no-op. A default exclusive implementation is available via . - /// - /// The header for the current record - /// The key for the current record - /// The value for the current record - /// The context returned from - /// - /// This is called only for records guaranteed to be in the mutable range. - /// - void UnlockExclusive(ref RecordInfo recordInfo, ref Key key, ref Value value, long lockContext); - - /// - /// User-provided try-exclusive-lock call, defaulting to no-op. A default implementation is available via . 
- /// - /// The header for the current record - /// The key for the current record - /// The value for the current record - /// Context-specific information; will be passed to - /// The number of times to spin in a try/yield loop until giving up; default is once - /// - /// This is called only for records guaranteed to be in the mutable range. - /// - /// - /// True if the lock was acquired, else false. - /// - bool TryLockExclusive(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext, int spinCount = 1); - - /// - /// User-provided shared-lock call, defaulting to no-op. A default implementation is available via . - /// - /// The header for the current record - /// The key for the current record - /// The value for the current record - /// Context-specific information; will be passed to - /// - /// This is called only for records guaranteed to be in the mutable range. - /// - void LockShared(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext); - - /// - /// User-provided shared-unlock call, defaulting to no-op. A default exclusive implementation is available via . - /// - /// The header for the current record - /// The key for the current record - /// The value for the current record - /// The context returned from - /// - /// This is called only for records guaranteed to be in the mutable range. - /// - /// - /// True if no inconsistencies detected. Otherwise, the lock and user's callback are reissued. - /// Currently this is handled only for . - /// - bool UnlockShared(ref RecordInfo recordInfo, ref Key key, ref Value value, long lockContext); - - /// - /// User-provided try-shared-lock call, defaulting to no-op. A default implementation is available via . 
- /// - /// The header for the current record - /// The key for the current record - /// The value for the current record - /// Context-specific information; will be passed to - /// The number of times to spin in a try/yield loop until giving up; default is once - /// - /// This is called only for records guaranteed to be in the mutable range. - /// - /// - /// True if the lock was acquired, else false. - /// - bool TryLockShared(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext, int spinCount = 1); - - /// - /// User-provided lock promotion call, converting a shared lock into an exclusive lock, defaulting to no-op. A default implementation is available via . - /// - /// The header for the current record - /// The key for the current record - /// The value for the current record - /// Context-specific information; will be passed to - /// - /// This is called only for records guaranteed to be in the mutable range. - /// - void LockExclusiveFromShared(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext); - - /// - /// User-provided lock promotion call, converting a shared lock into an exclusive lock, defaulting to no-op. A default implementation is available via . - /// - /// The header for the current record - /// The key for the current record - /// The value for the current record - /// Context-specific information; will be passed to - /// The number of times to spin in a try/yield loop until giving up; default is once - /// - /// This is called only for records guaranteed to be in the mutable range. - /// - /// - /// True if the lock was acquired, else false. 
- /// - bool TryLockExclusiveFromShared(ref RecordInfo recordInfo, ref Key key, ref Value value, ref long lockContext, int spinCount = 1); - #endregion Locking - #region Checkpointing /// /// Checkpoint completion callback (called per client session) diff --git a/cs/src/core/Utilities/LockType.cs b/cs/src/core/Utilities/LockType.cs index a643252ca..52823bcf3 100644 --- a/cs/src/core/Utilities/LockType.cs +++ b/cs/src/core/Utilities/LockType.cs @@ -24,26 +24,48 @@ public enum LockType : byte ExclusiveFromShared } + /// + /// Information returned from + /// + public struct LockInfo + { + /// + /// The type of lock that was acquired + /// + public LockType LockType; + + /// + /// The address of the record that was locked. Useful for calling + /// + public long Address; + + /// + public override string ToString() => $"{LockType}: addr {Address}"; + } + internal enum LockOperationType : byte { None, - Lock, + LockRead, + LockUpsert, Unlock } internal struct LockOperation { - internal LockOperationType LockOperationType; internal LockType LockType; - internal long LockContext; + internal LockOperationType LockOperationType; + internal bool IsStubPromotion; internal bool IsSet => LockOperationType != LockOperationType.None; internal LockOperation(LockOperationType opType, LockType lockType) { - this.LockOperationType = opType; this.LockType = lockType; - this.LockContext = default; + this.LockOperationType = opType; + this.IsStubPromotion = false; } + + public override string ToString() => $"{LockType}: opType {LockOperationType}, isStubPromo {IsStubPromotion}"; } } diff --git a/cs/src/core/VarLen/SpanByteFunctions.cs b/cs/src/core/VarLen/SpanByteFunctions.cs index c4b96b378..27af87fd2 100644 --- a/cs/src/core/VarLen/SpanByteFunctions.cs +++ b/cs/src/core/VarLen/SpanByteFunctions.cs @@ -99,32 +99,7 @@ public unsafe override bool ConcurrentReader(ref SpanByte key, ref SpanByte inpu value.CopyTo(ref dst, memoryPool); return true; } - - /// - public override bool 
SupportsLocking => locking; - - /// - public override void LockExclusive(ref RecordInfo recordInfo, ref SpanByte key, ref SpanByte value, ref long lockContext) => recordInfo.LockExclusive(); - - /// - public override void UnlockExclusive(ref RecordInfo recordInfo, ref SpanByte key, ref SpanByte value, long lockContext) => recordInfo.UnlockExclusive(); - - /// - public override bool TryLockExclusive(ref RecordInfo recordInfo, ref SpanByte key, ref SpanByte value, ref long lockContext, int spinCount = 1) => recordInfo.TryLockExclusive(spinCount); - - /// - public override void LockShared(ref RecordInfo recordInfo, ref SpanByte key, ref SpanByte value, ref long lockContext) => recordInfo.LockShared(); - - /// - public override bool UnlockShared(ref RecordInfo recordInfo, ref SpanByte key, ref SpanByte value, long lockContext) - { - recordInfo.UnlockShared(); - return true; - } - - /// - public override bool TryLockShared(ref RecordInfo recordInfo, ref SpanByte key, ref SpanByte value, ref long lockContext, int spinCount = 1) => recordInfo.TryLockShared(spinCount); -} + } /// /// Callback functions for SpanByte with byte[] output, for SpanByte key, value, input @@ -150,30 +125,5 @@ public override bool ConcurrentReader(ref SpanByte key, ref SpanByte input, ref dst = value.ToByteArray(); return true; } - - /// - public override bool SupportsLocking => locking; - - /// - public override void LockExclusive(ref RecordInfo recordInfo, ref SpanByte key, ref SpanByte value, ref long lockContext) => recordInfo.LockExclusive(); - - /// - public override void UnlockExclusive(ref RecordInfo recordInfo, ref SpanByte key, ref SpanByte value, long lockContext) => recordInfo.UnlockExclusive(); - - /// - public override bool TryLockExclusive(ref RecordInfo recordInfo, ref SpanByte key, ref SpanByte value, ref long lockContext, int spinCount = 1) => recordInfo.TryLockExclusive(spinCount); - - /// - public override void LockShared(ref RecordInfo recordInfo, ref SpanByte key, ref 
SpanByte value, ref long lockContext) => recordInfo.LockShared(); - - /// - public override bool UnlockShared(ref RecordInfo recordInfo, ref SpanByte key, ref SpanByte value, long lockContext) - { - recordInfo.UnlockShared(); - return true; - } - - /// - public override bool TryLockShared(ref RecordInfo recordInfo, ref SpanByte key, ref SpanByte value, ref long lockContext, int spinCount = 1) => recordInfo.TryLockShared(spinCount); } } diff --git a/cs/test/ManualOperationsTests.cs b/cs/test/ManualOperationsTests.cs index 6994b29d6..d504726c8 100644 --- a/cs/test/ManualOperationsTests.cs +++ b/cs/test/ManualOperationsTests.cs @@ -1,22 +1,53 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT license. +using System; +using System.Collections.Generic; using System.IO; +using System.Linq; using System.Threading; using FASTER.core; using NUnit.Framework; namespace FASTER.test { + // Functions for the "Simple lock transaction" case, e.g.: + // - Lock key1, key2, key3, keyResult + // - Do some operation on value1, value2, value3 and write the result to valueResult + class ManualFunctions : SimpleFunctions + { + internal long deletedRecordAddress; + + public override bool SupportsPostOperations => true; + + public override void PostSingleDeleter(ref int key, ref RecordInfo recordInfo, long address) + { + deletedRecordAddress = address; + } + + public override bool ConcurrentDeleter(ref int key, ref int value, ref RecordInfo recordInfo, long address) + { + deletedRecordAddress = address; + return true; + } + } + + public enum ResultLockTarget { MutableLock, Stub } + + public enum ReadCopyDestination { Tail, ReadCache } + + public enum FlushMode { NoFlush, ReadOnly, OnDisk } + + public enum UpdateOp { Upsert, RMW } + [TestFixture] class ManualOperationsTests { const int numRecords = 1000; const int valueMult = 1_000_000; - const int numThreads = 12; private FasterKV fkv; - private ClientSession> session; + private ClientSession session; 
private IDevice log; [SetUp] @@ -25,8 +56,21 @@ public void Setup() TestUtils.DeleteDirectory(TestUtils.MethodTestDir, wait: true); log = Devices.CreateLogDevice(Path.Combine(TestUtils.MethodTestDir, "test.log"), deleteOnClose: true); - fkv = new FasterKV(1L << 20, new LogSettings { LogDevice = log, ObjectLogDevice = null, PageSizeBits = 12, MemorySizeBits = 22 }); - session = fkv.For(new SimpleFunctions()).NewSession>(); + + ReadCacheSettings readCacheSettings = default; + foreach (var arg in TestContext.CurrentContext.Test.Arguments) + { + if (arg is ReadCopyDestination dest) + { + if (dest == ReadCopyDestination.ReadCache) + readCacheSettings = new() { PageSizeBits = 12, MemorySizeBits = 22 }; + break; + } + } + + fkv = new FasterKV(1L << 20, new LogSettings { LogDevice = log, ObjectLogDevice = null, PageSizeBits = 12, MemorySizeBits = 22, ReadCacheSettings = readCacheSettings }, + fasterSettings: new FasterSettings { SupportsLocking = true }); + session = fkv.For(new ManualFunctions()).NewSession(); } [TearDown] @@ -51,12 +95,386 @@ void Populate() } } + (bool xlock, bool slock) IsLocked(ManualFasterOperations manualOps, int key, long logicalAddress, bool stub, out RecordInfo recordInfo) + { + // We have the epoch protected so can access the address directly. 
For ReadCache, which does not expose addresses, we must look up the key + if (logicalAddress != Constants.kInvalidAddress) + { + var physicalAddress = fkv.hlog.GetPhysicalAddress(logicalAddress); + recordInfo = fkv.hlog.GetInfo(physicalAddress); + Assert.AreEqual(stub, recordInfo.Stub, "stub mismatch, valid Address"); + } + else + { + int inoutDummy = default; + RecordMetadata recordMetadata = default; + var status = manualOps.Read(ref key, ref inoutDummy, ref inoutDummy, ref recordMetadata); + Assert.AreNotEqual(Status.PENDING, status); + Assert.AreEqual(logicalAddress, recordMetadata.Address); // Either kInvalidAddress for readCache, or the expected address + + recordInfo = recordMetadata.RecordInfo; + Assert.AreEqual(stub, recordInfo.Stub, "stub mismatch"); + } + return (recordInfo.IsLockedExclusive, recordInfo.IsLockedShared); + } + + void AssertIsLocked(ManualFasterOperations manualOps, int key, long logicalAddress, bool xlock, bool slock, bool stub) + { + var (isX, isS) = IsLocked(manualOps, key, logicalAddress, stub, out var recordInfo); + Assert.AreEqual(xlock, isX, "xlock mismatch"); + Assert.AreEqual(slock, isS, "slock mismatch"); + } + + void PrepareRecordLocation(FlushMode recordLocation) + { + if (recordLocation == FlushMode.ReadOnly) + this.fkv.Log.ShiftReadOnlyAddress(this.fkv.Log.TailAddress, wait: true); + else if (recordLocation == FlushMode.OnDisk) + this.fkv.Log.FlushAndEvict(wait: true); + } + + static void ClearCountsOnError(ManualFasterOperations manualOps) + { + // If we already have an exception, clear these counts so "Run" will not report them spuriously. + manualOps.sharedLockCount = 0; + manualOps.exclusiveLockCount = 0; + } + + void EnsureNoLocks() + { + using var iter = this.fkv.Log.Scan(this.fkv.Log.BeginAddress, this.fkv.Log.TailAddress); + long count = 0; + while (iter.GetNext(out var recordInfo, out var key, out var value)) + { + ++count; + Assert.False(recordInfo.IsLocked, $"Unexpected Locked record: {(recordInfo.IsLockedShared ? 
"S" : "")} {(recordInfo.IsLockedExclusive ? "X" : "")}"); + } + + // We delete some records so just make sure the test worked. + Assert.Greater(count, numRecords - 10); + } + + [Test] + [Category(TestUtils.ManualOpsTestCategory)] + [Category(TestUtils.SmokeTestCategory)] + public void InMemorySimpleLockTxnTest([Values] ResultLockTarget resultLockTarget, [Values] ReadCopyDestination readCopyDestination, + [Values]FlushMode flushMode, [Values(Phase.REST, Phase.INTERMEDIATE)] Phase phase, [Values] UpdateOp updateOp) + { + Populate(); + PrepareRecordLocation(flushMode); + + Dictionary locks = new(); + LockInfo lockInfo = default; + + // SetUp also reads this to determine whether to supply ReadCacheSettings. If ReadCache is specified it wins over CopyToTail. + bool useReadCache = readCopyDestination == ReadCopyDestination.ReadCache && flushMode == FlushMode.OnDisk; + var useRMW = updateOp == UpdateOp.RMW; + bool initialDestWillBeStub = resultLockTarget == ResultLockTarget.Stub || flushMode == FlushMode.OnDisk; + int resultKey = resultLockTarget == ResultLockTarget.Stub ? numRecords + 1 : 75; + int resultValue = -1; + int expectedResult = (24 + 51) * valueMult; + Status status; + + using (var manualOps = session.GetManualOperations()) + { + manualOps.UnsafeResumeThread(out var epoch); + + try + { + { // key scope + // Get initial source values + int key = 24; + manualOps.Lock(key, LockType.Shared, retrieveData: true, ref lockInfo); + Assert.AreEqual(useReadCache, lockInfo.Address == Constants.kInvalidAddress); + locks[key] = lockInfo; + AssertIsLocked(manualOps, key, lockInfo.Address, xlock: false, slock: true, stub: false); + key = 51; + manualOps.Lock(key, LockType.Shared, retrieveData: true, ref lockInfo); + Assert.AreEqual(useReadCache, lockInfo.Address == Constants.kInvalidAddress); + locks[key] = lockInfo; + AssertIsLocked(manualOps, key, lockInfo.Address, xlock: false, slock: true, stub: false); + + // Lock destination value (which may entail dropping a stub). 
+ manualOps.Lock(resultKey, LockType.Exclusive, retrieveData: false, ref lockInfo); + Assert.AreEqual(useReadCache && !initialDestWillBeStub, lockInfo.Address == Constants.kInvalidAddress); + locks[resultKey] = lockInfo; + AssertIsLocked(manualOps, resultKey, lockInfo.Address, xlock: true, slock: false, stub: initialDestWillBeStub); + + // Re-get source values, to verify (e.g. they may be in readcache now) + int value24 = -1, value51 = -1; + status = manualOps.Read(24, out value24); + Assert.AreNotEqual(Status.PENDING, status); + status = manualOps.Read(51, out value51); + Assert.AreNotEqual(Status.PENDING, status); + + // Set the phase to Phase.INTERMEDIATE to test the non-Phase.REST blocks + session.ctx.phase = phase; + int dummyInOut = 0; + RecordMetadata recordMetadata = default; + status = useRMW + ? manualOps.RMW(ref resultKey, ref expectedResult, ref dummyInOut, out recordMetadata) + : manualOps.Upsert(ref resultKey, ref dummyInOut, ref expectedResult, ref dummyInOut, out recordMetadata); + Assert.AreNotEqual(Status.PENDING, status); + if (initialDestWillBeStub || flushMode == FlushMode.ReadOnly) + { + // We initially created a stub for locking -or- we initially locked a RO record and then the update required RCU. + // Under these circumstances, we allocated a new record and transferred the lock to it. 
+ Assert.AreNotEqual(locks[resultKey].Address, recordMetadata.Address); + AssertIsLocked(manualOps, resultKey, locks[resultKey].Address, xlock: false, slock: false, stub: initialDestWillBeStub); + AssertIsLocked(manualOps, resultKey, recordMetadata.Address, xlock: true, slock: false, stub: false); + lockInfo = locks[resultKey]; + lockInfo.Address = recordMetadata.Address; + locks[resultKey] = lockInfo; + } + else + Assert.AreEqual(locks[resultKey].Address, recordMetadata.Address); + + // Reread the destination to verify + status = manualOps.Read(resultKey, out resultValue); + Assert.AreNotEqual(Status.PENDING, status); + Assert.AreEqual(expectedResult, resultValue); + } + foreach (var key in locks.Keys.OrderBy(key => key)) + manualOps.Unlock(key, locks[key].LockType); + } + catch (Exception) + { + ClearCountsOnError(manualOps); + throw; + } + finally + { + manualOps.UnsafeSuspendThread(); + } + } + + // Verify reading the destination from the full session. + status = session.Read(resultKey, out resultValue); + Assert.AreNotEqual(Status.PENDING, status); + Assert.AreEqual(expectedResult, resultValue); + EnsureNoLocks(); + } + + [Test] + [Category(TestUtils.ManualOpsTestCategory)] + [Category(TestUtils.SmokeTestCategory)] + public void InMemoryLongLockTest([Values] ResultLockTarget resultLockTarget, [Values] FlushMode flushMode, [Values(Phase.REST, Phase.INTERMEDIATE)] Phase phase, [Values] UpdateOp updateOp) + { + Populate(); + PrepareRecordLocation(flushMode); + + LockInfo lockInfo = default; + bool initialDestWillBeStub = resultLockTarget == ResultLockTarget.Stub || flushMode == FlushMode.OnDisk; + int resultKey = initialDestWillBeStub ? 
numRecords + 1 : 75; + int resultValue = -1; + const int expectedResult = (24 + 51) * valueMult; + var useRMW = updateOp == UpdateOp.RMW; + Status status; + + using var manualOps = session.GetManualOperations(); + manualOps.UnsafeResumeThread(); + + try + { + manualOps.Lock(51, LockType.Exclusive, retrieveData: true, ref lockInfo); + + status = manualOps.Read(24, out var value24); + if (flushMode == FlushMode.OnDisk) + { + Assert.AreEqual(Status.PENDING, status); + manualOps.UnsafeCompletePendingWithOutputs(out var completedOutputs, wait: true); + (status, value24) = TestUtils.GetSinglePendingResult(completedOutputs); + Assert.AreEqual(Status.OK, status); + Assert.AreEqual(24 * valueMult, value24); + } + else + Assert.AreNotEqual(Status.PENDING, status); + + // We just locked this above, so it should not be PENDING + status = manualOps.Read(51, out var value51); + Assert.AreNotEqual(Status.PENDING, status); + Assert.AreEqual(51 * valueMult, value51); + + // Set the phase to Phase.INTERMEDIATE to test the non-Phase.REST blocks + session.ctx.phase = phase; + status = useRMW + ? manualOps.RMW(resultKey, value24 + value51) + : manualOps.Upsert(resultKey, value24 + value51); + Assert.AreNotEqual(Status.PENDING, status); + + status = manualOps.Read(resultKey, out resultValue); + Assert.AreNotEqual(Status.PENDING, status); + Assert.AreEqual(expectedResult, resultValue); + + manualOps.Unlock(51, ref lockInfo); + } + catch (Exception) + { + ClearCountsOnError(manualOps); + throw; + } + finally + { + manualOps.UnsafeSuspendThread(); + } + + // Verify from the full session. 
+ status = session.Read(resultKey, out resultValue); + Assert.AreNotEqual(Status.PENDING, status); + Assert.AreEqual(expectedResult, resultValue); + EnsureNoLocks(); + } + + [Test] + [Category(TestUtils.ManualOpsTestCategory)] + [Category(TestUtils.SmokeTestCategory)] + public void InMemoryDeleteTest([Values] ResultLockTarget resultLockTarget, [Values] ReadCopyDestination readCopyDestination, + [Values(FlushMode.NoFlush, FlushMode.ReadOnly)] FlushMode flushMode, [Values(Phase.REST, Phase.INTERMEDIATE)] Phase phase) + { + // Phase.INTERMEDIATE is to test the non-Phase.REST blocks + Populate(); + PrepareRecordLocation(flushMode); + + Dictionary locks = new(); + LockInfo lockInfo = default; + + // SetUp also reads this to determine whether to supply ReadCacheSettings. If ReadCache is specified it wins over CopyToTail. + bool useReadCache = readCopyDestination == ReadCopyDestination.ReadCache && flushMode == FlushMode.OnDisk; + bool initialDestWillBeStub = resultLockTarget == ResultLockTarget.Stub || flushMode == FlushMode.OnDisk; + int resultKey = resultLockTarget == ResultLockTarget.Stub ? numRecords + 1 : 75; + Status status; + + using (var manualOps = session.GetManualOperations()) + { + manualOps.UnsafeResumeThread(out var epoch); + + try + { + // Lock destination value (which may entail dropping a stub). + manualOps.Lock(resultKey, LockType.Exclusive, retrieveData: false, ref lockInfo); + Assert.AreEqual(useReadCache && !initialDestWillBeStub, lockInfo.Address == Constants.kInvalidAddress); + locks[resultKey] = lockInfo; + AssertIsLocked(manualOps, resultKey, lockInfo.Address, xlock: true, slock: false, stub: initialDestWillBeStub); + + // Set the phase to Phase.INTERMEDIATE to test the non-Phase.REST blocks + session.ctx.phase = phase; + status = manualOps.Delete(ref resultKey); + Assert.AreNotEqual(Status.PENDING, status); + + // If we initially created a stub for locking then we've updated it in place, unlike Upsert or RMW. 
+ if (!initialDestWillBeStub && flushMode == FlushMode.ReadOnly) + { + // We initially locked a RO record and then the delete required inserting a new record. + // Under these circumstances, we allocated a new record and transferred the lock to it. + Assert.AreNotEqual(locks[resultKey].Address, session.functions.deletedRecordAddress); + AssertIsLocked(manualOps, resultKey, locks[resultKey].Address, xlock: false, slock: false, stub: initialDestWillBeStub); + AssertIsLocked(manualOps, resultKey, session.functions.deletedRecordAddress, xlock: true, slock: false, stub: false); + lockInfo = locks[resultKey]; + lockInfo.Address = session.functions.deletedRecordAddress; + locks[resultKey] = lockInfo; + } + else + Assert.AreEqual(locks[resultKey].Address, session.functions.deletedRecordAddress); + + // Reread the destination to verify + status = manualOps.Read(resultKey, out var _); + Assert.AreEqual(Status.NOTFOUND, status); + + foreach (var key in locks.Keys.OrderBy(key => key)) + manualOps.Unlock(key, locks[key].LockType); + } + catch (Exception) + { + ClearCountsOnError(manualOps); + throw; + } + finally + { + manualOps.UnsafeSuspendThread(); + } + } + + // Verify reading the destination from the full session. 
+ status = session.Read(resultKey, out var _); + Assert.AreEqual(Status.NOTFOUND, status); + EnsureNoLocks(); + } + [Test] [Category(TestUtils.ManualOpsTestCategory)] [Category(TestUtils.SmokeTestCategory)] - public void InMemoryLockTest() + public void StressLocks([Values(1, 8)] int numLockThreads, [Values(1, 8)] int numOpThreads) { Populate(); + + // Lock in ordered sequence (avoiding deadlocks) + const int baseKey = 42; + const int numKeys = 20; + const int numIncrement = 5; + const int numIterations = 1000; + + void runLockThread(int tid) + { + Dictionary locks = new(); + Random rng = new(tid + 101); + + using var localSession = fkv.For(new ManualFunctions()).NewSession(); + using var manualOps = localSession.GetManualOperations(); + manualOps.UnsafeResumeThread(); + + for (var iteration = 0; iteration < numIterations; ++iteration) + { + for (var key = baseKey + rng.Next(numIncrement); key < baseKey + numKeys; key += rng.Next(1, numIncrement)) + { + var lockType = rng.Next(100) < 60 ? 
LockType.Shared : LockType.Exclusive; + LockInfo lockInfo = default; + manualOps.Lock(key, lockType, retrieveData: true, ref lockInfo); + locks[key] = lockInfo; + } + + foreach (var key in locks.Keys.OrderBy(key => key)) + manualOps.Unlock(key, locks[key].LockType); + locks.Clear(); + } + + manualOps.UnsafeSuspendThread(); + } + + void runOpThread(int tid) + { + Random rng = new(tid + 101); + + using var localSession = fkv.For(new ManualFunctions()).NewSession(); + + for (var iteration = 0; iteration < numIterations; ++iteration) + { + for (var key = baseKey + rng.Next(numIncrement); key < baseKey + numKeys; key += rng.Next(1, numIncrement)) + { + var rand = rng.Next(100); + if (rand < 33) + localSession.Read(key); + else if (rand < 66) + localSession.Upsert(key, key * valueMult); + else + localSession.RMW(key, key * valueMult); + } + } + } + + // Run a mix of ManualOps and normal ClientSession operations + int numThreads = numLockThreads + numOpThreads; + Thread[] threads = new Thread[numThreads]; + for (int t = 0; t < numThreads; t++) + { + var tid = t; + threads[t] = new Thread(() => { if (tid < numLockThreads) runLockThread(tid); else runOpThread(tid); }); + } + for (int t = 0; t < numThreads; t++) + threads[t].Start(); + for (int t = 0; t < numThreads; t++) + threads[t].Join(); + + EnsureNoLocks(); } } } diff --git a/cs/test/ReadAddressTests.cs b/cs/test/ReadAddressTests.cs index f630a0b0b..1a4cfa8b3 100644 --- a/cs/test/ReadAddressTests.cs +++ b/cs/test/ReadAddressTests.cs @@ -5,6 +5,7 @@ using FASTER.core; using NUnit.Framework; using System.Threading.Tasks; +using System.Collections.Generic; namespace FASTER.test.readaddress { @@ -54,9 +55,31 @@ public struct Output private static long SetReadOutput(long key, long value) => (key << 32) | value; + public enum UseReadCache { NoReadCache, ReadCache } + internal class Functions : FunctionsBase { internal long lastWriteAddress = Constants.kInvalidAddress; + bool useReadCache; + bool copyReadsToTail; // Note: not 
currently used; not necessary due to setting SkipCopyToTail, and we get the copied-to address for CopyToTail (unlike ReadCache). + internal ReadFlags readFlags = ReadFlags.None; + + internal Functions() + { + foreach (var arg in TestContext.CurrentContext.Test.Arguments) + { + if (arg is UseReadCache urc) + { + this.useReadCache = urc == UseReadCache.ReadCache; + continue; + } + if (arg is CopyReadsToTail crtt) + { + this.copyReadsToTail = crtt != CopyReadsToTail.None; + continue; + } + } + } public override bool ConcurrentReader(ref Key key, ref Value input, ref Value value, ref Output output, ref RecordInfo recordInfo, long address) { @@ -101,7 +124,12 @@ public override void CopyUpdater(ref Key key, ref Value input, ref Value oldValu public override void ReadCompletionCallback(ref Key key, ref Value input, ref Output output, Empty ctx, Status status, RecordMetadata recordMetadata) { if (status == Status.OK) - Assert.AreEqual(output.address, recordMetadata.Address); + { + if (this.useReadCache && !this.readFlags.HasFlag(ReadFlags.SkipReadCache)) + Assert.AreEqual(Constants.kInvalidAddress, recordMetadata.Address, $"key {key}"); + else + Assert.AreEqual(output.address, recordMetadata.Address, $"key {key}"); + } } public override void RMWCompletionCallback(ref Key key, ref Value input, ref Output output, Empty ctx, Status status, RecordMetadata recordMetadata) @@ -234,12 +262,13 @@ public void Dispose() } // readCache and copyReadsToTail are mutually exclusive and orthogonal to populating by RMW vs. Upsert. 
- [TestCase(false, CopyReadsToTail.None, false, false)] - [TestCase(false, CopyReadsToTail.FromStorage, true, true)] - [TestCase(true, CopyReadsToTail.None, false, true)] + [TestCase(UseReadCache.NoReadCache, CopyReadsToTail.None, false, false)] + [TestCase(UseReadCache.NoReadCache, CopyReadsToTail.FromStorage, true, true)] + [TestCase(UseReadCache.ReadCache, CopyReadsToTail.None, false, true)] [Category("FasterKV")] - public void VersionedReadSyncTests(bool useReadCache, CopyReadsToTail copyReadsToTail, bool useRMW, bool flush) + public void VersionedReadSyncTests(UseReadCache urc, CopyReadsToTail copyReadsToTail, bool useRMW, bool flush) { + var useReadCache = urc == UseReadCache.ReadCache; using var testStore = new TestStore(useReadCache, copyReadsToTail, flush); testStore.Populate(useRMW, useAsync:false).GetAwaiter().GetResult(); using var session = testStore.fkv.For(new Functions()).NewSession(); @@ -254,7 +283,22 @@ public void VersionedReadSyncTests(bool useReadCache, CopyReadsToTail copyReadsT for (int lap = maxLap - 1; /* tested in loop */; --lap) { - var status = session.Read(ref key, ref input, ref output, ref recordMetadata, serialNo: maxLap + 1); + // If the ordinal is not in the range of the most recent record versions, do not copy to readcache or tail. + session.functions.readFlags = (lap < maxLap - 1) ? ReadFlags.SkipCopyReads : ReadFlags.None; + + var status = session.Read(ref key, ref input, ref output, ref recordMetadata, session.functions.readFlags, serialNo: maxLap + 1); + + if (iteration == 1 && lap == maxLap - 1 && useReadCache) + { + // This should have been served from the readcache. Verify that, then reissue the query without readcache, so we can + // get the prev address for the chain. 
+ Assert.AreNotEqual(Status.PENDING, status); + Assert.AreEqual(Constants.kInvalidAddress, recordMetadata.Address); + Assert.IsTrue(testStore.ProcessChainRecord(status, recordMetadata, lap, ref output)); + session.functions.readFlags = ReadFlags.SkipReadCache; + status = session.Read(ref key, ref input, ref output, ref recordMetadata, session.functions.readFlags, serialNo: maxLap + 1); + } + if (status == Status.PENDING) { // This will wait for each retrieved record; not recommended for performance-critical code or when retrieving multiple records unless necessary. @@ -268,12 +312,13 @@ public void VersionedReadSyncTests(bool useReadCache, CopyReadsToTail copyReadsT } // readCache and copyReadsToTail are mutually exclusive and orthogonal to populating by RMW vs. Upsert. - [TestCase(false, CopyReadsToTail.None, false, false)] - [TestCase(false, CopyReadsToTail.FromStorage, true, true)] - [TestCase(true, CopyReadsToTail.None, false, true)] + [TestCase(UseReadCache.NoReadCache, CopyReadsToTail.None, false, false)] + [TestCase(UseReadCache.NoReadCache, CopyReadsToTail.FromStorage, true, true)] + [TestCase(UseReadCache.ReadCache, CopyReadsToTail.None, false, true)] [Category("FasterKV")] - public async Task VersionedReadAsyncTests(bool useReadCache, CopyReadsToTail copyReadsToTail, bool useRMW, bool flush) + public async Task VersionedReadAsyncTests(UseReadCache urc, CopyReadsToTail copyReadsToTail, bool useRMW, bool flush) { + var useReadCache = urc == UseReadCache.ReadCache; using var testStore = new TestStore(useReadCache, copyReadsToTail, flush); await testStore.Populate(useRMW, useAsync: true); using var session = testStore.fkv.For(new Functions()).NewSession(); @@ -287,8 +332,23 @@ public async Task VersionedReadAsyncTests(bool useReadCache, CopyReadsToTail cop for (int lap = maxLap - 1; /* tested in loop */; --lap) { - var readAsyncResult = await session.ReadAsync(ref key, ref input, recordMetadata.RecordInfo.PreviousAddress, default, serialNo: maxLap + 1); + // 
If the ordinal is not in the range of the most recent record versions, do not copy to readcache or tail. + session.functions.readFlags = (lap < maxLap - 1) ? ReadFlags.SkipCopyReads : ReadFlags.None; + + var readAsyncResult = await session.ReadAsync(ref key, ref input, recordMetadata.RecordInfo.PreviousAddress, session.functions.readFlags, default, serialNo: maxLap + 1); var (status, output) = readAsyncResult.Complete(out recordMetadata); + + if (iteration == 1 && lap == maxLap - 1 && useReadCache) + { + // This should have been served from the readcache. Verify that, then reissue the query without readcache, so we can + // get the prev address for the chain. + Assert.AreEqual(Constants.kInvalidAddress, recordMetadata.Address); + Assert.IsTrue(testStore.ProcessChainRecord(status, recordMetadata, lap, ref output)); + session.functions.readFlags = ReadFlags.SkipReadCache; + readAsyncResult = await session.ReadAsync(ref key, ref input, recordMetadata.RecordInfo.PreviousAddress, session.functions.readFlags, default, serialNo: maxLap + 1); + (status, output) = readAsyncResult.Complete(out recordMetadata); + } + if (!testStore.ProcessChainRecord(status, recordMetadata, lap, ref output)) break; } @@ -296,12 +356,13 @@ public async Task VersionedReadAsyncTests(bool useReadCache, CopyReadsToTail cop } // readCache and copyReadsToTail are mutually exclusive and orthogonal to populating by RMW vs. Upsert. 
- [TestCase(false, CopyReadsToTail.None, false, false)] - [TestCase(false, CopyReadsToTail.FromStorage, true, true)] - [TestCase(true, CopyReadsToTail.None, false, true)] + [TestCase(UseReadCache.NoReadCache, CopyReadsToTail.None, false, false)] + [TestCase(UseReadCache.NoReadCache, CopyReadsToTail.FromStorage, true, true)] + [TestCase(UseReadCache.ReadCache, CopyReadsToTail.None, false, true)] [Category("FasterKV")] - public void ReadAtAddressSyncTests(bool useReadCache, CopyReadsToTail copyReadsToTail, bool useRMW, bool flush) + public void ReadAtAddressSyncTests(UseReadCache urc, CopyReadsToTail copyReadsToTail, bool useRMW, bool flush) { + var useReadCache = urc == UseReadCache.ReadCache; using var testStore = new TestStore(useReadCache, copyReadsToTail, flush); testStore.Populate(useRMW, useAsync: false).GetAwaiter().GetResult(); using var session = testStore.fkv.For(new Functions()).NewSession(); @@ -318,13 +379,17 @@ public void ReadAtAddressSyncTests(bool useReadCache, CopyReadsToTail copyReadsT { var readAtAddress = recordMetadata.RecordInfo.PreviousAddress; - var status = session.Read(ref key, ref input, ref output, ref recordMetadata, serialNo: maxLap + 1); + var status = session.Read(ref key, ref input, ref output, ref recordMetadata, session.functions.readFlags, serialNo: maxLap + 1); if (status == Status.PENDING) { // This will wait for each retrieved record; not recommended for performance-critical code or when retrieving multiple records unless necessary. session.CompletePendingWithOutputs(out var completedOutputs, wait: true); (status, output) = TestUtils.GetSinglePendingResult(completedOutputs, out recordMetadata); } + + // After the first Read, do not allow copies to or lookups in ReadCache. 
+ session.functions.readFlags = ReadFlags.SkipCopyReads; + if (!testStore.ProcessChainRecord(status, recordMetadata, lap, ref output)) break; @@ -333,7 +398,7 @@ public void ReadAtAddressSyncTests(bool useReadCache, CopyReadsToTail copyReadsT var saveOutput = output; var saveRecordMetadata = recordMetadata; - status = session.ReadAtAddress(readAtAddress, ref input, ref output, serialNo: maxLap + 1); + status = session.ReadAtAddress(readAtAddress, ref input, ref output, session.functions.readFlags, serialNo: maxLap + 1); if (status == Status.PENDING) { // This will wait for each retrieved record; not recommended for performance-critical code or when retrieving multiple records unless necessary. @@ -349,12 +414,13 @@ public void ReadAtAddressSyncTests(bool useReadCache, CopyReadsToTail copyReadsT } // readCache and copyReadsToTail are mutually exclusive and orthogonal to populating by RMW vs. Upsert. - [TestCase(false, CopyReadsToTail.None, false, false)] - [TestCase(false, CopyReadsToTail.FromStorage, true, true)] - [TestCase(true, CopyReadsToTail.None, false, true)] + [TestCase(UseReadCache.NoReadCache, CopyReadsToTail.None, false, false)] + [TestCase(UseReadCache.NoReadCache, CopyReadsToTail.FromStorage, true, true)] + [TestCase(UseReadCache.ReadCache, CopyReadsToTail.None, false, true)] [Category("FasterKV")] - public async Task ReadAtAddressAsyncTests(bool useReadCache, CopyReadsToTail copyReadsToTail, bool useRMW, bool flush) + public async Task ReadAtAddressAsyncTests(UseReadCache urc, CopyReadsToTail copyReadsToTail, bool useRMW, bool flush) { + var useReadCache = urc == UseReadCache.ReadCache; using var testStore = new TestStore(useReadCache, copyReadsToTail, flush); await testStore.Populate(useRMW, useAsync: true); using var session = testStore.fkv.For(new Functions()).NewSession(); @@ -370,8 +436,12 @@ public async Task ReadAtAddressAsyncTests(bool useReadCache, CopyReadsToTail cop { var readAtAddress = recordMetadata.RecordInfo.PreviousAddress; - var 
readAsyncResult = await session.ReadAsync(ref key, ref input, recordMetadata.RecordInfo.PreviousAddress, default, serialNo: maxLap + 1); + var readAsyncResult = await session.ReadAsync(ref key, ref input, recordMetadata.RecordInfo.PreviousAddress, session.functions.readFlags, default, serialNo: maxLap + 1); var (status, output) = readAsyncResult.Complete(out recordMetadata); + + // After the first Read, do not allow copies to or lookups in ReadCache. + session.functions.readFlags = ReadFlags.SkipCopyReads; + if (!testStore.ProcessChainRecord(status, recordMetadata, lap, ref output)) break; @@ -380,7 +450,7 @@ public async Task ReadAtAddressAsyncTests(bool useReadCache, CopyReadsToTail cop var saveOutput = output; var saveRecordMetadata = recordMetadata; - readAsyncResult = await session.ReadAtAddressAsync(readAtAddress, ref input, default, serialNo: maxLap + 1); + readAsyncResult = await session.ReadAtAddressAsync(readAtAddress, ref input, session.functions.readFlags, default, serialNo: maxLap + 1); (status, output) = readAsyncResult.Complete(out recordMetadata); Assert.AreEqual(saveOutput, output); @@ -391,12 +461,13 @@ public async Task ReadAtAddressAsyncTests(bool useReadCache, CopyReadsToTail cop } // Test is similar to others but tests the Overload where RadFlag.none is set -- probably don't need all combinations of test but doesn't hurt - [TestCase(false, CopyReadsToTail.None, false, false)] - [TestCase(false, CopyReadsToTail.FromStorage, true, true)] - [TestCase(true, CopyReadsToTail.None, false, true)] + [TestCase(UseReadCache.NoReadCache, CopyReadsToTail.None, false, false)] + [TestCase(UseReadCache.NoReadCache, CopyReadsToTail.FromStorage, true, true)] + [TestCase(UseReadCache.ReadCache, CopyReadsToTail.None, false, true)] [Category("FasterKV")] - public async Task ReadAtAddressAsyncReadFlagsNoneTests(bool useReadCache, CopyReadsToTail copyReadsToTail, bool useRMW, bool flush) + public async Task ReadAtAddressAsyncReadFlagsNoneTests(UseReadCache urc, 
CopyReadsToTail copyReadsToTail, bool useRMW, bool flush) { + var useReadCache = urc == UseReadCache.ReadCache; using var testStore = new TestStore(useReadCache, copyReadsToTail, flush); await testStore.Populate(useRMW, useAsync: true); using var session = testStore.fkv.For(new Functions()).NewSession(); @@ -412,8 +483,12 @@ public async Task ReadAtAddressAsyncReadFlagsNoneTests(bool useReadCache, CopyRe { var readAtAddress = recordMetadata.RecordInfo.PreviousAddress; - var readAsyncResult = await session.ReadAsync(ref key, ref input, recordMetadata.RecordInfo.PreviousAddress, default, serialNo: maxLap + 1); + var readAsyncResult = await session.ReadAsync(ref key, ref input, recordMetadata.RecordInfo.PreviousAddress, session.functions.readFlags, default, serialNo: maxLap + 1); var (status, output) = readAsyncResult.Complete(out recordMetadata); + + // After the first Read, do not allow copies to or lookups in ReadCache. + session.functions.readFlags = ReadFlags.SkipCopyReads; + if (!testStore.ProcessChainRecord(status, recordMetadata, lap, ref output)) break; @@ -422,7 +497,7 @@ public async Task ReadAtAddressAsyncReadFlagsNoneTests(bool useReadCache, CopyRe var saveOutput = output; var saveRecordMetadata = recordMetadata; - readAsyncResult = await session.ReadAtAddressAsync(readAtAddress, ref input, ReadFlags.None, default, serialNo: maxLap + 1); + readAsyncResult = await session.ReadAtAddressAsync(readAtAddress, ref input, session.functions.readFlags, default, serialNo: maxLap + 1); (status, output) = readAsyncResult.Complete(out recordMetadata); Assert.AreEqual(saveOutput, output); @@ -433,12 +508,13 @@ public async Task ReadAtAddressAsyncReadFlagsNoneTests(bool useReadCache, CopyRe } // Test is similar to others but tests the Overload where RadFlag.SkipReadCache is set - [TestCase(false, CopyReadsToTail.None, false, false)] - [TestCase(false, CopyReadsToTail.FromStorage, true, true)] - [TestCase(true, CopyReadsToTail.None, false, true)] + 
[TestCase(UseReadCache.NoReadCache, CopyReadsToTail.None, false, false)] + [TestCase(UseReadCache.NoReadCache, CopyReadsToTail.FromStorage, true, true)] + [TestCase(UseReadCache.ReadCache, CopyReadsToTail.None, false, true)] [Category("FasterKV")] - public async Task ReadAtAddressAsyncReadFlagsSkipCacheTests(bool useReadCache, CopyReadsToTail copyReadsToTail, bool useRMW, bool flush) + public async Task ReadAtAddressAsyncReadFlagsSkipCacheTests(UseReadCache urc, CopyReadsToTail copyReadsToTail, bool useRMW, bool flush) { + var useReadCache = urc == UseReadCache.ReadCache; using var testStore = new TestStore(useReadCache, copyReadsToTail, flush); await testStore.Populate(useRMW, useAsync: true); using var session = testStore.fkv.For(new Functions()).NewSession(); @@ -454,8 +530,12 @@ public async Task ReadAtAddressAsyncReadFlagsSkipCacheTests(bool useReadCache, C { var readAtAddress = recordMetadata.RecordInfo.PreviousAddress; - var readAsyncResult = await session.ReadAsync(ref key, ref input, recordMetadata.RecordInfo.PreviousAddress, default, serialNo: maxLap + 1); + var readAsyncResult = await session.ReadAsync(ref key, ref input, recordMetadata.RecordInfo.PreviousAddress, session.functions.readFlags, default, serialNo: maxLap + 1); var (status, output) = readAsyncResult.Complete(out recordMetadata); + + // After the first Read, do not allow copies to or lookups in ReadCache. + session.functions.readFlags = ReadFlags.SkipCopyReads; + if (!testStore.ProcessChainRecord(status, recordMetadata, lap, ref output)) break; @@ -475,12 +555,13 @@ public async Task ReadAtAddressAsyncReadFlagsSkipCacheTests(bool useReadCache, C } // readCache and copyReadsToTail are mutually exclusive and orthogonal to populating by RMW vs. Upsert. 
- [TestCase(false, CopyReadsToTail.None, false, false)] - [TestCase(false, CopyReadsToTail.FromStorage, true, true)] - [TestCase(true, CopyReadsToTail.None, false, true)] + [TestCase(UseReadCache.NoReadCache, CopyReadsToTail.None, false, false)] + [TestCase(UseReadCache.NoReadCache, CopyReadsToTail.FromStorage, true, true)] + [TestCase(UseReadCache.ReadCache, CopyReadsToTail.None, false, true)] [Category("FasterKV")] - public void ReadNoKeySyncTests(bool useReadCache, CopyReadsToTail copyReadsToTail, bool useRMW, bool flush) // readCache and copyReadsToTail are mutually exclusive and orthogonal to populating by RMW vs. Upsert. + public void ReadNoKeySyncTests(UseReadCache urc, CopyReadsToTail copyReadsToTail, bool useRMW, bool flush) // readCache and copyReadsToTail are mutually exclusive and orthogonal to populating by RMW vs. Upsert. { + var useReadCache = urc == UseReadCache.ReadCache; using var testStore = new TestStore(useReadCache, copyReadsToTail, flush); testStore.Populate(useRMW, useAsync: false).GetAwaiter().GetResult(); using var session = testStore.fkv.For(new Functions()).NewSession(); @@ -495,7 +576,11 @@ public void ReadNoKeySyncTests(bool useReadCache, CopyReadsToTail copyReadsToTai for (int ii = 0; ii < numKeys; ++ii) { var keyOrdinal = rng.Next(numKeys); - var status = session.ReadAtAddress(testStore.InsertAddresses[keyOrdinal], ref input, ref output, serialNo: maxLap + 1); + + // If the ordinal is not in the range of the most recent record versions, do not copy to readcache or tail. + session.functions.readFlags = (keyOrdinal <= (numKeys - keyMod)) ? ReadFlags.SkipCopyReads : ReadFlags.None; + + var status = session.ReadAtAddress(testStore.InsertAddresses[keyOrdinal], ref input, ref output, session.functions.readFlags, serialNo: maxLap + 1); if (status == Status.PENDING) { // This will wait for each retrieved record; not recommended for performance-critical code or when retrieving multiple records unless necessary. 
@@ -511,12 +596,13 @@ public void ReadNoKeySyncTests(bool useReadCache, CopyReadsToTail copyReadsToTai } // readCache and copyReadsToTail are mutually exclusive and orthogonal to populating by RMW vs. Upsert. - [TestCase(false, CopyReadsToTail.None, false, false)] - [TestCase(false, CopyReadsToTail.FromStorage, true, true)] - [TestCase(true, CopyReadsToTail.None, false, true)] + [TestCase(UseReadCache.NoReadCache, CopyReadsToTail.None, false, false)] + [TestCase(UseReadCache.NoReadCache, CopyReadsToTail.FromStorage, true, true)] + [TestCase(UseReadCache.ReadCache, CopyReadsToTail.None, false, true)] [Category("FasterKV")] - public async Task ReadNoKeyAsyncTests(bool useReadCache, CopyReadsToTail copyReadsToTail, bool useRMW, bool flush) + public async Task ReadNoKeyAsyncTests(UseReadCache urc, CopyReadsToTail copyReadsToTail, bool useRMW, bool flush) { + var useReadCache = urc == UseReadCache.ReadCache; using var testStore = new TestStore(useReadCache, copyReadsToTail, flush); await testStore.Populate(useRMW, useAsync: true); using var session = testStore.fkv.For(new Functions()).NewSession(); @@ -531,10 +617,18 @@ public async Task ReadNoKeyAsyncTests(bool useReadCache, CopyReadsToTail copyRea for (int ii = 0; ii < numKeys; ++ii) { var keyOrdinal = rng.Next(numKeys); - var readAsyncResult = await session.ReadAtAddressAsync(testStore.InsertAddresses[keyOrdinal], ref input, default, serialNo: maxLap + 1); + + // If the ordinal is not in the range of the most recent record versions, do not copy to readcache or tail. + session.functions.readFlags = (keyOrdinal <= (numKeys - keyMod)) ? 
ReadFlags.SkipCopyReads : ReadFlags.None; + + var readAsyncResult = await session.ReadAtAddressAsync(testStore.InsertAddresses[keyOrdinal], ref input, session.functions.readFlags, default, serialNo: maxLap + 1); var (status, output) = readAsyncResult.Complete(out recordMetadata); + TestStore.ProcessNoKeyRecord(status, ref output, keyOrdinal); } + + // After the first Read, do not allow copies to or lookups in ReadCache. + session.functions.readFlags = ReadFlags.SkipReadCache; } await testStore.Flush(); From bc91d2b8673a97a9914a355076e6d2f061404c6e Mon Sep 17 00:00:00 2001 From: TedHartMS <15467143+TedHartMS@users.noreply.github.com> Date: Tue, 30 Nov 2021 16:47:59 -0800 Subject: [PATCH 04/25] Locking docs --- cs/FASTER.sln | 1 + cs/src/core/Index/FASTER/FASTERImpl.cs | 49 +++++-- cs/test/ManualOperationsTests.cs | 5 +- docs/_docs/30-fasterkv-manual-locking.md | 179 +++++++++++++++++++++++ 4 files changed, 220 insertions(+), 14 deletions(-) create mode 100644 docs/_docs/30-fasterkv-manual-locking.md diff --git a/cs/FASTER.sln b/cs/FASTER.sln index 417e3329e..299bd0af7 100644 --- a/cs/FASTER.sln +++ b/cs/FASTER.sln @@ -72,6 +72,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "docs", "docs", "{C60F148B-2 ..\docs\_docs\25-fasterkv-recovery.md = ..\docs\_docs\25-fasterkv-recovery.md ..\docs\_docs\26-fasterkv-samples.md = ..\docs\_docs\26-fasterkv-samples.md ..\docs\_docs\29-fasterkv-cpp.md = ..\docs\_docs\29-fasterkv-cpp.md + ..\docs\_docs\30-fasterkv-manual-locking.md = ..\docs\_docs\30-fasterkv-manual-locking.md ..\docs\_docs\40-fasterlog-basics.md = ..\docs\_docs\40-fasterlog-basics.md ..\docs\_docs\43-fasterlog-tuning.md = ..\docs\_docs\43-fasterlog-tuning.md ..\docs\_docs\46-fasterlog-samples.md = ..\docs\_docs\46-fasterlog-samples.md diff --git a/cs/src/core/Index/FASTER/FASTERImpl.cs b/cs/src/core/Index/FASTER/FASTERImpl.cs index 9cc9fe6cb..bd89b584a 100644 --- a/cs/src/core/Index/FASTER/FASTERImpl.cs +++ b/cs/src/core/Index/FASTER/FASTERImpl.cs @@ 
-251,7 +251,16 @@ internal OperationStatus InternalRead( pendingContext.logicalAddress = logicalAddress; ref Value recordValue = ref hlog.GetValue(physicalAddress); - if (recordInfo.Tombstone) + if (fasterSession.IsManualOperations && recordInfo.Stub) + { + // This is only for ManualFasterOperations, and we assume we hold this lock, but we should not be reading or (un)locking a Stub (we should promote or Delete). + return OperationStatus.NOTFOUND; + } + else if (IsIntermediate(ref pendingContext.recordInfo, useStartAddress)) + { + return RetryOnIntermediateRecord(ref recordInfo, ref key, ref hlog.GetValue(physicalAddress), fasterSession); + } + else if (recordInfo.Tombstone) { if (fasterSession.IsManualOperations && pendingContext.lockOperation.IsSet) { @@ -260,7 +269,9 @@ internal OperationStatus InternalRead( } } else if (fasterSession.ConcurrentReader(ref key, ref input, ref recordValue, ref output, ref pendingContext.lockOperation, ref recordInfo, logicalAddress)) + { return OperationStatus.SUCCESS; + } return OperationStatus.NOTFOUND; } @@ -270,10 +281,17 @@ internal OperationStatus InternalRead( ref RecordInfo recordInfo = ref hlog.GetInfo(physicalAddress); pendingContext.recordInfo = recordInfo; pendingContext.logicalAddress = logicalAddress; - if (IsIntermediate(ref pendingContext.recordInfo, useStartAddress)) - return RetryOnIntermediateRecord(ref recordInfo, ref key, ref hlog.GetValue(physicalAddress), fasterSession); - if (recordInfo.Tombstone) + if (fasterSession.IsManualOperations && recordInfo.Stub) + { + // This is only for ManualFasterOperations, and we assume we hold this lock, but we should not be reading or (un)locking a Stub (we should promote or Delete). 
+ return OperationStatus.NOTFOUND; + } + else if (IsIntermediate(ref pendingContext.recordInfo, useStartAddress)) + { + return RetryOnIntermediateRecord(ref recordInfo, ref key, ref hlog.GetValue(physicalAddress), fasterSession); + } + else if (recordInfo.Tombstone) { if (fasterSession.IsManualOperations && pendingContext.lockOperation.IsSet) { @@ -463,6 +481,7 @@ internal OperationStatus InternalUpsert( { // This is only for ManualFasterOperations, and we assume we hold this lock. We will need to transfer the lock to the updated record. stubPhysicalAddress = physicalAddress; + status = OperationStatus.SUCCESS; goto CreateNewRecord; } ref Value recordValue = ref hlog.GetValue(physicalAddress); @@ -532,6 +551,7 @@ internal OperationStatus InternalUpsert( status = OperationStatus.SUCCESS; goto LatchRelease; // Release shared latch (if acquired) } + goto CreateNewRecord; } else { @@ -550,6 +570,7 @@ internal OperationStatus InternalUpsert( // ConcurrentWriter failed (e.g. insufficient space). Another thread may come along to do this update in-place; Seal it to prevent that. 
if (!SealRecord(ref recordInfo, ref key, ref recordValue, fasterSession)) return OperationStatus.RETRY_NOW; + goto CreateNewRecord; } } else if (fasterSession.IsManualOperations) @@ -563,10 +584,18 @@ internal OperationStatus InternalUpsert( pendingContext.logicalAddress = logicalAddress; status = OperationStatus.SUCCESS; - if (pendingContext.lockOperation.IsSet) + if (!recordInfo.Stub) { - fasterSession.ConcurrentWriter(ref key, ref input, ref value, ref recordValue, ref output, ref pendingContext.lockOperation, ref recordInfo, logicalAddress); - goto LatchRelease; // Release shared latch (if acquired) + if (pendingContext.lockOperation.IsSet) + { + fasterSession.ConcurrentWriter(ref key, ref input, ref value, ref recordValue, ref output, ref pendingContext.lockOperation, ref recordInfo, logicalAddress); + goto LatchRelease; // Release shared latch (if acquired) + } + else if (IsIntermediate(ref recordInfo)) + { + status = RetryOnIntermediateRecord(ref recordInfo, ref key, ref recordValue, fasterSession); + goto LatchRelease; // Release shared latch (if acquired) + } } // This ManualOps instance already owns this lock, or we wouldn't be here. Create a new record and transfer the lock @@ -930,7 +959,7 @@ internal OperationStatus InternalRMW( } // Fuzzy Region: Must go pending due to lost-update anomaly - else if (!fasterSession.IsManualOperations && (logicalAddress >= hlog.SafeReadOnlyAddress && !hlog.GetInfo(physicalAddress).Tombstone)) // TODO replace with Sealed + else if (logicalAddress >= hlog.SafeReadOnlyAddress && !hlog.GetInfo(physicalAddress).Tombstone) // TODO replace with Sealed { status = OperationStatus.RETRY_LATER; // Do not retain latch for pendings ops in relaxed CPR @@ -1376,7 +1405,7 @@ internal OperationStatus InternalDelete( if (fasterSession.IsManualOperations && recordInfo.Stub) { // This is only for ManualFasterOperations, and we assume we hold this lock. We can Tombstone the record directly and ManualsOps - // will also clear Stub. 
Caller must still unlock. + // will also clear Stub. Caller (ManualOps) must still unlock. fasterSession.ConcurrentDeleter(ref hlog.GetKey(physicalAddress), ref recordValue, ref recordInfo, logicalAddress); status = OperationStatus.SUCCESS; goto LatchRelease; // Release shared latch (if acquired) @@ -1814,8 +1843,6 @@ internal OperationStatus InternalContinuePendingRMW SingleWriter + - Do LockOp in ReadCache (wins if both RC and CTT specified) -> SingleWriter + - TODO: Updating remove from ReadCache (without eliding entire string) + - TODO: Unlock remove from LockCache + - Already in readcache: Lock + - TODO: Updating remove from ReadCache (without eliding entire string) +- LockOp is not set: + - in-mem (mutable and RO): + - Stub: lock + retry + - Sealed: Yield() and retry + - Other: as currently + - On-disk: + - TODO: PENDING then apply **lockcache** + +## Upsert + +- LockOp.IsSet (Note: Unlock doesn't happen; Upsert() is not called by Unlock()) + - Stub: Create new record and transfer locks from Stub to it + - This is Stub promotion; locking of added record is done in SingleWriter + - Sealed: Yield() and retry + - Tombstone: Do Lock op (e.g. unlock) + - Normal: + - Mutable and RO: Do LockOp in ConcurrentWriter + - on-disk: doesn't happen; Upsert does not have PENDING code path + - Already in readcache: Lock + - Note: This adds RO region handling to Upsert(), for lock handling +- LockOp is not set: + - in-mem (mutable and RO): + - Stub: + - ManualOps session: Create new record and transfer locks from Stub record + - non-ManualOps session: lock + retry + - Sealed: retry (no lock, bc we use this for transfer and that would confuse counts, so just Yield()) + - Other: as currently + - on-disk: doesn't happen; Upsert does not have PENDING code path + - TODO: apply **lockcache** + - TODO: When updating, promote to CTT from ReadCache *without* eliding entire readcache prefix chain + +## RMW + +- Lock/Unlock: None. 
Manual locking does not use RMW +- LockOp is not set: + - in-mem (mutable and RO): + - Stub + - ManualOps session: Create new record and transfer locks from Stub to it (TODO reviv: update in place) + - This is Stub promotion: locking of added record is done in InitialUpdater + - non-ManualOps session: lock + retry + - Sealed: Yield() and retry + - Tombstone: + - Nothing here as we do not process LockOp in RMW + - Normal: as currently + - on-disk: PENDING and **lockcache** + - If Stub promotion, locking of added record done in InitialUpdater out of `InternalCompletePendingRMW` + - TODO: When updating, promote to CTT from ReadCache *without* eliding entire readcache prefix chain + - TODO: replace "fuzzy" region with Sealed + +## Delete + +- Lock/Unlock: None. Manual locking does not use Delete + - Normal: + - in-mem (mutable and RO): + - Stub: + - ManualOps: Change Stub to Tombstone via ConcurrentDeleter + - non-ManualOps: lock + retry + - Sealed: Yield() and retry + - Other: as currently + - on-disk: doesn't happen; Upsert does not have PENDING code path + - TODO: apply **lockcache** + - TODO: When deleting, remove from ReadCache *without* eliding entire readcache prefix chain From bce809e9e0849ad84fdb97379655ebb7477c4747 Mon Sep 17 00:00:00 2001 From: Badrish Chandramouli Date: Mon, 6 Dec 2021 09:55:07 -0800 Subject: [PATCH 05/25] Update 30-fasterkv-manual-locking.md --- docs/_docs/30-fasterkv-manual-locking.md | 251 +++++++++++++++-------- 1 file changed, 169 insertions(+), 82 deletions(-) diff --git a/docs/_docs/30-fasterkv-manual-locking.md b/docs/_docs/30-fasterkv-manual-locking.md index 85074537b..2e56b0fe9 100644 --- a/docs/_docs/30-fasterkv-manual-locking.md +++ b/docs/_docs/30-fasterkv-manual-locking.md @@ -35,9 +35,9 @@ Lock multiple keys: manualOps.UnsafeResumeThread(out var epoch); LockInfo lockInfo = default; - manualOps.Lock(24, LockType.Shared, retrieveData: true, ref lockInfo); - manualOps.Lock(51, LockType.Shared, retrieveData: true, ref lockInfo); 
- manualOps.Lock(75, LockType.Exclusive, retrieveData: false, ref lockInfo); + manualOps.Lock(24, LockType.Shared); + manualOps.Lock(51, LockType.Shared); + manualOps.Lock(75, LockType.Exclusive); manualOps.Read(24, out var value24); manualOps.Read(51, out var value51); @@ -47,7 +47,7 @@ Lock multiple keys: manualOps.Unlock(51, LockType.Shared); manualOps.Unlock(75, LockType.Exclusive); - manualOps.UnsafeSuspendThread(out var epoch); + manualOps.UnsafeSuspendThread(); ``` Lock multiple keys: @@ -57,7 +57,7 @@ Lock multiple keys: manualOps.UnsafeResumeThread(out var epoch); LockInfo lockInfo = default; - manualOps.Lock(51, LockType.Shared, retrieveData: true, ref lockInfo); + manualOps.Lock(51, LockType.Shared); manualOps.Read(24, out var value24); manualOps.Read(51, out var value51); @@ -65,115 +65,202 @@ Lock multiple keys: manualOps.Unlock(51, LockType.Shared); - manualOps.UnsafeSuspendThread(out var epoch); + manualOps.UnsafeSuspendThread(); ``` +TODO: Add sample with `manualOps.LocalCurrentEpoch`. + ## Internal Design This section covers the internal design and implementation of manual locking. -Manual locking is integrated into `FASTERImpl.cs` methods, notably `InternalRead` and `InternalCompletePendingRead`, `InternalUpsert`, `InternalRMW` and `InternalCompletePendingRMW`, and `InternalDelete`. These modifications are exposed via the `Lock()` and `Unlock()` APIs on `ManualFasterOperations`. This integration is necessary because many parts are dependent upon the called routine, such as Stub and Sealed record handling and how to handle operations that reference on-disk records. Most LockOperation-specific code is protected by an `if (fasterSession.IsManualOperations)` test, which is a static bool member of the `FasterSession` implementation so the comparison should optimize out. 
+Manual locking is integrated into `FASTERImpl.cs` methods, notably `InternalRead` and `InternalCompletePendingRead`, `InternalUpsert`, `InternalRMW` and `InternalCompletePendingRMW`, and `InternalDelete`. These modifications are exposed via the `Lock()` and `Unlock()` APIs on `ManualFasterOperations`. LockOperation-specific code done in `InternalUpsert` and is protected by an `if (fasterSession.IsManualOperations)` test, which is a static bool member of the `FasterSession` implementation so the comparison should optimize it out. Because epoch protection is done by user calls, ManualFasterOperations methods call the internal ContextRead etc. methods, which are called by the API methods that do Resume and Suspend of epoch protection. -The actual implementation of `Lock()` uses only `InternalRead` and `InternalUpsert`, and the implementation of `Unlock()` uses only InternalRead. - -At a high level, `Lock()` checks to see if the `retrieveData` parameter is true. If so, it calls a new overload of ContextRead(). This will cause on-disk data to be retrieved via PENDING operations, and then the lock will be applied to it. If the record is not found, or if retrieveData is false, then a new overload of ContextUpser() is called. Upsert by design does not issue PENDING operations to retrieve on-disk data; if the record is not found, a Stub record is inserted at the tail of the log and is exclusively locked. - -The semantics of Stub records are: -- This is a placeholder only; it has no valid data. It should not be Read(). -- When an Upsert or RMW encounters a Stub record, it checks to see if the current operation is being done from an instance of `ManualFasterOperations`. - - If so, it assumes the current session holds the lock, and proceeds with the update by inserting a new record (which will be locked) and unlocking the Stub. - - Otherwise, the Stub is invalid, and the operation first issues a Lock on it in order to wait. 
Once the lock is acquired, if the record is still a Stub, the operation returns with a RETRY_NOW status. This allows retrying until the Stub is replaced by its actual value via Upsert or RMW, or is Deleted. - -For non-stub (that is, normal) records, the lock or unlock is applied directly. This includes records in the ReadOnly region, since Locks do not alter data. - -Sealed records are records for which an update operation is being done; they should not be updated directly, but rather the operation should be retried immediately; Sealed records are short-term (e.g. a single call to Upsert or RMW). Note that unlike a Stub record, the operation does not attempt to Lock a Sealed record; Sealed records are used in other Lock-transfer situations and thus locks should not be taken. Rather, the calling operation issues a `Thread.Yield()` prior to retrying. - -Following are the specific operations and their flow for the various lock states. +At a high level, `Lock()` and `Unlock()` call `ContextUpsert()` which in turn calls `InternalUpsert()`. Upsert by design does not issue PENDING operations to retrieve on-disk data, and locking/unlocking is designed to avoid pending I/O operations by use of a [`LockTable`](#locktable-overview) consisting of {`TKey`, `RecordInfo`} pairs, where `TKey` is the FasterKV Key type and `RecordInfo` is used to perform the locking/unlocking. + +Locking and unlocking use bits in the `RecordInfo` header to obtain one exclusive lock or up to 64 shared locks. Because locking does not affect data, even records in the ReadOnly region may be locked and unlocked directly. 
+
+### Relevant RecordInfo bits
+
+The following sections refer to the following two bits in the `RecordInfo`:
+- **Tentative**: a record marked Tentative is very short-term; it indicates that the thread is performing a Tentative insertion of the record, and may make the Tentative record final by removing the Tentative bit, or may back off the insertion by setting the record to Invalid and returning RETRY_NOW.
+- **Sealed**: a record marked Sealed is one for which an update is known to be in progress. Sealed records are "visible" only short-term (e.g. a single call to Upsert or RMW, or a transfer to/from the `LockTable`). A thread encountering this should immediately return RETRY_NOW.
+- **Invalid**: This is a well-known bit from v1 included here for clarity: its behavior is that the record is to be skipped, using its `.PreviousAddress` to move along the chain.
+
+### LockTable Overview
+
+For records not found in memory, the `LockTable` is used. The semantics of `LockTable` entries are as follows. This is a conceptual view; implementation details are described in subsequent sections:
+- On a `Lock` call, if the key is not found in memory, the `LockTable` is searched for the Key.
+  - If it is not found, an entry is made in the `LockTable` with an empty `RecordInfo`.
+  - The requested `LockType` is then taken on the `RecordInfo` for that Key.
+- On an `Unlock` call, if the key is not found in memory, the `LockTable` is searched for the Key.
+  - If it is not found, a Debug.Fail() is issued.
+  - Otherwise, the requested `LockType` is unlocked. If this leaves the `RecordInfo` unlocked, its entry is deleted from the `LockTable`.
+- When a Read or RMW obtains a record from ON-DISK, it consults the `LockTable`; if the key is found, the locks are transferred to the retrieved recordInfo, and the `LockTable` entry is removed.
+- When an Upsert (without `LockOperations`) or Delete does not find a key in memory, it consults the `LockTable`, and if the key is found: + - it Seals the RecordInfo in the `LockTable` + - it performs the usual "append at tail of Log" operation + - it removes the entry from the `LockTable` +- Because `LockTable` use does not verify that the key actually exists (as it does not issue a pending operation to ensure the requested key, and not a collision, is found in the on-disk portion), it is possible that keys will exist in the `LockTable` that do not in fact exist in the log. This is fine; if we do more than `Lock` them, then they will be added to the log at that time, and the locks applied to them. + +#### Insertion to LockTable due to Lock +When a thread doing `Lock()` looks for a key in the LockTable and cannot find it, it must do a tentative insertion into the locktable, because it is possible that another thread CAS'd that key to the Tail of the log after the current thread had passed the hash table lookup: +- We do not find the record in memory starting from current TailAddress, so we record that TailAddress as A. +- Locktable does not have entry so we create a tentative entry in locktable +- We check if key exists between current tail and A + - if yes we have to back off the LockTable entry creation by setting it Invalid and returning RETRY_NOW. + - Any thread trying an operation in the Lock Table on a Tentative record must spin until the Tentative bit is removed; this will be soon, because we are only following the hash chain back to A. + - Any waiting thread sees Invalid and in this case, it must also return RETRY_NOW. + - if no, we can set locktable entry as final by removing the Tentative bit + - Any waiting thread proceeds normally + +#### Removal from LockTable +Here are the sequences of operations to remove records from the Lock Table: +- Unlock + - If the lock count goes to 0, remove from `LockTable` conditionally on IsLocked == false. 
+- Pending Read to `ReadCache` or `CopyToTail`, Pending RMW to Tail, or Upsert or Delete of a key in the LockTable + - For all but Read(), we are modifying or removing the record, so we must acquire an Exclusive lock + - This is not done for `ManualFasterOperations`, which we assume owns the lock + - The `LockTable` record is CAS'd to Sealed. + - Lock and Unlock must return an out bool sealedWhenLocked + - If so, then it reverts and retries + - Other operations retry upon seeing the record is sealed + - The Insplice to the main log is done + - If this fails, the Sealed bit is removed from the `LockTable` entry and the thread does RETRY_NOW + - Else the record is removed from the `LockTable` + +### ReadCache Overview + +When the `ReadCache` is enabled, "records" from the `ReadCache` (actually simply their `RecordInfo` headers) are inserted into the chain starting at the `HashTable` (these records are identified as `ReadCache` by a combination of `FasterKV.UseReadCache` being set *and* the ReadCache bit in the `RecordInfo` is set). All `ReadCache` records come before any main log record. So (using r#### to indicate a `ReadCache` record and m#### to indicate a main log record): +- When there are no `ReadCache` entries in a hash chain, it looks like: `HashTable` -> m4000 -> m3000 -> m... +- When there are `ReadCache` entries in a hash chain, it looks like: `HashTable` -> r8000 -> r7000 -> m4000 -> m3000 -> m... + +As a terminology note, the sub-chain of r#### records is referred to as the `ReadCache` prefix of that hash chain. + +In FASTER v1, updates involving `ReadCache` records strip the entire `ReadCache` prefix from the chain. Additionally, the `ReadCache` prefix is stripped from the hash chain when a `ReadCache` page with that hashcode is evicted due to memory limits. In FASTER v2, because `ReadCache` records may be locked, we must not lose those locks. 
This is resolved in two ways: +- On record updates, `ReadCache` prefixes are preserved except for the specific record being updated, which is spliced out and transferred to a `CopyToTail` on the main log, including any locks. +- When `ReadCache` pages are evicted, their records are removed from the `ReadCache` prefix, and any with locks are transferred to the `LockTable`. + +### Record Transfers + +In normal FASTER operation, records are appended at the tail of the log and do not move. The `HashTable` points to these records for each distinct hash code. + +Record transfers occur when a ReadCache entry must be updated, or a record is evicted from either ReadCache or the main log while it holds locks. + +#### `ReadCache` Records at Tail of Log +Note that this resolution is only needed if there is an active `ManualFasterOperations` session at the time `ReadCacheEvict` is called. However, we must already traverse the `ReadCache` records, and it is possible for a new `ManualFasterOperations` session to start during the duration of `ReadCacheEvict`, so we do not optimize for the no-`ManualFasterOperations` case. + +For brevity, `ReadCache` is abbreviated RC, `CopyToTail` is abbreviated CTT, and `LockTable` is abbreviated LT. Main refers to the main log. The "final RC record" is the one at the RC->Main log boundary. As always, avoiding locking cost is a primary concern. + +For record transfers involving the ReadCache, we have the following high-level considerations: +- There is no record-transfer concern if the first record in the chain is not a `ReadCache` entry (normal CAS will do all necessary concurrency control as presently, and there are no outsplices). + - Otherwise, we insplice between the final RC entry and the first main-log entry; we never splice into the middle of the RC prefix chain. +- Even when RC entries are present in the tail, we must avoid latching because it affects all insert operations (at a minimum). 
+- "Insplicing" occurs when a new record is inserted into the main log after the end of the ReadCache prefix string. +- "Outsplicing" occurs when a record is spliced out of the RC portion of the hash chain (main log records are never spliced out) because the value for that key must be updated, or because we are evicting records from the ReadCache. Outsplicing introduces concurrency considerations but we must support it; we cannot simply mark ReadCache entries as Invalid and leave them there, or the chain will grow without bound. + - Insplicing: For splicing into the chain, we always CAS at the final RC entry rather than at the HashTable bucket slot (we never splice into the middle of the RC prefix chain). + - Add the new record to the tail of main by pointing to the existing tail of main in its `.PreviousAddress`. + - CAS the existing final RC record to point to the new record (set its .PreviousAddress and CAS). + - If CAS failed, someone else inserted -or- the `ReadCacheEvict` thread outspliced the final RC record (in this case, the (formerly) final RC record will have its Invalid bit set), so RETRY_NOW + - For outsplicing (which only applies to RC records), we use a modified "mark and sweep" approach: + - First mark the RC record being outspliced as Invalid via CAS loop; this ensures that the final RC record has a consistent .PreviousAddress (in the event another thread is insplicing while the final RC record is being marked Invalid). No latching is required in this mark phase. + - The Invalid entries are finally removed during ReadCacheEvict: + - CAS the RC record to be removed to be Sealed. This will cause any other operations to retry. + - CAS the preceding RC record to point to the to-be-removed RC record's .PreviousAddress (standard singly-linked-list operations) + - CAS the now-removed RC record to be Invalid. + +The above covers single-record operations on the RC prefix. 
Two-record operations occur when we must outsplice one record and insplice another, because the value for a record in the RC prefix is updated, e.g. Upsert updating a record in the ReadOnly region or RMW doing a CopyUpdater (of mutable or readonly), or either of these operating updating a key that is in the RC prefix chain. The considerations here are: +- Updating an RC record: + - Mark the RC record as Sealed + - Do the insplice as described above + - If the insplice succeeds, mark the RC entry as Invalid, else remove the Sealed marking and RETRY_NOW +- Updating an in-memory main log record: + - Mark the main log record as Sealed + - Do the insplice as described above + - If the insplice succeeds, leave the main log entry as Sealed, else remove the Sealed marking and RETRY_NOW + +#### Main Log Evictions + +When main log pages are evicted due to memory limits, *if* there are any active `ManualFasterOperations` sessions, then each record on those pages must be examined and any locks transferred to `LockTable` entries. + +Transfers to the `LockTable` due to main log evictions are handled in the following manner: +- A new `TentativeHeadAddress` (THA) field is added next to `HeadAddress`. +- Shifting HeadAddress is now done in two steps: Update THA, then update HeadAddress +- The purpose of THA is: + - No record at address < THA shall be touched for Lock operations (lock or unlock). + - Existing locks will remain there until transferred to the lock table. + - A thread that attempts to unlock a record < THA must wait for the lock table to get populated (details below) + - To mitigate unlocking records that were never locked, this must abandon the wait when the record to be unlocked becomes < HeadAddress. 
+ +Here is the sequence of operations to perform record eviction with `LockTable` Transfer: +- In (PageAligned)ShiftHeadAddress: + - TentativeHeadAddress = desired new HeadAddress + - BumpCurrentEpoch(() => OnPagesReadyToTransfer()) + - OnPagesReadyToTransfer() is a new routine: + - Scan from OldTentativeHeadAddress to new TentativeHeadAddress: + - Transfer records to `LockTable` + - This is safe because we are executing in the epoch Drain thread + - OldTentativeHeadAddress = TentativeHeadAddress + - HeadAddress = TentativeHeadAddress + +### FASTER Operations + +Following are the 4 FASTER operations and their flow for the various lock states. Abbreviations: - LockOp: The `LockOperations` instance passed to one of the InternalXxx methods. - CTT: CopyToTail - ITCTT: InternalTryCopyToTail +- Unfound refers to entries that are not found in memory (the hash chain passes below HeadAddress) or are not found in the Hash chain -Reference to Sealed record and the LockCache will be filled out as those are implemented. - -## Read +#### Read -- If LockOp.IsSet - - Stub: return NOTFOUND - - Sealed: Yield() and retry - - Tombstone: Do Lock op (e.g. unlock) - - Normal: - - Mutable: Do LockOp in ConcReader - - RO: Do LockOp in SingleReader - - On-disk: Issue PENDING operation - - Do LockOp in ITCTT -> SingleWriter - - Do LockOp in ReadCache (wins if both RC and CTT specified) -> SingleWriter - - TODO: Updating remove from ReadCache (without eliding entire string) - - TODO: Unlock remove from LockCache - - Already in readcache: Lock - - TODO: Updating remove from ReadCache (without eliding entire string) +- Lock/Unlock: None. 
Manual locking does not use Read - LockOp is not set: - in-mem (mutable and RO): - - Stub: lock + retry - - Sealed: Yield() and retry - - Other: as currently - - On-disk: - - TODO: PENDING then apply **lockcache** + - Sealed: Yield() and retry + - Other: as currently + - Unfound: After PENDING, transfer any `LockTable` entry as described above + - Splice out from readcache prefix chain if applicable -## Upsert +#### Upsert -- LockOp.IsSet (Note: Unlock doesn't happen; Upsert() is not called by Unlock()) - - Stub: Create new record and transfer locks from Stub to it - - This is Stub promotion; locking of added record is done in SingleWriter +- LockOp.IsSet - Sealed: Yield() and retry - Tombstone: Do Lock op (e.g. unlock) - Normal: - Mutable and RO: Do LockOp in ConcurrentWriter - - on-disk: doesn't happen; Upsert does not have PENDING code path + - on-disk: Perform `LockTable` insertion as described above - Already in readcache: Lock - - Note: This adds RO region handling to Upsert(), for lock handling + - Note: Locking adds ReadOnly-region handling to Upsert() - LockOp is not set: - in-mem (mutable and RO): - - Stub: - - ManualOps session: Create new record and transfer locks from Stub record - - non-ManualOps session: lock + retry - - Sealed: retry (no lock, bc we use this for transfer and that would confuse counts, so just Yield()) + - Sealed: Yield() and retry - Other: as currently - - on-disk: doesn't happen; Upsert does not have PENDING code path - - TODO: apply **lockcache** - - TODO: When updating, promote to CTT from ReadCache *without* eliding entire readcache prefix chain + - Splice out from readcache prefix chain if applicable + - Unfound: if found in `LockTable`, do [Removal From LockTable](#removal-from-locktable) -## RMW +#### RMW - Lock/Unlock: None. 
Manual locking does not use RMW - LockOp is not set: - - in-mem (mutable and RO): - - Stub - - ManualOps session: Create new record and transfer locks from Stub to it (TODO reviv: update in place) - - This is Stub promotion: locking of added record is done in InitialUpdater - - non-ManualOps session: lock + retry - - Sealed: Yield() and retry - - Tombstone: - - Nothing here as we do not process LockOp in RMW - - Normal: as currently - - on-disk: PENDING and **lockcache** - - If Stub promotion, locking of added record done in InitialUpdater out of `InternalCompletePendingRMW` - - TODO: When updating, promote to CTT from ReadCache *without* eliding entire readcache prefix chain - - TODO: replace "fuzzy" region with Sealed + - in-mem (mutable and RO): + - Sealed: Yield() and retry + - Tombstone: Nothing here as we do not process LockOp in RMW + - Normal: as currently + - Splice out from readcache prefix chain if applicable + - Unfound: if found in `LockTable`, do [Removal From LockTable](#removal-from-locktable) + - Note: Splice out from readcache prefix chain if applicable + - TODO: potentially replace "fuzzy" region with Sealed -## Delete +#### Delete - Lock/Unlock: None. 
Manual locking does not use Delete - - Normal: - - in-mem (mutable and RO): - - Stub: - - ManualOps: Change Stub to Tombstone via ConcurrentDeleter - - non-ManualOps: lock + retry - - Sealed: Yield() and retry - - Other: as currently - - on-disk: doesn't happen; Upsert does not have PENDING code path - - TODO: apply **lockcache** - - TODO: When deleting, remove from ReadCache *without* eliding entire readcache prefix chain + - in-mem (mutable and RO): + - Sealed: Yield() and retry + - Other: as currently + - Splice out from readcache prefix chain if applicable + - Unfound: if found in `LockTable`, do [Removal From LockTable](#removal-from-locktable) + - Note: Splice out from readcache prefix chain if applicable From d3ca1405cf7aed798587144d7a3afd69d2f2a1b1 Mon Sep 17 00:00:00 2001 From: TedHartMS <15467143+TedHartMS@users.noreply.github.com> Date: Wed, 8 Dec 2021 02:58:03 -0800 Subject: [PATCH 06/25] Updated doc to GH review and discussions --- docs/_docs/30-fasterkv-manual-locking.md | 224 ++++++++++++++++------- 1 file changed, 155 insertions(+), 69 deletions(-) diff --git a/docs/_docs/30-fasterkv-manual-locking.md b/docs/_docs/30-fasterkv-manual-locking.md index 2e56b0fe9..f7309c8bc 100644 --- a/docs/_docs/30-fasterkv-manual-locking.md +++ b/docs/_docs/30-fasterkv-manual-locking.md @@ -85,10 +85,14 @@ Locking and unlocking use bits in the `RecordInfo` header to obtain one exclusiv ### Relevant RecordInfo bits The following sections refer to the following two in the `RecordInfo`: +- **Lock Bits**: There is one Exclusive Lock bit and 6 Shared Lock bits (allowing 64 shared locks) in the RecordInfo. - **Tentative**: a record marked Tentative is very short-term; it indicates that the thread is performing a Tentative insertion of the record, and may make the Tentative record final by removing the Tentative bit, or may back off the insertion by setting the record to Invalid and returning RETRY_NOW. 
- **Sealed**: a record marked Sealed is one for which an update is known to be in progress. Sealed records are "visible" only short-term (e.g. a single call to Upsert or RMW, or a transfer to/from the `LockTable`). A thread encountering this should immediately return RETRY_NOW. + - Sealing is done via `RecordInfo.Seal`. This is used in locking scenarios rather than a sequence of `CAS to set Sealed; test Sealed bit` because the after-Seal locking is fuzzy; we don't know whether the record was CTT'd before or after a post-Seal lock, and thus we don't know if the transferred record "owns" our lock. `RecordInfo.Seal` does a CAS with both the XLock and Seal bits, then Unlocks the XLock bit; this ensures it works whether SupportsLocking is true or false. It returns true if successful or false if another thread Sealed the record. - **Invalid**: This is a well-known bit from v1 included here for clarity: its behavior is that the record is to be skipped, using its `.PreviousAddress` to move along the chain. +Additionally, the `SupportsLocking` flag has been moved from IFunctions to a `FasterKV` constructor argument. This value must be uniform across all sessions. It is only to control the locking done by FasterKV; this replaces the concept of user-controlled locking that was provided with the `IFunctions` methods for concurrent record access. + ### LockTable Overview For records not found in memory, the `LockTable` is used. The semantics of `LockTable` entries are as follow. This is a conceptual view; implementation details are described in subsequent sections: @@ -106,30 +110,37 @@ For records not found in memory, the `LockTable` is used. The semantics of `Lock - Because `LockTable` use does not verify that the key actually exists (as it does not issue a pending operation to ensure the requested key, and not a collision, is found in the on-disk portion), it is possible that keys will exist in the `LockTable` that do not in fact exist in the log.
This is fine; if we do more than `Lock` them, then they will be added to the log at that time, and the locks applied to them. #### Insertion to LockTable due to Lock -When a thread doing `Lock()` looks for a key in the LockTable and cannot find it, it must do a tentative insertion into the locktable, because it is possible that another thread CAS'd that key to the Tail of the log after the current thread had passed the hash table lookup: -- We do not find the record in memory starting from current TailAddress, so we record that TailAddress as A. -- Locktable does not have entry so we create a tentative entry in locktable -- We check if key exists between current tail and A - - if yes we have to back off the LockTable entry creation by setting it Invalid and returning RETRY_NOW. + +When a thread doing `Lock()` looks for a key in the LockTable and cannot find it, it must do a Tentative insertion into the locktable, because it is possible that another thread CAS'd that key to the Tail of the log after the current thread had passed the hash table lookup: +- We do not find the record in memory starting from current TailAddress, so we record that TailAddress as prevTailAddress. +- Locktable does not have an entry for this key so we create a Tentative entry in the LockTable for it +- We check if key exists between current TailAddress and prevTailAddress + - if yes we have to back off the LockTable entry creation by setting it Invalid (so anyone holding it to spin-test sees it is invalid), removing it from the LockTable, and returning RETRY_NOW. - Any thread trying an operation in the Lock Table on a Tentative record must spin until the Tentative bit is removed; this will be soon, because we are only following the hash chain back to A. + - If prevTailAddress has escaped to disk by the time we start following the hash chain from Tail to prevTailAddress, we must retry. See the InternalTryCopyToTail scan to expectedLogicalAddress and ON_DISK as an example of this. 
- Any waiting thread sees Invalid and in this case, it must also return RETRY_NOW. - if no, we can set locktable entry as final by removing the Tentative bit - Any waiting thread proceeds normally #### Removal from LockTable + Here are the sequences of operations to remove records from the Lock Table: - Unlock - - If the lock count goes to 0, remove from `LockTable` conditionally on IsLocked == false. + - If the lock count goes to 0, remove from `LockTable` conditionally on IsLocked == false and Sealed == false. + - Since only lock bits are relevant in LockTable, this is equivalent to saying RecordInfo.word == 0, which is a faster test. - Pending Read to `ReadCache` or `CopyToTail`, Pending RMW to Tail, or Upsert or Delete of a key in the LockTable - - For all but Read(), we are modifying or removing the record, so we must acquire an Exclusive lock + - For all but Read(), we are modifying or removing the record, so we must acquire an Exclusive lock on the LockTable entry - This is not done for `ManualFasterOperations`, which we assume owns the lock - - The `LockTable` record is CAS'd to Sealed. 
- - Lock and Unlock must return an out bool sealedWhenLocked - - If so, then it reverts and retries - - Other operations retry upon seeing the record is sealed + - The `LockTable` record is Sealed as described in [Relevant RecordInfo bits](#relevant-recordinfo-bits) + - If this fails, the operation retries + - Other operation threads retry upon seeing the record is sealed - The Insplice to the main log is done - If this fails, the Sealed bit is removed from the `LockTable` entry and the thread does RETRY_NOW - Else the record is removed from the `LockTable` + - Note: there is no concern about other threads that did not find the record on lookup and "lag behind" the thread doing the LockTable-entry removal and arrive at the LockTable after that record has been removed, because: + - If the lagging thread is from a pending Read operation, then that pending operation will retry due to the InternalTryCopyToTail expectedLogicalAddress check or the readcache "dual 2pc" check in [Conflict Between Upsert/RMW and Reading From Disk to ReadCache](#conflict-between-upsert-rmw-and-reading-from-disk-to-readcache) + - If the lagging thread is from a pending RMW operation, then that pending operation will retry due to the InternalContinuePendingRMW previousFirstRecordAddress check or the readcache "dual 2pc" check in [Conflict Between Upsert/RMW and Reading From Disk to ReadCache](#conflict-between-upsert-rmw-and-reading-from-disk-to-readcache) + - Upsert and Delete would find the LT entry directly ### ReadCache Overview @@ -150,16 +161,15 @@ In normal FASTER operation, records are appended at the tail of the log and do n Record transfers occur when a ReadCache entry must be updated, or a record is evicted from either ReadCache or the main log while it holds locks. #### `ReadCache` Records at Tail of Log -Note that this resolution is only needed if there is an active `ManualFasterOperations` session at the time `ReadCacheEvict` is called. 
However, we must already traverse the `ReadCache` records, and it is possible for a new `ManualFasterOperations` session to start during the duration of `ReadCacheEvict`, so we do not optimize for the no-`ManualFasterOperations` case. For brevity, `ReadCache` is abbreviated RC, `CopyToTail` is abbreviated CTT, and `LockTable` is abbreviated LT. Main refers to the main log. The "final RC record" is the one at the RC->Main log boundary. As always, avoiding locking cost is a primary concern. For record transfers involving the ReadCache, we have the following high-level considerations: -- There is no record-transfer concern if the first record in the chain is not a `ReadCache` entry (normal CAS will do all necessary concurrency control as presently, and there are no outsplices). +- There is no record-transfer concern if the first record in the hash chain is not a `ReadCache` entry. - Otherwise, we insplice between the final RC entry and the first main-log entry; we never splice into the middle of the RC prefix chain. -- Even when RC entries are present in the tail, we must avoid latching because it affects all insert operations (at a minimum). +- Even when there are RC entries in the hash chain, we must avoid latching because that would slow down all record-insertion operations (upsert, RMW of a new record, Delete of an on-disk record, etc.) as well as some Read situations. - "Insplicing" occurs when a new record is inserted into the main log after the end of the ReadCache prefix string. -- "Outsplicing" occurs when a record is spliced out of the RC portion of the hash chain (main log records are never spliced out) because the value for that key must be updated, or because we are evicting records from the ReadCache. Outsplicing introduces concurrency considerations but we must support it; we cannot simply mark ReadCache entries as Invalid and leave them there, or the chain will grow without bound. 
+- "Outsplicing" occurs when a record is spliced out of the RC portion of the hash chain (main log records are never spliced out) because the value for that key must be updated, or because we are evicting records from the ReadCache. Outsplicing introduces concurrency considerations but we must support it; we cannot simply mark ReadCache entries as Invalid and leave them there, or the chain will grow without bound. For concurrency reasons we defer outsplicing to readcache eviction time, when readcache records are destroyed, as described below. - Insplicing: For splicing into the chain, we always CAS at the final RC entry rather than at the HashTable bucket slot (we never splice into the middle of the RC prefix chain). - Add the new record to the tail of main by pointing to the existing tail of in its `.PreviousAddress`. - CAS the existing final RC record to point to the new record (set its .PreviousAddress and CAS). @@ -170,6 +180,7 @@ For record transfers involving the ReadCache, we have the following high-level c - CAS the RC record to be removed to be Sealed. This will cause any other operations to retry. - CAS the preceding RC record to point to the to-be-removed RC record's .PreviousAddress (standard singly-linked-list operations) - CAS the now-removed RC record to be Invalid. + - We only actually transfer records from the RC prefix to the LockTable if there is an active `ManualFasterOperations` session at the time `ReadCacheEvict` is called; otherwise there will be no locks. However, we must already traverse the `ReadCache` records, and it is possible for a new `ManualFasterOperations` session to start during the duration of `ReadCacheEvict`, so there is no benefit to checking for the no-`ManualFasterOperations` case (unlike [Main Log Evictions](#main-log-evictions), which can avoid page scans by checking for this). The above covers single-record operations on the RC prefix. 
Two-record operations occur when we must outsplice one record and insplice another, because the value for a record in the RC prefix is updated, e.g. Upsert updating a record in the ReadOnly region or RMW doing a CopyUpdater (of mutable or readonly), or either of these operating updating a key that is in the RC prefix chain. The considerations here are: - Updating an RC record: @@ -187,23 +198,22 @@ When main log pages are evicted due to memory limits, *if* there are any active Transfers to the `LockTable` due to main log evictions are handled in the following manner: - A new `TentativeHeadAddress` (THA) field is added next to `HeadAddress`. -- Shifting HeadAddress is now done in two steps: Update THA, then update HeadAddress -- The purpose of THA is: - - No record at address < THA shall be touched for Lock operations (lock or unlock). - - Existing locks will remain there until transferred to the lock table. - - A thread that attempts to unlock a record < THA must wait for the lock table to get populated (details below) - - To mitigate unlocking records that were never locked, this must abandon the wait when the record to be unlocked becomes < HeadAddress. 
- -Here is the sequence of operations to perform record eviction with `LockTable` Transfer: -- In (PageAligned)ShiftHeadAddress: - - TentativeHeadAddress = desired new HeadAddress - - BumpCurrentEpoch(() => OnPagesReadyToTransfer()) - - OnPagesReadyToTransfer() is a new routine: - - Scan from OldTentativeHeadAddress to new TentativeHeadAddress: - - Transfer records to `LockTable` - - This is safe because we are executing in the epoch Drain thread - - OldTentativeHeadAddress = TentativeHeadAddress - - HeadAddress = TentativeHeadAddress +- Shifting HeadAddress is now done in three steps: Update THA, handle evictions, then update HeadAddress + - In (PageAligned)ShiftHeadAddress, we now: + - epoch.BumpCurrentEpoch(() => OnPagesReadyToClose(oldTentativeHeadAddress, newHeadAddress)); + - OnPagesReadyToTransfer() is a new routine: + - ReadCacheEvict (via EvictCallback) + - epoch.BumpCurrentEpoch(() => OnPagesClosed(newHeadAddress)); + - This actually evicts the pages + +### Recovery Considerations + +We must clear in-memory records' lock bits during FoldOver recovery. +- Add to checkpoint information an indication of whether any `ManualFasterOperations` were active during the Checkpoint. +- If this MRO indicator is true: + - Scan pages, clearing the locks of any records + - These pages do not need to be flushed to disk + - Ensure random reads and scans will NOT be flummoxed by the weird lock bits ### FASTER Operations @@ -215,52 +225,128 @@ Abbreviations: - ITCTT: InternalTryCopyToTail - Unfound refers to entries that are not found in memory (the hash chain passes below HeadAddress) or are not found in the Hash chain +#### Conflict Between Upsert/RMW and Reading From Disk to ReadCache + +One big consideration for Upsert is that it blindly upserts when a scan for a record drops below HeadAddress. 
This in conjunction with our two insertion points--at HT->RC and at RC->MainLog--gives rise to the following lost-update anomaly: +- We Upsert k1 to the main log, splicing it into the RC->MainLog point +- At the same time, we did a read of k1 which brought the previous k1 value from disk into the read cache, inserting it at the HT->RC point +- Thus our upsert "failed", as the chain contains the old k1 in RC (even though the chain leads eventually to the new k1 at tail of main, operations will find the one in the readcache first). + +General algorithm, iff readcache entries are present: each participating thread adds a Tentative entry, then scans; if it does not find an existing record, then finalize. This is modified by ensuring that the update wins (its data is more recent). We *must* have such a two-phase operation at both ends, to ensure that whichever side completes the scan first, it will find an entry, either final or Tentative, for the other operation. +- Upsert (blind only) or RMW when reading from disk: + - Save HT->RC record address as prevFirstRCAddress + - Do the usual check-for-mutable: + - Call SkipAndInvalidateReadCache + - If the record is found in mutable and updated, return SUCCESS + - It was not mutable, so we must insert at end of log + - Insert at RC->MainLog boundary. This is *not* tentative, because we want Upsert to win any ties + - SkipAndInvalidateReadCache until prevFirstRCAddress + - We want the Upsert to win, so this pass ensures that any newly-added readcache entry for this key, whether tentative or not, is marked Invalid + - Remove the tentative +- Read: + - Prior to its SkipReadCache/TracebackForKeyMatch, it sets a tentative record at the HT->RC boundary. 
+ - it does the scan + - if the Tentative record is now Invalid, it means Upsert/RMW set it so for a later update; return NOTFOUND + - else if it found a non-RC record for this key, it sets the Tentative record to Invalid and returns NOTFOUND + - else it removes the Tentative flag + +OPTIMIZATION: Use readcache records rather than going to disk. However, there are issues here with the record being marked Invalid/Sealed in case multiple threads do it. + #### Read -- Lock/Unlock: None. Manual locking does not use Read -- LockOp is not set: - - in-mem (mutable and RO): - - Sealed: Yield() and retry - - Other: as currently - - Unfound: After PENDING, transfer any `LockTable` entry as described above - - Splice out from readcache prefix chain if applicable +Note that the changes specified here, including both shared and exclusive locks in the ReadOnly region, clarify the distinction between a data-centric view of the ReadOnly region being implicitly read-locked (because it cannot be updated), vs. a transactional view that requires explicit read locks. In a transactional view, a read lock prevents an exclusive lock; implicit readlocks based on address cannot do this in FASTER, because we can always do XLock or an RCU. Therefore, we need explicit read locks, and reads must of course block if there is an XLock. This also means that SingleReader would have to lock anyway, losing any distinction between it and ConcurrentReader. Therefore, we have consolidated ConcurrentReader and SingleReader into a single function. + +- for both mutable and RO records, if the RecordInfo is: + - Sealed: Yield() and retry + - If SupportsLocking, we would ephemerally readlock the record, and we can't lock Sealed records as the lock may be transferred with the record. 
+ - Tombstone: as current + - Other: as currently, including ephemeral locking + - Change IFunctions.SingleReader and .ConcurrentReader to simply .Reader +- On-disk: + - After PENDING + - if copying to readcache, do so in accordance with [Conflict Between Upsert/RMW and Reading From Disk to ReadCache](#conflict-between-upsert-rmw-and-reading-from-disk-to-readcache) + - else if CopyToTail do [Removal From LockTable](#removal-from-locktable) #### Upsert -- LockOp.IsSet - - Sealed: Yield() and retry - - Tombstone: Do Lock op (e.g. unlock) - - Normal: - - Mutable and RO: Do LockOp in ConcurrentWriter - - on-disk: Perform `LockTable` insertion as described above - - Already in readcache: Lock - - Note: Locking adds ReadOnly-region handling to Upsert() -- LockOp is not set: - - in-mem (mutable and RO): +Note: Upsert skips RO ops if the current FasterSession is not `ManualFasterOperations` (MFO); this comparison is a static bool property of the IFasterOperations implementation + +- If LockOp.IsSet + - If the record is in readcache: + - Do the Lock op: + - retry if the record is or becomes Sealed + - ignore/continue if the record is or becomes Invalid + - else for both mutable (and RO if MFO is active) records, if the RecordInfo is: - Sealed: Yield() and retry - - Tombstone: Do the Lock op (e.g. 
unlock) + - Other: + - Do the LockOp in ConcurrentWriter for both Mutable and RO + - else // key is not found or hash chain goes below HeadAddress + - Perform `LockTable` insertion as described in [Insertion to LockTable due to Lock](#insertion-to-locktable-due-to-lock) +- else // LockOp is not set: + - If the record is in readcache: + - Invalidate it + - insert the new value in accordance with [Conflict Between Upsert/RMW and Reading From Disk to ReadCache](#conflict-between-upsert-rmw-and-reading-from-disk-to-readcache) + - else if the record is in the mutable region and the RecordInfo is: + - Sealed: Yield() and retry + - Tombstone: as current + - Other: IPU (including ephemeral locks) + - If this returns false + - Set RecordInfo Sealed as described in [Relevant RecordInfo bits](#relevant-recordinfo-bits) + - Insert in accordance with [Conflict Between Upsert/RMW and Reading From Disk to ReadCache](#conflict-between-upsert-rmw-and-reading-from-disk-to-readcache) + - else if the record is in ReadOnly and the RecordInfo is: + - Sealed: Yield() and retry + - Tombstone: as current + - Other: Do CopyUpdater and insert in accordance with [Conflict Between Upsert/RMW and Reading From Disk to ReadCache](#conflict-between-upsert-rmw-and-reading-from-disk-to-readcache) + - else // key is not found or hash chain goes below HeadAddress + - if the key is in the lock table + - XLock it + - If it is Sealed or Invalid, then RETRY_NOW (someone else did an operation that removed it) + - Else + - Insert new record + - Remove locktable entry per [Removal From LockTable](#removal-from-locktable) + - InitialUpdater and insert in accordance with [Conflict Between Upsert/RMW and Reading From Disk to ReadCache](#conflict-between-upsert-rmw-and-reading-from-disk-to-readcache) #### RMW -- Lock/Unlock: None. 
Manual locking does not use RMW -- LockOp is not set: - - in-mem (mutable and RO): - - Sealed: Yield() and retry - - Tombstone: Nothing here as we do not process LockOp in RMW - - Normal: as currently - - Splice out from readcache prefix chain if applicable - - Unfound: if found in `LockTable`, do [Removal From LockTable](#removal-from-locktable) - - Note: Splice out from readcache prefix chain if applicable - - TODO: potentially replace "fuzzy" region with Sealed +RMW considerations are similar to Upsert from the sealing and "encountering locks" point of view. It does not do lock operations. + +- If the record is in readcache: + - Invalidate it + - CopyUpdater and insert the new value in accordance with [Conflict Between Upsert/RMW and Reading From Disk to ReadCache](#conflict-between-upsert-rmw-and-reading-from-disk-to-readcache) +- else if the record is in the mutable region and the RecordInfo is: + - Sealed: Yield() and retry + - Tombstone: as current + - Other: IPU (including ephemeral locks) + - If this returns false + - Set RecordInfo Sealed as described in [Relevant RecordInfo bits](#relevant-recordinfo-bits) + - Do CopyUpdater and insert in accordance with [Conflict Between Upsert/RMW and Reading From Disk to ReadCache](#conflict-between-upsert-rmw-and-reading-from-disk-to-readcache) +- else if the record is in ReadOnly and the RecordInfo is: + - Sealed: Yield() and retry + - Tombstone: as current + - Other: Do CopyUpdater and insert in accordance with [Conflict Between Upsert/RMW and Reading From Disk to ReadCache](#conflict-between-upsert-rmw-and-reading-from-disk-to-readcache) +- else // key is not found or hash chain goes below HeadAddress + - InitialUpdater and insert in accordance with [Conflict Between Upsert/RMW and Reading From Disk to ReadCache](#conflict-between-upsert-rmw-and-reading-from-disk-to-readcache) +- TODO: potentially replace "fuzzy" region at SafeReadOnlyAddress with Sealed, which should avoid the lost-update anomaly #### Delete -- 
Lock/Unlock: None. Manual locking does not use Delete - - in-mem (mutable and RO): - - Sealed: Yield() and retry - - Other: as currently - - Splice out from readcache prefix chain if applicable - - Unfound: if found in `LockTable`, do [Removal From LockTable](#removal-from-locktable) - - Note: Splice out from readcache prefix chain if applicable +- If the record is in readcache: + - Invalidate it + - insert the new deleted record in accordance with [Conflict Between Upsert/RMW and Reading From Disk to ReadCache](#conflict-between-upsert-rmw-and-reading-from-disk-to-readcache) +- else if the record is in the mutable region and the RecordInfo is: + - Sealed: Yield() and retry + - Tombstone: as current (nothing) + - Other: Mark as tombstone +- else if the record is in ReadOnly and the RecordInfo is: + - Sealed: Yield() and retry + - Tombstone: as current (nothing) + - Other: Insert deleted record in accordance with [Conflict Between Upsert/RMW and Reading From Disk to ReadCache](#conflict-between-upsert-rmw-and-reading-from-disk-to-readcache) +- else // key is not found or hash chain goes below HeadAddress + - if the key is in the lock table + - XLock it + - If it is Sealed or Invalid, then RETRY_NOW (someone else did an operation that removed it) + - Else + - Insert deleted record + - Remove locktable entry per [Removal From LockTable](#removal-from-locktable) + - Insert deleted record in accordance with [Conflict Between Upsert/RMW and Reading From Disk to ReadCache](#conflict-between-upsert-rmw-and-reading-from-disk-to-readcache) From 7d0a5fc3675f22233d13aac9dbe2947d32e5bac9 Mon Sep 17 00:00:00 2001 From: TedHartMS <15467143+TedHartMS@users.noreply.github.com> Date: Fri, 17 Dec 2021 05:35:55 -0800 Subject: [PATCH 07/25] WIP for LockableRawContext --- cs/src/core/ClientSession/ClientSession.cs | 63 +- .../ClientSession/ManualFasterOperations.cs | 243 +---- cs/src/core/Epochs/LightEpoch.cs | 1 + cs/src/core/Index/Common/Contexts.cs | 10 +- 
cs/src/core/Index/Common/RecordInfo.cs | 81 +- cs/src/core/Index/FASTER/FASTER.cs | 13 +- cs/src/core/Index/FASTER/FASTERImpl.cs | 949 +++++++++++------- cs/src/core/Index/FASTER/FASTERThread.cs | 8 +- .../core/Index/Interfaces/IFasterSession.cs | 17 +- cs/src/core/Utilities/LockTable.cs | 285 ++++++ cs/src/core/Utilities/LockType.cs | 34 +- cs/src/core/Utilities/LockUtility.cs | 74 ++ .../Utilities/SafeConcurrentDictionary.cs | 16 + cs/src/core/VarLen/MemoryFunctions.cs | 1 - cs/src/core/VarLen/SpanByteFunctions.cs | 1 - cs/test/ManualOperationsTests.cs | 210 ++-- cs/test/ObjectReadCacheTests.cs | 2 +- docs/_docs/30-fasterkv-manual-locking.md | 247 +++-- 18 files changed, 1400 insertions(+), 855 deletions(-) create mode 100644 cs/src/core/Utilities/LockTable.cs create mode 100644 cs/src/core/Utilities/LockUtility.cs diff --git a/cs/src/core/ClientSession/ClientSession.cs b/cs/src/core/ClientSession/ClientSession.cs index 27daeb05b..d3e8da63a 100644 --- a/cs/src/core/ClientSession/ClientSession.cs +++ b/cs/src/core/ClientSession/ClientSession.cs @@ -225,8 +225,7 @@ public Status Read(ref Key key, ref Input input, ref Output output, ref RecordMe if (SupportAsync) UnsafeResumeThread(); try { - LockOperation lockOp = default; - return fht.ContextRead(ref key, ref input, ref output, ref lockOp, ref recordMetadata, readFlags, userContext, FasterSession, serialNo, ctx); + return fht.ContextRead(ref key, ref input, ref output, ref recordMetadata, readFlags, userContext, FasterSession, serialNo, ctx); } finally { @@ -335,8 +334,7 @@ public Status Upsert(ref Key key, ref Input input, ref Value desiredValue, ref O if (SupportAsync) UnsafeResumeThread(); try { - LockOperation lockOp = default; - return fht.ContextUpsert(ref key, ref input, ref desiredValue, ref output, ref lockOp, out recordMetadata, userContext, FasterSession, serialNo, ctx); + return fht.ContextUpsert(ref key, ref input, ref desiredValue, ref output, out recordMetadata, userContext, FasterSession, serialNo, 
ctx); } finally { @@ -812,7 +810,7 @@ public InternalFasterSession(ClientSession _clientSession.functions.SupportsPostOperations; - public bool IsManualOperations => false; + public bool IsManualLocking => false; #endregion IFunctions - Optional features supported #region IFunctions - Reads @@ -827,14 +825,14 @@ public bool ConcurrentReader(ref Key key, ref Input input, ref Value value, ref public bool ConcurrentReaderLock(ref Key key, ref Input input, ref Value value, ref Output dst, ref RecordInfo recordInfo, long address) { - this.LockShared(ref recordInfo); + recordInfo.LockShared(); try { return _clientSession.functions.ConcurrentReader(ref key, ref input, ref value, ref dst, ref recordInfo, address); } finally { - this.UnlockShared(ref recordInfo); + recordInfo.UnlockShared(); } } @@ -855,7 +853,7 @@ public void SingleWriter(ref Key key, ref Input input, ref Value src, ref Value if (this.SupportsPostOperations && this.SupportsLocking) { // Lock must be taken after the value is initialized. Unlocked in PostSingleWriterLock. 
- this.LockExclusive(ref recordInfo); + recordInfo.LockExclusive(); } } @@ -886,7 +884,7 @@ private void PostSingleWriterLock(ref Key key, ref Input input, ref Value src, r } finally { - this.UnlockExclusive(ref recordInfo); + recordInfo.UnlockExclusive(); } } @@ -907,14 +905,14 @@ private bool ConcurrentWriterNoLock(ref Key key, ref Input input, ref Value src, [MethodImpl(MethodImplOptions.AggressiveInlining)] private bool ConcurrentWriterLock(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address) { - this.LockExclusive(ref recordInfo); + recordInfo.LockExclusive(); try { return !recordInfo.Tombstone && ConcurrentWriterNoLock(ref key, ref input, ref src, ref dst, ref output, ref recordInfo, address); } finally { - this.UnlockExclusive(ref recordInfo); + recordInfo.UnlockExclusive(); } } @@ -936,7 +934,7 @@ public void InitialUpdater(ref Key key, ref Input input, ref Value value, ref Ou if (this.SupportsPostOperations && this.SupportsLocking) { // Lock must be taken after the value is initialized. Unlocked in PostInitialUpdaterLock. - this.LockExclusive(ref recordInfo); + recordInfo.LockExclusive(); } } @@ -967,7 +965,7 @@ private void PostInitialUpdaterLock(ref Key key, ref Input input, ref Value valu } finally { - this.UnlockExclusive(ref recordInfo); + recordInfo.UnlockExclusive(); } } #endregion InitialUpdater @@ -985,7 +983,7 @@ public void CopyUpdater(ref Key key, ref Input input, ref Value oldValue, ref Va if (this.SupportsPostOperations && this.SupportsLocking) { // Lock must be taken after the value is initialized. Unlocked in PostInitialUpdaterLock. 
- this.LockExclusive(ref recordInfo); + recordInfo.LockExclusive(); } } @@ -1016,7 +1014,7 @@ private bool PostCopyUpdaterLock(ref Key key, ref Input input, ref Output output } finally { - this.UnlockExclusive(ref recordInfo); + recordInfo.UnlockExclusive(); } } #endregion CopyUpdater @@ -1038,14 +1036,14 @@ private bool InPlaceUpdaterNoLock(ref Key key, ref Input input, ref Output outpu private bool InPlaceUpdaterLock(ref Key key, ref Input input, ref Output output, ref Value value, ref RecordInfo recordInfo, long address) { - this.LockExclusive(ref recordInfo); + recordInfo.LockExclusive(); try { return !recordInfo.Tombstone && InPlaceUpdaterNoLock(ref key, ref input, ref output, ref value, ref recordInfo, address); } finally { - this.UnlockExclusive(ref recordInfo); + recordInfo.UnlockExclusive(); } } @@ -1085,14 +1083,14 @@ private bool ConcurrentDeleterNoLock(ref Key key, ref Value value, ref RecordInf [MethodImpl(MethodImplOptions.AggressiveInlining)] private bool ConcurrentDeleterLock(ref Key key, ref Value value, ref RecordInfo recordInfo, long address) { - this.LockExclusive(ref recordInfo); + recordInfo.LockExclusive(); try { return ConcurrentDeleterNoLock(ref key, ref value, ref recordInfo, address); } finally { - this.UnlockExclusive(ref recordInfo); + recordInfo.UnlockExclusive(); } } @@ -1100,33 +1098,6 @@ public void DeleteCompletionCallback(ref Key key, Context ctx) => _clientSession.functions.DeleteCompletionCallback(ref key, ctx); #endregion IFunctions - Deletes - #region IFunctions - Locking - - public void LockExclusive(ref RecordInfo recordInfo) => recordInfo.LockExclusive(); - - public void UnlockExclusive(ref RecordInfo recordInfo) => recordInfo.UnlockExclusive(); - - public bool TryLockExclusive(ref RecordInfo recordInfo, int spinCount = 1) => recordInfo.TryLockExclusive(spinCount); - - public void LockShared(ref RecordInfo recordInfo) => recordInfo.LockShared(); - - public void UnlockShared(ref RecordInfo recordInfo) => 
recordInfo.UnlockShared(); - - public bool TryLockShared(ref RecordInfo recordInfo, int spinCount = 1) => recordInfo.TryLockShared(spinCount); - - public void LockExclusiveFromShared(ref RecordInfo recordInfo) => recordInfo.LockExclusiveFromShared(); - - public bool TryLockExclusiveFromShared(ref RecordInfo recordInfo, int spinCount = 1) => recordInfo.TryLockExclusiveFromShared(spinCount); - - public bool IsLocked(ref RecordInfo recordInfo) => recordInfo.IsLocked; - - public bool IsLockedExclusive(ref RecordInfo recordInfo) => recordInfo.IsLockedExclusive; - - public bool IsLockedShared(ref RecordInfo recordInfo) => recordInfo.IsLockedShared; - - public void TransferLocks(ref RecordInfo oldRecordInfo, ref RecordInfo newRecordInfo) => newRecordInfo.TransferLocksFrom(ref oldRecordInfo); - #endregion IFunctions - Locking - #region IFunctions - Checkpointing public void CheckpointCompletionCallback(string guid, CommitPoint commitPoint) { diff --git a/cs/src/core/ClientSession/ManualFasterOperations.cs b/cs/src/core/ClientSession/ManualFasterOperations.cs index 12e2213ac..e66542d1b 100644 --- a/cs/src/core/ClientSession/ManualFasterOperations.cs +++ b/cs/src/core/ClientSession/ManualFasterOperations.cs @@ -45,10 +45,15 @@ internal ManualFasterOperations(ClientSession /// Resume session on current thread. IMPORTANT: Call SuspendThread before any async op. 
/// - /// Epoch that session resumes on; can be saved to see if epoch has changed + /// Epoch that the session resumed on; can be saved to see if epoch has changed [MethodImpl(MethodImplOptions.AggressiveInlining)] public void UnsafeResumeThread(out int resumeEpoch) => clientSession.UnsafeResumeThread(out resumeEpoch); + /// + /// Current epoch of the session + /// + public int LocalCurrentEpoch => clientSession.fht.epoch.LocalCurrentEpoch; + /// /// Suspend session on current thread /// @@ -79,6 +84,7 @@ public bool UnsafeCompletePendingWithOutputs(out CompletedOutputIterator 0) throw new FasterException($"Disposing ManualFasterOperations with locks held: {sharedLockCount} shared locks, {exclusiveLockCount} exclusive locks"); + Interlocked.Decrement(ref this.clientSession.fht.NumActiveLockingSessions); } #endregion Acquire and Dispose @@ -103,49 +110,19 @@ public void Dispose() /// /// The key to lock /// The type of lock to take - /// Whether to retrieve data (and copy to the tail of the log) if the key is not in the mutable region - /// Information about the acquired lock - public unsafe void Lock(ref Key key, LockType lockType, bool retrieveData, ref LockInfo lockInfo) + public unsafe void Lock(ref Key key, LockType lockType) { CheckAcquired(); - LockOperation lockOp = new(LockOperationType.LockRead, lockType); - - Input input = default; - Output output = default; - RecordMetadata recordMetadata = default; - // Note: this does not use RMW because that would complicate the RMW process: - // - InternalRMW would have to know whether we are doing retrieveData - // - this.CopyUpdater would have to call SingleWriter to simply copy the data over unchanged - // The assumption is that if retrieveData is true, there is an expectation the key already exists, so only ContextRead would be called. 
- - bool success = false; - if (retrieveData) - { - var status = clientSession.fht.ContextRead(ref key, ref input, ref output, ref lockOp, ref recordMetadata, ReadFlags.CopyToTail, context: default, FasterSession, serialNo: 0, clientSession.ctx); - success = status == Status.OK; - if (status == Status.PENDING) - { - // This bottoms out in WaitPending which assumes the epoch is protected, and releases it. So we don't release it here. - this.UnsafeCompletePendingWithOutputs(out var completedOutputs, wait: true); - completedOutputs.Next(); - recordMetadata = completedOutputs.Current.RecordMetadata; - completedOutputs.Dispose(); - success = true; - } - } + LockOperation lockOp = new(LockOperationType.Lock, lockType); - if (!success) - { - lockOp.LockOperationType = LockOperationType.LockUpsert; - Value value = default; - var status = clientSession.fht.ContextUpsert(ref key, ref input, ref value, ref output, ref lockOp, out recordMetadata, context: default, FasterSession, serialNo: 0, clientSession.ctx); - Debug.Assert(status == Status.OK); - } + OperationStatus status; + do + status = clientSession.fht.InternalLock(ref key, lockOp, out _, FasterSession, clientSession.ctx); + while (status == OperationStatus.RETRY_NOW); + Debug.Assert(status == OperationStatus.SUCCESS); - lockInfo.LockType = lockType == LockType.ExclusiveFromShared ? 
LockType.Exclusive : lockType; - lockInfo.Address = recordMetadata.Address; - if (lockInfo.LockType == LockType.Exclusive) + if (lockType == LockType.Exclusive) ++this.exclusiveLockCount; else ++this.sharedLockCount; @@ -156,83 +133,63 @@ public unsafe void Lock(ref Key key, LockType lockType, bool retrieveData, ref L /// /// The key to lock /// The type of lock to take - /// Whether to retrieve data (and copy to the tail of the log) if the key is not in the mutable region - /// Information about the acquired lock - public unsafe void Lock(Key key, LockType lockType, bool retrieveData, ref LockInfo lockInfo) - => Lock(ref key, lockType, retrieveData, ref lockInfo); - - /// - /// Lock the key with the specified , waiting until it is acquired - /// - /// The key to lock - /// The type of lock to take - /// Whether to retrieve data (and copy to the tail of the log) if the key is not in the mutable region - public unsafe void Lock(ref Key key, LockType lockType, bool retrieveData) - { - LockInfo lockInfo = default; - Lock(ref key, lockType, retrieveData, ref lockInfo); - } - - /// - /// Lock the key with the specified , waiting until it is acquired - /// - /// The key to lock - /// The type of lock to take - /// Whether to retrieve data (and copy to the tail of the log) if the key is not in the mutable region - public unsafe void Lock(Key key, LockType lockType, bool retrieveData) - { - LockInfo lockInfo = default; - Lock(ref key, lockType, retrieveData, ref lockInfo); - } + public unsafe void Lock(Key key, LockType lockType) => Lock(ref key, lockType); /// /// Lock the key with the specified /// /// The key to lock - /// The type of lock to take - /// Information about the acquired lock - public void Unlock(ref Key key, LockType lockType, ref LockInfo lockInfo) + /// The type of lock to release + public void Unlock(ref Key key, LockType lockType) { CheckAcquired(); - LockOperation lockOp = new(LockOperationType.Unlock, lockType); - Input input = default; - Output 
output = default; - RecordMetadata recordMetadata = default; + LockOperation lockOp = new(LockOperationType.Unlock, lockType); - var status = clientSession.fht.ContextRead(ref key, ref input, ref output, ref lockOp, ref recordMetadata, ReadFlags.None, context: default, FasterSession, serialNo: 0, clientSession.ctx); - if (status == Status.PENDING) - { - // Do nothing here, as a lock that goes into the on-disk region is considered unlocked--we will not allow that anyway. - // This bottoms out in WaitPending which assumes the epoch is protected, and releases it. So we don't release it here. - this.UnsafeCompletePending(wait: true); - } + OperationStatus status; + do + status = clientSession.fht.InternalLock(ref key, lockOp, out _, FasterSession, clientSession.ctx); + while (status == OperationStatus.RETRY_NOW); + Debug.Assert(status == OperationStatus.SUCCESS); - if (lockInfo.LockType == LockType.Exclusive) + if (lockType == LockType.Exclusive) --this.exclusiveLockCount; else --this.sharedLockCount; } /// - /// Lock the key with the specified lock type. + /// Unlock the key with the specified /// /// The key to lock - /// Information about the acquired lock - public void Unlock(Key key, ref LockInfo lockInfo) - => Unlock(ref key, lockInfo.LockType, ref lockInfo); + /// The type of lock to release + public void Unlock(Key key, LockType lockType) => Unlock(ref key, lockType); /// - /// Lock the key with the specified + /// Determines if the key is locked. Note this value may be obsolete as soon as it returns. 
/// /// The key to lock - /// The type of lock to take - public void Unlock(Key key, LockType lockType) + public (bool exclusive, bool shared) IsLocked(ref Key key) { - LockInfo lockInfo = default; - Unlock(ref key, lockType, ref lockInfo); + CheckAcquired(); + + LockOperation lockOp = new(LockOperationType.IsLocked, LockType.None); + + OperationStatus status; + RecordInfo lockInfo; + do + status = clientSession.fht.InternalLock(ref key, lockOp, out lockInfo, FasterSession, clientSession.ctx); + while (status == OperationStatus.RETRY_NOW); + Debug.Assert(status == OperationStatus.SUCCESS); + return (lockInfo.IsLockedExclusive, lockInfo.IsLockedShared); } + /// + /// Determines if the key is locked. Note this value may be obsolete as soon as it returns. + /// + /// The key to lock + public (bool exclusive, bool shared) IsLocked(Key key) => IsLocked(ref key); + #endregion Key Locking #region IFasterOperations @@ -284,8 +241,7 @@ public Status Read(Key key, out Output output, Context userContext = default, lo public Status Read(ref Key key, ref Input input, ref Output output, ref RecordMetadata recordMetadata, ReadFlags readFlags = ReadFlags.None, Context userContext = default, long serialNo = 0) { Debug.Assert(clientSession.fht.epoch.ThisInstanceProtected()); - LockOperation lockOp = default; - return clientSession.fht.ContextRead(ref key, ref input, ref output, ref lockOp, ref recordMetadata, readFlags, userContext, FasterSession, serialNo, clientSession.ctx); + return clientSession.fht.ContextRead(ref key, ref input, ref output, ref recordMetadata, readFlags, userContext, FasterSession, serialNo, clientSession.ctx); } /// @@ -374,8 +330,7 @@ public Status Upsert(ref Key key, ref Input input, ref Value desiredValue, ref O public Status Upsert(ref Key key, ref Input input, ref Value desiredValue, ref Output output, out RecordMetadata recordMetadata, Context userContext = default, long serialNo = 0) { Debug.Assert(clientSession.fht.epoch.ThisInstanceProtected()); - 
LockOperation lockOp = default; - return clientSession.fht.ContextUpsert(ref key, ref input, ref desiredValue, ref output, ref lockOp, out recordMetadata, userContext, FasterSession, serialNo, clientSession.ctx); + return clientSession.fht.ContextUpsert(ref key, ref input, ref desiredValue, ref output, out recordMetadata, userContext, FasterSession, serialNo, clientSession.ctx); } /// @@ -516,61 +471,19 @@ public InternalFasterSession(ClientSession true; // We need this for user record locking, but check for user's setting before calling user code - public bool IsManualOperations => true; + public bool IsManualLocking => true; #endregion IFunctions - Optional features supported - [MethodImpl(MethodImplOptions.AggressiveInlining)] - void HandleLockOperation(ref RecordInfo recordInfo, ref LockOperation lockOp, out bool isLock) - { - isLock = false; - if (lockOp.LockOperationType == LockOperationType.Unlock) - { - if (recordInfo.Stub) - { - recordInfo.Stub = false; - recordInfo.SetInvalid(); - } - if (lockOp.LockType == LockType.Shared) - this.UnlockShared(ref recordInfo); - else if (lockOp.LockType == LockType.Exclusive) - this.UnlockExclusive(ref recordInfo); - else - Debug.Fail($"Unexpected LockType: {lockOp.LockType}"); - return; - } - isLock = true; - if (lockOp.LockType == LockType.Shared) - this.LockShared(ref recordInfo); - else if (lockOp.LockType == LockType.Exclusive) - this.LockExclusive(ref recordInfo); - else if (lockOp.LockType == LockType.ExclusiveFromShared) - this.LockExclusiveFromShared(ref recordInfo); - else - Debug.Fail($"Unexpected LockType: {lockOp.LockType}"); - } - #region IFunctions - Reads [MethodImpl(MethodImplOptions.AggressiveInlining)] public bool SingleReader(ref Key key, ref Input input, ref Value value, ref Output dst, ref LockOperation lockOp, ref RecordInfo recordInfo, long address) { - if (lockOp.IsSet) - { - // No value is returned to the client through the lock sequence; for consistency all key locks must be acquired before 
their values are read. - HandleLockOperation(ref recordInfo, ref lockOp, out _); - return true; - } return _clientSession.functions.SingleReader(ref key, ref input, ref value, ref dst, ref recordInfo, address); } [MethodImpl(MethodImplOptions.AggressiveInlining)] public bool ConcurrentReader(ref Key key, ref Input input, ref Value value, ref Output dst, ref LockOperation lockOp, ref RecordInfo recordInfo, long address) { - if (lockOp.IsSet) - { - // No value is returned to the client through the lock sequence; for consistency all key locks must be acquired before their values are read. - HandleLockOperation(ref recordInfo, ref lockOp, out _); - return true; - } return _clientSession.functions.ConcurrentReader(ref key, ref input, ref value, ref dst, ref recordInfo, address); } @@ -588,21 +501,6 @@ public void ReadCompletionCallback(ref Key key, ref Input input, ref Output outp public void SingleWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref LockOperation lockOp, ref RecordInfo recordInfo, long address) { _clientSession.functions.SingleWriter(ref key, ref input, ref src, ref dst, ref output, ref recordInfo, address); - - // Lock here, and do not unlock in PostSingleWriter; wait for the user to explicitly unlock - if (lockOp.IsSet) - { - Debug.Assert(lockOp.LockOperationType != LockOperationType.Unlock); // Should have caught this in InternalUpsert - HandleLockOperation(ref recordInfo, ref lockOp, out _); - - // If this is a lock for upsert, then we've failed to find an in-memory record for this key, and we're creating a stub with a default value. 
- if (lockOp.LockOperationType == LockOperationType.LockUpsert) - recordInfo.Stub = true; - } - else if (lockOp.IsStubPromotion) - { - this.LockExclusive(ref recordInfo); - } } [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -615,13 +513,6 @@ public void PostSingleWriter(ref Key key, ref Input input, ref Value src, ref Va [MethodImpl(MethodImplOptions.AggressiveInlining)] public bool ConcurrentWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref LockOperation lockOp, ref RecordInfo recordInfo, long address) { - if (lockOp.IsSet) - { - // All lock operations in ConcurrentWriter can return immediately. - HandleLockOperation(ref recordInfo, ref lockOp, out _); - return true; - } - // Note: KeyIndexes do not need notification of in-place updates because the key does not change. return _clientSession.functions.ConcurrentWriter(ref key, ref input, ref src, ref dst, ref output, ref recordInfo, address); } @@ -640,10 +531,6 @@ public bool NeedInitialUpdate(ref Key key, ref Input input, ref Output output) public void InitialUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref LockOperation lockOp, ref RecordInfo recordInfo, long address) { _clientSession.functions.InitialUpdater(ref key, ref input, ref value, ref output, ref recordInfo, address); - if (lockOp.IsStubPromotion) - { - this.LockExclusive(ref recordInfo); - } } [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -697,7 +584,6 @@ public void PostSingleDeleter(ref Key key, ref RecordInfo recordInfo, long addre public bool ConcurrentDeleter(ref Key key, ref Value value, ref RecordInfo recordInfo, long address) { recordInfo.Tombstone = true; - recordInfo.Stub = false; return _clientSession.functions.ConcurrentDeleter(ref key, ref value, ref recordInfo, address); } @@ -705,33 +591,6 @@ public void DeleteCompletionCallback(ref Key key, Context ctx) => _clientSession.functions.DeleteCompletionCallback(ref key, ctx); #endregion IFunctions - Deletes - 
#region IFunctions - Locking - - public void LockExclusive(ref RecordInfo recordInfo) => recordInfo.LockExclusive(); - - public void UnlockExclusive(ref RecordInfo recordInfo) => recordInfo.UnlockExclusive(); - - public bool TryLockExclusive(ref RecordInfo recordInfo, int spinCount = 1) => recordInfo.TryLockExclusive(spinCount); - - public void LockShared(ref RecordInfo recordInfo) => recordInfo.LockShared(); - - public void UnlockShared(ref RecordInfo recordInfo) => recordInfo.UnlockShared(); - - public bool TryLockShared(ref RecordInfo recordInfo, int spinCount = 1) => recordInfo.TryLockShared(spinCount); - - public void LockExclusiveFromShared(ref RecordInfo recordInfo) => recordInfo.LockExclusiveFromShared(); - - public bool TryLockExclusiveFromShared(ref RecordInfo recordInfo, int spinCount = 1) => recordInfo.TryLockExclusiveFromShared(spinCount); - - public bool IsLocked(ref RecordInfo recordInfo) => recordInfo.IsLocked; - - public bool IsLockedExclusive(ref RecordInfo recordInfo) => recordInfo.IsLockedExclusive; - - public bool IsLockedShared(ref RecordInfo recordInfo) => recordInfo.IsLockedShared; - - public void TransferLocks(ref RecordInfo oldRecordInfo, ref RecordInfo newRecordInfo) => newRecordInfo.TransferLocksFrom(ref oldRecordInfo); - #endregion IFunctions - Locking - #region IFunctions - Checkpointing public void CheckpointCompletionCallback(string guid, CommitPoint commitPoint) { diff --git a/cs/src/core/Epochs/LightEpoch.cs b/cs/src/core/Epochs/LightEpoch.cs index 03314b49e..783925903 100644 --- a/cs/src/core/Epochs/LightEpoch.cs +++ b/cs/src/core/Epochs/LightEpoch.cs @@ -242,6 +242,7 @@ private void Acquire() threadEntryIndexCount++; } + internal int LocalCurrentEpoch => (*(tableAligned + threadEntryIndex)).localCurrentEpoch; /// /// Thread releases its epoch entry diff --git a/cs/src/core/Index/Common/Contexts.cs b/cs/src/core/Index/Common/Contexts.cs index 320684f00..24bf86ec6 100644 --- a/cs/src/core/Index/Common/Contexts.cs +++ 
b/cs/src/core/Index/Common/Contexts.cs @@ -89,15 +89,15 @@ internal struct PendingContext internal long minAddress; internal LockOperation lockOperation; - // Note: Must be kept in sync with corresponding ReadFlags enum values + // BEGIN Must be kept in sync with corresponding ReadFlags enum values internal const ushort kSkipReadCache = 0x0001; internal const ushort kMinAddress = 0x0002; internal const ushort kCopyReadsToTail = 0x0004; internal const ushort kSkipCopyReadsToTail = 0x0008; + // END Must be kept in sync with corresponding ReadFlags enum values internal const ushort kNoKey = 0x0100; internal const ushort kIsAsync = 0x0200; - internal const ushort kIsReadingAtAddress = 0x0400; [MethodImpl(MethodImplOptions.AggressiveInlining)] internal IHeapContainer DetachKey() @@ -176,12 +176,6 @@ internal bool IsAsync set => operationFlags = value ? (byte)(operationFlags | kIsAsync) : (byte)(operationFlags & ~kIsAsync); } - internal bool IsReadingAtAddress - { - get => (operationFlags & kIsReadingAtAddress) != 0; - set => operationFlags = value ? (ushort)(operationFlags | kIsReadingAtAddress) : (ushort)(operationFlags & ~kIsReadingAtAddress); - } - public void Dispose() { key?.Dispose(); diff --git a/cs/src/core/Index/Common/RecordInfo.cs b/cs/src/core/Index/Common/RecordInfo.cs index 2193ca022..e602c0a14 100644 --- a/cs/src/core/Index/Common/RecordInfo.cs +++ b/cs/src/core/Index/Common/RecordInfo.cs @@ -1,7 +1,7 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT license. 
-#pragma warning disable 1591 +#pragma warning disable CS1591 // Missing XML comment for publicly visible type or member using System.Diagnostics; using System.Runtime.CompilerServices; @@ -11,7 +11,7 @@ namespace FASTER.core { // RecordInfo layout (64 bits total): - // [--][InNewVersion][Filler][Dirty][Stub][Sealed] [Valid][Tombstone][X][SSSSSS] [RAAAAAAA] [AAAAAAAA] [AAAAAAAA] [AAAAAAAA] [AAAAAAAA] [AAAAAAAA] + // [--][InNewVersion][Filler][Dirty][Tentative][Sealed] [Valid][Tombstone][X][SSSSSS] [RAAAAAAA] [AAAAAAAA] [AAAAAAAA] [AAAAAAAA] [AAAAAAAA] [AAAAAAAA] // where X = exclusive lock, S = shared lock, R = readcache, A = address, - = unused [StructLayout(LayoutKind.Explicit, Size = 8)] public struct RecordInfo @@ -41,15 +41,15 @@ public struct RecordInfo // Other marker bits const int kTombstoneBitOffset = kExclusiveLockBitOffset + 1; const int kValidBitOffset = kTombstoneBitOffset + 1; - const int kStubBitOffset = kValidBitOffset + 1; - const int kSealedBitOffset = kStubBitOffset + 1; + const int kTentativeBitOffset = kValidBitOffset + 1; + const int kSealedBitOffset = kTentativeBitOffset + 1; const int kDirtyBitOffset = kSealedBitOffset + 1; const int kFillerBitOffset = kDirtyBitOffset + 1; const int kInNewVersionBitOffset = kFillerBitOffset + 1; const long kTombstoneBitMask = 1L << kTombstoneBitOffset; const long kValidBitMask = 1L << kValidBitOffset; - const long kStubBitMask = 1L << kStubBitOffset; + const long kTentativeBitMask = 1L << kTentativeBitOffset; const long kSealedBitMask = 1L << kSealedBitOffset; const long kDirtyBitMask = 1L << kDirtyBitOffset; const long kFillerBitMask = 1L << kFillerBitOffset; @@ -68,13 +68,17 @@ public static void WriteInfo(ref RecordInfo info, bool inNewVersion, bool tombst info.InNewVersion = inNewVersion; } + public bool Equals(RecordInfo other) => this.word == other.word; + + public long GetHashCode64() => Utility.GetHashCode(this.word); + public bool IsLocked => (word & (kExclusiveLockBitMask | kSharedLockMaskInWord)) 
!= 0; public bool IsLockedExclusive => (word & kExclusiveLockBitMask) != 0; public bool IsLockedShared => (word & kSharedLockMaskInWord) != 0; - public bool IsIntermediate => (word & (kStubBitMask | kSealedBitMask)) != 0; + public bool IsIntermediate => (word & (kTentativeBitMask | kSealedBitMask)) != 0; /// /// Take exclusive (write) lock on RecordInfo @@ -88,7 +92,7 @@ public static void WriteInfo(ref RecordInfo info, bool inNewVersion, bool tombst [MethodImpl(MethodImplOptions.AggressiveInlining)] public void UnlockExclusive() { - Debug.Assert(IsLockedExclusive); + Debug.Assert(IsLockedExclusive, "Trying to X unlock an unlocked record"); word &= ~kExclusiveLockBitMask; // Safe because there should be no other threads (e.g., readers) updating the word at this point } @@ -130,7 +134,7 @@ public bool TryLockExclusive(int spinCount = 1) [MethodImpl(MethodImplOptions.AggressiveInlining)] public void UnlockShared() { - Debug.Assert((word & kSharedLockMaskInWord) != 0); + Debug.Assert((word & kSharedLockMaskInWord) != 0, "Trying to S unlock an unlocked record"); Interlocked.Add(ref word, -kSharedLockIncrement); } @@ -192,14 +196,23 @@ public bool TryLockExclusiveFromShared(int spinCount = 1) return true; } - public void TransferLocksFrom(ref RecordInfo other) + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void CopyLocksFrom(RecordInfo other) { - // We should only be calling this when the record is sealed, to avoid an attempt to do a lock operation on the old record during this. 
- Debug.Assert(other.Sealed); word &= ~(kExclusiveLockBitMask | kSharedLockMaskInWord); word |= (other.word & (kExclusiveLockBitMask | kSharedLockMaskInWord)); } + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public bool TryUpdateAddress(long newPrevAddress) + { + var expectedWord = word; + RecordInfo newRI = default; + newRI.PreviousAddress = newPrevAddress; + var foundWord = Interlocked.CompareExchange(ref this.word, newRI.word, expectedWord); + return foundWord == expectedWord; + } + public bool IsNull() => word == 0; public bool Tombstone @@ -222,26 +235,52 @@ public bool Valid } } - public bool Stub + public bool Tentative { - get => (word & kStubBitMask) > 0; + get => (word & kTentativeBitMask) > 0; set { - if (value) word |= kStubBitMask; - else word &= ~kStubBitMask; + if (value) word |= kTentativeBitMask; + else word &= ~kTentativeBitMask; } } - public bool Sealed + public bool Sealed => (word & kSealedBitMask) > 0; + + // Ensure we have exclusive access before sealing. + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public bool Seal(bool isManualLocking = false) { - get => (word & kSealedBitMask) > 0; - set + if (isManualLocking) { - if (value) word |= kSealedBitMask; - else word &= ~kSealedBitMask; + // We own this lock, so just set the sealed bit. + word |= kSealedBitMask; + return true; + } + while (true) + { + if ((word & kExclusiveLockBitMask) == 0) + { + long expected_word = word; + long new_word = word | kExclusiveLockBitMask | kSealedBitMask; + long current_word = Interlocked.CompareExchange(ref word, new_word, expected_word); + if (expected_word == current_word) + { + // Lock+Seal succeeded; remove lock + this.UnlockExclusive(); + return true; + } + + // If someone else sealed this, we fail this attempt. 
+ if ((word & kSealedBitMask) > 0 || this.Invalid) + return false; + } + Thread.Yield(); } } + public void Unseal() => word &= ~kSealedBitMask; + public bool DirtyAtomic { set @@ -294,7 +333,7 @@ public bool InNewVersion public bool Invalid => (word & kValidBitMask) == 0; - public bool SkipOnScan => Invalid || (word & (kSealedBitMask | kStubBitMask)) != 0; + public bool SkipOnScan => Invalid || (word & (kSealedBitMask | kTentativeBitMask)) != 0; public long PreviousAddress { diff --git a/cs/src/core/Index/FASTER/FASTER.cs b/cs/src/core/Index/FASTER/FASTER.cs index 4ed1de0cd..4005cad33 100644 --- a/cs/src/core/Index/FASTER/FASTER.cs +++ b/cs/src/core/Index/FASTER/FASTER.cs @@ -113,6 +113,8 @@ public partial class FasterKV : FasterBase, internal ConcurrentDictionary _recoveredSessions; internal bool SupportsLocking; + internal LockTable LockTable; + internal long NumActiveLockingSessions = 0; /// /// Create FASTER instance @@ -192,6 +194,8 @@ public FasterKV(long size, LogSettings logSettings, UpdateVarLen(ref variableLengthStructSettings); + IVariableLengthStruct keyLen = null; + if ((!Utility.IsBlittable() && variableLengthStructSettings?.keyLength is null) || (!Utility.IsBlittable() && variableLengthStructSettings?.valueLength is null)) { @@ -217,6 +221,7 @@ public FasterKV(long size, LogSettings logSettings, } else if (variableLengthStructSettings != null) { + keyLen = variableLengthStructSettings.keyLength; hlog = new VariableLengthBlittableAllocator(logSettings, variableLengthStructSettings, this.comparer, null, epoch); Log = new LogAccessor(this, hlog); @@ -260,6 +265,8 @@ public FasterKV(long size, LogSettings logSettings, sectorSize = (int)logSettings.LogDevice.SectorSize; Initialize(size, sectorSize); + this.LockTable = new LockTable(keyLen, this.comparer, keyLen is null ? 
null : hlog.bufferPool); + systemState = SystemState.Make(Phase.REST, 1); } @@ -592,13 +599,12 @@ internal Status ContextRead(ref Key key, } [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal Status ContextRead(ref Key key, ref Input input, ref Output output, ref LockOperation lockOp, ref RecordMetadata recordMetadata, ReadFlags readFlags, Context context, + internal Status ContextRead(ref Key key, ref Input input, ref Output output, ref RecordMetadata recordMetadata, ReadFlags readFlags, Context context, FasterSession fasterSession, long serialNo, FasterExecutionContext sessionCtx) where FasterSession : IFasterSession { var pcontext = default(PendingContext); pcontext.SetOperationFlags(readFlags, recordMetadata.RecordInfo.PreviousAddress); - pcontext.lockOperation = lockOp; OperationStatus internalStatus; do internalStatus = InternalRead(ref key, ref input, ref output, recordMetadata.RecordInfo.PreviousAddress, ref context, ref pcontext, fasterSession, sessionCtx, serialNo); @@ -677,12 +683,11 @@ internal Status ContextUpsert(ref Key key } [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal Status ContextUpsert(ref Key key, ref Input input, ref Value value, ref Output output, ref LockOperation lockOp, out RecordMetadata recordMetadata, + internal Status ContextUpsert(ref Key key, ref Input input, ref Value value, ref Output output, out RecordMetadata recordMetadata, Context context, FasterSession fasterSession, long serialNo, FasterExecutionContext sessionCtx) where FasterSession : IFasterSession { var pcontext = default(PendingContext); - pcontext.lockOperation = lockOp; OperationStatus internalStatus; do diff --git a/cs/src/core/Index/FASTER/FASTERImpl.cs b/cs/src/core/Index/FASTER/FASTERImpl.cs index bd89b584a..7bd5867b7 100644 --- a/cs/src/core/Index/FASTER/FASTERImpl.cs +++ b/cs/src/core/Index/FASTER/FASTERImpl.cs @@ -17,14 +17,10 @@ public unsafe partial class FasterKV : FasterBase, IFasterKV - /// - /// - /// /// The logical address of 
the traced record for the key - /// /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - private bool CheckEntryVersionNew(long logicalAddress, FasterExecutionContext sessionCtx) + private bool CheckEntryVersionNew(long logicalAddress) { HashBucketEntry entry = default; entry.word = logicalAddress; @@ -61,48 +57,6 @@ internal enum LatchOperation : byte Exclusive } - [MethodImpl(MethodImplOptions.AggressiveInlining)] - static OperationStatus RetryOnIntermediateRecord(ref RecordInfo recordInfo, ref Key key, ref Value value, FasterSession fasterSession) - where FasterSession : IFasterSession - { - // These are "intermediate" states that will be replaced by either a Delete or an Upsert of an actual value. - // - If the record is Sealed, we do not acquire the lock, because that would be a problem if we were in the middle of transferring the locks from one record to another. - // Sealed is very short-duration, so we just yield and then RETRY_NOW. - // - A Stub lock is held longer, but still likely only for the duration of one or more operations on multiple records, e.g. "acquire a lock on 3 source records and 1 - // destination record, the do an operation on the source values and put them into the destination value." So for this case we lock, then immediatly unlock and RETRY_NOW. - if (!recordInfo.Sealed && fasterSession.SupportsLocking) - { - fasterSession.LockShared(ref recordInfo); - fasterSession.UnlockShared(ref recordInfo); - } - else - Thread.Yield(); - return OperationStatus.RETRY_NOW; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - static bool SealRecord(ref RecordInfo recordInfo, ref Key key, ref Value value, FasterSession fasterSession) - where FasterSession : IFasterSession - { - // This is the only time we do a Sealed-related lock, and it's just to know whether we are the thread that set it to Sealed. 
- if (fasterSession.SupportsLocking) - fasterSession.LockExclusive(ref recordInfo); - if (recordInfo.Sealed || recordInfo.Invalid || recordInfo.Tombstone) - { - // Another thread was doing the same thing; unlock and retry. - if (fasterSession.SupportsLocking) - fasterSession.UnlockExclusive(ref recordInfo); - return false; - } - - // We were the sealer, so continue the current operation. Note: the caller here does not unseal; the record must remain sealed to avoid a race condition - // when the CAS of the new record has not yet been done. - recordInfo.Sealed = true; - return true; - } - - internal static bool IsIntermediate(ref RecordInfo recordInfo, bool isReadingAtAddress = false) => recordInfo.Stub || (recordInfo.Sealed && !isReadingAtAddress); - #region Read Operation /// @@ -168,6 +122,7 @@ internal OperationStatus InternalRead( OperationStatus status; long logicalAddress; + long prevTailAddress = hlog.GetTailAddress(); var useStartAddress = startAddress != Constants.kInvalidAddress && !pendingContext.HasMinAddress; bool tagExists; if (!useStartAddress) @@ -188,9 +143,9 @@ internal OperationStatus InternalRead( { if (pendingContext.SkipReadCache || pendingContext.NoKey) { - SkipReadCache(ref logicalAddress); + SkipReadCache(ref logicalAddress, out _); } - else if (ReadFromCache(ref key, ref logicalAddress, ref physicalAddress)) + else if (ReadFromCache(ref key, ref logicalAddress, ref physicalAddress, out status)) { // When session is in PREPARE phase, a read-cache record cannot be new-version. // This is because a new-version record insertion would have elided the read-cache entry. @@ -204,6 +159,8 @@ internal OperationStatus InternalRead( return fasterSession.SingleReader(ref key, ref input, ref readcache.GetValue(physicalAddress), ref output, ref pendingContext.lockOperation, ref recordInfo, Constants.kInvalidAddress) ? 
OperationStatus.SUCCESS : OperationStatus.NOTFOUND; } + else if (status != OperationStatus.SUCCESS) + return status; } if (logicalAddress >= hlog.HeadAddress) @@ -251,24 +208,12 @@ internal OperationStatus InternalRead( pendingContext.logicalAddress = logicalAddress; ref Value recordValue = ref hlog.GetValue(physicalAddress); - if (fasterSession.IsManualOperations && recordInfo.Stub) + if (recordInfo.IsIntermediate(out status, useStartAddress)) { - // This is only for ManualFasterOperations, and we assume we hold this lock, but we should not be reading or (un)locking a Stub (we should promote or Delete). - return OperationStatus.NOTFOUND; - } - else if (IsIntermediate(ref pendingContext.recordInfo, useStartAddress)) - { - return RetryOnIntermediateRecord(ref recordInfo, ref key, ref hlog.GetValue(physicalAddress), fasterSession); - } - else if (recordInfo.Tombstone) - { - if (fasterSession.IsManualOperations && pendingContext.lockOperation.IsSet) - { - fasterSession.ConcurrentReader(ref key, ref input, ref recordValue, ref output, ref pendingContext.lockOperation, ref recordInfo, logicalAddress); - return OperationStatus.SUCCESS; - } + return status; } - else if (fasterSession.ConcurrentReader(ref key, ref input, ref recordValue, ref output, ref pendingContext.lockOperation, ref recordInfo, logicalAddress)) + else if (!recordInfo.Tombstone + && fasterSession.ConcurrentReader(ref key, ref input, ref recordValue, ref output, ref pendingContext.lockOperation, ref recordInfo, logicalAddress)) { return OperationStatus.SUCCESS; } @@ -282,24 +227,12 @@ internal OperationStatus InternalRead( pendingContext.recordInfo = recordInfo; pendingContext.logicalAddress = logicalAddress; - if (fasterSession.IsManualOperations && recordInfo.Stub) + if (recordInfo.IsIntermediate(out status, useStartAddress)) { - // This is only for ManualFasterOperations, and we assume we hold this lock, but we should not be reading or (un)locking a Stub (we should promote or Delete). 
- return OperationStatus.NOTFOUND; - } - else if (IsIntermediate(ref pendingContext.recordInfo, useStartAddress)) - { - return RetryOnIntermediateRecord(ref recordInfo, ref key, ref hlog.GetValue(physicalAddress), fasterSession); - } - else if (recordInfo.Tombstone) - { - if (fasterSession.IsManualOperations && pendingContext.lockOperation.IsSet) - { - fasterSession.SingleReader(ref key, ref input, ref hlog.GetValue(physicalAddress), ref output, ref pendingContext.lockOperation, ref recordInfo, logicalAddress); - return OperationStatus.SUCCESS; - } + return status; } - else if (fasterSession.SingleReader(ref key, ref input, ref hlog.GetValue(physicalAddress), ref output, ref pendingContext.lockOperation, ref recordInfo, logicalAddress)) + else if (!recordInfo.Tombstone + && fasterSession.SingleReader(ref key, ref input, ref hlog.GetValue(physicalAddress), ref output, ref pendingContext.lockOperation, ref recordInfo, logicalAddress)) { if (CopyReadsToTail == CopyReadsToTail.FromReadOnly && !pendingContext.SkipCopyReadsToTail) { @@ -372,8 +305,7 @@ internal OperationStatus InternalRead( pendingContext.version = sessionCtx.version; pendingContext.serialNum = lsn; pendingContext.heldLatch = heldOperation; - pendingContext.recordInfo.PreviousAddress = startAddress; - pendingContext.IsReadingAtAddress = useStartAddress; + pendingContext.recordInfo.PreviousAddress = prevTailAddress; } #endregion @@ -451,8 +383,15 @@ internal OperationStatus InternalUpsert( var logicalAddress = entry.Address; var physicalAddress = default(long); + long lastReadCachePhysicalAddress = Constants.kInvalidAddress; + long prevFirstReadCacheLogicalAddress = Constants.kInvalidAddress; if (UseReadCache) - SkipAndInvalidateReadCache(ref logicalAddress, ref key); + { + prevFirstReadCacheLogicalAddress = logicalAddress; + SkipReadCache(ref logicalAddress, out lastReadCachePhysicalAddress); + if (prevFirstReadCacheLogicalAddress == logicalAddress) // if there were no readcache records + 
prevFirstReadCacheLogicalAddress = Constants.kInvalidAddress; + } var latestLogicalAddress = logicalAddress; if (logicalAddress >= hlog.ReadOnlyAddress) @@ -471,32 +410,17 @@ internal OperationStatus InternalUpsert( #endregion // Optimization for the most common case - long stubPhysicalAddress = Constants.kInvalidAddress; + long unsealPhysicalAddress = Constants.kInvalidAddress; if (sessionCtx.phase == Phase.REST) { if (logicalAddress >= hlog.ReadOnlyAddress) { ref RecordInfo recordInfo = ref hlog.GetInfo(physicalAddress); - if (fasterSession.IsManualOperations && recordInfo.Stub) - { - // This is only for ManualFasterOperations, and we assume we hold this lock. We will need to transfer the lock to the updated record. - stubPhysicalAddress = physicalAddress; - status = OperationStatus.SUCCESS; - goto CreateNewRecord; - } ref Value recordValue = ref hlog.GetValue(physicalAddress); - if (IsIntermediate(ref recordInfo)) - return RetryOnIntermediateRecord(ref recordInfo, ref key, ref recordValue, fasterSession); + if (recordInfo.IsIntermediate(out status)) + return status; - if (recordInfo.Tombstone) - { - if (fasterSession.IsManualOperations && pendingContext.lockOperation.IsSet) - { - fasterSession.ConcurrentWriter(ref key, ref input, ref value, ref hlog.GetValue(physicalAddress), ref output, ref pendingContext.lockOperation, ref recordInfo, logicalAddress); - return OperationStatus.SUCCESS; - } - } - else + if (!recordInfo.Tombstone) { if (fasterSession.ConcurrentWriter(ref key, ref input, ref value, ref hlog.GetValue(physicalAddress), ref output, ref pendingContext.lockOperation, ref recordInfo, logicalAddress)) { @@ -507,8 +431,9 @@ internal OperationStatus InternalUpsert( } // ConcurrentWriter failed (e.g. insufficient space). Another thread may come along to do this update in-place; Seal it to prevent that. 
- if (!SealRecord(ref recordInfo, ref key, ref recordValue, fasterSession)) + if (!recordInfo.Seal(fasterSession.IsManualLocking)) return OperationStatus.RETRY_NOW; + unsealPhysicalAddress = physicalAddress; } goto CreateNewRecord; } @@ -519,9 +444,9 @@ internal OperationStatus InternalUpsert( { latchDestination = AcquireLatchUpsert(sessionCtx, bucket, ref status, ref latchOperation, ref entry, logicalAddress); } -#endregion + #endregion -#region Normal processing + #region Normal processing // Mutable Region: Update the record in-place if (latchDestination == LatchDestination.NormalProcessing) @@ -529,31 +454,13 @@ internal OperationStatus InternalUpsert( if (logicalAddress >= hlog.ReadOnlyAddress) { ref RecordInfo recordInfo = ref hlog.GetInfo(physicalAddress); - if (fasterSession.IsManualOperations && recordInfo.Stub) - { - // This is only for ManualFasterOperations, and we assume we hold this lock. We will need to transfer the lock to the updated record. - stubPhysicalAddress = physicalAddress; - status = OperationStatus.SUCCESS; - goto CreateNewRecord; - } ref Value recordValue = ref hlog.GetValue(physicalAddress); - if (IsIntermediate(ref recordInfo)) + if (recordInfo.IsIntermediate(out status)) { - status = RetryOnIntermediateRecord(ref recordInfo, ref key, ref recordValue, fasterSession); goto LatchRelease; // Release shared latch (if acquired) } - if (recordInfo.Tombstone) - { - if (fasterSession.IsManualOperations && pendingContext.lockOperation.IsSet) - { - fasterSession.ConcurrentWriter(ref key, ref input, ref value, ref recordValue, ref output, ref pendingContext.lockOperation, ref recordInfo, logicalAddress); - status = OperationStatus.SUCCESS; - goto LatchRelease; // Release shared latch (if acquired) - } - goto CreateNewRecord; - } - else + if (!recordInfo.Tombstone) { if (fasterSession.ConcurrentWriter(ref key, ref input, ref value, ref recordValue, ref output, ref pendingContext.lockOperation, ref recordInfo, logicalAddress)) { @@ -568,47 +475,32 
@@ internal OperationStatus InternalUpsert( } // ConcurrentWriter failed (e.g. insufficient space). Another thread may come along to do this update in-place; Seal it to prevent that. - if (!SealRecord(ref recordInfo, ref key, ref recordValue, fasterSession)) - return OperationStatus.RETRY_NOW; + if (!recordInfo.Seal(fasterSession.IsManualLocking)) + { + status = OperationStatus.RETRY_NOW; + goto LatchRelease; // Release shared latch (if acquired) + } + unsealPhysicalAddress = physicalAddress; goto CreateNewRecord; } } - else if (fasterSession.IsManualOperations) + else if (logicalAddress >= hlog.HeadAddress) { - if (logicalAddress >= hlog.HeadAddress) - { - physicalAddress = hlog.GetPhysicalAddress(logicalAddress); - ref RecordInfo recordInfo = ref hlog.GetInfo(physicalAddress); - ref Value recordValue = ref hlog.GetValue(physicalAddress); - pendingContext.recordInfo = recordInfo; - pendingContext.logicalAddress = logicalAddress; - status = OperationStatus.SUCCESS; - - if (!recordInfo.Stub) - { - if (pendingContext.lockOperation.IsSet) - { - fasterSession.ConcurrentWriter(ref key, ref input, ref value, ref recordValue, ref output, ref pendingContext.lockOperation, ref recordInfo, logicalAddress); - goto LatchRelease; // Release shared latch (if acquired) - } - else if (IsIntermediate(ref recordInfo)) - { - status = RetryOnIntermediateRecord(ref recordInfo, ref key, ref recordValue, fasterSession); - goto LatchRelease; // Release shared latch (if acquired) - } - } + // Only need to go below ReadOnly here for locking and Sealing. + physicalAddress = hlog.GetPhysicalAddress(logicalAddress); + ref RecordInfo recordInfo = ref hlog.GetInfo(physicalAddress); + pendingContext.recordInfo = recordInfo; + pendingContext.logicalAddress = logicalAddress; - // This ManualOps instance already owns this lock, or we wouldn't be here. 
Create a new record and transfer the lock - if (recordInfo.Stub || recordInfo.IsLocked) - stubPhysicalAddress = physicalAddress; - goto CreateNewRecord; - } - else if (pendingContext.lockOperation.LockOperationType == LockOperationType.Unlock) + if (recordInfo.IsIntermediate(out status)) + goto LatchRelease; // Release shared latch (if acquired) + if (!recordInfo.Seal(fasterSession.IsManualLocking)) { - Debug.Fail("Trying to unlock a non-existent value"); - status = OperationStatus.SUCCESS; + status = OperationStatus.RETRY_NOW; goto LatchRelease; // Release shared latch (if acquired) } + unsealPhysicalAddress = physicalAddress; + goto CreateNewRecord; } } @@ -617,26 +509,38 @@ internal OperationStatus InternalUpsert( #region Create new record in the mutable region CreateNewRecord: + // Invalidate the entry in the read cache, as we did not do IPU. + if (UseReadCache) + { + prevFirstReadCacheLogicalAddress = logicalAddress; + if (!SkipAndInvalidateReadCache(ref logicalAddress, ref key, out lastReadCachePhysicalAddress, out OperationStatus internalStatus)) + return internalStatus; + } + if (latchDestination != LatchDestination.CreatePendingContext) { // Immutable region or new record - status = CreateNewRecordUpsert(ref key, ref input, ref value, ref output, ref pendingContext, fasterSession, sessionCtx, bucket, slot, tag, entry, latestLogicalAddress, - stubPhysicalAddress != Constants.kInvalidAddress); - if (status != OperationStatus.ALLOCATE_FAILED) + status = CreateNewRecordUpsert(ref key, ref input, ref value, ref output, ref pendingContext, fasterSession, sessionCtx, bucket, slot, tag, entry, + latestLogicalAddress, prevFirstReadCacheLogicalAddress, lastReadCachePhysicalAddress, unsealPhysicalAddress); + if (status != OperationStatus.SUCCESS) { - if (fasterSession.IsManualOperations && stubPhysicalAddress != Constants.kInvalidAddress) + if (unsealPhysicalAddress != Constants.kInvalidAddress) { - // Unlock the *old* record--thereby "transferring" the lock to the 
new record. - ref RecordInfo recordInfo = ref hlog.GetInfo(stubPhysicalAddress); - fasterSession.UnlockExclusive(ref recordInfo); + // Operation failed, so unseal the old record. + hlog.GetInfo(unsealPhysicalAddress).Unseal(); + } + if (status == OperationStatus.ALLOCATE_FAILED) + { + latchDestination = LatchDestination.CreatePendingContext; + goto CreatePendingContext; } - goto LatchRelease; } - latchDestination = LatchDestination.CreatePendingContext; + goto LatchRelease; } -#endregion + #endregion -#region Create pending context + #region Create pending context + CreatePendingContext: Debug.Assert(latchDestination == LatchDestination.CreatePendingContext, $"Upsert CreatePendingContext encountered latchDest == {latchDestination}"); { pendingContext.type = OperationType.UPSERT; @@ -706,7 +610,7 @@ private LatchDestination AcquireLatchUpsert(FasterExecut } case Phase.IN_PROGRESS: { - if (!CheckEntryVersionNew(logicalAddress, sessionCtx)) + if (!CheckEntryVersionNew(logicalAddress)) { if (HashBucket.TryAcquireExclusiveLatch(bucket)) { @@ -724,7 +628,7 @@ private LatchDestination AcquireLatchUpsert(FasterExecut } case Phase.WAIT_FLUSH: { - if (!CheckEntryVersionNew(logicalAddress, sessionCtx)) + if (!CheckEntryVersionNew(logicalAddress)) { return LatchDestination.CreateNewRecord; // Create a (v+1) record } @@ -738,7 +642,7 @@ private LatchDestination AcquireLatchUpsert(FasterExecut private OperationStatus CreateNewRecordUpsert(ref Key key, ref Input input, ref Value value, ref Output output, ref PendingContext pendingContext, FasterSession fasterSession, FasterExecutionContext sessionCtx, HashBucket* bucket, int slot, ushort tag, HashBucketEntry entry, - long latestLogicalAddress, bool isStubPromotion) + long latestLogicalAddress, long prevFirstReadCacheLogicalAddress, long lastReadCachePhysicalAddress, long unsealPhysicalAddress) where FasterSession : IFasterSession { var (actualSize, allocateSize) = hlog.GetRecordSize(ref key, ref value); @@ -751,24 +655,48 @@ 
private OperationStatus CreateNewRecordUpsertbucket_entries[slot], updatedEntry.word, entry.word); + success = foundEntry.word == entry.word; + } + else + { + // Splice into the gap of the last readcache/first main log entries. + ref RecordInfo rcri = ref readcache.GetInfo(lastReadCachePhysicalAddress); + if (rcri.PreviousAddress != latestLogicalAddress) + return OperationStatus.RETRY_NOW; + + // Splice a non-tentative record into the readcache/mainlog gap. + success = rcri.TryUpdateAddress(newLogicalAddress); - var foundEntry = default(HashBucketEntry); - foundEntry.word = Interlocked.CompareExchange(ref bucket->bucket_entries[slot], updatedEntry.word, entry.word); + // Now see if we have added a readcache entry from a pending read while we were inserting; if so it is obsolete and must be Invalidated. + InvalidateUpdatedRecordInReadCache(entry.word, ref key, prevFirstReadCacheLogicalAddress); + } - if (foundEntry.word == entry.word) + if (success) { + if (unsealPhysicalAddress != Constants.kInvalidAddress) + recordInfo.CopyLocksFrom(hlog.GetInfo(unsealPhysicalAddress)); + else if (LockTable.IsActive && !LockTable.ApplyToLogRecord(ref key, ref recordInfo)) + return OperationStatus.RETRY_NOW; + recordInfo.Tentative = false; + fasterSession.PostSingleWriter(ref key, ref input, ref value, ref newValue, ref output, ref pendingContext.lockOperation, ref recordInfo, newLogicalAddress); pendingContext.recordInfo = recordInfo; pendingContext.logicalAddress = newLogicalAddress; @@ -860,9 +788,15 @@ internal OperationStatus InternalRMW( FindOrCreateTag(hash, tag, ref bucket, ref slot, ref entry, hlog.BeginAddress); var logicalAddress = entry.Address; - // For simplicity, we don't let RMW operations use read cache + long lastReadCachePhysicalAddress = Constants.kInvalidAddress; + long prevFirstReadCacheLogicalAddress = Constants.kInvalidAddress; if (UseReadCache) - SkipReadCache(ref logicalAddress); + { + prevFirstReadCacheLogicalAddress = logicalAddress; + SkipReadCache(ref 
logicalAddress, out lastReadCachePhysicalAddress); + if (prevFirstReadCacheLogicalAddress == logicalAddress) // if there were no readcache records + prevFirstReadCacheLogicalAddress = Constants.kInvalidAddress; + } var latestLogicalAddress = logicalAddress; if (logicalAddress >= hlog.HeadAddress) @@ -882,20 +816,13 @@ internal OperationStatus InternalRMW( #endregion // Optimization for the most common case - long stubPhysicalAddress = Constants.kInvalidAddress; + long unsealPhysicalAddress = Constants.kInvalidAddress; if (sessionCtx.phase == Phase.REST && logicalAddress >= hlog.ReadOnlyAddress) { ref RecordInfo recordInfo = ref hlog.GetInfo(physicalAddress); - if (fasterSession.IsManualOperations && recordInfo.Stub) - { - // This is only for ManualFasterOperations, and we assume we hold this lock. We will need to transfer the lock to the updated record. - stubPhysicalAddress = physicalAddress; - status = OperationStatus.SUCCESS; - goto CreateNewRecord; - } ref Value recordValue = ref hlog.GetValue(physicalAddress); - if (IsIntermediate(ref recordInfo)) - return RetryOnIntermediateRecord(ref recordInfo, ref key, ref recordValue, fasterSession); + if (recordInfo.IsIntermediate(out status)) + return status; if (!recordInfo.Tombstone) { @@ -908,8 +835,9 @@ internal OperationStatus InternalRMW( } // ConcurrentWriter failed (e.g. insufficient space). Another thread may come along to do this update in-place; Seal it to prevent that. - if (!SealRecord(ref recordInfo, ref key, ref recordValue, fasterSession)) + if (!recordInfo.Seal(fasterSession.IsManualLocking)) return OperationStatus.RETRY_NOW; + unsealPhysicalAddress = physicalAddress; } goto CreateNewRecord; } @@ -929,16 +857,9 @@ internal OperationStatus InternalRMW( if (logicalAddress >= hlog.ReadOnlyAddress) { ref RecordInfo recordInfo = ref hlog.GetInfo(physicalAddress); - if (fasterSession.IsManualOperations && recordInfo.Stub) - { - // This is only for ManualFasterOperations, and we assume we hold this lock. 
We will need to transfer the lock to the updated record. - stubPhysicalAddress = physicalAddress; - status = OperationStatus.SUCCESS; - goto CreateNewRecord; - } ref Value recordValue = ref hlog.GetValue(physicalAddress); - if (IsIntermediate(ref recordInfo)) - return RetryOnIntermediateRecord(ref recordInfo, ref key, ref recordValue, fasterSession); + if (recordInfo.IsIntermediate(out status)) + return status; if (!recordInfo.Tombstone) { @@ -953,8 +874,9 @@ internal OperationStatus InternalRMW( } // ConcurrentWriter failed (e.g. insufficient space). Another thread may come along to do this update in-place; Seal it to prevent that. - if (!SealRecord(ref recordInfo, ref key, ref recordValue, fasterSession)) + if (!recordInfo.Seal(fasterSession.IsManualLocking)) return OperationStatus.RETRY_NOW; + unsealPhysicalAddress = physicalAddress; } } @@ -979,23 +901,11 @@ internal OperationStatus InternalRMW( else if (logicalAddress >= hlog.HeadAddress) { ref RecordInfo recordInfo = ref hlog.GetInfo(physicalAddress); - if (fasterSession.IsManualOperations && recordInfo.Stub) - { - // This is only for ManualFasterOperations, and we assume we hold this lock. We will need to transfer the lock to the updated record. - stubPhysicalAddress = physicalAddress; - status = OperationStatus.SUCCESS; - goto CreateNewRecord; - } - ref Value recordValue = ref hlog.GetValue(physicalAddress); - if (IsIntermediate(ref recordInfo)) - { - status = RetryOnIntermediateRecord(ref recordInfo, ref key, ref recordValue, fasterSession); + if (recordInfo.IsIntermediate(out status)) goto LatchRelease; // Release shared latch (if acquired) - } - - // This ManualOps instance already owns this lock, or we wouldn't be here. 
Create a new record and transfer the lock - if (fasterSession.IsManualOperations && recordInfo.IsLocked) - stubPhysicalAddress = physicalAddress; + if (!recordInfo.Seal(fasterSession.IsManualLocking)) + return OperationStatus.RETRY_NOW; + unsealPhysicalAddress = physicalAddress; goto CreateNewRecord; } @@ -1027,25 +937,37 @@ internal OperationStatus InternalRMW( #region Create new record CreateNewRecord: + // Invalidate the entry in the read cache, as we did not do IPU. + if (UseReadCache) + { + prevFirstReadCacheLogicalAddress = logicalAddress; + if (!SkipAndInvalidateReadCache(ref logicalAddress, ref key, out lastReadCachePhysicalAddress, out OperationStatus internalStatus)) + return internalStatus; + } + if (latchDestination != LatchDestination.CreatePendingContext) { status = CreateNewRecordRMW(ref key, ref input, ref output, ref pendingContext, fasterSession, sessionCtx, bucket, slot, logicalAddress, physicalAddress, tag, entry, - latestLogicalAddress, stubPhysicalAddress != Constants.kInvalidAddress); - if (status != OperationStatus.ALLOCATE_FAILED) + latestLogicalAddress, prevFirstReadCacheLogicalAddress, lastReadCachePhysicalAddress, unsealPhysicalAddress); + if (status != OperationStatus.SUCCESS) { - if (fasterSession.IsManualOperations && stubPhysicalAddress != Constants.kInvalidAddress) + if (unsealPhysicalAddress != Constants.kInvalidAddress) { - // Unlock the *old* record--thereby "transferring" the lock to the new record. - ref RecordInfo recordInfo = ref hlog.GetInfo(stubPhysicalAddress); - fasterSession.UnlockExclusive(ref recordInfo); + // Operation failed, so unseal the old record. 
+ hlog.GetInfo(unsealPhysicalAddress).Unseal(); + } + if (status == OperationStatus.ALLOCATE_FAILED) + { + latchDestination = LatchDestination.CreatePendingContext; + goto CreatePendingContext; } - goto LatchRelease; } - latchDestination = LatchDestination.CreatePendingContext; + goto LatchRelease; } #endregion #region Create failure context + CreatePendingContext: Debug.Assert(latchDestination == LatchDestination.CreatePendingContext, $"RMW CreatePendingContext encountered latchDest == {latchDestination}"); { pendingContext.type = OperationType.RMW; @@ -1112,7 +1034,7 @@ private LatchDestination AcquireLatchRMW(PendingContext< } case Phase.IN_PROGRESS: { - if (!CheckEntryVersionNew(logicalAddress, sessionCtx)) + if (!CheckEntryVersionNew(logicalAddress)) { Debug.Assert(pendingContext.heldLatch != LatchOperation.Shared); if (pendingContext.heldLatch == LatchOperation.Exclusive || HashBucket.TryAcquireExclusiveLatch(bucket)) @@ -1132,7 +1054,7 @@ private LatchDestination AcquireLatchRMW(PendingContext< } case Phase.WAIT_FLUSH: { - if (!CheckEntryVersionNew(logicalAddress, sessionCtx)) + if (!CheckEntryVersionNew(logicalAddress)) { if (logicalAddress >= hlog.HeadAddress) return LatchDestination.CreateNewRecord; // Create a (v+1) record @@ -1147,7 +1069,8 @@ private LatchDestination AcquireLatchRMW(PendingContext< private OperationStatus CreateNewRecordRMW(ref Key key, ref Input input, ref Output output, ref PendingContext pendingContext, FasterSession fasterSession, FasterExecutionContext sessionCtx, HashBucket* bucket, int slot, long logicalAddress, - long physicalAddress, ushort tag, HashBucketEntry entry, long latestLogicalAddress, bool isStubPromotion) + long physicalAddress, ushort tag, HashBucketEntry entry, long latestLogicalAddress, + long prevFirstReadCacheLogicalAddress, long lastReadCachePhysicalAddress, long unsealPhysicalAddress) where FasterSession : IFasterSession { // Determine if we should allocate a new record @@ -1171,6 +1094,7 @@ private 
OperationStatus CreateNewRecordRMW= hlog.HeadAddress) { @@ -1202,7 +1124,6 @@ ref hlog.GetValue(newPhysicalAddress, newPhysicalAddress + actualSize), ref output, ref recordInfo, newLogicalAddress); status = OperationStatus.SUCCESS; } - pendingContext.lockOperation.IsStubPromotion = false; } else { @@ -1211,16 +1132,42 @@ ref hlog.GetValue(newPhysicalAddress, newPhysicalAddress + actualSize), return OperationStatus.RETRY_NOW; } - var updatedEntry = default(HashBucketEntry); - updatedEntry.Tag = tag; - updatedEntry.Address = newLogicalAddress & Constants.kAddressMask; - updatedEntry.Pending = entry.Pending; - updatedEntry.Tentative = false; + bool success = true; + if (lastReadCachePhysicalAddress == Constants.kInvalidAddress) + { + // Insert as the first record in the hash chain. + var updatedEntry = default(HashBucketEntry); + updatedEntry.Tag = tag; + updatedEntry.Address = newLogicalAddress & Constants.kAddressMask; + updatedEntry.Pending = entry.Pending; + updatedEntry.Tentative = false; + + var foundEntry = default(HashBucketEntry); + foundEntry.word = Interlocked.CompareExchange(ref bucket->bucket_entries[slot], updatedEntry.word, entry.word); + success = foundEntry.word == entry.word; + } + else + { + // Splice into the gap of the last readcache/first main log entries. + ref RecordInfo rcri = ref readcache.GetInfo(lastReadCachePhysicalAddress); + if (rcri.PreviousAddress != latestLogicalAddress) + return OperationStatus.RETRY_NOW; + + // Splice a non-tentative record into the readcache/mainlog gap. + success = rcri.TryUpdateAddress(newLogicalAddress); + + // Now see if we have added a readcache entry from a pending read while we were inserting; if so it is obsolete and must be Invalidated. 
+ InvalidateUpdatedRecordInReadCache(entry.word, ref key, prevFirstReadCacheLogicalAddress); + } - var foundEntry = default(HashBucketEntry); - foundEntry.word = Interlocked.CompareExchange(ref bucket->bucket_entries[slot], updatedEntry.word, entry.word); - if (foundEntry.word == entry.word) + if (success) { + if (unsealPhysicalAddress != Constants.kInvalidAddress) + recordInfo.CopyLocksFrom(hlog.GetInfo(unsealPhysicalAddress)); + else if (LockTable.IsActive && !LockTable.ApplyToLogRecord(ref key, ref recordInfo)) + return OperationStatus.RETRY_NOW; + recordInfo.Tentative = false; + // If IU, status will be NOTFOUND; return that. if (status != OperationStatus.SUCCESS) { @@ -1303,10 +1250,10 @@ internal OperationStatus InternalDelete( var logicalAddress = Constants.kInvalidAddress; var physicalAddress = default(long); var latchOperation = default(LatchOperation); + long unsealPhysicalAddress = Constants.kInvalidAddress; var hash = comparer.GetHashCode64(ref key); var tag = (ushort)((ulong)hash >> Constants.kHashTagShift); - long stubPhysicalAddress = Constants.kInvalidAddress; if (sessionCtx.phase != Phase.REST) HeavyEnter(hash, sessionCtx, fasterSession); @@ -1319,8 +1266,15 @@ internal OperationStatus InternalDelete( logicalAddress = entry.Address; + long lastReadCachePhysicalAddress = Constants.kInvalidAddress; + long prevFirstReadCacheLogicalAddress = Constants.kInvalidAddress; if (UseReadCache) - SkipAndInvalidateReadCache(ref logicalAddress, ref key); + { + prevFirstReadCacheLogicalAddress = logicalAddress; + SkipReadCache(ref logicalAddress, out lastReadCachePhysicalAddress); + if (prevFirstReadCacheLogicalAddress == logicalAddress) // if there were no readcache records + prevFirstReadCacheLogicalAddress = Constants.kInvalidAddress; + } var latestLogicalAddress = logicalAddress; if (logicalAddress >= hlog.ReadOnlyAddress) @@ -1364,7 +1318,7 @@ internal OperationStatus InternalDelete( } case Phase.IN_PROGRESS: { - if (!CheckEntryVersionNew(logicalAddress, 
sessionCtx)) + if (!CheckEntryVersionNew(logicalAddress)) { if (HashBucket.TryAcquireExclusiveLatch(bucket)) { @@ -1382,7 +1336,7 @@ internal OperationStatus InternalDelete( } case Phase.WAIT_FLUSH: { - if (!CheckEntryVersionNew(logicalAddress, sessionCtx)) + if (!CheckEntryVersionNew(logicalAddress)) { goto CreateNewRecord; // Create a (v+1) record } @@ -1392,35 +1346,25 @@ internal OperationStatus InternalDelete( break; } } -#endregion + #endregion -#region Normal processing + #region Normal processing // Mutable Region: Update the record in-place if (logicalAddress >= hlog.ReadOnlyAddress) { ref RecordInfo recordInfo = ref hlog.GetInfo(physicalAddress); ref Value recordValue = ref hlog.GetValue(physicalAddress); - if (fasterSession.IsManualOperations && recordInfo.Stub) + if (recordInfo.IsIntermediate(out status)) { - // This is only for ManualFasterOperations, and we assume we hold this lock. We can Tombstone the record directly and ManualsOps - // will also clear Stub. Caller (ManualOps) must still unlock. 
- fasterSession.ConcurrentDeleter(ref hlog.GetKey(physicalAddress), ref recordValue, ref recordInfo, logicalAddress); - status = OperationStatus.SUCCESS; - goto LatchRelease; // Release shared latch (if acquired) - } - - if (IsIntermediate(ref recordInfo)) - { - status = RetryOnIntermediateRecord(ref recordInfo, ref key, ref recordValue, fasterSession); goto LatchRelease; // Release shared latch (if acquired) } if (!fasterSession.ConcurrentDeleter(ref hlog.GetKey(physicalAddress), ref recordValue, ref recordInfo, logicalAddress)) goto CreateNewRecord; - if (sessionCtx.phase == Phase.REST) + if (sessionCtx.phase == Phase.REST) hlog.MarkPage(logicalAddress, sessionCtx.version); else hlog.MarkPageAtomic(logicalAddress, sessionCtx.version); @@ -1447,23 +1391,23 @@ internal OperationStatus InternalDelete( status = OperationStatus.SUCCESS; goto LatchRelease; // Release shared latch (if acquired) } - else if (fasterSession.IsManualOperations) + else if (logicalAddress >= hlog.HeadAddress) { - if (logicalAddress >= hlog.HeadAddress) - { - physicalAddress = hlog.GetPhysicalAddress(logicalAddress); - ref RecordInfo recordInfo = ref hlog.GetInfo(physicalAddress); + // Only need to go below ReadOnly here for locking and Sealing. + physicalAddress = hlog.GetPhysicalAddress(logicalAddress); + ref RecordInfo recordInfo = ref hlog.GetInfo(physicalAddress); + pendingContext.recordInfo = recordInfo; + pendingContext.logicalAddress = logicalAddress; - // This ManualOps instance already owns this lock, or we wouldn't be here. 
Create a new record and transfer the lock - if (recordInfo.Stub || recordInfo.IsLocked) - stubPhysicalAddress = physicalAddress; - goto CreateNewRecord; - } - else if (pendingContext.lockOperation.LockOperationType == LockOperationType.Unlock) + if (recordInfo.IsIntermediate(out status)) + goto LatchRelease; // Release shared latch (if acquired) + if (!recordInfo.Seal(fasterSession.IsManualLocking)) { - Debug.Fail("Trying to unlock a non-existent value"); - return OperationStatus.SUCCESS; + status = OperationStatus.RETRY_NOW; + goto LatchRelease; // Release shared latch (if acquired) } + unsealPhysicalAddress = physicalAddress; + goto CreateNewRecord; } // All other regions: Create a record in the mutable region @@ -1479,42 +1423,58 @@ internal OperationStatus InternalDelete( BlockAllocate(allocateSize, out long newLogicalAddress, sessionCtx, fasterSession, pendingContext.IsAsync); if (newLogicalAddress == 0) { + if (unsealPhysicalAddress != Constants.kInvalidAddress) + { + // Operation failed, so unseal the old record. + hlog.GetInfo(unsealPhysicalAddress).Unseal(); + } status = OperationStatus.ALLOCATE_FAILED; goto CreatePendingContext; } var newPhysicalAddress = hlog.GetPhysicalAddress(newLogicalAddress); ref RecordInfo recordInfo = ref hlog.GetInfo(newPhysicalAddress); + recordInfo.Tentative = true; RecordInfo.WriteInfo(ref recordInfo, inNewVersion: sessionCtx.InNewVersion, tombstone: true, dirty: true, latestLogicalAddress); hlog.Serialize(ref key, newPhysicalAddress); - // Nobody does anything to this record because it is Tombstoned, so we do not lock it unless we are transferring from a stub. - if (fasterSession.IsManualOperations && stubPhysicalAddress != Constants.kInvalidAddress) + bool success = true; + if (lastReadCachePhysicalAddress == Constants.kInvalidAddress) { - fasterSession.LockExclusive(ref recordInfo); + // Insert as the first record in the hash chain. 
+ var updatedEntry = default(HashBucketEntry); + updatedEntry.Tag = tag; + updatedEntry.Address = newLogicalAddress & Constants.kAddressMask; + updatedEntry.Pending = entry.Pending; + updatedEntry.Tentative = false; + + var foundEntry = default(HashBucketEntry); + foundEntry.word = Interlocked.CompareExchange(ref bucket->bucket_entries[slot], updatedEntry.word, entry.word); + success = foundEntry.word == entry.word; } + else + { + // Splice into the gap of the last readcache/first main log entries. + ref RecordInfo rcri = ref readcache.GetInfo(lastReadCachePhysicalAddress); + if (rcri.PreviousAddress != latestLogicalAddress) + return OperationStatus.RETRY_NOW; - var updatedEntry = default(HashBucketEntry); - updatedEntry.Tag = tag; - updatedEntry.Address = newLogicalAddress & Constants.kAddressMask; - updatedEntry.Pending = entry.Pending; - updatedEntry.Tentative = false; + // Splice a non-tentative record into the readcache/mainlog gap. + success = rcri.TryUpdateAddress(newLogicalAddress); - var foundEntry = default(HashBucketEntry); - foundEntry.word = Interlocked.CompareExchange( - ref bucket->bucket_entries[slot], - updatedEntry.word, entry.word); + // Now see if we have added a readcache entry from a pending read while we were inserting; if so it is obsolete and must be Invalidated. + InvalidateUpdatedRecordInReadCache(entry.word, ref key, prevFirstReadCacheLogicalAddress); + } - if (foundEntry.word == entry.word) + if (success) { - if (fasterSession.IsManualOperations && stubPhysicalAddress != Constants.kInvalidAddress) - { - // Unlock the *old* record--thereby "transferring" the lock to the new record. 
- ref RecordInfo stubRecordInfo = ref hlog.GetInfo(stubPhysicalAddress); - fasterSession.UnlockExclusive(ref stubRecordInfo); - } + if (unsealPhysicalAddress != Constants.kInvalidAddress) + recordInfo.CopyLocksFrom(hlog.GetInfo(unsealPhysicalAddress)); + else if (LockTable.IsActive && !LockTable.ApplyToLogRecord(ref key, ref recordInfo)) + return OperationStatus.RETRY_NOW; + recordInfo.Tentative = false; // Note that this is the new logicalAddress; we have not retrieved the old one if it was below HeadAddress, and thus // we do not know whether 'logicalAddress' belongs to 'key' or is a collision. @@ -1528,6 +1488,12 @@ internal OperationStatus InternalDelete( { recordInfo.SetInvalid(); status = OperationStatus.RETRY_NOW; + + if (unsealPhysicalAddress != Constants.kInvalidAddress) + { + // Operation failed, so unseal the old record. + hlog.GetInfo(unsealPhysicalAddress).Unseal(); + } goto LatchRelease; } } @@ -1566,9 +1532,107 @@ internal OperationStatus InternalDelete( return status; } -#endregion + #endregion + + /// + /// Manual Lock operation. Locks the record corresponding to 'key'. + /// + /// key of the record. + /// Lock operation being done. + /// Receives the recordInfo of the record being locked + /// Callback functions. 
+ /// Session context + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal OperationStatus InternalLock( + ref Key key, LockOperation lockOp, out RecordInfo lockInfo, FasterSession fasterSession, + FasterExecutionContext sessionCtx) + where FasterSession : IFasterSession + { + var bucket = default(HashBucket*); + var slot = default(int); + + var hash = comparer.GetHashCode64(ref key); + var tag = (ushort)((ulong)hash >> Constants.kHashTagShift); -#region ContainsKeyInMemory + var prevTailAddress = hlog.GetTailAddress(); + + #region Trace back for record in in-memory HybridLog + var entry = default(HashBucketEntry); + FindOrCreateTag(hash, tag, ref bucket, ref slot, ref entry, hlog.BeginAddress); + var logicalAddress = entry.Address; + var physicalAddress = default(long); + + OperationStatus status; + if (UseReadCache) + { + if (LockReadCacheRecord(logicalAddress, ref key, lockOp, out lockInfo, out status)) + return status; + } + + if (logicalAddress >= hlog.ReadOnlyAddress) + { + physicalAddress = hlog.GetPhysicalAddress(logicalAddress); + if (!comparer.Equals(ref key, ref hlog.GetKey(physicalAddress))) + { + logicalAddress = hlog.GetInfo(physicalAddress).PreviousAddress; + TraceBackForKeyMatch(ref key, + logicalAddress, + hlog.ReadOnlyAddress, + out logicalAddress, + out physicalAddress); + } + } + #endregion + + lockInfo = default; + if (logicalAddress >= hlog.HeadAddress) + { + physicalAddress = hlog.GetPhysicalAddress(logicalAddress); + ref RecordInfo recordInfo = ref hlog.GetInfo(physicalAddress); + if (!recordInfo.IsIntermediate(out status)) + { + if (lockOp.LockOperationType == LockOperationType.IsLocked) + status = OperationStatus.SUCCESS; + else + recordInfo.HandleLockOperation(lockOp, out _); + } + if (lockOp.LockOperationType == LockOperationType.IsLocked) + lockInfo = recordInfo; + return status; + } + + // Not in memory. 
Do LockTable operations + if (lockOp.LockOperationType == LockOperationType.IsLocked) + { + this.LockTable.Get(ref key, out lockInfo); + return OperationStatus.SUCCESS; + } + + if (lockOp.LockOperationType == LockOperationType.Unlock) + { + this.LockTable.Unlock(ref key, lockOp.LockType); + return OperationStatus.SUCCESS; + } + + // Try to lock + bool tentativeLock; + while (!this.LockTable.LockOrTentative(ref key, lockOp.LockType, out tentativeLock)) + { + // Sealed by someone else, so retry + return OperationStatus.RETRY_NOW; + } + + // We got the lock. If we had another record with this key inserted, RETRY. + if (FindTag(hash, tag, ref bucket, ref slot, ref entry) && entry.Address >= prevTailAddress) + return OperationStatus.RETRY_NOW; + + // Success + if (tentativeLock) + this.LockTable.ClearTentative(ref key); + return OperationStatus.SUCCESS; + } + + #region ContainsKeyInMemory [MethodImpl(MethodImplOptions.AggressiveInlining)] internal Status InternalContainsKeyInMemory( @@ -1598,7 +1662,7 @@ internal Status InternalContainsKeyInMemory= fromAddress) { @@ -1673,8 +1737,8 @@ internal OperationStatus InternalContinuePendingRead> Constants.kHashTagShift); + long lastReadCachePhysicalAddress = Constants.kInvalidAddress; + long prevFirstReadCacheLogicalAddress = Constants.kInvalidAddress; + +#region Trace Back for Record on In-Memory HybridLog while (true) { -#region Trace Back for Record on In-Memory HybridLog var entry = default(HashBucketEntry); FindOrCreateTag(hash, tag, ref bucket, ref slot, ref entry, hlog.BeginAddress); logicalAddress = entry.Address; - // For simplicity, we don't let RMW operations use read cache + // Invalidate the entry in the read cache, as we did not do IPU. 
if (UseReadCache) - SkipReadCache(ref logicalAddress); + { + prevFirstReadCacheLogicalAddress = logicalAddress; + if (!SkipAndInvalidateReadCache(ref logicalAddress, ref key, out lastReadCachePhysicalAddress, out status)) + return status; + if (prevFirstReadCacheLogicalAddress == logicalAddress) // if there were no readcache records + prevFirstReadCacheLogicalAddress = Constants.kInvalidAddress; + } var latestLogicalAddress = logicalAddress; if (logicalAddress >= hlog.HeadAddress) @@ -1841,6 +1919,7 @@ internal OperationStatus InternalContinuePendingRMWbucket_entries[slot], updatedEntry.word, entry.word); - if (foundEntry.word == entry.word) + var foundEntry = default(HashBucketEntry); + foundEntry.word = Interlocked.CompareExchange(ref bucket->bucket_entries[slot], updatedEntry.word, entry.word); + success = foundEntry.word == entry.word; + } + else { + // Splice into the gap of the last readcache/first main log entries. + ref RecordInfo rcri = ref readcache.GetInfo(lastReadCachePhysicalAddress); + if (rcri.PreviousAddress != latestLogicalAddress) + return OperationStatus.RETRY_NOW; + + // Splice a non-tentative record into the readcache/mainlog gap. + success = rcri.TryUpdateAddress(newLogicalAddress); + + // Now see if we have added a readcache entry from a pending read while we were inserting; if so it is obsolete and must be Invalidated. + InvalidateUpdatedRecordInReadCache(entry.word, ref key, prevFirstReadCacheLogicalAddress); + } + + if (success) + { + if (LockTable.IsActive && !LockTable.ApplyToLogRecord(ref key, ref recordInfo)) + return OperationStatus.RETRY_NOW; + recordInfo.Tentative = false; + // If IU, status will be NOTFOUND; return that. 
if (status != OperationStatus.SUCCESS) { @@ -1955,7 +2058,7 @@ internal Status HandleOperationStatus( SynchronizeEpoch(opCtx, currentCtx, ref pendingContext, fasterSession); } - Debug.Assert(status != OperationStatus.RETRY_NOW || pendingContext.type == OperationType.READ); + // RMW now suppports RETRY_NOW due to Sealed records. if (status == OperationStatus.CPR_SHIFT_DETECTED || status == OperationStatus.RETRY_NOW || ((asyncOp || RelaxedCPR) && status == OperationStatus.RETRY_LATER)) { #region Retry as (v+1) Operation @@ -2259,12 +2362,23 @@ internal OperationStatus InternalTryCopyToTail> Constants.kHashTagShift); + #region Trace back for record in in-memory HybridLog + // Find the entry in the log and make sure someone didn't insert another record after we decided there wasn't one. var entry = default(HashBucketEntry); FindOrCreateTag(hash, tag, ref bucket, ref slot, ref entry, hlog.BeginAddress); var logicalAddress = entry.Address; var physicalAddress = default(long); + var prevTailAddress = pendingContext.recordInfo.PreviousAddress; + + long lastReadCachePhysicalAddress = Constants.kInvalidAddress; + long prevFirstReadCacheLogicalAddress = Constants.kInvalidAddress; if (UseReadCache) - SkipReadCache(ref logicalAddress); + { + prevFirstReadCacheLogicalAddress = logicalAddress; + SkipReadCache(ref logicalAddress, out lastReadCachePhysicalAddress); + if (prevFirstReadCacheLogicalAddress == logicalAddress) // if there were no readcache records + prevFirstReadCacheLogicalAddress = Constants.kInvalidAddress; + } var latestLogicalAddress = logicalAddress; if (logicalAddress >= hlog.HeadAddress) @@ -2291,6 +2405,7 @@ internal OperationStatus InternalTryCopyToTailbucket_entries[slot], updatedEntry.word, entry.word); + success = foundEntry.word == entry.word; - var foundEntry = default(HashBucketEntry); - foundEntry.word = Interlocked.CompareExchange( - ref bucket->bucket_entries[slot], - updatedEntry.word, - entry.word); - if (foundEntry.word != entry.word) + if (success 
&& UseReadCache) + { + // See if we have added a main-log entry from an update while we were inserting; if so, the new readcache + // record is obsolete and must be Invalidated. + ref RecordInfo rcri = ref readcache.GetInfo(newPhysicalAddress); + for (var la = entry.Address; la >= prevTailAddress; /* incremented in loop */) + { + var pa = hlog.GetPhysicalAddress(la); + if (comparer.Equals(ref key, ref hlog.GetKey(pa))) + { + rcri.SetInvalid(); + break; + } + la = hlog.GetInfo(pa).PreviousAddress; + } + if (!rcri.Invalid) + rcri.Tentative = false; + } + } + else + { + // Splice into the gap of the last readcache/first main log entries. + ref RecordInfo rcri = ref readcache.GetInfo(lastReadCachePhysicalAddress); + if (rcri.PreviousAddress != latestLogicalAddress) + return OperationStatus.RETRY_NOW; + + // Splice a non-tentative record into the readcache/mainlog gap. + success = rcri.TryUpdateAddress(newLogicalAddress); + + // Now see if we have added a readcache entry from a pending read while we were inserting; if so it is obsolete and must be Invalidated. + InvalidateUpdatedRecordInReadCache(entry.word, ref key, prevFirstReadCacheLogicalAddress); + } + + if (!success) { if (!copyToReadCache) hlog.GetInfo(newPhysicalAddress).SetInvalid(); return OperationStatus.RETRY_NOW; @@ -2348,6 +2505,15 @@ ref hlog.GetValue(newPhysicalAddress, newPhysicalAddress + actualSize), ref outp { var log = copyToReadCache ? readcache : hlog; ref RecordInfo recordInfo = ref log.GetInfo(newPhysicalAddress); + + if (LockTable.IsActive && !LockTable.ApplyToLogRecord(ref key, ref recordInfo)) + { + recordInfo.SetInvalid(); + recordInfo.Tentative = false; + return OperationStatus.RETRY_NOW; + } + recordInfo.Tentative = false; + pendingContext.recordInfo = recordInfo; pendingContext.logicalAddress = copyToReadCache ? 
Constants.kInvalidAddress /* We do not expose readcache addresses */ : newLogicalAddress; fasterSession.PostSingleWriter(ref key, ref input, ref value, @@ -2591,32 +2757,33 @@ private long TraceBackForOtherChainStart(long logicalAddress, int bit) #endregion #region Read Cache - private bool ReadFromCache(ref Key key, ref long logicalAddress, ref long physicalAddress) + private bool ReadFromCache(ref Key key, ref long logicalAddress, ref long physicalAddress, out OperationStatus internalStatus) { // logicalAddress is retrieved from the main FKV's hash table. HashBucketEntry entry = default; entry.word = logicalAddress; + internalStatus = OperationStatus.SUCCESS; if (!entry.ReadCache) return false; physicalAddress = readcache.GetPhysicalAddress(logicalAddress & ~Constants.kReadCacheBitMask); while (true) { - if (!readcache.GetInfo(physicalAddress).Invalid && comparer.Equals(ref key, ref readcache.GetKey(physicalAddress))) + ref RecordInfo recordInfo = ref readcache.GetInfo(physicalAddress); + if (!recordInfo.Invalid && comparer.Equals(ref key, ref readcache.GetKey(physicalAddress))) { if ((logicalAddress & ~Constants.kReadCacheBitMask) >= readcache.SafeReadOnlyAddress) { // This is a valid readcache record. 
- return true; + return !recordInfo.IsIntermediate(out internalStatus); } Debug.Assert((logicalAddress & ~Constants.kReadCacheBitMask) >= readcache.SafeHeadAddress); - // TODO: copy to tail of read cache - // and return new cache entry } - logicalAddress = readcache.GetInfo(physicalAddress).PreviousAddress; + logicalAddress = recordInfo.PreviousAddress; entry.word = logicalAddress; - if (!entry.ReadCache) break; + if (!entry.ReadCache) + break; physicalAddress = readcache.GetPhysicalAddress(logicalAddress & ~Constants.kReadCacheBitMask); } @@ -2625,24 +2792,75 @@ private bool ReadFromCache(ref Key key, ref long logicalAddress, ref long physic return false; } - private void SkipReadCache(ref long logicalAddress) + // Skip over all readcache records in this key's chain (advancing logicalAddress to the first non-readcache record we encounter). + private void SkipReadCache(ref long logicalAddress, out long lastReadCachePhysicalAddress) { HashBucketEntry entry = default; entry.word = logicalAddress; - if (!entry.ReadCache) return; + if (!entry.ReadCache) + { + lastReadCachePhysicalAddress = Constants.kInvalidAddress; + return; + } var physicalAddress = readcache.GetPhysicalAddress(logicalAddress & ~Constants.kReadCacheBitMask); while (true) { + lastReadCachePhysicalAddress = physicalAddress; logicalAddress = readcache.GetInfo(physicalAddress).PreviousAddress; entry.word = logicalAddress; - if (!entry.ReadCache) return; + if (!entry.ReadCache) + return; + physicalAddress = readcache.GetPhysicalAddress(logicalAddress & ~Constants.kReadCacheBitMask); + } + } + + private bool LockReadCacheRecord(long logicalAddress, ref Key key, LockOperation lockOp, out RecordInfo lockInfo, out OperationStatus internalStatus) + { + HashBucketEntry entry = default; + lockInfo = default; + entry.word = logicalAddress; + if (!entry.ReadCache) + { + internalStatus = OperationStatus.SUCCESS; + return false; + } + + var physicalAddress = readcache.GetPhysicalAddress(logicalAddress & 
~Constants.kReadCacheBitMask); + + while (true) + { + ref RecordInfo recordInfo = ref readcache.GetInfo(physicalAddress); + if (!recordInfo.Invalid && comparer.Equals(ref key, ref readcache.GetKey(physicalAddress))) + { + if ((logicalAddress & ~Constants.kReadCacheBitMask) >= readcache.SafeReadOnlyAddress) + { + // This is a valid readcache record. + if (!recordInfo.IsIntermediate(out internalStatus)) + { + if (lockOp.LockOperationType != LockOperationType.IsLocked) + recordInfo.HandleLockOperation(lockOp, out _); + lockInfo = recordInfo; + } + return true; + } + Debug.Assert((logicalAddress & ~Constants.kReadCacheBitMask) >= readcache.SafeHeadAddress); + } + + logicalAddress = recordInfo.PreviousAddress; + entry.word = logicalAddress; + if (!entry.ReadCache) + break; physicalAddress = readcache.GetPhysicalAddress(logicalAddress & ~Constants.kReadCacheBitMask); } + + internalStatus = OperationStatus.SUCCESS; + return false; } - // Skip over all records in this key's chain in the readcache (advancing logicalAddress to the first non-readcache record we encounter). + // Skip over all readcache records in all key chains in this bucket, updating the bucket to point to the first main log record. + // Called during checkpointing; we create a copy of the hash table page, eliminate read cache pointers from this copy, then write this copy to disk. private void SkipReadCacheBucket(HashBucket* bucket) { for (int index = 0; index < Constants.kOverflowBucketIndex; ++index) @@ -2659,33 +2877,72 @@ private void SkipReadCacheBucket(HashBucket* bucket) { logicalAddress = readcache.GetInfo(physicalAddress).PreviousAddress; entry->Address = logicalAddress; - if (!entry->ReadCache) break; + if (!entry->ReadCache) + break; physicalAddress = readcache.GetPhysicalAddress(logicalAddress & ~Constants.kReadCacheBitMask); } } } - // Skip over all records in this key's chain in the readcache (advancing logicalAddress to the first non-readcache record we encounter). 
- // Invalidate each record we skip over. - private void SkipAndInvalidateReadCache(ref long logicalAddress, ref Key key) + // Skip over all readcache records in this key's chain (advancing logicalAddress to the first non-readcache record we encounter). + // Invalidate each record we skip over that matches the key. + private void InvalidateUpdatedRecordInReadCache(long logicalAddress, ref Key key, long untilAddress) { HashBucketEntry entry = default; entry.word = logicalAddress; - if (!entry.ReadCache) return; + if (!entry.ReadCache) + return; var physicalAddress = readcache.GetPhysicalAddress(logicalAddress & ~Constants.kReadCacheBitMask); + while (logicalAddress != untilAddress) + { + // Invalidate read cache entry if key found. This is called when an updated value has been inserted to the main log tail, + // so instead of waiting just invalidate and return. + ref RecordInfo recordInfo = ref readcache.GetInfo(physicalAddress); + if (comparer.Equals(ref key, ref readcache.GetKey(physicalAddress))) + recordInfo.SetInvalid(); + + logicalAddress = recordInfo.PreviousAddress; + entry.word = logicalAddress; + if (!entry.ReadCache) + return; + physicalAddress = readcache.GetPhysicalAddress(logicalAddress & ~Constants.kReadCacheBitMask); + } + } + + private bool SkipAndInvalidateReadCache(ref long logicalAddress, ref Key key, out long lastReadCachePhysicalAddress, out OperationStatus internalStatus) + { + internalStatus = OperationStatus.SUCCESS; + HashBucketEntry entry = default; + entry.word = logicalAddress; + if (!entry.ReadCache) + { + lastReadCachePhysicalAddress = Constants.kInvalidAddress; + return true; + } + + var physicalAddress = readcache.GetPhysicalAddress(logicalAddress & ~Constants.kReadCacheBitMask); + lastReadCachePhysicalAddress = physicalAddress; + while (true) { // Invalidate read cache entry if key found + ref RecordInfo recordInfo = ref readcache.GetInfo(physicalAddress); if (comparer.Equals(ref key, ref readcache.GetKey(physicalAddress))) { - 
readcache.GetInfo(physicalAddress).SetInvalid(); + if (recordInfo.IsIntermediate(out internalStatus)) + return false; + recordInfo.LockExclusive(); + recordInfo.SetInvalid(); + recordInfo.UnlockExclusive(); } - logicalAddress = readcache.GetInfo(physicalAddress).PreviousAddress; + lastReadCachePhysicalAddress = physicalAddress; + logicalAddress = recordInfo.PreviousAddress; entry.word = logicalAddress; - if (!entry.ReadCache) return; + if (!entry.ReadCache) + return true; physicalAddress = readcache.GetPhysicalAddress(logicalAddress & ~Constants.kReadCacheBitMask); } } diff --git a/cs/src/core/Index/FASTER/FASTERThread.cs b/cs/src/core/Index/FASTER/FASTERThread.cs index 19215f58f..09b9e000b 100644 --- a/cs/src/core/Index/FASTER/FASTERThread.cs +++ b/cs/src/core/Index/FASTER/FASTERThread.cs @@ -265,7 +265,6 @@ ref pendingContext.value.Get(), default: throw new FasterException("Operation type not allowed for retry"); } - } } #endregion @@ -356,6 +355,9 @@ internal Status InternalCompletePendingRequestFromContext : IFasterS bool SupportsPostOperations { get; } - bool IsManualOperations { get; } + bool IsManualLocking { get; } #endregion Optional features supported by this implementation #region Reads @@ -72,21 +72,6 @@ internal interface IFasterSession : IFasterS void DeleteCompletionCallback(ref Key key, Context ctx); #endregion Deletes - #region Locking - void LockExclusive(ref RecordInfo recordInfo); - void UnlockExclusive(ref RecordInfo recordInfo); - bool TryLockExclusive(ref RecordInfo recordInfo, int spinCount = 1); - void LockShared(ref RecordInfo recordInfo); - void UnlockShared(ref RecordInfo recordInfo); - bool TryLockShared(ref RecordInfo recordInfo, int spinCount = 1); - void LockExclusiveFromShared(ref RecordInfo recordInfo); - bool TryLockExclusiveFromShared(ref RecordInfo recordInfo, int spinCount = 1); - bool IsLocked(ref RecordInfo recordInfo); - bool IsLockedExclusive(ref RecordInfo recordInfo); - bool IsLockedShared(ref RecordInfo recordInfo); - 
void TransferLocks(ref RecordInfo fromRecordInfo, ref RecordInfo toRecordInfo); - #endregion Locking - bool CompletePendingWithOutputs(out CompletedOutputIterator completedOutputs, bool wait = false, bool spinWaitForCommit = false); IHeapContainer GetHeapContainer(ref Input input); diff --git a/cs/src/core/Utilities/LockTable.cs b/cs/src/core/Utilities/LockTable.cs new file mode 100644 index 000000000..d799e2202 --- /dev/null +++ b/cs/src/core/Utilities/LockTable.cs @@ -0,0 +1,285 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Diagnostics.CodeAnalysis; +using System.Runtime.CompilerServices; + +namespace FASTER.core +{ + // We need to duplicate the Key because we can't get to the key object of the dictionary to Return() it. + // This is a class rather than a struct because a struct would update a copy. + internal class LockTableEntry : IEqualityComparer> + { + internal IHeapContainer key; + internal RecordInfo logRecordInfo; // in main log + internal RecordInfo lockRecordInfo; // in lock table; we have to Lock/Seal/Tentative the LockTable entry separately from logRecordInfo + + internal LockTableEntry(IHeapContainer key, RecordInfo logRecordInfo, RecordInfo lockRecordInfo) + { + this.key = key; + this.logRecordInfo = logRecordInfo; + this.lockRecordInfo = lockRecordInfo; + } + + public bool Equals(LockTableEntry k1, LockTableEntry k2) => k1.logRecordInfo.Equals(k2.logRecordInfo); + + public int GetHashCode(LockTableEntry k) => (int)k.logRecordInfo.GetHashCode64(); + } + + internal class LockTable + { + class KeyComparer : IEqualityComparer> + { + readonly internal IFasterEqualityComparer comparer; + + internal KeyComparer(IFasterEqualityComparer comparer) => this.comparer = comparer; + + public bool Equals(IHeapContainer k1, IHeapContainer k2) => comparer.Equals(ref k1.Get(), ref k2.Get()); + + public int 
GetHashCode(IHeapContainer k) => (int)comparer.GetHashCode64(ref k.Get()); + } + + readonly SafeConcurrentDictionary, LockTableEntry> dict; + readonly IVariableLengthStruct keyLen; + readonly KeyComparer keyComparer; + readonly SectorAlignedBufferPool bufferPool; + + internal LockTable(IVariableLengthStruct keyLen, IFasterEqualityComparer comparer, SectorAlignedBufferPool bufferPool) + { + this.keyLen = keyLen; + this.keyComparer = new(comparer); + this.bufferPool = bufferPool; + this.dict = new(this.keyComparer); + } + + internal bool IsActive => this.dict.Count > 0; + + IHeapContainer GetKeyContainer(ref TKey key) + { + if (bufferPool is null) + return new StandardHeapContainer(ref key); + return new VarLenHeapContainer(ref key, keyLen, bufferPool); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal void Lock(ref TKey key, LockType lockType) + { + var keyContainer = GetKeyContainer(ref key); + _ = dict.AddOrUpdate(keyContainer, + key => { + RecordInfo logRecordInfo = default; + logRecordInfo.Lock(lockType); + return new(key, logRecordInfo, default); + }, (key, lte) => { + lte.logRecordInfo.Lock(lockType); + return lte; + }); + } + + // Provide our own implementation of "Update by lambda" + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private bool Update(ref TKey key, Func, LockTableEntry> updateFactory) + { + using var keyContainer = GetKeyContainer(ref key); + while (dict.TryGetValue(keyContainer, out var lte)) + { + if (dict.TryUpdate(keyContainer, updateFactory(lte), lte)) + return true; + } + return false; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal void Unlock(ref TKey key, LockType lockType) + { + if (Update(ref key, lte => { lte.lockRecordInfo.Unlock(lockType); return lte; })) + { + TryRemoveIfNoLocks(ref key); + return; + } + Debug.Fail("Trying to unlock a nonexistent key"); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal void TransferFrom(ref TKey key, RecordInfo logRecordInfo) + 
{ + var keyContainer = GetKeyContainer(ref key); + RecordInfo newRec = default; + newRec.CopyLocksFrom(logRecordInfo); + if (!dict.TryAdd(keyContainer, new(keyContainer, newRec, default))) + { + keyContainer.Dispose(); + Debug.Fail("Trying to Transfer to an existing key"); + } + } + + // Lock the LockTable record for the key if it exists, else add a Tentative record for it. + // Returns true if the record was locked or tentative; else false (a Sealed or already-Tentative record was encountered) + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal bool LockOrTentative(ref TKey key, LockType lockType, out bool tentative) + { + var keyContainer = GetKeyContainer(ref key); + bool existingConflict = false; + var lte = dict.AddOrUpdate(keyContainer, + key => { + RecordInfo lockRecordInfo = default; + lockRecordInfo.Tentative = true; + RecordInfo logRecordInfo = default; + logRecordInfo.Lock(lockType); + return new(key, logRecordInfo, lockRecordInfo); + }, (key, lte) => { + if (lte.lockRecordInfo.Tentative || lte.lockRecordInfo.Sealed) + existingConflict = true; + lte.logRecordInfo.Lock(lockType); + if (lte.lockRecordInfo.Sealed) + { + existingConflict = true; + lte.logRecordInfo.Unlock(lockType); + } + return lte; + }); + tentative = lte.lockRecordInfo.Tentative; + return !existingConflict; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal void UnlockOrClearTentative(ref TKey key, LockType lockType, bool wasTentative) + { + using var lookupKey = GetKeyContainer(ref key); + if (dict.TryGetValue(lookupKey, out var lte)) + { + Debug.Assert(wasTentative || !lte.lockRecordInfo.Tentative, "lockRecordInfo.Tentative was not expected"); + Debug.Assert(!lte.lockRecordInfo.Sealed, "lockRecordInfo.Sealed was not expected"); + + // We assume that we own the lock or placed the Tentative record. 
+ if (!lte.lockRecordInfo.Tentative) + lte.lockRecordInfo.Unlock(lockType); + if (!dict.TryRemove(lookupKey, out _)) + Debug.Fail("Could not remove Tentative record"); + return; + } + Debug.Fail("Trying to UnlockOrClearTentative on nonexistent key"); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal void ClearTentative(ref TKey key) + { + if (!Update(ref key, lte => { lte.lockRecordInfo.Tentative = false; return lte; })) + Debug.Fail("Trying to remove Tentative bit from nonexistent locktable entry"); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal void TryRemoveIfNoLocks(ref TKey key) + { + using var lookupKey = GetKeyContainer(ref key); + + // From https://devblogs.microsoft.com/pfxteam/little-known-gems-atomic-conditional-removals-from-concurrentdictionary/ + while (dict.TryGetValue(lookupKey, out var lte)) + { + if (lte.lockRecordInfo.IsLocked || lte.lockRecordInfo.Sealed || lte.logRecordInfo.IsLocked) + return; + if (dict.TryRemoveConditional(lookupKey, lte)) + { + lte.key.Dispose(); + return; + } + } + // If we make it here, the key was already removed. + } + + // False is legit, as the record may have been removed between the time it was known to be here and the time Seal was called, + // or this may be called by SealOrTentative. 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + private bool TrySeal(ref TKey key, out bool exists) + { + using var lookupKey = GetKeyContainer(ref key); + if (!dict.ContainsKey(lookupKey)) + { + exists = false; + return true; + } + exists = false; + return Update(ref key, lte => { lte.lockRecordInfo.Seal(); return lte; }); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal void Unseal(ref TKey key) + { + if (!Update(ref key, lte => { lte.lockRecordInfo.Unseal(); return lte; })) + Debug.Fail("Trying to remove Unseal nonexistent key"); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal bool TrySealOrTentative(ref TKey key, out bool tentative) + { + tentative = false; + if (this.TrySeal(ref key, out bool exists)) + return true; + if (exists) + return false; + + var keyContainer = GetKeyContainer(ref key); + RecordInfo lockRecordInfo = default; + lockRecordInfo.Tentative = tentative = true; + if (dict.TryAdd(keyContainer, new(keyContainer, default, lockRecordInfo))) + return true; + + // Someone else already inserted a tentative record + keyContainer.Dispose(); + return false; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal bool Get(ref TKey key, out RecordInfo recordInfo) + { + using var lookupKey = GetKeyContainer(ref key); + if (dict.TryGetValue(lookupKey, out var lte)) + { + recordInfo = lte.logRecordInfo; + return true; + } + recordInfo = default; + return false; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal bool ContainsKey(ref TKey key) + { + using var lookupKey = GetKeyContainer(ref key); + return dict.ContainsKey(lookupKey); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal bool ApplyToLogRecord(ref TKey key, ref RecordInfo logRecord) + { + using var lookupKey = GetKeyContainer(ref key); + if (dict.TryGetValue(lookupKey, out var lte)) + { + // If it's a Tentative record, ignore it--it will be removed by Lock() and retried against the inserted log 
record. + if (lte.lockRecordInfo.Tentative) + return true; + + // If Sealing fails, we have to retry; it could mean that a pending read (readcache or copytotail) grabbed the locks + // before the Upsert/etc. got to them. In that case, the upsert must retry so those locks will be drained from the + // read entry. Note that Seal() momentarily xlocks the record being sealed, which in this case is the LockTable record; + // this does not affect the lock count of the contained record. + if (!lte.lockRecordInfo.Seal()) + return false; + + logRecord.CopyLocksFrom(lte.logRecordInfo); + lte.lockRecordInfo.SetInvalid(); + if (dict.TryRemove(lookupKey, out _)) + lte.key.Dispose(); + lte.lockRecordInfo.Tentative = false; + } + + // No locks to apply, or we applied them all. + return true; + } + + public override string ToString() => this.dict.Count.ToString(); + } +} diff --git a/cs/src/core/Utilities/LockType.cs b/cs/src/core/Utilities/LockType.cs index 52823bcf3..77f73927c 100644 --- a/cs/src/core/Utilities/LockType.cs +++ b/cs/src/core/Utilities/LockType.cs @@ -8,6 +8,11 @@ namespace FASTER.core /// public enum LockType : byte { + /// + /// No lock + /// + None, + /// /// Shared lock, taken on Read /// @@ -24,38 +29,18 @@ public enum LockType : byte ExclusiveFromShared } - /// - /// Information returned from - /// - public struct LockInfo - { - /// - /// The type of lock that was acquired - /// - public LockType LockType; - - /// - /// The address of the record that was locked. 
Useful for calling - /// - public long Address; - - /// - public override string ToString() => $"{LockType}: addr {Address}"; - } - internal enum LockOperationType : byte { None, - LockRead, - LockUpsert, - Unlock + Lock, + Unlock, + IsLocked } internal struct LockOperation { internal LockType LockType; internal LockOperationType LockOperationType; - internal bool IsStubPromotion; internal bool IsSet => LockOperationType != LockOperationType.None; @@ -63,9 +48,8 @@ internal LockOperation(LockOperationType opType, LockType lockType) { this.LockType = lockType; this.LockOperationType = opType; - this.IsStubPromotion = false; } - public override string ToString() => $"{LockType}: opType {LockOperationType}, isStubPromo {IsStubPromotion}"; + public override string ToString() => $"{LockType}: opType {LockOperationType}"; } } diff --git a/cs/src/core/Utilities/LockUtility.cs b/cs/src/core/Utilities/LockUtility.cs new file mode 100644 index 000000000..ac859eada --- /dev/null +++ b/cs/src/core/Utilities/LockUtility.cs @@ -0,0 +1,74 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +using System.Diagnostics; +using System.Runtime.CompilerServices; +using System.Threading; + +namespace FASTER.core +{ + internal static class LockUtility + { + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static bool IsIntermediate(this ref RecordInfo recordInfo, out OperationStatus internalStatus, bool isReadingAtAddress = false) + { + // First a fast check so there is only one "if" + internalStatus = OperationStatus.SUCCESS; + if (!recordInfo.IsIntermediate) + return false; + + // Separate routine to reduce impact on inlining decision. 
+ return HandleIntermediate(ref recordInfo, out internalStatus, isReadingAtAddress); + } + + internal static bool HandleIntermediate(this ref RecordInfo recordInfo, out OperationStatus internalStatus, bool isReadingAtAddress = false) + { + while (recordInfo.Tentative) + Thread.Yield(); + + if (recordInfo.Sealed && !isReadingAtAddress) + { + Thread.Yield(); + internalStatus = OperationStatus.RETRY_NOW; + return true; + } + internalStatus = OperationStatus.SUCCESS; + return false; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static void HandleLockOperation(this ref RecordInfo recordInfo, LockOperation lockOp, out bool isLock) + { + isLock = lockOp.LockOperationType == LockOperationType.Lock; + + if (isLock) + recordInfo.Lock(lockOp.LockType); + else if (lockOp.LockOperationType == LockOperationType.Unlock) + recordInfo.Unlock(lockOp.LockType); + else + Debug.Fail($"Unexpected LockOperation {lockOp.LockOperationType}"); + } + + internal static void Lock(this ref RecordInfo recordInfo, LockType lockType) + { + if (lockType == LockType.Shared) + recordInfo.LockShared(); + else if (lockType == LockType.Exclusive) + recordInfo.LockExclusive(); + else if (lockType == LockType.ExclusiveFromShared) + recordInfo.LockExclusiveFromShared(); + else + Debug.Fail($"Unexpected LockType: {lockType}"); + } + + internal static void Unlock(this ref RecordInfo recordInfo, LockType lockType) + { + if (lockType == LockType.Shared) + recordInfo.UnlockShared(); + else if (lockType == LockType.Exclusive) + recordInfo.UnlockExclusive(); + else + Debug.Fail($"Unexpected LockType: {lockType}"); + } + } +} diff --git a/cs/src/core/Utilities/SafeConcurrentDictionary.cs b/cs/src/core/Utilities/SafeConcurrentDictionary.cs index 89a1a7576..7f2aa190b 100644 --- a/cs/src/core/Utilities/SafeConcurrentDictionary.cs +++ b/cs/src/core/Utilities/SafeConcurrentDictionary.cs @@ -32,6 +32,11 @@ public SafeConcurrentDictionary(IEnumerable> initialC this.dictionary = 
new(initialCollection); } + public SafeConcurrentDictionary(IEqualityComparer comparer) + { + this.dictionary = new(comparer); + } + /// /// Returns the count of the dictionary. /// @@ -219,6 +224,17 @@ public bool TryRemove(TKey key, out TValue value) return dictionary.TryRemove(key, out value); } + /// + /// Attempts to remove the value for the specified key based on equality to . + /// Returns true if successful, false otherwise (value changed or key not found). + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public bool TryRemoveConditional(TKey key, in TValue ifValue) + { + // From https://devblogs.microsoft.com/pfxteam/little-known-gems-atomic-conditional-removals-from-concurrentdictionary/ + return ((ICollection>)dictionary).Remove(new KeyValuePair(key, ifValue)); + } + /// /// Compares the existing value for the specified key with a specified value, /// and updates it if and only if it is a match. Returns true is updated or diff --git a/cs/src/core/VarLen/MemoryFunctions.cs b/cs/src/core/VarLen/MemoryFunctions.cs index e87c7bf87..ea012ff45 100644 --- a/cs/src/core/VarLen/MemoryFunctions.cs +++ b/cs/src/core/VarLen/MemoryFunctions.cs @@ -38,7 +38,6 @@ public override bool ConcurrentWriter(ref Key key, ref Memory input, ref Memo if (dst.Length < src.Length) { - recordInfo.Sealed = true; return false; } diff --git a/cs/src/core/VarLen/SpanByteFunctions.cs b/cs/src/core/VarLen/SpanByteFunctions.cs index 27af87fd2..e6ecff2b0 100644 --- a/cs/src/core/VarLen/SpanByteFunctions.cs +++ b/cs/src/core/VarLen/SpanByteFunctions.cs @@ -33,7 +33,6 @@ public override bool ConcurrentWriter(ref Key key, ref SpanByte input, ref SpanB if (dst.Length < src.Length) { - recordInfo.Sealed = true; return false; } diff --git a/cs/test/ManualOperationsTests.cs b/cs/test/ManualOperationsTests.cs index bfdd867f1..9d2586a5f 100644 --- a/cs/test/ManualOperationsTests.cs +++ b/cs/test/ManualOperationsTests.cs @@ -32,7 +32,7 @@ public override bool ConcurrentDeleter(ref int 
key, ref int value, ref RecordInf } } - public enum ResultLockTarget { MutableLock, Stub } + public enum ResultLockTarget { MutableLock, LockTable } public enum ReadCopyDestination { Tail, ReadCache } @@ -95,32 +95,12 @@ void Populate() } } - (bool xlock, bool slock) IsLocked(ManualFasterOperations manualOps, int key, long logicalAddress, bool stub, out RecordInfo recordInfo) - { - // We have the epoch protected so can access the address directly. For ReadCache, which does not expose addresses, we must look up the key - if (logicalAddress != Constants.kInvalidAddress) - { - var physicalAddress = fkv.hlog.GetPhysicalAddress(logicalAddress); - recordInfo = fkv.hlog.GetInfo(physicalAddress); - Assert.AreEqual(stub, recordInfo.Stub, "stub mismatch, valid Address"); - } - else - { - int inoutDummy = default; - RecordMetadata recordMetadata = default; - var status = manualOps.Read(ref key, ref inoutDummy, ref inoutDummy, ref recordMetadata); - Assert.AreNotEqual(Status.PENDING, status); - Assert.AreEqual(logicalAddress, recordMetadata.Address); // Either kInvalidAddress for readCache, or the expected address + static void AssertIsLocked(ManualFasterOperations manualOps, int key, LockType lockType) + => AssertIsLocked(manualOps, key, lockType == LockType.Exclusive, lockType == LockType.Shared); - recordInfo = recordMetadata.RecordInfo; - Assert.AreEqual(stub, recordInfo.Stub, "stub mismatch"); - } - return (recordInfo.IsLockedExclusive, recordInfo.IsLockedShared); - } - - void AssertIsLocked(ManualFasterOperations manualOps, int key, long logicalAddress, bool xlock, bool slock, bool stub) + static void AssertIsLocked(ManualFasterOperations manualOps, int key, bool xlock, bool slock) { - var (isX, isS) = IsLocked(manualOps, key, logicalAddress, stub, out var recordInfo); + var (isX, isS) = manualOps.IsLocked(key); Assert.AreEqual(xlock, isX, "xlock mismatch"); Assert.AreEqual(slock, isS, "slock mismatch"); } @@ -163,17 +143,14 @@ public void 
InMemorySimpleLockTxnTest([Values] ResultLockTarget resultLockTarget Populate(); PrepareRecordLocation(flushMode); - Dictionary locks = new(); - LockInfo lockInfo = default; - // SetUp also reads this to determine whether to supply ReadCacheSettings. If ReadCache is specified it wins over CopyToTail. bool useReadCache = readCopyDestination == ReadCopyDestination.ReadCache && flushMode == FlushMode.OnDisk; var useRMW = updateOp == UpdateOp.RMW; - bool initialDestWillBeStub = resultLockTarget == ResultLockTarget.Stub || flushMode == FlushMode.OnDisk; - int resultKey = resultLockTarget == ResultLockTarget.Stub ? numRecords + 1 : 75; + int resultKey = resultLockTarget == ResultLockTarget.LockTable ? numRecords + 1 : 75; int resultValue = -1; int expectedResult = (24 + 51) * valueMult; Status status; + Dictionary locks = new(); using (var manualOps = session.GetManualOperations()) { @@ -184,57 +161,91 @@ public void InMemorySimpleLockTxnTest([Values] ResultLockTarget resultLockTarget { // key scope // Get initial source values int key = 24; - manualOps.Lock(key, LockType.Shared, retrieveData: true, ref lockInfo); - Assert.AreEqual(useReadCache, lockInfo.Address == Constants.kInvalidAddress); - locks[key] = lockInfo; - AssertIsLocked(manualOps, key, lockInfo.Address, xlock: false, slock: true, stub: false); + manualOps.Lock(key, LockType.Shared); + AssertIsLocked(manualOps, key, xlock: false, slock: true); + locks[key] = LockType.Shared; + key = 51; - manualOps.Lock(key, LockType.Shared, retrieveData: true, ref lockInfo); - Assert.AreEqual(useReadCache, lockInfo.Address == Constants.kInvalidAddress); - locks[key] = lockInfo; - AssertIsLocked(manualOps, key, lockInfo.Address, xlock: false, slock: true, stub: false); - - // Lock destination value (which may entail dropping a stub). 
- manualOps.Lock(resultKey, LockType.Exclusive, retrieveData: false, ref lockInfo); - Assert.AreEqual(useReadCache && !initialDestWillBeStub, lockInfo.Address == Constants.kInvalidAddress); - locks[resultKey] = lockInfo; - AssertIsLocked(manualOps, resultKey, lockInfo.Address, xlock: true, slock: false, stub: initialDestWillBeStub); - - // Re-get source values, to verify (e.g. they may be in readcache now) + manualOps.Lock(key, LockType.Shared); + locks[key] = LockType.Shared; + AssertIsLocked(manualOps, key, xlock: false, slock: true); + + // Lock destination value. + manualOps.Lock(resultKey, LockType.Exclusive); + locks[resultKey] = LockType.Exclusive; + AssertIsLocked(manualOps, resultKey, xlock: true, slock: false); + + // Re-get source values, to verify (e.g. they may be in readcache now). + // We just locked this above, but for FlushMode.OnDisk it will be in the LockTable and will still be PENDING. status = manualOps.Read(24, out var value24); - Assert.AreNotEqual(Status.PENDING, status); + if (flushMode == FlushMode.OnDisk) + { + if (status == Status.PENDING) + { + manualOps.UnsafeCompletePendingWithOutputs(out var completedOutputs, wait: true); + Assert.True(completedOutputs.Next()); + value24 = completedOutputs.Current.Output; + Assert.False(completedOutputs.Current.RecordMetadata.RecordInfo.IsLockedExclusive); + Assert.True(completedOutputs.Current.RecordMetadata.RecordInfo.IsLockedShared); + Assert.False(completedOutputs.Next()); + completedOutputs.Dispose(); + } + } + else + { + Assert.AreNotEqual(Status.PENDING, status); + } + status = manualOps.Read(51, out var value51); - Assert.AreNotEqual(Status.PENDING, status); + if (flushMode == FlushMode.OnDisk) + { + if (status == Status.PENDING) + { + manualOps.UnsafeCompletePendingWithOutputs(out var completedOutputs, wait: true); + Assert.True(completedOutputs.Next()); + value51 = completedOutputs.Current.Output; + Assert.False(completedOutputs.Current.RecordMetadata.RecordInfo.IsLockedExclusive); + 
Assert.True(completedOutputs.Current.RecordMetadata.RecordInfo.IsLockedShared); + Assert.False(completedOutputs.Next()); + completedOutputs.Dispose(); + } + } + else + { + Assert.AreNotEqual(Status.PENDING, status); + } // Set the phase to Phase.INTERMEDIATE to test the non-Phase.REST blocks session.ctx.phase = phase; int dummyInOut = 0; - RecordMetadata recordMetadata = default; status = useRMW - ? manualOps.RMW(ref resultKey, ref expectedResult, ref dummyInOut, out recordMetadata) + ? manualOps.RMW(ref resultKey, ref expectedResult, ref dummyInOut, out RecordMetadata recordMetadata) : manualOps.Upsert(ref resultKey, ref dummyInOut, ref expectedResult, ref dummyInOut, out recordMetadata); - Assert.AreNotEqual(Status.PENDING, status); - if (initialDestWillBeStub || flushMode == FlushMode.ReadOnly) + if (flushMode == FlushMode.OnDisk) { - // We initially created a stub for locking -or- we initially locked a RO record and then the update required RCU. - // Under these circumstances, we allocated a new record and transferred the lock to it. 
- Assert.AreNotEqual(locks[resultKey].Address, recordMetadata.Address); - AssertIsLocked(manualOps, resultKey, locks[resultKey].Address, xlock: false, slock: false, stub: initialDestWillBeStub); - AssertIsLocked(manualOps, resultKey, recordMetadata.Address, xlock: true, slock: false, stub: false); - lockInfo = locks[resultKey]; - lockInfo.Address = recordMetadata.Address; - locks[resultKey] = lockInfo; + if (status == Status.PENDING) + { + manualOps.UnsafeCompletePendingWithOutputs(out var completedOutputs, wait: true); + Assert.True(completedOutputs.Next()); + resultValue = completedOutputs.Current.Output; + Assert.True(completedOutputs.Current.RecordMetadata.RecordInfo.IsLockedExclusive); + Assert.False(completedOutputs.Current.RecordMetadata.RecordInfo.IsLockedShared); + Assert.False(completedOutputs.Next()); + completedOutputs.Dispose(); + } } else - Assert.AreEqual(locks[resultKey].Address, recordMetadata.Address); + { + Assert.AreNotEqual(Status.PENDING, status); + } // Reread the destination to verify status = manualOps.Read(resultKey, out resultValue); Assert.AreNotEqual(Status.PENDING, status); Assert.AreEqual(expectedResult, resultValue); } - foreach (var key in locks.Keys.OrderBy(key => key)) - manualOps.Unlock(key, locks[key].LockType); + foreach (var key in locks.Keys.OrderBy(key => -key)) + manualOps.Unlock(key, locks[key]); } catch (Exception) { @@ -262,10 +273,9 @@ public void InMemoryLongLockTest([Values] ResultLockTarget resultLockTarget, [Va Populate(); PrepareRecordLocation(flushMode); - LockInfo lockInfo = default; - bool initialDestWillBeStub = resultLockTarget == ResultLockTarget.Stub || flushMode == FlushMode.OnDisk; - int resultKey = initialDestWillBeStub ? numRecords + 1 : 75; - int resultValue = -1; + bool initialDestWillBeLockTable = resultLockTarget == ResultLockTarget.LockTable || flushMode == FlushMode.OnDisk; + int resultKey = initialDestWillBeLockTable ? 
numRecords + 1 : 75; + int resultValue; const int expectedResult = (24 + 51) * valueMult; var useRMW = updateOp == UpdateOp.RMW; Status status; @@ -275,7 +285,7 @@ public void InMemoryLongLockTest([Values] ResultLockTarget resultLockTarget, [Va try { - manualOps.Lock(51, LockType.Exclusive, retrieveData: true, ref lockInfo); + manualOps.Lock(51, LockType.Exclusive); status = manualOps.Read(24, out var value24); if (flushMode == FlushMode.OnDisk) @@ -289,9 +299,25 @@ public void InMemoryLongLockTest([Values] ResultLockTarget resultLockTarget, [Va else Assert.AreNotEqual(Status.PENDING, status); - // We just locked this above, so it should not be PENDING + // We just locked this above, but for FlushMode.OnDisk it will be in the LockTable and will still be PENDING. status = manualOps.Read(51, out var value51); - Assert.AreNotEqual(Status.PENDING, status); + if (flushMode == FlushMode.OnDisk) + { + if (status == Status.PENDING) + { + manualOps.UnsafeCompletePendingWithOutputs(out var completedOutputs, wait: true); + Assert.True(completedOutputs.Next()); + value51 = completedOutputs.Current.Output; + Assert.True(completedOutputs.Current.RecordMetadata.RecordInfo.IsLockedExclusive); + Assert.False(completedOutputs.Current.RecordMetadata.RecordInfo.IsLockedShared); + Assert.False(completedOutputs.Next()); + completedOutputs.Dispose(); + } + } + else + { + Assert.AreNotEqual(Status.PENDING, status); + } Assert.AreEqual(51 * valueMult, value51); // Set the phase to Phase.INTERMEDIATE to test the non-Phase.REST blocks @@ -305,7 +331,7 @@ public void InMemoryLongLockTest([Values] ResultLockTarget resultLockTarget, [Va Assert.AreNotEqual(Status.PENDING, status); Assert.AreEqual(expectedResult, resultValue); - manualOps.Unlock(51, ref lockInfo); + manualOps.Unlock(51, LockType.Exclusive); } catch (Exception) { @@ -334,13 +360,12 @@ public void InMemoryDeleteTest([Values] ResultLockTarget resultLockTarget, [Valu Populate(); PrepareRecordLocation(flushMode); - Dictionary locks = 
new(); - LockInfo lockInfo = default; + Dictionary locks = new(); // SetUp also reads this to determine whether to supply ReadCacheSettings. If ReadCache is specified it wins over CopyToTail. bool useReadCache = readCopyDestination == ReadCopyDestination.ReadCache && flushMode == FlushMode.OnDisk; - bool initialDestWillBeStub = resultLockTarget == ResultLockTarget.Stub || flushMode == FlushMode.OnDisk; - int resultKey = resultLockTarget == ResultLockTarget.Stub ? numRecords + 1 : 75; + bool initialDestWillBeLockTable = resultLockTarget == ResultLockTarget.LockTable || flushMode == FlushMode.OnDisk; + int resultKey = resultLockTarget == ResultLockTarget.LockTable ? numRecords + 1 : 75; Status status; using (var manualOps = session.GetManualOperations()) @@ -349,38 +374,22 @@ public void InMemoryDeleteTest([Values] ResultLockTarget resultLockTarget, [Valu try { - // Lock destination value (which may entail dropping a stub). - manualOps.Lock(resultKey, LockType.Exclusive, retrieveData: false, ref lockInfo); - Assert.AreEqual(useReadCache && !initialDestWillBeStub, lockInfo.Address == Constants.kInvalidAddress); - locks[resultKey] = lockInfo; - AssertIsLocked(manualOps, resultKey, lockInfo.Address, xlock: true, slock: false, stub: initialDestWillBeStub); + // Lock destination value. + manualOps.Lock(resultKey, LockType.Exclusive); + locks[resultKey] = LockType.Exclusive; + AssertIsLocked(manualOps, resultKey, xlock: true, slock: false); // Set the phase to Phase.INTERMEDIATE to test the non-Phase.REST blocks session.ctx.phase = phase; status = manualOps.Delete(ref resultKey); Assert.AreNotEqual(Status.PENDING, status); - // If we initially created a stub for locking then we've updated it in place, unlike Upsert or RMW. - if (!initialDestWillBeStub && flushMode == FlushMode.ReadOnly) - { - // We initially locked a RO record and then the delete required inserting a new record. - // Under these circumstances, we allocated a new record and transferred the lock to it. 
- Assert.AreNotEqual(locks[resultKey].Address, session.functions.deletedRecordAddress); - AssertIsLocked(manualOps, resultKey, locks[resultKey].Address, xlock: false, slock: false, stub: initialDestWillBeStub); - AssertIsLocked(manualOps, resultKey, session.functions.deletedRecordAddress, xlock: true, slock: false, stub: false); - lockInfo = locks[resultKey]; - lockInfo.Address = session.functions.deletedRecordAddress; - locks[resultKey] = lockInfo; - } - else - Assert.AreEqual(locks[resultKey].Address, session.functions.deletedRecordAddress); - // Reread the destination to verify status = manualOps.Read(resultKey, out var _); Assert.AreEqual(Status.NOTFOUND, status); foreach (var key in locks.Keys.OrderBy(key => key)) - manualOps.Unlock(key, locks[key].LockType); + manualOps.Unlock(key, locks[key]); } catch (Exception) { @@ -414,7 +423,7 @@ public void StressLocks([Values(1, 8)] int numLockThreads, [Values(1, 8)] int nu void runLockThread(int tid) { - Dictionary locks = new(); + Dictionary locks = new(); Random rng = new(tid + 101); using var localSession = fkv.For(new ManualFunctions()).NewSession(); @@ -426,13 +435,12 @@ void runLockThread(int tid) for (var key = baseKey + rng.Next(numIncrement); key < baseKey + numKeys; key += rng.Next(1, numIncrement)) { var lockType = rng.Next(100) < 60 ? 
LockType.Shared : LockType.Exclusive; - LockInfo lockInfo = default; - manualOps.Lock(key, lockType, retrieveData: true, ref lockInfo); - locks[key] = lockInfo; + manualOps.Lock(key, lockType); + locks[key] = lockType; } foreach (var key in locks.Keys.OrderBy(key => key)) - manualOps.Unlock(key, locks[key].LockType); + manualOps.Unlock(key, locks[key]); locks.Clear(); } diff --git a/cs/test/ObjectReadCacheTests.cs b/cs/test/ObjectReadCacheTests.cs index 780aea558..c769a2ede 100644 --- a/cs/test/ObjectReadCacheTests.cs +++ b/cs/test/ObjectReadCacheTests.cs @@ -138,7 +138,7 @@ public void ObjectDiskWriteReadCache() var value = new MyValue { value = i + 1 }; var status = session.Read(ref key1, ref input, ref output, Empty.Default, 0); - Assert.AreEqual(Status.OK, status); + Assert.AreEqual(Status.OK, status, $"key = {key1.key}"); Assert.AreEqual(value.value, output.value.value); } } diff --git a/docs/_docs/30-fasterkv-manual-locking.md b/docs/_docs/30-fasterkv-manual-locking.md index f7309c8bc..4a700bb74 100644 --- a/docs/_docs/30-fasterkv-manual-locking.md +++ b/docs/_docs/30-fasterkv-manual-locking.md @@ -13,7 +13,7 @@ Manual locking in FasterKV refers to the user specifying when records will be lo Manual locking is done by obtaining the `ManualFasterOperations` instance from a `ClientSession`. This provides an implementation of `IFasterOperations` that: - Does not do automatic locking (except when updated records are inserted, as described below) - Does not do automatic epoch protection; instead, the user must call `UnsafeResumeThread` and `UnsafeSuspendThread`. In these, "Unsafe" refers to the fact it is the user's responsibility to make the correct calls. -- Exposes `Lock()` and `Unlock()` APIs. +- Exposes `Lock()` and `Unlock()` APIs. These are the *only* way records are locked in `ManualFasterOperations`; we assume that all locks are taken before any operations are done. 
Therefore, `ManualFasterOperations` `IFunctions` update operations do not honor locks; it assumes that it owns them. Here are two use case examples: - Lock key1, key2, and key3, then Read key1 and key2 values, calculate the result, write them to key3, and unlock all keys. This ensures that key3 has a consistent value based on key1 and key2 values. @@ -34,7 +34,6 @@ Lock multiple keys: { manualOps.UnsafeResumeThread(out var epoch); - LockInfo lockInfo = default; manualOps.Lock(24, LockType.Shared); manualOps.Lock(51, LockType.Shared); manualOps.Lock(75, LockType.Exclusive); @@ -56,7 +55,6 @@ Lock multiple keys: { manualOps.UnsafeResumeThread(out var epoch); - LockInfo lockInfo = default; manualOps.Lock(51, LockType.Shared); manualOps.Read(24, out var value24); @@ -72,13 +70,15 @@ TODO: Add sample with `manualOps.LocalCurrentEpoch`. ## Internal Design -This section covers the internal design and implementation of manual locking. +This section covers the internal design and implementation of manual locking. Although Sealing a record is not strictly a lock, it is still part of this document because it is closely intertwined with [Record Transfers](#record-transfers). -Manual locking is integrated into `FASTERImpl.cs` methods, notably `InternalRead` and `InternalCompletePendingRead`, `InternalUpsert`, `InternalRMW` and `InternalCompletePendingRMW`, and `InternalDelete`. These modifications are exposed via the `Lock()` and `Unlock()` APIs on `ManualFasterOperations`. LockOperation-specific code done in `InternalUpsert` and is protected by an `if (fasterSession.IsManualOperations)` test, which is a static bool member of the `FasterSession` implementation so the comparison should optimize it out. 
+Manual locking and checking is integrated into `FASTERImpl.cs` methods: +- The locking and unlocking are implemented in `InternalLock` +- Other record operations that must consider locks are `InternalUpsert`, `InternalRead` and `InternalCompletePendingRead`, `InternalRMW` and `InternalCompletePendingRMW`, and `InternalDelete`. These modifications are exposed via the `Lock()` and `Unlock()`. Because epoch protection is done by user calls, ManualFasterOperations methods call the internal ContextRead etc. methods, which are called by the API methods that do Resume and Suspend of epoch protection. -At a high level, `Lock()` and `Unlock()` call `ContextUpsert()` which in turn calls `InternalUpsert()`. Upsert by design does not issue PENDING operations to retrieve on-disk data, and locking/unlocking is designed to avoid pending I/O operations by use of a [`LockTable`](#locktable-overview) consisting of {`TKey`, `RecordInfo`} pairs, where `TKey` is the FasterKV Key type and `RecordInfo` is used to perform the locking/unlocking. +At a high level, `Lock()` and `Unlock()` call `InternalLock()`. Locking does not issue PENDING operations to retrieve on-disk data, and locking/unlocking is designed to avoid pending I/O operations by use of a [`LockTable`](#locktable-overview) consisting of {`TKey`, `RecordInfo`} pairs, where `TKey` is the FasterKV Key type and `RecordInfo` is used to perform the locking/unlocking. Locking and unlocking use bits in the `RecordInfo` header to obtain one exclusive lock or up to 64 shared locks. Because locking does not affect data, even records in the ReadOnly region may be locked and unlocked directly. @@ -88,8 +88,8 @@ The following sections refer to the following two in the `RecordInfo`: - **Lock Bits**: There is one Exclusive Lock bit and 6 Shared Lock bits (allowing 64 shared locks) in the RecordInfo. 
- **Tentative**: a record marked Tentative is very short-term; it indicates that the thread is performing a Tentative insertion of the record, and may make the Tentative record final by removing the Tentative bit, or may back off the insertion by setting the record to Invalid and returning RETRY_NOW. - **Sealed**: a record marked Sealed is one for which an update is known to be in progress. Sealed records are "visible" only short-term (e.g. a single call to Upsert or RMW, or a transfer to/from the `LockTable`). A thread encountering this should immediately return RETRY_NOW. - - Sealing is done via `RecordInfo.Seal`. This is used in locking scenarios rather than a sequence of "CAS to set Sealed; test Sealed bit` because the after-Seal locking is fuzzy; we don't know whether the record was CTT'd before or after a post-Seal lock, and thus we don't know if the transferred record "owns" our lock. `RecordInfo.Seal` does a CAS with both the XLock and Seal bits, then Unlocks the XLock bit; this ensures it works whether SupportsLocking is true or false. It returns true if successsful or false if another thread Sealed the record. + - Sealing is done via `RecordInfo.Seal`. This is used in locking scenarios rather than a sequence of "CAS to set Sealed; test Sealed bit" because the after-Seal locking is fuzzy; we don't know whether the record was CTT'd before or after a post-Seal lock, and thus we don't know if the transferred record "owns" our lock. `RecordInfo.Seal` does a CAS with both the XLock and Seal bits, then Unlocks the XLock bit; this ensures it works whether SupportsLocking is true or false. It returns true if successful or false if another thread Sealed the record. However, `ManualFasterOperations` must not try to lock as it owns the lock already.
+- **Invalid**: This is a well-known bit from v1 included here for clarity: its behavior is that the record is to be skipped, using its `.PreviousAddress` to move along the chain. This has relevance to some areas of [Record Transfers](#record-transfers), particularly with respect to the `ReadCache`. Additionally, the `SupportsLocking` flag has been moved from IFunctions to a `FasterKV` constructor argument. This value must be uniform across all asessions. It is only to control the locking done by FasterKV; this replaces the concept of user-controlled locking that was provided with the `IFunctions` methods for concurrent record access. @@ -97,30 +97,66 @@ Additionally, the `SupportsLocking` flag has been moved from IFunctions to a `Fa For records not found in memory, the `LockTable` is used. The semantics of `LockTable` entries are as follow. This is a conceptual view; implementation details are described in subsequent sections: - On a `Lock` call, if the key is not found in memory, the `LockTable` is searched for the Key. - - If it is not found, an entry is made in the `LockTable` with an empty `RecordInfo`. - - The requested `LockType` is then taken on the `RecordInfo` for that Key. + - if the RecordInfo is in the `LockTable` it is locked as specified + - else a new Tentative record is added and subsequently finalized as in [Insertion to LockTable due to Lock](#insertion-to-locktable-due-to-lock) - On an `Unlock` call, if the key is not found in memory, the `LockTable` is searched for the Key. - If it is not found, a Debug.Fail() is issued. - Otherwise, the requested `LockType` is unlocked. If this leaves the `RecordInfo` unlocked, its entry is deleted from the `LockTable`. -- When a Read or RMW obtains a record from ON-DISK, it consults the `LockTable`; if the key is found, the locks are transferred to the retrieved recordInfo, and the `LockTable` entry is removed. 
-- When an Upsert (without `LockOperations`) or Delete does not find a key in memory, it consults the `LockTable`, and if the key is found: +- When a Read or RMW obtains a record from ON-DISK, it consults the `LockTable`; if the key is found: + - if the RecordInfo is in the `LockTable` it is Sealed, else a new Tentative record is added + - it transfers the locks from the `LockTable` entry to the retrieved recordInfo + - it removes the Seal from the entry in the `LockTable`, or deletes the entry if it was Tentative +- When an Upsert or Delete does not find a key in memory, it consults the `LockTable`, and if the key is found: - it Seals the RecordInfo in the `LockTable` + - it transfers the locks from the `LockTable` entry to the retrieved recordInfo - it performs the usual "append at tail of Log" operation - - it removes the entry from the `LockTable` + - it removes the Seal from the entry in the `LockTable`, or deletes the entry if it was Tentative - Because `LockTable` use does not verify that the key actually exists (as it does not issue a pending operation to ensure the requested key, and not a collision, is found in the on-disk portion), it is possible that keys will exist in the `LockTable` that do not in fact exist in the log. This is fine; if we do more than `Lock` them, then they will be added to the log at that time, and the locks applied to them. +We implement the `LockTable` with a `ConcurrentDictionary` because the use is expected to be very low--the vast majority of locks should not last long enough to be evicted from either the `ReadCache` or main memory. Thus, most operations on the `LockTable` will simply compare to `Count > 0`. 
+ #### Insertion to LockTable due to Lock +This is the complementary side of [Insertion to LockTable due to Upsert](#insertion-to-locktable-due-to-upsert): + When a thread doing `Lock()` looks for a key in the LockTable and cannot find it, it must do a Tentative insertion into the locktable, because it is possible that another thread CAS'd that key to the Tail of the log after the current thread had passed the hash table lookup: -- We do not find the record in memory starting from current TailAddress, so we record that TailAddress as prevTailAddress. -- Locktable does not have an entry for this key so we create a Tentative entry in the LockTable for it -- We check if key exists between current TailAddress and prevTailAddress - - if yes we have to back off the LockTable entry creation by setting it Invalid (so anyone holding it to spin-test sees it is invalid), removing it from the LockTable, and returning RETRY_NOW. - - Any thread trying an operation in the Lock Table on a Tentative record must spin until the Tentative bit is removed; this will be soon, because we are only following the hash chain back to A. - - If prevTailAddress has escaped to disk by the time we start following the hash chain from Tail to prevTailAddress, we must retry. See the InternalTryCopyToTail scan to expectedLogicalAddress and ON_DISK as an example of this. - - Any waiting thread sees Invalid and in this case, it must also return RETRY_NOW. - - if no, we can set locktable entry as final by removing the Tentative bit - - Any waiting thread proceeds normally +- If Lock() finds the key in memory: + - if the record is Tentative, Lock() spins until the record is no longer Tentative + - if the record is Invalid, Lock() does a RETRY_NOW loop. + - Lock() locks that record and exits successfully. +- Otherwise: + - We record the current TailAddress as prevTailAddress. 
+ - If `LockTable` has an entry for that key + - if it is Tentative, we spin until it is no longer Tentative + - if the record is Invalid, Lock() does a RETRY_NOW loop. + - Lock() locks that `LockTable` entry and exits successfully. + - Otherwise, Lock() creates a Tentative entry in the LockTable for the key + - Lock() checks to see if the key exists on the log between current TailAddress and prevTailAddress + - if yes: + - if the record is Tentative, Lock() spins until the record is no longer Tentative + - Lock() backs off the LockTable entry creation by setting it Invalid (so anyone holding it to spin-test sees it is invalid), removing it from the LockTable per [Removal from LockTable](#removal-from-locktable), and doing a RETRY_NOW loop. + - If prevTailAddress has escaped to disk by the time we start following the hash chain from Tail to prevTailAddress, Lock() must do a RETRY_NOW loop. See the InternalTryCopyToTail scan to expectedLogicalAddress and ON_DISK as an example of this. + - Any waiting thread sees Invalid and in this case, it must also return RETRY_NOW. + - if no, we can set locktable entry as final by removing the Tentative bit + - Any waiting thread proceeds normally + +#### Insertion to LockTable due to Upsert + +This is the complementary side of [Insertion to LockTable due to Lock](#insertion-to-locktable-due-to-lock) and applies RMW and Delete as well, when any of these append a record to the tail of the log (for brevity, Upsert is used). It is necessary so that threads that try to Lock() the Upsert()ed record as soon as it is CAS'd into the Log will not "split" locks between the log record and a `LockTable` entry. 
There is a bit of Catch-22 here; we cannot CAS in the non-Tentative log record before we have transferred the locks from a LockTable entry; but we must have a record on the log so that Lock() will not try to add a new entry, or lock an existing entry, while Upsert is in the process of creating the record and possibly transferring the locks from the `LockTable`. + +For performance reasons, Upsert cannot do an operation on the `LockTable` for each added record; therefore, we defer the cost until the last possible point, where we know we have to do something with the `LockTable` (which is very rare). + +When Upsert must append a new record: +- Upsert CASes in a record marked Tentative + - Note that Upsert does NOT check the locktable before its CAS, for performance reasons. + - Any thread seeing a Tentative record will spinwait until it's no longer Tentative, so no thread will try to lock this newly-CAS'd record. +- Upsert checks the `LockTable` to see if there is an entry in it for this key. + - If an entry is in the `LockTable`, then Upsert checks to see if it is marked Tentative. + - If so, then it is ignored; per [Insertion to LockTable due to Lock](#insertion-to-locktable-due-to-lock), it will be removed by the Lock() thread. + - Otherwise, Upsert: + - Applies the locks to its newly-CAS'd record (which is still Tentative) + - Sets the LockTable entry Invalid and removes it + - Clears the newly-CAS'd record's Tentative, and exits successfully #### Removal from LockTable @@ -129,8 +165,6 @@ Here are the sequences of operations to remove records from the Lock Table: - If the lock count goes to 0, remove from `LockTable` conditionally on IsLocked == false and Sealed == false. - Since only lock bits are relevant in LockTable, this is equivalent to saying RecordInfo.word == 0, which is a faster test. 
- Pending Read to `ReadCache` or `CopyToTail`, Pending RMW to Tail, or Upsert or Delete of a key in the LockTable - - For all but Read(), we are modifying or removing the record, so we must acquire an Exclusive lock on the LockTable entry - - This is not done for `ManualFasterOperations`, which we assume owns the lock - The `LockTable` record is Sealed as described in [Relevant RecordInfo bits](#relevant-recordInfo-bits) - If this fails, the operation retries - Other operation threads retry upon seeing the record is sealed @@ -138,8 +172,8 @@ Here are the sequences of operations to remove records from the Lock Table: - If this fails, the Sealed bit is removed from the `LockTable` entry and the thread does RETRY_NOW - Else the record is removed from the `LockTable` - Note: there is no concern about other threads that did not find the record on lookup and "lag behind" the thread doing the LockTable-entry removal and arrive at the LockTable after that record has been removed, because: - - If the lagging thread is from a pending Read operation, then that pending operation will retry due to the InternalTryCopyToTail expectedLogicalAddress check or the readcache "dual 2pc" check in [Conflict Between Upsert/RMW and Reading From Disk to ReadCache](#conflict-between-upsert-rmw-and-reading-from-disk-to-readcache) - - If the lagging thread is from a pending RMW operation, then that pending operation will retry due to the InternalContinuePendingRMW previousFirstRecordAddress check or the readcache "dual 2pc" check in [Conflict Between Upsert/RMW and Reading From Disk to ReadCache](#conflict-between-upsert-rmw-and-reading-from-disk-to-readcache) + - If the lagging thread is from a pending Read operation, then that pending operation will retry due to the InternalTryCopyToTail expectedLogicalAddress check or the readcache "dual 2pc" check in [Conflict Between Updates and Reading From Disk to ReadCache](#conflict-between-updates-and-reading-from-disk-to-readcache) + - If the 
lagging thread is from a pending RMW operation, then that pending operation will retry due to the InternalContinuePendingRMW previousFirstRecordAddress check or the readcache "dual 2pc" check in [Conflict Between Updates and Reading From Disk to ReadCache](#conflict-between-updates-and-reading-from-disk-to-readcache) - Upsert and Delete would find the LT entry directly ### ReadCache Overview @@ -225,7 +259,7 @@ Abbreviations: - ITCTT: InternalTryCopyToTail - Unfound refers to entries that are not found in memory (the hash chain passes below HeadAddress) or are not found in the Hash chain -#### Conflict Between Upsert/RMW and Reading From Disk to ReadCache +#### Conflict Between Updates and Reading From Disk to ReadCache One big consideration for Upsert is that it blindly upserts when a scan for a record drops below HeadAddress. This in conjunction with our two insertion points--at HT->RC and at RC->MainLog--gives rise to the following lost-update anomaly: - We Upsert k1 to the main log, splicing it into the RC->MainLog point @@ -243,110 +277,147 @@ General algorithm, iff readcache entries are present: each participating thread - SkipAndInvalidateReadCache until prevFirstRCAddress - We want the Upsert to win, so this pass ensures that any newly-added readcache entry for this key, whether tentative or not, is marked Invalid - Remove the tentative -- Read: - - Prior to its SkipReadCache/TracebackForKeyMatch, it sets a tentative record at the HT->RC boundary. - - it does the scan - - if the Tentative record is now Invalid, it means Upsert/RMW set it so for a later update; return NOTFOUND - - else if it found a non-RC record for this key, it sets the Tentative record to Invalid and returns NOTFOUND - - else it removes the Tentative flag +- Read that goes pending: + - Saves TailAddress to pcontext.RecordInfo.PreviousAddress + - Complete Pending Read: + - Prior to its SkipReadCache/TracebackForKeyMatch, it sets a tentative record at the HT->RC boundary. 
+ - it does the scan + - if the Tentative record is now Invalid, it means Upsert/RMW set it so for a later update; return NOTFOUND + - else searches from current RC->MainLog record to pcontext.RecordInfo.PreviousAddress record for a match + - if it finds this non-RC record for this key, it sets the Tentative RC record to Invalid and returns NOTFOUND + - else it removes the Tentative flag OPTIMIZATION: Use readcache records rather than going to disk. However, there are issues here with the record being marked Invalid/Sealed in case multiple threads do it. +#### Lock +- If the record is in readcache: + - Do the Lock op: + - Tentative: spinwait; must fall through to check Sealed + - Sealed: Yield and Retry + - ignore/continue if the record is or becomes Invalid +- else for both mutable (and RO if MFO is active) records, if the RecordInfo is: + - Sealed: Yield() and retry + - Tombstone: Do the Lock op (e.g. unlock) + - Other: + - Do the LockOp in ConcurrentWriter for both Mutable and RO +- else // key is not found or hash chain goes below HeadAddress + - Perform `LockTable` insertion as described in [Insertion to LockTable due to Lock](#insertion-to-locktable-due-to-lock) + #### Read Note that this changes specified here, including both shared and exclusive locks in the ReadOnly region, clarifies the distinction between a data-centric view of the ReadOnly region being implicitly read-locked (because it cannot be updated), vs. a transactional view that requires explicit read locks. In a transactional view, a read lock prevents an exclusive lock; implicit readlocks based on address cannot do this in FASTER, because we can always do XLock or an RCU. Therefore, we need explicit read locks, and reads must of course block if there is an XLock. This also means that SingleReader would have to lock anyway, losing any distinction between it and ConcurrentReader. Therefore, we have consolidated ConcurrentReader and SingleReader into a single function. 
+- if the record is in readcache: + - Tentative: spinwait; must fall through to check Sealed + - Sealed: Yield and Retry + - SingleReader: will ephemerally lock if needed - for both mutable and RO records, if the RecordInfo is: - Sealed: Yield() and retry - - If SupportsLocking, we would ephemerally readlock the record, and we can't lock Sealed records as the lock may be transferred with the record. - - Tombstone: as current + - Tombstone: as current - Other: as currently, including ephemeral locking - Change IFunctions.SingleReader and .ConcurrentReader to simply .Reader - On-disk: - After PENDING - - if copying to readcache, do so in accordance with [Conflict Between Upsert/RMW and Reading From Disk to ReadCache](#conflict-between-upsert-rmw-and-reading-from-disk-to-readcache) + - if copying to readcache, do so in accordance with [Conflict Between Updates and Reading From Disk to ReadCache](#conflict-between-updates-and-reading-from-disk-to-readcache) - else if CopyToTail do [Removal From LockTable](#removal-from-locktable) #### Upsert -Note: Upsert skips RO ops if the current FasterSession is not `ManualFasterOperations` (MFO); this comparison is a static bool property of to the IFasterOperations implementation - -- If LockOp.IsSet - - If the record is in readcache: - - Do the Lock op: - - retry if the record is or becomes Sealed - - ignore/continue if the record is or becomes Invalid - - else for both mutable (and RO if MFO is active) records, if the RecordInfo is: - - Sealed: Yield() and retry - - Tombstone: Do the Lock op (e.g. 
unlock) - - Other: - - Do the LockOp in ConcurrentWriter for both Mutable and RO - - else // key is not found or hash chain goes below HeadAddress - - Perform `LockTable` insertion as described in [Insertion to LockTable due to Lock](#insertion-to-locktable-due-to-lock) -- else // LockOp is not set: - - If the record is in readcache: - - Invalidate it - - insert the new value in accordance with [Conflict Between Upsert/RMW and Reading From Disk to ReadCache](#conflict-between-upsert-rmw-and-reading-from-disk-to-readcache) - - else if the record is in the mutable region and the RecordInfo is: - - Sealed: Yield() and retry - - Tombstone: as current - - Other: IPU (including ephemeral locks) - - If this returns false - - Set RecordInfo Sealed as described in [Relevant RecordInfo bits](#relevant-recordinfo-bits) - - Insert in accordance with [Conflict Between Upsert/RMW and Reading From Disk to ReadCache](#conflict-between-upsert-rmw-and-reading-from-disk-to-readcache) - - else if the record is in ReadOnly and the RecordInfo is: - - Sealed: Yield() and retry - - Tombstone: as current - - Other: Do CopyUpdater and insert in accordance with [Conflict Between Upsert/RMW and Reading From Disk to ReadCache](#conflict-between-upsert-rmw-and-reading-from-disk-to-readcache) - - else // key is not found or hash chain goes below HeadAddress - - if the key is in the lock table - - XLock it - - If it is Sealed or Invalid, then RETRY_NOW (someone else did an operation that removed it) - - Else - - Insert new record - - Remove locktable entry per [Removal From LockTable](#removal-from-locktable) - - InitialUpdater and insert in accordance with [Conflict Between Upsert/RMW and Reading From Disk to ReadCache](#conflict-between-upsert-rmw-and-reading-from-disk-to-readcache) +- If the record is in readcache: + - if it is + - Tentative: spinwait; must fall through to check Sealed + - Sealed: Yield and Retry + - Invalidate it + - insert the new value in accordance with [Conflict Between 
Updates and Reading From Disk to ReadCache](#conflict-between-updates-and-reading-from-disk-to-readcache) +- else if the record is in the mutable region and the RecordInfo is: + - Tentative: spinwait; must fall through to check Sealed + - Sealed: Yield and Retry + - Tombstone: as current + - Other: IPU (including ephemeral locks) + - If this returns false + - Set RecordInfo Sealed as described in [Relevant RecordInfo bits](#relevant-recordinfo-bits) + - Insert in accordance with [Conflict Between Updates and Reading From Disk to ReadCache](#conflict-between-updates-and-reading-from-disk-to-readcache) +- else if the record is in ReadOnly and the RecordInfo is: + - Sealed: Yield() and retry + - Tombstone: as current + - Other: + - Seal RO record + - Do insert in accordance with [Conflict Between Updates and Reading From Disk to ReadCache](#conflict-between-updates-and-reading-from-disk-to-readcache) + - Leave the RO record Sealed +- else // key is not found or hash chain goes below HeadAddress + - if the key is in the lock table + - Seal it (Seal XLocks so all locks are drained; we have none to transfer) + - If Seal() fails or the record is marked Invalid, then and RETRY_NOW (someone else did an operation that removed it) + - Else + - Insert new record to log + - Transfer locks from LockTable entry + - Remove locktable entry per [Removal From LockTable](#removal-from-locktable) + - Unlock it + - InitialUpdater and insert in accordance with [Conflict Between Updates and Reading From Disk to ReadCache](#conflict-between-updates-and-reading-from-disk-to-readcache) #### RMW -RMW considerations are similar to Upsert from the sealing and "encountering locks" point of view. It does not do lock operations. +RMW considerations are similar to Upsert from the sealing and "encountering locks" point of view. 
- If the record is in readcache: + - if it is + - Tentative: spinwait; must fall through to check Sealed + - Sealed: Yield and Retry - Invalidate it - - CopyUpdater and insert the new value in accordance with [Conflict Between Upsert/RMW and Reading From Disk to ReadCache](#conflict-between-upsert-rmw-and-reading-from-disk-to-readcache) + - CopyUpdater and insert the new value in accordance with [Conflict Between Updates and Reading From Disk to ReadCache](#conflict-between-updates-and-reading-from-disk-to-readcache) - else if the record is in the mutable region and the RecordInfo is: - - Sealed: Yield() and retry + - Tentative: spinwait; must fall through to check Sealed + - Sealed: Yield and Retry - Tombstone: as current - Other: IPU (including ephemeral locks) - If this returns false - Set RecordInfo Sealed as described in [Relevant RecordInfo bits](#relevant-recordinfo-bits) - - Do CopyUpdater and insert in accordance with [Conflict Between Upsert/RMW and Reading From Disk to ReadCache](#conflict-between-upsert-rmw-and-reading-from-disk-to-readcache) + - Do CopyUpdater and insert in accordance with [Conflict Between Updates and Reading From Disk to ReadCache](#conflict-between-updates-and-reading-from-disk-to-readcache) + - Iff this fails, Unseal RO record - else if the record is in ReadOnly and the RecordInfo is: - - Sealed: Yield() and retry + - Tentative: spinwait; must fall through to check Sealed + - Sealed: Yield and Retry - Tombstone: as current - - Other: Do CopyUpdater and insert in accordance with [Conflict Between Upsert/RMW and Reading From Disk to ReadCache](#conflict-between-upsert-rmw-and-reading-from-disk-to-readcache) + - Other: Do CopyUpdater and insert in accordance with [Conflict Between Updates and Reading From Disk to ReadCache](#conflict-between-updates-and-reading-from-disk-to-readcache) - else // key is not found or hash chain goes below HeadAddress - - InitialUpdater and insert in accordance with [Conflict Between Upsert/RMW and 
Reading From Disk to ReadCache](#conflict-between-upsert-rmw-and-reading-from-disk-to-readcache) + - if the key is in the lock table + - Seal it (Seal XLocks so all locks are drained; we have none to transfer) + - If Seal() fails or the record is marked Invalid, then RETRY_NOW (someone else did an operation that removed it) + - Else + - Insert new record to log + - Transfer locks from LockTable entry + - Remove locktable entry per [Removal From LockTable](#removal-from-locktable) + - Unlock it + - InitialUpdater and insert in accordance with [Conflict Between Updates and Reading From Disk to ReadCache](#conflict-between-updates-and-reading-from-disk-to-readcache) - TODO: potentially replace "fuzzy" region at SafeReadOnlyAddress with Sealed, which should avoid the lost-update anomaly #### Delete - If the record is in readcache: + - if it is + - Tentative: spinwait; must fall through to check Sealed + - Sealed: Yield and Retry - Invalidate it - - insert the new deleted record in accordance with [Conflict Between Upsert/RMW and Reading From Disk to ReadCache](#conflict-between-upsert-rmw-and-reading-from-disk-to-readcache) + - insert the new deleted record in accordance with [Conflict Between Updates and Reading From Disk to ReadCache](#conflict-between-updates-and-reading-from-disk-to-readcache) - else if the record is in the mutable region and the RecordInfo is: - - Sealed: Yield() and retry + - Tentative: spinwait; must fall through to check Sealed + - Sealed: Yield and Retry - Tombstone: as current (nothing) - Other: Mark as tombstone - else if the record is in ReadOnly and the RecordInfo is: - - Sealed: Yield() and retry + - Tentative: spinwait; must fall through to check Sealed + - Sealed: Yield and Retry - Tombstone: as current (nothing) - - Other: Insert deleted record in accordance with [Conflict Between Upsert/RMW and Reading From Disk to ReadCache](#conflict-between-upsert-rmw-and-reading-from-disk-to-readcache) + - Other: + - Seal RO record + - Do 
insert of Tombstoned record in accordance with [Conflict Between Updates and Reading From Disk to ReadCache](#conflict-between-updates-and-reading-from-disk-to-readcache) + - Leave the RO record Sealed - else // key is not found or hash chain goes below HeadAddress - if the key is in the lock table - - XLock it - - If it is Sealed or Invalid, then RETRY_NOW (someone else did an operation that removed it) + - Seal it (Seal XLocks so all locks are drained; we have none to transfer) + - If Seal() fails or the record is marked Invalid, then RETRY_NOW (someone else did an operation that removed it) - Else - - Insert deleted record + - Insert new deleted record to log + - Transfer locks from LockTable entry - Remove locktable entry per [Removal From LockTable](#removal-from-locktable) - - Insert deleted record in accordance with [Conflict Between Upsert/RMW and Reading From Disk to ReadCache](#conflict-between-upsert-rmw-and-reading-from-disk-to-readcache) + - Insert deleted record in accordance with [Conflict Between Updates and Reading From Disk to ReadCache](#conflict-between-updates-and-reading-from-disk-to-readcache) From 29542aa184e9079ebc589c2941cf367ee50f203a Mon Sep 17 00:00:00 2001 From: TedHartMS <15467143+TedHartMS@users.noreply.github.com> Date: Sat, 18 Dec 2021 00:16:19 -0800 Subject: [PATCH 08/25] Existing tests run --- cs/samples/ReadAddress/VersionedReadApp.cs | 4 +- cs/src/core/ClientSession/ClientSession.cs | 12 +-- .../core/ClientSession/IFasterOperations.cs | 8 +- .../ClientSession/ManualFasterOperations.cs | 16 ++-- cs/src/core/Index/Common/Contexts.cs | 19 +++-- cs/src/core/Index/FASTER/FASTERImpl.cs | 82 +++++++++++-------- cs/src/core/Index/FASTER/FASTERThread.cs | 3 - .../core/Index/Interfaces/IFasterSession.cs | 12 +-- cs/src/core/Utilities/LockTable.cs | 35 -------- cs/test/ReadAddressTests.cs | 55 +------------ 10 files changed, 88 insertions(+), 158 deletions(-) diff --git a/cs/samples/ReadAddress/VersionedReadApp.cs 
b/cs/samples/ReadAddress/VersionedReadApp.cs index 211fc2846..fbabec403 100644 --- a/cs/samples/ReadAddress/VersionedReadApp.cs +++ b/cs/samples/ReadAddress/VersionedReadApp.cs @@ -154,7 +154,7 @@ private static void ScanStore(FasterKV store, int keyValue) RecordMetadata recordMetadata = default; for (int lap = 9; /* tested in loop */; --lap) { - var status = session.Read(ref key, ref input, ref output, ref recordMetadata, serialNo: maxLap + 1); + var status = session.Read(ref key, ref input, ref output, ref recordMetadata, ReadFlags.SkipCopyReads, serialNo: maxLap + 1); // This will wait for each retrieved record; not recommended for performance-critical code or when retrieving multiple records unless necessary. if (status == Status.PENDING) @@ -185,7 +185,7 @@ private static async Task ScanStoreAsync(FasterKV store, int keyValu RecordMetadata recordMetadata = default; for (int lap = 9; /* tested in loop */; --lap) { - var readAsyncResult = await session.ReadAsync(ref key, ref input, recordMetadata.RecordInfo.PreviousAddress, default, serialNo: maxLap + 1, cancellationToken: cancellationToken); + var readAsyncResult = await session.ReadAsync(ref key, ref input, recordMetadata.RecordInfo.PreviousAddress, ReadFlags.SkipCopyReads, default, serialNo: maxLap + 1, cancellationToken: cancellationToken); cancellationToken.ThrowIfCancellationRequested(); var (status, output) = readAsyncResult.Complete(out recordMetadata); if (!ProcessRecord(store, status, recordMetadata.RecordInfo, lap, ref output)) diff --git a/cs/src/core/ClientSession/ClientSession.cs b/cs/src/core/ClientSession/ClientSession.cs index d3e8da63a..1e80598c7 100644 --- a/cs/src/core/ClientSession/ClientSession.cs +++ b/cs/src/core/ClientSession/ClientSession.cs @@ -814,11 +814,11 @@ public InternalFasterSession(ClientSession _clientSession.functions.SingleReader(ref key, ref input, ref value, ref dst, ref recordInfo, address); [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool 
ConcurrentReader(ref Key key, ref Input input, ref Value value, ref Output dst, ref LockOperation lockOp, ref RecordInfo recordInfo, long address) + public bool ConcurrentReader(ref Key key, ref Input input, ref Value value, ref Output dst, ref RecordInfo recordInfo, long address) => !this.SupportsLocking ? _clientSession.functions.ConcurrentReader(ref key, ref input, ref value, ref dst, ref recordInfo, address) : ConcurrentReaderLock(ref key, ref input, ref value, ref dst, ref recordInfo, address); @@ -846,7 +846,7 @@ public void ReadCompletionCallback(ref Key key, ref Input input, ref Output outp #region IFunctions - Upserts [MethodImpl(MethodImplOptions.AggressiveInlining)] - public void SingleWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref LockOperation lockOp, ref RecordInfo recordInfo, long address) + public void SingleWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address) { _clientSession.functions.SingleWriter(ref key, ref input, ref src, ref dst, ref output, ref recordInfo, address); @@ -858,7 +858,7 @@ public void SingleWriter(ref Key key, ref Input input, ref Value src, ref Value } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public void PostSingleWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref LockOperation lockOp, ref RecordInfo recordInfo, long address) + public void PostSingleWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address) { if (!this.SupportsPostOperations) return; @@ -889,7 +889,7 @@ private void PostSingleWriterLock(ref Key key, ref Input input, ref Value src, r } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool ConcurrentWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref LockOperation lockOp, ref RecordInfo recordInfo, long address) + public bool 
ConcurrentWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address) => !this.SupportsLocking ? ConcurrentWriterNoLock(ref key, ref input, ref src, ref dst, ref output, ref recordInfo, address) : ConcurrentWriterLock(ref key, ref input, ref src, ref dst, ref output, ref recordInfo, address); @@ -927,7 +927,7 @@ public bool NeedInitialUpdate(ref Key key, ref Input input, ref Output output) => _clientSession.functions.NeedInitialUpdate(ref key, ref input, ref output); [MethodImpl(MethodImplOptions.AggressiveInlining)] - public void InitialUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref LockOperation lockOp, ref RecordInfo recordInfo, long address) + public void InitialUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, long address) { _clientSession.functions.InitialUpdater(ref key, ref input, ref value, ref output, ref recordInfo, address); diff --git a/cs/src/core/ClientSession/IFasterOperations.cs b/cs/src/core/ClientSession/IFasterOperations.cs index 9dc3de8b5..12d5fa037 100644 --- a/cs/src/core/ClientSession/IFasterOperations.cs +++ b/cs/src/core/ClientSession/IFasterOperations.cs @@ -80,7 +80,7 @@ public interface IFasterOperations /// /// /// - /// Flags for controlling operations within the read, such as ReadCache interaction + /// Flags for controlling operations within the read, such as ReadCache interaction. 
When doing versioned reads, this should be set to /// User application context passed in case the read goes pending due to IO /// The serial number of the operation (used in recovery) /// is populated by the implementation @@ -92,7 +92,7 @@ public interface IFasterOperations /// The address to look up /// Input to help extract the retrieved value into /// The location to place the retrieved value - /// Flags for controlling operations within the read, such as ReadCache interaction + /// Flags for controlling operations within the read, such as ReadCache interaction. When doing versioned reads, this should be set to /// User application context passed in case the read goes pending due to IO /// The serial number of the operation (used in recovery) /// is populated by the implementation; this should store the key if it needs it @@ -178,7 +178,7 @@ public interface IFasterOperations /// The key to look up /// Input to help extract the retrieved value into output /// Start at this address rather than the address in the hash table for "/> - /// Flags for controlling operations within the read, such as ReadCache interaction + /// Flags for controlling operations within the read, such as ReadCache interaction. When doing versioned reads, this should be set to /// User application context passed in case the read goes pending due to IO /// The serial number of the operation (used in recovery) /// Token to cancel the operation @@ -199,7 +199,7 @@ ValueTask.ReadAsyncResult> ReadAsyn /// /// The address to look up /// Input to help extract the retrieved value into output - /// Flags for controlling operations within the read, such as ReadCache interaction + /// Flags for controlling operations within the read, such as ReadCache interaction. 
When doing versioned reads, this should be set to /// User application context passed in case the read goes pending due to IO /// The serial number of the operation (used in recovery) /// Token to cancel the operation diff --git a/cs/src/core/ClientSession/ManualFasterOperations.cs b/cs/src/core/ClientSession/ManualFasterOperations.cs index e66542d1b..b28129ebb 100644 --- a/cs/src/core/ClientSession/ManualFasterOperations.cs +++ b/cs/src/core/ClientSession/ManualFasterOperations.cs @@ -476,13 +476,13 @@ public InternalFasterSession(ClientSession() call. Unlike - // ClientSession, we do *not* unlock in the Post call; instead we wait for explicit client user unlock. + // Our general locking rule in this "session" is: we don't lock unless explicitly requested. #region IFunctions - Upserts [MethodImpl(MethodImplOptions.AggressiveInlining)] - public void SingleWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref LockOperation lockOp, ref RecordInfo recordInfo, long address) + public void SingleWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address) { _clientSession.functions.SingleWriter(ref key, ref input, ref src, ref dst, ref output, ref recordInfo, address); } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public void PostSingleWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref LockOperation lockOp, ref RecordInfo recordInfo, long address) + public void PostSingleWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address) { if (_clientSession.functions.SupportsPostOperations) _clientSession.functions.PostSingleWriter(ref key, ref input, ref src, ref dst, ref output, ref recordInfo, address); } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool ConcurrentWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output 
output, ref LockOperation lockOp, ref RecordInfo recordInfo, long address) + public bool ConcurrentWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address) { // Note: KeyIndexes do not need notification of in-place updates because the key does not change. return _clientSession.functions.ConcurrentWriter(ref key, ref input, ref src, ref dst, ref output, ref recordInfo, address); @@ -528,7 +526,7 @@ public bool NeedInitialUpdate(ref Key key, ref Input input, ref Output output) => _clientSession.functions.NeedInitialUpdate(ref key, ref input, ref output); [MethodImpl(MethodImplOptions.AggressiveInlining)] - public void InitialUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref LockOperation lockOp, ref RecordInfo recordInfo, long address) + public void InitialUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, long address) { _clientSession.functions.InitialUpdater(ref key, ref input, ref value, ref output, ref recordInfo, address); } diff --git a/cs/src/core/Index/Common/Contexts.cs b/cs/src/core/Index/Common/Contexts.cs index 24bf86ec6..2409bf04b 100644 --- a/cs/src/core/Index/Common/Contexts.cs +++ b/cs/src/core/Index/Common/Contexts.cs @@ -98,6 +98,7 @@ internal struct PendingContext internal const ushort kNoKey = 0x0100; internal const ushort kIsAsync = 0x0200; + internal const ushort kHasPrevTailAddress = 0x0400; [MethodImpl(MethodImplOptions.AggressiveInlining)] internal IHeapContainer DetachKey() @@ -143,37 +144,43 @@ internal void SetOperationFlags(ushort flags, long address) internal bool NoKey { get => (operationFlags & kNoKey) != 0; - set => operationFlags = value ? (byte)(operationFlags | kNoKey) : (byte)(operationFlags & ~kNoKey); + set => operationFlags = value ? 
(ushort)(operationFlags | kNoKey) : (ushort)(operationFlags & ~kNoKey); } internal bool SkipReadCache { get => (operationFlags & kSkipReadCache) != 0; - set => operationFlags = value ? (byte)(operationFlags | kSkipReadCache) : (byte)(operationFlags & ~kSkipReadCache); + set => operationFlags = value ? (ushort)(operationFlags | kSkipReadCache) : (ushort)(operationFlags & ~kSkipReadCache); } internal bool HasMinAddress { get => (operationFlags & kMinAddress) != 0; - set => operationFlags = value ? (byte)(operationFlags | kMinAddress) : (byte)(operationFlags & ~kMinAddress); + set => operationFlags = value ? (ushort)(operationFlags | kMinAddress) : (ushort)(operationFlags & ~kMinAddress); } internal bool CopyReadsToTail { get => (operationFlags & kCopyReadsToTail) != 0; - set => operationFlags = value ? (byte)(operationFlags | kCopyReadsToTail) : (byte)(operationFlags & ~kCopyReadsToTail); + set => operationFlags = value ? (ushort)(operationFlags | kCopyReadsToTail) : (ushort)(operationFlags & ~kCopyReadsToTail); } internal bool SkipCopyReadsToTail { get => (operationFlags & kSkipCopyReadsToTail) != 0; - set => operationFlags = value ? (byte)(operationFlags | kSkipCopyReadsToTail) : (byte)(operationFlags & ~kSkipCopyReadsToTail); + set => operationFlags = value ? (ushort)(operationFlags | kSkipCopyReadsToTail) : (ushort)(operationFlags & ~kSkipCopyReadsToTail); } internal bool IsAsync { get => (operationFlags & kIsAsync) != 0; - set => operationFlags = value ? (byte)(operationFlags | kIsAsync) : (byte)(operationFlags & ~kIsAsync); + set => operationFlags = value ? (ushort)(operationFlags | kIsAsync) : (ushort)(operationFlags & ~kIsAsync); + } + + internal bool HasPrevTailAddress + { + get => (operationFlags & kHasPrevTailAddress) != 0; + set => operationFlags = value ? 
(ushort)(operationFlags | kHasPrevTailAddress) : (ushort)(operationFlags & ~kHasPrevTailAddress); } public void Dispose() diff --git a/cs/src/core/Index/FASTER/FASTERImpl.cs b/cs/src/core/Index/FASTER/FASTERImpl.cs index 7bd5867b7..212df17f7 100644 --- a/cs/src/core/Index/FASTER/FASTERImpl.cs +++ b/cs/src/core/Index/FASTER/FASTERImpl.cs @@ -156,7 +156,7 @@ internal OperationStatus InternalRead( // ReadCache addresses are not valid for indexing etc. so pass kInvalidAddress. ref RecordInfo recordInfo = ref readcache.GetInfo(physicalAddress); pendingContext.recordInfo = recordInfo; - return fasterSession.SingleReader(ref key, ref input, ref readcache.GetValue(physicalAddress), ref output, ref pendingContext.lockOperation, ref recordInfo, Constants.kInvalidAddress) + return fasterSession.SingleReader(ref key, ref input, ref readcache.GetValue(physicalAddress), ref output, ref recordInfo, Constants.kInvalidAddress) ? OperationStatus.SUCCESS : OperationStatus.NOTFOUND; } else if (status != OperationStatus.SUCCESS) @@ -213,7 +213,7 @@ internal OperationStatus InternalRead( return status; } else if (!recordInfo.Tombstone - && fasterSession.ConcurrentReader(ref key, ref input, ref recordValue, ref output, ref pendingContext.lockOperation, ref recordInfo, logicalAddress)) + && fasterSession.ConcurrentReader(ref key, ref input, ref recordValue, ref output, ref recordInfo, logicalAddress)) { return OperationStatus.SUCCESS; } @@ -232,12 +232,12 @@ internal OperationStatus InternalRead( return status; } else if (!recordInfo.Tombstone - && fasterSession.SingleReader(ref key, ref input, ref hlog.GetValue(physicalAddress), ref output, ref pendingContext.lockOperation, ref recordInfo, logicalAddress)) + && fasterSession.SingleReader(ref key, ref input, ref hlog.GetValue(physicalAddress), ref output, ref recordInfo, logicalAddress)) { if (CopyReadsToTail == CopyReadsToTail.FromReadOnly && !pendingContext.SkipCopyReadsToTail) { var container = hlog.GetValueContainer(ref 
hlog.GetValue(physicalAddress)); - InternalTryCopyToTail(sessionCtx, ref pendingContext, ref key, ref input, ref container.Get(), ref output, ref pendingContext.lockOperation, logicalAddress, fasterSession, sessionCtx); + InternalTryCopyToTail(sessionCtx, ref pendingContext, ref key, ref input, ref container.Get(), ref output, logicalAddress, fasterSession, sessionCtx); container.Dispose(); } return OperationStatus.SUCCESS; @@ -305,6 +305,8 @@ internal OperationStatus InternalRead( pendingContext.version = sessionCtx.version; pendingContext.serialNum = lsn; pendingContext.heldLatch = heldOperation; + + pendingContext.HasPrevTailAddress = true; pendingContext.recordInfo.PreviousAddress = prevTailAddress; } #endregion @@ -422,7 +424,7 @@ internal OperationStatus InternalUpsert( if (!recordInfo.Tombstone) { - if (fasterSession.ConcurrentWriter(ref key, ref input, ref value, ref hlog.GetValue(physicalAddress), ref output, ref pendingContext.lockOperation, ref recordInfo, logicalAddress)) + if (fasterSession.ConcurrentWriter(ref key, ref input, ref value, ref hlog.GetValue(physicalAddress), ref output, ref recordInfo, logicalAddress)) { hlog.MarkPage(logicalAddress, sessionCtx.version); pendingContext.recordInfo = recordInfo; @@ -462,7 +464,7 @@ internal OperationStatus InternalUpsert( if (!recordInfo.Tombstone) { - if (fasterSession.ConcurrentWriter(ref key, ref input, ref value, ref recordValue, ref output, ref pendingContext.lockOperation, ref recordInfo, logicalAddress)) + if (fasterSession.ConcurrentWriter(ref key, ref input, ref value, ref recordValue, ref output, ref recordInfo, logicalAddress)) { if (sessionCtx.phase == Phase.REST) hlog.MarkPage(logicalAddress, sessionCtx.version); @@ -659,7 +661,7 @@ private OperationStatus CreateNewRecordUpsert= hlog.HeadAddress) @@ -1114,7 +1116,7 @@ private OperationStatus CreateNewRecordRMW( var entry = default(HashBucketEntry); FindOrCreateTag(hash, tag, ref bucket, ref slot, ref entry, hlog.BeginAddress); var 
logicalAddress = entry.Address; - var physicalAddress = default(long); OperationStatus status; if (UseReadCache) @@ -1569,9 +1570,10 @@ internal OperationStatus InternalLock( return status; } + var physicalAddress = hlog.GetPhysicalAddress(logicalAddress); + if (logicalAddress >= hlog.ReadOnlyAddress) { - physicalAddress = hlog.GetPhysicalAddress(logicalAddress); if (!comparer.Equals(ref key, ref hlog.GetKey(physicalAddress))) { logicalAddress = hlog.GetInfo(physicalAddress).PreviousAddress; @@ -1587,7 +1589,6 @@ internal OperationStatus InternalLock( lockInfo = default; if (logicalAddress >= hlog.HeadAddress) { - physicalAddress = hlog.GetPhysicalAddress(logicalAddress); ref RecordInfo recordInfo = ref hlog.GetInfo(physicalAddress); if (!recordInfo.IsIntermediate(out status)) { @@ -1733,15 +1734,15 @@ internal OperationStatus InternalContinuePendingRead= hlog.BeginAddress) { - ref RecordInfo recordInfo = ref hlog.GetInfoFromBytePointer(request.record.GetValidPointer()); - if (recordInfo.IsIntermediate(out var internalStatus)) return internalStatus; - if (hlog.GetInfoFromBytePointer(request.record.GetValidPointer()).Tombstone) - return OperationStatus.NOTFOUND; + if (recordInfo.Tombstone) + goto NotFound; // If NoKey, we do not have the key in the initial call and must use the key from the satisfied request. // With the new overload of CompletePending that returns CompletedOutputs, pendingContext must have the key. 
@@ -1750,22 +1751,24 @@ internal OperationStatus InternalContinuePendingRead @@ -1794,7 +1797,7 @@ internal void InternalContinuePendingReadCopyToTail @@ -1927,7 +1930,7 @@ internal OperationStatus InternalContinuePendingRMW { OperationStatus internalStatus; - LockOperation dummyLockOperation = default; PendingContext pendingContext = default; do - internalStatus = InternalTryCopyToTail(currentCtx, ref pendingContext, ref key, ref input, ref value, ref output, ref dummyLockOperation, expectedLogicalAddress, fasterSession, currentCtx, noReadCache); + internalStatus = InternalTryCopyToTail(currentCtx, ref pendingContext, ref key, ref input, ref value, ref output, expectedLogicalAddress, fasterSession, currentCtx, noReadCache); while (internalStatus == OperationStatus.RETRY_NOW); return internalStatus; } @@ -2330,7 +2332,6 @@ internal OperationStatus InternalCopyToTail /// /// - /// /// /// The expected address of the record being copied. /// @@ -2349,7 +2350,7 @@ internal OperationStatus InternalCopyToTail internal OperationStatus InternalTryCopyToTail( FasterExecutionContext opCtx, ref PendingContext pendingContext, - ref Key key, ref Input input, ref Value value, ref Output output, ref LockOperation lockOperation, + ref Key key, ref Input input, ref Value value, ref Output output, long expectedLogicalAddress, FasterSession fasterSession, FasterExecutionContext currentCtx, @@ -2368,6 +2369,7 @@ internal OperationStatus InternalTryCopyToTailbucket_entries[slot], updatedEntry.word, entry.word); success = foundEntry.word == entry.word; - if (success && UseReadCache) + if (success && UseReadCache && pendingContext.HasPrevTailAddress) { // See if we have added a main-log entry from an update while we were inserting; if so, the new readcache // record is obsolete and must be Invalidated. 
ref RecordInfo rcri = ref readcache.GetInfo(newPhysicalAddress); - for (var la = entry.Address; la >= prevTailAddress; /* incremented in loop */) + var la = entry.Address; + SkipReadCache(ref la, out _); + for ( ; la >= prevTailAddress; /* incremented in loop */) { var pa = hlog.GetPhysicalAddress(la); if (comparer.Equals(ref key, ref hlog.GetKey(pa))) @@ -2479,7 +2483,15 @@ ref hlog.GetValue(newPhysicalAddress, newPhysicalAddress + actualSize), ref outp la = hlog.GetInfo(pa).PreviousAddress; } if (!rcri.Invalid) + { + // prevTailAddress may have escaped to disk, so we must retry. + if (prevTailAddress < hlog.HeadAddress) + { + rcri.SetInvalid(); + return OperationStatus.RETRY_NOW; + } rcri.Tentative = false; + } } } else @@ -2518,7 +2530,7 @@ ref hlog.GetValue(newPhysicalAddress, newPhysicalAddress + actualSize), ref outp pendingContext.logicalAddress = copyToReadCache ? Constants.kInvalidAddress /* We do not expose readcache addresses */ : newLogicalAddress; fasterSession.PostSingleWriter(ref key, ref input, ref value, ref log.GetValue(newPhysicalAddress, newPhysicalAddress + actualSize), ref output, - ref lockOperation, ref recordInfo, pendingContext.logicalAddress); + ref recordInfo, pendingContext.logicalAddress); return OperationStatus.SUCCESS; } #endregion diff --git a/cs/src/core/Index/FASTER/FASTERThread.cs b/cs/src/core/Index/FASTER/FASTERThread.cs index 09b9e000b..d2e4395d5 100644 --- a/cs/src/core/Index/FASTER/FASTERThread.cs +++ b/cs/src/core/Index/FASTER/FASTERThread.cs @@ -355,9 +355,6 @@ internal Status InternalCompletePendingRequestFromContext : IFasterS #endregion Optional features supported by this implementation #region Reads - bool SingleReader(ref Key key, ref Input input, ref Value value, ref Output dst, ref LockOperation lockOp, ref RecordInfo recordInfo, long address); - bool ConcurrentReader(ref Key key, ref Input input, ref Value value, ref Output dst, ref LockOperation lockOp, ref RecordInfo recordInfo, long address); + bool 
SingleReader(ref Key key, ref Input input, ref Value value, ref Output dst, ref RecordInfo recordInfo, long address); + bool ConcurrentReader(ref Key key, ref Input input, ref Value value, ref Output dst, ref RecordInfo recordInfo, long address); void ReadCompletionCallback(ref Key key, ref Input input, ref Output output, Context ctx, Status status, RecordMetadata recordMetadata); #endregion reads #region Upserts - void SingleWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref LockOperation lockOp, ref RecordInfo recordInfo, long address); - void PostSingleWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref LockOperation lockOp, ref RecordInfo recordInfo, long address); - bool ConcurrentWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref LockOperation lockOp, ref RecordInfo recordInfo, long address); + void SingleWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address); + void PostSingleWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address); + bool ConcurrentWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address); void UpsertCompletionCallback(ref Key key, ref Input input, ref Value value, Context ctx); #endregion Upserts #region RMWs #region InitialUpdater bool NeedInitialUpdate(ref Key key, ref Input input, ref Output output); - void InitialUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref LockOperation lockOp, ref RecordInfo recordInfo, long address); + void InitialUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, long address); void PostInitialUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, long address); 
#endregion InitialUpdater diff --git a/cs/src/core/Utilities/LockTable.cs b/cs/src/core/Utilities/LockTable.cs index d799e2202..0a6b75b3e 100644 --- a/cs/src/core/Utilities/LockTable.cs +++ b/cs/src/core/Utilities/LockTable.cs @@ -4,7 +4,6 @@ using System; using System.Collections.Generic; using System.Diagnostics; -using System.Diagnostics.CodeAnalysis; using System.Runtime.CompilerServices; namespace FASTER.core @@ -64,21 +63,6 @@ IHeapContainer GetKeyContainer(ref TKey key) return new VarLenHeapContainer(ref key, keyLen, bufferPool); } - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal void Lock(ref TKey key, LockType lockType) - { - var keyContainer = GetKeyContainer(ref key); - _ = dict.AddOrUpdate(keyContainer, - key => { - RecordInfo logRecordInfo = default; - logRecordInfo.Lock(lockType); - return new(key, logRecordInfo, default); - }, (key, lte) => { - lte.logRecordInfo.Lock(lockType); - return lte; - }); - } - // Provide our own implementation of "Update by lambda" [MethodImpl(MethodImplOptions.AggressiveInlining)] private bool Update(ref TKey key, Func, LockTableEntry> updateFactory) @@ -145,25 +129,6 @@ internal bool LockOrTentative(ref TKey key, LockType lockType, out bool tentativ return !existingConflict; } - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal void UnlockOrClearTentative(ref TKey key, LockType lockType, bool wasTentative) - { - using var lookupKey = GetKeyContainer(ref key); - if (dict.TryGetValue(lookupKey, out var lte)) - { - Debug.Assert(wasTentative || !lte.lockRecordInfo.Tentative, "lockRecordInfo.Tentative was not expected"); - Debug.Assert(!lte.lockRecordInfo.Sealed, "lockRecordInfo.Sealed was not expected"); - - // We assume that we own the lock or placed the Tentative record. 
- if (!lte.lockRecordInfo.Tentative) - lte.lockRecordInfo.Unlock(lockType); - if (!dict.TryRemove(lookupKey, out _)) - Debug.Fail("Could not remove Tentative record"); - return; - } - Debug.Fail("Trying to UnlockOrClearTentative on nonexistent key"); - } - [MethodImpl(MethodImplOptions.AggressiveInlining)] internal void ClearTentative(ref TKey key) { diff --git a/cs/test/ReadAddressTests.cs b/cs/test/ReadAddressTests.cs index 53d9c6d37..9c5512eb6 100644 --- a/cs/test/ReadAddressTests.cs +++ b/cs/test/ReadAddressTests.cs @@ -61,7 +61,7 @@ internal class Functions : FunctionsBase internal long lastWriteAddress = Constants.kInvalidAddress; bool useReadCache; bool copyReadsToTail; // Note: not currently used; not necessary due to setting SkipCopyToTail, and we get the copied-to address for CopyToTail (unlike ReadCache). - internal ReadFlags readFlags = ReadFlags.None; + internal ReadFlags readFlags = ReadFlags.SkipCopyReads; internal Functions() { @@ -230,10 +230,10 @@ internal bool ProcessChainRecord(Status status, RecordMetadata recordMetadata, i Assert.GreaterOrEqual(lap, 0); long expectedValue = SetReadOutput(defaultKeyToScan, LapOffset(lap) + defaultKeyToScan); - Assert.AreEqual(status == Status.NOTFOUND, recordInfo.Tombstone, $"status({status}) == NOTFOUND != Tombstone ({recordInfo.Tombstone})"); + Assert.AreEqual(status == Status.NOTFOUND, recordInfo.Tombstone, $"status({status}) == NOTFOUND != Tombstone ({recordInfo.Tombstone}) on lap {lap}"); Assert.AreEqual(lap == deleteLap, recordInfo.Tombstone, $"lap({lap}) == deleteLap({deleteLap}) != Tombstone ({recordInfo.Tombstone})"); if (!recordInfo.Tombstone) - Assert.AreEqual(expectedValue, actualOutput.value); + Assert.AreEqual(expectedValue, actualOutput.value, $"lap({lap})"); // Check for end of loop return recordInfo.PreviousAddress >= fkv.Log.BeginAddress; @@ -282,22 +282,8 @@ public void VersionedReadSyncTests(UseReadCache urc, CopyReadsToTail copyReadsTo for (int lap = maxLap - 1; /* tested in loop */; --lap) 
{ - // If the ordinal is not in the range of the most recent record versions, do not copy to readcache or tail. - session.functions.readFlags = (lap < maxLap - 1) ? ReadFlags.SkipCopyReads : ReadFlags.None; - var status = session.Read(ref key, ref input, ref output, ref recordMetadata, session.functions.readFlags, serialNo: maxLap + 1); - if (iteration == 1 && lap == maxLap - 1 && useReadCache) - { - // This should have been served from the readcache. Verify that, then reissue the query without readcache, so we can - // get the prev address for the chain. - Assert.AreNotEqual(Status.PENDING, status); - Assert.AreEqual(Constants.kInvalidAddress, recordMetadata.Address); - Assert.IsTrue(testStore.ProcessChainRecord(status, recordMetadata, lap, ref output)); - session.functions.readFlags = ReadFlags.SkipReadCache; - status = session.Read(ref key, ref input, ref output, ref recordMetadata, session.functions.readFlags, serialNo: maxLap + 1); - } - if (status == Status.PENDING) { // This will wait for each retrieved record; not recommended for performance-critical code or when retrieving multiple records unless necessary. @@ -331,23 +317,9 @@ public async Task VersionedReadAsyncTests(UseReadCache urc, CopyReadsToTail copy for (int lap = maxLap - 1; /* tested in loop */; --lap) { - // If the ordinal is not in the range of the most recent record versions, do not copy to readcache or tail. - session.functions.readFlags = (lap < maxLap - 1) ? ReadFlags.SkipCopyReads : ReadFlags.None; - var readAsyncResult = await session.ReadAsync(ref key, ref input, recordMetadata.RecordInfo.PreviousAddress, session.functions.readFlags, default, serialNo: maxLap + 1); var (status, output) = readAsyncResult.Complete(out recordMetadata); - if (iteration == 1 && lap == maxLap - 1 && useReadCache) - { - // This should have been served from the readcache. Verify that, then reissue the query without readcache, so we can - // get the prev address for the chain. 
- Assert.AreEqual(Constants.kInvalidAddress, recordMetadata.Address); - Assert.IsTrue(testStore.ProcessChainRecord(status, recordMetadata, lap, ref output)); - session.functions.readFlags = ReadFlags.SkipReadCache; - readAsyncResult = await session.ReadAsync(ref key, ref input, recordMetadata.RecordInfo.PreviousAddress, session.functions.readFlags, default, serialNo: maxLap + 1); - (status, output) = readAsyncResult.Complete(out recordMetadata); - } - if (!testStore.ProcessChainRecord(status, recordMetadata, lap, ref output)) break; } @@ -386,9 +358,6 @@ public void ReadAtAddressSyncTests(UseReadCache urc, CopyReadsToTail copyReadsTo (status, output) = TestUtils.GetSinglePendingResult(completedOutputs, out recordMetadata); } - // After the first Read, do not allow copies to or lookups in ReadCache. - session.functions.readFlags = ReadFlags.SkipCopyReads; - if (!testStore.ProcessChainRecord(status, recordMetadata, lap, ref output)) break; @@ -438,9 +407,6 @@ public async Task ReadAtAddressAsyncTests(UseReadCache urc, CopyReadsToTail copy var readAsyncResult = await session.ReadAsync(ref key, ref input, recordMetadata.RecordInfo.PreviousAddress, session.functions.readFlags, default, serialNo: maxLap + 1); var (status, output) = readAsyncResult.Complete(out recordMetadata); - // After the first Read, do not allow copies to or lookups in ReadCache. - session.functions.readFlags = ReadFlags.SkipCopyReads; - if (!testStore.ProcessChainRecord(status, recordMetadata, lap, ref output)) break; @@ -485,9 +451,6 @@ public async Task ReadAtAddressAsyncReadFlagsNoneTests(UseReadCache urc, CopyRea var readAsyncResult = await session.ReadAsync(ref key, ref input, recordMetadata.RecordInfo.PreviousAddress, session.functions.readFlags, default, serialNo: maxLap + 1); var (status, output) = readAsyncResult.Complete(out recordMetadata); - // After the first Read, do not allow copies to or lookups in ReadCache. 
- session.functions.readFlags = ReadFlags.SkipCopyReads; - if (!testStore.ProcessChainRecord(status, recordMetadata, lap, ref output)) break; @@ -532,9 +495,6 @@ public async Task ReadAtAddressAsyncReadFlagsSkipCacheTests(UseReadCache urc, Co var readAsyncResult = await session.ReadAsync(ref key, ref input, recordMetadata.RecordInfo.PreviousAddress, session.functions.readFlags, default, serialNo: maxLap + 1); var (status, output) = readAsyncResult.Complete(out recordMetadata); - // After the first Read, do not allow copies to or lookups in ReadCache. - session.functions.readFlags = ReadFlags.SkipCopyReads; - if (!testStore.ProcessChainRecord(status, recordMetadata, lap, ref output)) break; @@ -576,9 +536,6 @@ public void ReadNoKeySyncTests(UseReadCache urc, CopyReadsToTail copyReadsToTail { var keyOrdinal = rng.Next(numKeys); - // If the ordinal is not in the range of the most recent record versions, do not copy to readcache or tail. - session.functions.readFlags = (keyOrdinal <= (numKeys - keyMod)) ? ReadFlags.SkipCopyReads : ReadFlags.None; - var status = session.ReadAtAddress(testStore.InsertAddresses[keyOrdinal], ref input, ref output, session.functions.readFlags, serialNo: maxLap + 1); if (status == Status.PENDING) { @@ -617,17 +574,11 @@ public async Task ReadNoKeyAsyncTests(UseReadCache urc, CopyReadsToTail copyRead { var keyOrdinal = rng.Next(numKeys); - // If the ordinal is not in the range of the most recent record versions, do not copy to readcache or tail. - session.functions.readFlags = (keyOrdinal <= (numKeys - keyMod)) ? ReadFlags.SkipCopyReads : ReadFlags.None; - var readAsyncResult = await session.ReadAtAddressAsync(testStore.InsertAddresses[keyOrdinal], ref input, session.functions.readFlags, default, serialNo: maxLap + 1); var (status, output) = readAsyncResult.Complete(out recordMetadata); TestStore.ProcessNoKeyRecord(status, ref output, keyOrdinal); } - - // After the first Read, do not allow copies to or lookups in ReadCache. 
- session.functions.readFlags = ReadFlags.SkipReadCache; } await testStore.Flush(); From 2fb97a3cf0577ff71470cac13209e1d00104ce97 Mon Sep 17 00:00:00 2001 From: TedHartMS <15467143+TedHartMS@users.noreply.github.com> Date: Sat, 18 Dec 2021 01:10:35 -0800 Subject: [PATCH 09/25] Rename to LockableRawContext, IFasterContext --- cs/src/core/ClientSession/ClientSession.cs | 8 +- ...IFasterOperations.cs => IFasterContext.cs} | 2 +- ...Operations.cs => LockableUnsafeContext.cs} | 26 ++-- cs/src/core/Index/FASTER/FASTER.cs | 2 +- ...Tests.cs => LockableUnsafeContextTests.cs} | 128 +++++++++--------- cs/test/TestUtils.cs | 2 +- docs/_docs/30-fasterkv-manual-locking.md | 62 ++++----- 7 files changed, 115 insertions(+), 115 deletions(-) rename cs/src/core/ClientSession/{IFasterOperations.cs => IFasterContext.cs} (99%) rename cs/src/core/ClientSession/{ManualFasterOperations.cs => LockableUnsafeContext.cs} (97%) rename cs/test/{ManualOperationsTests.cs => LockableUnsafeContextTests.cs} (80%) diff --git a/cs/src/core/ClientSession/ClientSession.cs b/cs/src/core/ClientSession/ClientSession.cs index 1e80598c7..d4ecda660 100644 --- a/cs/src/core/ClientSession/ClientSession.cs +++ b/cs/src/core/ClientSession/ClientSession.cs @@ -19,7 +19,7 @@ namespace FASTER.core /// /// /// - public sealed class ClientSession : IClientSession, IFasterOperations, IDisposable + public sealed class ClientSession : IClientSession, IFasterContext, IDisposable where Functions : IFunctions { internal readonly FasterKV fht; @@ -36,7 +36,7 @@ public sealed class ClientSession internal readonly InternalFasterSession FasterSession; - ManualFasterOperations manualOperations; + LockableUnsafeContext manualOperations; internal const string NotAsyncSessionErr = "Session does not support async operations"; @@ -161,9 +161,9 @@ public void Dispose() /// /// Return a new interface to Faster operations that supports manual locking and epoch control. 
/// - public ManualFasterOperations GetManualOperations() + public LockableUnsafeContext GetLockableUnsafeContext() { - this.manualOperations ??= new ManualFasterOperations(this); + this.manualOperations ??= new LockableUnsafeContext(this); this.manualOperations.Acquire(); return this.manualOperations; } diff --git a/cs/src/core/ClientSession/IFasterOperations.cs b/cs/src/core/ClientSession/IFasterContext.cs similarity index 99% rename from cs/src/core/ClientSession/IFasterOperations.cs rename to cs/src/core/ClientSession/IFasterContext.cs index 12d5fa037..1bd132b75 100644 --- a/cs/src/core/ClientSession/IFasterOperations.cs +++ b/cs/src/core/ClientSession/IFasterContext.cs @@ -9,7 +9,7 @@ namespace FASTER.core /// /// Interface for FASTER operations /// - public interface IFasterOperations + public interface IFasterContext { /// /// Read operation diff --git a/cs/src/core/ClientSession/ManualFasterOperations.cs b/cs/src/core/ClientSession/LockableUnsafeContext.cs similarity index 97% rename from cs/src/core/ClientSession/ManualFasterOperations.cs rename to cs/src/core/ClientSession/LockableUnsafeContext.cs index b28129ebb..71711b340 100644 --- a/cs/src/core/ClientSession/ManualFasterOperations.cs +++ b/cs/src/core/ClientSession/LockableUnsafeContext.cs @@ -12,7 +12,7 @@ namespace FASTER.core /// /// Faster Operations implementation that allows manual control of record locking and epoch management. For advanced use only. 
/// - public sealed class ManualFasterOperations : IFasterOperations, IDisposable + public sealed class LockableUnsafeContext : IFasterContext, IDisposable where Functions : IFunctions { readonly ClientSession clientSession; @@ -27,10 +27,10 @@ public sealed class ManualFasterOperations clientSession) + internal LockableUnsafeContext(ClientSession clientSession) { this.clientSession = clientSession; FasterSession = new InternalFasterSession(clientSession); @@ -40,25 +40,25 @@ internal ManualFasterOperations(ClientSession [MethodImpl(MethodImplOptions.AggressiveInlining)] - public void UnsafeResumeThread() => clientSession.UnsafeResumeThread(); + public void ResumeThread() => clientSession.UnsafeResumeThread(); /// /// Resume session on current thread. IMPORTANT: Call SuspendThread before any async op. /// /// Epoch that the session resumed on; can be saved to see if epoch has changed [MethodImpl(MethodImplOptions.AggressiveInlining)] - public void UnsafeResumeThread(out int resumeEpoch) => clientSession.UnsafeResumeThread(out resumeEpoch); + public void ResumeThread(out int resumeEpoch) => clientSession.UnsafeResumeThread(out resumeEpoch); /// - /// Current epoch of the session + /// Suspend session on current thread /// - public int LocalCurrentEpoch => clientSession.fht.epoch.LocalCurrentEpoch; + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void SuspendThread() => clientSession.UnsafeSuspendThread(); /// - /// Suspend session on current thread + /// Current epoch of the session /// - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public void UnsafeSuspendThread() => clientSession.UnsafeSuspendThread(); + public int LocalCurrentEpoch => clientSession.fht.epoch.LocalCurrentEpoch; /// /// Synchronously complete outstanding pending synchronous operations. 
@@ -86,7 +86,7 @@ internal void Acquire() { Interlocked.Increment(ref this.clientSession.fht.NumActiveLockingSessions); if (this.isAcquired) - throw new FasterException("Trying to acquire an already-acquired ManualFasterOperations"); + throw new FasterException("Trying to acquire an already-acquired LockableUnsafeContext"); this.isAcquired = true; } @@ -96,9 +96,9 @@ internal void Acquire() public void Dispose() { if (LightEpoch.AnyInstanceProtected()) - throw new FasterException("Disposing ManualFasterOperations with a protected epoch; must call UnsafeSuspendThread"); + throw new FasterException("Disposing LockableUnsafeContext with a protected epoch; must call UnsafeSuspendThread"); if (TotalLockCount > 0) - throw new FasterException($"Disposing ManualFasterOperations with locks held: {sharedLockCount} shared locks, {exclusiveLockCount} exclusive locks"); + throw new FasterException($"Disposing LockableUnsafeContext with locks held: {sharedLockCount} shared locks, {exclusiveLockCount} exclusive locks"); Interlocked.Decrement(ref this.clientSession.fht.NumActiveLockingSessions); } #endregion Acquire and Dispose diff --git a/cs/src/core/Index/FASTER/FASTER.cs b/cs/src/core/Index/FASTER/FASTER.cs index 4005cad33..05a0193ff 100644 --- a/cs/src/core/Index/FASTER/FASTER.cs +++ b/cs/src/core/Index/FASTER/FASTER.cs @@ -39,7 +39,7 @@ public enum ReadFlags /// /// Force a copy to tail if we read from immutable or on-disk. If this and ReadCache are both specified, ReadCache wins. /// This avoids log pollution for read-mostly workloads. Used mostly in conjunction with - /// locking. + /// locking. 
/// CopyToTail = 0x00000004, diff --git a/cs/test/ManualOperationsTests.cs b/cs/test/LockableUnsafeContextTests.cs similarity index 80% rename from cs/test/ManualOperationsTests.cs rename to cs/test/LockableUnsafeContextTests.cs index 9d2586a5f..977f67fda 100644 --- a/cs/test/ManualOperationsTests.cs +++ b/cs/test/LockableUnsafeContextTests.cs @@ -14,7 +14,7 @@ namespace FASTER.test // Functions for the "Simple lock transaction" case, e.g.: // - Lock key1, key2, key3, keyResult // - Do some operation on value1, value2, value3 and write the result to valueResult - class ManualFunctions : SimpleFunctions + class LockableUnsafeFunctions : SimpleFunctions { internal long deletedRecordAddress; @@ -41,13 +41,13 @@ public enum FlushMode { NoFlush, ReadOnly, OnDisk } public enum UpdateOp { Upsert, RMW } [TestFixture] - class ManualOperationsTests + class LockableUnsafeContextTests { const int numRecords = 1000; const int valueMult = 1_000_000; private FasterKV fkv; - private ClientSession session; + private ClientSession session; private IDevice log; [SetUp] @@ -70,7 +70,7 @@ public void Setup() fkv = new FasterKV(1L << 20, new LogSettings { LogDevice = log, ObjectLogDevice = null, PageSizeBits = 12, MemorySizeBits = 22, ReadCacheSettings = readCacheSettings }, fasterSettings: new FasterSettings { SupportsLocking = true }); - session = fkv.For(new ManualFunctions()).NewSession(); + session = fkv.For(new LockableUnsafeFunctions()).NewSession(); } [TearDown] @@ -95,12 +95,12 @@ void Populate() } } - static void AssertIsLocked(ManualFasterOperations manualOps, int key, LockType lockType) - => AssertIsLocked(manualOps, key, lockType == LockType.Exclusive, lockType == LockType.Shared); + static void AssertIsLocked(LockableUnsafeContext luContext, int key, LockType lockType) + => AssertIsLocked(luContext, key, lockType == LockType.Exclusive, lockType == LockType.Shared); - static void AssertIsLocked(ManualFasterOperations manualOps, int key, bool xlock, bool slock) + static void 
AssertIsLocked(LockableUnsafeContext luContext, int key, bool xlock, bool slock) { - var (isX, isS) = manualOps.IsLocked(key); + var (isX, isS) = luContext.IsLocked(key); Assert.AreEqual(xlock, isX, "xlock mismatch"); Assert.AreEqual(slock, isS, "slock mismatch"); } @@ -113,11 +113,11 @@ void PrepareRecordLocation(FlushMode recordLocation) this.fkv.Log.FlushAndEvict(wait: true); } - static void ClearCountsOnError(ManualFasterOperations manualOps) + static void ClearCountsOnError(LockableUnsafeContext luContext) { // If we already have an exception, clear these counts so "Run" will not report them spuriously. - manualOps.sharedLockCount = 0; - manualOps.exclusiveLockCount = 0; + luContext.sharedLockCount = 0; + luContext.exclusiveLockCount = 0; } void EnsureNoLocks() @@ -135,7 +135,7 @@ void EnsureNoLocks() } [Test] - [Category(TestUtils.ManualOpsTestCategory)] + [Category(TestUtils.LockableUnsafeContextCategory)] [Category(TestUtils.SmokeTestCategory)] public void InMemorySimpleLockTxnTest([Values] ResultLockTarget resultLockTarget, [Values] ReadCopyDestination readCopyDestination, [Values]FlushMode flushMode, [Values(Phase.REST, Phase.INTERMEDIATE)] Phase phase, [Values] UpdateOp updateOp) @@ -152,37 +152,37 @@ public void InMemorySimpleLockTxnTest([Values] ResultLockTarget resultLockTarget Status status; Dictionary locks = new(); - using (var manualOps = session.GetManualOperations()) + using (var luContext = session.GetLockableUnsafeContext()) { - manualOps.UnsafeResumeThread(out var epoch); + luContext.ResumeThread(out var epoch); try { { // key scope // Get initial source values int key = 24; - manualOps.Lock(key, LockType.Shared); - AssertIsLocked(manualOps, key, xlock: false, slock: true); + luContext.Lock(key, LockType.Shared); + AssertIsLocked(luContext, key, xlock: false, slock: true); locks[key] = LockType.Shared; key = 51; - manualOps.Lock(key, LockType.Shared); + luContext.Lock(key, LockType.Shared); locks[key] = LockType.Shared; - 
AssertIsLocked(manualOps, key, xlock: false, slock: true); + AssertIsLocked(luContext, key, xlock: false, slock: true); // Lock destination value. - manualOps.Lock(resultKey, LockType.Exclusive); + luContext.Lock(resultKey, LockType.Exclusive); locks[resultKey] = LockType.Exclusive; - AssertIsLocked(manualOps, resultKey, xlock: true, slock: false); + AssertIsLocked(luContext, resultKey, xlock: true, slock: false); // Re-get source values, to verify (e.g. they may be in readcache now). // We just locked this above, but for FlushMode.OnDisk it will be in the LockTable and will still be PENDING. - status = manualOps.Read(24, out var value24); + status = luContext.Read(24, out var value24); if (flushMode == FlushMode.OnDisk) { if (status == Status.PENDING) { - manualOps.UnsafeCompletePendingWithOutputs(out var completedOutputs, wait: true); + luContext.UnsafeCompletePendingWithOutputs(out var completedOutputs, wait: true); Assert.True(completedOutputs.Next()); value24 = completedOutputs.Current.Output; Assert.False(completedOutputs.Current.RecordMetadata.RecordInfo.IsLockedExclusive); @@ -196,12 +196,12 @@ public void InMemorySimpleLockTxnTest([Values] ResultLockTarget resultLockTarget Assert.AreNotEqual(Status.PENDING, status); } - status = manualOps.Read(51, out var value51); + status = luContext.Read(51, out var value51); if (flushMode == FlushMode.OnDisk) { if (status == Status.PENDING) { - manualOps.UnsafeCompletePendingWithOutputs(out var completedOutputs, wait: true); + luContext.UnsafeCompletePendingWithOutputs(out var completedOutputs, wait: true); Assert.True(completedOutputs.Next()); value51 = completedOutputs.Current.Output; Assert.False(completedOutputs.Current.RecordMetadata.RecordInfo.IsLockedExclusive); @@ -219,13 +219,13 @@ public void InMemorySimpleLockTxnTest([Values] ResultLockTarget resultLockTarget session.ctx.phase = phase; int dummyInOut = 0; status = useRMW - ? 
manualOps.RMW(ref resultKey, ref expectedResult, ref dummyInOut, out RecordMetadata recordMetadata) - : manualOps.Upsert(ref resultKey, ref dummyInOut, ref expectedResult, ref dummyInOut, out recordMetadata); + ? luContext.RMW(ref resultKey, ref expectedResult, ref dummyInOut, out RecordMetadata recordMetadata) + : luContext.Upsert(ref resultKey, ref dummyInOut, ref expectedResult, ref dummyInOut, out recordMetadata); if (flushMode == FlushMode.OnDisk) { if (status == Status.PENDING) { - manualOps.UnsafeCompletePendingWithOutputs(out var completedOutputs, wait: true); + luContext.UnsafeCompletePendingWithOutputs(out var completedOutputs, wait: true); Assert.True(completedOutputs.Next()); resultValue = completedOutputs.Current.Output; Assert.True(completedOutputs.Current.RecordMetadata.RecordInfo.IsLockedExclusive); @@ -240,21 +240,21 @@ public void InMemorySimpleLockTxnTest([Values] ResultLockTarget resultLockTarget } // Reread the destination to verify - status = manualOps.Read(resultKey, out resultValue); + status = luContext.Read(resultKey, out resultValue); Assert.AreNotEqual(Status.PENDING, status); Assert.AreEqual(expectedResult, resultValue); } foreach (var key in locks.Keys.OrderBy(key => -key)) - manualOps.Unlock(key, locks[key]); + luContext.Unlock(key, locks[key]); } catch (Exception) { - ClearCountsOnError(manualOps); + ClearCountsOnError(luContext); throw; } finally { - manualOps.UnsafeSuspendThread(); + luContext.SuspendThread(); } } @@ -266,7 +266,7 @@ public void InMemorySimpleLockTxnTest([Values] ResultLockTarget resultLockTarget } [Test] - [Category(TestUtils.ManualOpsTestCategory)] + [Category(TestUtils.LockableUnsafeContextCategory)] [Category(TestUtils.SmokeTestCategory)] public void InMemoryLongLockTest([Values] ResultLockTarget resultLockTarget, [Values] FlushMode flushMode, [Values(Phase.REST, Phase.INTERMEDIATE)] Phase phase, [Values] UpdateOp updateOp) { @@ -280,18 +280,18 @@ public void InMemoryLongLockTest([Values] ResultLockTarget 
resultLockTarget, [Va var useRMW = updateOp == UpdateOp.RMW; Status status; - using var manualOps = session.GetManualOperations(); - manualOps.UnsafeResumeThread(); + using var luContext = session.GetLockableUnsafeContext(); + luContext.ResumeThread(); try { - manualOps.Lock(51, LockType.Exclusive); + luContext.Lock(51, LockType.Exclusive); - status = manualOps.Read(24, out var value24); + status = luContext.Read(24, out var value24); if (flushMode == FlushMode.OnDisk) { Assert.AreEqual(Status.PENDING, status); - manualOps.UnsafeCompletePendingWithOutputs(out var completedOutputs, wait: true); + luContext.UnsafeCompletePendingWithOutputs(out var completedOutputs, wait: true); (status, value24) = TestUtils.GetSinglePendingResult(completedOutputs); Assert.AreEqual(Status.OK, status); Assert.AreEqual(24 * valueMult, value24); @@ -300,12 +300,12 @@ public void InMemoryLongLockTest([Values] ResultLockTarget resultLockTarget, [Va Assert.AreNotEqual(Status.PENDING, status); // We just locked this above, but for FlushMode.OnDisk it will be in the LockTable and will still be PENDING. - status = manualOps.Read(51, out var value51); + status = luContext.Read(51, out var value51); if (flushMode == FlushMode.OnDisk) { if (status == Status.PENDING) { - manualOps.UnsafeCompletePendingWithOutputs(out var completedOutputs, wait: true); + luContext.UnsafeCompletePendingWithOutputs(out var completedOutputs, wait: true); Assert.True(completedOutputs.Next()); value51 = completedOutputs.Current.Output; Assert.True(completedOutputs.Current.RecordMetadata.RecordInfo.IsLockedExclusive); @@ -323,24 +323,24 @@ public void InMemoryLongLockTest([Values] ResultLockTarget resultLockTarget, [Va // Set the phase to Phase.INTERMEDIATE to test the non-Phase.REST blocks session.ctx.phase = phase; status = useRMW - ? manualOps.RMW(resultKey, value24 + value51) - : manualOps.Upsert(resultKey, value24 + value51); + ? 
luContext.RMW(resultKey, value24 + value51) + : luContext.Upsert(resultKey, value24 + value51); Assert.AreNotEqual(Status.PENDING, status); - status = manualOps.Read(resultKey, out resultValue); + status = luContext.Read(resultKey, out resultValue); Assert.AreNotEqual(Status.PENDING, status); Assert.AreEqual(expectedResult, resultValue); - manualOps.Unlock(51, LockType.Exclusive); + luContext.Unlock(51, LockType.Exclusive); } catch (Exception) { - ClearCountsOnError(manualOps); + ClearCountsOnError(luContext); throw; } finally { - manualOps.UnsafeSuspendThread(); + luContext.SuspendThread(); } // Verify from the full session. @@ -351,7 +351,7 @@ public void InMemoryLongLockTest([Values] ResultLockTarget resultLockTarget, [Va } [Test] - [Category(TestUtils.ManualOpsTestCategory)] + [Category(TestUtils.LockableUnsafeContextCategory)] [Category(TestUtils.SmokeTestCategory)] public void InMemoryDeleteTest([Values] ResultLockTarget resultLockTarget, [Values] ReadCopyDestination readCopyDestination, [Values(FlushMode.NoFlush, FlushMode.ReadOnly)] FlushMode flushMode, [Values(Phase.REST, Phase.INTERMEDIATE)] Phase phase) @@ -368,37 +368,37 @@ public void InMemoryDeleteTest([Values] ResultLockTarget resultLockTarget, [Valu int resultKey = resultLockTarget == ResultLockTarget.LockTable ? numRecords + 1 : 75; Status status; - using (var manualOps = session.GetManualOperations()) + using (var luContext = session.GetLockableUnsafeContext()) { - manualOps.UnsafeResumeThread(out var epoch); + luContext.ResumeThread(out var epoch); try { // Lock destination value. 
- manualOps.Lock(resultKey, LockType.Exclusive); + luContext.Lock(resultKey, LockType.Exclusive); locks[resultKey] = LockType.Exclusive; - AssertIsLocked(manualOps, resultKey, xlock: true, slock: false); + AssertIsLocked(luContext, resultKey, xlock: true, slock: false); // Set the phase to Phase.INTERMEDIATE to test the non-Phase.REST blocks session.ctx.phase = phase; - status = manualOps.Delete(ref resultKey); + status = luContext.Delete(ref resultKey); Assert.AreNotEqual(Status.PENDING, status); // Reread the destination to verify - status = manualOps.Read(resultKey, out var _); + status = luContext.Read(resultKey, out var _); Assert.AreEqual(Status.NOTFOUND, status); foreach (var key in locks.Keys.OrderBy(key => key)) - manualOps.Unlock(key, locks[key]); + luContext.Unlock(key, locks[key]); } catch (Exception) { - ClearCountsOnError(manualOps); + ClearCountsOnError(luContext); throw; } finally { - manualOps.UnsafeSuspendThread(); + luContext.SuspendThread(); } } @@ -409,7 +409,7 @@ public void InMemoryDeleteTest([Values] ResultLockTarget resultLockTarget, [Valu } [Test] - [Category(TestUtils.ManualOpsTestCategory)] + [Category(TestUtils.LockableUnsafeContextCategory)] [Category(TestUtils.SmokeTestCategory)] public void StressLocks([Values(1, 8)] int numLockThreads, [Values(1, 8)] int numOpThreads) { @@ -426,32 +426,32 @@ void runLockThread(int tid) Dictionary locks = new(); Random rng = new(tid + 101); - using var localSession = fkv.For(new ManualFunctions()).NewSession(); - using var manualOps = localSession.GetManualOperations(); - manualOps.UnsafeResumeThread(); + using var localSession = fkv.For(new LockableUnsafeFunctions()).NewSession(); + using var luContext = localSession.GetLockableUnsafeContext(); + luContext.ResumeThread(); for (var iteration = 0; iteration < numIterations; ++iteration) { for (var key = baseKey + rng.Next(numIncrement); key < baseKey + numKeys; key += rng.Next(1, numIncrement)) { var lockType = rng.Next(100) < 60 ? 
LockType.Shared : LockType.Exclusive; - manualOps.Lock(key, lockType); + luContext.Lock(key, lockType); locks[key] = lockType; } foreach (var key in locks.Keys.OrderBy(key => key)) - manualOps.Unlock(key, locks[key]); + luContext.Unlock(key, locks[key]); locks.Clear(); } - manualOps.UnsafeSuspendThread(); + luContext.SuspendThread(); } void runOpThread(int tid) { Random rng = new(tid + 101); - using var localSession = fkv.For(new ManualFunctions()).NewSession(); + using var localSession = fkv.For(new LockableUnsafeFunctions()).NewSession(); for (var iteration = 0; iteration < numIterations; ++iteration) { @@ -468,7 +468,7 @@ void runOpThread(int tid) } } - // Run a mix of ManualOps and normal ClientSession operations + // Run a mix of luContext and normal ClientSession operations int numThreads = numLockThreads + numOpThreads; Thread[] threads = new Thread[numThreads]; for (int t = 0; t < numThreads; t++) diff --git a/cs/test/TestUtils.cs b/cs/test/TestUtils.cs index 5df277875..c72d7b561 100644 --- a/cs/test/TestUtils.cs +++ b/cs/test/TestUtils.cs @@ -17,7 +17,7 @@ internal static class TestUtils // Various categories used to group tests internal const string SmokeTestCategory = "Smoke"; internal const string FasterKVTestCategory = "FasterKV"; - internal const string ManualOpsTestCategory = "ManualOps"; + internal const string LockableUnsafeContextCategory = "LockableUnsafeContext"; /// /// Delete a directory recursively diff --git a/docs/_docs/30-fasterkv-manual-locking.md b/docs/_docs/30-fasterkv-manual-locking.md index 4a700bb74..046da82e7 100644 --- a/docs/_docs/30-fasterkv-manual-locking.md +++ b/docs/_docs/30-fasterkv-manual-locking.md @@ -10,10 +10,10 @@ toc: true Manual locking in FasterKV refers to the user specifying when records will be locked. This is different from the per-operation locks that ensure consistency for concurrent operations, e.g. ConcurrentReader and ConcurrentWriter. Manual locks have a longer duration. 
-Manual locking is done by obtaining the `ManualFasterOperations` instance from a `ClientSession`. This provides an implementation of `IFasterOperations` that: +Manual locking is done by obtaining the `LockableUnsafeContext` instance from a `ClientSession`. This provides an implementation of `IFasterContext` that: - Does not do automatic locking (except when updated records are inserted, as described below) -- Does not do automatic epoch protection; instead, the user must call `UnsafeResumeThread` and `UnsafeSuspendThread`. In these, "Unsafe" refers to the fact it is the user's responsibility to make the correct calls. -- Exposes `Lock()` and `Unlock()` APIs. These are the *only* way records are locked in `ManualFasterOperations`; we assume that all locks are taken before any operations are done. Therefore, `ManualFasterOperations` `IFunctions` update operations do not honor locks; it assumes that it owns them. +- Does not do automatic epoch protection; instead, the user must call the `LockableUnsafeContext`'s`ResumeThread` and `SuspendThread`. In these, "Unsafe" refers to the fact it is the user's responsibility to make the correct calls. +- Exposes `Lock()` and `Unlock()` APIs. These are the *only* way records are locked in `LockableUnsafeContext`; we assume that all locks are taken before any operations are done. Therefore, `LockableUnsafeContext`'s `IFunctions` update operations do not honor locks; it assumes that it owns them. Here are two use case examples: - Lock key1, key2, and key3, then Read key1 and key2 values, calculate the result, write them to key3, and unlock all keys. This ensures that key3 has a consistent value based on key1 and key2 values. @@ -23,50 +23,50 @@ Here are two use case examples: All keys must be locked in a deterministic order, and unlocked in the reverse order, to avoid deadlocks. -`ManualFasterOperations` inherits from `IDisposable`. 
All locks must be released and `UnsafeSuspendThread` must be called before `Dispose()` is called; `Dispose()` does *not* make these calls automatically. +`LockableUnsafeContext` inherits from `IDisposable`. All locks must be released and `SuspendThread` must be called before `Dispose()` is called; `Dispose()` does *not* make these calls automatically. ### Examples -Here are examples of the above two use cases, taken from the unit tests in `ManualOperationsTests.cs`: +Here are examples of the above two use cases, taken from the unit tests in `LockableUnsafeContextTests.cs`: Lock multiple keys: ```cs - using (var manualOps = session.GetManualOperations()) + using (var luContext = session.GetLockableUnsafeContext()) { - manualOps.UnsafeResumeThread(out var epoch); + luContext.ResumeThread(out var epoch); - manualOps.Lock(24, LockType.Shared); - manualOps.Lock(51, LockType.Shared); - manualOps.Lock(75, LockType.Exclusive); + luContext.Lock(24, LockType.Shared); + luContext.Lock(51, LockType.Shared); + luContext.Lock(75, LockType.Exclusive); - manualOps.Read(24, out var value24); - manualOps.Read(51, out var value51); - manualOps.Upsert(75, value24 + value51); + luContext.Read(24, out var value24); + luContext.Read(51, out var value51); + luContext.Upsert(75, value24 + value51); - manualOps.Unlock(24, LockType.Shared); - manualOps.Unlock(51, LockType.Shared); - manualOps.Unlock(75, LockType.Exclusive); + luContext.Unlock(24, LockType.Shared); + luContext.Unlock(51, LockType.Shared); + luContext.Unlock(75, LockType.Exclusive); - manualOps.UnsafeSuspendThread(); + luContext.SuspendThread(); ``` Lock multiple keys: ```cs - using (var manualOps = session.GetManualOperations()) + using (var luContext = session.GetLockableUnsafeContext()) { - manualOps.UnsafeResumeThread(out var epoch); + luContext.ResumeThread(out var epoch); - manualOps.Lock(51, LockType.Shared); + luContext.Lock(51, LockType.Shared); - manualOps.Read(24, out var value24); - manualOps.Read(51, out var 
value51); - manualOps.Upsert(75, value24 + value51); + luContext.Read(24, out var value24); + luContext.Read(51, out var value51); + luContext.Upsert(75, value24 + value51); - manualOps.Unlock(51, LockType.Shared); + luContext.Unlock(51, LockType.Shared); - manualOps.UnsafeSuspendThread(); + luContext.SuspendThread(); ``` -TODO: Add sample with `manualOps.LocalCurrentEpoch`. +TODO: Add sample with `luContext.LocalCurrentEpoch`. ## Internal Design @@ -76,7 +76,7 @@ Manual locking and checking is integrated into `FASTERImpl.cs` methods: - The locking and unlocking are implemented in `InternalLock` - Other record operations that must consider locks are `InternalUpsert`, `InternalRead` and `InternalCompletePendingRead`, `InternalRMW` and `InternalCompletePendingRMW`, and `InternalDelete`. These modifications are exposed via the `Lock()` and `Unlock()`. -Because epoch protection is done by user calls, ManualFasterOperations methods call the internal ContextRead etc. methods, which are called by the API methods that do Resume and Suspend of epoch protection. +Because epoch protection is done by user calls, LockableUnsafeContext methods call the internal ContextRead etc. methods, which are called by the API methods that do Resume and Suspend of epoch protection. At a high level, `Lock()` and `Unlock()` call `InternalLock()`. Locking does not issue PENDING operations to retrieve on-disk data, and locking/unlocking is designed to avoid pending I/O operations by use of a [`LockTable`](#locktable-overview) consisting of {`TKey`, `RecordInfo`} pairs, where `TKey` is the FasterKV Key type and `RecordInfo` is used to perform the locking/unlocking. @@ -88,7 +88,7 @@ The following sections refer to the following two in the `RecordInfo`: - **Lock Bits**: There is one Exclusive Lock bit and 6 Shared Lock bits (allowing 64 shared locks) in the RecordInfo. 
- **Tentative**: a record marked Tentative is very short-term; it indicates that the thread is performing a Tentative insertion of the record, and may make the Tentative record final by removing the Tentative bit, or may back off the insertion by setting the record to Invalid and returning RETRY_NOW. - **Sealed**: a record marked Sealed is one for which an update is known to be in progress. Sealed records are "visible" only short-term (e.g. a single call to Upsert or RMW, or a transfer to/from the `LockTable`). A thread encountering this should immediately return RETRY_NOW. - - Sealing is done via `RecordInfo.Seal`. This is used in locking scenarios rather than a sequence of "CAS to set Sealed; test Sealed bit because the after-Seal locking is fuzzy; we don't know whether the record was CTT'd before or after a post-Seal lock, and thus we don't know if the transferred record "owns" our lock. `RecordInfo.Seal` does a CAS with both the XLock and Seal bits, then Unlocks the XLock bit; this ensures it works whether SupportsLocking is true or false. It returns true if successsful or false if another thread Sealed the record. However, `ManualFasterOperations` must not try to lock as it owns the lock already. + - Sealing is done via `RecordInfo.Seal`. This is used in locking scenarios rather than a sequence of "CAS to set Sealed; test Sealed bit because the after-Seal locking is fuzzy; we don't know whether the record was CTT'd before or after a post-Seal lock, and thus we don't know if the transferred record "owns" our lock. `RecordInfo.Seal` does a CAS with both the XLock and Seal bits, then Unlocks the XLock bit; this ensures it works whether SupportsLocking is true or false. It returns true if successsful or false if another thread Sealed the record. However, `LockableUnsafeContext` must not try to lock as it owns the lock already. 
- **Invalid**: This is a well-known bit from v1 included here for clarity: its behavior is that the record is to be skipped, using its `.PreviousAddress` to move along the chain. This has relevance to some areas of [Record Transfers](#record-transfers), particularly with respect to the `ReadCache`. Additionally, the `SupportsLocking` flag has been moved from IFunctions to a `FasterKV` constructor argument. This value must be uniform across all asessions. It is only to control the locking done by FasterKV; this replaces the concept of user-controlled locking that was provided with the `IFunctions` methods for concurrent record access. @@ -214,7 +214,7 @@ For record transfers involving the ReadCache, we have the following high-level c - CAS the RC record to be removed to be Sealed. This will cause any other operations to retry. - CAS the preceding RC record to point to the to-be-removed RC record's .PreviousAddress (standard singly-linked-list operations) - CAS the now-removed RC record to be Invalid. - - We only actually transfer records from the RC prefix to the LockTable if there is an active `ManualFasterOperations` session at the time `ReadCacheEvict` is called; otherwise there will be no locks. However, we must already traverse the `ReadCache` records, and it is possible for a new `ManualFasterOperations` session to start during the duration of `ReadCacheEvict`, so there is no benefit to checking for the no-`ManualFasterOperations` case (unlike [Main Log Evictions](#main-log-evictions), which can avoid page scans by checking for this). + - We only actually transfer records from the RC prefix to the LockTable if there is an active `LockableUnsafeContext` session at the time `ReadCacheEvict` is called; otherwise there will be no locks. 
However, we must already traverse the `ReadCache` records, and it is possible for a new `LockableUnsafeContext` session to start during the duration of `ReadCacheEvict`, so there is no benefit to checking for the no-`LockableUnsafeContext` case (unlike [Main Log Evictions](#main-log-evictions), which can avoid page scans by checking for this). The above covers single-record operations on the RC prefix. Two-record operations occur when we must outsplice one record and insplice another, because the value for a record in the RC prefix is updated, e.g. Upsert updating a record in the ReadOnly region or RMW doing a CopyUpdater (of mutable or readonly), or either of these operating updating a key that is in the RC prefix chain. The considerations here are: - Updating an RC record: @@ -228,7 +228,7 @@ The above covers single-record operations on the RC prefix. Two-record operation #### Main Log Evictions -When main log pages are evicted due to memory limits, *if* there are any active `ManualFasterOperations` sessions, then each record on those pages must be examined and any locks transferred to `LockTable` entries. +When main log pages are evicted due to memory limits, *if* there are any active `LockableUnsafeContext` sessions, then each record on those pages must be examined and any locks transferred to `LockTable` entries. Transfers to the `LockTable` due to main log evictions are handled in the following manner: - A new `TentativeHeadAddress` (THA) field is added next to `HeadAddress`. @@ -243,7 +243,7 @@ Transfers to the `LockTable` due to main log evictions are handled in the follow ### Recovery Considerations We must clear in-memory records' lock bits during FoldOver recovery. -- Add to checkpoint information an indication of whether any `ManualFasterOperations` were active during the Checkpoint. +- Add to checkpoint information an indication of whether any `LockableUnsafeContext` were active during the Checkpoint. 
- If this MRO indicator is true: - Scan pages, clearing the locks of any records - These pages do not need to be flushed to disk From 56faae1f8b194ad81ab4f9bb57afcfce1ddde944 Mon Sep 17 00:00:00 2001 From: TedHartMS <15467143+TedHartMS@users.noreply.github.com> Date: Mon, 20 Dec 2021 18:02:22 -0800 Subject: [PATCH 10/25] Perf workaround for LockTable.IsActive --- cs/src/core/Index/FASTER/FASTERImpl.cs | 19 +++++++++--------- cs/src/core/Utilities/LockTable.cs | 27 +++++++++++++++----------- 2 files changed, 25 insertions(+), 21 deletions(-) diff --git a/cs/src/core/Index/FASTER/FASTERImpl.cs b/cs/src/core/Index/FASTER/FASTERImpl.cs index 64c10a233..86edbcc62 100644 --- a/cs/src/core/Index/FASTER/FASTERImpl.cs +++ b/cs/src/core/Index/FASTER/FASTERImpl.cs @@ -418,7 +418,6 @@ internal OperationStatus InternalUpsert( if (logicalAddress >= hlog.ReadOnlyAddress) { ref RecordInfo recordInfo = ref hlog.GetInfo(physicalAddress); - ref Value recordValue = ref hlog.GetValue(physicalAddress); if (recordInfo.IsIntermediate(out status)) return status; @@ -1348,10 +1347,10 @@ internal OperationStatus InternalDelete( break; } } - #endregion +#endregion - #region Normal processing +#region Normal processing // Mutable Region: Update the record in-place if (logicalAddress >= hlog.ReadOnlyAddress) @@ -1534,7 +1533,7 @@ internal OperationStatus InternalDelete( return status; } - #endregion +#endregion /// /// Manual Lock operation. Locks the record corresponding to 'key'. 
@@ -1558,7 +1557,7 @@ internal OperationStatus InternalLock( var prevTailAddress = hlog.GetTailAddress(); - #region Trace back for record in in-memory HybridLog +#region Trace back for record in in-memory HybridLog var entry = default(HashBucketEntry); FindOrCreateTag(hash, tag, ref bucket, ref slot, ref entry, hlog.BeginAddress); var logicalAddress = entry.Address; @@ -1584,7 +1583,7 @@ internal OperationStatus InternalLock( out physicalAddress); } } - #endregion +#endregion lockInfo = default; if (logicalAddress >= hlog.HeadAddress) @@ -1633,7 +1632,7 @@ internal OperationStatus InternalLock( return OperationStatus.SUCCESS; } - #region ContainsKeyInMemory +#region ContainsKeyInMemory [MethodImpl(MethodImplOptions.AggressiveInlining)] internal Status InternalContainsKeyInMemory( @@ -2363,7 +2362,7 @@ internal OperationStatus InternalTryCopyToTail> Constants.kHashTagShift); - #region Trace back for record in in-memory HybridLog +#region Trace back for record in in-memory HybridLog // Find the entry in the log and make sure someone didn't insert another record after we decided there wasn't one. var entry = default(HashBucketEntry); FindOrCreateTag(hash, tag, ref bucket, ref slot, ref entry, hlog.BeginAddress); @@ -2407,9 +2406,9 @@ internal OperationStatus InternalTryCopyToTail> readonly KeyComparer keyComparer; readonly SectorAlignedBufferPool bufferPool; + // dict.Empty takes locks on all tables ("snapshot semantics"), which is too much of a perf hit. So we track this + // separately. It is not atomic when items are added/removed, but by incrementing it before and decrementing it after + // we add or remove items, respectively, we achieve the desired goal of IsActive. 
+ long approxNumItems = 0; + internal LockTable(IVariableLengthStruct keyLen, IFasterEqualityComparer comparer, SectorAlignedBufferPool bufferPool) { this.keyLen = keyLen; @@ -54,14 +60,10 @@ internal LockTable(IVariableLengthStruct keyLen, IFasterEqualityComparer this.dict.Count > 0; + internal bool IsActive => this.approxNumItems > 0; - IHeapContainer GetKeyContainer(ref TKey key) - { - if (bufferPool is null) - return new StandardHeapContainer(ref key); - return new VarLenHeapContainer(ref key, keyLen, bufferPool); - } + IHeapContainer GetKeyContainer(ref TKey key) + => bufferPool is null ? new StandardHeapContainer(ref key) : new VarLenHeapContainer(ref key, keyLen, bufferPool); // Provide our own implementation of "Update by lambda" [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -80,11 +82,9 @@ private bool Update(ref TKey key, Func, LockTableEntry { lte.lockRecordInfo.Unlock(lockType); return lte; })) - { TryRemoveIfNoLocks(ref key); - return; - } - Debug.Fail("Trying to unlock a nonexistent key"); + else + Debug.Fail("Trying to unlock a nonexistent key"); } [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -113,6 +113,7 @@ internal bool LockOrTentative(ref TKey key, LockType lockType, out bool tentativ lockRecordInfo.Tentative = true; RecordInfo logRecordInfo = default; logRecordInfo.Lock(lockType); + Interlocked.Increment(ref this.approxNumItems); return new(key, logRecordInfo, lockRecordInfo); }, (key, lte) => { if (lte.lockRecordInfo.Tentative || lte.lockRecordInfo.Sealed) @@ -148,6 +149,7 @@ internal void TryRemoveIfNoLocks(ref TKey key) return; if (dict.TryRemoveConditional(lookupKey, lte)) { + Interlocked.Decrement(ref this.approxNumItems); lte.key.Dispose(); return; } @@ -237,7 +239,10 @@ internal bool ApplyToLogRecord(ref TKey key, ref RecordInfo logRecord) logRecord.CopyLocksFrom(lte.logRecordInfo); lte.lockRecordInfo.SetInvalid(); if (dict.TryRemove(lookupKey, out _)) + { + Interlocked.Decrement(ref this.approxNumItems); 
lte.key.Dispose(); + } lte.lockRecordInfo.Tentative = false; } From 7fb385f1579d4a1eac1a2bb4ddb7d79e9feb7a9e Mon Sep 17 00:00:00 2001 From: TedHartMS <15467143+TedHartMS@users.noreply.github.com> Date: Thu, 23 Dec 2021 12:32:04 -0800 Subject: [PATCH 11/25] WIP on ReadCacheEvict and MemoryPageLockEvictionScan --- cs/src/core/Allocator/AllocatorBase.cs | 13 +- cs/src/core/Allocator/BlittableAllocator.cs | 9 +- cs/src/core/Allocator/GenericAllocator.cs | 10 +- cs/src/core/Allocator/LockEvictionObserver.cs | 59 ++++++++ .../Allocator/VarLenBlittableAllocator.cs | 10 +- cs/src/core/ClientSession/ClientSession.cs | 92 ++++++++----- .../ClientSession/LockableUnsafeContext.cs | 12 +- cs/src/core/Index/Common/Contexts.cs | 7 +- cs/src/core/Index/Common/RecordInfo.cs | 37 +++-- cs/src/core/Index/FASTER/FASTER.cs | 1 + cs/src/core/Index/FASTER/FASTERImpl.cs | 126 ++++++++++++------ .../core/Index/Interfaces/IFasterSession.cs | 8 +- cs/src/core/Utilities/LockTable.cs | 6 +- cs/src/core/Utilities/LockUtility.cs | 21 +-- cs/src/core/VarLen/MemoryFunctions.cs | 5 - cs/src/core/VarLen/SpanByteFunctions.cs | 5 - cs/test/LockTests.cs | 1 + 17 files changed, 294 insertions(+), 128 deletions(-) create mode 100644 cs/src/core/Allocator/LockEvictionObserver.cs diff --git a/cs/src/core/Allocator/AllocatorBase.cs b/cs/src/core/Allocator/AllocatorBase.cs index 76a8fe43e..af8fcaf92 100644 --- a/cs/src/core/Allocator/AllocatorBase.cs +++ b/cs/src/core/Allocator/AllocatorBase.cs @@ -244,6 +244,11 @@ public abstract partial class AllocatorBase : IDisposable /// internal IObserver> OnEvictionObserver; + /// + /// Observer for locked records getting evicted from memory (page closed) + /// + internal IObserver> OnLockEvictionObserver; + /// /// The "event" to be waited on for flush completion by the initiator of an operation /// @@ -683,6 +688,13 @@ internal void WriteAsync(IntPtr alignedSourceAddress, ulong alignedDes /// public abstract IFasterScanIterator Scan(long beginAddress, long 
endAddress, ScanBufferingMode scanBufferingMode = ScanBufferingMode.DoublePageBuffering); + /// + /// Scan page guaranteed to be in memory + /// + /// Begin address + /// End address + internal abstract void MemoryPageLockEvictionScan(long beginAddress, long endAddress); + /// /// Scan page guaranteed to be in memory /// @@ -691,7 +703,6 @@ internal void WriteAsync(IntPtr alignedSourceAddress, ulong alignedDes internal abstract void MemoryPageScan(long beginAddress, long endAddress); #endregion - /// /// Instantiate base allocator /// diff --git a/cs/src/core/Allocator/BlittableAllocator.cs b/cs/src/core/Allocator/BlittableAllocator.cs index 2f05aa53e..931110249 100644 --- a/cs/src/core/Allocator/BlittableAllocator.cs +++ b/cs/src/core/Allocator/BlittableAllocator.cs @@ -339,10 +339,15 @@ public override IFasterScanIterator Scan(long beginAddress, long end } /// - internal override void MemoryPageScan(long beginAddress, long endAddress) + internal override void MemoryPageLockEvictionScan(long beginAddress, long endAddress) => MemoryPageScan(beginAddress, endAddress, OnLockEvictionObserver); + + /// + internal override void MemoryPageScan(long beginAddress, long endAddress) => MemoryPageScan(beginAddress, endAddress, OnEvictionObserver); + + internal void MemoryPageScan(long beginAddress, long endAddress, IObserver> observer) { using var iter = new BlittableScanIterator(this, beginAddress, endAddress, ScanBufferingMode.NoBuffering, epoch, true); - OnEvictionObserver?.OnNext(iter); + observer?.OnNext(iter); } /// diff --git a/cs/src/core/Allocator/GenericAllocator.cs b/cs/src/core/Allocator/GenericAllocator.cs index 3f349c4a2..e2d2d4bae 100644 --- a/cs/src/core/Allocator/GenericAllocator.cs +++ b/cs/src/core/Allocator/GenericAllocator.cs @@ -1030,7 +1030,13 @@ public override IFasterScanIterator Scan(long beginAddress, long end } /// - internal override void MemoryPageScan(long beginAddress, long endAddress) + internal override void MemoryPageLockEvictionScan(long 
beginAddress, long endAddress) => MemoryPageScan(beginAddress, endAddress, OnLockEvictionObserver); + + /// + internal override void MemoryPageScan(long beginAddress, long endAddress) => MemoryPageScan(beginAddress, endAddress, OnEvictionObserver); + + /// + private void MemoryPageScan(long beginAddress, long endAddress, IObserver> observer) { var page = (beginAddress >> LogPageSizeBits) % BufferSize; int start = (int)(beginAddress & PageSizeMask) / recordSize; @@ -1041,7 +1047,7 @@ internal override void MemoryPageScan(long beginAddress, long endAddress) try { epoch.Suspend(); - OnEvictionObserver?.OnNext(iter); + observer?.OnNext(iter); } finally { diff --git a/cs/src/core/Allocator/LockEvictionObserver.cs b/cs/src/core/Allocator/LockEvictionObserver.cs new file mode 100644 index 000000000..2d7a727d1 --- /dev/null +++ b/cs/src/core/Allocator/LockEvictionObserver.cs @@ -0,0 +1,59 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +using FASTER.core; +using System; + +namespace FASTER.core +{ + /// + /// Cache size tracker + /// + public class LockEvictionObserver : IObserver> + { + readonly FasterKV store; + + /// + /// Class to track and update cache size + /// + /// FASTER store instance + public LockEvictionObserver(FasterKV store) => this.store = store; + + /// + /// Subscriber to pages as they are getting evicted from main memory + /// + /// + public void OnNext(IFasterScanIterator iter) + { + // If there are no active locking sessions, there should be no locks in the log. + if (this.store.NumActiveLockingSessions == 0) + return; + + while (iter.GetNext(out RecordInfo info, out Key key, out Value value)) + { + // If it is not Invalid, we must Seal it so there is no possibility it will be missed while we're in the process + // of transferring it to the Lock Table. Use manualLocking as we want to transfer the locks, not drain them. 
+ if (!info.IsLocked) + continue; + + // Seal it so there is no possibility it will be missed while we're in the process of transferring it to the Lock Table. + // Use manualLocking as we want to transfer the locks, not drain them. + info.Seal(manualLocking: true); + + // Now get it into the lock table, so it is ready as soon as the record is removed. + this.store.LockTable.TransferFrom(ref key, info); + } + } + + /// + /// OnCompleted + /// + public void OnCompleted() { } + + /// + /// OnError + /// + /// + public void OnError(Exception error) { } + } +} diff --git a/cs/src/core/Allocator/VarLenBlittableAllocator.cs b/cs/src/core/Allocator/VarLenBlittableAllocator.cs index 416935877..2ef731bb2 100644 --- a/cs/src/core/Allocator/VarLenBlittableAllocator.cs +++ b/cs/src/core/Allocator/VarLenBlittableAllocator.cs @@ -463,13 +463,17 @@ public override IFasterScanIterator Scan(long beginAddress, long end } /// - internal override void MemoryPageScan(long beginAddress, long endAddress) + internal override void MemoryPageLockEvictionScan(long beginAddress, long endAddress) => MemoryPageScan(beginAddress, endAddress, OnLockEvictionObserver); + + /// + internal override void MemoryPageScan(long beginAddress, long endAddress) => MemoryPageScan(beginAddress, endAddress, OnEvictionObserver); + + internal void MemoryPageScan(long beginAddress, long endAddress, IObserver> observer) { using var iter = new VariableLengthBlittableScanIterator(this, beginAddress, endAddress, ScanBufferingMode.NoBuffering, epoch, true); - OnEvictionObserver?.OnNext(iter); + observer?.OnNext(iter); } - /// /// Read pages from specified device /// diff --git a/cs/src/core/ClientSession/ClientSession.cs b/cs/src/core/ClientSession/ClientSession.cs index d4ecda660..dbb6451ff 100644 --- a/cs/src/core/ClientSession/ClientSession.cs +++ b/cs/src/core/ClientSession/ClientSession.cs @@ -818,16 +818,24 @@ public bool SingleReader(ref Key key, ref Input input, ref Value value, ref Outp => 
_clientSession.functions.SingleReader(ref key, ref input, ref value, ref dst, ref recordInfo, address); [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool ConcurrentReader(ref Key key, ref Input input, ref Value value, ref Output dst, ref RecordInfo recordInfo, long address) - => !this.SupportsLocking - ? _clientSession.functions.ConcurrentReader(ref key, ref input, ref value, ref dst, ref recordInfo, address) - : ConcurrentReaderLock(ref key, ref input, ref value, ref dst, ref recordInfo, address); + public bool ConcurrentReader(ref Key key, ref Input input, ref Value value, ref Output dst, ref RecordInfo recordInfo, long address, out bool lockFailed) + { + lockFailed = false; + return !this.SupportsLocking + ? _clientSession.functions.ConcurrentReader(ref key, ref input, ref value, ref dst, ref recordInfo, address) + : ConcurrentReaderLock(ref key, ref input, ref value, ref dst, ref recordInfo, address, out lockFailed); + } - public bool ConcurrentReaderLock(ref Key key, ref Input input, ref Value value, ref Output dst, ref RecordInfo recordInfo, long address) + public bool ConcurrentReaderLock(ref Key key, ref Input input, ref Value value, ref Output dst, ref RecordInfo recordInfo, long address, out bool lockFailed) { - recordInfo.LockShared(); + if (!recordInfo.LockShared()) + { + lockFailed = true; + return false; + } try { + lockFailed = false; return _clientSession.functions.ConcurrentReader(ref key, ref input, ref value, ref dst, ref recordInfo, address); } finally @@ -852,8 +860,8 @@ public void SingleWriter(ref Key key, ref Input input, ref Value src, ref Value if (this.SupportsPostOperations && this.SupportsLocking) { - // Lock must be taken after the value is initialized. Unlocked in PostSingleWriterLock. - recordInfo.LockExclusive(); + // Lock ephemerally before we CAS into the log; Unlocked in PostSingleWriterLock. 
+ recordInfo.SetLockExclusiveBit(); } } @@ -889,10 +897,13 @@ private void PostSingleWriterLock(ref Key key, ref Input input, ref Value src, r } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool ConcurrentWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address) - => !this.SupportsLocking - ? ConcurrentWriterNoLock(ref key, ref input, ref src, ref dst, ref output, ref recordInfo, address) - : ConcurrentWriterLock(ref key, ref input, ref src, ref dst, ref output, ref recordInfo, address); + public bool ConcurrentWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address, out bool lockFailed) + { + lockFailed = false; + return !this.SupportsLocking + ? ConcurrentWriterNoLock(ref key, ref input, ref src, ref dst, ref output, ref recordInfo, address) + : ConcurrentWriterLock(ref key, ref input, ref src, ref dst, ref output, ref recordInfo, address, out lockFailed); + } [MethodImpl(MethodImplOptions.AggressiveInlining)] private bool ConcurrentWriterNoLock(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address) @@ -903,11 +914,16 @@ private bool ConcurrentWriterNoLock(ref Key key, ref Input input, ref Value src, } [MethodImpl(MethodImplOptions.AggressiveInlining)] - private bool ConcurrentWriterLock(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address) + private bool ConcurrentWriterLock(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address, out bool lockFailed) { - recordInfo.LockExclusive(); + if (!recordInfo.LockExclusive()) + { + lockFailed = true; + return false; + } try { + lockFailed = false; return !recordInfo.Tombstone && ConcurrentWriterNoLock(ref key, ref input, ref src, ref dst, ref output, ref recordInfo, address); } 
finally @@ -933,8 +949,8 @@ public void InitialUpdater(ref Key key, ref Input input, ref Value value, ref Ou if (this.SupportsPostOperations && this.SupportsLocking) { - // Lock must be taken after the value is initialized. Unlocked in PostInitialUpdaterLock. - recordInfo.LockExclusive(); + // Lock ephemerally before we CAS into the log; Unlocked in PostInitialUpdaterLock. + recordInfo.SetLockExclusiveBit(); } } @@ -982,8 +998,8 @@ public void CopyUpdater(ref Key key, ref Input input, ref Value oldValue, ref Va if (this.SupportsPostOperations && this.SupportsLocking) { - // Lock must be taken after the value is initialized. Unlocked in PostInitialUpdaterLock. - recordInfo.LockExclusive(); + // Lock ephemerally before we CAS into the log. Unlocked in PostInitialUpdaterLock. + recordInfo.SetLockExclusiveBit(); } } @@ -1021,10 +1037,13 @@ private bool PostCopyUpdaterLock(ref Key key, ref Input input, ref Output output #region InPlaceUpdater [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool InPlaceUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, long address) - => !this.SupportsLocking - ? InPlaceUpdaterNoLock(ref key, ref input, ref output, ref value, ref recordInfo, address) - : InPlaceUpdaterLock(ref key, ref input, ref output, ref value, ref recordInfo, address); + public bool InPlaceUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, long address, out bool lockFailed) + { + lockFailed = false; + return !this.SupportsLocking + ? 
InPlaceUpdaterNoLock(ref key, ref input, ref output, ref value, ref recordInfo, address) + : InPlaceUpdaterLock(ref key, ref input, ref output, ref value, ref recordInfo, address, out lockFailed); + } [MethodImpl(MethodImplOptions.AggressiveInlining)] private bool InPlaceUpdaterNoLock(ref Key key, ref Input input, ref Output output, ref Value value, ref RecordInfo recordInfo, long address) @@ -1034,11 +1053,16 @@ private bool InPlaceUpdaterNoLock(ref Key key, ref Input input, ref Output outpu return _clientSession.functions.InPlaceUpdater(ref key, ref input, ref value, ref output, ref recordInfo, address); } - private bool InPlaceUpdaterLock(ref Key key, ref Input input, ref Output output, ref Value value, ref RecordInfo recordInfo, long address) + private bool InPlaceUpdaterLock(ref Key key, ref Input input, ref Output output, ref Value value, ref RecordInfo recordInfo, long address, out bool lockFailed) { - recordInfo.LockExclusive(); + if (!recordInfo.LockExclusive()) + { + lockFailed = true; + return false; + } try { + lockFailed = false; return !recordInfo.Tombstone && InPlaceUpdaterNoLock(ref key, ref input, ref output, ref value, ref recordInfo, address); } finally @@ -1067,10 +1091,13 @@ public void PostSingleDeleter(ref Key key, ref RecordInfo recordInfo, long addre } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool ConcurrentDeleter(ref Key key, ref Value value, ref RecordInfo recordInfo, long address) - => (!this.SupportsLocking) - ? ConcurrentDeleterNoLock(ref key, ref value, ref recordInfo, address) - : ConcurrentDeleterLock(ref key, ref value, ref recordInfo, address); + public bool ConcurrentDeleter(ref Key key, ref Value value, ref RecordInfo recordInfo, long address, out bool lockFailed) + { + lockFailed = false; + return (!this.SupportsLocking) + ? 
ConcurrentDeleterNoLock(ref key, ref value, ref recordInfo, address) + : ConcurrentDeleterLock(ref key, ref value, ref recordInfo, address, out lockFailed); + } [MethodImpl(MethodImplOptions.AggressiveInlining)] private bool ConcurrentDeleterNoLock(ref Key key, ref Value value, ref RecordInfo recordInfo, long address) @@ -1081,11 +1108,16 @@ private bool ConcurrentDeleterNoLock(ref Key key, ref Value value, ref RecordInf } [MethodImpl(MethodImplOptions.AggressiveInlining)] - private bool ConcurrentDeleterLock(ref Key key, ref Value value, ref RecordInfo recordInfo, long address) + private bool ConcurrentDeleterLock(ref Key key, ref Value value, ref RecordInfo recordInfo, long address, out bool lockFailed) { - recordInfo.LockExclusive(); + if (!recordInfo.LockExclusive()) + { + lockFailed = true; + return false; + } try { + lockFailed = false; return ConcurrentDeleterNoLock(ref key, ref value, ref recordInfo, address); } finally diff --git a/cs/src/core/ClientSession/LockableUnsafeContext.cs b/cs/src/core/ClientSession/LockableUnsafeContext.cs index 71711b340..6a815292c 100644 --- a/cs/src/core/ClientSession/LockableUnsafeContext.cs +++ b/cs/src/core/ClientSession/LockableUnsafeContext.cs @@ -482,8 +482,9 @@ public bool SingleReader(ref Key key, ref Input input, ref Value value, ref Outp } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool ConcurrentReader(ref Key key, ref Input input, ref Value value, ref Output dst, ref RecordInfo recordInfo, long address) + public bool ConcurrentReader(ref Key key, ref Input input, ref Value value, ref Output dst, ref RecordInfo recordInfo, long address, out bool lockFailed) { + lockFailed = false; return _clientSession.functions.ConcurrentReader(ref key, ref input, ref value, ref dst, ref recordInfo, address); } @@ -509,9 +510,10 @@ public void PostSingleWriter(ref Key key, ref Input input, ref Value src, ref Va } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool ConcurrentWriter(ref Key key, ref 
Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address) + public bool ConcurrentWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address, out bool lockFailed) { // Note: KeyIndexes do not need notification of in-place updates because the key does not change. + lockFailed = false; return _clientSession.functions.ConcurrentWriter(ref key, ref input, ref src, ref dst, ref output, ref recordInfo, address); } @@ -558,9 +560,10 @@ public bool PostCopyUpdater(ref Key key, ref Input input, ref Value oldValue, re #region InPlaceUpdater [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool InPlaceUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, long address) + public bool InPlaceUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, long address, out bool lockFailed) { // Note: KeyIndexes do not need notification of in-place updates because the key does not change. 
+ lockFailed = false; return _clientSession.functions.InPlaceUpdater(ref key, ref input, ref value, ref output, ref recordInfo, address); } @@ -579,8 +582,9 @@ public void PostSingleDeleter(ref Key key, ref RecordInfo recordInfo, long addre } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool ConcurrentDeleter(ref Key key, ref Value value, ref RecordInfo recordInfo, long address) + public bool ConcurrentDeleter(ref Key key, ref Value value, ref RecordInfo recordInfo, long address, out bool lockFailed) { + lockFailed = false; recordInfo.Tombstone = true; return _clientSession.functions.ConcurrentDeleter(ref key, ref value, ref recordInfo, address); } diff --git a/cs/src/core/Index/Common/Contexts.cs b/cs/src/core/Index/Common/Contexts.cs index 2409bf04b..6657c74c8 100644 --- a/cs/src/core/Index/Common/Contexts.cs +++ b/cs/src/core/Index/Common/Contexts.cs @@ -264,7 +264,7 @@ public struct CommitPoint /// public struct HybridLogRecoveryInfo { - const int CheckpointVersion = 3; + const int CheckpointVersion = 4; /// /// Guid @@ -307,6 +307,11 @@ public struct HybridLogRecoveryInfo /// Begin address /// public long beginAddress; + /// + /// If true, there was at least one IFasterContext implementation active that did manual locking at some point during the checkpoint; + /// these pages must be scanned for lock cleanup. 
+ /// + bool manualLockingActive; /// /// Commit tokens per session restored during Continue diff --git a/cs/src/core/Index/Common/RecordInfo.cs b/cs/src/core/Index/Common/RecordInfo.cs index e602c0a14..3e96ab396 100644 --- a/cs/src/core/Index/Common/RecordInfo.cs +++ b/cs/src/core/Index/Common/RecordInfo.cs @@ -84,7 +84,10 @@ public static void WriteInfo(ref RecordInfo info, bool inNewVersion, bool tombst /// Take exclusive (write) lock on RecordInfo /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - public void LockExclusive() => TryLockExclusive(spinCount: -1); + public bool LockExclusive() => TryLockExclusive(spinCount: -1); + + // For new records, which don't need the Interlocked overhead. + internal void SetLockExclusiveBit() => this.word |= kExclusiveLockBitMask; /// /// Unlock RecordInfo that was previously locked for exclusive access, via @@ -107,6 +110,8 @@ public bool TryLockExclusive(int spinCount = 1) // Acquire exclusive lock (readers may still be present; we'll drain them later) while (true) { + if (IsIntermediate) + return false; long expected_word = word; if ((expected_word & kExclusiveLockBitMask) == 0) { @@ -126,7 +131,7 @@ public bool TryLockExclusive(int spinCount = 1) /// Take shared (read) lock on RecordInfo /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - public void LockShared() => TryLockShared(spinCount: -1); + public bool LockShared() => TryLockShared(spinCount: -1); /// /// Unlock RecordInfo that was previously locked for shared access, via @@ -149,6 +154,8 @@ public bool TryLockShared(int spinCount = 1) // Acquire shared lock while (true) { + if (IsIntermediate) + return false; long expected_word = word; if (((expected_word & kExclusiveLockBitMask) == 0) // not exclusively locked && (expected_word & kSharedLockMaskInWord) != kSharedLockMaskInWord) // shared lock is not full @@ -166,7 +173,7 @@ public bool TryLockShared(int spinCount = 1) /// Take shared (read) lock on RecordInfo /// 
[MethodImpl(MethodImplOptions.AggressiveInlining)] - public void LockExclusiveFromShared() => TryLockExclusiveFromShared(spinCount: -1); + public bool LockExclusiveFromShared() => TryLockExclusiveFromShared(spinCount: -1); /// /// Promote a shared (read) lock on RecordInfo to exclusive @@ -179,6 +186,9 @@ public bool TryLockExclusiveFromShared(int spinCount = 1) // Acquire shared lock while (true) { + // Even though we own the lock here, it might be in the process of eviction, which seals it + if (IsIntermediate) + return false; long expected_word = word; if ((expected_word & kExclusiveLockBitMask) == 0) // not exclusively locked { @@ -249,25 +259,24 @@ public bool Tentative // Ensure we have exclusive access before sealing. [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool Seal(bool isManualLocking = false) + public bool Seal(bool manualLocking = false) { - if (isManualLocking) - { - // We own this lock, so just set the sealed bit. - word |= kSealedBitMask; - return true; - } + // If manualLocking, we own this lock or are transferring to the Lock Table, so just set the sealed bit. + long sealBits = manualLocking ? 
kSealedBitMask : kExclusiveLockBitMask | kSealedBitMask; while (true) { - if ((word & kExclusiveLockBitMask) == 0) + if (IsIntermediate) + return false; + if ((word & sealBits) == 0) { long expected_word = word; - long new_word = word | kExclusiveLockBitMask | kSealedBitMask; + long new_word = word | sealBits; long current_word = Interlocked.CompareExchange(ref word, new_word, expected_word); if (expected_word == current_word) { - // Lock+Seal succeeded; remove lock - this.UnlockExclusive(); + // (Lock+)Seal succeeded; remove lock if not doing manual locking + if (!manualLocking) + this.UnlockExclusive(); return true; } diff --git a/cs/src/core/Index/FASTER/FASTER.cs b/cs/src/core/Index/FASTER/FASTER.cs index 08c0b2b98..e4bd633a6 100644 --- a/cs/src/core/Index/FASTER/FASTER.cs +++ b/cs/src/core/Index/FASTER/FASTER.cs @@ -258,6 +258,7 @@ public FasterKV(long size, LogSettings logSettings, } hlog.Initialize(); + hlog.OnLockEvictionObserver = new LockEvictionObserver(this); sectorSize = (int)logSettings.LogDevice.SectorSize; Initialize(size, sectorSize); diff --git a/cs/src/core/Index/FASTER/FASTERImpl.cs b/cs/src/core/Index/FASTER/FASTERImpl.cs index 86edbcc62..6aaf00c2e 100644 --- a/cs/src/core/Index/FASTER/FASTERImpl.cs +++ b/cs/src/core/Index/FASTER/FASTERImpl.cs @@ -209,15 +209,13 @@ internal OperationStatus InternalRead( ref Value recordValue = ref hlog.GetValue(physicalAddress); if (recordInfo.IsIntermediate(out status, useStartAddress)) - { return status; - } - else if (!recordInfo.Tombstone - && fasterSession.ConcurrentReader(ref key, ref input, ref recordValue, ref output, ref recordInfo, logicalAddress)) - { + + bool lockFailed = false; + if (!recordInfo.Tombstone + && fasterSession.ConcurrentReader(ref key, ref input, ref recordValue, ref output, ref recordInfo, logicalAddress, out lockFailed)) return OperationStatus.SUCCESS; - } - return OperationStatus.NOTFOUND; + return lockFailed ? 
OperationStatus.RETRY_NOW : OperationStatus.NOTFOUND; } // Immutable region @@ -423,7 +421,7 @@ internal OperationStatus InternalUpsert( if (!recordInfo.Tombstone) { - if (fasterSession.ConcurrentWriter(ref key, ref input, ref value, ref hlog.GetValue(physicalAddress), ref output, ref recordInfo, logicalAddress)) + if (fasterSession.ConcurrentWriter(ref key, ref input, ref value, ref hlog.GetValue(physicalAddress), ref output, ref recordInfo, logicalAddress, out bool lockFailed)) { hlog.MarkPage(logicalAddress, sessionCtx.version); pendingContext.recordInfo = recordInfo; @@ -432,7 +430,7 @@ internal OperationStatus InternalUpsert( } // ConcurrentWriter failed (e.g. insufficient space). Another thread may come along to do this update in-place; Seal it to prevent that. - if (!recordInfo.Seal(fasterSession.IsManualLocking)) + if (lockFailed || !recordInfo.Seal(fasterSession.IsManualLocking)) return OperationStatus.RETRY_NOW; unsealPhysicalAddress = physicalAddress; } @@ -463,7 +461,7 @@ internal OperationStatus InternalUpsert( if (!recordInfo.Tombstone) { - if (fasterSession.ConcurrentWriter(ref key, ref input, ref value, ref recordValue, ref output, ref recordInfo, logicalAddress)) + if (fasterSession.ConcurrentWriter(ref key, ref input, ref value, ref recordValue, ref output, ref recordInfo, logicalAddress, out bool lockFailed)) { if (sessionCtx.phase == Phase.REST) hlog.MarkPage(logicalAddress, sessionCtx.version); @@ -476,7 +474,7 @@ internal OperationStatus InternalUpsert( } // ConcurrentWriter failed (e.g. insufficient space). Another thread may come along to do this update in-place; Seal it to prevent that. 
- if (!recordInfo.Seal(fasterSession.IsManualLocking)) + if (lockFailed || !recordInfo.Seal(fasterSession.IsManualLocking)) { status = OperationStatus.RETRY_NOW; goto LatchRelease; // Release shared latch (if acquired) @@ -710,7 +708,7 @@ private OperationStatus CreateNewRecordUpsert( if (!recordInfo.Tombstone) { - if (fasterSession.InPlaceUpdater(ref key, ref input, ref hlog.GetValue(physicalAddress), ref output, ref recordInfo, logicalAddress)) + if (fasterSession.InPlaceUpdater(ref key, ref input, ref hlog.GetValue(physicalAddress), ref output, ref recordInfo, logicalAddress, out bool lockFailed)) { hlog.MarkPage(logicalAddress, sessionCtx.version); pendingContext.recordInfo = recordInfo; @@ -835,8 +833,8 @@ internal OperationStatus InternalRMW( return OperationStatus.SUCCESS; } - // ConcurrentWriter failed (e.g. insufficient space). Another thread may come along to do this update in-place; Seal it to prevent that. - if (!recordInfo.Seal(fasterSession.IsManualLocking)) + // InPlaceUpdater failed (e.g. insufficient space). Another thread may come along to do this update in-place; Seal it to prevent that. + if (lockFailed || !recordInfo.Seal(fasterSession.IsManualLocking)) return OperationStatus.RETRY_NOW; unsealPhysicalAddress = physicalAddress; } @@ -864,7 +862,7 @@ internal OperationStatus InternalRMW( if (!recordInfo.Tombstone) { - if (fasterSession.InPlaceUpdater(ref key, ref input, ref recordValue, ref output, ref recordInfo, logicalAddress)) + if (fasterSession.InPlaceUpdater(ref key, ref input, ref recordValue, ref output, ref recordInfo, logicalAddress, out bool lockFailed)) { if (sessionCtx.phase == Phase.REST) hlog.MarkPage(logicalAddress, sessionCtx.version); else hlog.MarkPageAtomic(logicalAddress, sessionCtx.version); @@ -874,8 +872,8 @@ internal OperationStatus InternalRMW( goto LatchRelease; // Release shared latch (if acquired) } - // ConcurrentWriter failed (e.g. insufficient space). 
Another thread may come along to do this update in-place; Seal it to prevent that. - if (!recordInfo.Seal(fasterSession.IsManualLocking)) + // InPlaceUpdater failed (e.g. insufficient space). Another thread may come along to do this update in-place; Seal it to prevent that. + if (lockFailed || !recordInfo.Seal(fasterSession.IsManualLocking)) return OperationStatus.RETRY_NOW; unsealPhysicalAddress = physicalAddress; } @@ -1362,8 +1360,12 @@ internal OperationStatus InternalDelete( goto LatchRelease; // Release shared latch (if acquired) } - if (!fasterSession.ConcurrentDeleter(ref hlog.GetKey(physicalAddress), ref recordValue, ref recordInfo, logicalAddress)) + if (!fasterSession.ConcurrentDeleter(ref hlog.GetKey(physicalAddress), ref recordValue, ref recordInfo, logicalAddress, out bool lockFailed)) + { + if (lockFailed) + status = OperationStatus.RETRY_NOW; goto CreateNewRecord; + } if (sessionCtx.phase == Phase.REST) hlog.MarkPage(logicalAddress, sessionCtx.version); @@ -1593,8 +1595,8 @@ internal OperationStatus InternalLock( { if (lockOp.LockOperationType == LockOperationType.IsLocked) status = OperationStatus.SUCCESS; - else - recordInfo.HandleLockOperation(lockOp, out _); + else if (!recordInfo.HandleLockOperation(lockOp, out _)) + return OperationStatus.RETRY_NOW; } if (lockOp.LockOperationType == LockOperationType.IsLocked) lockInfo = recordInfo; @@ -2852,7 +2854,13 @@ private bool LockReadCacheRecord(long logicalAddress, ref Key key, LockOperation if (!recordInfo.IsIntermediate(out internalStatus)) { if (lockOp.LockOperationType != LockOperationType.IsLocked) - recordInfo.HandleLockOperation(lockOp, out _); + { + if (!recordInfo.HandleLockOperation(lockOp, out _)) + { + internalStatus = OperationStatus.RETRY_NOW; + return false; + } + } lockInfo = recordInfo; } return true; @@ -2943,9 +2951,8 @@ private bool SkipAndInvalidateReadCache(ref long logicalAddress, ref Key key, ou ref RecordInfo recordInfo = ref readcache.GetInfo(physicalAddress); if 
(comparer.Equals(ref key, ref readcache.GetKey(physicalAddress))) { - if (recordInfo.IsIntermediate(out internalStatus)) + if (recordInfo.IsIntermediate(out internalStatus) || !recordInfo.LockExclusive()) return false; - recordInfo.LockExclusive(); recordInfo.SetInvalid(); recordInfo.UnlockExclusive(); } @@ -2967,10 +2974,9 @@ private void ReadCacheEvict(long fromHeadAddress, long toHeadAddress) var logicalAddress = Constants.kInvalidAddress; var physicalAddress = default(long); - HashBucketEntry entry = default; logicalAddress = fromHeadAddress; - // Remove readcache entries from the main FKV that are in the fromHeadAddress/toHeadAddress range in the readcache. + // Iterate readcache entries in the range fromHeadAddress/toHeadAddress range, and remove them from the primary FKV. while (logicalAddress < toHeadAddress) { physicalAddress = readcache.GetPhysicalAddress(logicalAddress); @@ -2980,36 +2986,68 @@ private void ReadCacheEvict(long fromHeadAddress, long toHeadAddress) { ref Key key = ref readcache.GetKey(physicalAddress); - // If this to-be-evicted readcache record's prevAddress points to a record in the main FKV... + // If there is a readcache entry for this hash, the chain will always be of the form: + // hashtable -> zero or more readcache entries in latest-to-earliest order -> main FKV entry. + + // If this to-be-evicted readcache record's prevAddress points to a record in the main FKV, evict all Invalid + // readcache records in this key's readcache chain in the FKV, as well as any entries in the readcache range. + // The ordering of readcache records ensures we won't miss any readcache records that are eligible for eviction, + // while only executing the body of the loop once for each hash chain. + HashBucketEntry entry = default; entry.word = info.PreviousAddress; if (!entry.ReadCache) { - // Find the index entry for the key in the main FKV. + // Find the hash index entry for the key in the main FKV. 
var hash = comparer.GetHashCode64(ref key); var tag = (ushort)((ulong)hash >> Constants.kHashTagShift); entry = default; var tagExists = FindTag(hash, tag, ref bucket, ref slot, ref entry); + if (!tagExists) + continue; - // Because we call SkipReadCache on upserts, if we have a readcache entry for this hash, it will be pointed to by - // the hashtable; there may be other readcache entries as well, before one that is a non-readcache entry. - // That is, if there is a readcache entry for this hash, the chain will always be of the form: - // hashtable -> zero or more readcache entries -> main FKV entry. - // Remove the readcache entry for this hash from the main FKV, unless somee other thread has done it for us. - // Note that this removes the entire leading readcache-entry set of records from the hash table pointer. - while (tagExists && entry.ReadCache) + // Traverse the chain of readcache entries for this key. + while (entry.ReadCache) { - var updatedEntry = default(HashBucketEntry); - updatedEntry.Tag = tag; - updatedEntry.Address = info.PreviousAddress; - updatedEntry.Pending = entry.Pending; - updatedEntry.Tentative = false; + var la = entry.Address & ~Constants.kReadCacheBitMask; + var pa = readcache.GetPhysicalAddress(la); + ref RecordInfo ri = ref readcache.GetInfo(pa); + + // If the record is Invalid or its address is in the from/to HeadAddress range, unlink it from the chain. + if (ri.Invalid || (la >= fromHeadAddress && la < toHeadAddress)) + { + if (ri.IsLocked) + { + // If it is not Invalid, we must Seal it so there is no possibility it will be missed while we're in the process + // of transferring it to the Lock Table. Use manualLocking as we want to transfer the locks, not drain them. + if (!ri.Invalid) + ri.Seal(manualLocking: true); + + // Now get it into the lock table, so it is ready as soon as the CAS removes this record from the RC chain. 
+ this.LockTable.TransferFrom(ref key, ri); + } + + // Swap in the next entry in the chain, unless somee other thread has done it for us. + // Note that this removes the entire leading readcache-entry set of records from the hash table pointer. + while (tagExists && entry.ReadCache) + { + var updatedEntry = default(HashBucketEntry); + updatedEntry.Tag = tag; + updatedEntry.Address = ri.PreviousAddress; + updatedEntry.Pending = entry.Pending; + updatedEntry.Tentative = false; - if (entry.word == Interlocked.CompareExchange - (ref bucket->bucket_entries[slot], updatedEntry.word, entry.word)) - break; + if (entry.word == Interlocked.CompareExchange(ref bucket->bucket_entries[slot], updatedEntry.word, entry.word)) + { + entry.word = updatedEntry.word; + break; + } - tagExists = FindTag(hash, tag, ref bucket, ref slot, ref entry); + tagExists = FindTag(hash, tag, ref bucket, ref slot, ref entry); + } + } + else + entry.word = ri.PreviousAddress; } } } diff --git a/cs/src/core/Index/Interfaces/IFasterSession.cs b/cs/src/core/Index/Interfaces/IFasterSession.cs index dda2e3bb7..132cd4da1 100644 --- a/cs/src/core/Index/Interfaces/IFasterSession.cs +++ b/cs/src/core/Index/Interfaces/IFasterSession.cs @@ -35,14 +35,14 @@ internal interface IFasterSession : IFasterS #region Reads bool SingleReader(ref Key key, ref Input input, ref Value value, ref Output dst, ref RecordInfo recordInfo, long address); - bool ConcurrentReader(ref Key key, ref Input input, ref Value value, ref Output dst, ref RecordInfo recordInfo, long address); + bool ConcurrentReader(ref Key key, ref Input input, ref Value value, ref Output dst, ref RecordInfo recordInfo, long address, out bool lockFailed); void ReadCompletionCallback(ref Key key, ref Input input, ref Output output, Context ctx, Status status, RecordMetadata recordMetadata); #endregion reads #region Upserts void SingleWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address); 
void PostSingleWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address); - bool ConcurrentWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address); + bool ConcurrentWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address, out bool lockFailed); void UpsertCompletionCallback(ref Key key, ref Input input, ref Value value, Context ctx); #endregion Upserts @@ -60,7 +60,7 @@ internal interface IFasterSession : IFasterS #endregion CopyUpdater #region InPlaceUpdater - bool InPlaceUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, long address); + bool InPlaceUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, long address, out bool lockFailed); #endregion InPlaceUpdater void RMWCompletionCallback(ref Key key, ref Input input, ref Output output, Context ctx, Status status, RecordMetadata recordMetadata); @@ -68,7 +68,7 @@ internal interface IFasterSession : IFasterS #region Deletes void PostSingleDeleter(ref Key key, ref RecordInfo recordInfo, long address); - bool ConcurrentDeleter(ref Key key, ref Value value, ref RecordInfo recordInfo, long address); + bool ConcurrentDeleter(ref Key key, ref Value value, ref RecordInfo recordInfo, long address, out bool lockFailed); void DeleteCompletionCallback(ref Key key, Context ctx); #endregion Deletes diff --git a/cs/src/core/Utilities/LockTable.cs b/cs/src/core/Utilities/LockTable.cs index f068d8d81..16a46e5d6 100644 --- a/cs/src/core/Utilities/LockTable.cs +++ b/cs/src/core/Utilities/LockTable.cs @@ -112,13 +112,11 @@ internal bool LockOrTentative(ref TKey key, LockType lockType, out bool tentativ RecordInfo lockRecordInfo = default; lockRecordInfo.Tentative = true; RecordInfo logRecordInfo = default; - 
logRecordInfo.Lock(lockType); + existingConflict = !logRecordInfo.Lock(lockType); Interlocked.Increment(ref this.approxNumItems); return new(key, logRecordInfo, lockRecordInfo); }, (key, lte) => { - if (lte.lockRecordInfo.Tentative || lte.lockRecordInfo.Sealed) - existingConflict = true; - lte.logRecordInfo.Lock(lockType); + existingConflict = !lte.logRecordInfo.Lock(lockType); if (lte.lockRecordInfo.Sealed) { existingConflict = true; diff --git a/cs/src/core/Utilities/LockUtility.cs b/cs/src/core/Utilities/LockUtility.cs index ac859eada..d1ee424b0 100644 --- a/cs/src/core/Utilities/LockUtility.cs +++ b/cs/src/core/Utilities/LockUtility.cs @@ -37,28 +37,31 @@ internal static bool HandleIntermediate(this ref RecordInfo recordInfo, out Oper } [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static void HandleLockOperation(this ref RecordInfo recordInfo, LockOperation lockOp, out bool isLock) + internal static bool HandleLockOperation(this ref RecordInfo recordInfo, LockOperation lockOp, out bool isLock) { isLock = lockOp.LockOperationType == LockOperationType.Lock; if (isLock) - recordInfo.Lock(lockOp.LockType); - else if (lockOp.LockOperationType == LockOperationType.Unlock) + return recordInfo.Lock(lockOp.LockType); + + if (lockOp.LockOperationType == LockOperationType.Unlock) recordInfo.Unlock(lockOp.LockType); else Debug.Fail($"Unexpected LockOperation {lockOp.LockOperationType}"); + return true; } - internal static void Lock(this ref RecordInfo recordInfo, LockType lockType) + internal static bool Lock(this ref RecordInfo recordInfo, LockType lockType) { if (lockType == LockType.Shared) - recordInfo.LockShared(); - else if (lockType == LockType.Exclusive) - recordInfo.LockExclusive(); - else if (lockType == LockType.ExclusiveFromShared) - recordInfo.LockExclusiveFromShared(); + return recordInfo.LockShared(); + if (lockType == LockType.Exclusive) + return recordInfo.LockExclusive(); + if (lockType == LockType.ExclusiveFromShared) + return 
recordInfo.LockExclusiveFromShared(); else Debug.Fail($"Unexpected LockType: {lockType}"); + return false; } internal static void Unlock(this ref RecordInfo recordInfo, LockType lockType) diff --git a/cs/src/core/VarLen/MemoryFunctions.cs b/cs/src/core/VarLen/MemoryFunctions.cs index ea012ff45..63c9ad16f 100644 --- a/cs/src/core/VarLen/MemoryFunctions.cs +++ b/cs/src/core/VarLen/MemoryFunctions.cs @@ -31,11 +31,6 @@ public override void SingleWriter(ref Key key, ref Memory input, ref Memory public override bool ConcurrentWriter(ref Key key, ref Memory input, ref Memory src, ref Memory dst, ref (IMemoryOwner, int) output, ref RecordInfo recordInfo, long address) { - // We can write the source (src) data to the existing destination (dst) in-place, - // only if there is sufficient space - if (recordInfo.Sealed) - return false; - if (dst.Length < src.Length) { return false; diff --git a/cs/src/core/VarLen/SpanByteFunctions.cs b/cs/src/core/VarLen/SpanByteFunctions.cs index e6ecff2b0..dbb6441eb 100644 --- a/cs/src/core/VarLen/SpanByteFunctions.cs +++ b/cs/src/core/VarLen/SpanByteFunctions.cs @@ -26,11 +26,6 @@ public override void SingleWriter(ref Key key, ref SpanByte input, ref SpanByte /// public override bool ConcurrentWriter(ref Key key, ref SpanByte input, ref SpanByte src, ref SpanByte dst, ref Output output, ref RecordInfo recordInfo, long address) { - // We can write the source (src) data to the existing destination (dst) in-place, - // only if there is sufficient space - if (recordInfo.Sealed) - return false; - if (dst.Length < src.Length) { return false; diff --git a/cs/test/LockTests.cs b/cs/test/LockTests.cs index 3f7c00426..4db30a88f 100644 --- a/cs/test/LockTests.cs +++ b/cs/test/LockTests.cs @@ -65,6 +65,7 @@ public unsafe void RecordInfoLockTest() RecordInfo recordInfo = new(); RecordInfo* ri = &recordInfo; + // We are not sealing in this test, so there is no need to check the return XLockTest(() => ri->LockExclusive(), () => ri->UnlockExclusive()); 
SLockTest(() => ri->LockShared(), () => ri->UnlockShared()); XSLockTest(() => ri->LockExclusive(), () => ri->UnlockExclusive(), () => ri->LockShared(), () => ri->UnlockShared()); From a6b1f3fdeb28d0446b436c3148375d88c98da331 Mon Sep 17 00:00:00 2001 From: TedHartMS <15467143+TedHartMS@users.noreply.github.com> Date: Tue, 28 Dec 2021 02:47:56 -0800 Subject: [PATCH 12/25] Add ReadCache and LockTable-transfer tests; move ReadCache tests out to their own sub-hierarchy --- .../ClientSession/LockableUnsafeContext.cs | 23 +- cs/src/core/Index/Common/RecordInfo.cs | 2 +- cs/src/core/Index/FASTER/FASTER.cs | 4 +- cs/src/core/Index/FASTER/FASTERImpl.cs | 316 ++++++---- cs/src/core/Utilities/LockTable.cs | 46 +- cs/test/LockableUnsafeContextTests.cs | 229 ++++++- cs/test/NativeReadCacheTests.cs | 2 +- cs/test/ObjectReadCacheTests.cs | 2 +- cs/test/ReadCacheChainTests.cs | 557 ++++++++++++++++++ cs/test/ReproReadCacheTest.cs | 4 +- cs/test/TestUtils.cs | 4 +- 11 files changed, 1008 insertions(+), 181 deletions(-) create mode 100644 cs/test/ReadCacheChainTests.cs diff --git a/cs/src/core/ClientSession/LockableUnsafeContext.cs b/cs/src/core/ClientSession/LockableUnsafeContext.cs index 6a815292c..eb12a2525 100644 --- a/cs/src/core/ClientSession/LockableUnsafeContext.cs +++ b/cs/src/core/ClientSession/LockableUnsafeContext.cs @@ -40,20 +40,32 @@ internal LockableUnsafeContext(ClientSession [MethodImpl(MethodImplOptions.AggressiveInlining)] - public void ResumeThread() => clientSession.UnsafeResumeThread(); + public void ResumeThread() + { + CheckAcquired(); + clientSession.UnsafeResumeThread(); + } /// /// Resume session on current thread. IMPORTANT: Call SuspendThread before any async op. 
/// /// Epoch that the session resumed on; can be saved to see if epoch has changed [MethodImpl(MethodImplOptions.AggressiveInlining)] - public void ResumeThread(out int resumeEpoch) => clientSession.UnsafeResumeThread(out resumeEpoch); + public void ResumeThread(out int resumeEpoch) + { + CheckAcquired(); + clientSession.UnsafeResumeThread(out resumeEpoch); + } /// /// Suspend session on current thread /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - public void SuspendThread() => clientSession.UnsafeSuspendThread(); + public void SuspendThread() + { + Debug.Assert(clientSession.fht.epoch.ThisInstanceProtected()); + clientSession.UnsafeSuspendThread(); + } /// /// Current epoch of the session @@ -67,7 +79,7 @@ internal LockableUnsafeContext(ClientSessionWait for all pending operations on session to complete /// Spin-wait until ongoing commit/checkpoint, if any, completes /// True if all pending operations have completed, false otherwise - public bool UnsafeCompletePending(bool wait = false, bool spinWaitForCommit = false) + public bool CompletePending(bool wait = false, bool spinWaitForCommit = false) => this.clientSession.UnsafeCompletePending(this.FasterSession, false, wait, spinWaitForCommit); /// @@ -78,7 +90,7 @@ public bool UnsafeCompletePending(bool wait = false, bool spinWaitForCommit = fa /// Wait for all pending operations on session to complete /// Spin-wait until ongoing commit/checkpoint, if any, completes /// True if all pending operations have completed, false otherwise - public bool UnsafeCompletePendingWithOutputs(out CompletedOutputIterator completedOutputs, bool wait = false, bool spinWaitForCommit = false) + public bool CompletePendingWithOutputs(out CompletedOutputIterator completedOutputs, bool wait = false, bool spinWaitForCommit = false) => this.clientSession.UnsafeCompletePendingWithOutputs(this.FasterSession, out completedOutputs, wait, spinWaitForCommit); #region Acquire and Dispose @@ -99,6 +111,7 @@ public void Dispose() throw 
new FasterException("Disposing LockableUnsafeContext with a protected epoch; must call UnsafeSuspendThread"); if (TotalLockCount > 0) throw new FasterException($"Disposing LockableUnsafeContext with locks held: {sharedLockCount} shared locks, {exclusiveLockCount} exclusive locks"); + this.isAcquired = false; Interlocked.Decrement(ref this.clientSession.fht.NumActiveLockingSessions); } #endregion Acquire and Dispose diff --git a/cs/src/core/Index/Common/RecordInfo.cs b/cs/src/core/Index/Common/RecordInfo.cs index 3e96ab396..4c44c20a1 100644 --- a/cs/src/core/Index/Common/RecordInfo.cs +++ b/cs/src/core/Index/Common/RecordInfo.cs @@ -217,7 +217,7 @@ public void CopyLocksFrom(RecordInfo other) public bool TryUpdateAddress(long newPrevAddress) { var expectedWord = word; - RecordInfo newRI = default; + RecordInfo newRI = new() { word = word }; newRI.PreviousAddress = newPrevAddress; var foundWord = Interlocked.CompareExchange(ref this.word, newRI.word, expectedWord); return foundWord == expectedWord; diff --git a/cs/src/core/Index/FASTER/FASTER.cs b/cs/src/core/Index/FASTER/FASTER.cs index e4bd633a6..78587ca48 100644 --- a/cs/src/core/Index/FASTER/FASTER.cs +++ b/cs/src/core/Index/FASTER/FASTER.cs @@ -58,12 +58,12 @@ public partial class FasterKV : FasterBase, IFasterKV { internal readonly AllocatorBase hlog; - private readonly AllocatorBase readcache; + internal readonly AllocatorBase readcache; /// /// Compares two keys /// - protected readonly IFasterEqualityComparer comparer; + internal protected readonly IFasterEqualityComparer comparer; internal readonly bool UseReadCache; private readonly CopyReadsToTail CopyReadsToTail; diff --git a/cs/src/core/Index/FASTER/FASTERImpl.cs b/cs/src/core/Index/FASTER/FASTERImpl.cs index 6aaf00c2e..edc639b67 100644 --- a/cs/src/core/Index/FASTER/FASTERImpl.cs +++ b/cs/src/core/Index/FASTER/FASTERImpl.cs @@ -383,14 +383,14 @@ internal OperationStatus InternalUpsert( var logicalAddress = entry.Address; var physicalAddress = 
default(long); - long lastReadCachePhysicalAddress = Constants.kInvalidAddress; - long prevFirstReadCacheLogicalAddress = Constants.kInvalidAddress; + long lowestReadCachePhysicalAddress = Constants.kInvalidAddress; + long prevHighestReadCacheLogicalAddress = Constants.kInvalidAddress; if (UseReadCache) { - prevFirstReadCacheLogicalAddress = logicalAddress; - SkipReadCache(ref logicalAddress, out lastReadCachePhysicalAddress); - if (prevFirstReadCacheLogicalAddress == logicalAddress) // if there were no readcache records - prevFirstReadCacheLogicalAddress = Constants.kInvalidAddress; + prevHighestReadCacheLogicalAddress = logicalAddress; + SkipReadCache(ref logicalAddress, out lowestReadCachePhysicalAddress); + if (prevHighestReadCacheLogicalAddress == logicalAddress) // if there were no readcache records + prevHighestReadCacheLogicalAddress = Constants.kInvalidAddress; } var latestLogicalAddress = logicalAddress; @@ -511,8 +511,8 @@ internal OperationStatus InternalUpsert( // Invalidate the entry in the read cache, as we did not do IPU. 
if (UseReadCache) { - prevFirstReadCacheLogicalAddress = logicalAddress; - if (!SkipAndInvalidateReadCache(ref logicalAddress, ref key, out lastReadCachePhysicalAddress, out OperationStatus internalStatus)) + var la = prevHighestReadCacheLogicalAddress; + if (!SkipAndInvalidateReadCache(ref la, ref key, out lowestReadCachePhysicalAddress, out OperationStatus internalStatus)) return internalStatus; } @@ -520,7 +520,7 @@ internal OperationStatus InternalUpsert( { // Immutable region or new record status = CreateNewRecordUpsert(ref key, ref input, ref value, ref output, ref pendingContext, fasterSession, sessionCtx, bucket, slot, tag, entry, - latestLogicalAddress, prevFirstReadCacheLogicalAddress, lastReadCachePhysicalAddress, unsealPhysicalAddress); + latestLogicalAddress, prevHighestReadCacheLogicalAddress, lowestReadCachePhysicalAddress, unsealPhysicalAddress); if (status != OperationStatus.SUCCESS) { if (unsealPhysicalAddress != Constants.kInvalidAddress) @@ -641,7 +641,7 @@ private LatchDestination AcquireLatchUpsert(FasterExecut private OperationStatus CreateNewRecordUpsert(ref Key key, ref Input input, ref Value value, ref Output output, ref PendingContext pendingContext, FasterSession fasterSession, FasterExecutionContext sessionCtx, HashBucket* bucket, int slot, ushort tag, HashBucketEntry entry, - long latestLogicalAddress, long prevFirstReadCacheLogicalAddress, long lastReadCachePhysicalAddress, long unsealPhysicalAddress) + long latestLogicalAddress, long prevHighestReadCacheLogicalAddress, long lowestReadCachePhysicalAddress, long unsealPhysicalAddress) where FasterSession : IFasterSession { var (actualSize, allocateSize) = hlog.GetRecordSize(ref key, ref value); @@ -660,8 +660,12 @@ private OperationStatus CreateNewRecordUpsert( FindOrCreateTag(hash, tag, ref bucket, ref slot, ref entry, hlog.BeginAddress); var logicalAddress = entry.Address; - long lastReadCachePhysicalAddress = Constants.kInvalidAddress; - long prevFirstReadCacheLogicalAddress = 
Constants.kInvalidAddress; + long lowestReadCachePhysicalAddress = Constants.kInvalidAddress; + long prevHighestReadCacheLogicalAddress = Constants.kInvalidAddress; if (UseReadCache) { - prevFirstReadCacheLogicalAddress = logicalAddress; - SkipReadCache(ref logicalAddress, out lastReadCachePhysicalAddress); - if (prevFirstReadCacheLogicalAddress == logicalAddress) // if there were no readcache records - prevFirstReadCacheLogicalAddress = Constants.kInvalidAddress; + prevHighestReadCacheLogicalAddress = logicalAddress; + SkipReadCache(ref logicalAddress, out lowestReadCachePhysicalAddress); + if (prevHighestReadCacheLogicalAddress == logicalAddress) // if there were no readcache records + prevHighestReadCacheLogicalAddress = Constants.kInvalidAddress; } var latestLogicalAddress = logicalAddress; @@ -864,8 +871,10 @@ internal OperationStatus InternalRMW( { if (fasterSession.InPlaceUpdater(ref key, ref input, ref recordValue, ref output, ref recordInfo, logicalAddress, out bool lockFailed)) { - if (sessionCtx.phase == Phase.REST) hlog.MarkPage(logicalAddress, sessionCtx.version); - else hlog.MarkPageAtomic(logicalAddress, sessionCtx.version); + if (sessionCtx.phase == Phase.REST) + hlog.MarkPage(logicalAddress, sessionCtx.version); + else + hlog.MarkPageAtomic(logicalAddress, sessionCtx.version); pendingContext.recordInfo = recordInfo; pendingContext.logicalAddress = logicalAddress; status = OperationStatus.SUCCESS; @@ -939,15 +948,15 @@ internal OperationStatus InternalRMW( // Invalidate the entry in the read cache, as we did not do IPU. 
if (UseReadCache) { - prevFirstReadCacheLogicalAddress = logicalAddress; - if (!SkipAndInvalidateReadCache(ref logicalAddress, ref key, out lastReadCachePhysicalAddress, out OperationStatus internalStatus)) + var la = prevHighestReadCacheLogicalAddress; + if (!SkipAndInvalidateReadCache(ref la, ref key, out lowestReadCachePhysicalAddress, out OperationStatus internalStatus)) return internalStatus; } if (latchDestination != LatchDestination.CreatePendingContext) { status = CreateNewRecordRMW(ref key, ref input, ref output, ref pendingContext, fasterSession, sessionCtx, bucket, slot, logicalAddress, physicalAddress, tag, entry, - latestLogicalAddress, prevFirstReadCacheLogicalAddress, lastReadCachePhysicalAddress, unsealPhysicalAddress); + latestLogicalAddress, prevHighestReadCacheLogicalAddress, lowestReadCachePhysicalAddress, unsealPhysicalAddress); if (status != OperationStatus.SUCCESS) { if (unsealPhysicalAddress != Constants.kInvalidAddress) @@ -1069,7 +1078,7 @@ private LatchDestination AcquireLatchRMW(PendingContext< private OperationStatus CreateNewRecordRMW(ref Key key, ref Input input, ref Output output, ref PendingContext pendingContext, FasterSession fasterSession, FasterExecutionContext sessionCtx, HashBucket* bucket, int slot, long logicalAddress, long physicalAddress, ushort tag, HashBucketEntry entry, long latestLogicalAddress, - long prevFirstReadCacheLogicalAddress, long lastReadCachePhysicalAddress, long unsealPhysicalAddress) + long prevHighestReadCacheLogicalAddress, long lowestReadCachePhysicalAddress, long unsealPhysicalAddress) where FasterSession : IFasterSession { // Determine if we should allocate a new record @@ -1131,8 +1140,12 @@ ref hlog.GetValue(newPhysicalAddress, newPhysicalAddress + actualSize), return OperationStatus.RETRY_NOW; } + bool lockTableEntryExists = false; + if (unsealPhysicalAddress == Constants.kInvalidAddress && LockTable.IsActive && !LockTable.TrySeal(ref key, out lockTableEntryExists) && lockTableEntryExists) + 
return OperationStatus.RETRY_NOW; + bool success = true; - if (lastReadCachePhysicalAddress == Constants.kInvalidAddress) + if (lowestReadCachePhysicalAddress == Constants.kInvalidAddress) { // Insert as the first record in the hash chain. var updatedEntry = default(HashBucketEntry); @@ -1148,7 +1161,7 @@ ref hlog.GetValue(newPhysicalAddress, newPhysicalAddress + actualSize), else { // Splice into the gap of the last readcache/first main log entries. - ref RecordInfo rcri = ref readcache.GetInfo(lastReadCachePhysicalAddress); + ref RecordInfo rcri = ref readcache.GetInfo(lowestReadCachePhysicalAddress); if (rcri.PreviousAddress != latestLogicalAddress) return OperationStatus.RETRY_NOW; @@ -1156,15 +1169,18 @@ ref hlog.GetValue(newPhysicalAddress, newPhysicalAddress + actualSize), success = rcri.TryUpdateAddress(newLogicalAddress); // Now see if we have added a readcache entry from a pending read while we were inserting; if so it is obsolete and must be Invalidated. - InvalidateUpdatedRecordInReadCache(entry.word, ref key, prevFirstReadCacheLogicalAddress); + InvalidateUpdatedRecordInReadCache(entry.word, ref key, prevHighestReadCacheLogicalAddress); } if (success) { if (unsealPhysicalAddress != Constants.kInvalidAddress) recordInfo.CopyLocksFrom(hlog.GetInfo(unsealPhysicalAddress)); - else if (LockTable.IsActive && !LockTable.ApplyToLogRecord(ref key, ref recordInfo)) + else if (lockTableEntryExists && !LockTable.ApplyToLogRecord(ref key, ref recordInfo)) + { + LockTable.Unseal(ref key); return OperationStatus.RETRY_NOW; + } recordInfo.Tentative = false; // If IU, status will be NOTFOUND; return that. 
@@ -1265,14 +1281,14 @@ internal OperationStatus InternalDelete( logicalAddress = entry.Address; - long lastReadCachePhysicalAddress = Constants.kInvalidAddress; - long prevFirstReadCacheLogicalAddress = Constants.kInvalidAddress; + long lowestReadCachePhysicalAddress = Constants.kInvalidAddress; + long prevHighestReadCacheLogicalAddress = Constants.kInvalidAddress; if (UseReadCache) { - prevFirstReadCacheLogicalAddress = logicalAddress; - SkipReadCache(ref logicalAddress, out lastReadCachePhysicalAddress); - if (prevFirstReadCacheLogicalAddress == logicalAddress) // if there were no readcache records - prevFirstReadCacheLogicalAddress = Constants.kInvalidAddress; + prevHighestReadCacheLogicalAddress = logicalAddress; + SkipReadCache(ref logicalAddress, out lowestReadCachePhysicalAddress); + if (prevHighestReadCacheLogicalAddress == logicalAddress) // if there were no readcache records + prevHighestReadCacheLogicalAddress = Constants.kInvalidAddress; } var latestLogicalAddress = logicalAddress; @@ -1419,6 +1435,14 @@ internal OperationStatus InternalDelete( #region Create new record in the mutable region CreateNewRecord: { + // Invalidate the entry in the read cache, as we did not do IPU. 
+ if (UseReadCache) + { + var la = prevHighestReadCacheLogicalAddress; + if (!SkipAndInvalidateReadCache(ref la, ref key, out lowestReadCachePhysicalAddress, out OperationStatus internalStatus)) + return internalStatus; + } + var value = default(Value); // Immutable region or new record // Allocate default record size for tombstone @@ -1443,8 +1467,12 @@ internal OperationStatus InternalDelete( latestLogicalAddress); hlog.Serialize(ref key, newPhysicalAddress); + bool lockTableEntryExists = false; + if (unsealPhysicalAddress == Constants.kInvalidAddress && LockTable.IsActive && !LockTable.TrySeal(ref key, out lockTableEntryExists) && lockTableEntryExists) + return OperationStatus.RETRY_NOW; + bool success = true; - if (lastReadCachePhysicalAddress == Constants.kInvalidAddress) + if (lowestReadCachePhysicalAddress == Constants.kInvalidAddress) { // Insert as the first record in the hash chain. var updatedEntry = default(HashBucketEntry); @@ -1460,7 +1488,7 @@ internal OperationStatus InternalDelete( else { // Splice into the gap of the last readcache/first main log entries. - ref RecordInfo rcri = ref readcache.GetInfo(lastReadCachePhysicalAddress); + ref RecordInfo rcri = ref readcache.GetInfo(lowestReadCachePhysicalAddress); if (rcri.PreviousAddress != latestLogicalAddress) return OperationStatus.RETRY_NOW; @@ -1468,15 +1496,18 @@ internal OperationStatus InternalDelete( success = rcri.TryUpdateAddress(newLogicalAddress); // Now see if we have added a readcache entry from a pending read while we were inserting; if so it is obsolete and must be Invalidated. 
- InvalidateUpdatedRecordInReadCache(entry.word, ref key, prevFirstReadCacheLogicalAddress); + InvalidateUpdatedRecordInReadCache(entry.word, ref key, prevHighestReadCacheLogicalAddress); } if (success) { if (unsealPhysicalAddress != Constants.kInvalidAddress) recordInfo.CopyLocksFrom(hlog.GetInfo(unsealPhysicalAddress)); - else if (LockTable.IsActive && !LockTable.ApplyToLogRecord(ref key, ref recordInfo)) + else if (lockTableEntryExists && !LockTable.ApplyToLogRecord(ref key, ref recordInfo)) + { + LockTable.Unseal(ref key); return OperationStatus.RETRY_NOW; + } recordInfo.Tentative = false; // Note that this is the new logicalAddress; we have not retrieved the old one if it was below HeadAddress, and thus @@ -1567,7 +1598,7 @@ internal OperationStatus InternalLock( OperationStatus status; if (UseReadCache) { - if (LockReadCacheRecord(logicalAddress, ref key, lockOp, out lockInfo, out status)) + if (DoReadCacheRecordLockOperation(logicalAddress, ref key, lockOp, out lockInfo, out status)) return status; } @@ -1798,7 +1829,7 @@ internal void InternalContinuePendingReadCopyToTail @@ -1847,8 +1878,8 @@ internal OperationStatus InternalContinuePendingRMW> Constants.kHashTagShift); - long lastReadCachePhysicalAddress = Constants.kInvalidAddress; - long prevFirstReadCacheLogicalAddress = Constants.kInvalidAddress; + long lowestReadCachePhysicalAddress = Constants.kInvalidAddress; + long prevHighestReadCacheLogicalAddress = Constants.kInvalidAddress; #region Trace Back for Record on In-Memory HybridLog while (true) @@ -1860,11 +1891,11 @@ internal OperationStatus InternalContinuePendingRMW Constants.kInvalidAddress; /* in loop */) + { + HashBucketEntry entry = new() { word = prevAddress }; + keyLog = entry.ReadCache ? readcache : hlog; + keyPhysicalAddress = keyLog.GetPhysicalAddress(entry.Address & ~Constants.kReadCacheBitMask); + ri = keyLog.GetInfo(keyPhysicalAddress); + + // Stop at the first valid or MainLog record. 
+ if (!ri.Invalid || !entry.ReadCache) + break; + prevAddress = ri.PreviousAddress; + } + + // Found no valid record so can't look up key + if (ri.Invalid) + keyLog = null; + } + + if (keyLog is not null) + { + ref Key key = ref keyLog.GetKey(keyPhysicalAddress); // If there is a readcache entry for this hash, the chain will always be of the form: - // hashtable -> zero or more readcache entries in latest-to-earliest order -> main FKV entry. + // hashtable -> zero or more readcache entries in latest-to-earliest order -> main FKV records. // If this to-be-evicted readcache record's prevAddress points to a record in the main FKV, evict all Invalid // readcache records in this key's readcache chain in the FKV, as well as any entries in the readcache range. // The ordering of readcache records ensures we won't miss any readcache records that are eligible for eviction, - // while only executing the body of the loop once for each hash chain. + // while only executing the body of the loop once for each hash chain. Note: This means we may leave some Invalid + // entries in hash chains where the RC->MainLog boundary is not contained in the range to be evicted. HashBucketEntry entry = default; entry.word = info.PreviousAddress; if (!entry.ReadCache) { - // Find the hash index entry for the key in the main FKV. - var hash = comparer.GetHashCode64(ref key); - var tag = (ushort)((ulong)hash >> Constants.kHashTagShift); + for (var restartChain = true; restartChain; /* in loop */) + { + restartChain = false; - entry = default; - var tagExists = FindTag(hash, tag, ref bucket, ref slot, ref entry); - if (!tagExists) - continue; + // Find the hash index entry for the key in the main FKV. + var hash = comparer.GetHashCode64(ref key); + var tag = (ushort)((ulong)hash >> Constants.kHashTagShift); - // Traverse the chain of readcache entries for this key. 
- while (entry.ReadCache) - { - var la = entry.Address & ~Constants.kReadCacheBitMask; - var pa = readcache.GetPhysicalAddress(la); - ref RecordInfo ri = ref readcache.GetInfo(pa); + entry = default; + var tagExists = FindTag(hash, tag, ref bucket, ref slot, ref entry); + if (!tagExists) + continue; - // If the record is Invalid or its address is in the from/to HeadAddress range, unlink it from the chain. - if (ri.Invalid || (la >= fromHeadAddress && la < toHeadAddress)) + // Traverse the chain of readcache entries for this key. + long prevPhysicalAddress = Constants.kInvalidAddress; + while (entry.ReadCache && !restartChain) { - if (ri.IsLocked) - { - // If it is not Invalid, we must Seal it so there is no possibility it will be missed while we're in the process - // of transferring it to the Lock Table. Use manualLocking as we want to transfer the locks, not drain them. - if (!ri.Invalid) - ri.Seal(manualLocking: true); - - // Now get it into the lock table, so it is ready as soon as the CAS removes this record from the RC chain. - this.LockTable.TransferFrom(ref key, ri); - } + var la = entry.Address & ~Constants.kReadCacheBitMask; + var pa = readcache.GetPhysicalAddress(la); + ref RecordInfo ri = ref readcache.GetInfo(pa); - // Swap in the next entry in the chain, unless somee other thread has done it for us. - // Note that this removes the entire leading readcache-entry set of records from the hash table pointer. - while (tagExists && entry.ReadCache) + // If the record is Invalid or its address is in the from/to HeadAddress range, unlink it from the chain. 
+ if (ri.Invalid || (la >= fromHeadAddress && la < toHeadAddress)) { - var updatedEntry = default(HashBucketEntry); - updatedEntry.Tag = tag; - updatedEntry.Address = ri.PreviousAddress; - updatedEntry.Pending = entry.Pending; - updatedEntry.Tentative = false; + if (ri.IsLocked) + { + // If it is not Invalid, we must Seal it so there is no possibility it will be missed while we're in the process + // of transferring it to the Lock Table. Use manualLocking as we want to transfer the locks, not drain them. + if (!ri.Invalid) + { + // If we fail to seal, it means there is another thread ahead of us, so break out of this key chain. + if (!ri.Seal(manualLocking: true)) + break; + } + + // Now get it into the lock table, so it is ready as soon as the CAS removes this record from the RC chain. + this.LockTable.TransferFrom(ref readcache.GetKey(pa), ri); + } - if (entry.word == Interlocked.CompareExchange(ref bucket->bucket_entries[slot], updatedEntry.word, entry.word)) + // Swap in the next entry in the chain. Because we may encounter a race where another thread swaps in a readcache + // record into the hash table entry (and if so that address would be greater than what we have now), we must restart + // the chain processing on thread conflicts (CAS failure). Similarly, another thread may have changed the previous + // readcache record's PreviousAddress. 
+ if (prevPhysicalAddress == Constants.kInvalidAddress) { + var updatedEntry = default(HashBucketEntry); + updatedEntry.Tag = tag; + updatedEntry.Address = ri.PreviousAddress; + updatedEntry.Pending = entry.Pending; + updatedEntry.Tentative = false; + if (entry.word != Interlocked.CompareExchange(ref bucket->bucket_entries[slot], updatedEntry.word, entry.word)) + restartChain = true; entry.word = updatedEntry.word; - break; } - - tagExists = FindTag(hash, tag, ref bucket, ref slot, ref entry); + else + { + ref RecordInfo prevri = ref readcache.GetInfo(prevPhysicalAddress); + if (!prevri.TryUpdateAddress(ri.PreviousAddress)) + restartChain = true; + entry.word = ri.PreviousAddress; + } + } + else + { + prevPhysicalAddress = pa; + entry.word = ri.PreviousAddress; } } - else - entry.word = ri.PreviousAddress; } } } diff --git a/cs/src/core/Utilities/LockTable.cs b/cs/src/core/Utilities/LockTable.cs index 16a46e5d6..ebd05826c 100644 --- a/cs/src/core/Utilities/LockTable.cs +++ b/cs/src/core/Utilities/LockTable.cs @@ -42,7 +42,7 @@ class KeyComparer : IEqualityComparer> public int GetHashCode(IHeapContainer k) => (int)comparer.GetHashCode64(ref k.Get()); } - readonly SafeConcurrentDictionary, LockTableEntry> dict; + readonly internal SafeConcurrentDictionary, LockTableEntry> dict; readonly IVariableLengthStruct keyLen; readonly KeyComparer keyComparer; readonly SectorAlignedBufferPool bufferPool; @@ -81,7 +81,7 @@ private bool Update(ref TKey key, Func, LockTableEntry { lte.lockRecordInfo.Unlock(lockType); return lte; })) + if (Update(ref key, lte => { lte.logRecordInfo.Unlock(lockType); return lte; })) TryRemoveIfNoLocks(ref key); else Debug.Fail("Trying to unlock a nonexistent key"); @@ -97,7 +97,9 @@ internal void TransferFrom(ref TKey key, RecordInfo logRecordInfo) { keyContainer.Dispose(); Debug.Fail("Trying to Transfer to an existing key"); + return; } + Interlocked.Increment(ref this.approxNumItems); } // Lock the LockTable record for the key if it exists, else 
add a Tentative record for it. @@ -158,7 +160,7 @@ internal void TryRemoveIfNoLocks(ref TKey key) // False is legit, as the record may have been removed between the time it was known to be here and the time Seal was called, // or this may be called by SealOrTentative. [MethodImpl(MethodImplOptions.AggressiveInlining)] - private bool TrySeal(ref TKey key, out bool exists) + internal bool TrySeal(ref TKey key, out bool exists) { using var lookupKey = GetKeyContainer(ref key); if (!dict.ContainsKey(lookupKey)) @@ -166,7 +168,7 @@ private bool TrySeal(ref TKey key, out bool exists) exists = false; return true; } - exists = false; + exists = true; return Update(ref key, lte => { lte.lockRecordInfo.Seal(); return lte; }); } @@ -174,27 +176,7 @@ private bool TrySeal(ref TKey key, out bool exists) internal void Unseal(ref TKey key) { if (!Update(ref key, lte => { lte.lockRecordInfo.Unseal(); return lte; })) - Debug.Fail("Trying to remove Unseal nonexistent key"); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal bool TrySealOrTentative(ref TKey key, out bool tentative) - { - tentative = false; - if (this.TrySeal(ref key, out bool exists)) - return true; - if (exists) - return false; - - var keyContainer = GetKeyContainer(ref key); - RecordInfo lockRecordInfo = default; - lockRecordInfo.Tentative = tentative = true; - if (dict.TryAdd(keyContainer, new(keyContainer, default, lockRecordInfo))) - return true; - - // Someone else already inserted a tentative record - keyContainer.Dispose(); - return false; + Debug.Fail("Trying to Unseal nonexistent key"); } [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -210,6 +192,9 @@ internal bool Get(ref TKey key, out RecordInfo recordInfo) return false; } + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal bool Get(TKey key, out RecordInfo recordInfo) => Get(ref key, out recordInfo); + [MethodImpl(MethodImplOptions.AggressiveInlining)] internal bool ContainsKey(ref TKey key) { @@ -220,22 +205,19 @@ 
internal bool ContainsKey(ref TKey key) [MethodImpl(MethodImplOptions.AggressiveInlining)] internal bool ApplyToLogRecord(ref TKey key, ref RecordInfo logRecord) { + // This is called after the record has been CAS'd into the log or readcache, so this should not be allowed to fail. using var lookupKey = GetKeyContainer(ref key); if (dict.TryGetValue(lookupKey, out var lte)) { + Debug.Assert(lte.lockRecordInfo.Sealed, "lockRecordInfo should have been Sealed already"); + // If it's a Tentative record, ignore it--it will be removed by Lock() and retried against the inserted log record. if (lte.lockRecordInfo.Tentative) return true; - // If Sealing fails, we have to retry; it could mean that a pending read (readcache or copytotail) grabbed the locks - // before the Upsert/etc. got to them. In that case, the upsert must retry so those locks will be drained from the - // read entry. Note that Seal() momentarily xlocks the record being sealed, which in this case is the LockTable record; - // this does not affect the lock count of the contained record. 
- if (!lte.lockRecordInfo.Seal()) - return false; - logRecord.CopyLocksFrom(lte.logRecordInfo); lte.lockRecordInfo.SetInvalid(); + lte.lockRecordInfo.Unseal(); if (dict.TryRemove(lookupKey, out _)) { Interlocked.Decrement(ref this.approxNumItems); diff --git a/cs/test/LockableUnsafeContextTests.cs b/cs/test/LockableUnsafeContextTests.cs index 2d7543580..b4728f1b2 100644 --- a/cs/test/LockableUnsafeContextTests.cs +++ b/cs/test/LockableUnsafeContextTests.cs @@ -8,8 +8,9 @@ using System.Threading; using FASTER.core; using NUnit.Framework; +using FASTER.test.ReadCacheTests; -namespace FASTER.test +namespace FASTER.test.LockableUnsafeContext { // Functions for the "Simple lock transaction" case, e.g.: // - Lock key1, key2, key3, keyResult @@ -44,9 +45,12 @@ public enum UpdateOp { Upsert, RMW } class LockableUnsafeContextTests { const int numRecords = 1000; + const int transferToNewKey = 1010; + const int transferToExistingKey = 200; + const int valueMult = 1_000_000; - private FasterKV fkv; + private FasterKV fht; private ClientSession session; private IDevice log; @@ -68,9 +72,9 @@ public void Setup() } } - fkv = new FasterKV(1L << 20, new LogSettings { LogDevice = log, ObjectLogDevice = null, PageSizeBits = 12, MemorySizeBits = 22, ReadCacheSettings = readCacheSettings }, + fht = new FasterKV(1L << 20, new LogSettings { LogDevice = log, ObjectLogDevice = null, PageSizeBits = 12, MemorySizeBits = 22, ReadCacheSettings = readCacheSettings }, supportsLocking: true ); - session = fkv.For(new LockableUnsafeFunctions()).NewSession(); + session = fht.For(new LockableUnsafeFunctions()).NewSession(); } [TearDown] @@ -78,8 +82,8 @@ public void TearDown() { session?.Dispose(); session = null; - fkv?.Dispose(); - fkv = null; + fht?.Dispose(); + fht = null; log?.Dispose(); log = null; @@ -108,9 +112,9 @@ static void AssertIsLocked(LockableUnsafeContext luContext) @@ -122,7 +126,7 @@ static void ClearCountsOnError(LockableUnsafeContext locks = new(); Random rng = new(tid + 101); - 
using var localSession = fkv.For(new LockableUnsafeFunctions()).NewSession(); + using var localSession = fht.For(new LockableUnsafeFunctions()).NewSession(); using var luContext = localSession.GetLockableUnsafeContext(); luContext.ResumeThread(); @@ -451,7 +455,7 @@ void runOpThread(int tid) { Random rng = new(tid + 101); - using var localSession = fkv.For(new LockableUnsafeFunctions()).NewSession(); + using var localSession = fht.For(new LockableUnsafeFunctions()).NewSession(); for (var iteration = 0; iteration < numIterations; ++iteration) { @@ -483,5 +487,190 @@ void runOpThread(int tid) EnsureNoLocks(); } + + void AddLockTableEntry(LockableUnsafeContext> luContext, int key, bool immutable) + { + luContext.Lock(key, LockType.Exclusive); + var found = fht.LockTable.Get(key, out RecordInfo recordInfo); + + // Immutable locks in the ReadOnly region; it does NOT create a LockTable entry + if (immutable) + { + Assert.IsFalse(found); + return; + } + Assert.IsTrue(found); + Assert.IsTrue(recordInfo.IsLockedExclusive); + } + + void VerifySplicedInKey(LockableUnsafeContext> luContext, int expectedKey) + { + // Scan to the end of the readcache chain and verify we inserted the value. + var (_, pa) = ChainTests.SkipReadCacheChain(fht, expectedKey); + var storedKey = fht.hlog.GetKey(pa); + Assert.AreEqual(expectedKey, storedKey); + + // This is called after we've transferred from LockTable to log. + Assert.False(fht.LockTable.Get(expectedKey, out _)); + + // Verify we've transferred the expected locks. + ref RecordInfo recordInfo = ref fht.hlog.GetInfo(pa); + Assert.IsTrue(recordInfo.IsLockedExclusive); + Assert.IsFalse(recordInfo.IsLockedShared); + + // Now unlock it; we're done. 
+ luContext.Unlock(expectedKey, LockType.Exclusive); + } + + [Test] + [Category(TestUtils.LockableUnsafeContextTestCategory)] + [Category(TestUtils.SmokeTestCategory)] + public void TransferFromLockTableToCTTTest() + { + Populate(); + fht.Log.FlushAndEvict(wait: true); + + using var session = fht.NewSession(new SimpleFunctions()); + using var luContext = session.GetLockableUnsafeContext(); + int input = 0, output = 0, key = transferToExistingKey; + RecordMetadata recordMetadata = default; + AddLockTableEntry(luContext, key, immutable:false); + + var status = session.Read(ref key, ref input, ref output, ref recordMetadata, ReadFlags.CopyToTail); + Assert.AreEqual(Status.PENDING, status); + session.CompletePending(wait: true); + + VerifySplicedInKey(luContext, key); + } + + void PopulateAndEvict(bool immutable = false) + { + Populate(); + + if (immutable) + fht.Log.ShiftReadOnlyAddress(fht.Log.TailAddress, wait: true); + else + fht.Log.FlushAndEvict(true); + } + + [Test] + [Category(TestUtils.LockableUnsafeContextTestCategory)] + [Category(TestUtils.SmokeTestCategory)] + public void TransferFromLockTableToUpsertTest([Values] ChainTests.RecordRegion recordRegion) + { + PopulateAndEvict(recordRegion == ChainTests.RecordRegion.Immutable); + + using var session = fht.NewSession(new SimpleFunctions()); + using var luContext = session.GetLockableUnsafeContext(); + luContext.ResumeThread(); + + int key = -1; + try + { + if (recordRegion == ChainTests.RecordRegion.Immutable || recordRegion == ChainTests.RecordRegion.OnDisk) + { + key = transferToExistingKey; + AddLockTableEntry(luContext, key, recordRegion == ChainTests.RecordRegion.Immutable); + var status = luContext.Upsert(key, key * valueMult); + Assert.AreEqual(Status.OK, status); + } + else + { + key = transferToNewKey; + AddLockTableEntry(luContext, key, immutable: false); + var status = luContext.Upsert(key, key * valueMult); + Assert.AreEqual(Status.OK, status); + } + } + finally + { + luContext.SuspendThread(); + } 
+ VerifySplicedInKey(luContext, key); + } + + [Test] + [Category(TestUtils.LockableUnsafeContextTestCategory)] + [Category(TestUtils.SmokeTestCategory)] + public void TransferFromLockTableToRMWTest([Values] ChainTests.RecordRegion recordRegion) + { + PopulateAndEvict(recordRegion == ChainTests.RecordRegion.Immutable); + + using var session = fht.NewSession(new SimpleFunctions()); + using var luContext = session.GetLockableUnsafeContext(); + luContext.ResumeThread(); + + int key = -1; + try + { + if (recordRegion == ChainTests.RecordRegion.Immutable || recordRegion == ChainTests.RecordRegion.OnDisk) + { + key = transferToExistingKey; + AddLockTableEntry(luContext, key, recordRegion == ChainTests.RecordRegion.Immutable); + var status = luContext.RMW(key, key * valueMult); + Assert.AreEqual(recordRegion == ChainTests.RecordRegion.OnDisk ? Status.PENDING : Status.OK, status); + luContext.CompletePending(wait: true); + } + else + { + key = transferToNewKey; + AddLockTableEntry(luContext, key, immutable: false); + var status = luContext.RMW(key, key * valueMult); + Assert.AreEqual(Status.NOTFOUND, status); + } + } + finally + { + luContext.SuspendThread(); + } + + VerifySplicedInKey(luContext, key); + } + + [Test] + [Category(TestUtils.LockableUnsafeContextTestCategory)] + [Category(TestUtils.SmokeTestCategory)] + public void TransferFromLockTableToDeleteTest([Values] ChainTests.RecordRegion recordRegion) + { + PopulateAndEvict(recordRegion == ChainTests.RecordRegion.Immutable); + + using var session = fht.NewSession(new SimpleFunctions()); + using var luContext = session.GetLockableUnsafeContext(); + luContext.ResumeThread(); + + int key = -1; + try + { + if (recordRegion == ChainTests.RecordRegion.Immutable || recordRegion == ChainTests.RecordRegion.OnDisk) + { + key = transferToExistingKey; + AddLockTableEntry(luContext, key, recordRegion == ChainTests.RecordRegion.Immutable); + var status = luContext.Delete(key); + Assert.AreEqual(Status.OK, status); + } + else + { + 
key = transferToNewKey; + AddLockTableEntry(luContext, key, immutable: false); + var status = luContext.Delete(key); + Assert.AreEqual(Status.OK, status); + } + } + finally + { + luContext.SuspendThread(); + } + + VerifySplicedInKey(luContext, key); + } + + [Test] + [Category(TestUtils.LockableUnsafeContextTestCategory)] + [Category(TestUtils.SmokeTestCategory)] + public void LockAndUnlockInLockTableOnlyTest() + { + // TODO this + // TODO: MemoryPageLockEvictionScan tests + } } } diff --git a/cs/test/NativeReadCacheTests.cs b/cs/test/NativeReadCacheTests.cs index 58473aa3a..5d590ff62 100644 --- a/cs/test/NativeReadCacheTests.cs +++ b/cs/test/NativeReadCacheTests.cs @@ -4,7 +4,7 @@ using FASTER.core; using NUnit.Framework; -namespace FASTER.test +namespace FASTER.test.ReadCacheTests { [TestFixture] internal class NativeReadCacheTests diff --git a/cs/test/ObjectReadCacheTests.cs b/cs/test/ObjectReadCacheTests.cs index 1a978ef92..3935b3274 100644 --- a/cs/test/ObjectReadCacheTests.cs +++ b/cs/test/ObjectReadCacheTests.cs @@ -4,7 +4,7 @@ using FASTER.core; using NUnit.Framework; -namespace FASTER.test +namespace FASTER.test.ReadCacheTests { [TestFixture] internal class ObjectReadCacheTests diff --git a/cs/test/ReadCacheChainTests.cs b/cs/test/ReadCacheChainTests.cs new file mode 100644 index 000000000..28b1f49cd --- /dev/null +++ b/cs/test/ReadCacheChainTests.cs @@ -0,0 +1,557 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. 
+ +using FASTER.core; +using NUnit.Framework; +using System; +using System.Collections.Generic; +using System.Linq; + +namespace FASTER.test.ReadCacheTests +{ + class ChainTests + { + private FasterKV fht; + private IDevice log; + const int lowChainKey = 40; + const int midChainKey = lowChainKey + chainLen * (mod / 2); + const int highChainKey = lowChainKey + chainLen * (mod - 1); + const int mod = 10; + const int chainLen = 10; + const int valueAdd = 1_000_000; + + // -1 so highChainKey is first in the chain. + const int numKeys = highChainKey + mod - 1; + + // Insert into chain. + const int spliceInNewKey = highChainKey + mod * 2; + const int spliceInExistingKey = highChainKey - mod; + const int immutableSplitKey = numKeys / 2; + + // This is the record after the first readcache record we insert; it lets us limit the range to ReadCacheEvict + // so we get outsplicing rather than successively overwriting the hash table entry on ReadCacheEvict. + long readCacheHighEvictionAddress; + + class ChainComparer : IFasterEqualityComparer + { + public bool Equals(ref int k1, ref int k2) => k1 == k2; + + public long GetHashCode64(ref int k) => k % mod; + } + + [SetUp] + public void Setup() + { + TestUtils.DeleteDirectory(TestUtils.MethodTestDir, wait: true); + var readCacheSettings = new ReadCacheSettings { MemorySizeBits = 15, PageSizeBits = 9 }; + log = Devices.CreateLogDevice(TestUtils.MethodTestDir + "/NativeReadCacheTests.log", deleteOnClose: true); + fht = new FasterKV + (1L << 20, new LogSettings { LogDevice = log, MemorySizeBits = 15, PageSizeBits = 10, ReadCacheSettings = readCacheSettings }, + comparer: new ChainComparer()); + } + + [TearDown] + public void TearDown() + { + fht?.Dispose(); + fht = null; + log?.Dispose(); + log = null; + TestUtils.DeleteDirectory(TestUtils.MethodTestDir); + } + + void PopulateAndEvict(bool immutable = false) + { + using var session = fht.NewSession(new SimpleFunctions()); + + if (!immutable) + { + for (int key = 0; key < numKeys; 
key++) + session.Upsert(key, key + valueAdd); + session.CompletePending(true); + fht.Log.FlushAndEvict(true); + return; + } + + // Two parts, so we can have some evicted (and bring them into the readcache), and some in immutable (readonly). + for (int key = 0; key < immutableSplitKey; key++) + session.Upsert(key, key + valueAdd); + session.CompletePending(true); + fht.Log.FlushAndEvict(true); + + for (int key = immutableSplitKey; key < numKeys; key++) + session.Upsert(key, key + valueAdd); + session.CompletePending(true); + fht.Log.ShiftReadOnlyAddress(fht.Log.TailAddress, wait: true); + } + + void CreateChain(bool immutable = false) + { + using var session = fht.NewSession(new SimpleFunctions()); + + // Pass1: PENDING reads and populate the cache + for (var ii = 0; ii < chainLen; ++ii) + { + var key = lowChainKey + ii * mod; + var status = session.Read(key, out _); + Assert.AreEqual((immutable && key >= immutableSplitKey) ? Status.OK : Status.PENDING, status); + session.CompletePending(wait: true); + if (ii == 0) + readCacheHighEvictionAddress = fht.readcache.GetTailAddress(); + } + + // Pass2: non-PENDING reads from the cache + for (var ii = 0; ii < chainLen; ++ii) + { + var status = session.Read(lowChainKey + ii * mod, out _); + Assert.AreNotEqual(Status.PENDING, status); + } + + // Pass 3: Put in bunch of extra keys into the cache so when we FlushAndEvict we get all the ones of interest. + for (var key = 0; key < numKeys; ++key) + { + if ((key % mod) != 0) + { + var status = session.Read(key, out _); + Assert.AreEqual((immutable && key >= immutableSplitKey) ? 
Status.OK : Status.PENDING, status); + session.CompletePending(wait: true); + } + } + } + + unsafe (long logicalAddress, long physicalAddress) GetHashChain(int key, out int recordKey, out bool invalid, out bool isReadCache) + => GetHashChain(fht, key, out recordKey, out invalid, out isReadCache); + + internal static unsafe (long logicalAddress, long physicalAddress) GetHashChain(FasterKV fht, int key, out int recordKey, out bool invalid, out bool isReadCache) + { + var bucket = default(HashBucket*); + var slot = default(int); + + var hash = fht.comparer.GetHashCode64(ref key); + var tag = (ushort)((ulong)hash >> Constants.kHashTagShift); + + var entry = default(HashBucketEntry); + var tagExists = fht.FindTag(hash, tag, ref bucket, ref slot, ref entry); + Assert.IsTrue(tagExists); + + isReadCache = entry.ReadCache; + var log = isReadCache ? fht.readcache : fht.hlog; + var pa = log.GetPhysicalAddress(entry.Address); + recordKey = log.GetKey(pa); + invalid = log.GetInfo(pa).Invalid; + + return (entry.Address, pa); + } + + (long logicalAddress, long physicalAddress) NextInChain(long physicalAddress, out int recordKey, out bool invalid, ref bool isReadCache) + => NextInChain(fht, physicalAddress, out recordKey, out invalid, ref isReadCache); + + internal static (long logicalAddress, long physicalAddress) NextInChain(FasterKV fht, long physicalAddress, out int recordKey, out bool invalid, ref bool isReadCache) + { + var log = isReadCache ? fht.readcache : fht.hlog; + var info = log.GetInfo(physicalAddress); + var la = info.PreviousAddress; + isReadCache = new HashBucketEntry { word = la }.ReadCache; + log = isReadCache ? 
fht.readcache : fht.hlog; + var pa = log.GetPhysicalAddress(la); + recordKey = log.GetKey(pa); + invalid = log.GetInfo(pa).Invalid; + return (la, pa); + } + + (long logicalAddress, long physicalAddress) ScanReadCacheChain(int[] omitted = null, bool evicted = false) + { + omitted ??= Array.Empty(); + + var (la, pa) = GetHashChain(fht, lowChainKey, out int actualKey, out bool invalid, out bool isReadCache); + for (var expectedKey = highChainKey; expectedKey >= lowChainKey; expectedKey -= mod) + { + if (omitted.Contains(expectedKey)) + { + // Either we have not yet evicted Invalid readcache records, in which case we'll see an Invalid record, + // or we have, in which case we don't see that record. + if (evicted) + { + expectedKey -= mod; + if (expectedKey < lowChainKey) + { + Assert.IsFalse(isReadCache); + break; + } + } + else + Assert.IsTrue(invalid); + } + Assert.AreEqual(expectedKey, actualKey); + Assert.IsTrue(isReadCache); + (la, pa) = NextInChain(pa, out actualKey, out invalid, ref isReadCache); + } + Assert.IsFalse(isReadCache); + return (la, pa); + } + + (long logicalAddress, long physicalAddress) SkipReadCacheChain(int key) + => SkipReadCacheChain(fht, key); + + internal static (long logicalAddress, long physicalAddress) SkipReadCacheChain(FasterKV fht, int key) + { + var (la, pa) = ChainTests.GetHashChain(fht, key, out _, out _, out bool isReadCache); + while (isReadCache) + (la, pa) = ChainTests.NextInChain(fht, pa, out _, out _, ref isReadCache); + return (la, pa); + } + + void VerifySplicedInKey(int expectedKey) + { + // Scan to the end of the readcache chain and verify we inserted the value. 
+ var (_, pa) = SkipReadCacheChain(expectedKey); + var storedKey = fht.hlog.GetKey(pa); + Assert.AreEqual(expectedKey, storedKey); + } + + [Test] + [Category(TestUtils.FasterKVTestCategory)] + [Category(TestUtils.ReadCacheTestCategory)] + [Category(TestUtils.SmokeTestCategory)] + public void ChainVerificationTest() + { + PopulateAndEvict(); + CreateChain(); + + ScanReadCacheChain(); + } + + [Test] + [Category(TestUtils.FasterKVTestCategory)] + [Category(TestUtils.ReadCacheTestCategory)] + [Category(TestUtils.SmokeTestCategory)] + public void DeleteTest() + { + PopulateAndEvict(); + CreateChain(); + using var session = fht.NewSession(new SimpleFunctions()); + + void doTest(int key) + { + var status = session.Delete(key); + Assert.AreEqual(Status.OK, status); + + status = session.Read(key, out var value); + Assert.AreEqual(Status.NOTFOUND, status); + } + + doTest(lowChainKey); + doTest(highChainKey); + doTest(midChainKey); + ScanReadCacheChain(new[] { lowChainKey, midChainKey, highChainKey }, evicted: false); + + fht.ReadCacheEvict(fht.readcache.BeginAddress, readCacheHighEvictionAddress); + ScanReadCacheChain(new[] { lowChainKey, midChainKey, highChainKey }, evicted: true); + } + + [Test] + [Category(TestUtils.FasterKVTestCategory)] + [Category(TestUtils.ReadCacheTestCategory)] + [Category(TestUtils.SmokeTestCategory)] + public void DeleteAllTest() + { + PopulateAndEvict(); + CreateChain(); + using var session = fht.NewSession(new SimpleFunctions()); + + void doTest(int key) + { + var status = session.Delete(key); + Assert.AreEqual(Status.OK, status); + + status = session.Read(key, out var value); + Assert.AreEqual(Status.NOTFOUND, status); + } + + // Delete all keys in the readcache chain. 
+ for (var ii = lowChainKey; ii <= highChainKey; ++ii) + doTest(ii); + + var _ = GetHashChain(lowChainKey, out int actualKey, out bool invalid, out bool isReadCache); + Assert.IsTrue(isReadCache); + Assert.IsTrue(invalid); + + fht.ReadCacheEvict(fht.readcache.BeginAddress, readCacheHighEvictionAddress); + _ = GetHashChain(lowChainKey, out actualKey, out invalid, out isReadCache); + Assert.IsFalse(isReadCache); + Assert.IsFalse(invalid); + } + + [Test] + [Category(TestUtils.FasterKVTestCategory)] + [Category(TestUtils.ReadCacheTestCategory)] + [Category(TestUtils.SmokeTestCategory)] + public void UpsertTest() + { + DoUpdateTest(useRMW: false); + } + + [Test] + [Category(TestUtils.FasterKVTestCategory)] + [Category(TestUtils.ReadCacheTestCategory)] + [Category(TestUtils.SmokeTestCategory)] + public void RMWTest() + { + DoUpdateTest(useRMW: true); + } + + void DoUpdateTest(bool useRMW) + { + PopulateAndEvict(); + CreateChain(); + using var session = fht.NewSession(new SimpleFunctions()); + + void doTest(int key) + { + var status = session.Read(key, out var value); + Assert.AreEqual(Status.OK, status); + + if (useRMW) + { + // RMW will get the old value from disk, unlike Upsert + status = session.RMW(key, value + valueAdd); + Assert.AreEqual(Status.PENDING, status); + session.CompletePending(wait: true); + } + else + { + status = session.Upsert(key, value + valueAdd); + Assert.AreEqual(Status.OK, status); + } + + status = session.Read(key, out value); + Assert.AreEqual(Status.OK, status); + Assert.AreEqual(key + valueAdd * 2, value); + } + + doTest(lowChainKey); + doTest(highChainKey); + doTest(midChainKey); + ScanReadCacheChain(new[] { lowChainKey, midChainKey, highChainKey }, evicted: false); + + fht.ReadCacheEvict(fht.readcache.BeginAddress, readCacheHighEvictionAddress); + ScanReadCacheChain(new[] { lowChainKey, midChainKey, highChainKey }, evicted: true); + } + + [Test] + [Category(TestUtils.FasterKVTestCategory)] + [Category(TestUtils.ReadCacheTestCategory)] + 
[Category(TestUtils.SmokeTestCategory)] + public void SpliceInFromCTTTest() + { + PopulateAndEvict(); + CreateChain(); + + using var session = fht.NewSession(new SimpleFunctions()); + int input = 0, output = 0, key = lowChainKey - mod; // key must be in evicted region for this test + RecordMetadata recordMetadata = default; + + var status = session.Read(ref key, ref input, ref output, ref recordMetadata, ReadFlags.CopyToTail); + Assert.AreEqual(Status.PENDING, status); + session.CompletePending(wait: true); + + VerifySplicedInKey(key); + } + + public enum RecordRegion { Immutable, OnDisk, NotFound }; + + [Test] + [Category(TestUtils.FasterKVTestCategory)] + [Category(TestUtils.ReadCacheTestCategory)] + [Category(TestUtils.SmokeTestCategory)] + public void SpliceInFromUpsertTest([Values] RecordRegion recordRegion) + { + PopulateAndEvict(recordRegion == RecordRegion.Immutable); + CreateChain(recordRegion == RecordRegion.Immutable); + + using var session = fht.NewSession(new SimpleFunctions()); + int key = -1; + + if (recordRegion == RecordRegion.Immutable || recordRegion == RecordRegion.OnDisk) + { + key = spliceInExistingKey; + var status = session.Upsert(key, key + valueAdd); + Assert.AreEqual(Status.OK, status); + } + else + { + key = spliceInNewKey; + var status = session.Upsert(key, key + valueAdd); + Assert.AreEqual(Status.OK, status); + } + + VerifySplicedInKey(key); + } + + [Test] + [Category(TestUtils.FasterKVTestCategory)] + [Category(TestUtils.ReadCacheTestCategory)] + [Category(TestUtils.SmokeTestCategory)] + public void SpliceInFromRMWTest([Values] RecordRegion recordRegion) + { + PopulateAndEvict(recordRegion == RecordRegion.Immutable); + CreateChain(recordRegion == RecordRegion.Immutable); + + using var session = fht.NewSession(new SimpleFunctions()); + int key = -1; + + if (recordRegion == RecordRegion.Immutable || recordRegion == RecordRegion.OnDisk) + { + key = spliceInExistingKey; + var status = session.RMW(key, key + valueAdd); + 
Assert.AreEqual(recordRegion == RecordRegion.OnDisk ? Status.PENDING : Status.OK, status); + session.CompletePending(wait: true); + } + else + { + key = spliceInNewKey; + var status = session.RMW(key, key + valueAdd); + // This NOTFOUND key will return PENDING because we have to trace back through the collisions. + Assert.AreEqual(Status.PENDING, status); + session.CompletePending(wait: true); + } + + VerifySplicedInKey(key); + } + + [Test] + [Category(TestUtils.FasterKVTestCategory)] + [Category(TestUtils.ReadCacheTestCategory)] + [Category(TestUtils.SmokeTestCategory)] + public void SpliceInFromDeleteTest([Values] RecordRegion recordRegion) + { + PopulateAndEvict(recordRegion == RecordRegion.Immutable); + CreateChain(recordRegion == RecordRegion.Immutable); + + using var session = fht.NewSession(new SimpleFunctions()); + int key = -1; + + if (recordRegion == RecordRegion.Immutable || recordRegion == RecordRegion.OnDisk) + { + key = spliceInExistingKey; + var status = session.Delete(key); + Assert.AreEqual(Status.OK, status); + } + else + { + key = spliceInNewKey; + var status = session.Delete(key); + Assert.AreEqual(Status.OK, status); + } + + VerifySplicedInKey(key); + } + + [Test] + [Category(TestUtils.FasterKVTestCategory)] + [Category(TestUtils.ReadCacheTestCategory)] + [Category(TestUtils.SmokeTestCategory)] + public void EvictFromReadCacheToLockTableTest() + { + PopulateAndEvict(); + CreateChain(); + + using var session = fht.NewSession(new SimpleFunctions()); + using var luContext = session.GetLockableUnsafeContext(); + + Dictionary locks = new() + { + { lowChainKey, LockType.Exclusive }, + { midChainKey, LockType.Shared}, + { highChainKey, LockType.Exclusive } + }; + + // For this single-threaded test, the locking does not really have to be in order, but for consistency do it. 
+ foreach (var key in locks.Keys.OrderBy(k => k)) + luContext.Lock(key, locks[key]); + + fht.ReadCache.FlushAndEvict(wait: true); + + Assert.IsTrue(fht.LockTable.IsActive); + Assert.AreEqual(locks.Count, fht.LockTable.dict.Count); + + foreach (var key in locks.Keys) + { + var found = fht.LockTable.Get(key, out RecordInfo recordInfo); + Assert.IsTrue(found); + var lockType = locks[key]; + Assert.AreEqual(lockType == LockType.Exclusive, recordInfo.IsLockedExclusive); + Assert.AreEqual(lockType != LockType.Exclusive, recordInfo.IsLockedShared); + + luContext.Unlock(key, lockType); + Assert.IsFalse(fht.LockTable.Get(key, out recordInfo)); + } + + Assert.IsFalse(fht.LockTable.IsActive); + Assert.AreEqual(0, fht.LockTable.dict.Count); + } + + [Test] + [Category(TestUtils.FasterKVTestCategory)] + [Category(TestUtils.ReadCacheTestCategory)] + [Category(TestUtils.SmokeTestCategory)] + public void TransferFromLockTableToReadCacheTest() + { + PopulateAndEvict(); + + // DO NOT create the chain here; do that below. Here, we create records in the lock table and THEN we create + // the chain, resulting in transfer of the locked records. + //CreateChain(); + + using var session = fht.NewSession(new SimpleFunctions()); + using var luContext = session.GetLockableUnsafeContext(); + + Dictionary locks = new() + { + { lowChainKey, LockType.Exclusive }, + { midChainKey, LockType.Shared }, + { highChainKey, LockType.Exclusive } + }; + + // For this single-threaded test, the locking does not really have to be in order, but for consistency do it. 
+ foreach (var key in locks.Keys.OrderBy(k => k)) + luContext.Lock(key, locks[key]); + + fht.ReadCache.FlushAndEvict(wait: true); + + // Verify the locks have been evicted to the lockTable + Assert.IsTrue(fht.LockTable.IsActive); + Assert.AreEqual(locks.Count, fht.LockTable.dict.Count); + + foreach (var key in locks.Keys) + { + var found = fht.LockTable.Get(key, out RecordInfo recordInfo); + Assert.IsTrue(found); + var lockType = locks[key]; + Assert.AreEqual(lockType == LockType.Exclusive, recordInfo.IsLockedExclusive); + Assert.AreEqual(lockType != LockType.Exclusive, recordInfo.IsLockedShared); + } + + fht.Log.FlushAndEvict(wait: true); + + // Create the readcache entries, which will transfer the locks from the locktable to the readcache + foreach (var key in locks.Keys) + { + var status = session.Read(key, out _); + Assert.AreEqual(Status.PENDING, status); + session.CompletePending(wait: true); + + var lockType = locks[key]; + var (exclusive, shared) = luContext.IsLocked(key); + Assert.AreEqual(lockType == LockType.Exclusive, exclusive); + Assert.AreEqual(lockType != LockType.Exclusive, shared); + + luContext.Unlock(key, lockType); + Assert.IsFalse(fht.LockTable.Get(key, out _)); + } + + Assert.IsFalse(fht.LockTable.IsActive); + Assert.AreEqual(0, fht.LockTable.dict.Count); + } + } +} diff --git a/cs/test/ReproReadCacheTest.cs b/cs/test/ReproReadCacheTest.cs index 679bfc40b..4515e23ba 100644 --- a/cs/test/ReproReadCacheTest.cs +++ b/cs/test/ReproReadCacheTest.cs @@ -6,7 +6,7 @@ using FASTER.core; using NUnit.Framework; -namespace FASTER.test +namespace FASTER.test.ReadCacheTests { [TestFixture] internal class RandomReadCacheTest @@ -32,7 +32,7 @@ public override bool SingleReader(ref SpanByte key, ref long input, ref long val public override void ReadCompletionCallback(ref SpanByte key, ref long input, ref long output, Context context, Status status, RecordMetadata recordMetadata) { - Assert.AreEqual(status, Status.OK); + Assert.AreEqual(Status.OK, status); 
Assert.AreEqual(input, output); context.Status = status; } diff --git a/cs/test/TestUtils.cs b/cs/test/TestUtils.cs index c75ab4f2b..e052c2bfc 100644 --- a/cs/test/TestUtils.cs +++ b/cs/test/TestUtils.cs @@ -3,7 +3,6 @@ using NUnit.Framework; using System; -using System.Diagnostics; using System.IO; using FASTER.core; using FASTER.devices; @@ -17,7 +16,8 @@ internal static class TestUtils // Various categories used to group tests internal const string SmokeTestCategory = "Smoke"; internal const string FasterKVTestCategory = "FasterKV"; - internal const string LockableUnsafeContextCategory = "LockableUnsafeContext"; + internal const string LockableUnsafeContextTestCategory = "LockableUnsafeContext"; + internal const string ReadCacheTestCategory = "ReadCache"; /// /// Delete a directory recursively From 34a886631af9e0ff029efe1d0978283a6e7f4ad3 Mon Sep 17 00:00:00 2001 From: TedHartMS <15467143+TedHartMS@users.noreply.github.com> Date: Tue, 28 Dec 2021 10:43:56 -0800 Subject: [PATCH 13/25] Test MemoryPageLockEvictionScan --- cs/src/core/Allocator/AllocatorBase.cs | 13 +++- cs/test/LockableUnsafeContextTests.cs | 92 +++++++++++++++++++++++++- 2 files changed, 101 insertions(+), 4 deletions(-) diff --git a/cs/src/core/Allocator/AllocatorBase.cs b/cs/src/core/Allocator/AllocatorBase.cs index af8fcaf92..8eb8ac6d2 100644 --- a/cs/src/core/Allocator/AllocatorBase.cs +++ b/cs/src/core/Allocator/AllocatorBase.cs @@ -1273,8 +1273,17 @@ private void OnPagesMarkedReadOnly(long newSafeReadOnlyAddress) /// private void OnPagesReadyToClose(long oldHeadAddress, long newHeadAddress) { - if (ReadCache && (newHeadAddress > HeadAddress)) - EvictCallback(HeadAddress, newHeadAddress); + if (newHeadAddress > HeadAddress) + { + if (ReadCache) + EvictCallback(HeadAddress, newHeadAddress); + for (long closePageAddress = HeadAddress & ~PageSizeMask; closePageAddress < newHeadAddress; closePageAddress += PageSize) + { + long start = HeadAddress > closePageAddress ? 
HeadAddress : closePageAddress; + long end = newHeadAddress < closePageAddress + PageSize ? newHeadAddress : closePageAddress + PageSize; + MemoryPageLockEvictionScan(start, end); + } + } if (Utility.MonotonicUpdate(ref HeadAddress, newHeadAddress, out oldHeadAddress)) { diff --git a/cs/test/LockableUnsafeContextTests.cs b/cs/test/LockableUnsafeContextTests.cs index b4728f1b2..c5c39f3fb 100644 --- a/cs/test/LockableUnsafeContextTests.cs +++ b/cs/test/LockableUnsafeContextTests.cs @@ -669,8 +669,96 @@ public void TransferFromLockTableToDeleteTest([Values] ChainTests.RecordRegion r [Category(TestUtils.SmokeTestCategory)] public void LockAndUnlockInLockTableOnlyTest() { - // TODO this - // TODO: MemoryPageLockEvictionScan tests + // For this, just don't load anything, and it will happen in lock table. + using var session = fht.NewSession(new SimpleFunctions()); + using var luContext = session.GetLockableUnsafeContext(); + + Dictionary locks = new(); + var rng = new Random(101); + foreach (var key in Enumerable.Range( 0, numRecords).Select(ii => rng.Next(numRecords))) + locks[key] = (key & 1) == 0 ? LockType.Exclusive : LockType.Shared; + + // For this single-threaded test, the locking does not really have to be in order, but for consistency do it. 
+ foreach (var key in locks.Keys.OrderBy(k => k)) + luContext.Lock(key, locks[key]); + + Assert.IsTrue(fht.LockTable.IsActive); + Assert.AreEqual(locks.Count, fht.LockTable.dict.Count); + + foreach (var key in locks.Keys) + { + var found = fht.LockTable.Get(key, out RecordInfo recordInfo); + Assert.IsTrue(found); + var lockType = locks[key]; + Assert.AreEqual(lockType == LockType.Exclusive, recordInfo.IsLockedExclusive); + Assert.AreEqual(lockType != LockType.Exclusive, recordInfo.IsLockedShared); + + luContext.Unlock(key, lockType); + Assert.IsFalse(fht.LockTable.Get(key, out _)); + } + + Assert.IsFalse(fht.LockTable.IsActive); + Assert.AreEqual(0, fht.LockTable.dict.Count); + } + + [Test] + [Category(TestUtils.LockableUnsafeContextTestCategory)] + [Category(TestUtils.SmokeTestCategory)] + public void EvictFromMainLogToLockTableTest() + { + Populate(); + + using var session = fht.NewSession(new SimpleFunctions()); + using var luContext = session.GetLockableUnsafeContext(); + + Dictionary locks = new(); + var rng = new Random(101); + foreach (var key in Enumerable.Range(0, numRecords / 5).Select(ii => rng.Next(numRecords))) + locks[key] = (key & 1) == 0 ? LockType.Exclusive : LockType.Shared; + + // For this single-threaded test, the locking does not really have to be in order, but for consistency do it. + foreach (var key in locks.Keys.OrderBy(k => k)) + luContext.Lock(key, locks[key]); + + // All locking should have been done in main log. + Assert.IsFalse(fht.LockTable.IsActive); + Assert.AreEqual(0, fht.LockTable.dict.Count); + + // Now evict main log which should transfer records to the LockTable. 
+ fht.Log.FlushAndEvict(wait: true); + + Assert.IsTrue(fht.LockTable.IsActive); + Assert.AreEqual(locks.Count, fht.LockTable.dict.Count); + + // Verify LockTable + foreach (var key in locks.Keys) + { + var found = fht.LockTable.Get(key, out RecordInfo recordInfo); + Assert.IsTrue(found); + var lockType = locks[key]; + Assert.AreEqual(lockType == LockType.Exclusive, recordInfo.IsLockedExclusive); + Assert.AreEqual(lockType != LockType.Exclusive, recordInfo.IsLockedShared); + + // Just a little more testing of Read/CTT transferring from LockTable + int input = 0, output = 0, localKey = key; + RecordMetadata recordMetadata = default; + var status = session.Read(ref localKey, ref input, ref output, ref recordMetadata, ReadFlags.CopyToTail); + Assert.AreEqual(Status.PENDING, status); + session.CompletePending(wait: true); + + Assert.IsFalse(fht.LockTable.Get(key, out _)); + var (isLockedExclusive, isLockedShared) = luContext.IsLocked(localKey); + Assert.AreEqual(lockType == LockType.Exclusive, isLockedExclusive); + Assert.AreEqual(lockType != LockType.Exclusive, isLockedShared); + + luContext.Unlock(key, lockType); + (isLockedExclusive, isLockedShared) = luContext.IsLocked(localKey); + Assert.IsFalse(isLockedExclusive); + Assert.IsFalse(isLockedShared); + } + + Assert.IsFalse(fht.LockTable.IsActive); + Assert.AreEqual(0, fht.LockTable.dict.Count); } } } From b5283fa32a3f309092f3c1e3dcc80d0772eb5a97 Mon Sep 17 00:00:00 2001 From: TedHartMS <15467143+TedHartMS@users.noreply.github.com> Date: Mon, 3 Jan 2022 10:39:01 -0800 Subject: [PATCH 14/25] Add testing for two-phase upsert/copytotail --- .../ClientSession/LockableUnsafeContext.cs | 4 +- cs/src/core/Index/Common/Contexts.cs | 11 +- cs/src/core/Index/Common/RecordInfo.cs | 2 +- cs/src/core/Index/FASTER/FASTER.cs | 7 + cs/src/core/Index/FASTER/FASTERImpl.cs | 124 +++++++------ cs/src/core/Index/Recovery/Checkpoint.cs | 1 + cs/src/core/Utilities/LockTable.cs | 3 +- cs/test/AdvancedLockTests.cs | 168 ++++++++++++++++++ 
cs/test/{LockTests.cs => BasicLockTests.cs} | 4 +- cs/test/ReadCacheChainTests.cs | 20 ++- cs/test/TestUtils.cs | 32 ++++ docs/_docs/30-fasterkv-manual-locking.md | 16 +- 12 files changed, 314 insertions(+), 78 deletions(-) create mode 100644 cs/test/AdvancedLockTests.cs rename cs/test/{LockTests.cs => BasicLockTests.cs} (99%) diff --git a/cs/src/core/ClientSession/LockableUnsafeContext.cs b/cs/src/core/ClientSession/LockableUnsafeContext.cs index eb12a2525..df322f6b2 100644 --- a/cs/src/core/ClientSession/LockableUnsafeContext.cs +++ b/cs/src/core/ClientSession/LockableUnsafeContext.cs @@ -96,7 +96,7 @@ public bool CompletePendingWithOutputs(out CompletedOutputIterator 0) throw new FasterException($"Disposing LockableUnsafeContext with locks held: {sharedLockCount} shared locks, {exclusiveLockCount} exclusive locks"); this.isAcquired = false; - Interlocked.Decrement(ref this.clientSession.fht.NumActiveLockingSessions); + this.clientSession.fht.DecrementNumLockingSessions(); } #endregion Acquire and Dispose diff --git a/cs/src/core/Index/Common/Contexts.cs b/cs/src/core/Index/Common/Contexts.cs index 6657c74c8..9f1cd995f 100644 --- a/cs/src/core/Index/Common/Contexts.cs +++ b/cs/src/core/Index/Common/Contexts.cs @@ -98,7 +98,7 @@ internal struct PendingContext internal const ushort kNoKey = 0x0100; internal const ushort kIsAsync = 0x0200; - internal const ushort kHasPrevTailAddress = 0x0400; + internal const ushort kHasPrevHighestKeyHashAddress = 0x0400; [MethodImpl(MethodImplOptions.AggressiveInlining)] internal IHeapContainer DetachKey() @@ -177,10 +177,10 @@ internal bool IsAsync set => operationFlags = value ? (ushort)(operationFlags | kIsAsync) : (ushort)(operationFlags & ~kIsAsync); } - internal bool HasPrevTailAddress + internal bool HasPrevHighestKeyHashAddress { - get => (operationFlags & kHasPrevTailAddress) != 0; - set => operationFlags = value ? 
(ushort)(operationFlags | kHasPrevTailAddress) : (ushort)(operationFlags & ~kHasPrevTailAddress); + get => (operationFlags & kHasPrevHighestKeyHashAddress) != 0; + set => operationFlags = value ? (ushort)(operationFlags | kHasPrevHighestKeyHashAddress) : (ushort)(operationFlags & ~kHasPrevHighestKeyHashAddress); } public void Dispose() @@ -307,11 +307,12 @@ public struct HybridLogRecoveryInfo /// Begin address /// public long beginAddress; + /// /// If true, there was at least one IFasterContext implementation active that did manual locking at some point during the checkpoint; /// these pages must be scanned for lock cleanup. /// - bool manualLockingActive; + public bool manualLockingActive; /// /// Commit tokens per session restored during Continue diff --git a/cs/src/core/Index/Common/RecordInfo.cs b/cs/src/core/Index/Common/RecordInfo.cs index 4c44c20a1..4cb4b4fc4 100644 --- a/cs/src/core/Index/Common/RecordInfo.cs +++ b/cs/src/core/Index/Common/RecordInfo.cs @@ -338,7 +338,7 @@ public bool InNewVersion public void SetDirty() => word |= kDirtyBitMask; public void SetTombstone() => word |= kTombstoneBitMask; public void SetValid() => word |= kValidBitMask; - public void SetInvalid() => word &= ~kValidBitMask; + public void SetInvalid() => word &= ~(kValidBitMask | kTentativeBitMask); public bool Invalid => (word & kValidBitMask) == 0; diff --git a/cs/src/core/Index/FASTER/FASTER.cs b/cs/src/core/Index/FASTER/FASTER.cs index 78587ca48..ad80ffca0 100644 --- a/cs/src/core/Index/FASTER/FASTER.cs +++ b/cs/src/core/Index/FASTER/FASTER.cs @@ -115,6 +115,13 @@ public partial class FasterKV : FasterBase, internal LockTable LockTable; internal long NumActiveLockingSessions = 0; + internal void IncrementNumLockingSessions() + { + _hybridLogCheckpoint.info.manualLockingActive = true; + Interlocked.Increment(ref this.NumActiveLockingSessions); + } + internal void DecrementNumLockingSessions() => --this.NumActiveLockingSessions; + /// /// Create FasterKV instance /// diff 
--git a/cs/src/core/Index/FASTER/FASTERImpl.cs b/cs/src/core/Index/FASTER/FASTERImpl.cs index edc639b67..55c94a34d 100644 --- a/cs/src/core/Index/FASTER/FASTERImpl.cs +++ b/cs/src/core/Index/FASTER/FASTERImpl.cs @@ -120,9 +120,13 @@ internal OperationStatus InternalRead( #region Trace back for record in in-memory HybridLog HashBucketEntry entry = default; + // This tracks the highest address that a new record could be added after we call FindTag. This is the value after skipping readcache + // and before TraceBackForKeyMatch. It is an in-memory address (mutable or readonly), or the first on-disk address, or 0 (in which case + // we return NOTFOUND and this value is not used). InternalTryCopyToTail can stop its scan immediately above this address. + long prevHighestKeyHashAddress = Constants.kInvalidAddress; + OperationStatus status; long logicalAddress; - long prevTailAddress = hlog.GetTailAddress(); var useStartAddress = startAddress != Constants.kInvalidAddress && !pendingContext.HasMinAddress; bool tagExists; if (!useStartAddress) @@ -162,6 +166,8 @@ internal OperationStatus InternalRead( else if (status != OperationStatus.SUCCESS) return status; } + if (prevHighestKeyHashAddress < logicalAddress) + prevHighestKeyHashAddress = logicalAddress; if (logicalAddress >= hlog.HeadAddress) { @@ -304,8 +310,8 @@ internal OperationStatus InternalRead( pendingContext.serialNum = lsn; pendingContext.heldLatch = heldOperation; - pendingContext.HasPrevTailAddress = true; - pendingContext.recordInfo.PreviousAddress = prevTailAddress; + pendingContext.HasPrevHighestKeyHashAddress = true; + pendingContext.recordInfo.PreviousAddress = prevHighestKeyHashAddress; } #endregion @@ -389,8 +395,6 @@ internal OperationStatus InternalUpsert( { prevHighestReadCacheLogicalAddress = logicalAddress; SkipReadCache(ref logicalAddress, out lowestReadCachePhysicalAddress); - if (prevHighestReadCacheLogicalAddress == logicalAddress) // if there were no readcache records - 
prevHighestReadCacheLogicalAddress = Constants.kInvalidAddress; } var latestLogicalAddress = logicalAddress; @@ -687,9 +691,12 @@ private OperationStatus CreateNewRecordUpsertbucket_entries[slot]; + InvalidateUpdatedRecordInReadCache(entry.Address, ref key, prevHighestReadCacheLogicalAddress); + } } if (success) @@ -800,8 +807,6 @@ internal OperationStatus InternalRMW( { prevHighestReadCacheLogicalAddress = logicalAddress; SkipReadCache(ref logicalAddress, out lowestReadCachePhysicalAddress); - if (prevHighestReadCacheLogicalAddress == logicalAddress) // if there were no readcache records - prevHighestReadCacheLogicalAddress = Constants.kInvalidAddress; } var latestLogicalAddress = logicalAddress; @@ -1167,9 +1172,12 @@ ref hlog.GetValue(newPhysicalAddress, newPhysicalAddress + actualSize), // Splice a non-tentative record into the readcache/mainlog gap. success = rcri.TryUpdateAddress(newLogicalAddress); - - // Now see if we have added a readcache entry from a pending read while we were inserting; if so it is obsolete and must be Invalidated. - InvalidateUpdatedRecordInReadCache(entry.word, ref key, prevHighestReadCacheLogicalAddress); + if (success) + { + // Now see if we have added a readcache entry from a pending read while we were inserting; if so it is obsolete and must be Invalidated. 
+ entry.word = bucket->bucket_entries[slot]; + InvalidateUpdatedRecordInReadCache(entry.Address, ref key, prevHighestReadCacheLogicalAddress); + } } if (success) @@ -1287,8 +1295,6 @@ internal OperationStatus InternalDelete( { prevHighestReadCacheLogicalAddress = logicalAddress; SkipReadCache(ref logicalAddress, out lowestReadCachePhysicalAddress); - if (prevHighestReadCacheLogicalAddress == logicalAddress) // if there were no readcache records - prevHighestReadCacheLogicalAddress = Constants.kInvalidAddress; } var latestLogicalAddress = logicalAddress; @@ -1494,9 +1500,12 @@ internal OperationStatus InternalDelete( // Splice a non-tentative record into the readcache/mainlog gap. success = rcri.TryUpdateAddress(newLogicalAddress); - - // Now see if we have added a readcache entry from a pending read while we were inserting; if so it is obsolete and must be Invalidated. - InvalidateUpdatedRecordInReadCache(entry.word, ref key, prevHighestReadCacheLogicalAddress); + if (success) + { + // Now see if we have added a readcache entry from a pending read while we were inserting; if so it is obsolete and must be Invalidated. + entry.word = bucket->bucket_entries[slot]; + InvalidateUpdatedRecordInReadCache(entry.Address, ref key, prevHighestReadCacheLogicalAddress); + } } if (success) @@ -2001,9 +2010,12 @@ ref hlog.GetValue(newPhysicalAddress, newPhysicalAddress + actualSize), // Splice a non-tentative record into the readcache/mainlog gap. success = rcri.TryUpdateAddress(newLogicalAddress); - - // Now see if we have added a readcache entry from a pending read while we were inserting; if so it is obsolete and must be Invalidated. - InvalidateUpdatedRecordInReadCache(entry.word, ref key, prevHighestReadCacheLogicalAddress); + if (success) + { + // Now see if we have added a readcache entry from a pending read while we were inserting; if so it is obsolete and must be Invalidated. 
+ entry.word = bucket->bucket_entries[slot]; + InvalidateUpdatedRecordInReadCache(entry.Address, ref key, prevHighestReadCacheLogicalAddress); + } } if (success) @@ -2409,7 +2421,7 @@ internal OperationStatus InternalTryCopyToTail 0); } var latestLogicalAddress = logicalAddress; @@ -2456,9 +2468,6 @@ internal OperationStatus InternalTryCopyToTailbucket_entries[slot], updatedEntry.word, entry.word); success = foundEntry.word == entry.word; - if (success && UseReadCache && pendingContext.HasPrevTailAddress) + if (success && copyToReadCache && pendingContext.HasPrevHighestKeyHashAddress) { - // See if we have added a main-log entry from an update while we were inserting; if so, the new readcache - // record is obsolete and must be Invalidated. - ref RecordInfo rcri = ref readcache.GetInfo(newPhysicalAddress); - var la = entry.Address; - SkipReadCache(ref la, out _); - for ( ; la >= prevTailAddress; /* incremented in loop */) + // See if we have added a main-log entry for this key from an update while we were inserting the new readcache record; + // if so, the new readcache record is obsolete and must be Invalidated. + + // Use the last readcache record in the chain to get the first non-readcache record in the chain. Note that this may be + // different from latestLogicalAddress if a new record was inserted since then. + var la = latestLogicalAddress; + if (lowestReadCachePhysicalAddress != Constants.kInvalidAddress) + { + ref RecordInfo last_rcri = ref readcache.GetInfo(lowestReadCachePhysicalAddress); + la = last_rcri.PreviousAddress; + } + ref RecordInfo new_rcri = ref readcache.GetInfo(newPhysicalAddress); + + // prevHighestKeyKashAddress may be either the first in-memory address or the first on-disk address at the time of Read(). + // We compare to > prevHighestKeyKashAddress because any new record would be added above that. 
+ while (la > prevHighestKeyKashAddress && la >= hlog.HeadAddress) { var pa = hlog.GetPhysicalAddress(la); if (comparer.Equals(ref key, ref hlog.GetKey(pa))) { - rcri.SetInvalid(); + new_rcri.SetInvalid(); break; } la = hlog.GetInfo(pa).PreviousAddress; } - if (!rcri.Invalid) + + if (!new_rcri.Invalid) { - // prevTailAddress may have escaped to disk, so we must retry. - if (prevTailAddress < hlog.HeadAddress) + // An inserted record may have escaped to disk during the time of this Read/PENDING operation, in which case we must retry. + if (la > prevHighestKeyKashAddress && la < hlog.HeadAddress) { - rcri.SetInvalid(); - return OperationStatus.RETRY_NOW; + new_rcri.SetInvalid(); + return OperationStatus.RECORD_ON_DISK; } - rcri.Tentative = false; + new_rcri.Tentative = false; } } } @@ -2547,19 +2567,22 @@ ref hlog.GetValue(newPhysicalAddress, newPhysicalAddress + actualSize), ref outp // Splice a non-tentative record into the readcache/mainlog gap. success = rcri.TryUpdateAddress(newLogicalAddress); - - // Now see if we have added a readcache entry from a pending read while we were inserting; if so it is obsolete and must be Invalidated. - InvalidateUpdatedRecordInReadCache(entry.word, ref key, prevHighestReadCacheLogicalAddress); + if (success) + { + // Now see if we have added a readcache entry from a pending read while we were inserting; if so it is obsolete and must be Invalidated. + entry.word = bucket->bucket_entries[slot]; + InvalidateUpdatedRecordInReadCache(entry.Address, ref key, prevHighestReadCacheLogicalAddress); + } } + var log = copyToReadCache ? readcache : hlog; if (!success) { - if (!copyToReadCache) hlog.GetInfo(newPhysicalAddress).SetInvalid(); + log.GetInfo(newPhysicalAddress).SetInvalid(); return OperationStatus.RETRY_NOW; } else { - var log = copyToReadCache ? 
readcache : hlog; ref RecordInfo recordInfo = ref log.GetInfo(newPhysicalAddress); if (lockTableEntryExists && !LockTable.ApplyToLogRecord(ref key, ref recordInfo)) @@ -2956,10 +2979,10 @@ private void InvalidateUpdatedRecordInReadCache(long logicalAddress, ref Key key if (!entry.ReadCache) return; - var physicalAddress = readcache.GetPhysicalAddress(logicalAddress & ~Constants.kReadCacheBitMask); - while (logicalAddress != untilAddress) { + var physicalAddress = readcache.GetPhysicalAddress(logicalAddress & ~Constants.kReadCacheBitMask); + // Invalidate read cache entry if key found. This is called when an updated value has been inserted to the main log tail, // so instead of waiting just invalidate and return. ref RecordInfo recordInfo = ref readcache.GetInfo(physicalAddress); @@ -2970,7 +2993,6 @@ private void InvalidateUpdatedRecordInReadCache(long logicalAddress, ref Key key entry.word = logicalAddress; if (!entry.ReadCache) return; - physicalAddress = readcache.GetPhysicalAddress(logicalAddress & ~Constants.kReadCacheBitMask); } } diff --git a/cs/src/core/Index/Recovery/Checkpoint.cs b/cs/src/core/Index/Recovery/Checkpoint.cs index bf348bea0..717af58ee 100644 --- a/cs/src/core/Index/Recovery/Checkpoint.cs +++ b/cs/src/core/Index/Recovery/Checkpoint.cs @@ -97,6 +97,7 @@ internal void InitializeIndexCheckpoint(Guid indexToken) internal void InitializeHybridLogCheckpoint(Guid hybridLogToken, long version) { _hybridLogCheckpoint.Initialize(hybridLogToken, version, checkpointManager); + _hybridLogCheckpoint.info.manualLockingActive = this.NumActiveLockingSessions > 0; } // #endregion diff --git a/cs/src/core/Utilities/LockTable.cs b/cs/src/core/Utilities/LockTable.cs index ebd05826c..f45aae219 100644 --- a/cs/src/core/Utilities/LockTable.cs +++ b/cs/src/core/Utilities/LockTable.cs @@ -157,8 +157,7 @@ internal void TryRemoveIfNoLocks(ref TKey key) // If we make it here, the key was already removed. 
} - // False is legit, as the record may have been removed between the time it was known to be here and the time Seal was called, - // or this may be called by SealOrTentative. + // False is legit, as the record may have been removed between the time it was known to be here and the time Seal was called. [MethodImpl(MethodImplOptions.AggressiveInlining)] internal bool TrySeal(ref TKey key, out bool exists) { diff --git a/cs/test/AdvancedLockTests.cs b/cs/test/AdvancedLockTests.cs new file mode 100644 index 000000000..417236b9b --- /dev/null +++ b/cs/test/AdvancedLockTests.cs @@ -0,0 +1,168 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +using FASTER.core; +using NUnit.Framework; +using System; +using System.Threading; +using FASTER.test.ReadCacheTests; + +namespace FASTER.test.LockTests +{ + [TestFixture] + internal class AdvancedLockTests + { + const int numKeys = 1000; + const int valueAdd = 1000000; + const int mod = 100; + + public struct Input + { + internal LockFunctionFlags flags; + internal int sleepRangeMs; + public override string ToString() => $"{flags}, {sleepRangeMs}"; + } + + [Flags]internal enum LockFunctionFlags + { + None = 0, + SetEvent = 1, + WaitForEvent = 2, + SleepAfterEventOperation = 4, + } + + internal class Functions : FunctionsBase + { + internal readonly ManualResetEventSlim mres = new(); + readonly Random rng = new(101); + + public Functions() : base(true) + { + } + + public override void SingleWriter(ref int key, ref Input input, ref int src, ref int dst, ref int output, ref RecordInfo recordInfo, long address) + { + // In the wait case we are waiting for a signal that something else has completed, e.g. a pending Read, by the thread with SetEvent. 
+ if ((input.flags & LockFunctionFlags.WaitForEvent) != 0) + { + mres.Wait(); + if ((input.flags & LockFunctionFlags.SleepAfterEventOperation) != 0) + Thread.Sleep(rng.Next(input.sleepRangeMs)); + } + else if ((input.flags & LockFunctionFlags.SetEvent) != 0) + { + mres.Set(); + if ((input.flags & LockFunctionFlags.SleepAfterEventOperation) != 0) + Thread.Sleep(rng.Next(input.sleepRangeMs)); + } + dst = src; + return; + } + + public override bool SingleReader(ref int key, ref Input input, ref int value, ref int dst, ref RecordInfo recordInfo, long address) + { + // We should only be here if we are doing the initial read, before Upsert has taken place. + Assert.AreEqual(key + valueAdd, value, $"Key = {key}"); + dst = value; + return true; + } + + public override bool ConcurrentReader(ref int key, ref Input input, ref int value, ref int dst, ref RecordInfo recordInfo, long address) + { + // We should only be here if the Upsert completed before the Read started; in this case we Read() the Upserted value. 
+ Assert.AreEqual(key + valueAdd * 2, value, $"Key = {key}"); + dst = value; + return true; + } + } + + private FasterKV fkv; + private ClientSession session; + private IDevice log; + + [SetUp] + public void Setup() + { + TestUtils.DeleteDirectory(TestUtils.MethodTestDir, wait: true); + log = Devices.CreateLogDevice(TestUtils.MethodTestDir + "/GenericStringTests.log", deleteOnClose: true); + var readCacheSettings = new ReadCacheSettings { MemorySizeBits = 15, PageSizeBits = 9 }; + fkv = new FasterKV(1L << 20, new LogSettings { LogDevice = log, ObjectLogDevice = null, ReadCacheSettings = readCacheSettings}, + comparer: new ChainTests.ChainComparer(mod), supportsLocking: true); + session = fkv.For(new Functions()).NewSession(); + } + + [TearDown] + public void TearDown() + { + session?.Dispose(); + session = null; + fkv?.Dispose(); + fkv = null; + log?.Dispose(); + log = null; + + TestUtils.DeleteDirectory(TestUtils.MethodTestDir); + } + + void Populate(bool evict = false) + { + using var session = fkv.NewSession(new Functions()); + + for (int key = 0; key < numKeys; key++) + session.Upsert(key, key + valueAdd); + session.CompletePending(true); + if (evict) + fkv.Log.FlushAndEvict(wait: true); + } + + [Test] + [Category(TestUtils.FasterKVTestCategory)] + [Category(TestUtils.LockTestCategory)] + public void SameKeyInsertAndCTTTest() + { + Populate(evict: true); + Functions functions = new(); + using var session = fkv.NewSession(functions); + var iter = 0; + + TestUtils.DoTwoThreadTest(numKeys, + key => + { + int output = 0; + var sleepFlag = (iter % 5 == 0) ? LockFunctionFlags.None : LockFunctionFlags.SleepAfterEventOperation; + Input input = new() { flags = LockFunctionFlags.WaitForEvent | sleepFlag, sleepRangeMs = 10 }; + var status = session.Upsert(key, input, key + valueAdd * 2, ref output); + Assert.AreEqual(Status.OK, status, $"Key = {key}"); + }, + key => + { + var sleepFlag = (iter % 5 == 0) ? 
LockFunctionFlags.None : LockFunctionFlags.SleepAfterEventOperation; + Input input = new() { flags = LockFunctionFlags.SetEvent | sleepFlag, sleepRangeMs = 10 }; + int output = 0; + RecordMetadata recordMetadata = default; + + // This will copy to ReadCache, and the test is trying to cause a race with the above Upsert. + var status = session.Read(ref key, ref input, ref output, ref recordMetadata); + + // If the Upsert completed before the Read started, we may Read() the Upserted value. + if (status == Status.OK) + Assert.AreEqual(key + valueAdd * 2, output, $"Key = {key}"); + else + { + Assert.AreEqual(Status.PENDING, status, $"Key = {key}"); + session.CompletePending(wait: true); + } + }, + key => + { + int output = default; + var status = session.Read(ref key, ref output); + Assert.AreEqual(Status.OK, status, $"Key = {key}"); + Assert.AreEqual(key + valueAdd * 2, output, $"Key = {key}"); + functions.mres.Reset(); + ++iter; + } + ); + } + } +} \ No newline at end of file diff --git a/cs/test/LockTests.cs b/cs/test/BasicLockTests.cs similarity index 99% rename from cs/test/LockTests.cs rename to cs/test/BasicLockTests.cs index 4db30a88f..67dfcf1dc 100644 --- a/cs/test/LockTests.cs +++ b/cs/test/BasicLockTests.cs @@ -8,10 +8,10 @@ using System.Threading; using System.Threading.Tasks; -namespace FASTER.test +namespace FASTER.test.LockTests { [TestFixture] - internal class LockTests + internal class BasicLockTests { internal class Functions : SimpleFunctions { diff --git a/cs/test/ReadCacheChainTests.cs b/cs/test/ReadCacheChainTests.cs index 28b1f49cd..06e6c1f7d 100644 --- a/cs/test/ReadCacheChainTests.cs +++ b/cs/test/ReadCacheChainTests.cs @@ -6,6 +6,7 @@ using System; using System.Collections.Generic; using System.Linq; +using System.Threading; namespace FASTER.test.ReadCacheTests { @@ -26,14 +27,17 @@ class ChainTests // Insert into chain. 
const int spliceInNewKey = highChainKey + mod * 2; const int spliceInExistingKey = highChainKey - mod; - const int immutableSplitKey = numKeys / 2; + const int immutableSplitKey = numKeys / 2; // This is the record after the first readcache record we insert; it lets us limit the range to ReadCacheEvict // so we get outsplicing rather than successively overwriting the hash table entry on ReadCacheEvict. long readCacheHighEvictionAddress; - class ChainComparer : IFasterEqualityComparer + internal class ChainComparer : IFasterEqualityComparer { + int mod; + internal ChainComparer(int mod) => this.mod = mod; + public bool Equals(ref int k1, ref int k2) => k1 == k2; public long GetHashCode64(ref int k) => k % mod; @@ -47,7 +51,7 @@ public void Setup() log = Devices.CreateLogDevice(TestUtils.MethodTestDir + "/NativeReadCacheTests.log", deleteOnClose: true); fht = new FasterKV (1L << 20, new LogSettings { LogDevice = log, MemorySizeBits = 15, PageSizeBits = 10, ReadCacheSettings = readCacheSettings }, - comparer: new ChainComparer()); + comparer: new ChainComparer(mod)); } [TearDown] @@ -225,7 +229,7 @@ public void ChainVerificationTest() [Category(TestUtils.FasterKVTestCategory)] [Category(TestUtils.ReadCacheTestCategory)] [Category(TestUtils.SmokeTestCategory)] - public void DeleteTest() + public void DeleteCacheRecordTest() { PopulateAndEvict(); CreateChain(); @@ -253,7 +257,7 @@ void doTest(int key) [Category(TestUtils.FasterKVTestCategory)] [Category(TestUtils.ReadCacheTestCategory)] [Category(TestUtils.SmokeTestCategory)] - public void DeleteAllTest() + public void DeleteAllCacheRecordsTest() { PopulateAndEvict(); CreateChain(); @@ -286,7 +290,7 @@ void doTest(int key) [Category(TestUtils.FasterKVTestCategory)] [Category(TestUtils.ReadCacheTestCategory)] [Category(TestUtils.SmokeTestCategory)] - public void UpsertTest() + public void UpsertCacheRecordTest() { DoUpdateTest(useRMW: false); } @@ -295,7 +299,7 @@ public void UpsertTest() 
[Category(TestUtils.FasterKVTestCategory)] [Category(TestUtils.ReadCacheTestCategory)] [Category(TestUtils.SmokeTestCategory)] - public void RMWTest() + public void RMWCacheRecordTest() { DoUpdateTest(useRMW: true); } @@ -462,7 +466,7 @@ public void EvictFromReadCacheToLockTableTest() Dictionary locks = new() { { lowChainKey, LockType.Exclusive }, - { midChainKey, LockType.Shared}, + { midChainKey, LockType.Shared }, { highChainKey, LockType.Exclusive } }; diff --git a/cs/test/TestUtils.cs b/cs/test/TestUtils.cs index e052c2bfc..e03ee5754 100644 --- a/cs/test/TestUtils.cs +++ b/cs/test/TestUtils.cs @@ -8,6 +8,7 @@ using FASTER.devices; using System.Threading; using System.Runtime.InteropServices; +using System.Linq; namespace FASTER.test { @@ -18,6 +19,7 @@ internal static class TestUtils internal const string FasterKVTestCategory = "FasterKV"; internal const string LockableUnsafeContextTestCategory = "LockableUnsafeContext"; internal const string ReadCacheTestCategory = "ReadCache"; + internal const string LockTestCategory = "Locking"; /// /// Delete a directory recursively @@ -183,5 +185,35 @@ internal static (Status status, TOutput output) GetSinglePendingResult first, Action second, Action verification, int randSleepRangeMs = -1) + { + Thread[] threads = new Thread[2]; + + var rng = new Random(101); + for (var iter = 0; iter < count; ++iter) + { + var arg = rng.Next(count); + threads[0] = new Thread(() => first(arg)); + threads[1] = new Thread(() => second(arg)); + + var doSleep = randSleepRangeMs >= 0; + for (int t = 0; t < threads.Length; t++) + { + if (doSleep) + { + if (randSleepRangeMs > 0) + Thread.Sleep(rng.Next(10)); + else + Thread.Yield(); + } + threads[t].Start(); + } + for (int t = 0; t < threads.Length; t++) + threads[t].Join(); + + verification(arg); + } + } } } diff --git a/docs/_docs/30-fasterkv-manual-locking.md b/docs/_docs/30-fasterkv-manual-locking.md index 046da82e7..6a9af6d74 100644 --- a/docs/_docs/30-fasterkv-manual-locking.md +++ 
b/docs/_docs/30-fasterkv-manual-locking.md @@ -73,12 +73,12 @@ TODO: Add sample with `luContext.LocalCurrentEpoch`. This section covers the internal design and implementation of manual locking. Although Sealing a record is not strictly a lock, it is still part of this document because it is closely intertwined with [Record Transfers](#record-transfers). Manual locking and checking is integrated into `FASTERImpl.cs` methods: -- The locking and unlocking are implemented in `InternalLock` -- Other record operations that must consider locks are `InternalUpsert`, `InternalRead` and `InternalCompletePendingRead`, `InternalRMW` and `InternalCompletePendingRMW`, and `InternalDelete`. These modifications are exposed via the `Lock()` and `Unlock()`. +- The locking and unlocking are implemented in `InternalLock`, which is called by the `Lock()` and `Unlock()` methods of `LockableUnsafeContext`. +- Other record operations that must consider locks are `InternalUpsert`, `InternalRead` and `InternalCompletePendingRead`, `InternalRMW` and `InternalCompletePendingRMW`, and `InternalDelete`. Because epoch protection is done by user calls, LockableUnsafeContext methods call the internal ContextRead etc. methods, which are called by the API methods that do Resume and Suspend of epoch protection. -At a high level, `Lock()` and `Unlock()` call `InternalLock()`. Locking does not issue PENDING operations to retrieve on-disk data, and locking/unlocking is designed to avoid pending I/O operations by use of a [`LockTable`](#locktable-overview) consisting of {`TKey`, `RecordInfo`} pairs, where `TKey` is the FasterKV Key type and `RecordInfo` is used to perform the locking/unlocking. +At a high level, `Lock()` and `Unlock()` call `InternalLock()`. 
Locking does not issue PENDING operations to retrieve on-disk data, and locking/unlocking is designed to avoid pending I/O operations by use of a [`LockTable`](#locktable-overview) consisting of {`TKey`, `RecordInfo`} pairs, where `TKey` is the FasterKV Key type and `RecordInfo` is used to perform the locking/unlocking. If a record to be locked is not found in memory (above HeadAddress), then a record is created in the `LockTable`; if this record is subsequently read from the disk, the locks from the `LockTable` are applied (and the `LockTable` entry is removed). Locking and unlocking use bits in the `RecordInfo` header to obtain one exclusive lock or up to 64 shared locks. Because locking does not affect data, even records in the ReadOnly region may be locked and unlocked directly. @@ -91,11 +91,13 @@ The following sections refer to the following two in the `RecordInfo`: - Sealing is done via `RecordInfo.Seal`. This is used in locking scenarios rather than a sequence of "CAS to set Sealed; test Sealed bit because the after-Seal locking is fuzzy; we don't know whether the record was CTT'd before or after a post-Seal lock, and thus we don't know if the transferred record "owns" our lock. `RecordInfo.Seal` does a CAS with both the XLock and Seal bits, then Unlocks the XLock bit; this ensures it works whether SupportsLocking is true or false. It returns true if successsful or false if another thread Sealed the record. However, `LockableUnsafeContext` must not try to lock as it owns the lock already. - **Invalid**: This is a well-known bit from v1 included here for clarity: its behavior is that the record is to be skipped, using its `.PreviousAddress` to move along the chain. This has relevance to some areas of [Record Transfers](#record-transfers), particularly with respect to the `ReadCache`. -Additionally, the `SupportsLocking` flag has been moved from IFunctions to a `FasterKV` constructor argument. This value must be uniform across all asessions. 
It is only to control the locking done by FasterKV; this replaces the concept of user-controlled locking that was provided with the `IFunctions` methods for concurrent record access. +Additionally, `IFunctions` has been modified: +- The `SupportsLocking` flag has been moved from `IFunctions` to a `FasterKV` constructor argument. This value must be uniform across all asessions. It is only to control the locking done by FasterKV; this replaces the concept of user-controlled locking that was provided with the `IFunctions` methods for concurrent record access. +- All locking methods on `IFunctions` have been removed; locking is now done internally only, using the `RecordInfo` bits, and controlled by `SupportsLocking`. ### LockTable Overview -For records not found in memory, the `LockTable` is used. The semantics of `LockTable` entries are as follow. This is a conceptual view; implementation details are described in subsequent sections: +For records not found in memory, the `LockTable` is used. The semantics of `LockTable` entries are as follow. This is a conceptual view; implementation details are described in subsequent sections. - On a `Lock` call, if the key is not found in memory, the `LockTable` is searched for the Key. - if the RecordInfo is in the `LockTable` it is locked as specified - else a new Tentative record is added and subsequently finalized as in [Insertion to LockTable due to Lock](#insertion-to-locktable-due-to-lock) @@ -113,7 +115,7 @@ For records not found in memory, the `LockTable` is used. The semantics of `Lock - it removes the Seal from the entry in the `LockTable`, or deletes the entry if it was Tentative - Because `LockTable` use does not verify that the key actually exists (as it does not issue a pending operation to ensure the requested key, and not a collision, is found in the on-disk portion), it is possible that keys will exist in the `LockTable` that do not in fact exist in the log. 
This is fine; if we do more than `Lock` them, then they will be added to the log at that time, and the locks applied to them. -We implement the `LockTable` with a `ConcurrentDictionary` because the use is expected to be very low--the vast majority of locks should not last long enough to be evicted from either the `ReadCache` or main memory. Thus, most operations on the `LockTable` will simply compare to `Count > 0`. +We implement the `LockTable` with a `ConcurrentDictionary` because the use is expected to be very low--the vast majority of locks should not last long enough to be evicted from either the `ReadCache` or main memory. Thus, most operations on the `LockTable` will simply compare to an internal `approximateCount > 0`; the stock `Count` property locks all sub-tables within the `ConcurrentDictionary`. #### Insertion to LockTable due to Lock @@ -251,7 +253,7 @@ We must clear in-memory records' lock bits during FoldOver recovery. ### FASTER Operations -Following are the 4 FASTER operations and their flow for the various lock states. +Following are the 4 FASTER data operations and Lock/Unlock, and their flow for the various lock states. Abbreviations: - LockOp: The `LockOperations` instance passed to one of the InternalXxx methods. 
From bf64ffb6d0bb6a1ac0f20f6b11607eec62855136 Mon Sep 17 00:00:00 2001 From: TedHartMS <15467143+TedHartMS@users.noreply.github.com> Date: Fri, 7 Jan 2022 01:37:44 -0800 Subject: [PATCH 15/25] Remove manual locking bits in Recovery; minor refactorings in Recovery.cs to reduce duplication --- cs/samples/SecondaryReaderStore/Program.cs | 6 +- cs/src/core/Index/Common/Contexts.cs | 5 + cs/src/core/Index/Common/RecordInfo.cs | 2 + cs/src/core/Index/Recovery/Recovery.cs | 259 +++++++++++---------- cs/test/LockableUnsafeContextTests.cs | 220 +++++++++++++++-- cs/test/TestUtils.cs | 3 + docs/_docs/30-fasterkv-manual-locking.md | 6 +- 7 files changed, 362 insertions(+), 139 deletions(-) diff --git a/cs/samples/SecondaryReaderStore/Program.cs b/cs/samples/SecondaryReaderStore/Program.cs index 08cc41a5e..59e3f939f 100644 --- a/cs/samples/SecondaryReaderStore/Program.cs +++ b/cs/samples/SecondaryReaderStore/Program.cs @@ -3,7 +3,6 @@ using FASTER.core; using System; -using System.Diagnostics; using System.IO; using System.Threading; @@ -57,7 +56,7 @@ static void PrimaryWriter() if (key > 0 && key % checkpointFreq == 0) { Console.WriteLine($"Checkpointing primary until key {key - 1}"); - primaryStore.TakeHybridLogCheckpointAsync(CheckpointType.Snapshot).GetAwaiter().GetResult(); + primaryStore.TakeHybridLogCheckpointAsync(CheckpointType.Snapshot).AsTask().GetAwaiter().GetResult(); Console.WriteLine($"Upserting keys at primary starting from {key}"); } @@ -66,7 +65,7 @@ static void PrimaryWriter() } Console.WriteLine($"Checkpointing primary until key {numOps - 1}"); - primaryStore.TakeHybridLogCheckpointAsync(CheckpointType.Snapshot).GetAwaiter().GetResult(); + primaryStore.TakeHybridLogCheckpointAsync(CheckpointType.Snapshot).AsTask().GetAwaiter().GetResult(); Console.WriteLine("Shutting down primary"); } @@ -111,6 +110,5 @@ static void SecondaryReader() } } - } } diff --git a/cs/src/core/Index/Common/Contexts.cs b/cs/src/core/Index/Common/Contexts.cs index 9f1cd995f..b341ead08 
100644 --- a/cs/src/core/Index/Common/Contexts.cs +++ b/cs/src/core/Index/Common/Contexts.cs @@ -404,6 +404,9 @@ public void Initialize(StreamReader reader) value = reader.ReadLine(); deltaTailAddress = long.Parse(value); + value = reader.ReadLine(); + manualLockingActive = bool.Parse(value); + value = reader.ReadLine(); var numSessions = int.Parse(value); @@ -512,6 +515,7 @@ public byte[] ToByteArray() writer.WriteLine(headAddress); writer.WriteLine(beginAddress); writer.WriteLine(deltaTailAddress); + writer.WriteLine(manualLockingActive); writer.WriteLine(checkpointTokens.Count); foreach (var kvp in checkpointTokens) @@ -562,6 +566,7 @@ public readonly void DebugPrint() Debug.WriteLine("Head Address: {0}", headAddress); Debug.WriteLine("Begin Address: {0}", beginAddress); Debug.WriteLine("Delta Tail Address: {0}", deltaTailAddress); + Debug.WriteLine("Manual Locking Active: {0}", manualLockingActive); Debug.WriteLine("Num sessions recovered: {0}", continueTokens.Count); Debug.WriteLine("Recovered sessions: "); foreach (var sessionInfo in continueTokens.Take(10)) diff --git a/cs/src/core/Index/Common/RecordInfo.cs b/cs/src/core/Index/Common/RecordInfo.cs index 4cb4b4fc4..d5b34997c 100644 --- a/cs/src/core/Index/Common/RecordInfo.cs +++ b/cs/src/core/Index/Common/RecordInfo.cs @@ -78,6 +78,8 @@ public static void WriteInfo(ref RecordInfo info, bool inNewVersion, bool tombst public bool IsLockedShared => (word & kSharedLockMaskInWord) != 0; + public void ClearLocks() => word &= ~(kExclusiveLockBitMask | kSharedLockMaskInWord); + public bool IsIntermediate => (word & (kTentativeBitMask | kSealedBitMask)) != 0; /// diff --git a/cs/src/core/Index/Recovery/Recovery.cs b/cs/src/core/Index/Recovery/Recovery.cs index e79df473f..c72f0bea6 100644 --- a/cs/src/core/Index/Recovery/Recovery.cs +++ b/cs/src/core/Index/Recovery/Recovery.cs @@ -98,6 +98,22 @@ internal void Dispose() } } + internal struct RecoveryOptions + { + internal long headAddress; + internal long tailAddress; 
+ internal bool undoNextVersion; + + internal bool clearLocks => this.headAddress != Constants.kInvalidAddress; + + internal RecoveryOptions(bool clearLocks, long headAddress, long tailAddress, bool undoNextVer) + { + this.headAddress = clearLocks ? headAddress : Constants.kInvalidAddress; + this.tailAddress = clearLocks ? tailAddress : Constants.kInvalidAddress; + this.undoNextVersion = undoNextVer; + } + } + public partial class FasterKV : FasterBase, IFasterKV { private void FindRecoveryInfo(long requestedVersion, out HybridLogCheckpointInfo recoveredHlcInfo, @@ -270,12 +286,13 @@ private void InternalRecover(IndexCheckpointInfo recoveredICInfo, HybridLogCheck if (!SetRecoveryPageRanges(recoveredHLCInfo, numPagesToPreload, recoverFromAddress, out long tailAddress, out long headAddress, out long scanFromAddress)) return; + RecoveryOptions options = new(recoveredHLCInfo.info.manualLockingActive, headAddress, tailAddress, undoNextVersion); long readOnlyAddress; // Make index consistent for version v if (recoveredHLCInfo.info.useSnapshotFile == 0) { - RecoverHybridLog(scanFromAddress, recoverFromAddress, recoveredHLCInfo.info.finalLogicalAddress, recoveredHLCInfo.info.nextVersion, CheckpointType.FoldOver, undoNextVersion); + RecoverHybridLog(scanFromAddress, recoverFromAddress, recoveredHLCInfo.info.finalLogicalAddress, recoveredHLCInfo.info.nextVersion, CheckpointType.FoldOver, options); readOnlyAddress = tailAddress; } else @@ -284,26 +301,15 @@ private void InternalRecover(IndexCheckpointInfo recoveredICInfo, HybridLogCheck headAddress = recoveredHLCInfo.info.flushedLogicalAddress; // First recover from index starting point (fromAddress) to snapshot starting point (flushedLogicalAddress) - RecoverHybridLog(scanFromAddress, recoverFromAddress, recoveredHLCInfo.info.flushedLogicalAddress, recoveredHLCInfo.info.nextVersion, CheckpointType.Snapshot, undoNextVersion); + RecoverHybridLog(scanFromAddress, recoverFromAddress, recoveredHLCInfo.info.flushedLogicalAddress, 
recoveredHLCInfo.info.nextVersion, CheckpointType.Snapshot, options); // Then recover snapshot into mutable region - RecoverHybridLogFromSnapshotFile(recoveredHLCInfo.info.flushedLogicalAddress, recoverFromAddress, recoveredHLCInfo.info.finalLogicalAddress, recoveredHLCInfo.info.startLogicalAddress, recoveredHLCInfo.info.snapshotFinalLogicalAddress, recoveredHLCInfo.info.nextVersion, recoveredHLCInfo.info.guid, undoNextVersion, recoveredHLCInfo.deltaLog, recoverTo); + RecoverHybridLogFromSnapshotFile(recoveredHLCInfo.info.flushedLogicalAddress, recoverFromAddress, recoveredHLCInfo.info.finalLogicalAddress, recoveredHLCInfo.info.startLogicalAddress, + recoveredHLCInfo.info.snapshotFinalLogicalAddress, recoveredHLCInfo.info.nextVersion, recoveredHLCInfo.info.guid, options, recoveredHLCInfo.deltaLog, recoverTo); readOnlyAddress = recoveredHLCInfo.info.flushedLogicalAddress; } - // Adjust head and read-only address post-recovery - var _head = (1 + (tailAddress >> hlog.LogPageSizeBits) - hlog.GetCapacityNumPages()) << hlog.LogPageSizeBits; - if (_head > headAddress) - headAddress = _head; - if (readOnlyAddress < headAddress) - readOnlyAddress = headAddress; - - // Recover session information - hlog.RecoveryReset(tailAddress, headAddress, recoveredHLCInfo.info.beginAddress, readOnlyAddress); - _recoveredSessions = recoveredHLCInfo.info.continueTokens; - - checkpointManager.OnRecovery(recoveredICInfo.info.token, recoveredHLCInfo.info.guid); - recoveredHLCInfo.Dispose(); + DoPostRecovery(recoveredICInfo, recoveredHLCInfo, tailAddress, ref headAddress, ref readOnlyAddress); } private async ValueTask InternalRecoverAsync(IndexCheckpointInfo recoveredICInfo, HybridLogCheckpointInfo recoveredHLCInfo, int numPagesToPreload, bool undoNextVersion, long recoverTo, CancellationToken cancellationToken) @@ -313,12 +319,14 @@ private async ValueTask InternalRecoverAsync(IndexCheckpointInfo recoveredICInfo if (!SetRecoveryPageRanges(recoveredHLCInfo, numPagesToPreload, 
recoverFromAddress, out long tailAddress, out long headAddress, out long scanFromAddress)) return; + RecoveryOptions options = new(recoveredHLCInfo.info.manualLockingActive, headAddress, tailAddress, undoNextVersion); long readOnlyAddress; // Make index consistent for version v if (recoveredHLCInfo.info.useSnapshotFile == 0) { - await RecoverHybridLogAsync(scanFromAddress, recoverFromAddress, recoveredHLCInfo.info.finalLogicalAddress, recoveredHLCInfo.info.nextVersion, CheckpointType.FoldOver, undoNextVersion, cancellationToken).ConfigureAwait(false); + await RecoverHybridLogAsync(scanFromAddress, recoverFromAddress, recoveredHLCInfo.info.finalLogicalAddress, recoveredHLCInfo.info.nextVersion, CheckpointType.FoldOver, + options, cancellationToken).ConfigureAwait(false); readOnlyAddress = tailAddress; } else @@ -327,14 +335,20 @@ private async ValueTask InternalRecoverAsync(IndexCheckpointInfo recoveredICInfo headAddress = recoveredHLCInfo.info.flushedLogicalAddress; // First recover from index starting point (fromAddress) to snapshot starting point (flushedLogicalAddress) - await RecoverHybridLogAsync (scanFromAddress, recoverFromAddress, recoveredHLCInfo.info.flushedLogicalAddress, recoveredHLCInfo.info.nextVersion, CheckpointType.Snapshot, undoNextVersion, cancellationToken).ConfigureAwait(false); + await RecoverHybridLogAsync(scanFromAddress, recoverFromAddress, recoveredHLCInfo.info.flushedLogicalAddress, recoveredHLCInfo.info.nextVersion, CheckpointType.Snapshot, + new RecoveryOptions(recoveredHLCInfo.info.manualLockingActive, headAddress, tailAddress, undoNextVersion), cancellationToken).ConfigureAwait(false); // Then recover snapshot into mutable region await RecoverHybridLogFromSnapshotFileAsync(recoveredHLCInfo.info.flushedLogicalAddress, recoverFromAddress, recoveredHLCInfo.info.finalLogicalAddress, recoveredHLCInfo.info.startLogicalAddress, - recoveredHLCInfo.info.snapshotFinalLogicalAddress, recoveredHLCInfo.info.nextVersion, recoveredHLCInfo.info.guid, 
undoNextVersion, recoveredHLCInfo.deltaLog, recoverTo, cancellationToken).ConfigureAwait(false); + recoveredHLCInfo.info.snapshotFinalLogicalAddress, recoveredHLCInfo.info.nextVersion, recoveredHLCInfo.info.guid, options, recoveredHLCInfo.deltaLog, recoverTo, cancellationToken).ConfigureAwait(false); readOnlyAddress = recoveredHLCInfo.info.flushedLogicalAddress; } + DoPostRecovery(recoveredICInfo, recoveredHLCInfo, tailAddress, ref headAddress, ref readOnlyAddress); + } + + private void DoPostRecovery(IndexCheckpointInfo recoveredICInfo, HybridLogCheckpointInfo recoveredHLCInfo, long tailAddress, ref long headAddress, ref long readOnlyAddress) + { // Adjust head and read-only address post-recovery var _head = (1 + (tailAddress >> hlog.LogPageSizeBits) - hlog.GetCapacityNumPages()) << hlog.LogPageSizeBits; if (_head > headAddress) @@ -441,7 +455,7 @@ private bool SetRecoveryPageRanges(HybridLogCheckpointInfo recoveredHLCInfo, int return true; } - private void RecoverHybridLog(long scanFromAddress, long recoverFromAddress, long untilAddress, long nextVersion, CheckpointType checkpointType, bool undoNextVersion) + private void RecoverHybridLog(long scanFromAddress, long recoverFromAddress, long untilAddress, long nextVersion, CheckpointType checkpointType, RecoveryOptions options) { if (untilAddress <= scanFromAddress) return; @@ -456,28 +470,13 @@ private void RecoverHybridLog(long scanFromAddress, long recoverFromAddress, lon int pageIndex = hlog.GetPageIndexForPage(page); recoveryStatus.WaitRead(pageIndex); - if (ProcessReadPage(recoverFromAddress, untilAddress, nextVersion, undoNextVersion, recoveryStatus, page, pageIndex)) - { - // Page was modified due to undoFutureVersion. Flush it to disk; the callback issues the after-capacity read request if necessary. 
- hlog.AsyncFlushPages(page, 1, AsyncFlushPageCallbackForRecovery, recoveryStatus); - continue; - } - - // We do not need to flush - recoveryStatus.flushStatus[pageIndex] = FlushStatus.Done; - - // Issue next read if there are more pages past 'capacity' from this one. - if (page + capacity < endPage) - { - recoveryStatus.readStatus[pageIndex] = ReadStatus.Pending; - hlog.AsyncReadPagesFromDevice(page + capacity, 1, untilAddress, hlog.AsyncReadPagesCallbackForRecovery, recoveryStatus); - } + ProcessReadPage(recoverFromAddress, untilAddress, nextVersion, options, recoveryStatus, endPage, capacity, page, pageIndex); } WaitUntilAllPagesHaveBeenFlushed(startPage, endPage, recoveryStatus); } - private async ValueTask RecoverHybridLogAsync(long scanFromAddress, long recoverFromAddress, long untilAddress, long nextVersion, CheckpointType checkpointType, bool undoNextVersion, CancellationToken cancellationToken) + private async ValueTask RecoverHybridLogAsync(long scanFromAddress, long recoverFromAddress, long untilAddress, long nextVersion, CheckpointType checkpointType, RecoveryOptions options, CancellationToken cancellationToken) { if (untilAddress <= scanFromAddress) return; @@ -492,22 +491,7 @@ private async ValueTask RecoverHybridLogAsync(long scanFromAddress, long recover int pageIndex = hlog.GetPageIndexForPage(page); await recoveryStatus.WaitReadAsync(pageIndex, cancellationToken).ConfigureAwait(false); - if (ProcessReadPage(recoverFromAddress, untilAddress, nextVersion, undoNextVersion, recoveryStatus, page, pageIndex)) - { - // Page was modified due to undoFutureVersion. Flush it to disk; the callback issues the after-capacity read request if necessary. - hlog.AsyncFlushPages(page, 1, AsyncFlushPageCallbackForRecovery, recoveryStatus); - continue; - } - - // We do not need to flush - recoveryStatus.flushStatus[pageIndex] = FlushStatus.Done; - - // Issue next read if there are more pages past 'capacity' from this one. 
- if (page + capacity < endPage) - { - recoveryStatus.readStatus[pageIndex] = ReadStatus.Pending; - hlog.AsyncReadPagesFromDevice(page + capacity, 1, untilAddress, hlog.AsyncReadPagesCallbackForRecovery, recoveryStatus); - } + ProcessReadPage(recoverFromAddress, untilAddress, nextVersion, options, recoveryStatus, endPage, capacity, page, pageIndex); } await WaitUntilAllPagesHaveBeenFlushedAsync(startPage, endPage, recoveryStatus, cancellationToken).ConfigureAwait(false); @@ -528,29 +512,65 @@ private RecoveryStatus GetPageRangesToRead(long scanFromAddress, long untilAddre return new RecoveryStatus(capacity, endPage, untilAddress, checkpointType); } - private bool ProcessReadPage(long recoverFromAddress, long untilAddress, long nextVersion, bool undoNextVersion, RecoveryStatus recoveryStatus, long page, int pageIndex) + private void ProcessReadPage(long recoverFromAddress, long untilAddress, long nextVersion, RecoveryOptions options, RecoveryStatus recoveryStatus, long endPage, int capacity, long page, int pageIndex) + { + if (ProcessReadPage(recoverFromAddress, untilAddress, nextVersion, options, recoveryStatus, page, pageIndex)) + { + // Page was modified due to undoFutureVersion. Flush it to disk; the callback issues the after-capacity read request if necessary. + hlog.AsyncFlushPages(page, 1, AsyncFlushPageCallbackForRecovery, recoveryStatus); + return; + } + + // We do not need to flush + recoveryStatus.flushStatus[pageIndex] = FlushStatus.Done; + + // Issue next read if there are more pages past 'capacity' from this one. 
+ if (page + capacity < endPage) + { + recoveryStatus.readStatus[pageIndex] = ReadStatus.Pending; + hlog.AsyncReadPagesFromDevice(page + capacity, 1, untilAddress, hlog.AsyncReadPagesCallbackForRecovery, recoveryStatus); + } + } + + private bool ProcessReadPage(long recoverFromAddress, long untilAddress, long nextVersion, RecoveryOptions options, RecoveryStatus recoveryStatus, long page, int pageIndex) { var startLogicalAddress = hlog.GetStartLogicalAddress(page); var endLogicalAddress = hlog.GetStartLogicalAddress(page + 1); - if (recoverFromAddress < endLogicalAddress) + if (options.clearLocks) + { + if (options.headAddress >= endLogicalAddress) + return false; + } + else if (recoverFromAddress >= endLogicalAddress) + return false; + + var pageFromAddress = 0L; + var pageUntilAddress = hlog.GetPageSize(); + if (options.clearLocks) + { + if (options.headAddress > startLogicalAddress) + pageFromAddress = hlog.GetOffsetInPage(options.headAddress); + + if (options.tailAddress < endLogicalAddress) + pageUntilAddress = hlog.GetOffsetInPage(options.tailAddress); + } + else { - var pageFromAddress = 0L; if (recoverFromAddress > startLogicalAddress) pageFromAddress = hlog.GetOffsetInPage(recoverFromAddress); - var pageUntilAddress = hlog.GetPageSize(); if (untilAddress < endLogicalAddress) pageUntilAddress = hlog.GetOffsetInPage(untilAddress); + } - var physicalAddress = hlog.GetPhysicalAddress(startLogicalAddress); - if (RecoverFromPage(recoverFromAddress, pageFromAddress, pageUntilAddress, startLogicalAddress, physicalAddress, nextVersion, undoNextVersion)) - { - // The current page was modified due to undoFutureVersion; caller will flush it to storage and issue a read request if necessary. 
- recoveryStatus.readStatus[pageIndex] = ReadStatus.Pending; - recoveryStatus.flushStatus[pageIndex] = FlushStatus.Pending; - return true; - } + var physicalAddress = hlog.GetPhysicalAddress(startLogicalAddress); + if (RecoverFromPage(recoverFromAddress, pageFromAddress, pageUntilAddress, startLogicalAddress, physicalAddress, nextVersion, options.undoNextVersion, options.clearLocks)) + { + // The current page was modified due to undoFutureVersion; caller will flush it to storage and issue a read request if necessary. + recoveryStatus.readStatus[pageIndex] = ReadStatus.Pending; + recoveryStatus.flushStatus[pageIndex] = FlushStatus.Pending; + return true; } return false; @@ -568,7 +588,7 @@ private async ValueTask WaitUntilAllPagesHaveBeenFlushedAsync(long startPage, lo await recoveryStatus.WaitFlushAsync(hlog.GetPageIndexForPage(page), cancellationToken).ConfigureAwait(false); } - private void RecoverHybridLogFromSnapshotFile(long scanFromAddress, long recoverFromAddress, long untilAddress, long snapshotStartAddress, long snapshotEndAddress, long nextVersion, Guid guid, bool undoNextVersion, DeltaLog deltaLog, long recoverTo) + private void RecoverHybridLogFromSnapshotFile(long scanFromAddress, long recoverFromAddress, long untilAddress, long snapshotStartAddress, long snapshotEndAddress, long nextVersion, Guid guid, RecoveryOptions options, DeltaLog deltaLog, long recoverTo) { GetSnapshotPageRangesToRead(scanFromAddress, untilAddress, snapshotStartAddress, snapshotEndAddress, guid, out long startPage, out long endPage, out long snapshotEndPage, out int capacity, out var recoveryStatus, out int numPagesToReadFirst); @@ -598,34 +618,14 @@ private void RecoverHybridLogFromSnapshotFile(long scanFromAddress, long recover } } - // Apply delta - hlog.ApplyDelta(deltaLog, page, end, recoverTo); - - for (long p = page; p < end; p++) - { - int pageIndex = hlog.GetPageIndexForPage(p); - - if (recoverFromAddress < hlog.GetStartLogicalAddress(p + 1) && recoverFromAddress < 
untilAddress) - ProcessReadSnapshotPage(scanFromAddress, untilAddress, nextVersion, undoNextVersion, recoveryStatus, p, pageIndex); - - // Issue next read - if (p + capacity < endPage) - { - // Flush snapshot page to main log - // Flush callback will issue further reads or page clears - recoveryStatus.flushStatus[pageIndex] = FlushStatus.Pending; - if (p + capacity < snapshotEndPage) - recoveryStatus.readStatus[pageIndex] = ReadStatus.Pending; - hlog.AsyncFlushPages(p, 1, AsyncFlushPageCallbackForRecovery, recoveryStatus); - } - } + ApplyDelta(scanFromAddress, recoverFromAddress, untilAddress, nextVersion, options, deltaLog, recoverTo, endPage, snapshotEndPage, capacity, recoveryStatus, page, end); } WaitUntilAllPagesHaveBeenFlushed(startPage, endPage, recoveryStatus); recoveryStatus.Dispose(); } - private async ValueTask RecoverHybridLogFromSnapshotFileAsync(long scanFromAddress, long recoverFromAddress, long untilAddress, long snapshotStartAddress, long snapshotEndAddress, long nextVersion, Guid guid, bool undoNextVersion, DeltaLog deltaLog, long recoverTo, CancellationToken cancellationToken) + private async ValueTask RecoverHybridLogFromSnapshotFileAsync(long scanFromAddress, long recoverFromAddress, long untilAddress, long snapshotStartAddress, long snapshotEndAddress, long nextVersion, Guid guid, RecoveryOptions options, DeltaLog deltaLog, long recoverTo, CancellationToken cancellationToken) { GetSnapshotPageRangesToRead(scanFromAddress, untilAddress, snapshotStartAddress, snapshotEndAddress, guid, out long startPage, out long endPage, out long snapshotEndPage, out int capacity, out var recoveryStatus, out int numPagesToReadFirst); @@ -655,31 +655,37 @@ private async ValueTask RecoverHybridLogFromSnapshotFileAsync(long scanFromAddre } } - // Apply delta - hlog.ApplyDelta(deltaLog, page, end, recoverTo); + ApplyDelta(scanFromAddress, recoverFromAddress, untilAddress, nextVersion, options, deltaLog, recoverTo, endPage, snapshotEndPage, capacity, recoveryStatus, 
page, end); + } - for (long p = page; p < end; p++) - { - int pageIndex = hlog.GetPageIndexForPage(p); + await WaitUntilAllPagesHaveBeenFlushedAsync(startPage, endPage, recoveryStatus, cancellationToken).ConfigureAwait(false); + recoveryStatus.Dispose(); + } - if (recoverFromAddress < hlog.GetStartLogicalAddress(p + 1) && recoverFromAddress < untilAddress) - ProcessReadSnapshotPage(scanFromAddress, untilAddress, nextVersion, undoNextVersion, recoveryStatus, p, pageIndex); + private void ApplyDelta(long scanFromAddress, long recoverFromAddress, long untilAddress, long nextVersion, RecoveryOptions options, DeltaLog deltaLog, long recoverTo, long endPage, long snapshotEndPage, int capacity, RecoveryStatus recoveryStatus, long page, long end) + { + hlog.ApplyDelta(deltaLog, page, end, recoverTo); - // Issue next read - if (p + capacity < endPage) - { - // Flush snapshot page to main log - // Flush callback will issue further reads or page clears - recoveryStatus.flushStatus[pageIndex] = FlushStatus.Pending; - if (p + capacity < snapshotEndPage) - recoveryStatus.readStatus[pageIndex] = ReadStatus.Pending; - hlog.AsyncFlushPages(p, 1, AsyncFlushPageCallbackForRecovery, recoveryStatus); - } + for (long p = page; p < end; p++) + { + int pageIndex = hlog.GetPageIndexForPage(p); + + var endLogicalAddress = hlog.GetStartLogicalAddress(p + 1); + if ((recoverFromAddress < endLogicalAddress && recoverFromAddress < untilAddress) + || (options.clearLocks && options.headAddress < endLogicalAddress)) + ProcessReadSnapshotPage(scanFromAddress, untilAddress, nextVersion, options, recoveryStatus, p, pageIndex); + + // Issue next read + if (p + capacity < endPage) + { + // Flush snapshot page to main log + // Flush callback will issue further reads or page clears + recoveryStatus.flushStatus[pageIndex] = FlushStatus.Pending; + if (p + capacity < snapshotEndPage) + recoveryStatus.readStatus[pageIndex] = ReadStatus.Pending; + hlog.AsyncFlushPages(p, 1, AsyncFlushPageCallbackForRecovery, 
recoveryStatus); } } - - await WaitUntilAllPagesHaveBeenFlushedAsync(startPage, endPage, recoveryStatus, cancellationToken).ConfigureAwait(false); - recoveryStatus.Dispose(); } private void GetSnapshotPageRangesToRead(long fromAddress, long untilAddress, long snapshotStartAddress, long snapshotEndAddress, Guid guid, out long startPage, out long endPage, out long snapshotEndPage, out int capacity, @@ -715,14 +721,14 @@ private void GetSnapshotPageRangesToRead(long fromAddress, long untilAddress, lo numPagesToReadFirst = Math.Min(capacity, totalPagesToRead); } - private void ProcessReadSnapshotPage(long fromAddress, long untilAddress, long nextVersion, bool undoNextVersion, RecoveryStatus recoveryStatus, long page, int pageIndex) + private void ProcessReadSnapshotPage(long fromAddress, long untilAddress, long nextVersion, RecoveryOptions options, RecoveryStatus recoveryStatus, long page, int pageIndex) { // Page at hand var startLogicalAddress = hlog.GetStartLogicalAddress(page); var endLogicalAddress = hlog.GetStartLogicalAddress(page + 1); - // Perform recovery if page in fuzzy portion of the log - if ((fromAddress < endLogicalAddress) && (fromAddress < untilAddress)) + // Perform recovery if page in fuzzy portion of the log or clearing locks + if ((fromAddress < endLogicalAddress && fromAddress < untilAddress) || (options.clearLocks && options.headAddress < endLogicalAddress)) { /* * Handling corner-cases: @@ -730,18 +736,30 @@ private void ProcessReadSnapshotPage(long fromAddress, long untilAddress, long n * When fromAddress is in the middle of the page, then start recovery only from corresponding offset * in page. Similarly, if untilAddress falls in the middle of the page, perform recovery only until that * offset. Otherwise, scan the entire page [0, PageSize) + * + * If options.clearLocks, the read of this page overwrote the prior lock clearing, so we must redo it here. 
*/ - var pageFromAddress = 0L; - if (fromAddress > startLogicalAddress && fromAddress < endLogicalAddress) - pageFromAddress = hlog.GetOffsetInPage(fromAddress); + var pageFromAddress = 0L; var pageUntilAddress = hlog.GetPageSize(); - if (endLogicalAddress > untilAddress) - pageUntilAddress = hlog.GetOffsetInPage(untilAddress); + if (options.clearLocks) + { + if (options.headAddress > startLogicalAddress && options.headAddress < endLogicalAddress) + pageFromAddress = hlog.GetOffsetInPage(options.headAddress); + if (endLogicalAddress > options.tailAddress) + pageUntilAddress = hlog.GetOffsetInPage(options.tailAddress); + } + else + { + if (fromAddress > startLogicalAddress && fromAddress < endLogicalAddress) + pageFromAddress = hlog.GetOffsetInPage(fromAddress); + if (endLogicalAddress > untilAddress) + pageUntilAddress = hlog.GetOffsetInPage(untilAddress); + } var physicalAddress = hlog.GetPhysicalAddress(startLogicalAddress); RecoverFromPage(fromAddress, pageFromAddress, pageUntilAddress, - startLogicalAddress, physicalAddress, nextVersion, undoNextVersion); + startLogicalAddress, physicalAddress, nextVersion, options.undoNextVersion, options.clearLocks); } recoveryStatus.flushStatus[pageIndex] = FlushStatus.Done; @@ -752,7 +770,7 @@ private unsafe bool RecoverFromPage(long startRecoveryAddress, long untilLogicalAddressInPage, long pageLogicalAddress, long pagePhysicalAddress, - long nextVersion, bool undoNextVersion) + long nextVersion, bool undoNextVersion, bool clearLocks) { bool touched = false; @@ -791,6 +809,11 @@ private unsafe bool RecoverFromPage(long startRecoveryAddress, entry.Pending = false; entry.Tentative = false; bucket->bucket_entries[slot] = entry.word; + if (clearLocks && info.IsLocked) + { + // We do not set 'touched' here as there is no need to write these pages back + info.ClearLocks(); + } } else { diff --git a/cs/test/LockableUnsafeContextTests.cs b/cs/test/LockableUnsafeContextTests.cs index c5c39f3fb..fb5e74861 100644 --- 
a/cs/test/LockableUnsafeContextTests.cs +++ b/cs/test/LockableUnsafeContextTests.cs @@ -9,6 +9,8 @@ using FASTER.core; using NUnit.Framework; using FASTER.test.ReadCacheTests; +using System.Threading.Tasks; +using System.Runtime.ExceptionServices; namespace FASTER.test.LockableUnsafeContext { @@ -55,13 +57,18 @@ class LockableUnsafeContextTests private IDevice log; [SetUp] - public void Setup() - { - TestUtils.DeleteDirectory(TestUtils.MethodTestDir, wait: true); + public void Setup() => Setup(forRecovery: false); - log = Devices.CreateLogDevice(Path.Combine(TestUtils.MethodTestDir, "test.log"), deleteOnClose: true); + public void Setup(bool forRecovery) + { + if (!forRecovery) + { + TestUtils.DeleteDirectory(TestUtils.MethodTestDir, wait: true); + } + log = Devices.CreateLogDevice(Path.Combine(TestUtils.MethodTestDir, "test.log"), deleteOnClose: false, recoverDevice: forRecovery); ReadCacheSettings readCacheSettings = default; + CheckpointSettings checkpointSettings = default; foreach (var arg in TestContext.CurrentContext.Test.Arguments) { if (arg is ReadCopyDestination dest) @@ -70,15 +77,23 @@ public void Setup() readCacheSettings = new() { PageSizeBits = 12, MemorySizeBits = 22 }; break; } + if (arg is CheckpointType chktType) + { + checkpointSettings = new CheckpointSettings { CheckpointDir = TestUtils.MethodTestDir }; + break; + } } fht = new FasterKV(1L << 20, new LogSettings { LogDevice = log, ObjectLogDevice = null, PageSizeBits = 12, MemorySizeBits = 22, ReadCacheSettings = readCacheSettings }, - supportsLocking: true ); + checkpointSettings: checkpointSettings, + supportsLocking: true); session = fht.For(new LockableUnsafeFunctions()).NewSession(); } [TearDown] - public void TearDown() + public void TearDown() => TearDown(forRecovery: false); + + public void TearDown(bool forRecovery) { session?.Dispose(); session = null; @@ -87,8 +102,10 @@ public void TearDown() log?.Dispose(); log = null; - // Clean up log - 
TestUtils.DeleteDirectory(TestUtils.MethodTestDir); + if (!forRecovery) + { + TestUtils.DeleteDirectory(TestUtils.MethodTestDir); + } } void Populate() @@ -99,7 +116,7 @@ void Populate() } } - static void AssertIsLocked(LockableUnsafeContext luContext, int key, LockType lockType) + static void AssertIsLocked(LockableUnsafeContext luContext, int key, LockType lockType) => AssertIsLocked(luContext, key, lockType == LockType.Exclusive, lockType == LockType.Shared); static void AssertIsLocked(LockableUnsafeContext luContext, int key, bool xlock, bool slock) @@ -142,7 +159,7 @@ void EnsureNoLocks() [Category(TestUtils.LockableUnsafeContextTestCategory)] [Category(TestUtils.SmokeTestCategory)] public void InMemorySimpleLockTxnTest([Values] ResultLockTarget resultLockTarget, [Values] ReadCopyDestination readCopyDestination, - [Values]FlushMode flushMode, [Values(Phase.REST, Phase.INTERMEDIATE)] Phase phase, [Values] UpdateOp updateOp) + [Values] FlushMode flushMode, [Values(Phase.REST, Phase.INTERMEDIATE)] Phase phase, [Values] UpdateOp updateOp) { Populate(); PrepareRecordLocation(flushMode); @@ -534,7 +551,7 @@ public void TransferFromLockTableToCTTTest() using var luContext = session.GetLockableUnsafeContext(); int input = 0, output = 0, key = transferToExistingKey; RecordMetadata recordMetadata = default; - AddLockTableEntry(luContext, key, immutable:false); + AddLockTableEntry(luContext, key, immutable: false); var status = session.Read(ref key, ref input, ref output, ref recordMetadata, ReadFlags.CopyToTail); Assert.AreEqual(Status.PENDING, status); @@ -675,7 +692,7 @@ public void LockAndUnlockInLockTableOnlyTest() Dictionary locks = new(); var rng = new Random(101); - foreach (var key in Enumerable.Range( 0, numRecords).Select(ii => rng.Next(numRecords))) + foreach (var key in Enumerable.Range(0, numRecords).Select(ii => rng.Next(numRecords))) locks[key] = (key & 1) == 0 ? 
LockType.Exclusive : LockType.Shared; // For this single-threaded test, the locking does not really have to be in order, but for consistency do it. @@ -685,7 +702,7 @@ public void LockAndUnlockInLockTableOnlyTest() Assert.IsTrue(fht.LockTable.IsActive); Assert.AreEqual(locks.Count, fht.LockTable.dict.Count); - foreach (var key in locks.Keys) + foreach (var key in locks.Keys.OrderBy(k => -k)) { var found = fht.LockTable.Get(key, out RecordInfo recordInfo); Assert.IsTrue(found); @@ -731,7 +748,7 @@ public void EvictFromMainLogToLockTableTest() Assert.AreEqual(locks.Count, fht.LockTable.dict.Count); // Verify LockTable - foreach (var key in locks.Keys) + foreach (var key in locks.Keys.OrderBy(k => -k)) { var found = fht.LockTable.Get(key, out RecordInfo recordInfo); Assert.IsTrue(found); @@ -760,5 +777,180 @@ public void EvictFromMainLogToLockTableTest() Assert.IsFalse(fht.LockTable.IsActive); Assert.AreEqual(0, fht.LockTable.dict.Count); } + + [Test] + [Category(TestUtils.LockableUnsafeContextTestCategory)] + [Category(TestUtils.CheckpointRestoreCategory)] + public async ValueTask CheckpointRecoverTest([Values] CheckpointType checkpointType, [Values] TestUtils.SyncMode syncMode) + { + Populate(); + + Dictionary locks = new(); + var rng = new Random(101); + foreach (var key in Enumerable.Range(0, numRecords / 5).Select(ii => rng.Next(numRecords))) + locks[key] = (key & 1) == 0 ? LockType.Exclusive : LockType.Shared; + + Guid fullCheckpointToken; + bool success = true; + { + using var session = fht.NewSession(new SimpleFunctions()); + using var luContext = session.GetLockableUnsafeContext(); + + // For this single-threaded test, the locking does not really have to be in order, but for consistency do it. 
+ foreach (var key in locks.Keys.OrderBy(k => k)) + luContext.Lock(key, locks[key]); + + this.fht.Log.ShiftReadOnlyAddress(this.fht.Log.TailAddress, wait: true); + + if (syncMode == TestUtils.SyncMode.Sync) + { + this.fht.TakeFullCheckpoint(out fullCheckpointToken, checkpointType); + await this.fht.CompleteCheckpointAsync(); + } + else + (success, fullCheckpointToken) = await fht.TakeFullCheckpointAsync(checkpointType); + Assert.IsTrue(success); + + foreach (var key in locks.Keys.OrderBy(k => -k)) + luContext.Unlock(key, locks[key]); + } + + TearDown(forRecovery: true); + Setup(forRecovery: true); + + if (syncMode == TestUtils.SyncMode.Sync) + this.fht.Recover(fullCheckpointToken); + else + await this.fht.RecoverAsync(fullCheckpointToken); + + { + using var luContext = this.session.GetLockableUnsafeContext(); + + foreach (var key in locks.Keys.OrderBy(k => k)) + { + var (exclusive, shared) = luContext.IsLocked(key); + Assert.IsFalse(exclusive, $"key: {key}"); + Assert.IsFalse(shared, $"key: {key}"); + } + } + } + + const int numSecondaryReaderKeys = 1500; + const int checkpointFreq = 250; + + [Test] + [Category(TestUtils.LockableUnsafeContextTestCategory)] + [Category(TestUtils.CheckpointRestoreCategory)] + async public Task SecondaryReaderTest([Values] TestUtils.SyncMode syncMode) + { + // This test is taken from the SecondaryReaderStore sample + + var path = TestUtils.MethodTestDir; + TestUtils.DeleteDirectory(path, wait: true); + + var log = Devices.CreateLogDevice(path + "hlog.log", deleteOnClose: true); + + var primaryStore = new FasterKV + (1L << 10, + logSettings: new LogSettings { LogDevice = log, MutableFraction = 1, PageSizeBits = 10, MemorySizeBits = 20 }, + checkpointSettings: new CheckpointSettings { CheckpointDir = path } + ); + + var secondaryStore = new FasterKV + (1L << 10, + logSettings: new LogSettings { LogDevice = log, MutableFraction = 1, PageSizeBits = 10, MemorySizeBits = 20 }, + checkpointSettings: new CheckpointSettings { CheckpointDir = 
path } + ); + + // Use Task instead of Thread because this propagate exceptions back to this thread. + await Task.WhenAll(Task.Run(() => PrimaryWriter(primaryStore, syncMode)), + Task.Run(() => SecondaryReader(secondaryStore, syncMode))); + + log.Dispose(); + TestUtils.DeleteDirectory(path, wait: true); + } + + async static Task PrimaryWriter(FasterKV primaryStore, TestUtils.SyncMode syncMode) + { + using var s1 = primaryStore.NewSession(new SimpleFunctions()); + using var luc1 = s1.GetLockableUnsafeContext(); + + // Upserting keys at primary starting from key 0 + for (long key = 0; key < numSecondaryReaderKeys; key++) + { + if (key > 0 && key % checkpointFreq == 0) + { + // Checkpointing primary until key {key - 1} + if (syncMode == TestUtils.SyncMode.Sync) + { + primaryStore.TakeHybridLogCheckpoint(out _, CheckpointType.Snapshot); + await primaryStore.CompleteCheckpointAsync().ConfigureAwait(false); + } + else + { + var (success, _) = await primaryStore.TakeHybridLogCheckpointAsync(CheckpointType.Snapshot).ConfigureAwait(false); + Assert.IsTrue(success); + } + Thread.Sleep(10); + } + + var status = s1.Upsert(ref key, ref key); + Assert.AreEqual(Status.OK, status); + luc1.Lock(key, LockType.Shared); + } + + // Checkpointing primary until key {numSecondaryReaderOps - 1} + await primaryStore.TakeHybridLogCheckpointAsync(CheckpointType.Snapshot).ConfigureAwait(false); + + // Unlock everything before we Dispose() luc1 + for (long key = 0; key < numSecondaryReaderKeys; key++) + { + luc1.Unlock(key, LockType.Shared); + } + } + + async static Task SecondaryReader(FasterKV secondaryStore, TestUtils.SyncMode syncMode) + { + using var s1 = secondaryStore.NewSession(new SimpleFunctions()); + using var luc1 = s1.GetLockableUnsafeContext(); + + long key = 0, output = 0; + while (true) + { + try + { + // read-only recovery, no writing back undos + if (syncMode == TestUtils.SyncMode.Sync) + secondaryStore.Recover(undoNextVersion: false); + else + await 
secondaryStore.RecoverAsync(undoNextVersion: false).ConfigureAwait(false); + } + catch (FasterException) + { + // Nothing to recover to at secondary, retrying + Thread.Sleep(500); + continue; + } + + while (true) + { + var status = s1.Read(ref key, ref output); + if (status == Status.NOTFOUND) + { + // Key {key} not found at secondary; performing recovery to catch up + Thread.Sleep(500); + break; + } + Assert.AreEqual(key, output); + var (xlock, slock) = luc1.IsLocked(key); + Assert.IsFalse(xlock); + Assert.IsFalse(slock); + + key++; + if (key == numSecondaryReaderKeys) + return; + } + } + } } } diff --git a/cs/test/TestUtils.cs b/cs/test/TestUtils.cs index e03ee5754..eee901d7f 100644 --- a/cs/test/TestUtils.cs +++ b/cs/test/TestUtils.cs @@ -20,6 +20,7 @@ internal static class TestUtils internal const string LockableUnsafeContextTestCategory = "LockableUnsafeContext"; internal const string ReadCacheTestCategory = "ReadCache"; internal const string LockTestCategory = "Locking"; + internal const string CheckpointRestoreCategory = "CheckpointRestore"; /// /// Delete a directory recursively @@ -173,6 +174,8 @@ internal enum AllocatorType Generic } + internal enum SyncMode { Sync, Async }; + internal static (Status status, TOutput output) GetSinglePendingResult(CompletedOutputIterator completedOutputs) => GetSinglePendingResult(completedOutputs, out _); diff --git a/docs/_docs/30-fasterkv-manual-locking.md b/docs/_docs/30-fasterkv-manual-locking.md index 6a9af6d74..2b33a4233 100644 --- a/docs/_docs/30-fasterkv-manual-locking.md +++ b/docs/_docs/30-fasterkv-manual-locking.md @@ -119,7 +119,7 @@ We implement the `LockTable` with a `ConcurrentDictionary` because the use is ex #### Insertion to LockTable due to Lock -This is the complementary side of [Insertion to LockTable due to Upsert](#insertion-to-locktable-due-to-upsert): +This is the complementary side of [Insertion to LockTable due to Update](#insertion-to-locktable-due-to-update): When a thread doing `Lock()` looks 
for a key in the LockTable and cannot find it, it must do a Tentative insertion into the locktable, because it is possible that another thread CAS'd that key to the Tail of the log after the current thread had passed the hash table lookup: - If Lock() finds the key in memory: @@ -142,9 +142,9 @@ When a thread doing `Lock()` looks for a key in the LockTable and cannot find it - if no, we can set locktable entry as final by removing the Tentative bit - Any waiting thread proceeds normally -#### Insertion to LockTable due to Upsert +#### Insertion to LockTable due to Update -This is the complementary side of [Insertion to LockTable due to Lock](#insertion-to-locktable-due-to-lock) and applies RMW and Delete as well, when any of these append a record to the tail of the log (for brevity, Upsert is used). It is necessary so that threads that try to Lock() the Upsert()ed record as soon as it is CAS'd into the Log will not "split" locks between the log record and a `LockTable` entry. There is a bit of Catch-22 here; we cannot CAS in the non-Tentative log record before we have transferred the locks from a LockTable entry; but we must have a record on the log so that Lock() will not try to add a new entry, or lock an existing entry, while Upsert is in the process of creating the record and possibly transferring the locks from the `LockTable`. +This is the complementary side of [Insertion to LockTable due to Lock](#insertion-to-locktable-due-to-lock) and applies to Upsert, RMW, and Delete, when any of these append a record to the tail of the log (for brevity, Update is used). It is necessary so that threads that try to Lock() the Upsert()ed record as soon as it is CAS'd into the Log will not "split" locks between the log record and a `LockTable` entry. 
There is a bit of Catch-22 here; we cannot CAS in the non-Tentative log record before we have transferred the locks from a LockTable entry; but we must have a record on the log so that Lock() will not try to add a new entry, or lock an existing entry, while Upsert is in the process of creating the record and possibly transferring the locks from the `LockTable`. For performance reasons, Upsert cannot do an operation on the `LockTable` for each added record; therefore, we defer the cost until the last possible point, where we know we have to do something with the `LockTable` (which is very rare). From 50b17ef180c9a1fa38ed441aba21fe7c9c0e507e Mon Sep 17 00:00:00 2001 From: TedHartMS <15467143+TedHartMS@users.noreply.github.com> Date: Fri, 7 Jan 2022 17:01:43 -0800 Subject: [PATCH 16/25] Add manual locking to FASTER.benchmark --- cs/benchmark/FasterYcsbBenchmark.cs | 24 ++++++++++++++++++++++++ cs/benchmark/Options.cs | 3 ++- cs/benchmark/YcsbConstants.cs | 5 +++-- 3 files changed, 29 insertions(+), 3 deletions(-) diff --git a/cs/benchmark/FasterYcsbBenchmark.cs b/cs/benchmark/FasterYcsbBenchmark.cs index cfc762a70..6a71ce6be 100644 --- a/cs/benchmark/FasterYcsbBenchmark.cs +++ b/cs/benchmark/FasterYcsbBenchmark.cs @@ -209,6 +209,21 @@ internal unsafe (double, double) Run(TestLoader testLoader) dash.Start(); #endif + ClientSession session = store.For(functions).NewSession(null); + LockableUnsafeContext luContext = session.GetLockableUnsafeContext(); + + (Key key, LockType kind) xlock = (new Key { value = long.MaxValue }, LockType.Exclusive); + (Key key, LockType kind) slock = (new Key { value = long.MaxValue - 1 }, LockType.Shared); + if (testLoader.Options.LockImpl == (int)LockImpl.Manual) + { + session = store.For(functions).NewSession(null); + luContext = session.GetLockableUnsafeContext(); + + Console.WriteLine("Taking 2 manual locks"); + luContext.Lock(xlock.key, xlock.kind); + luContext.Lock(slock.key, slock.kind); + } + Thread[] workers = new 
Thread[testLoader.Options.ThreadCount]; Console.WriteLine("Executing setup."); @@ -308,6 +323,15 @@ internal unsafe (double, double) Run(TestLoader testLoader) { worker.Join(); } + + if (testLoader.Options.LockImpl == (int)LockImpl.Manual) + { + luContext.Unlock(xlock.key, xlock.kind); + luContext.Unlock(slock.key, slock.kind); + luContext.Dispose(); + session.Dispose(); + } + waiter.Reset(); #if DASHBOARD diff --git a/cs/benchmark/Options.cs b/cs/benchmark/Options.cs index b97731938..77a397e7e 100644 --- a/cs/benchmark/Options.cs +++ b/cs/benchmark/Options.cs @@ -35,7 +35,8 @@ class Options [Option('z', "locking", Required = false, Default = 0, HelpText = "Locking Implementation:" + "\n 0 = None (default)" + - "\n 1 = RecordInfo.SpinLock()")] + "\n 1 = Ephemeral locking using RecordInfo.SpinLock()" + + "\n 2 = Manual locking using LockableUnsafeContext")] public int LockImpl { get; set; } [Option('i', "iterations", Required = false, Default = 1, diff --git a/cs/benchmark/YcsbConstants.cs b/cs/benchmark/YcsbConstants.cs index 03cda44e0..697d8ae23 100644 --- a/cs/benchmark/YcsbConstants.cs +++ b/cs/benchmark/YcsbConstants.cs @@ -14,8 +14,9 @@ enum BenchmarkType : byte enum LockImpl : byte { - None, - RecordInfo + None = 0, + Ephemeral = 1, + Manual = 2 }; enum AddressLineNum : int From 72a87f76748fd937a1b1101af5c41a8caffb40cd Mon Sep 17 00:00:00 2001 From: TedHartMS <15467143+TedHartMS@users.noreply.github.com> Date: Fri, 14 Jan 2022 14:11:47 -0800 Subject: [PATCH 17/25] More LockTable testing and fixes, mostly around ocking nonexistent key while Upsert/RMW/Deleting that key --- cs/src/core/Allocator/LockEvictionObserver.cs | 2 +- .../ClientSession/LockableUnsafeContext.cs | 9 +- cs/src/core/Index/Common/RecordInfo.cs | 56 +++- cs/src/core/Index/FASTER/FASTERImpl.cs | 181 +++++------ cs/src/core/Utilities/LockTable.cs | 217 ++++++++----- cs/src/core/Utilities/LockUtility.cs | 1 + cs/test/LockableUnsafeContextTests.cs | 287 ++++++++++++++---- 
docs/_docs/30-fasterkv-manual-locking.md | 10 +- 8 files changed, 522 insertions(+), 241 deletions(-) diff --git a/cs/src/core/Allocator/LockEvictionObserver.cs b/cs/src/core/Allocator/LockEvictionObserver.cs index 2d7a727d1..05cdce36e 100644 --- a/cs/src/core/Allocator/LockEvictionObserver.cs +++ b/cs/src/core/Allocator/LockEvictionObserver.cs @@ -41,7 +41,7 @@ public void OnNext(IFasterScanIterator iter) info.Seal(manualLocking: true); // Now get it into the lock table, so it is ready as soon as the record is removed. - this.store.LockTable.TransferFrom(ref key, info); + this.store.LockTable.TransferFromLogRecord(ref key, info); } } diff --git a/cs/src/core/ClientSession/LockableUnsafeContext.cs b/cs/src/core/ClientSession/LockableUnsafeContext.cs index df322f6b2..723b76f7a 100644 --- a/cs/src/core/ClientSession/LockableUnsafeContext.cs +++ b/cs/src/core/ClientSession/LockableUnsafeContext.cs @@ -130,8 +130,9 @@ public unsafe void Lock(ref Key key, LockType lockType) LockOperation lockOp = new(LockOperationType.Lock, lockType); OperationStatus status; + bool oneMiss = false; do - status = clientSession.fht.InternalLock(ref key, lockOp, out _, FasterSession, clientSession.ctx); + status = clientSession.fht.InternalLock(ref key, lockOp, ref oneMiss, out _); while (status == OperationStatus.RETRY_NOW); Debug.Assert(status == OperationStatus.SUCCESS); @@ -160,8 +161,9 @@ public void Unlock(ref Key key, LockType lockType) LockOperation lockOp = new(LockOperationType.Unlock, lockType); OperationStatus status; + bool oneMiss = false; do - status = clientSession.fht.InternalLock(ref key, lockOp, out _, FasterSession, clientSession.ctx); + status = clientSession.fht.InternalLock(ref key, lockOp, ref oneMiss, out _); while (status == OperationStatus.RETRY_NOW); Debug.Assert(status == OperationStatus.SUCCESS); @@ -190,8 +192,9 @@ public void Unlock(ref Key key, LockType lockType) OperationStatus status; RecordInfo lockInfo; + bool oneMiss = false; do - status = 
clientSession.fht.InternalLock(ref key, lockOp, out lockInfo, FasterSession, clientSession.ctx); + status = clientSession.fht.InternalLock(ref key, lockOp, ref oneMiss, out lockInfo); while (status == OperationStatus.RETRY_NOW); Debug.Assert(status == OperationStatus.SUCCESS); return (lockInfo.IsLockedExclusive, lockInfo.IsLockedShared); diff --git a/cs/src/core/Index/Common/RecordInfo.cs b/cs/src/core/Index/Common/RecordInfo.cs index d5b34997c..2fd85b9e4 100644 --- a/cs/src/core/Index/Common/RecordInfo.cs +++ b/cs/src/core/Index/Common/RecordInfo.cs @@ -129,6 +129,25 @@ public bool TryLockExclusive(int spinCount = 1) return true; } + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal void LockExclusiveRaw() + { + // Acquire exclusive lock, without spin limit or considering Intermediate state + while (true) + { + long expected_word = word; + if ((expected_word & kExclusiveLockBitMask) == 0) + { + if (expected_word == Interlocked.CompareExchange(ref word, expected_word | kExclusiveLockBitMask, expected_word)) + break; + } + Thread.Yield(); + } + + // Wait for readers to drain + while ((word & kSharedLockMaskInWord) != 0) Thread.Yield(); + } + /// /// Take shared (read) lock on RecordInfo /// @@ -171,6 +190,24 @@ public bool TryLockShared(int spinCount = 1) return true; } + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal bool LockSharedRaw(int spinCount = 1) + { + // Acquire shared lock, without spin limit or considering Intermediate state + while (true) + { + long expected_word = word; + if (((expected_word & kExclusiveLockBitMask) == 0) // not exclusively locked + && (expected_word & kSharedLockMaskInWord) != kSharedLockMaskInWord) // shared lock is not full + { + if (expected_word == Interlocked.CompareExchange(ref word, expected_word + kSharedLockIncrement, expected_word)) + break; + } + Thread.Yield(); + } + return true; + } + /// /// Take shared (read) lock on RecordInfo /// @@ -257,9 +294,26 @@ public bool Tentative } } + 
[MethodImpl(MethodImplOptions.AggressiveInlining)] + public void SetTentativeAtomic(bool value) + { + // Call this when locking may be done simultaneously + while (this.Tentative != value) + { + long expected_word = word; + long new_word = value ? (word | kTentativeBitMask) : (word & ~kTentativeBitMask); + long current_word = Interlocked.CompareExchange(ref word, new_word, expected_word); + if (expected_word == current_word) + return; + + // Tentative records should not be operated on by other threads. + Debug.Assert((word & kSealedBitMask) == 0 && !this.Invalid); + Thread.Yield(); + } + } + public bool Sealed => (word & kSealedBitMask) > 0; - // Ensure we have exclusive access before sealing. [MethodImpl(MethodImplOptions.AggressiveInlining)] public bool Seal(bool manualLocking = false) { diff --git a/cs/src/core/Index/FASTER/FASTERImpl.cs b/cs/src/core/Index/FASTER/FASTERImpl.cs index 55c94a34d..8bd9ec8d0 100644 --- a/cs/src/core/Index/FASTER/FASTERImpl.cs +++ b/cs/src/core/Index/FASTER/FASTERImpl.cs @@ -310,7 +310,7 @@ internal OperationStatus InternalRead( pendingContext.serialNum = lsn; pendingContext.heldLatch = heldOperation; - pendingContext.HasPrevHighestKeyHashAddress = true; + pendingContext.HasPrevHighestKeyHashAddress = prevHighestKeyHashAddress >= hlog.BeginAddress; pendingContext.recordInfo.PreviousAddress = prevHighestKeyHashAddress; } #endregion @@ -664,10 +664,6 @@ private OperationStatus CreateNewRecordUpsert( } // Fuzzy Region: Must go pending due to lost-update anomaly - else if (logicalAddress >= hlog.SafeReadOnlyAddress && !hlog.GetInfo(physicalAddress).Tombstone) // TODO replace with Sealed + else if (logicalAddress >= hlog.SafeReadOnlyAddress && !hlog.GetInfo(physicalAddress).Tombstone) // TODO potentially replace with Sealed { status = OperationStatus.RETRY_LATER; // Do not retain latch for pendings ops in relaxed CPR @@ -1107,11 +1099,11 @@ private OperationStatus CreateNewRecordRMW( } var newPhysicalAddress = 
hlog.GetPhysicalAddress(newLogicalAddress); ref RecordInfo recordInfo = ref hlog.GetInfo(newPhysicalAddress); - recordInfo.Tentative = true; RecordInfo.WriteInfo(ref recordInfo, inNewVersion: sessionCtx.InNewVersion, tombstone: true, dirty: true, latestLogicalAddress); + recordInfo.Tentative = true; hlog.Serialize(ref key, newPhysicalAddress); - bool lockTableEntryExists = false; - if (unsealPhysicalAddress == Constants.kInvalidAddress && LockTable.IsActive && !LockTable.TrySeal(ref key, out lockTableEntryExists) && lockTableEntryExists) - return OperationStatus.RETRY_NOW; - bool success = true; if (lowestReadCachePhysicalAddress == Constants.kInvalidAddress) { @@ -1512,12 +1493,9 @@ internal OperationStatus InternalDelete( { if (unsealPhysicalAddress != Constants.kInvalidAddress) recordInfo.CopyLocksFrom(hlog.GetInfo(unsealPhysicalAddress)); - else if (lockTableEntryExists && !LockTable.ApplyToLogRecord(ref key, ref recordInfo)) - { - LockTable.Unseal(ref key); - return OperationStatus.RETRY_NOW; - } - recordInfo.Tentative = false; + else if (LockTable.IsActive) + LockTable.TransferToLogRecord(ref key, ref recordInfo); + recordInfo.SetTentativeAtomic(false); // Note that this is the new logicalAddress; we have not retrieved the old one if it was below HeadAddress, and thus // we do not know whether 'logicalAddress' belongs to 'key' or is a collision. @@ -1582,14 +1560,11 @@ internal OperationStatus InternalDelete( /// /// key of the record. /// Lock operation being done. + /// Indicates whether we had a missing record once before. This handles the race where we try to unlock as lock records are + /// transferred out of the lock table, so we retry once if the record does not exist /// Receives the recordInfo of the record being locked - /// Callback functions. 
- /// Session context [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal OperationStatus InternalLock( - ref Key key, LockOperation lockOp, out RecordInfo lockInfo, FasterSession fasterSession, - FasterExecutionContext sessionCtx) - where FasterSession : IFasterSession + internal OperationStatus InternalLock(ref Key key, LockOperation lockOp, ref bool oneMiss, out RecordInfo lockInfo) { var bucket = default(HashBucket*); var slot = default(int); @@ -1597,12 +1572,12 @@ internal OperationStatus InternalLock( var hash = comparer.GetHashCode64(ref key); var tag = (ushort)((ulong)hash >> Constants.kHashTagShift); - var prevTailAddress = hlog.GetTailAddress(); - #region Trace back for record in in-memory HybridLog var entry = default(HashBucketEntry); - FindOrCreateTag(hash, tag, ref bucket, ref slot, ref entry, hlog.BeginAddress); + FindTag(hash, tag, ref bucket, ref slot, ref entry); + var logicalAddress = entry.Address; + long prevHighestKeyHashAddress = logicalAddress; OperationStatus status; if (UseReadCache) @@ -1613,14 +1588,14 @@ internal OperationStatus InternalLock( var physicalAddress = hlog.GetPhysicalAddress(logicalAddress); - if (logicalAddress >= hlog.ReadOnlyAddress) + if (logicalAddress >= hlog.HeadAddress) { if (!comparer.Equals(ref key, ref hlog.GetKey(physicalAddress))) { logicalAddress = hlog.GetInfo(physicalAddress).PreviousAddress; TraceBackForKeyMatch(ref key, logicalAddress, - hlog.ReadOnlyAddress, + hlog.HeadAddress, out logicalAddress, out physicalAddress); } @@ -1645,32 +1620,61 @@ internal OperationStatus InternalLock( // Not in memory. Do LockTable operations if (lockOp.LockOperationType == LockOperationType.IsLocked) - { - this.LockTable.Get(ref key, out lockInfo); - return OperationStatus.SUCCESS; - } + return this.LockTable.Get(ref key, out lockInfo) ? 
OperationStatus.SUCCESS : OperationStatus.RETRY_NOW; if (lockOp.LockOperationType == LockOperationType.Unlock) { - this.LockTable.Unlock(ref key, lockOp.LockType); - return OperationStatus.SUCCESS; + if (this.LockTable.Unlock(ref key, lockOp.LockType, out bool lockTableEntryExists)) + return OperationStatus.SUCCESS; + if (!lockTableEntryExists) + { + if (oneMiss) + { + Debug.Fail("Trying to unlock a nonexistent key"); + return OperationStatus.SUCCESS; // SUCCEED so we don't continue the loop + } + oneMiss = true; + } + return OperationStatus.RETRY_NOW; } // Try to lock - bool tentativeLock; - while (!this.LockTable.LockOrTentative(ref key, lockOp.LockType, out tentativeLock)) - { - // Sealed by someone else, so retry + if (!this.LockTable.LockOrTentative(ref key, lockOp.LockType, out bool tentativeLock)) return OperationStatus.RETRY_NOW; - } - // We got the lock. If we had another record with this key inserted, RETRY. - if (FindTag(hash, tag, ref bucket, ref slot, ref entry) && entry.Address >= prevTailAddress) - return OperationStatus.RETRY_NOW; + // We got the lock. If we had a new record with this key inserted, RETRY. + if (FindTag(hash, tag, ref bucket, ref slot, ref entry) && entry.Address > hlog.BeginAddress) + { + var ok = prevHighestKeyHashAddress >= hlog.BeginAddress; + if (ok) + { + var la = entry.Address; + while (la > prevHighestKeyHashAddress && la >= hlog.HeadAddress) + { + var pa = hlog.GetPhysicalAddress(la); + if (comparer.Equals(ref key, ref hlog.GetKey(pa))) + { + ok = false; + break; + } + la = hlog.GetInfo(pa).PreviousAddress; + } + + // An inserted record may have escaped to disk during the time of this Read/PENDING operation, in which case we must retry. 
+ if (la > prevHighestKeyHashAddress && la < hlog.HeadAddress) + return OperationStatus.RETRY_NOW; + } + + if (!ok) + { + LockTable.UnlockOrRemoveTentative(ref key, lockOp.LockType, tentativeLock); + return OperationStatus.RETRY_NOW; + } + } // Success if (tentativeLock) - this.LockTable.ClearTentative(ref key); + return this.LockTable.ClearTentative(ref key) ? OperationStatus.SUCCESS : OperationStatus.RETRY_NOW; return OperationStatus.SUCCESS; } @@ -1832,10 +1836,7 @@ internal void InternalContinuePendingReadCopyToTail= hlog.BeginAddress) && !hlog.GetInfoFromBytePointer(request.record.GetValidPointer()).Tombstone) + RecordInfo oldRecordInfo = hlog.GetInfoFromBytePointer(request.record.GetValidPointer()); + if ((request.logicalAddress >= hlog.BeginAddress) && !oldRecordInfo.Tombstone) { if (!fasterSession.NeedCopyUpdate(ref key, ref pendingContext.input.Get(), ref hlog.GetContextRecordValue(ref request), ref pendingContext.output)) return OperationStatus.SUCCESS; @@ -1945,7 +1947,7 @@ internal OperationStatus InternalContinuePendingRMW prevHighestKeyKashAddress because any new record would be added above that. - while (la > prevHighestKeyKashAddress && la >= hlog.HeadAddress) + while (la > prevHighestKeyHashAddress && la >= hlog.HeadAddress) { var pa = hlog.GetPhysicalAddress(la); if (comparer.Equals(ref key, ref hlog.GetKey(pa))) @@ -2549,12 +2540,12 @@ ref hlog.GetValue(newPhysicalAddress, newPhysicalAddress + actualSize), ref outp if (!new_rcri.Invalid) { // An inserted record may have escaped to disk during the time of this Read/PENDING operation, in which case we must retry. 
- if (la > prevHighestKeyKashAddress && la < hlog.HeadAddress) + if (la > prevHighestKeyHashAddress && la < hlog.HeadAddress) { new_rcri.SetInvalid(); return OperationStatus.RECORD_ON_DISK; } - new_rcri.Tentative = false; + new_rcri.SetTentativeAtomic(false); } } } @@ -2584,15 +2575,9 @@ ref hlog.GetValue(newPhysicalAddress, newPhysicalAddress + actualSize), ref outp else { ref RecordInfo recordInfo = ref log.GetInfo(newPhysicalAddress); - - if (lockTableEntryExists && !LockTable.ApplyToLogRecord(ref key, ref recordInfo)) - { - LockTable.Unseal(ref key); - recordInfo.SetInvalid(); - recordInfo.Tentative = false; - return OperationStatus.RETRY_NOW; - } - recordInfo.Tentative = false; + if (LockTable.IsActive) + LockTable.TransferToLogRecord(ref key, ref recordInfo); + recordInfo.SetTentativeAtomic(false); pendingContext.recordInfo = recordInfo; pendingContext.logicalAddress = copyToReadCache ? Constants.kInvalidAddress /* We do not expose readcache addresses */ : newLogicalAddress; @@ -3124,7 +3109,7 @@ internal void ReadCacheEvict(long fromHeadAddress, long toHeadAddress) } // Now get it into the lock table, so it is ready as soon as the CAS removes this record from the RC chain. - this.LockTable.TransferFrom(ref readcache.GetKey(pa), ri); + this.LockTable.TransferFromLogRecord(ref readcache.GetKey(pa), ri); } // Swap in the next entry in the chain. Because we may encounter a race where another thread swaps in a readcache diff --git a/cs/src/core/Utilities/LockTable.cs b/cs/src/core/Utilities/LockTable.cs index f45aae219..85ccbc55e 100644 --- a/cs/src/core/Utilities/LockTable.cs +++ b/cs/src/core/Utilities/LockTable.cs @@ -1,7 +1,6 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT license. 
-using System; using System.Collections.Generic; using System.Diagnostics; using System.Runtime.CompilerServices; @@ -15,7 +14,7 @@ internal class LockTableEntry : IEqualityComparer> { internal IHeapContainer key; internal RecordInfo logRecordInfo; // in main log - internal RecordInfo lockRecordInfo; // in lock table; we have to Lock/Seal/Tentative the LockTable entry separately from logRecordInfo + internal RecordInfo lockRecordInfo; // in lock table; we have to Lock/Tentative the LockTable entry separately from logRecordInfo internal LockTableEntry(IHeapContainer key, RecordInfo logRecordInfo, RecordInfo lockRecordInfo) { @@ -24,9 +23,24 @@ internal LockTableEntry(IHeapContainer key, RecordInfo logRecordInfo, Reco this.lockRecordInfo = lockRecordInfo; } - public bool Equals(LockTableEntry k1, LockTableEntry k2) => k1.logRecordInfo.Equals(k2.logRecordInfo); + public bool Equals(LockTableEntry k1, LockTableEntry k2) + => k1.logRecordInfo.Equals(k2.logRecordInfo) && k1.lockRecordInfo.Tentative == k2.lockRecordInfo.Tentative; public int GetHashCode(LockTableEntry k) => (int)k.logRecordInfo.GetHashCode64(); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal void XLock() => this.lockRecordInfo.LockExclusiveRaw(); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal void XUnlock() { this.lockRecordInfo.UnlockExclusive();} + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal void SLock() => this.lockRecordInfo.LockSharedRaw(); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal void SUnlock() { this.lockRecordInfo.UnlockShared(); } + + public override string ToString() => $"{key}"; } internal class LockTable @@ -65,64 +79,83 @@ internal LockTable(IVariableLengthStruct keyLen, IFasterEqualityComparer GetKeyContainer(ref TKey key) => bufferPool is null ? 
new StandardHeapContainer(ref key) : new VarLenHeapContainer(ref key, keyLen, bufferPool); - // Provide our own implementation of "Update by lambda" [MethodImpl(MethodImplOptions.AggressiveInlining)] - private bool Update(ref TKey key, Func, LockTableEntry> updateFactory) + internal bool Unlock(ref TKey key, LockType lockType, out bool exists) { - using var keyContainer = GetKeyContainer(ref key); - while (dict.TryGetValue(keyContainer, out var lte)) - { - if (dict.TryUpdate(keyContainer, updateFactory(lte), lte)) - return true; - } + var lookupKey = GetKeyContainer(ref key); + exists = dict.TryGetValue(lookupKey, out var lte); + if (exists) + return Unlock(lookupKey, lte, lockType); return false; } [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal void Unlock(ref TKey key, LockType lockType) + private bool Unlock(IHeapContainer lookupKey, LockTableEntry lte, LockType lockType) { - if (Update(ref key, lte => { lte.logRecordInfo.Unlock(lockType); return lte; })) - TryRemoveIfNoLocks(ref key); - else - Debug.Fail("Trying to unlock a nonexistent key"); + bool result = false; + lte.SLock(); + if (!lte.lockRecordInfo.Invalid) + { + lte.logRecordInfo.Unlock(lockType); + result = true; + } + lte.SUnlock(); + + if (!lte.logRecordInfo.IsLocked) + { + lte.XLock(); + if (!lte.logRecordInfo.IsLocked && !lte.lockRecordInfo.Invalid) + TryRemoveEntry(lookupKey); + lte.XUnlock(); + } + + return result; } [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal void TransferFrom(ref TKey key, RecordInfo logRecordInfo) + internal void TransferFromLogRecord(ref TKey key, RecordInfo logRecordInfo) { - var keyContainer = GetKeyContainer(ref key); + var lookupKey = GetKeyContainer(ref key); RecordInfo newRec = default; + newRec.SetValid(); newRec.CopyLocksFrom(logRecordInfo); - if (!dict.TryAdd(keyContainer, new(keyContainer, newRec, default))) + RecordInfo lockRec = default; + lockRec.SetValid(); + Interlocked.Increment(ref this.approxNumItems); + if 
(!dict.TryAdd(lookupKey, new(lookupKey, newRec, lockRec))) { - keyContainer.Dispose(); + Interlocked.Decrement(ref this.approxNumItems); + lookupKey.Dispose(); Debug.Fail("Trying to Transfer to an existing key"); return; } - Interlocked.Increment(ref this.approxNumItems); } // Lock the LockTable record for the key if it exists, else add a Tentative record for it. - // Returns true if the record was locked or tentative; else false (a Sealed or already-Tentative record was encountered) + // Returns true if the record was locked or tentative; else false (an already-Tentative record was encountered) [MethodImpl(MethodImplOptions.AggressiveInlining)] internal bool LockOrTentative(ref TKey key, LockType lockType, out bool tentative) { var keyContainer = GetKeyContainer(ref key); bool existingConflict = false; var lte = dict.AddOrUpdate(keyContainer, - key => { + key => { // New Value RecordInfo lockRecordInfo = default; lockRecordInfo.Tentative = true; + lockRecordInfo.SetValid(); RecordInfo logRecordInfo = default; + logRecordInfo.SetValid(); existingConflict = !logRecordInfo.Lock(lockType); Interlocked.Increment(ref this.approxNumItems); return new(key, logRecordInfo, lockRecordInfo); - }, (key, lte) => { - existingConflict = !lte.logRecordInfo.Lock(lockType); - if (lte.lockRecordInfo.Sealed) - { + }, (key, lte) => { // Update Value + if (lte.lockRecordInfo.Tentative) existingConflict = true; - lte.logRecordInfo.Unlock(lockType); + else + { + lte.XLock(); + existingConflict = lte.lockRecordInfo.Invalid || !lte.logRecordInfo.Lock(lockType); + lte.XUnlock(); } return lte; }); @@ -131,102 +164,124 @@ internal bool LockOrTentative(ref TKey key, LockType lockType, out bool tentativ } [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal void ClearTentative(ref TKey key) + internal bool Get(TKey key, out RecordInfo recordInfo) => Get(ref key, out recordInfo); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal bool ContainsKey(ref TKey key) { - if 
(!Update(ref key, lte => { lte.lockRecordInfo.Tentative = false; return lte; })) - Debug.Fail("Trying to remove Tentative bit from nonexistent locktable entry"); + using var lookupKey = GetKeyContainer(ref key); + return dict.ContainsKey(lookupKey); } [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal void TryRemoveIfNoLocks(ref TKey key) + internal bool Get(ref TKey key, out RecordInfo recordInfo) { using var lookupKey = GetKeyContainer(ref key); - - // From https://devblogs.microsoft.com/pfxteam/little-known-gems-atomic-conditional-removals-from-concurrentdictionary/ - while (dict.TryGetValue(lookupKey, out var lte)) + if (dict.TryGetValue(lookupKey, out var lte)) { - if (lte.lockRecordInfo.IsLocked || lte.lockRecordInfo.Sealed || lte.logRecordInfo.IsLocked) - return; - if (dict.TryRemoveConditional(lookupKey, lte)) - { - Interlocked.Decrement(ref this.approxNumItems); - lte.key.Dispose(); - return; - } + recordInfo = lte.logRecordInfo; + return !lte.lockRecordInfo.Invalid; } - // If we make it here, the key was already removed. + recordInfo = default; + return false; } - // False is legit, as the record may have been removed between the time it was known to be here and the time Seal was called. [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal bool TrySeal(ref TKey key, out bool exists) + internal bool ClearTentative(ref TKey key) { using var lookupKey = GetKeyContainer(ref key); - if (!dict.ContainsKey(lookupKey)) + + // False is legit, as other operations may have removed it. 
+ if (!dict.TryGetValue(lookupKey, out var lte)) + return false; + bool cleared = false; + lte.XLock(); + if (lte.lockRecordInfo.Tentative && !lte.lockRecordInfo.Invalid) { - exists = false; - return true; + lte.lockRecordInfo.SetTentativeAtomic(false); + cleared = true; } - exists = true; - return Update(ref key, lte => { lte.lockRecordInfo.Seal(); return lte; }); + lte.XUnlock(); + return cleared; } [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal void Unseal(ref TKey key) + internal void UnlockOrRemoveTentative(ref TKey key, LockType lockType, bool wasTentative) { - if (!Update(ref key, lte => { lte.lockRecordInfo.Unseal(); return lte; })) - Debug.Fail("Trying to Unseal nonexistent key"); + using var lookupKey = GetKeyContainer(ref key); + if (dict.TryGetValue(lookupKey, out var lte)) + { + Debug.Assert(wasTentative == lte.lockRecordInfo.Tentative, "lockRecordInfo.Tentative was not as expected"); + + // We assume that we own the lock or placed the Tentative record, and a Tentative record may have legitimately been removed. + if (lte.lockRecordInfo.Tentative) + RemoveIfTentative(lookupKey, lte); + else + Unlock(lookupKey, lte, lockType); + return; + } + + // A tentative record may have been removed by the other side of the 2-phase process. 
+ if (!wasTentative) + Debug.Fail("Trying to UnlockOrRemoveTentative on nonexistent nonTentative key"); } - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal bool Get(ref TKey key, out RecordInfo recordInfo) + bool TryRemoveEntry(IHeapContainer lookupKey) { - using var lookupKey = GetKeyContainer(ref key); - if (dict.TryGetValue(lookupKey, out var lte)) + if (dict.TryRemove(lookupKey, out var lte)) { - recordInfo = lte.logRecordInfo; + Interlocked.Decrement(ref this.approxNumItems); + lte.lockRecordInfo.SetInvalid(); + lte.key.Dispose(); return true; } - recordInfo = default; return false; } [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal bool Get(TKey key, out RecordInfo recordInfo) => Get(ref key, out recordInfo); - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal bool ContainsKey(ref TKey key) + private bool RemoveIfTentative(IHeapContainer lookupKey, LockTableEntry lte) { - using var lookupKey = GetKeyContainer(ref key); - return dict.ContainsKey(lookupKey); + if (lte.lockRecordInfo.Dirty) + if (lte.lockRecordInfo.Filler) return false; + if (lte.lockRecordInfo.IsLocked) + if (lte.lockRecordInfo.Filler) return false; + if (!lte.lockRecordInfo.Tentative || lte.lockRecordInfo.Invalid) + return false; + lte.XLock(); + if (lte.lockRecordInfo.Dirty) + if (lte.lockRecordInfo.Filler) return false; + + // If the record is Invalid, it was already removed. + var removed = lte.lockRecordInfo.Invalid || (lte.lockRecordInfo.Tentative && TryRemoveEntry(lookupKey)); + lte.XUnlock(); + return removed; } [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal bool ApplyToLogRecord(ref TKey key, ref RecordInfo logRecord) + internal void TransferToLogRecord(ref TKey key, ref RecordInfo logRecord) { // This is called after the record has been CAS'd into the log or readcache, so this should not be allowed to fail. 
using var lookupKey = GetKeyContainer(ref key); if (dict.TryGetValue(lookupKey, out var lte)) { - Debug.Assert(lte.lockRecordInfo.Sealed, "lockRecordInfo should have been Sealed already"); - - // If it's a Tentative record, ignore it--it will be removed by Lock() and retried against the inserted log record. - if (lte.lockRecordInfo.Tentative) - return true; + // If it's a Tentative record, wait for it to no longer be tentative. + while (lte.lockRecordInfo.Tentative) + Thread.Yield(); + + // If invalid, then the Lock thread called TryRemoveEntry and will retry, which will add the locks after the main-log record is no longer tentative. + if (lte.lockRecordInfo.Invalid) + return; - logRecord.CopyLocksFrom(lte.logRecordInfo); - lte.lockRecordInfo.SetInvalid(); - lte.lockRecordInfo.Unseal(); - if (dict.TryRemove(lookupKey, out _)) + lte.XLock(); + if (!lte.lockRecordInfo.Invalid) { - Interlocked.Decrement(ref this.approxNumItems); - lte.key.Dispose(); + logRecord.CopyLocksFrom(lte.logRecordInfo); + TryRemoveEntry(lookupKey); } - lte.lockRecordInfo.Tentative = false; + lte.XUnlock(); } - // No locks to apply, or we applied them all. - return true; + // If we're here, there were no locks to apply, or we applied them all. 
} public override string ToString() => this.dict.Count.ToString(); diff --git a/cs/src/core/Utilities/LockUtility.cs b/cs/src/core/Utilities/LockUtility.cs index d1ee424b0..21ac26199 100644 --- a/cs/src/core/Utilities/LockUtility.cs +++ b/cs/src/core/Utilities/LockUtility.cs @@ -26,6 +26,7 @@ internal static bool HandleIntermediate(this ref RecordInfo recordInfo, out Oper while (recordInfo.Tentative) Thread.Yield(); + // We don't want to jump out on Sealed and restart if we are traversing the "read by address" chain if (recordInfo.Sealed && !isReadingAtAddress) { Thread.Yield(); diff --git a/cs/test/LockableUnsafeContextTests.cs b/cs/test/LockableUnsafeContextTests.cs index fb5e74861..bbc4d18fe 100644 --- a/cs/test/LockableUnsafeContextTests.cs +++ b/cs/test/LockableUnsafeContextTests.cs @@ -10,14 +10,14 @@ using NUnit.Framework; using FASTER.test.ReadCacheTests; using System.Threading.Tasks; -using System.Runtime.ExceptionServices; +using static FASTER.test.TestUtils; namespace FASTER.test.LockableUnsafeContext { // Functions for the "Simple lock transaction" case, e.g.: // - Lock key1, key2, key3, keyResult // - Do some operation on value1, value2, value3 and write the result to valueResult - class LockableUnsafeFunctions : SimpleFunctions + internal class LockableUnsafeFunctions : SimpleFunctions { internal long deletedRecordAddress; @@ -35,13 +35,28 @@ public override bool ConcurrentDeleter(ref int key, ref int value, ref RecordInf } } + internal class LockableUnsafeComparer : IFasterEqualityComparer + { + internal int maxSleepMs; + readonly Random rng = new(101); + + public bool Equals(ref int k1, ref int k2) => k1 == k2; + + public long GetHashCode64(ref int k) + { + if (maxSleepMs > 0) + Thread.Sleep(rng.Next(maxSleepMs)); + return Utility.GetHashCode(k); + } + } + public enum ResultLockTarget { MutableLock, LockTable } public enum ReadCopyDestination { Tail, ReadCache } public enum FlushMode { NoFlush, ReadOnly, OnDisk } - public enum UpdateOp { Upsert, RMW 
} + public enum UpdateOp { Upsert, RMW, Delete } [TestFixture] class LockableUnsafeContextTests @@ -52,6 +67,9 @@ class LockableUnsafeContextTests const int valueMult = 1_000_000; + LockableUnsafeFunctions functions; + LockableUnsafeComparer comparer; + private FasterKV fht; private ClientSession session; private IDevice log; @@ -63,9 +81,9 @@ public void Setup(bool forRecovery) { if (!forRecovery) { - TestUtils.DeleteDirectory(TestUtils.MethodTestDir, wait: true); + DeleteDirectory(MethodTestDir, wait: true); } - log = Devices.CreateLogDevice(Path.Combine(TestUtils.MethodTestDir, "test.log"), deleteOnClose: false, recoverDevice: forRecovery); + log = Devices.CreateLogDevice(Path.Combine(MethodTestDir, "test.log"), deleteOnClose: false, recoverDevice: forRecovery); ReadCacheSettings readCacheSettings = default; CheckpointSettings checkpointSettings = default; @@ -79,15 +97,18 @@ public void Setup(bool forRecovery) } if (arg is CheckpointType chktType) { - checkpointSettings = new CheckpointSettings { CheckpointDir = TestUtils.MethodTestDir }; + checkpointSettings = new CheckpointSettings { CheckpointDir = MethodTestDir }; break; } } + comparer = new LockableUnsafeComparer(); + functions = new LockableUnsafeFunctions(); + fht = new FasterKV(1L << 20, new LogSettings { LogDevice = log, ObjectLogDevice = null, PageSizeBits = 12, MemorySizeBits = 22, ReadCacheSettings = readCacheSettings }, - checkpointSettings: checkpointSettings, + checkpointSettings: checkpointSettings, comparer: comparer, supportsLocking: true); - session = fht.For(new LockableUnsafeFunctions()).NewSession(); + session = fht.For(functions).NewSession(); } [TearDown] @@ -104,16 +125,14 @@ public void TearDown(bool forRecovery) if (!forRecovery) { - TestUtils.DeleteDirectory(TestUtils.MethodTestDir); + DeleteDirectory(MethodTestDir); } } void Populate() { for (int key = 0; key < numRecords; key++) - { Assert.AreNotEqual(Status.PENDING, session.Upsert(key, key * valueMult)); - } } static void 
AssertIsLocked(LockableUnsafeContext luContext, int key, LockType lockType) @@ -156,10 +175,11 @@ void EnsureNoLocks() } [Test] - [Category(TestUtils.LockableUnsafeContextTestCategory)] - [Category(TestUtils.SmokeTestCategory)] + [Category(LockableUnsafeContextTestCategory)] + [Category(SmokeTestCategory)] public void InMemorySimpleLockTxnTest([Values] ResultLockTarget resultLockTarget, [Values] ReadCopyDestination readCopyDestination, - [Values] FlushMode flushMode, [Values(Phase.REST, Phase.INTERMEDIATE)] Phase phase, [Values] UpdateOp updateOp) + [Values] FlushMode flushMode, [Values(Phase.REST, Phase.INTERMEDIATE)] Phase phase, + [Values(UpdateOp.Upsert, UpdateOp.RMW)] UpdateOp updateOp) { Populate(); PrepareRecordLocation(flushMode); @@ -287,9 +307,10 @@ public void InMemorySimpleLockTxnTest([Values] ResultLockTarget resultLockTarget } [Test] - [Category(TestUtils.LockableUnsafeContextTestCategory)] - [Category(TestUtils.SmokeTestCategory)] - public void InMemoryLongLockTest([Values] ResultLockTarget resultLockTarget, [Values] FlushMode flushMode, [Values(Phase.REST, Phase.INTERMEDIATE)] Phase phase, [Values] UpdateOp updateOp) + [Category(LockableUnsafeContextTestCategory)] + [Category(SmokeTestCategory)] + public void InMemoryLongLockTest([Values] ResultLockTarget resultLockTarget, [Values] FlushMode flushMode, [Values(Phase.REST, Phase.INTERMEDIATE)] Phase phase, + [Values(UpdateOp.Upsert, UpdateOp.RMW)] UpdateOp updateOp) { Populate(); PrepareRecordLocation(flushMode); @@ -313,7 +334,7 @@ public void InMemoryLongLockTest([Values] ResultLockTarget resultLockTarget, [Va { Assert.AreEqual(Status.PENDING, status); luContext.CompletePendingWithOutputs(out var completedOutputs, wait: true); - (status, value24) = TestUtils.GetSinglePendingResult(completedOutputs); + (status, value24) = GetSinglePendingResult(completedOutputs); Assert.AreEqual(Status.OK, status); Assert.AreEqual(24 * valueMult, value24); } @@ -372,8 +393,8 @@ public void 
InMemoryLongLockTest([Values] ResultLockTarget resultLockTarget, [Va } [Test] - [Category(TestUtils.LockableUnsafeContextTestCategory)] - [Category(TestUtils.SmokeTestCategory)] + [Category(LockableUnsafeContextTestCategory)] + [Category(SmokeTestCategory)] public void InMemoryDeleteTest([Values] ResultLockTarget resultLockTarget, [Values] ReadCopyDestination readCopyDestination, [Values(FlushMode.NoFlush, FlushMode.ReadOnly)] FlushMode flushMode, [Values(Phase.REST, Phase.INTERMEDIATE)] Phase phase) { @@ -430,8 +451,8 @@ public void InMemoryDeleteTest([Values] ResultLockTarget resultLockTarget, [Valu } [Test] - [Category(TestUtils.LockableUnsafeContextTestCategory)] - [Category(TestUtils.SmokeTestCategory)] + [Category(LockableUnsafeContextTestCategory)] + [Category(SmokeTestCategory)] public void StressLocks([Values(1, 8)] int numLockThreads, [Values(1, 8)] int numOpThreads) { Populate(); @@ -540,8 +561,8 @@ void VerifySplicedInKey(LockableUnsafeContext key + valueMult; + + luContext.ResumeThread(); + + try + { + var status = updateOp switch + { + UpdateOp.Upsert => luContext.Upsert(key, getValue(key)), + UpdateOp.RMW => luContext.RMW(key, getValue(key)), + UpdateOp.Delete => luContext.Delete(key), + _ => Status.ERROR + }; + Assert.AreNotEqual(Status.ERROR, status, $"Unexpected UpdateOp {updateOp}"); + Assert.AreEqual(Status.OK, status); + + var (xlock, slock) = luContext.IsLocked(key); + Assert.IsTrue(xlock); + Assert.IsFalse(slock); + } + finally + { + luContext.SuspendThread(); + } + + luContext.Unlock(key, LockType.Exclusive); + } + + [Test] + [Category(LockableUnsafeContextTestCategory)] + [Category(SmokeTestCategory)] + public void LockNewRecordCompeteWithUpdateTest([Values(LockOperationType.Lock, LockOperationType.Unlock)] LockOperationType lockOp, [Values] UpdateOp updateOp) + { + const int numNewRecords = 100; + + using var updateSession = fht.NewSession(new SimpleFunctions()); + using var lockSession = fht.NewSession(new SimpleFunctions()); + + using 
var updateLuContext = updateSession.GetLockableUnsafeContext(); + using var lockLuContext = lockSession.GetLockableUnsafeContext(); + + LockType getLockType(int key) => ((key & 1) == 0) ? LockType.Exclusive : LockType.Shared; + int getValue(int key) => key + valueMult; + + // If we are testing Delete, then we need to have the records ON-DISK first; Delete is a no-op for unfound records. + if (updateOp == UpdateOp.Delete) + { + for (var key = numRecords; key < numRecords + numNewRecords; ++key) + Assert.AreNotEqual(Status.PENDING, session.Upsert(key, key * valueMult)); + fht.Log.FlushAndEvict(wait: true); + } + + // Now populate the main area of the log. + Populate(); + + HashSet locks = new(); + void lockKey(int key) + { + lockLuContext.Lock(key, getLockType(key)); + locks.Add(key); + } + void unlockKey(int key) + { + lockLuContext.Unlock(key, getLockType(key)); + locks.Remove(key); + } + + // If we are testing unlocking, then we need to lock first. + if (lockOp == LockOperationType.Unlock) + { + for (var key = numRecords; key < numRecords + numNewRecords; ++key) + lockKey(key); + } + + // Sleep at varying durations for each call to comparer.GetHashCode, which is called at the start of Lock/Unlock and Upsert/RMW/Delete. + comparer.maxSleepMs = 20; + + for (var key = numRecords; key < numRecords + numNewRecords; ++key) + { + // Use Task instead of Thread because this propagates exceptions (such as Assert.* failures) back to this thread. 
+ Task.WaitAll(Task.Run(() => locker(key)), Task.Run(() => updater(key))); + var (xlock, slock) = lockLuContext.IsLocked(key); + var expectedXlock = getLockType(key) == LockType.Exclusive && lockOp != LockOperationType.Unlock; + var expectedSlock = getLockType(key) == LockType.Shared && lockOp != LockOperationType.Unlock; + Assert.AreEqual(expectedXlock, xlock); + Assert.AreEqual(expectedSlock, slock); + + if (lockOp == LockOperationType.Lock) + { + // There should be no entries in the locktable now; they should all be on the RecordInfo. + Assert.IsFalse(fht.LockTable.IsActive, $"count = {fht.LockTable.dict.Count}"); + } + else + { + // We are unlocking so should remove one record for each iteration. + Assert.AreEqual(numNewRecords + numRecords - key - 1, fht.LockTable.dict.Count); + } + } + + // Unlock all the keys we are expecting to unlock, which ensures all the locks were applied to RecordInfos as expected. + foreach (var key in locks.ToArray()) + unlockKey(key); + + void locker(int key) + { + try + { + lockLuContext.ResumeThread(); + if (lockOp == LockOperationType.Lock) + lockKey(key); + else + unlockKey(key); + } + finally + { + lockLuContext.SuspendThread(); + } + } + + void updater(int key) + { + updateLuContext.ResumeThread(); + + try + { + // Use the LuContext here even though we're not doing locking, because we don't want the ephemeral locks to be tried for this test + // (the test will hang as we try to acquire the lock). 
+ var status = updateOp switch + { + UpdateOp.Upsert => updateLuContext.Upsert(key, getValue(key)), + UpdateOp.RMW => updateLuContext.RMW(key, getValue(key)), + UpdateOp.Delete => updateLuContext.Delete(key), + _ => Status.ERROR + }; + Assert.AreNotEqual(Status.ERROR, status, $"Unexpected UpdateOp {updateOp}"); + if (updateOp == UpdateOp.RMW) + Assert.AreEqual(Status.NOTFOUND, status); + else + Assert.AreEqual(Status.OK, status); + } + finally + { + updateLuContext.SuspendThread(); + } + } + } + + [Test] + [Category(LockableUnsafeContextTestCategory)] + [Category(SmokeTestCategory)] public void EvictFromMainLogToLockTableTest() { Populate(); @@ -779,9 +962,9 @@ public void EvictFromMainLogToLockTableTest() } [Test] - [Category(TestUtils.LockableUnsafeContextTestCategory)] - [Category(TestUtils.CheckpointRestoreCategory)] - public async ValueTask CheckpointRecoverTest([Values] CheckpointType checkpointType, [Values] TestUtils.SyncMode syncMode) + [Category(LockableUnsafeContextTestCategory)] + [Category(CheckpointRestoreCategory)] + public async ValueTask CheckpointRecoverTest([Values] CheckpointType checkpointType, [Values] SyncMode syncMode) { Populate(); @@ -802,7 +985,7 @@ public async ValueTask CheckpointRecoverTest([Values] CheckpointType checkpointT this.fht.Log.ShiftReadOnlyAddress(this.fht.Log.TailAddress, wait: true); - if (syncMode == TestUtils.SyncMode.Sync) + if (syncMode == SyncMode.Sync) { this.fht.TakeFullCheckpoint(out fullCheckpointToken, checkpointType); await this.fht.CompleteCheckpointAsync(); @@ -818,7 +1001,7 @@ public async ValueTask CheckpointRecoverTest([Values] CheckpointType checkpointT TearDown(forRecovery: true); Setup(forRecovery: true); - if (syncMode == TestUtils.SyncMode.Sync) + if (syncMode == SyncMode.Sync) this.fht.Recover(fullCheckpointToken); else await this.fht.RecoverAsync(fullCheckpointToken); @@ -839,14 +1022,14 @@ public async ValueTask CheckpointRecoverTest([Values] CheckpointType checkpointT const int checkpointFreq = 
250; [Test] - [Category(TestUtils.LockableUnsafeContextTestCategory)] - [Category(TestUtils.CheckpointRestoreCategory)] - async public Task SecondaryReaderTest([Values] TestUtils.SyncMode syncMode) + [Category(LockableUnsafeContextTestCategory)] + [Category(CheckpointRestoreCategory)] + async public Task SecondaryReaderTest([Values] SyncMode syncMode) { // This test is taken from the SecondaryReaderStore sample - var path = TestUtils.MethodTestDir; - TestUtils.DeleteDirectory(path, wait: true); + var path = MethodTestDir; + DeleteDirectory(path, wait: true); var log = Devices.CreateLogDevice(path + "hlog.log", deleteOnClose: true); @@ -862,15 +1045,15 @@ async public Task SecondaryReaderTest([Values] TestUtils.SyncMode syncMode) checkpointSettings: new CheckpointSettings { CheckpointDir = path } ); - // Use Task instead of Thread because this propagate exceptions back to this thread. + // Use Task instead of Thread because this propagates exceptions (such as Assert.* failures) back to this thread. 
await Task.WhenAll(Task.Run(() => PrimaryWriter(primaryStore, syncMode)), Task.Run(() => SecondaryReader(secondaryStore, syncMode))); log.Dispose(); - TestUtils.DeleteDirectory(path, wait: true); + DeleteDirectory(path, wait: true); } - async static Task PrimaryWriter(FasterKV primaryStore, TestUtils.SyncMode syncMode) + async static Task PrimaryWriter(FasterKV primaryStore, SyncMode syncMode) { using var s1 = primaryStore.NewSession(new SimpleFunctions()); using var luc1 = s1.GetLockableUnsafeContext(); @@ -881,7 +1064,7 @@ async static Task PrimaryWriter(FasterKV primaryStore, TestUtils.Syn if (key > 0 && key % checkpointFreq == 0) { // Checkpointing primary until key {key - 1} - if (syncMode == TestUtils.SyncMode.Sync) + if (syncMode == SyncMode.Sync) { primaryStore.TakeHybridLogCheckpoint(out _, CheckpointType.Snapshot); await primaryStore.CompleteCheckpointAsync().ConfigureAwait(false); @@ -909,7 +1092,7 @@ async static Task PrimaryWriter(FasterKV primaryStore, TestUtils.Syn } } - async static Task SecondaryReader(FasterKV secondaryStore, TestUtils.SyncMode syncMode) + async static Task SecondaryReader(FasterKV secondaryStore, SyncMode syncMode) { using var s1 = secondaryStore.NewSession(new SimpleFunctions()); using var luc1 = s1.GetLockableUnsafeContext(); @@ -920,7 +1103,7 @@ async static Task SecondaryReader(FasterKV secondaryStore, TestUtils try { // read-only recovery, no writing back undos - if (syncMode == TestUtils.SyncMode.Sync) + if (syncMode == SyncMode.Sync) secondaryStore.Recover(undoNextVersion: false); else await secondaryStore.RecoverAsync(undoNextVersion: false).ConfigureAwait(false); diff --git a/docs/_docs/30-fasterkv-manual-locking.md b/docs/_docs/30-fasterkv-manual-locking.md index 2b33a4233..a5d4e45e8 100644 --- a/docs/_docs/30-fasterkv-manual-locking.md +++ b/docs/_docs/30-fasterkv-manual-locking.md @@ -154,7 +154,7 @@ When Upsert must append a new record: - Any thread seeing a Tentative record will spinwait until it's no longer 
Tentative, so no thread will try to lock this newly-CAS'd record. - Upsert checks the `LockTable` to see if there is an entry in it for this key. - If an entry is in the `LockTable`, then Upsert checks to see if it is marked Tentative. - - If so, then it is ignored; per [Insertion to LockTable due to Lock](#insertion-to-locktable-due-to-lock), it will be removed by the Lock() thread. + - If so, then we spinwait until it is no longer tentative; per [Insertion to LockTable due to Lock](#insertion-to-locktable-due-to-lock), it will be removed by the Lock() thread, or made final, depending on whether the Lock() thread saw the newly-Upserted record. - Otherwise, Upsert: - Applies the locks to its newly-CAS'd record (which is still Tentative) - Sets the LockTable entry Invalid and removes it @@ -205,7 +205,7 @@ For record transfers involving the ReadCache, we have the following high-level c - Otherwise, we insplice between the final RC entry and the first main-log entry; we never splice into the middle of the RC prefix chain. - Even when there are RC entries in the hash chain, we must avoid latching because that would slow down all record-insertion operations (upsert, RMW of a new record, Delete of an on-disk record, etc.) as well as some Read situations. - "Insplicing" occurs when a new record is inserted into the main log after the end of the ReadCache prefix string. -- "Outsplicing" occurs when a record is spliced out of the RC portion of the hash chain (main log records are never spliced out) because the value for that key must be updated, or because we are evicting records from the ReadCache. Outsplicing introduces concurrency considerations but we must support it; we cannot simply mark ReadCache entries as Invalid and leave them there, or the chain will grow without bound. For concurrency reasons we defer outsplicing to readcache eviction time, when readcache records are destroyed, as described below. 
+- "Outsplicing" occurs when a record is spliced out of the RC portion of the hash chain (main log records are never spliced out) because the value for that key must be updated, or because we are evicting records from the ReadCache. We cannot simply mark ReadCache entries as Invalid and leave them there, or the chain will grow without bound. For concurrency reasons, outsplicing is "delayed"; we mark the readcache record as Invalid during normal operations, and defer actual record removal to readcache eviction time, as described below. - Insplicing: For splicing into the chain, we always CAS at the final RC entry rather than at the HashTable bucket slot (we never splice into the middle of the RC prefix chain). - Add the new record to the tail of main by pointing to the existing tail of in its `.PreviousAddress`. - CAS the existing final RC record to point to the new record (set its .PreviousAddress and CAS). @@ -244,9 +244,9 @@ Transfers to the `LockTable` due to main log evictions are handled in the follow ### Recovery Considerations -We must clear in-memory records' lock bits during FoldOver recovery. -- Add to checkpoint information an indication of whether any `LockableUnsafeContext` were active during the Checkpoint. -- If this MRO indicator is true: +We must clear in-memory records' lock bits during recovery. +- Add `RecoveryInfo.manualLockingActive`, an indication of whether any `LockableUnsafeContext` were active during the Checkpoint. 
+- If this indicator is true: - Scan pages, clearing the locks of any records - These pages do not need to be flushed to disk - Ensure random reads and scans will NOT be flummoxed by the weird lock bits From 763a0ace7a0dadd1cf73c1be0751912df8a96f6c Mon Sep 17 00:00:00 2001 From: TedHartMS <15467143+TedHartMS@users.noreply.github.com> Date: Sat, 15 Jan 2022 00:06:30 -0800 Subject: [PATCH 18/25] Add UnsafeContext and make it the default for FASTER.Benchmark --- .../FasterClientSessionYcsbBenchmark.cs | 352 +++++++++ cs/benchmark/FasterYcsbBenchmark.cs | 151 ++-- cs/benchmark/Options.cs | 3 +- cs/benchmark/Program.cs | 31 +- cs/benchmark/TestLoader.cs | 4 + cs/benchmark/YcsbConstants.cs | 3 +- cs/src/core/ClientSession/ClientSession.cs | 20 +- .../ClientSession/LockableUnsafeContext.cs | 6 +- cs/src/core/ClientSession/UnsafeContext.cs | 742 ++++++++++++++++++ cs/test/BasicFASTERTests.cs | 114 +-- cs/test/UnsafeContextTests.cs | 670 ++++++++++++++++ 11 files changed, 1925 insertions(+), 171 deletions(-) create mode 100644 cs/benchmark/FasterClientSessionYcsbBenchmark.cs create mode 100644 cs/src/core/ClientSession/UnsafeContext.cs create mode 100644 cs/test/UnsafeContextTests.cs diff --git a/cs/benchmark/FasterClientSessionYcsbBenchmark.cs b/cs/benchmark/FasterClientSessionYcsbBenchmark.cs new file mode 100644 index 000000000..687f5158e --- /dev/null +++ b/cs/benchmark/FasterClientSessionYcsbBenchmark.cs @@ -0,0 +1,352 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. 
+ +using FASTER.core; +using System; +using System.Diagnostics; +using System.Threading; + +namespace FASTER.benchmark +{ + internal class FASTER_ClientSessionYcsbBenchmark + { + // Ensure sizes are aligned to chunk sizes + static long InitCount; + static long TxnCount; + + readonly TestLoader testLoader; + readonly ManualResetEventSlim waiter = new(); + readonly int numaStyle; + readonly int readPercent; + readonly Functions functions; + readonly Input[] input_; + + readonly Key[] init_keys_; + readonly Key[] txn_keys_; + + readonly IDevice device; + readonly FasterKV store; + + long idx_ = 0; + long total_ops_done = 0; + volatile bool done = false; + + internal FASTER_ClientSessionYcsbBenchmark(Key[] i_keys_, Key[] t_keys_, TestLoader testLoader) + { + // Affinize main thread to last core on first socket if not used by experiment + var (numGrps, numProcs) = Native32.GetNumGroupsProcsPerGroup(); + if ((testLoader.Options.NumaStyle == 0 && testLoader.Options.ThreadCount <= (numProcs - 1)) || + (testLoader.Options.NumaStyle == 1 && testLoader.Options.ThreadCount <= numGrps * (numProcs - 1))) + Native32.AffinitizeThreadRoundRobin(numProcs - 1); + + this.testLoader = testLoader; + init_keys_ = i_keys_; + txn_keys_ = t_keys_; + numaStyle = testLoader.Options.NumaStyle; + readPercent = testLoader.Options.ReadPercent; + var lockImpl = testLoader.LockImpl; + functions = new Functions(lockImpl != LockImpl.None, testLoader.Options.PostOps); + + input_ = new Input[8]; + for (int i = 0; i < 8; i++) + input_[i].value = i; + + device = Devices.CreateLogDevice(TestLoader.DevicePath, preallocateFile: true, deleteOnClose: !testLoader.RecoverMode, useIoCompletionPort: true); + + if (testLoader.Options.ThreadCount >= 16) + device.ThrottleLimit = testLoader.Options.ThreadCount * 12; + + if (testLoader.Options.UseSmallMemoryLog) + store = new FasterKV + (testLoader.MaxKey / 4, new LogSettings { LogDevice = device, PreallocateLog = true, PageSizeBits = 25, SegmentSizeBits = 30, 
MemorySizeBits = 28 }, + new CheckpointSettings { CheckpointDir = testLoader.BackupPath }); + else + store = new FasterKV + (testLoader.MaxKey / 2, new LogSettings { LogDevice = device, PreallocateLog = true }, + new CheckpointSettings { CheckpointDir = testLoader.BackupPath }); + } + + internal void Dispose() + { + store.Dispose(); + device.Dispose(); + } + + private void RunYcsb(int thread_idx) + { + RandomGenerator rng = new((uint)(1 + thread_idx)); + + if (numaStyle == 0) + Native32.AffinitizeThreadRoundRobin((uint)thread_idx); + else + Native32.AffinitizeThreadShardedNuma((uint)thread_idx, 2); // assuming two NUMA sockets + + waiter.Wait(); + + var sw = Stopwatch.StartNew(); + + Value value = default; + Input input = default; + Output output = default; + + long reads_done = 0; + long writes_done = 0; + + var session = store.For(functions).NewSession(null, !testLoader.Options.NoThreadAffinity); + + while (!done) + { + long chunk_idx = Interlocked.Add(ref idx_, YcsbConstants.kChunkSize) - YcsbConstants.kChunkSize; + while (chunk_idx >= TxnCount) + { + if (chunk_idx == TxnCount) + idx_ = 0; + chunk_idx = Interlocked.Add(ref idx_, YcsbConstants.kChunkSize) - YcsbConstants.kChunkSize; + } + + for (long idx = chunk_idx; idx < chunk_idx + YcsbConstants.kChunkSize && !done; ++idx) + { + Op op; + int r = (int)rng.Generate(100); + if (r < readPercent) + op = Op.Read; + else if (readPercent >= 0) + op = Op.Upsert; + else + op = Op.ReadModifyWrite; + + if (idx % 512 == 0) + { + if (!testLoader.Options.NoThreadAffinity) + session.Refresh(); + session.CompletePending(false); + } + + switch (op) + { + case Op.Upsert: + { + session.Upsert(ref txn_keys_[idx], ref value, Empty.Default, 1); + ++writes_done; + break; + } + case Op.Read: + { + session.Read(ref txn_keys_[idx], ref input, ref output, Empty.Default, 1); + ++reads_done; + break; + } + case Op.ReadModifyWrite: + { + session.RMW(ref txn_keys_[idx], ref input_[idx & 0x7], Empty.Default, 1); + ++writes_done; + break; + } 
+ default: + throw new InvalidOperationException("Unexpected op: " + op); + } + } + } + + session.CompletePending(true); + session.Dispose(); + + sw.Stop(); + + Console.WriteLine("Thread " + thread_idx + " done; " + reads_done + " reads, " + + writes_done + " writes, in " + sw.ElapsedMilliseconds + " ms."); + Interlocked.Add(ref total_ops_done, reads_done + writes_done); + } + + internal unsafe (double, double) Run(TestLoader testLoader) + { + ClientSession session = default; + LockableUnsafeContext luContext = default; + + (Key key, LockType kind) xlock = (new Key { value = long.MaxValue }, LockType.Exclusive); + (Key key, LockType kind) slock = (new Key { value = long.MaxValue - 1 }, LockType.Shared); + if (testLoader.Options.LockImpl == (int)LockImpl.Manual) + { + session = store.For(functions).NewSession(null); + luContext = session.GetLockableUnsafeContext(); + + Console.WriteLine("Taking 2 manual locks"); + luContext.Lock(xlock.key, xlock.kind); + luContext.Lock(slock.key, slock.kind); + } + + Thread[] workers = new Thread[testLoader.Options.ThreadCount]; + + Console.WriteLine("Executing setup."); + + var storeWasRecovered = testLoader.MaybeRecoverStore(store); + long elapsedMs = 0; + if (!storeWasRecovered) + { + // Setup the store for the YCSB benchmark. + Console.WriteLine("Loading FasterKV from data"); + for (int idx = 0; idx < testLoader.Options.ThreadCount; ++idx) + { + int x = idx; + workers[idx] = new Thread(() => SetupYcsb(x)); + } + + foreach (Thread worker in workers) + { + worker.Start(); + } + + waiter.Set(); + var sw = Stopwatch.StartNew(); + foreach (Thread worker in workers) + { + worker.Join(); + } + sw.Stop(); + elapsedMs = sw.ElapsedMilliseconds; + waiter.Reset(); + } + double insertsPerSecond = elapsedMs == 0 ? 
0 : ((double)InitCount / elapsedMs) * 1000; + Console.WriteLine(TestStats.GetLoadingTimeLine(insertsPerSecond, elapsedMs)); + Console.WriteLine(TestStats.GetAddressesLine(AddressLineNum.Before, store.Log.BeginAddress, store.Log.HeadAddress, store.Log.ReadOnlyAddress, store.Log.TailAddress)); + + if (!storeWasRecovered) + testLoader.MaybeCheckpointStore(store); + + // Uncomment below to dispose log from memory, use for 100% read workloads only + // store.Log.DisposeFromMemory(); + + idx_ = 0; + + if (testLoader.Options.DumpDistribution) + Console.WriteLine(store.DumpDistribution()); + + // Ensure first fold-over checkpoint is fast + if (testLoader.Options.PeriodicCheckpointMilliseconds > 0 && testLoader.Options.PeriodicCheckpointType == CheckpointType.FoldOver) + store.Log.ShiftReadOnlyAddress(store.Log.TailAddress, true); + + Console.WriteLine("Executing experiment."); + + // Run the experiment. + for (int idx = 0; idx < testLoader.Options.ThreadCount; ++idx) + { + int x = idx; + workers[idx] = new Thread(() => RunYcsb(x)); + } + // Start threads. 
+ foreach (Thread worker in workers) + { + worker.Start(); + } + + waiter.Set(); + var swatch = Stopwatch.StartNew(); + + if (testLoader.Options.PeriodicCheckpointMilliseconds <= 0) + { + Thread.Sleep(TimeSpan.FromSeconds(testLoader.Options.RunSeconds)); + } + else + { + var checkpointTaken = 0; + while (swatch.ElapsedMilliseconds < 1000 * testLoader.Options.RunSeconds) + { + if (checkpointTaken < swatch.ElapsedMilliseconds / testLoader.Options.PeriodicCheckpointMilliseconds) + { + long start = swatch.ElapsedTicks; + if (store.TakeHybridLogCheckpoint(out _, testLoader.Options.PeriodicCheckpointType, testLoader.Options.PeriodicCheckpointTryIncremental)) + { + store.CompleteCheckpointAsync().AsTask().GetAwaiter().GetResult(); + var timeTaken = (swatch.ElapsedTicks - start) / TimeSpan.TicksPerMillisecond; + Console.WriteLine("Checkpoint time: {0}ms", timeTaken); + checkpointTaken++; + } + } + } + Console.WriteLine($"Checkpoint taken {checkpointTaken}"); + } + + swatch.Stop(); + + done = true; + + foreach (Thread worker in workers) + { + worker.Join(); + } + + if (testLoader.Options.LockImpl == (int)LockImpl.Manual) + { + luContext.Unlock(xlock.key, xlock.kind); + luContext.Unlock(slock.key, slock.kind); + luContext.Dispose(); + session.Dispose(); + } + + waiter.Reset(); + + double seconds = swatch.ElapsedMilliseconds / 1000.0; + Console.WriteLine(TestStats.GetAddressesLine(AddressLineNum.After, store.Log.BeginAddress, store.Log.HeadAddress, store.Log.ReadOnlyAddress, store.Log.TailAddress)); + + double opsPerSecond = total_ops_done / seconds; + Console.WriteLine(TestStats.GetTotalOpsString(total_ops_done, seconds)); + Console.WriteLine(TestStats.GetStatsLine(StatsLineNum.Iteration, YcsbConstants.OpsPerSec, opsPerSecond)); + return (insertsPerSecond, opsPerSecond); + } + + private void SetupYcsb(int thread_idx) + { + if (numaStyle == 0) + Native32.AffinitizeThreadRoundRobin((uint)thread_idx); + else + Native32.AffinitizeThreadShardedNuma((uint)thread_idx, 2); // 
assuming two NUMA sockets + + waiter.Wait(); + + var session = store.For(functions).NewSession(null, !testLoader.Options.NoThreadAffinity); + + Value value = default; + + for (long chunk_idx = Interlocked.Add(ref idx_, YcsbConstants.kChunkSize) - YcsbConstants.kChunkSize; + chunk_idx < InitCount; + chunk_idx = Interlocked.Add(ref idx_, YcsbConstants.kChunkSize) - YcsbConstants.kChunkSize) + { + for (long idx = chunk_idx; idx < chunk_idx + YcsbConstants.kChunkSize; ++idx) + { + if (idx % 256 == 0) + { + session.Refresh(); + + if (idx % 65536 == 0) + { + session.CompletePending(false); + } + } + + session.Upsert(ref init_keys_[idx], ref value, Empty.Default, 1); + } + } + + session.CompletePending(true); + session.Dispose(); + } + + #region Load Data + + internal static void CreateKeyVectors(TestLoader testLoader, out Key[] i_keys, out Key[] t_keys) + { + InitCount = YcsbConstants.kChunkSize * (testLoader.InitCount / YcsbConstants.kChunkSize); + TxnCount = YcsbConstants.kChunkSize * (testLoader.TxnCount / YcsbConstants.kChunkSize); + + i_keys = new Key[InitCount]; + t_keys = new Key[TxnCount]; + } + + internal class KeySetter : IKeySetter + { + public void Set(Key[] vector, long idx, long value) => vector[idx].value = value; + } + + #endregion + } +} diff --git a/cs/benchmark/FasterYcsbBenchmark.cs b/cs/benchmark/FasterYcsbBenchmark.cs index 6a71ce6be..fa715510b 100644 --- a/cs/benchmark/FasterYcsbBenchmark.cs +++ b/cs/benchmark/FasterYcsbBenchmark.cs @@ -116,60 +116,63 @@ private void RunYcsb(int thread_idx) int count = 0; #endif - var session = store.For(functions).NewSession(null, !testLoader.Options.NoThreadAffinity); + var session = store.For(functions).NewSession(null); + var uContext = session.GetUnsafeContext(); + uContext.ResumeThread(); - while (!done) + try { - long chunk_idx = Interlocked.Add(ref idx_, YcsbConstants.kChunkSize) - YcsbConstants.kChunkSize; - while (chunk_idx >= TxnCount) - { - if (chunk_idx == TxnCount) - idx_ = 0; - chunk_idx = 
Interlocked.Add(ref idx_, YcsbConstants.kChunkSize) - YcsbConstants.kChunkSize; - } - - for (long idx = chunk_idx; idx < chunk_idx + YcsbConstants.kChunkSize && !done; ++idx) + while (!done) { - Op op; - int r = (int)rng.Generate(100); - if (r < readPercent) - op = Op.Read; - else if (readPercent >= 0) - op = Op.Upsert; - else - op = Op.ReadModifyWrite; - - if (idx % 512 == 0) + long chunk_idx = Interlocked.Add(ref idx_, YcsbConstants.kChunkSize) - YcsbConstants.kChunkSize; + while (chunk_idx >= TxnCount) { - if (!testLoader.Options.NoThreadAffinity) - session.Refresh(); - session.CompletePending(false); + if (chunk_idx == TxnCount) + idx_ = 0; + chunk_idx = Interlocked.Add(ref idx_, YcsbConstants.kChunkSize) - YcsbConstants.kChunkSize; } - switch (op) + for (long idx = chunk_idx; idx < chunk_idx + YcsbConstants.kChunkSize && !done; ++idx) { - case Op.Upsert: - { - session.Upsert(ref txn_keys_[idx], ref value, Empty.Default, 1); - ++writes_done; - break; - } - case Op.Read: - { - session.Read(ref txn_keys_[idx], ref input, ref output, Empty.Default, 1); - ++reads_done; - break; - } - case Op.ReadModifyWrite: - { - session.RMW(ref txn_keys_[idx], ref input_[idx & 0x7], Empty.Default, 1); - ++writes_done; - break; - } - default: - throw new InvalidOperationException("Unexpected op: " + op); + Op op; + int r = (int)rng.Generate(100); + if (r < readPercent) + op = Op.Read; + else if (readPercent >= 0) + op = Op.Upsert; + else + op = Op.ReadModifyWrite; + + if (idx % 512 == 0) + { + uContext.Refresh(); + uContext.CompletePending(false); + } + + switch (op) + { + case Op.Upsert: + { + uContext.Upsert(ref txn_keys_[idx], ref value, Empty.Default, 1); + ++writes_done; + break; + } + case Op.Read: + { + uContext.Read(ref txn_keys_[idx], ref input, ref output, Empty.Default, 1); + ++reads_done; + break; + } + case Op.ReadModifyWrite: + { + uContext.RMW(ref txn_keys_[idx], ref input_[idx & 0x7], Empty.Default, 1); + ++writes_done; + break; + } + default: + throw new 
InvalidOperationException("Unexpected op: " + op); + } } - } #if DASHBOARD count += (int)kChunkSize; @@ -186,9 +189,16 @@ private void RunYcsb(int thread_idx) statsWritten[thread_idx].Set(); } #endif + } + + uContext.CompletePending(true); + } + finally + { + uContext.SuspendThread(); } - session.CompletePending(true); + uContext.Dispose(); session.Dispose(); sw.Stop(); @@ -209,8 +219,8 @@ internal unsafe (double, double) Run(TestLoader testLoader) dash.Start(); #endif - ClientSession session = store.For(functions).NewSession(null); - LockableUnsafeContext luContext = session.GetLockableUnsafeContext(); + ClientSession session = default; + LockableUnsafeContext luContext = default; (Key key, LockType kind) xlock = (new Key { value = long.MaxValue }, LockType.Exclusive); (Key key, LockType kind) slock = (new Key { value = long.MaxValue - 1 }, LockType.Shared); @@ -356,7 +366,9 @@ private void SetupYcsb(int thread_idx) waiter.Wait(); - var session = store.For(functions).NewSession(null, !testLoader.Options.NoThreadAffinity); + var session = store.For(functions).NewSession(null); + var uContext = session.GetUnsafeContext(); + uContext.ResumeThread(); #if DASHBOARD var tstart = Stopwatch.GetTimestamp(); @@ -365,26 +377,28 @@ private void SetupYcsb(int thread_idx) int count = 0; #endif - Value value = default; - - for (long chunk_idx = Interlocked.Add(ref idx_, YcsbConstants.kChunkSize) - YcsbConstants.kChunkSize; - chunk_idx < InitCount; - chunk_idx = Interlocked.Add(ref idx_, YcsbConstants.kChunkSize) - YcsbConstants.kChunkSize) + try { - for (long idx = chunk_idx; idx < chunk_idx + YcsbConstants.kChunkSize; ++idx) + Value value = default; + + for (long chunk_idx = Interlocked.Add(ref idx_, YcsbConstants.kChunkSize) - YcsbConstants.kChunkSize; + chunk_idx < InitCount; + chunk_idx = Interlocked.Add(ref idx_, YcsbConstants.kChunkSize) - YcsbConstants.kChunkSize) { - if (idx % 256 == 0) + for (long idx = chunk_idx; idx < chunk_idx + YcsbConstants.kChunkSize; ++idx) { - 
session.Refresh(); - - if (idx % 65536 == 0) + if (idx % 256 == 0) { - session.CompletePending(false); + uContext.Refresh(); + + if (idx % 65536 == 0) + { + uContext.CompletePending(false); + } } - } - session.Upsert(ref init_keys_[idx], ref value, Empty.Default, 1); - } + uContext.Upsert(ref init_keys_[idx], ref value, Empty.Default, 1); + } #if DASHBOARD count += (int)kChunkSize; @@ -399,9 +413,14 @@ private void SetupYcsb(int thread_idx) statsWritten[thread_idx].Set(); } #endif + } + uContext.CompletePending(true); } - - session.CompletePending(true); + finally + { + uContext.SuspendThread(); + } + uContext.Dispose(); session.Dispose(); } diff --git a/cs/benchmark/Options.cs b/cs/benchmark/Options.cs index 77a397e7e..a451616ac 100644 --- a/cs/benchmark/Options.cs +++ b/cs/benchmark/Options.cs @@ -12,7 +12,8 @@ class Options HelpText = "Benchmark to run:" + "\n 0 = YCSB" + "\n 1 = YCSB with SpanByte" + - "\n 2 = ConcurrentDictionary")] + "\n 2 = YCSB with ClientSession" + + "\n 3 = ConcurrentDictionary")] public int Benchmark { get; set; } [Option('t', "threads", Required = false, Default = 8, diff --git a/cs/benchmark/Program.cs b/cs/benchmark/Program.cs index f9273caba..1c876f687 100644 --- a/cs/benchmark/Program.cs +++ b/cs/benchmark/Program.cs @@ -29,19 +29,32 @@ public static void Main(string[] args) switch (testLoader.BenchmarkType) { case BenchmarkType.Ycsb: - var yTest = new FASTER_YcsbBenchmark(testLoader.init_keys, testLoader.txn_keys, testLoader); - testStats.AddResult(yTest.Run(testLoader)); - yTest.Dispose(); + { + var tester = new FASTER_YcsbBenchmark(testLoader.init_keys, testLoader.txn_keys, testLoader); + testStats.AddResult(tester.Run(testLoader)); + tester.Dispose(); + } break; case BenchmarkType.SpanByte: - var sTest = new FasterSpanByteYcsbBenchmark(testLoader.init_span_keys, testLoader.txn_span_keys, testLoader); - testStats.AddResult(sTest.Run(testLoader)); - sTest.Dispose(); + { + var tester = new 
FasterSpanByteYcsbBenchmark(testLoader.init_span_keys, testLoader.txn_span_keys, testLoader); + testStats.AddResult(tester.Run(testLoader)); + tester.Dispose(); + } + break; + case BenchmarkType.ClientSession: + { + var tester = new FASTER_ClientSessionYcsbBenchmark(testLoader.init_keys, testLoader.txn_keys, testLoader); + testStats.AddResult(tester.Run(testLoader)); + tester.Dispose(); + } break; case BenchmarkType.ConcurrentDictionaryYcsb: - var cTest = new ConcurrentDictionary_YcsbBenchmark(testLoader.init_keys, testLoader.txn_keys, testLoader); - testStats.AddResult(cTest.Run(testLoader)); - cTest.Dispose(); + { + var tester = new ConcurrentDictionary_YcsbBenchmark(testLoader.init_keys, testLoader.txn_keys, testLoader); + testStats.AddResult(tester.Run(testLoader)); + tester.Dispose(); + } break; default: throw new ApplicationException("Unknown benchmark type"); diff --git a/cs/benchmark/TestLoader.cs b/cs/benchmark/TestLoader.cs index bbee6c0a1..55d561c9c 100644 --- a/cs/benchmark/TestLoader.cs +++ b/cs/benchmark/TestLoader.cs @@ -105,6 +105,10 @@ private void LoadDataThreadProc() FasterSpanByteYcsbBenchmark.CreateKeyVectors(this, out this.init_span_keys, out this.txn_span_keys); LoadData(this, this.init_span_keys, this.txn_span_keys, new FasterSpanByteYcsbBenchmark.KeySetter()); break; + case BenchmarkType.ClientSession: + FASTER_ClientSessionYcsbBenchmark.CreateKeyVectors(this, out this.init_keys, out this.txn_keys); + LoadData(this, this.init_keys, this.txn_keys, new FASTER_YcsbBenchmark.KeySetter()); + break; case BenchmarkType.ConcurrentDictionaryYcsb: ConcurrentDictionary_YcsbBenchmark.CreateKeyVectors(this, out this.init_keys, out this.txn_keys); LoadData(this, this.init_keys, this.txn_keys, new ConcurrentDictionary_YcsbBenchmark.KeySetter()); diff --git a/cs/benchmark/YcsbConstants.cs b/cs/benchmark/YcsbConstants.cs index 697d8ae23..c9387dc6a 100644 --- a/cs/benchmark/YcsbConstants.cs +++ b/cs/benchmark/YcsbConstants.cs @@ -7,8 +7,9 @@ namespace 
FASTER.benchmark { enum BenchmarkType : byte { - Ycsb, + Ycsb = 0, SpanByte, + ClientSession, ConcurrentDictionaryYcsb }; diff --git a/cs/src/core/ClientSession/ClientSession.cs b/cs/src/core/ClientSession/ClientSession.cs index dbb6451ff..4dcc47d5f 100644 --- a/cs/src/core/ClientSession/ClientSession.cs +++ b/cs/src/core/ClientSession/ClientSession.cs @@ -36,7 +36,8 @@ public sealed class ClientSession internal readonly InternalFasterSession FasterSession; - LockableUnsafeContext manualOperations; + UnsafeContext uContext; + LockableUnsafeContext luContext; internal const string NotAsyncSessionErr = "Session does not support async operations"; @@ -158,14 +159,24 @@ public void Dispose() UnsafeSuspendThread(); } + /// + /// Return a new interface to Faster operations that supports manual epoch control. + /// + public UnsafeContext GetUnsafeContext() + { + this.uContext ??= new (this); + this.uContext.Acquire(); + return this.uContext; + } + /// /// Return a new interface to Faster operations that supports manual locking and epoch control. 
/// public LockableUnsafeContext GetLockableUnsafeContext() { - this.manualOperations ??= new LockableUnsafeContext(this); - this.manualOperations.Acquire(); - return this.manualOperations; + this.luContext ??= new(this); + this.luContext.Acquire(); + return this.luContext; } #region IFasterOperations @@ -1160,7 +1171,6 @@ public bool CompletePendingWithOutputs(out CompletedOutputIterator _clientSession.CompletePendingWithOutputs(out completedOutputs, wait, spinWaitForCommit); #endregion Internal utilities } - #endregion IFasterSession } } diff --git a/cs/src/core/ClientSession/LockableUnsafeContext.cs b/cs/src/core/ClientSession/LockableUnsafeContext.cs index 723b76f7a..900498b94 100644 --- a/cs/src/core/ClientSession/LockableUnsafeContext.cs +++ b/cs/src/core/ClientSession/LockableUnsafeContext.cs @@ -10,7 +10,7 @@ namespace FASTER.core { /// - /// Faster Operations implementation that allows manual control of record locking and epoch management. For advanced use only. + /// Faster Context implementation that allows manual control of record locking and epoch management. For advanced use only. /// public sealed class LockableUnsafeContext : IFasterContext, IDisposable where Functions : IFunctions @@ -208,7 +208,7 @@ public void Unlock(ref Key key, LockType lockType) #endregion Key Locking - #region IFasterOperations + #region IFasterContext /// [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -468,7 +468,7 @@ public void Refresh() clientSession.fht.InternalRefresh(clientSession.ctx, FasterSession); } - #endregion IFasterOperations + #endregion IFasterContext #region IFasterSession diff --git a/cs/src/core/ClientSession/UnsafeContext.cs b/cs/src/core/ClientSession/UnsafeContext.cs new file mode 100644 index 000000000..24b370754 --- /dev/null +++ b/cs/src/core/ClientSession/UnsafeContext.cs @@ -0,0 +1,742 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. 
+ +using System; +using System.Diagnostics; +using System.Runtime.CompilerServices; +using System.Threading; +using System.Threading.Tasks; + +namespace FASTER.core +{ + /// + /// Faster Operations implementation that allows manual control of record epoch management. For advanced use only. + /// + public sealed class UnsafeContext : IFasterContext, IDisposable + where Functions : IFunctions + { + readonly ClientSession clientSession; + + internal readonly InternalFasterSession FasterSession; + bool isAcquired; + + void CheckAcquired() + { + if (!isAcquired) + throw new FasterException("Method call on not-acquired UnsafeContext"); + } + + internal UnsafeContext(ClientSession clientSession) + { + this.clientSession = clientSession; + FasterSession = new InternalFasterSession(clientSession); + } + + /// + /// Resume session on current thread. IMPORTANT: Call SuspendThread before any async op. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void ResumeThread() + { + CheckAcquired(); + clientSession.UnsafeResumeThread(); + } + + /// + /// Resume session on current thread. IMPORTANT: Call SuspendThread before any async op. + /// + /// Epoch that the session resumed on; can be saved to see if epoch has changed + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void ResumeThread(out int resumeEpoch) + { + CheckAcquired(); + clientSession.UnsafeResumeThread(out resumeEpoch); + } + + /// + /// Suspend session on current thread + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void SuspendThread() + { + Debug.Assert(clientSession.fht.epoch.ThisInstanceProtected()); + clientSession.UnsafeSuspendThread(); + } + + /// + /// Current epoch of the session + /// + public int LocalCurrentEpoch => clientSession.fht.epoch.LocalCurrentEpoch; + + /// + /// Synchronously complete outstanding pending synchronous operations. + /// Async operations must be completed individually. 
+ /// + /// Wait for all pending operations on session to complete + /// Spin-wait until ongoing commit/checkpoint, if any, completes + /// True if all pending operations have completed, false otherwise + public bool CompletePending(bool wait = false, bool spinWaitForCommit = false) + => this.clientSession.UnsafeCompletePending(this.FasterSession, false, wait, spinWaitForCommit); + + /// + /// Synchronously complete outstanding pending synchronous operations, returning outputs for the completed operations. + /// Assumes epoch protection is managed by user. Async operations must be completed individually. + /// + /// Outputs completed by this operation + /// Wait for all pending operations on session to complete + /// Spin-wait until ongoing commit/checkpoint, if any, completes + /// True if all pending operations have completed, false otherwise + public bool CompletePendingWithOutputs(out CompletedOutputIterator completedOutputs, bool wait = false, bool spinWaitForCommit = false) + => this.clientSession.UnsafeCompletePendingWithOutputs(this.FasterSession, out completedOutputs, wait, spinWaitForCommit); + + #region Acquire and Dispose + internal void Acquire() + { + if (this.isAcquired) + throw new FasterException("Trying to acquire an already-acquired UnsafeContext"); + this.isAcquired = true; + } + + /// + /// Does not actually dispose of anything; asserts the epoch has been suspended + /// + public void Dispose() + { + if (LightEpoch.AnyInstanceProtected()) + throw new FasterException("Disposing UnsafeContext with a protected epoch; must call UnsafeSuspendThread"); + this.isAcquired = false; + } + #endregion Acquire and Dispose + + #region IFasterContext + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public Status Read(ref Key key, ref Input input, ref Output output, Context userContext = default, long serialNo = 0) + { + Debug.Assert(clientSession.fht.epoch.ThisInstanceProtected()); + return clientSession.fht.ContextRead(ref key, ref input, ref 
output, userContext, FasterSession, serialNo, clientSession.ctx); + } + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public Status Read(Key key, Input input, out Output output, Context userContext = default, long serialNo = 0) + { + output = default; + return Read(ref key, ref input, ref output, userContext, serialNo); + } + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public Status Read(ref Key key, ref Output output, Context userContext = default, long serialNo = 0) + { + Input input = default; + return Read(ref key, ref input, ref output, userContext, serialNo); + } + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public Status Read(Key key, out Output output, Context userContext = default, long serialNo = 0) + { + Input input = default; + output = default; + return Read(ref key, ref input, ref output, userContext, serialNo); + } + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public (Status status, Output output) Read(Key key, Context userContext = default, long serialNo = 0) + { + Input input = default; + Output output = default; + return (Read(ref key, ref input, ref output, userContext, serialNo), output); + } + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public Status Read(ref Key key, ref Input input, ref Output output, ref RecordMetadata recordMetadata, ReadFlags readFlags = ReadFlags.None, Context userContext = default, long serialNo = 0) + { + Debug.Assert(clientSession.fht.epoch.ThisInstanceProtected()); + return clientSession.fht.ContextRead(ref key, ref input, ref output, ref recordMetadata, readFlags, userContext, FasterSession, serialNo, clientSession.ctx); + } + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public Status ReadAtAddress(long address, ref Input input, ref Output output, ReadFlags readFlags = ReadFlags.None, Context userContext = default, long serialNo = 0) + { + Debug.Assert(clientSession.fht.epoch.ThisInstanceProtected()); + return 
clientSession.fht.ContextReadAtAddress(address, ref input, ref output, readFlags, userContext, FasterSession, serialNo, clientSession.ctx); + } + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public ValueTask.ReadAsyncResult> ReadAsync(ref Key key, ref Input input, Context userContext = default, long serialNo = 0, CancellationToken cancellationToken = default) + { + Debug.Assert(!LightEpoch.AnyInstanceProtected()); + return clientSession.fht.ReadAsync(FasterSession, clientSession.ctx, ref key, ref input, Constants.kInvalidAddress, userContext, serialNo, cancellationToken); + } + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public ValueTask.ReadAsyncResult> ReadAsync(Key key, Input input, Context context = default, long serialNo = 0, CancellationToken token = default) + { + Debug.Assert(!LightEpoch.AnyInstanceProtected()); + return clientSession.fht.ReadAsync(FasterSession, clientSession.ctx, ref key, ref input, Constants.kInvalidAddress, context, serialNo, token); + } + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public ValueTask.ReadAsyncResult> ReadAsync(ref Key key, Context userContext = default, long serialNo = 0, CancellationToken token = default) + { + Debug.Assert(!LightEpoch.AnyInstanceProtected()); + Input input = default; + return clientSession.fht.ReadAsync(FasterSession, clientSession.ctx, ref key, ref input, Constants.kInvalidAddress, userContext, serialNo, token); + } + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public ValueTask.ReadAsyncResult> ReadAsync(Key key, Context context = default, long serialNo = 0, CancellationToken token = default) + { + Debug.Assert(!LightEpoch.AnyInstanceProtected()); + Input input = default; + return clientSession.fht.ReadAsync(FasterSession, clientSession.ctx, ref key, ref input, Constants.kInvalidAddress, context, serialNo, token); + } + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public ValueTask.ReadAsyncResult> ReadAsync(ref Key key, 
ref Input input, long startAddress, ReadFlags readFlags = ReadFlags.None, + Context userContext = default, long serialNo = 0, CancellationToken cancellationToken = default) + { + Debug.Assert(!LightEpoch.AnyInstanceProtected()); + var operationFlags = FasterKV.PendingContext.GetOperationFlags(readFlags); + return clientSession.fht.ReadAsync(FasterSession, clientSession.ctx, ref key, ref input, startAddress, userContext, serialNo, cancellationToken, operationFlags); + } + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public ValueTask.ReadAsyncResult> ReadAtAddressAsync(long address, ref Input input, ReadFlags readFlags = ReadFlags.None, + Context userContext = default, long serialNo = 0, CancellationToken cancellationToken = default) + { + Debug.Assert(!LightEpoch.AnyInstanceProtected()); + Key key = default; + var operationFlags = FasterKV.PendingContext.GetOperationFlags(readFlags, noKey: true); + return clientSession.fht.ReadAsync(FasterSession, clientSession.ctx, ref key, ref input, address, userContext, serialNo, cancellationToken, operationFlags); + } + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public Status Upsert(ref Key key, ref Value desiredValue, Context userContext = default, long serialNo = 0) + { + Debug.Assert(clientSession.fht.epoch.ThisInstanceProtected()); + Input input = default; + Output output = default; + return Upsert(ref key, ref input, ref desiredValue, ref output, out _, userContext, serialNo); + } + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public Status Upsert(ref Key key, ref Input input, ref Value desiredValue, ref Output output, Context userContext = default, long serialNo = 0) + { + Debug.Assert(clientSession.fht.epoch.ThisInstanceProtected()); + return clientSession.fht.ContextUpsert(ref key, ref input, ref desiredValue, ref output, userContext, FasterSession, serialNo, clientSession.ctx); + } + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public Status Upsert(ref 
Key key, ref Input input, ref Value desiredValue, ref Output output, out RecordMetadata recordMetadata, Context userContext = default, long serialNo = 0) + { + Debug.Assert(clientSession.fht.epoch.ThisInstanceProtected()); + return clientSession.fht.ContextUpsert(ref key, ref input, ref desiredValue, ref output, out recordMetadata, userContext, FasterSession, serialNo, clientSession.ctx); + } + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public Status Upsert(Key key, Value desiredValue, Context userContext = default, long serialNo = 0) + => Upsert(ref key, ref desiredValue, userContext, serialNo); + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public Status Upsert(Key key, Input input, Value desiredValue, ref Output output, Context userContext = default, long serialNo = 0) + => Upsert(ref key, ref input, ref desiredValue, ref output, userContext, serialNo); + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public ValueTask.UpsertAsyncResult> UpsertAsync(ref Key key, ref Value desiredValue, Context userContext = default, long serialNo = 0, CancellationToken token = default) + { + Input input = default; + return UpsertAsync(ref key, ref input, ref desiredValue, userContext, serialNo, token); + } + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public ValueTask.UpsertAsyncResult> UpsertAsync(ref Key key, ref Input input, ref Value desiredValue, Context userContext = default, long serialNo = 0, CancellationToken token = default) + { + Debug.Assert(!LightEpoch.AnyInstanceProtected()); + return clientSession.fht.UpsertAsync(FasterSession, clientSession.ctx, ref key, ref input, ref desiredValue, userContext, serialNo, token); + } + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public ValueTask.UpsertAsyncResult> UpsertAsync(Key key, Value desiredValue, Context userContext = default, long serialNo = 0, CancellationToken token = default) + => UpsertAsync(ref key, ref desiredValue, userContext, 
serialNo, token); + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public ValueTask.UpsertAsyncResult> UpsertAsync(Key key, Input input, Value desiredValue, Context userContext = default, long serialNo = 0, CancellationToken token = default) + => UpsertAsync(ref key, ref input, ref desiredValue, userContext, serialNo, token); + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public Status RMW(ref Key key, ref Input input, ref Output output, Context userContext = default, long serialNo = 0) + => RMW(ref key, ref input, ref output, out _, userContext, serialNo); + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public Status RMW(ref Key key, ref Input input, ref Output output, out RecordMetadata recordMetadata, Context userContext = default, long serialNo = 0) + { + Debug.Assert(clientSession.fht.epoch.ThisInstanceProtected()); + return clientSession.fht.ContextRMW(ref key, ref input, ref output, out recordMetadata, userContext, FasterSession, serialNo, clientSession.ctx); + } + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public Status RMW(Key key, Input input, out Output output, Context userContext = default, long serialNo = 0) + { + output = default; + return RMW(ref key, ref input, ref output, userContext, serialNo); + } + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public Status RMW(ref Key key, ref Input input, Context userContext = default, long serialNo = 0) + { + Output output = default; + return RMW(ref key, ref input, ref output, userContext, serialNo); + } + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public Status RMW(Key key, Input input, Context userContext = default, long serialNo = 0) + { + Output output = default; + return RMW(ref key, ref input, ref output, userContext, serialNo); + } + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public ValueTask.RmwAsyncResult> RMWAsync(ref Key key, ref Input input, Context context = default, long serialNo = 
0, CancellationToken token = default) + { + Debug.Assert(!LightEpoch.AnyInstanceProtected()); + return clientSession.fht.RmwAsync(FasterSession, clientSession.ctx, ref key, ref input, context, serialNo, token); + } + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public ValueTask.RmwAsyncResult> RMWAsync(Key key, Input input, Context context = default, long serialNo = 0, CancellationToken token = default) + => RMWAsync(ref key, ref input, context, serialNo, token); + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public Status Delete(ref Key key, Context userContext = default, long serialNo = 0) + { + Debug.Assert(clientSession.fht.epoch.ThisInstanceProtected()); + return clientSession.fht.ContextDelete(ref key, userContext, FasterSession, serialNo, clientSession.ctx); + } + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public Status Delete(Key key, Context userContext = default, long serialNo = 0) + => Delete(ref key, userContext, serialNo); + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public ValueTask.DeleteAsyncResult> DeleteAsync(ref Key key, Context userContext = default, long serialNo = 0, CancellationToken token = default) + { + Debug.Assert(!LightEpoch.AnyInstanceProtected()); + return clientSession.fht.DeleteAsync(FasterSession, clientSession.ctx, ref key, userContext, serialNo, token); + } + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public ValueTask.DeleteAsyncResult> DeleteAsync(Key key, Context userContext = default, long serialNo = 0, CancellationToken token = default) + => DeleteAsync(ref key, userContext, serialNo, token); + + /// + public void Refresh() + { + Debug.Assert(!LightEpoch.AnyInstanceProtected()); + clientSession.fht.InternalRefresh(clientSession.ctx, FasterSession); + } + + #endregion IFasterContext + + #region IFasterSession + + // This is a struct to allow JIT to inline calls (and bypass default interface call mechanism) + internal readonly struct 
InternalFasterSession : IFasterSession + { + private readonly ClientSession _clientSession; + + public InternalFasterSession(ClientSession clientSession) + { + _clientSession = clientSession; + } + + #region IFunctions - Optional features supported + public bool SupportsLocking => _clientSession.fht.SupportsLocking; + + public bool SupportsPostOperations => _clientSession.functions.SupportsPostOperations; + + public bool IsManualLocking => false; + #endregion IFunctions - Optional features supported + + #region IFunctions - Reads + public bool SingleReader(ref Key key, ref Input input, ref Value value, ref Output dst, ref RecordInfo recordInfo, long address) + => _clientSession.functions.SingleReader(ref key, ref input, ref value, ref dst, ref recordInfo, address); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public bool ConcurrentReader(ref Key key, ref Input input, ref Value value, ref Output dst, ref RecordInfo recordInfo, long address, out bool lockFailed) + { + lockFailed = false; + return !this.SupportsLocking + ? 
_clientSession.functions.ConcurrentReader(ref key, ref input, ref value, ref dst, ref recordInfo, address) + : ConcurrentReaderLock(ref key, ref input, ref value, ref dst, ref recordInfo, address, out lockFailed); + } + + public bool ConcurrentReaderLock(ref Key key, ref Input input, ref Value value, ref Output dst, ref RecordInfo recordInfo, long address, out bool lockFailed) + { + if (!recordInfo.LockShared()) + { + lockFailed = true; + return false; + } + try + { + lockFailed = false; + return _clientSession.functions.ConcurrentReader(ref key, ref input, ref value, ref dst, ref recordInfo, address); + } + finally + { + recordInfo.UnlockShared(); + } + } + + public void ReadCompletionCallback(ref Key key, ref Input input, ref Output output, Context ctx, Status status, RecordMetadata recordMetadata) + => _clientSession.functions.ReadCompletionCallback(ref key, ref input, ref output, ctx, status, recordMetadata); + + #endregion IFunctions - Reads + + // Except for readcache/copy-to-tail usage of SingleWriter, all operations that append a record must lock in the () call and unlock + // in the Post call; otherwise another session can try to access the record as soon as it's CAS'd and before Post is called. + + #region IFunctions - Upserts + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void SingleWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address) + { + _clientSession.functions.SingleWriter(ref key, ref input, ref src, ref dst, ref output, ref recordInfo, address); + + if (this.SupportsPostOperations && this.SupportsLocking) + { + // Lock ephemerally before we CAS into the log; Unlocked in PostSingleWriterLock. 
+ recordInfo.SetLockExclusiveBit(); + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void PostSingleWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address) + { + if (!this.SupportsPostOperations) + return; + if (!this.SupportsLocking) + PostSingleWriterNoLock(ref key, ref input, ref src, ref dst, ref output, ref recordInfo, address); + else + PostSingleWriterLock(ref key, ref input, ref src, ref dst, ref output, ref recordInfo, address); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private void PostSingleWriterNoLock(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address) + { + _clientSession.functions.PostSingleWriter(ref key, ref input, ref src, ref dst, ref output, ref recordInfo, address); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private void PostSingleWriterLock(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address) + { + // Lock was taken in SingleWriterLock + try + { + PostSingleWriterNoLock(ref key, ref input, ref src, ref dst, ref output, ref recordInfo, address); + } + finally + { + recordInfo.UnlockExclusive(); + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public bool ConcurrentWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address, out bool lockFailed) + { + lockFailed = false; + return !this.SupportsLocking + ? 
ConcurrentWriterNoLock(ref key, ref input, ref src, ref dst, ref output, ref recordInfo, address) + : ConcurrentWriterLock(ref key, ref input, ref src, ref dst, ref output, ref recordInfo, address, out lockFailed); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private bool ConcurrentWriterNoLock(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address) + { + recordInfo.SetDirty(); + // Note: KeyIndexes do not need notification of in-place updates because the key does not change. + return _clientSession.functions.ConcurrentWriter(ref key, ref input, ref src, ref dst, ref output, ref recordInfo, address); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private bool ConcurrentWriterLock(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address, out bool lockFailed) + { + if (!recordInfo.LockExclusive()) + { + lockFailed = true; + return false; + } + try + { + lockFailed = false; + return !recordInfo.Tombstone && ConcurrentWriterNoLock(ref key, ref input, ref src, ref dst, ref output, ref recordInfo, address); + } + finally + { + recordInfo.UnlockExclusive(); + } + } + + public void UpsertCompletionCallback(ref Key key, ref Input input, ref Value value, Context ctx) + => _clientSession.functions.UpsertCompletionCallback(ref key, ref input, ref value, ctx); + #endregion IFunctions - Upserts + + #region IFunctions - RMWs + #region InitialUpdater + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public bool NeedInitialUpdate(ref Key key, ref Input input, ref Output output) + => _clientSession.functions.NeedInitialUpdate(ref key, ref input, ref output); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void InitialUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, long address) + { + _clientSession.functions.InitialUpdater(ref key, ref input, ref value, ref 
output, ref recordInfo, address); + + if (this.SupportsPostOperations && this.SupportsLocking) + { + // Lock ephemerally before we CAS into the log; Unlocked in PostInitialUpdaterLock. + recordInfo.SetLockExclusiveBit(); + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void PostInitialUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, long address) + { + if (!this.SupportsPostOperations) + return; + if (!this.SupportsLocking) + PostInitialUpdaterNoLock(ref key, ref input, ref value, ref output, ref recordInfo, address); + else + PostInitialUpdaterLock(ref key, ref input, ref value, ref output, ref recordInfo, address); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private void PostInitialUpdaterNoLock(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, long address) + { + _clientSession.functions.PostInitialUpdater(ref key, ref input, ref value, ref output, ref recordInfo, address); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private void PostInitialUpdaterLock(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, long address) + { + // Lock was taken in InitialUpdaterLock + try + { + PostInitialUpdaterNoLock(ref key, ref input, ref value, ref output, ref recordInfo, address); + } + finally + { + recordInfo.UnlockExclusive(); + } + } + #endregion InitialUpdater + + #region CopyUpdater + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public bool NeedCopyUpdate(ref Key key, ref Input input, ref Value oldValue, ref Output output) + => _clientSession.functions.NeedCopyUpdate(ref key, ref input, ref oldValue, ref output); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void CopyUpdater(ref Key key, ref Input input, ref Value oldValue, ref Value newValue, ref Output output, ref RecordInfo recordInfo, long address) + { + _clientSession.functions.CopyUpdater(ref key, 
ref input, ref oldValue, ref newValue, ref output, ref recordInfo, address); + + if (this.SupportsPostOperations && this.SupportsLocking) + { + // Lock ephemerally before we CAS into the log. Unlocked in PostInitialUpdaterLock. + recordInfo.SetLockExclusiveBit(); + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public bool PostCopyUpdater(ref Key key, ref Input input, ref Value oldValue, ref Value newValue, ref Output output, ref RecordInfo recordInfo, long address) + { + if (!this.SupportsPostOperations) + return true; + return !this.SupportsLocking + ? PostCopyUpdaterNoLock(ref key, ref input, ref output, ref oldValue, ref newValue, ref recordInfo, address) + : PostCopyUpdaterLock(ref key, ref input, ref output, ref oldValue, ref newValue, ref recordInfo, address); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private bool PostCopyUpdaterNoLock(ref Key key, ref Input input, ref Output output, ref Value oldValue, ref Value newValue, ref RecordInfo recordInfo, long address) + { + return _clientSession.functions.PostCopyUpdater(ref key, ref input, ref oldValue, ref newValue, ref output, ref recordInfo, address); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private bool PostCopyUpdaterLock(ref Key key, ref Input input, ref Output output, ref Value oldValue, ref Value newValue, ref RecordInfo recordInfo, long address) + { + // Lock was taken in CopyUpdaterLock + try + { + // KeyIndexes do not need notification of in-place updates because the key does not change. 
+ return !recordInfo.Tombstone && PostCopyUpdaterNoLock(ref key, ref input, ref output, ref oldValue, ref newValue, ref recordInfo, address); + } + finally + { + recordInfo.UnlockExclusive(); + } + } + #endregion CopyUpdater + + #region InPlaceUpdater + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public bool InPlaceUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, long address, out bool lockFailed) + { + lockFailed = false; + return !this.SupportsLocking + ? InPlaceUpdaterNoLock(ref key, ref input, ref output, ref value, ref recordInfo, address) + : InPlaceUpdaterLock(ref key, ref input, ref output, ref value, ref recordInfo, address, out lockFailed); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private bool InPlaceUpdaterNoLock(ref Key key, ref Input input, ref Output output, ref Value value, ref RecordInfo recordInfo, long address) + { + recordInfo.SetDirty(); + // Note: KeyIndexes do not need notification of in-place updates because the key does not change. 
+ return _clientSession.functions.InPlaceUpdater(ref key, ref input, ref value, ref output, ref recordInfo, address); + } + + private bool InPlaceUpdaterLock(ref Key key, ref Input input, ref Output output, ref Value value, ref RecordInfo recordInfo, long address, out bool lockFailed) + { + if (!recordInfo.LockExclusive()) + { + lockFailed = true; + return false; + } + try + { + lockFailed = false; + return !recordInfo.Tombstone && InPlaceUpdaterNoLock(ref key, ref input, ref output, ref value, ref recordInfo, address); + } + finally + { + recordInfo.UnlockExclusive(); + } + } + + public void RMWCompletionCallback(ref Key key, ref Input input, ref Output output, Context ctx, Status status, RecordMetadata recordMetadata) + => _clientSession.functions.RMWCompletionCallback(ref key, ref input, ref output, ctx, status, recordMetadata); + + #endregion InPlaceUpdater + #endregion IFunctions - RMWs + + #region IFunctions - Deletes + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void PostSingleDeleter(ref Key key, ref RecordInfo recordInfo, long address) + { + // There is no value to lock here, so we take a RecordInfo lock in InternalDelete and release it here. + recordInfo.SetDirty(); + + if (this.SupportsPostOperations) + _clientSession.functions.PostSingleDeleter(ref key, ref recordInfo, address); + if (this.SupportsLocking) + recordInfo.UnlockExclusive(); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public bool ConcurrentDeleter(ref Key key, ref Value value, ref RecordInfo recordInfo, long address, out bool lockFailed) + { + lockFailed = false; + return (!this.SupportsLocking) + ? 
ConcurrentDeleterNoLock(ref key, ref value, ref recordInfo, address) + : ConcurrentDeleterLock(ref key, ref value, ref recordInfo, address, out lockFailed); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private bool ConcurrentDeleterNoLock(ref Key key, ref Value value, ref RecordInfo recordInfo, long address) + { + recordInfo.SetDirty(); + recordInfo.SetTombstone(); + return _clientSession.functions.ConcurrentDeleter(ref key, ref value, ref recordInfo, address); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private bool ConcurrentDeleterLock(ref Key key, ref Value value, ref RecordInfo recordInfo, long address, out bool lockFailed) + { + if (!recordInfo.LockExclusive()) + { + lockFailed = true; + return false; + } + try + { + lockFailed = false; + return ConcurrentDeleterNoLock(ref key, ref value, ref recordInfo, address); + } + finally + { + recordInfo.UnlockExclusive(); + } + } + + public void DeleteCompletionCallback(ref Key key, Context ctx) + => _clientSession.functions.DeleteCompletionCallback(ref key, ctx); + #endregion IFunctions - Deletes + + #region IFunctions - Checkpointing + public void CheckpointCompletionCallback(string guid, CommitPoint commitPoint) + { + _clientSession.functions.CheckpointCompletionCallback(guid, commitPoint); + _clientSession.LatestCommitPoint = commitPoint; + } + #endregion IFunctions - Checkpointing + + #region Internal utilities + public int GetInitialLength(ref Input input) + => _clientSession.variableLengthStruct.GetInitialLength(ref input); + + public int GetLength(ref Value t, ref Input input) + => _clientSession.variableLengthStruct.GetLength(ref t, ref input); + + public IHeapContainer GetHeapContainer(ref Input input) + { + if (_clientSession.inputVariableLengthStruct == default) + return new StandardHeapContainer(ref input); + return new VarLenHeapContainer(ref input, _clientSession.inputVariableLengthStruct, _clientSession.fht.hlog.bufferPool); + } + + public void UnsafeResumeThread() => 
_clientSession.UnsafeResumeThread(); + + public void UnsafeSuspendThread() => _clientSession.UnsafeSuspendThread(); + + public bool CompletePendingWithOutputs(out CompletedOutputIterator completedOutputs, bool wait = false, bool spinWaitForCommit = false) + => _clientSession.CompletePendingWithOutputs(out completedOutputs, wait, spinWaitForCommit); + #endregion Internal utilities + } + #endregion IFasterSession + } +} diff --git a/cs/test/BasicFASTERTests.cs b/cs/test/BasicFASTERTests.cs index b06e2c567..73be28c98 100644 --- a/cs/test/BasicFASTERTests.cs +++ b/cs/test/BasicFASTERTests.cs @@ -30,6 +30,15 @@ public void Setup() TestUtils.DeleteDirectory(path, wait: true); } + private void Setup(long size, LogSettings logSettings, TestUtils.DeviceType deviceType) + { + string filename = path + TestContext.CurrentContext.Test.Name + deviceType.ToString() + ".log"; + log = TestUtils.CreateTestDevice(deviceType, filename); + logSettings.LogDevice = log; + fht = new FasterKV(size, logSettings); + session = fht.For(new Functions()).NewSession(); + } + [TearDown] public void TearDown() { @@ -69,11 +78,7 @@ private static (Status status, OutputStruct output) CompletePendingResult(Comple [Category("Smoke")] public void NativeInMemWriteRead([Values] TestUtils.DeviceType deviceType) { - string filename = path + "NativeInMemWriteRead" + deviceType.ToString() + ".log"; - log = TestUtils.CreateTestDevice(deviceType, filename); - fht = new FasterKV - (128, new LogSettings { LogDevice = log, PageSizeBits = 10, MemorySizeBits = 12, SegmentSizeBits = 22 }); - session = fht.For(new Functions()).NewSession(); + Setup(128, new LogSettings { PageSizeBits = 10, MemorySizeBits = 12, SegmentSizeBits = 22 }, deviceType); InputStruct input = default; OutputStruct output = default; @@ -94,11 +99,7 @@ public void NativeInMemWriteRead([Values] TestUtils.DeviceType deviceType) [Category("Smoke")] public void NativeInMemWriteReadDelete([Values] TestUtils.DeviceType deviceType) { - string filename = 
path + "NativeInMemWriteReadDelete" + deviceType.ToString() + ".log"; - log = TestUtils.CreateTestDevice(deviceType, filename); - fht = new FasterKV - (128, new LogSettings { LogDevice = log, PageSizeBits = 10, MemorySizeBits = 12, SegmentSizeBits = 22 }); - session = fht.For(new Functions()).NewSession(); + Setup(128, new LogSettings { PageSizeBits = 10, MemorySizeBits = 12, SegmentSizeBits = 22 }, deviceType); InputStruct input = default; OutputStruct output = default; @@ -137,12 +138,8 @@ public void NativeInMemWriteReadDelete2() const int count = 10; - string filename = path + "NativeInMemWriteReadDelete2" + deviceType.ToString() + ".log"; - log = TestUtils.CreateTestDevice(deviceType, filename); - fht = new FasterKV - // (128, new LogSettings { LogDevice = log, MemorySizeBits = 22, SegmentSizeBits = 22, PageSizeBits = 10 }); - (128, new LogSettings { LogDevice = log, MemorySizeBits = 29 }); - session = fht.For(new Functions()).NewSession(); + // Setup(128, new LogSettings { MemorySizeBits = 22, SegmentSizeBits = 22, PageSizeBits = 10 }, deviceType); + Setup(128, new LogSettings { MemorySizeBits = 29 }, deviceType); InputStruct input = default; OutputStruct output = default; @@ -190,11 +187,8 @@ public unsafe void NativeInMemWriteRead2() int count = 200; - string filename = path + "NativeInMemWriteRead2" + deviceType.ToString() + ".log"; - log = TestUtils.CreateTestDevice(deviceType, filename); - fht = new FasterKV - // (128, new LogSettings { LogDevice = log, MemorySizeBits = 22, SegmentSizeBits = 22, PageSizeBits = 10 }); - (128, new LogSettings { LogDevice = log, MemorySizeBits = 29 }); + // Setup(128, new LogSettings { MemorySizeBits = 22, SegmentSizeBits = 22, PageSizeBits = 10 }, deviceType); + Setup(128, new LogSettings { MemorySizeBits = 29 }, deviceType); session = fht.For(new Functions()).NewSession(); InputStruct input = default; @@ -252,12 +246,7 @@ public unsafe void TestShiftHeadAddress([Values] TestUtils.DeviceType deviceType Random r = new 
Random(RandSeed); var sw = Stopwatch.StartNew(); - string filename = path + "TestShiftHeadAddress" + deviceType.ToString() + ".log"; - log = TestUtils.CreateTestDevice(deviceType, filename); - fht = new FasterKV - (128, new LogSettings { LogDevice = log, MemorySizeBits = 22, SegmentSizeBits = 22, PageSizeBits = 10 }); - session = fht.For(new Functions()).NewSession(); - + Setup(128, new LogSettings { MemorySizeBits = 22, SegmentSizeBits = 22, PageSizeBits = 10 }, deviceType); for (int c = 0; c < NumRecs; c++) { @@ -320,11 +309,7 @@ public unsafe void NativeInMemRMWRefKeys([Values] TestUtils.DeviceType deviceTyp InputStruct input = default; OutputStruct output = default; - string filename = path + "NativeInMemRMWRefKeys" + deviceType.ToString() + ".log"; - log = TestUtils.CreateTestDevice(deviceType, filename); - fht = new FasterKV - (128, new LogSettings { LogDevice = log, MemorySizeBits = 22, SegmentSizeBits = 22, PageSizeBits = 10 }); - session = fht.For(new Functions()).NewSession(); + Setup(128, new LogSettings { MemorySizeBits = 22, SegmentSizeBits = 22, PageSizeBits = 10 }, deviceType); var nums = Enumerable.Range(0, 1000).ToArray(); var rnd = new Random(11); @@ -388,11 +373,7 @@ public unsafe void NativeInMemRMWNoRefKeys([Values] TestUtils.DeviceType deviceT { InputStruct input = default; - string filename = path + "NativeInMemRMWNoRefKeys" + deviceType.ToString() + ".log"; - log = TestUtils.CreateTestDevice(deviceType, filename); - fht = new FasterKV - (128, new LogSettings { LogDevice = log, MemorySizeBits = 22, SegmentSizeBits = 22, PageSizeBits = 10 }); - session = fht.For(new Functions()).NewSession(); + Setup(128, new LogSettings { MemorySizeBits = 22, SegmentSizeBits = 22, PageSizeBits = 10 }, deviceType); var nums = Enumerable.Range(0, 1000).ToArray(); var rnd = new Random(11); @@ -450,11 +431,7 @@ public void ReadNoRefKeyInputOutput([Values] TestUtils.DeviceType deviceType) { InputStruct input = default; - string filename = path + 
"ReadNoRefKeyInputOutput" + deviceType.ToString() + ".log"; - log = TestUtils.CreateTestDevice(deviceType, filename); - fht = new FasterKV - (128, new LogSettings { LogDevice = log, MemorySizeBits = 22, SegmentSizeBits = 22, PageSizeBits = 10 }); - session = fht.For(new Functions()).NewSession(); + Setup(128, new LogSettings { MemorySizeBits = 22, SegmentSizeBits = 22, PageSizeBits = 10 }, deviceType); var key1 = new KeyStruct { kfield1 = 13, kfield2 = 14 }; var value = new ValueStruct { vfield1 = 23, vfield2 = 24 }; @@ -475,11 +452,7 @@ public void ReadNoRefKeyInputOutput([Values] TestUtils.DeviceType deviceType) [Category("FasterKV")] public void ReadNoRefKey([Values] TestUtils.DeviceType deviceType) { - string filename = path + "ReadNoRefKey" + deviceType.ToString() + ".log"; - log = TestUtils.CreateTestDevice(deviceType, filename); - fht = new FasterKV - (128, new LogSettings { LogDevice = log, MemorySizeBits = 22, SegmentSizeBits = 22, PageSizeBits = 10 }); - session = fht.For(new Functions()).NewSession(); + Setup(128, new LogSettings { MemorySizeBits = 22, SegmentSizeBits = 22, PageSizeBits = 10 }, deviceType); var key1 = new KeyStruct { kfield1 = 13, kfield2 = 14 }; var value = new ValueStruct { vfield1 = 23, vfield2 = 24 }; @@ -502,11 +475,7 @@ public void ReadNoRefKey([Values] TestUtils.DeviceType deviceType) [Category("Smoke")] public void ReadWithoutInput([Values] TestUtils.DeviceType deviceType) { - string filename = path + "ReadWithoutInput" + deviceType.ToString() + ".log"; - log = TestUtils.CreateTestDevice(deviceType, filename); - fht = new FasterKV - (128, new LogSettings { LogDevice = log, MemorySizeBits = 22, SegmentSizeBits = 22, PageSizeBits = 10 }); - session = fht.For(new Functions()).NewSession(); + Setup(128, new LogSettings { MemorySizeBits = 22, SegmentSizeBits = 22, PageSizeBits = 10 }, deviceType); OutputStruct output = default; @@ -534,11 +503,7 @@ public void ReadWithoutSerialID() // Just checking without Serial ID so one device type 
is enough deviceType = TestUtils.DeviceType.MLSD; - string filename = path + "ReadWithoutSerialID" + deviceType.ToString() + ".log"; - log = TestUtils.CreateTestDevice(deviceType, filename); - fht = new FasterKV - (128, new LogSettings { LogDevice = log, MemorySizeBits = 29 }); - session = fht.For(new Functions()).NewSession(); + Setup(128, new LogSettings { MemorySizeBits = 29 }, deviceType); InputStruct input = default; OutputStruct output = default; @@ -562,11 +527,7 @@ public void ReadWithoutSerialID() [Category("Smoke")] public void ReadBareMinParams([Values] TestUtils.DeviceType deviceType) { - string filename = path + "ReadBareMinParams" + deviceType.ToString() + ".log"; - log = TestUtils.CreateTestDevice(deviceType, filename); - fht = new FasterKV - (128, new LogSettings { LogDevice = log, MemorySizeBits = 22, SegmentSizeBits = 22, PageSizeBits = 10 }); - session = fht.For(new Functions()).NewSession(); + Setup(128, new LogSettings { MemorySizeBits = 22, SegmentSizeBits = 22, PageSizeBits = 10 }, deviceType); var key1 = new KeyStruct { kfield1 = 13, kfield2 = 14 }; var value = new ValueStruct { vfield1 = 23, vfield2 = 24 }; @@ -591,11 +552,7 @@ public void ReadAtAddressReadFlagsNone() // Just functional test of ReadFlag so one device is enough deviceType = TestUtils.DeviceType.MLSD; - string filename = path + "ReadAtAddressReadFlagsNone" + deviceType.ToString() + ".log"; - log = TestUtils.CreateTestDevice(deviceType, filename); - fht = new FasterKV - (128, new LogSettings { LogDevice = log, MemorySizeBits = 29 }); - session = fht.For(new Functions()).NewSession(); + Setup(128, new LogSettings { MemorySizeBits = 29 }, deviceType); InputStruct input = default; OutputStruct output = default; @@ -652,10 +609,7 @@ public void ReadAtAddressReadFlagsSkipReadCache() // Another ReadFlag functional test so one device is enough deviceType = TestUtils.DeviceType.MLSD; - string filename = path + "ReadAtAddressReadFlagsSkipReadCache" + deviceType.ToString() + ".log"; - 
log = TestUtils.CreateTestDevice(deviceType, filename); - fht = new FasterKV - (128, new LogSettings { LogDevice = log, MemorySizeBits = 29, ReadCacheSettings = new ReadCacheSettings() }); + Setup(128, new LogSettings { MemorySizeBits = 29, ReadCacheSettings = new ReadCacheSettings() }, deviceType); SkipReadCacheFunctions functions = new(); using var skipReadCacheSession = fht.For(functions).NewSession(); @@ -732,11 +686,7 @@ void VerifyResult() [Category("Smoke")] public void UpsertDefaultsTest([Values] TestUtils.DeviceType deviceType) { - string filename = path + "UpsertDefaultsTest" + deviceType.ToString() + ".log"; - log = TestUtils.CreateTestDevice(deviceType, filename); - fht = new FasterKV - (128, new LogSettings { LogDevice = log, MemorySizeBits = 22, SegmentSizeBits = 22, PageSizeBits = 10 }); - session = fht.For(new Functions()).NewSession(); + Setup(128, new LogSettings { MemorySizeBits = 22, SegmentSizeBits = 22, PageSizeBits = 10 }, deviceType); InputStruct input = default; OutputStruct output = default; @@ -764,11 +714,7 @@ public void UpsertNoRefNoDefaultsTest() // Just checking more parameter values so one device is enough deviceType = TestUtils.DeviceType.MLSD; - string filename = path + "UpsertNoRefNoDefaultsTest" + deviceType.ToString() + ".log"; - log = TestUtils.CreateTestDevice(deviceType, filename); - fht = new FasterKV - (128, new LogSettings { LogDevice = log, MemorySizeBits = 29 }); - session = fht.For(new Functions()).NewSession(); + Setup(128, new LogSettings { MemorySizeBits = 29 }, deviceType); InputStruct input = default; OutputStruct output = default; @@ -794,11 +740,7 @@ public void UpsertSerialNumberTest() // Simple Upsert of Serial Number test so one device is enough deviceType = TestUtils.DeviceType.MLSD; - string filename = path + "UpsertSerialNumberTest" + deviceType.ToString() + ".log"; - log = TestUtils.CreateTestDevice(deviceType, filename); - fht = new FasterKV - (128, new LogSettings { LogDevice = log, MemorySizeBits = 29 
}); - session = fht.For(new Functions()).NewSession(); + Setup(128, new LogSettings { MemorySizeBits = 29 }, deviceType); int numKeys = 100; int keyMod = 10; diff --git a/cs/test/UnsafeContextTests.cs b/cs/test/UnsafeContextTests.cs new file mode 100644 index 000000000..96ddaef4a --- /dev/null +++ b/cs/test/UnsafeContextTests.cs @@ -0,0 +1,670 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +using System; +using System.Diagnostics; +using System.Linq; +using FASTER.core; +using NUnit.Framework; + +namespace FASTER.test.UnsafeContext +{ + //** These tests ensure the basics are fully covered - taken from BasicFASTERTests + + [TestFixture] + internal class BasicUnsafeContextTests + { + private FasterKV fht; + private ClientSession fullSession; + private UnsafeContext uContext; + private IDevice log; + private string path; + TestUtils.DeviceType deviceType; + + [SetUp] + public void Setup() + { + path = TestUtils.MethodTestDir + "/"; + + // Clean up log files from previous test runs in case they weren't cleaned up + TestUtils.DeleteDirectory(path, wait: true); + } + + private void Setup(long size, LogSettings logSettings, TestUtils.DeviceType deviceType) + { + string filename = path + TestContext.CurrentContext.Test.Name + deviceType.ToString() + ".log"; + log = TestUtils.CreateTestDevice(deviceType, filename); + logSettings.LogDevice = log; + fht = new FasterKV(size, logSettings); + fullSession = fht.For(new Functions()).NewSession(); + uContext = fullSession.GetUnsafeContext(); + } + + [TearDown] + public void TearDown() + { + uContext?.Dispose(); + uContext = null; + fullSession?.Dispose(); + fullSession = null; + fht?.Dispose(); + fht = null; + log?.Dispose(); + log = null; + TestUtils.DeleteDirectory(path); + } + + private void AssertCompleted(Status expected, Status actual) + { + if (actual == Status.PENDING) + (actual, _) = CompletePendingResult(); + Assert.AreEqual(expected, actual); + } + + private (Status 
status, OutputStruct output) CompletePendingResult() + { + uContext.CompletePendingWithOutputs(out var completedOutputs); + return TestUtils.GetSinglePendingResult(completedOutputs); + } + + [Test] + [Category("FasterKV")] + [Category("Smoke")] + public void NativeInMemWriteRead([Values] TestUtils.DeviceType deviceType) + { + Setup(128, new LogSettings { PageSizeBits = 10, MemorySizeBits = 12, SegmentSizeBits = 22 }, deviceType); + uContext.ResumeThread(); + + try + { + InputStruct input = default; + OutputStruct output = default; + + var key1 = new KeyStruct { kfield1 = 13, kfield2 = 14 }; + var value = new ValueStruct { vfield1 = 23, vfield2 = 24 }; + + uContext.Upsert(ref key1, ref value, Empty.Default, 0); + var status = uContext.Read(ref key1, ref input, ref output, Empty.Default, 0); + + AssertCompleted(Status.OK, status); + Assert.AreEqual(value.vfield1, output.value.vfield1); + Assert.AreEqual(value.vfield2, output.value.vfield2); + } + finally + { + uContext.SuspendThread(); + } + } + + [Test] + [Category("FasterKV")] + [Category("Smoke")] + public void NativeInMemWriteReadDelete([Values] TestUtils.DeviceType deviceType) + { + Setup(128, new LogSettings { PageSizeBits = 10, MemorySizeBits = 12, SegmentSizeBits = 22 }, deviceType); + uContext.ResumeThread(); + + try + { + InputStruct input = default; + OutputStruct output = default; + + var key1 = new KeyStruct { kfield1 = 13, kfield2 = 14 }; + var value = new ValueStruct { vfield1 = 23, vfield2 = 24 }; + + uContext.Upsert(ref key1, ref value, Empty.Default, 0); + var status = uContext.Read(ref key1, ref input, ref output, Empty.Default, 0); + AssertCompleted(Status.OK, status); + + uContext.Delete(ref key1, Empty.Default, 0); + + status = uContext.Read(ref key1, ref input, ref output, Empty.Default, 0); + AssertCompleted(Status.NOTFOUND, status); + + var key2 = new KeyStruct { kfield1 = 14, kfield2 = 15 }; + var value2 = new ValueStruct { vfield1 = 24, vfield2 = 25 }; + + uContext.Upsert(ref key2, ref 
value2, Empty.Default, 0); + status = uContext.Read(ref key2, ref input, ref output, Empty.Default, 0); + + AssertCompleted(Status.OK, status); + Assert.AreEqual(value2.vfield1, output.value.vfield1); + Assert.AreEqual(value2.vfield2, output.value.vfield2); + } + finally + { + uContext.SuspendThread(); + } + } + + + [Test] + [Category("FasterKV")] + [Category("Smoke")] + public void NativeInMemWriteReadDelete2() + { + // Just set this one since Write Read Delete already does all four devices + deviceType = TestUtils.DeviceType.MLSD; + + const int count = 10; + + // Setup(128, new LogSettings { MemorySizeBits = 22, SegmentSizeBits = 22, PageSizeBits = 10 }, deviceType); + Setup(128, new LogSettings { MemorySizeBits = 29 }, deviceType); + uContext.ResumeThread(); + + try + { + InputStruct input = default; + OutputStruct output = default; + + for (int i = 0; i < 10 * count; i++) + { + var key1 = new KeyStruct { kfield1 = i, kfield2 = 14 }; + var value = new ValueStruct { vfield1 = i, vfield2 = 24 }; + + uContext.Upsert(ref key1, ref value, Empty.Default, 0); + } + + for (int i = 0; i < 10 * count; i++) + { + var key1 = new KeyStruct { kfield1 = i, kfield2 = 14 }; + uContext.Delete(ref key1, Empty.Default, 0); + } + + for (int i = 0; i < 10 * count; i++) + { + var key1 = new KeyStruct { kfield1 = i, kfield2 = 14 }; + var value = new ValueStruct { vfield1 = i, vfield2 = 24 }; + + var status = uContext.Read(ref key1, ref input, ref output, Empty.Default, 0); + AssertCompleted(Status.NOTFOUND, status); + + uContext.Upsert(ref key1, ref value, Empty.Default, 0); + } + + for (int i = 0; i < 10 * count; i++) + { + var key1 = new KeyStruct { kfield1 = i, kfield2 = 14 }; + var status = uContext.Read(ref key1, ref input, ref output, Empty.Default, 0); + AssertCompleted(Status.OK, status); + } + } + finally + { + uContext.SuspendThread(); + } + } + + [Test] + [Category("FasterKV")] + [Category("Smoke")] + public unsafe void NativeInMemWriteRead2() + { + // Just use this one 
instead of all four devices since InMemWriteRead covers all four devices + deviceType = TestUtils.DeviceType.MLSD; + + int count = 200; + + // Setup(128, new LogSettings { MemorySizeBits = 22, SegmentSizeBits = 22, PageSizeBits = 10 }, deviceType); + Setup(128, new LogSettings { MemorySizeBits = 29 }, deviceType); + uContext.ResumeThread(); + + try + { + InputStruct input = default; + + Random r = new Random(10); + for (int c = 0; c < count; c++) + { + var i = r.Next(10000); + var key1 = new KeyStruct { kfield1 = i, kfield2 = i + 1 }; + var value = new ValueStruct { vfield1 = i, vfield2 = i + 1 }; + uContext.Upsert(ref key1, ref value, Empty.Default, 0); + } + + r = new Random(10); + + for (int c = 0; c < count; c++) + { + var i = r.Next(10000); + OutputStruct output = default; + var key1 = new KeyStruct { kfield1 = i, kfield2 = i + 1 }; + var value = new ValueStruct { vfield1 = i, vfield2 = i + 1 }; + + if (uContext.Read(ref key1, ref input, ref output, Empty.Default, 0) == Status.PENDING) + { + uContext.CompletePending(true); + } + + Assert.AreEqual(value.vfield1, output.value.vfield1); + Assert.AreEqual(value.vfield2, output.value.vfield2); + } + + // Clean up and retry - should not find now + fht.Log.ShiftBeginAddress(fht.Log.TailAddress, truncateLog: true); + + r = new Random(10); + for (int c = 0; c < count; c++) + { + var i = r.Next(10000); + OutputStruct output = default; + var key1 = new KeyStruct { kfield1 = i, kfield2 = i + 1 }; + Assert.AreEqual(Status.NOTFOUND, uContext.Read(ref key1, ref input, ref output, Empty.Default, 0)); + } + } + finally + { + uContext.SuspendThread(); + } + } + + [Test] + [Category("FasterKV")] + [Category("Smoke")] + public unsafe void TestShiftHeadAddress([Values] TestUtils.DeviceType deviceType) + { + InputStruct input = default; + const int RandSeed = 10; + const int RandRange = 10000; + const int NumRecs = 200; + + Random r = new Random(RandSeed); + var sw = Stopwatch.StartNew(); + + Setup(128, new LogSettings { 
MemorySizeBits = 22, SegmentSizeBits = 22, PageSizeBits = 10 }, deviceType); + uContext.ResumeThread(); + + try + { + for (int c = 0; c < NumRecs; c++) + { + var i = r.Next(RandRange); + var key1 = new KeyStruct { kfield1 = i, kfield2 = i + 1 }; + var value = new ValueStruct { vfield1 = i, vfield2 = i + 1 }; + uContext.Upsert(ref key1, ref value, Empty.Default, 0); + } + + r = new Random(RandSeed); + sw.Restart(); + + for (int c = 0; c < NumRecs; c++) + { + var i = r.Next(RandRange); + OutputStruct output = default; + var key1 = new KeyStruct { kfield1 = i, kfield2 = i + 1 }; + var value = new ValueStruct { vfield1 = i, vfield2 = i + 1 }; + + if (uContext.Read(ref key1, ref input, ref output, Empty.Default, 0) != Status.PENDING) + { + Assert.AreEqual(value.vfield1, output.value.vfield1); + Assert.AreEqual(value.vfield2, output.value.vfield2); + } + } + uContext.CompletePending(true); + + // Shift head and retry - should not find in main memory now + fht.Log.FlushAndEvict(true); + + r = new Random(RandSeed); + sw.Restart(); + + for (int c = 0; c < NumRecs; c++) + { + var i = r.Next(RandRange); + OutputStruct output = default; + var key1 = new KeyStruct { kfield1 = i, kfield2 = i + 1 }; + Status foundStatus = uContext.Read(ref key1, ref input, ref output, Empty.Default, 0); + Assert.AreEqual(Status.PENDING, foundStatus); + } + + uContext.CompletePendingWithOutputs(out var outputs, wait: true); + int count = 0; + while (outputs.Next()) + { + count++; + Assert.AreEqual(outputs.Current.Key.kfield1, outputs.Current.Output.value.vfield1); + Assert.AreEqual(outputs.Current.Key.kfield2, outputs.Current.Output.value.vfield2); + } + outputs.Dispose(); + Assert.AreEqual(NumRecs, count); + } + finally + { + uContext.SuspendThread(); + } + } + + [Test] + [Category("FasterKV")] + [Category("Smoke")] + public unsafe void NativeInMemRMWRefKeys([Values] TestUtils.DeviceType deviceType) + { + InputStruct input = default; + OutputStruct output = default; + + Setup(128, new LogSettings 
{ MemorySizeBits = 22, SegmentSizeBits = 22, PageSizeBits = 10 }, deviceType); + uContext.ResumeThread(); + + try + { + var nums = Enumerable.Range(0, 1000).ToArray(); + var rnd = new Random(11); + for (int i = 0; i < nums.Length; ++i) + { + int randomIndex = rnd.Next(nums.Length); + int temp = nums[randomIndex]; + nums[randomIndex] = nums[i]; + nums[i] = temp; + } + + for (int j = 0; j < nums.Length; ++j) + { + var i = nums[j]; + var key1 = new KeyStruct { kfield1 = i, kfield2 = i + 1 }; + input = new InputStruct { ifield1 = i, ifield2 = i + 1 }; + uContext.RMW(ref key1, ref input, Empty.Default, 0); + } + for (int j = 0; j < nums.Length; ++j) + { + var i = nums[j]; + var key1 = new KeyStruct { kfield1 = i, kfield2 = i + 1 }; + input = new InputStruct { ifield1 = i, ifield2 = i + 1 }; + if (uContext.RMW(ref key1, ref input, ref output, Empty.Default, 0) == Status.PENDING) + { + uContext.CompletePending(true); + } + else + { + Assert.AreEqual(2 * i, output.value.vfield1); + Assert.AreEqual(2 * (i + 1), output.value.vfield2); + } + } + + Status status; + KeyStruct key; + + for (int j = 0; j < nums.Length; ++j) + { + var i = nums[j]; + + key = new KeyStruct { kfield1 = i, kfield2 = i + 1 }; + ValueStruct value = new ValueStruct { vfield1 = i, vfield2 = i + 1 }; + + status = uContext.Read(ref key, ref input, ref output, Empty.Default, 0); + + AssertCompleted(Status.OK, status); + Assert.AreEqual(2 * value.vfield1, output.value.vfield1); + Assert.AreEqual(2 * value.vfield2, output.value.vfield2); + } + + key = new KeyStruct { kfield1 = nums.Length, kfield2 = nums.Length + 1 }; + status = uContext.Read(ref key, ref input, ref output, Empty.Default, 0); + AssertCompleted(Status.NOTFOUND, status); + } + finally + { + uContext.SuspendThread(); + } + } + + // Tests the overload where no reference params used: key,input,userContext,serialNo + [Test] + [Category("FasterKV")] + public unsafe void NativeInMemRMWNoRefKeys([Values] TestUtils.DeviceType deviceType) + { + 
InputStruct input = default; + + Setup(128, new LogSettings { MemorySizeBits = 22, SegmentSizeBits = 22, PageSizeBits = 10 }, deviceType); + uContext.ResumeThread(); + + try + { + var nums = Enumerable.Range(0, 1000).ToArray(); + var rnd = new Random(11); + for (int i = 0; i < nums.Length; ++i) + { + int randomIndex = rnd.Next(nums.Length); + int temp = nums[randomIndex]; + nums[randomIndex] = nums[i]; + nums[i] = temp; + } + + for (int j = 0; j < nums.Length; ++j) + { + var i = nums[j]; + var key1 = new KeyStruct { kfield1 = i, kfield2 = i + 1 }; + input = new InputStruct { ifield1 = i, ifield2 = i + 1 }; + uContext.RMW(ref key1, ref input, Empty.Default, 0); + } + for (int j = 0; j < nums.Length; ++j) + { + var i = nums[j]; + var key1 = new KeyStruct { kfield1 = i, kfield2 = i + 1 }; + input = new InputStruct { ifield1 = i, ifield2 = i + 1 }; + uContext.RMW(key1, input); // no ref and do not set any other params + } + + OutputStruct output = default; + Status status; + KeyStruct key; + + for (int j = 0; j < nums.Length; ++j) + { + var i = nums[j]; + + key = new KeyStruct { kfield1 = i, kfield2 = i + 1 }; + ValueStruct value = new ValueStruct { vfield1 = i, vfield2 = i + 1 }; + + status = uContext.Read(ref key, ref input, ref output, Empty.Default, 0); + + AssertCompleted(Status.OK, status); + Assert.AreEqual(2 * value.vfield1, output.value.vfield1); + Assert.AreEqual(2 * value.vfield2, output.value.vfield2); + } + + key = new KeyStruct { kfield1 = nums.Length, kfield2 = nums.Length + 1 }; + status = uContext.Read(ref key, ref input, ref output, Empty.Default, 0); + AssertCompleted(Status.NOTFOUND, status); + } + finally + { + uContext.SuspendThread(); + } + } + + // Tests the overload of .Read(key, input, out output, context, serialNo) + [Test] + [Category("FasterKV")] + [Category("Smoke")] + public void ReadNoRefKeyInputOutput([Values] TestUtils.DeviceType deviceType) + { + InputStruct input = default; + + Setup(128, new LogSettings { MemorySizeBits = 22, 
SegmentSizeBits = 22, PageSizeBits = 10 }, deviceType); + uContext.ResumeThread(); + + try + { + var key1 = new KeyStruct { kfield1 = 13, kfield2 = 14 }; + var value = new ValueStruct { vfield1 = 23, vfield2 = 24 }; + + uContext.Upsert(ref key1, ref value, Empty.Default, 0); + var status = uContext.Read(key1, input, out OutputStruct output, Empty.Default, 111); + AssertCompleted(Status.OK, status); + + // Verify the read data + Assert.AreEqual(value.vfield1, output.value.vfield1); + Assert.AreEqual(value.vfield2, output.value.vfield2); + Assert.AreEqual(key1.kfield1, 13); + Assert.AreEqual(key1.kfield2, 14); + } + finally + { + uContext.SuspendThread(); + } + } + + // Test the overload call of .Read (key, out output, userContext, serialNo) + [Test] + [Category("FasterKV")] + public void ReadNoRefKey([Values] TestUtils.DeviceType deviceType) + { + Setup(128, new LogSettings { MemorySizeBits = 22, SegmentSizeBits = 22, PageSizeBits = 10 }, deviceType); + uContext.ResumeThread(); + + try + { + var key1 = new KeyStruct { kfield1 = 13, kfield2 = 14 }; + var value = new ValueStruct { vfield1 = 23, vfield2 = 24 }; + + uContext.Upsert(ref key1, ref value, Empty.Default, 0); + var status = uContext.Read(key1, out OutputStruct output, Empty.Default, 1); + AssertCompleted(Status.OK, status); + + // Verify the read data + Assert.AreEqual(value.vfield1, output.value.vfield1); + Assert.AreEqual(value.vfield2, output.value.vfield2); + Assert.AreEqual(key1.kfield1, 13); + Assert.AreEqual(key1.kfield2, 14); + } + finally + { + uContext.SuspendThread(); + } + } + + + // Test the overload call of .Read (ref key, ref output, userContext, serialNo) + [Test] + [Category("FasterKV")] + [Category("Smoke")] + public void ReadWithoutInput([Values] TestUtils.DeviceType deviceType) + { + Setup(128, new LogSettings { MemorySizeBits = 22, SegmentSizeBits = 22, PageSizeBits = 10 }, deviceType); + uContext.ResumeThread(); + + try + { + OutputStruct output = default; + + var key1 = new KeyStruct { 
kfield1 = 13, kfield2 = 14 }; + var value = new ValueStruct { vfield1 = 23, vfield2 = 24 }; + + uContext.Upsert(ref key1, ref value, Empty.Default, 0); + var status = uContext.Read(ref key1, ref output, Empty.Default, 99); + AssertCompleted(Status.OK, status); + + // Verify the read data + Assert.AreEqual(value.vfield1, output.value.vfield1); + Assert.AreEqual(value.vfield2, output.value.vfield2); + Assert.AreEqual(key1.kfield1, 13); + Assert.AreEqual(key1.kfield2, 14); + } + finally + { + uContext.SuspendThread(); + } + } + + // Test the overload call of .Read (ref key, ref input, ref output, ref recordInfo, userContext: context) + [Test] + [Category("FasterKV")] + [Category("Smoke")] + public void ReadWithoutSerialID() + { + // Just checking without Serial ID so one device type is enough + deviceType = TestUtils.DeviceType.MLSD; + + Setup(128, new LogSettings { MemorySizeBits = 29 }, deviceType); + uContext.ResumeThread(); + + try + { + InputStruct input = default; + OutputStruct output = default; + + var key1 = new KeyStruct { kfield1 = 13, kfield2 = 14 }; + var value = new ValueStruct { vfield1 = 23, vfield2 = 24 }; + + uContext.Upsert(ref key1, ref value, Empty.Default, 0); + var status = uContext.Read(ref key1, ref input, ref output, Empty.Default); + AssertCompleted(Status.OK, status); + + Assert.AreEqual(value.vfield1, output.value.vfield1); + Assert.AreEqual(value.vfield2, output.value.vfield2); + Assert.AreEqual(key1.kfield1, 13); + Assert.AreEqual(key1.kfield2, 14); + } + finally + { + uContext.SuspendThread(); + } + } + + // Test the overload call of .Read (key) + [Test] + [Category("FasterKV")] + [Category("Smoke")] + public void ReadBareMinParams([Values] TestUtils.DeviceType deviceType) + { + Setup(128, new LogSettings { MemorySizeBits = 22, SegmentSizeBits = 22, PageSizeBits = 10 }, deviceType); + uContext.ResumeThread(); + + try + { + var key1 = new KeyStruct { kfield1 = 13, kfield2 = 14 }; + var value = new ValueStruct { vfield1 = 23, vfield2 = 24 
}; + + uContext.Upsert(ref key1, ref value, Empty.Default, 0); + + var (status, output) = uContext.Read(key1); + AssertCompleted(Status.OK, status); + + Assert.AreEqual(value.vfield1, output.value.vfield1); + Assert.AreEqual(value.vfield2, output.value.vfield2); + Assert.AreEqual(key1.kfield1, 13); + Assert.AreEqual(key1.kfield2, 14); + } + finally + { + uContext.SuspendThread(); + } + } + + // Test the ReadAtAddress where ReadFlags = ReadFlags.none + [Test] + [Category("FasterKV")] + [Category("Smoke")] + public void ReadAtAddressReadFlagsNone() + { + // Just functional test of ReadFlag so one device is enough + deviceType = TestUtils.DeviceType.MLSD; + + Setup(128, new LogSettings { MemorySizeBits = 29 }, deviceType); + uContext.ResumeThread(); + + try + { + InputStruct input = default; + OutputStruct output = default; + + var key1 = new KeyStruct { kfield1 = 13, kfield2 = 14 }; + var value = new ValueStruct { vfield1 = 23, vfield2 = 24 }; + var readAtAddress = fht.Log.BeginAddress; + + uContext.Upsert(ref key1, ref value, Empty.Default, 0); + var status = uContext.ReadAtAddress(readAtAddress, ref input, ref output, ReadFlags.None, Empty.Default, 0); + AssertCompleted(Status.OK, status); + + Assert.AreEqual(value.vfield1, output.value.vfield1); + Assert.AreEqual(value.vfield2, output.value.vfield2); + Assert.AreEqual(key1.kfield1, 13); + Assert.AreEqual(key1.kfield2, 14); + } + finally + { + uContext.SuspendThread(); + } + } + } +} \ No newline at end of file From 204f258c2758216f1b60b4a0a5c26a561e89e4cc Mon Sep 17 00:00:00 2001 From: TedHartMS <15467143+TedHartMS@users.noreply.github.com> Date: Mon, 17 Jan 2022 09:21:24 -0800 Subject: [PATCH 19/25] Updates from code review: - IsLocked returns shared lock count (added test) - Fix location of IsIntermediate checks in RecordInfo.Lock/Seal - Remove unused IEqualityComparer in LockTableEntry --- .../ClientSession/LockableUnsafeContext.cs | 6 +- cs/src/core/Index/Common/RecordInfo.cs | 29 ++++---- 
cs/src/core/Utilities/LockTable.cs | 12 ++-- cs/test/LockableUnsafeContextTests.cs | 71 +++++++++++++------ cs/test/ReadCacheChainTests.cs | 8 +-- 5 files changed, 78 insertions(+), 48 deletions(-) diff --git a/cs/src/core/ClientSession/LockableUnsafeContext.cs b/cs/src/core/ClientSession/LockableUnsafeContext.cs index 900498b94..64454c58c 100644 --- a/cs/src/core/ClientSession/LockableUnsafeContext.cs +++ b/cs/src/core/ClientSession/LockableUnsafeContext.cs @@ -184,7 +184,7 @@ public void Unlock(ref Key key, LockType lockType) /// Determines if the key is locked. Note this value may be obsolete as soon as it returns. /// /// The key to lock - public (bool exclusive, bool shared) IsLocked(ref Key key) + public (bool exclusive, byte shared) IsLocked(ref Key key) { CheckAcquired(); @@ -197,14 +197,14 @@ public void Unlock(ref Key key, LockType lockType) status = clientSession.fht.InternalLock(ref key, lockOp, ref oneMiss, out lockInfo); while (status == OperationStatus.RETRY_NOW); Debug.Assert(status == OperationStatus.SUCCESS); - return (lockInfo.IsLockedExclusive, lockInfo.IsLockedShared); + return (lockInfo.IsLockedExclusive, lockInfo.NumLockedShared); } /// /// Determines if the key is locked. Note this value may be obsolete as soon as it returns. 
/// /// The key to lock - public (bool exclusive, bool shared) IsLocked(Key key) => IsLocked(ref key); + public (bool exclusive, byte shared) IsLocked(Key key) => IsLocked(ref key); #endregion Key Locking diff --git a/cs/src/core/Index/Common/RecordInfo.cs b/cs/src/core/Index/Common/RecordInfo.cs index 2fd85b9e4..5368ae088 100644 --- a/cs/src/core/Index/Common/RecordInfo.cs +++ b/cs/src/core/Index/Common/RecordInfo.cs @@ -76,11 +76,13 @@ public static void WriteInfo(ref RecordInfo info, bool inNewVersion, bool tombst public bool IsLockedExclusive => (word & kExclusiveLockBitMask) != 0; - public bool IsLockedShared => (word & kSharedLockMaskInWord) != 0; + public byte NumLockedShared => (byte)((word & kSharedLockMaskInWord) >> kLockShiftInWord); public void ClearLocks() => word &= ~(kExclusiveLockBitMask | kSharedLockMaskInWord); - public bool IsIntermediate => (word & (kTentativeBitMask | kSealedBitMask)) != 0; + public bool IsIntermediate => IsIntermediateWord(word); + + private static bool IsIntermediateWord(long word) => (word & (kTentativeBitMask | kSealedBitMask)) != 0; /// /// Take exclusive (write) lock on RecordInfo @@ -112,9 +114,9 @@ public bool TryLockExclusive(int spinCount = 1) // Acquire exclusive lock (readers may still be present; we'll drain them later) while (true) { - if (IsIntermediate) - return false; long expected_word = word; + if (IsIntermediateWord(expected_word)) + return false; if ((expected_word & kExclusiveLockBitMask) == 0) { if (expected_word == Interlocked.CompareExchange(ref word, expected_word | kExclusiveLockBitMask, expected_word)) @@ -175,9 +177,9 @@ public bool TryLockShared(int spinCount = 1) // Acquire shared lock while (true) { - if (IsIntermediate) - return false; long expected_word = word; + if (IsIntermediateWord(expected_word)) + return false; if (((expected_word & kExclusiveLockBitMask) == 0) // not exclusively locked && (expected_word & kSharedLockMaskInWord) != kSharedLockMaskInWord) // shared lock is not full { @@ 
-226,9 +228,9 @@ public bool TryLockExclusiveFromShared(int spinCount = 1) while (true) { // Even though we own the lock here, it might be in the process of eviction, which seals it - if (IsIntermediate) - return false; long expected_word = word; + if (IsIntermediateWord(expected_word)) + return false; if ((expected_word & kExclusiveLockBitMask) == 0) // not exclusively locked { var new_word = expected_word | kExclusiveLockBitMask; @@ -249,7 +251,7 @@ public bool TryLockExclusiveFromShared(int spinCount = 1) public void CopyLocksFrom(RecordInfo other) { word &= ~(kExclusiveLockBitMask | kSharedLockMaskInWord); - word |= (other.word & (kExclusiveLockBitMask | kSharedLockMaskInWord)); + word |= other.word & (kExclusiveLockBitMask | kSharedLockMaskInWord); } [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -321,11 +323,13 @@ public bool Seal(bool manualLocking = false) long sealBits = manualLocking ? kSealedBitMask : kExclusiveLockBitMask | kSealedBitMask; while (true) { - if (IsIntermediate) + long expected_word = word; + + // If someone else sealed this, we fail this attempt. + if (IsIntermediateWord(expected_word) || ((expected_word & kValidBitMask) == 0)) return false; - if ((word & sealBits) == 0) + if ((expected_word & sealBits) == 0) { - long expected_word = word; long new_word = word | sealBits; long current_word = Interlocked.CompareExchange(ref word, new_word, expected_word); if (expected_word == current_word) @@ -336,7 +340,6 @@ public bool Seal(bool manualLocking = false) return true; } - // If someone else sealed this, we fail this attempt. if ((word & kSealedBitMask) > 0 || this.Invalid) return false; } diff --git a/cs/src/core/Utilities/LockTable.cs b/cs/src/core/Utilities/LockTable.cs index 85ccbc55e..2e0647b01 100644 --- a/cs/src/core/Utilities/LockTable.cs +++ b/cs/src/core/Utilities/LockTable.cs @@ -10,7 +10,7 @@ namespace FASTER.core { // We need to duplicate the Key because we can't get to the key object of the dictionary to Return() it. 
// This is a class rather than a struct because a struct would update a copy. - internal class LockTableEntry : IEqualityComparer> + internal class LockTableEntry { internal IHeapContainer key; internal RecordInfo logRecordInfo; // in main log @@ -23,11 +23,6 @@ internal LockTableEntry(IHeapContainer key, RecordInfo logRecordInfo, Reco this.lockRecordInfo = lockRecordInfo; } - public bool Equals(LockTableEntry k1, LockTableEntry k2) - => k1.logRecordInfo.Equals(k2.logRecordInfo) && k1.lockRecordInfo.Tentative == k2.lockRecordInfo.Tentative; - - public int GetHashCode(LockTableEntry k) => (int)k.logRecordInfo.GetHashCode64(); - [MethodImpl(MethodImplOptions.AggressiveInlining)] internal void XLock() => this.lockRecordInfo.LockExclusiveRaw(); @@ -43,6 +38,11 @@ public bool Equals(LockTableEntry k1, LockTableEntry k2) public override string ToString() => $"{key}"; } + // TODO: abstract base or interface for additional implementations (or "DI" a test wrapper) + // TODO: internal IHeapContainer key; causes an object allocation each time. we need: + // Non-Varlen: a specialized LockTableEntry that just uses Key directly + // Varlen: a shared heap container abstraction that shares a single buffer pool allocator and allocates, frees into it, returning a struct wrapper. 
+ internal class LockTable { class KeyComparer : IEqualityComparer> diff --git a/cs/test/LockableUnsafeContextTests.cs b/cs/test/LockableUnsafeContextTests.cs index bbc4d18fe..f12fc0f06 100644 --- a/cs/test/LockableUnsafeContextTests.cs +++ b/cs/test/LockableUnsafeContextTests.cs @@ -135,14 +135,11 @@ void Populate() Assert.AreNotEqual(Status.PENDING, session.Upsert(key, key * valueMult)); } - static void AssertIsLocked(LockableUnsafeContext luContext, int key, LockType lockType) - => AssertIsLocked(luContext, key, lockType == LockType.Exclusive, lockType == LockType.Shared); - static void AssertIsLocked(LockableUnsafeContext luContext, int key, bool xlock, bool slock) { var (isX, isS) = luContext.IsLocked(key); Assert.AreEqual(xlock, isX, "xlock mismatch"); - Assert.AreEqual(slock, isS, "slock mismatch"); + Assert.AreEqual(slock, isS > 0, "slock mismatch"); } void PrepareRecordLocation(FlushMode recordLocation) @@ -167,7 +164,7 @@ void EnsureNoLocks() while (iter.GetNext(out var recordInfo, out var key, out var value)) { ++count; - Assert.False(recordInfo.IsLocked, $"Unexpected Locked record: {(recordInfo.IsLockedShared ? "S" : "")} {(recordInfo.IsLockedExclusive ? "X" : "")}"); + Assert.False(recordInfo.IsLocked, $"Unexpected Locked record: {(recordInfo.NumLockedShared > 0 ? "S" : "")} {(recordInfo.IsLockedExclusive ? "X" : "")}"); } // We delete some records so just make sure the test worked. 
@@ -227,7 +224,7 @@ public void InMemorySimpleLockTxnTest([Values] ResultLockTarget resultLockTarget Assert.True(completedOutputs.Next()); value24 = completedOutputs.Current.Output; Assert.False(completedOutputs.Current.RecordMetadata.RecordInfo.IsLockedExclusive); - Assert.True(completedOutputs.Current.RecordMetadata.RecordInfo.IsLockedShared); + Assert.Less(0, completedOutputs.Current.RecordMetadata.RecordInfo.NumLockedShared); Assert.False(completedOutputs.Next()); completedOutputs.Dispose(); } @@ -246,7 +243,7 @@ public void InMemorySimpleLockTxnTest([Values] ResultLockTarget resultLockTarget Assert.True(completedOutputs.Next()); value51 = completedOutputs.Current.Output; Assert.False(completedOutputs.Current.RecordMetadata.RecordInfo.IsLockedExclusive); - Assert.True(completedOutputs.Current.RecordMetadata.RecordInfo.IsLockedShared); + Assert.Less(0, completedOutputs.Current.RecordMetadata.RecordInfo.NumLockedShared); Assert.False(completedOutputs.Next()); completedOutputs.Dispose(); } @@ -270,7 +267,7 @@ public void InMemorySimpleLockTxnTest([Values] ResultLockTarget resultLockTarget Assert.True(completedOutputs.Next()); resultValue = completedOutputs.Current.Output; Assert.True(completedOutputs.Current.RecordMetadata.RecordInfo.IsLockedExclusive); - Assert.False(completedOutputs.Current.RecordMetadata.RecordInfo.IsLockedShared); + Assert.AreEqual(0, completedOutputs.Current.RecordMetadata.RecordInfo.NumLockedShared); Assert.False(completedOutputs.Next()); completedOutputs.Dispose(); } @@ -351,7 +348,7 @@ public void InMemoryLongLockTest([Values] ResultLockTarget resultLockTarget, [Va Assert.True(completedOutputs.Next()); value51 = completedOutputs.Current.Output; Assert.True(completedOutputs.Current.RecordMetadata.RecordInfo.IsLockedExclusive); - Assert.False(completedOutputs.Current.RecordMetadata.RecordInfo.IsLockedShared); + Assert.AreEqual(0, completedOutputs.Current.RecordMetadata.RecordInfo.NumLockedShared); Assert.False(completedOutputs.Next()); 
completedOutputs.Dispose(); } @@ -554,7 +551,7 @@ void VerifySplicedInKey(LockableUnsafeContext 0); luContext.Unlock(key, lockType); Assert.IsFalse(fht.LockTable.Get(key, out _)); @@ -772,7 +769,7 @@ public void TransferFromReadOnlyToUpdateRecordTest([Values] UpdateOp updateOp) var (xlock, slock) = luContext.IsLocked(key); Assert.IsTrue(xlock); - Assert.IsFalse(slock); + Assert.AreEqual(0, slock); } finally { @@ -835,11 +832,11 @@ void unlockKey(int key) { // Use Task instead of Thread because this propagates exceptions (such as Assert.* failures) back to this thread. Task.WaitAll(Task.Run(() => locker(key)), Task.Run(() => updater(key))); - var (xlock, slock) = lockLuContext.IsLocked(key); + var (xlock, slockCount) = lockLuContext.IsLocked(key); var expectedXlock = getLockType(key) == LockType.Exclusive && lockOp != LockOperationType.Unlock; var expectedSlock = getLockType(key) == LockType.Shared && lockOp != LockOperationType.Unlock; Assert.AreEqual(expectedXlock, xlock); - Assert.AreEqual(expectedSlock, slock); + Assert.AreEqual(expectedSlock, slockCount > 0); if (lockOp == LockOperationType.Lock) { @@ -901,6 +898,36 @@ void updater(int key) } } + [Test] + [Category(LockableUnsafeContextTestCategory)] + [Category(SmokeTestCategory)] + public void MultiSharedLockTest() + { + Populate(); + + using var session = fht.NewSession(new SimpleFunctions()); + using var luContext = session.GetLockableUnsafeContext(); + + const int key = 42; + var maxLocks = 63; + + for (var ii = 0; ii < maxLocks; ++ii) + { + luContext.Lock(key, LockType.Shared); + var (xlock, slockCount) = luContext.IsLocked(key); + Assert.IsFalse(xlock); + Assert.AreEqual(ii + 1, slockCount); + } + + for (var ii = 0; ii < maxLocks; ++ii) + { + luContext.Unlock(key, LockType.Shared); + var (xlock, slockCount) = luContext.IsLocked(key); + Assert.IsFalse(xlock); + Assert.AreEqual(maxLocks - ii - 1, slockCount); + } + } + [Test] [Category(LockableUnsafeContextTestCategory)] [Category(SmokeTestCategory)] @@ 
-937,7 +964,7 @@ public void EvictFromMainLogToLockTableTest() Assert.IsTrue(found); var lockType = locks[key]; Assert.AreEqual(lockType == LockType.Exclusive, recordInfo.IsLockedExclusive); - Assert.AreEqual(lockType != LockType.Exclusive, recordInfo.IsLockedShared); + Assert.AreEqual(lockType != LockType.Exclusive, recordInfo.NumLockedShared > 0); // Just a little more testing of Read/CTT transferring from LockTable int input = 0, output = 0, localKey = key; @@ -947,14 +974,14 @@ public void EvictFromMainLogToLockTableTest() session.CompletePending(wait: true); Assert.IsFalse(fht.LockTable.Get(key, out _)); - var (isLockedExclusive, isLockedShared) = luContext.IsLocked(localKey); + var (isLockedExclusive, numLockedShared) = luContext.IsLocked(localKey); Assert.AreEqual(lockType == LockType.Exclusive, isLockedExclusive); - Assert.AreEqual(lockType != LockType.Exclusive, isLockedShared); + Assert.AreEqual(lockType != LockType.Exclusive, numLockedShared > 0); luContext.Unlock(key, lockType); - (isLockedExclusive, isLockedShared) = luContext.IsLocked(localKey); + (isLockedExclusive, numLockedShared) = luContext.IsLocked(localKey); Assert.IsFalse(isLockedExclusive); - Assert.IsFalse(isLockedShared); + Assert.AreEqual(0, numLockedShared); } Assert.IsFalse(fht.LockTable.IsActive); @@ -1011,9 +1038,9 @@ public async ValueTask CheckpointRecoverTest([Values] CheckpointType checkpointT foreach (var key in locks.Keys.OrderBy(k => k)) { - var (exclusive, shared) = luContext.IsLocked(key); + var (exclusive, numShared) = luContext.IsLocked(key); Assert.IsFalse(exclusive, $"key: {key}"); - Assert.IsFalse(shared, $"key: {key}"); + Assert.AreEqual(0, numShared, $"key: {key}"); } } } @@ -1127,7 +1154,7 @@ async static Task SecondaryReader(FasterKV secondaryStore, SyncMode Assert.AreEqual(key, output); var (xlock, slock) = luc1.IsLocked(key); Assert.IsFalse(xlock); - Assert.IsFalse(slock); + Assert.AreEqual(0, slock); key++; if (key == numSecondaryReaderKeys) diff --git 
a/cs/test/ReadCacheChainTests.cs b/cs/test/ReadCacheChainTests.cs index 06e6c1f7d..2d1e081fa 100644 --- a/cs/test/ReadCacheChainTests.cs +++ b/cs/test/ReadCacheChainTests.cs @@ -485,7 +485,7 @@ public void EvictFromReadCacheToLockTableTest() Assert.IsTrue(found); var lockType = locks[key]; Assert.AreEqual(lockType == LockType.Exclusive, recordInfo.IsLockedExclusive); - Assert.AreEqual(lockType != LockType.Exclusive, recordInfo.IsLockedShared); + Assert.AreEqual(lockType != LockType.Exclusive, recordInfo.NumLockedShared > 0); luContext.Unlock(key, lockType); Assert.IsFalse(fht.LockTable.Get(key, out recordInfo)); @@ -533,7 +533,7 @@ public void TransferFromLockTableToReadCacheTest() Assert.IsTrue(found); var lockType = locks[key]; Assert.AreEqual(lockType == LockType.Exclusive, recordInfo.IsLockedExclusive); - Assert.AreEqual(lockType != LockType.Exclusive, recordInfo.IsLockedShared); + Assert.AreEqual(lockType != LockType.Exclusive, recordInfo.NumLockedShared > 0); } fht.Log.FlushAndEvict(wait: true); @@ -546,9 +546,9 @@ public void TransferFromLockTableToReadCacheTest() session.CompletePending(wait: true); var lockType = locks[key]; - var (exclusive, shared) = luContext.IsLocked(key); + var (exclusive, sharedCount) = luContext.IsLocked(key); Assert.AreEqual(lockType == LockType.Exclusive, exclusive); - Assert.AreEqual(lockType != LockType.Exclusive, shared); + Assert.AreEqual(lockType != LockType.Exclusive, sharedCount > 0); luContext.Unlock(key, lockType); Assert.IsFalse(fht.LockTable.Get(key, out _)); From 5cba23b06e9327c544104acfcb18cf5693e2c033 Mon Sep 17 00:00:00 2001 From: TedHartMS <15467143+TedHartMS@users.noreply.github.com> Date: Mon, 17 Jan 2022 22:58:29 -0800 Subject: [PATCH 20/25] Rename Take(Full|Index|HybridLog)Checkpoint to TryInitiate(Full|Index|HybridLog)Checkpoint, so it is obvious they need to complete it --- cs/benchmark/FasterClientSessionYcsbBenchmark.cs | 2 +- cs/benchmark/FasterSpanByteYcsbBenchmark.cs | 2 +- 
cs/benchmark/FasterYcsbBenchmark.cs | 2 +- cs/benchmark/TestLoader.cs | 2 +- cs/playground/ClassRecoveryDurability/Storedb.cs | 2 +- cs/playground/SumStore/RecoveryTest.cs | 2 +- cs/samples/HelloWorld/Program.cs | 2 +- cs/src/core/Index/FASTER/FASTER.cs | 12 ++++++------ cs/src/core/Index/Interfaces/IFasterKV.cs | 6 +++--- cs/test/AsyncLargeObjectTests.cs | 2 +- cs/test/AsyncTests.cs | 4 ++-- cs/test/CheckpointManagerTests.cs | 10 +++++----- cs/test/LargeObjectTests.cs | 2 +- cs/test/LockableUnsafeContextTests.cs | 4 ++-- cs/test/ObjectRecoveryTest.cs | 4 ++-- cs/test/ObjectRecoveryTest2.cs | 4 ++-- cs/test/ObjectRecoveryTest3.cs | 4 ++-- cs/test/RecoverContinueTests.cs | 4 ++-- cs/test/RecoveryChecks.cs | 4 ++-- cs/test/RecoveryTests.cs | 8 ++++---- cs/test/SharedDirectoryTests.cs | 2 +- cs/test/SimpleRecoveryTest.cs | 6 +++--- cs/test/StateMachineTests.cs | 6 +++--- 23 files changed, 48 insertions(+), 48 deletions(-) diff --git a/cs/benchmark/FasterClientSessionYcsbBenchmark.cs b/cs/benchmark/FasterClientSessionYcsbBenchmark.cs index 687f5158e..2c68520d5 100644 --- a/cs/benchmark/FasterClientSessionYcsbBenchmark.cs +++ b/cs/benchmark/FasterClientSessionYcsbBenchmark.cs @@ -254,7 +254,7 @@ internal unsafe (double, double) Run(TestLoader testLoader) if (checkpointTaken < swatch.ElapsedMilliseconds / testLoader.Options.PeriodicCheckpointMilliseconds) { long start = swatch.ElapsedTicks; - if (store.TakeHybridLogCheckpoint(out _, testLoader.Options.PeriodicCheckpointType, testLoader.Options.PeriodicCheckpointTryIncremental)) + if (store.TryInitiateHybridLogCheckpoint(out _, testLoader.Options.PeriodicCheckpointType, testLoader.Options.PeriodicCheckpointTryIncremental)) { store.CompleteCheckpointAsync().AsTask().GetAwaiter().GetResult(); var timeTaken = (swatch.ElapsedTicks - start) / TimeSpan.TicksPerMillisecond; diff --git a/cs/benchmark/FasterSpanByteYcsbBenchmark.cs b/cs/benchmark/FasterSpanByteYcsbBenchmark.cs index cc0642a27..9613dd994 100644 --- 
a/cs/benchmark/FasterSpanByteYcsbBenchmark.cs +++ b/cs/benchmark/FasterSpanByteYcsbBenchmark.cs @@ -293,7 +293,7 @@ internal unsafe (double, double) Run(TestLoader testLoader) if (checkpointTaken < swatch.ElapsedMilliseconds / testLoader.Options.PeriodicCheckpointMilliseconds) { long start = swatch.ElapsedTicks; - if (store.TakeHybridLogCheckpoint(out _, testLoader.Options.PeriodicCheckpointType, testLoader.Options.PeriodicCheckpointTryIncremental)) + if (store.TryInitiateHybridLogCheckpoint(out _, testLoader.Options.PeriodicCheckpointType, testLoader.Options.PeriodicCheckpointTryIncremental)) { store.CompleteCheckpointAsync().AsTask().GetAwaiter().GetResult(); var timeTaken = (swatch.ElapsedTicks - start) / TimeSpan.TicksPerMillisecond; diff --git a/cs/benchmark/FasterYcsbBenchmark.cs b/cs/benchmark/FasterYcsbBenchmark.cs index fa715510b..85de7adbe 100644 --- a/cs/benchmark/FasterYcsbBenchmark.cs +++ b/cs/benchmark/FasterYcsbBenchmark.cs @@ -313,7 +313,7 @@ internal unsafe (double, double) Run(TestLoader testLoader) if (checkpointTaken < swatch.ElapsedMilliseconds / testLoader.Options.PeriodicCheckpointMilliseconds) { long start = swatch.ElapsedTicks; - if (store.TakeHybridLogCheckpoint(out _, testLoader.Options.PeriodicCheckpointType, testLoader.Options.PeriodicCheckpointTryIncremental)) + if (store.TryInitiateHybridLogCheckpoint(out _, testLoader.Options.PeriodicCheckpointType, testLoader.Options.PeriodicCheckpointTryIncremental)) { store.CompleteCheckpointAsync().AsTask().GetAwaiter().GetResult(); var timeTaken = (swatch.ElapsedTicks - start) / TimeSpan.TicksPerMillisecond; diff --git a/cs/benchmark/TestLoader.cs b/cs/benchmark/TestLoader.cs index 55d561c9c..d2c9756f3 100644 --- a/cs/benchmark/TestLoader.cs +++ b/cs/benchmark/TestLoader.cs @@ -373,7 +373,7 @@ internal void MaybeCheckpointStore(FasterKV store) { Console.WriteLine($"Checkpointing FasterKV to {this.BackupPath} for fast restart"); var sw = Stopwatch.StartNew(); - store.TakeFullCheckpoint(out _, 
CheckpointType.Snapshot); + store.TryInitiateFullCheckpoint(out _, CheckpointType.Snapshot); store.CompleteCheckpointAsync().AsTask().GetAwaiter().GetResult(); sw.Stop(); Console.WriteLine($" Completed checkpoint in {(double)sw.ElapsedMilliseconds / 1000:N3} seconds"); diff --git a/cs/playground/ClassRecoveryDurability/Storedb.cs b/cs/playground/ClassRecoveryDurability/Storedb.cs index c4369fff7..a089d6f64 100644 --- a/cs/playground/ClassRecoveryDurability/Storedb.cs +++ b/cs/playground/ClassRecoveryDurability/Storedb.cs @@ -60,7 +60,7 @@ public bool InitAndRecover() public Guid Checkpoint() { - db.TakeFullCheckpoint(out Guid token, CheckpointType.Snapshot); + db.TryInitiateFullCheckpoint(out Guid token, CheckpointType.Snapshot); db.CompleteCheckpointAsync().GetAwaiter().GetResult(); return token; } diff --git a/cs/playground/SumStore/RecoveryTest.cs b/cs/playground/SumStore/RecoveryTest.cs index 772541660..32198aff0 100644 --- a/cs/playground/SumStore/RecoveryTest.cs +++ b/cs/playground/SumStore/RecoveryTest.cs @@ -166,7 +166,7 @@ private void PeriodicCheckpoints() { Thread.Sleep(checkpointInterval); - fht.TakeFullCheckpoint(out Guid token, CheckpointType.Snapshot); + fht.TryInitiateFullCheckpoint(out Guid token, CheckpointType.Snapshot); fht.CompleteCheckpointAsync().GetAwaiter().GetResult(); diff --git a/cs/samples/HelloWorld/Program.cs b/cs/samples/HelloWorld/Program.cs index 9a191d2ea..5c630aae7 100644 --- a/cs/samples/HelloWorld/Program.cs +++ b/cs/samples/HelloWorld/Program.cs @@ -126,7 +126,7 @@ static void DiskSample() // Take checkpoint so data is persisted for recovery Console.WriteLine("Taking full checkpoint"); - store.TakeFullCheckpoint(out _, CheckpointType.Snapshot); + store.TryInitiateFullCheckpoint(out _, CheckpointType.Snapshot); store.CompleteCheckpointAsync().GetAwaiter().GetResult(); } else diff --git a/cs/src/core/Index/FASTER/FASTER.cs b/cs/src/core/Index/FASTER/FASTER.cs index ad80ffca0..2fa6b5e00 100644 --- 
a/cs/src/core/Index/FASTER/FASTER.cs +++ b/cs/src/core/Index/FASTER/FASTER.cs @@ -299,7 +299,7 @@ public FasterKV(long size, LogSettings logSettings, /// fail if we are already taking a checkpoint or performing some other /// operation such as growing the index). Use CompleteCheckpointAsync to wait completion. /// - public bool TakeFullCheckpoint(out Guid token, CheckpointType checkpointType, long targetVersion = -1) + public bool TryInitiateFullCheckpoint(out Guid token, CheckpointType checkpointType, long targetVersion = -1) { ISynchronizationTask backend; if (checkpointType == CheckpointType.FoldOver) @@ -338,7 +338,7 @@ public bool TakeFullCheckpoint(out Guid token, CheckpointType checkpointType, lo public async ValueTask<(bool success, Guid token)> TakeFullCheckpointAsync(CheckpointType checkpointType, CancellationToken cancellationToken = default, long targetVersion = -1) { - var success = TakeFullCheckpoint(out Guid token, checkpointType, targetVersion); + var success = TryInitiateFullCheckpoint(out Guid token, checkpointType, targetVersion); if (success) await CompleteCheckpointAsync(cancellationToken).ConfigureAwait(false); @@ -351,7 +351,7 @@ public bool TakeFullCheckpoint(out Guid token, CheckpointType checkpointType, lo /// /// Checkpoint token /// Whether we could initiate the checkpoint. Use CompleteCheckpointAsync to wait completion. 
- public bool TakeIndexCheckpoint(out Guid token) + public bool TryInitiateIndexCheckpoint(out Guid token) { var result = StartStateMachine(new IndexSnapshotStateMachine()); token = _indexCheckpointToken; @@ -372,7 +372,7 @@ public bool TakeIndexCheckpoint(out Guid token) /// public async ValueTask<(bool success, Guid token)> TakeIndexCheckpointAsync(CancellationToken cancellationToken = default) { - var success = TakeIndexCheckpoint(out Guid token); + var success = TryInitiateIndexCheckpoint(out Guid token); if (success) await CompleteCheckpointAsync(cancellationToken).ConfigureAwait(false); @@ -392,7 +392,7 @@ public bool TakeIndexCheckpoint(out Guid token) /// number is -1, checkpoint will unconditionally create a new version. /// /// Whether we could initiate the checkpoint. Use CompleteCheckpointAsync to wait completion. - public bool TakeHybridLogCheckpoint(out Guid token, CheckpointType checkpointType, bool tryIncremental = false, + public bool TryInitiateHybridLogCheckpoint(out Guid token, CheckpointType checkpointType, bool tryIncremental = false, long targetVersion = -1) { ISynchronizationTask backend; @@ -435,7 +435,7 @@ public bool TakeHybridLogCheckpoint(out Guid token, CheckpointType checkpointTyp public async ValueTask<(bool success, Guid token)> TakeHybridLogCheckpointAsync(CheckpointType checkpointType, bool tryIncremental = false, CancellationToken cancellationToken = default, long targetVersion = -1) { - var success = TakeHybridLogCheckpoint(out Guid token, checkpointType, tryIncremental, targetVersion); + var success = TryInitiateHybridLogCheckpoint(out Guid token, checkpointType, tryIncremental, targetVersion); if (success) await CompleteCheckpointAsync(cancellationToken).ConfigureAwait(false); diff --git a/cs/src/core/Index/Interfaces/IFasterKV.cs b/cs/src/core/Index/Interfaces/IFasterKV.cs index a6534d0b7..8025657e9 100644 --- a/cs/src/core/Index/Interfaces/IFasterKV.cs +++ b/cs/src/core/Index/Interfaces/IFasterKV.cs @@ -61,7 +61,7 @@ 
ClientSession /// Whether we successfully initiated the checkpoint (initiation mayfail if we are already taking a checkpoint or performing some other /// operation such as growing the index). Use CompleteCheckpointAsync to await completion. - public bool TakeFullCheckpoint(out Guid token, CheckpointType checkpointType, long targetVersion = -1); + public bool TryInitiateFullCheckpoint(out Guid token, CheckpointType checkpointType, long targetVersion = -1); /// /// Take full (index + log) checkpoint of FASTER asynchronously @@ -85,7 +85,7 @@ ClientSession /// Token describing checkpoint /// Whether we could initiate the checkpoint. Use CompleteCheckpointAsync to await completion. - bool TakeIndexCheckpoint(out Guid token); + bool TryInitiateIndexCheckpoint(out Guid token); /// /// Take asynchronous checkpoint of FASTER index only (not log) @@ -111,7 +111,7 @@ ClientSession /// Whether we successfully initiated the checkpoint (initiation mayfail if we are already taking a checkpoint or performing some other /// operation such as growing the index). Use CompleteCheckpointAsync to await completion. 
- public bool TakeHybridLogCheckpoint(out Guid token, CheckpointType checkpointType, bool tryIncremental = false, long targetVersion = -1); + public bool TryInitiateHybridLogCheckpoint(out Guid token, CheckpointType checkpointType, bool tryIncremental = false, long targetVersion = -1); /// /// Initiate checkpoint of FASTER log only (not index) diff --git a/cs/test/AsyncLargeObjectTests.cs b/cs/test/AsyncLargeObjectTests.cs index d463ade00..0c0604bc9 100644 --- a/cs/test/AsyncLargeObjectTests.cs +++ b/cs/test/AsyncLargeObjectTests.cs @@ -60,7 +60,7 @@ public async Task LargeObjectTest([Values]CheckpointType checkpointType) } } - fht1.TakeFullCheckpoint(out Guid token, checkpointType); + fht1.TryInitiateFullCheckpoint(out Guid token, checkpointType); await fht1.CompleteCheckpointAsync(); fht1.Dispose(); diff --git a/cs/test/AsyncTests.cs b/cs/test/AsyncTests.cs index a54e43c08..e14084af9 100644 --- a/cs/test/AsyncTests.cs +++ b/cs/test/AsyncTests.cs @@ -73,12 +73,12 @@ public async Task AsyncRecoveryTest1(CheckpointType checkpointType) } // does not require session - fht1.TakeFullCheckpoint(out _, checkpointType); + fht1.TryInitiateFullCheckpoint(out _, checkpointType); await fht1.CompleteCheckpointAsync(); s2.CompletePending(true,false); - fht1.TakeFullCheckpoint(out Guid token, checkpointType); + fht1.TryInitiateFullCheckpoint(out Guid token, checkpointType); await fht1.CompleteCheckpointAsync(); s2.Dispose(); diff --git a/cs/test/CheckpointManagerTests.cs b/cs/test/CheckpointManagerTests.cs index f8ac91092..f0c9001e2 100644 --- a/cs/test/CheckpointManagerTests.cs +++ b/cs/test/CheckpointManagerTests.cs @@ -69,23 +69,23 @@ public async Task CheckpointManagerPurgeCheck([Values] DeviceMode deviceMode) switch (checkpointType) { case 0: - fht.TakeHybridLogCheckpoint(out result, CheckpointType.FoldOver); + fht.TryInitiateHybridLogCheckpoint(out result, CheckpointType.FoldOver); logCheckpoints.Add(result, 0); break; case 1: - fht.TakeHybridLogCheckpoint(out result, 
CheckpointType.Snapshot); + fht.TryInitiateHybridLogCheckpoint(out result, CheckpointType.Snapshot); logCheckpoints.Add(result, 0); break; case 2: - fht.TakeIndexCheckpoint(out result); + fht.TryInitiateIndexCheckpoint(out result); indexCheckpoints.Add(result, 0); break; case 3: - fht.TakeFullCheckpoint(out result, CheckpointType.FoldOver); + fht.TryInitiateFullCheckpoint(out result, CheckpointType.FoldOver); fullCheckpoints.Add(result, 0); break; case 4: - fht.TakeFullCheckpoint(out result, CheckpointType.Snapshot); + fht.TryInitiateFullCheckpoint(out result, CheckpointType.Snapshot); fullCheckpoints.Add(result, 0); break; default: diff --git a/cs/test/LargeObjectTests.cs b/cs/test/LargeObjectTests.cs index fbdd0c9f9..a38ca2532 100644 --- a/cs/test/LargeObjectTests.cs +++ b/cs/test/LargeObjectTests.cs @@ -60,7 +60,7 @@ public void LargeObjectTest(CheckpointType checkpointType) } session1.Dispose(); - fht1.TakeFullCheckpoint(out Guid token, checkpointType); + fht1.TryInitiateFullCheckpoint(out Guid token, checkpointType); fht1.CompleteCheckpointAsync().GetAwaiter().GetResult(); fht1.Dispose(); log.Dispose(); diff --git a/cs/test/LockableUnsafeContextTests.cs b/cs/test/LockableUnsafeContextTests.cs index f12fc0f06..180edd357 100644 --- a/cs/test/LockableUnsafeContextTests.cs +++ b/cs/test/LockableUnsafeContextTests.cs @@ -1014,7 +1014,7 @@ public async ValueTask CheckpointRecoverTest([Values] CheckpointType checkpointT if (syncMode == SyncMode.Sync) { - this.fht.TakeFullCheckpoint(out fullCheckpointToken, checkpointType); + this.fht.TryInitiateFullCheckpoint(out fullCheckpointToken, checkpointType); await this.fht.CompleteCheckpointAsync(); } else @@ -1093,7 +1093,7 @@ async static Task PrimaryWriter(FasterKV primaryStore, SyncMode sync // Checkpointing primary until key {key - 1} if (syncMode == SyncMode.Sync) { - primaryStore.TakeHybridLogCheckpoint(out _, CheckpointType.Snapshot); + primaryStore.TryInitiateHybridLogCheckpoint(out _, CheckpointType.Snapshot); 
await primaryStore.CompleteCheckpointAsync().ConfigureAwait(false); } else diff --git a/cs/test/ObjectRecoveryTest.cs b/cs/test/ObjectRecoveryTest.cs index e62b03f3c..7777d2766 100644 --- a/cs/test/ObjectRecoveryTest.cs +++ b/cs/test/ObjectRecoveryTest.cs @@ -111,9 +111,9 @@ public unsafe void Populate() if ((i + 1) % checkpointInterval == 0) { if (first) - while (!fht.TakeFullCheckpoint(out token, CheckpointType.Snapshot)) ; + while (!fht.TryInitiateFullCheckpoint(out token, CheckpointType.Snapshot)) ; else - while (!fht.TakeFullCheckpoint(out _, CheckpointType.Snapshot)) ; + while (!fht.TryInitiateFullCheckpoint(out _, CheckpointType.Snapshot)) ; fht.CompleteCheckpointAsync().GetAwaiter().GetResult(); diff --git a/cs/test/ObjectRecoveryTest2.cs b/cs/test/ObjectRecoveryTest2.cs index 59314ec2f..494f94c36 100644 --- a/cs/test/ObjectRecoveryTest2.cs +++ b/cs/test/ObjectRecoveryTest2.cs @@ -46,7 +46,7 @@ public async ValueTask ObjectRecoveryTest2( Read(session1, context, false); session1.Dispose(); - h.TakeFullCheckpoint(out _, checkpointType); + h.TryInitiateFullCheckpoint(out _, checkpointType); h.CompleteCheckpointAsync().AsTask().GetAwaiter().GetResult(); Destroy(log, objlog, h); @@ -109,7 +109,7 @@ private void Write(ClientSession 0) { - fht.TakeHybridLogCheckpoint(out Guid token, checkpointType); + fht.TryInitiateHybridLogCheckpoint(out Guid token, checkpointType); fht.CompleteCheckpointAsync().GetAwaiter().GetResult(); tokens.Add((i, token)); } diff --git a/cs/test/RecoverContinueTests.cs b/cs/test/RecoverContinueTests.cs index 984ad1f73..5d3431b78 100644 --- a/cs/test/RecoverContinueTests.cs +++ b/cs/test/RecoverContinueTests.cs @@ -77,7 +77,7 @@ public async ValueTask RecoverContinueTest([Values]bool isAsync) var firstsession = fht1.For(new AdSimpleFunctions()).NewSession("first"); IncrementAllValues(ref firstsession, ref sno); - fht1.TakeFullCheckpoint(out _, CheckpointType.Snapshot); + fht1.TryInitiateFullCheckpoint(out _, CheckpointType.Snapshot); 
fht1.CompleteCheckpointAsync().AsTask().GetAwaiter().GetResult(); firstsession.Dispose(); @@ -100,7 +100,7 @@ public async ValueTask RecoverContinueTest([Values]bool isAsync) long newSno = cp.UntilSerialNo; Assert.AreEqual(sno - 1, newSno); IncrementAllValues(ref continuesession, ref sno); - fht2.TakeFullCheckpoint(out _, CheckpointType.Snapshot); + fht2.TryInitiateFullCheckpoint(out _, CheckpointType.Snapshot); fht2.CompleteCheckpointAsync().AsTask().GetAwaiter().GetResult(); continuesession.Dispose(); diff --git a/cs/test/RecoveryChecks.cs b/cs/test/RecoveryChecks.cs index e53e8eab8..25461c596 100644 --- a/cs/test/RecoveryChecks.cs +++ b/cs/test/RecoveryChecks.cs @@ -559,7 +559,7 @@ private async ValueTask IncrSnapshotRecoveryCheck(ICheckpointManager checkpointM } var version1 = fht1.CurrentVersion; - var _result1 = fht1.TakeHybridLogCheckpoint(out var _token1, CheckpointType.Snapshot, true); + var _result1 = fht1.TryInitiateHybridLogCheckpoint(out var _token1, CheckpointType.Snapshot, true); await fht1.CompleteCheckpointAsync(); Assert.IsTrue(_result1); @@ -571,7 +571,7 @@ private async ValueTask IncrSnapshotRecoveryCheck(ICheckpointManager checkpointM } var version2 = fht1.CurrentVersion; - var _result2 = fht1.TakeHybridLogCheckpoint(out var _token2, CheckpointType.Snapshot, true); + var _result2 = fht1.TryInitiateHybridLogCheckpoint(out var _token2, CheckpointType.Snapshot, true); await fht1.CompleteCheckpointAsync(); Assert.IsTrue(_result2); diff --git a/cs/test/RecoveryTests.cs b/cs/test/RecoveryTests.cs index ea95b5ee8..ab0b6d159 100644 --- a/cs/test/RecoveryTests.cs +++ b/cs/test/RecoveryTests.cs @@ -104,7 +104,7 @@ private void FullCheckpointAction(int opNum) if ((opNum + 1) % checkpointInterval == 0) { Guid token; - while (!fht.TakeFullCheckpoint(out token, CheckpointType.Snapshot)) { } + while (!fht.TryInitiateFullCheckpoint(out token, CheckpointType.Snapshot)) { } logTokens.Add(token); indexTokens.Add(token); 
fht.CompleteCheckpointAsync().AsTask().GetAwaiter().GetResult(); @@ -120,12 +120,12 @@ private void SeparateCheckpointAction(int opNum) Guid token; if (checkpointNum % 2 == 1) { - while (!fht.TakeHybridLogCheckpoint(out token, CheckpointType.Snapshot)) { } + while (!fht.TryInitiateHybridLogCheckpoint(out token, CheckpointType.Snapshot)) { } logTokens.Add(token); } else { - while (!fht.TakeIndexCheckpoint(out token)) { } + while (!fht.TryInitiateIndexCheckpoint(out token)) { } indexTokens.Add(token); } fht.CompleteCheckpointAsync().AsTask().GetAwaiter().GetResult(); @@ -448,7 +448,7 @@ private async ValueTask Checkpoint(FasterKV fht, bool isAsy } else { - while (!fht.TakeFullCheckpoint(out this.logToken, CheckpointType.Snapshot)) { } + while (!fht.TryInitiateFullCheckpoint(out this.logToken, CheckpointType.Snapshot)) { } fht.CompleteCheckpointAsync().AsTask().GetAwaiter().GetResult(); } this.indexToken = this.logToken; diff --git a/cs/test/SharedDirectoryTests.cs b/cs/test/SharedDirectoryTests.cs index 051f7b261..3b09ebb4a 100644 --- a/cs/test/SharedDirectoryTests.cs +++ b/cs/test/SharedDirectoryTests.cs @@ -57,7 +57,7 @@ public async ValueTask SharedLogDirectory([Values]bool isAsync) Populate(this.original.Faster); // Take checkpoint from original to start the clone from - Assert.IsTrue(this.original.Faster.TakeFullCheckpoint(out var checkpointGuid, CheckpointType.FoldOver)); + Assert.IsTrue(this.original.Faster.TryInitiateFullCheckpoint(out var checkpointGuid, CheckpointType.FoldOver)); this.original.Faster.CompleteCheckpointAsync().GetAwaiter().GetResult(); // Sanity check against original diff --git a/cs/test/SimpleRecoveryTest.cs b/cs/test/SimpleRecoveryTest.cs index 89b65b4f7..96998081b 100644 --- a/cs/test/SimpleRecoveryTest.cs +++ b/cs/test/SimpleRecoveryTest.cs @@ -120,7 +120,7 @@ private async ValueTask SimpleRecoveryTest1_Worker(CheckpointType checkpointType if (testCommitCookie) fht1.CommitCookie = commitCookie; - fht1.TakeFullCheckpoint(out Guid token, 
checkpointType); + fht1.TryInitiateFullCheckpoint(out Guid token, checkpointType); fht1.CompleteCheckpointAsync().AsTask().GetAwaiter().GetResult(); session1.Dispose(); @@ -182,7 +182,7 @@ public async ValueTask SimpleRecoveryTest2([Values]CheckpointType checkpointType value.numClicks = key; session1.Upsert(ref inputArray[key], ref value, Empty.Default, 0); } - fht1.TakeFullCheckpoint(out Guid token, checkpointType); + fht1.TryInitiateFullCheckpoint(out Guid token, checkpointType); fht1.CompleteCheckpointAsync().AsTask().GetAwaiter().GetResult(); session1.Dispose(); @@ -238,7 +238,7 @@ public async ValueTask ShouldRecoverBeginAddress([Values]bool isAsync) fht1.Log.ShiftBeginAddress(address); - fht1.TakeFullCheckpoint(out Guid token, CheckpointType.FoldOver); + fht1.TryInitiateFullCheckpoint(out Guid token, CheckpointType.FoldOver); fht1.CompleteCheckpointAsync().AsTask().GetAwaiter().GetResult(); session1.Dispose(); diff --git a/cs/test/StateMachineTests.cs b/cs/test/StateMachineTests.cs index c49dff201..9ae658cd5 100644 --- a/cs/test/StateMachineTests.cs +++ b/cs/test/StateMachineTests.cs @@ -323,7 +323,7 @@ public void StateMachineTest6() s2.Dispose(); - fht1.TakeHybridLogCheckpoint(out _, CheckpointType.FoldOver); + fht1.TryInitiateHybridLogCheckpoint(out _, CheckpointType.FoldOver); fht1.CompleteCheckpointAsync().AsTask().GetAwaiter().GetResult(); // We should be in REST, 3 @@ -454,7 +454,7 @@ void Prepare(out SimpleFunctions f, Assert.IsTrue(SystemState.Equal(SystemState.Make(Phase.REST, 1), fht1.SystemState)); // Take index checkpoint for recovery purposes - fht1.TakeIndexCheckpoint(out _); + fht1.TryInitiateIndexCheckpoint(out _); fht1.CompleteCheckpointAsync().AsTask().GetAwaiter().GetResult(); // Index checkpoint does not update version, so @@ -480,7 +480,7 @@ void Prepare(out SimpleFunctions f, // We should be in REST, 1 Assert.IsTrue(SystemState.Equal(SystemState.Make(Phase.REST, 1), fht1.SystemState)); - fht1.TakeHybridLogCheckpoint(out _, 
CheckpointType.FoldOver, targetVersion: toVersion); + fht1.TryInitiateHybridLogCheckpoint(out _, CheckpointType.FoldOver, targetVersion: toVersion); // We should be in PREPARE, 1 Assert.IsTrue(SystemState.Equal(SystemState.Make(Phase.PREPARE, 1), fht1.SystemState)); From 963bc573b63f2dba3310c7141eff3058a5db7370 Mon Sep 17 00:00:00 2001 From: TedHartMS <15467143+TedHartMS@users.noreply.github.com> Date: Tue, 18 Jan 2022 00:49:02 -0800 Subject: [PATCH 21/25] - Remove SupportsPostOperations; this is now always done - Add SingleDeleter for symmetry with Upsert - Add DisposeKey and DisposeValue --- cs/benchmark/Functions.cs | 12 +- cs/remote/samples/FixedLenServer/Types.cs | 8 +- .../src/FASTER.server/ServerKVFunctions.cs | 9 +- cs/src/core/ClientSession/ClientSession.cs | 148 +++--------------- .../ClientSession/LockableUnsafeContext.cs | 51 +++--- cs/src/core/ClientSession/UnsafeContext.cs | 32 ++-- .../core/Compaction/LogCompactionFunctions.cs | 8 +- cs/src/core/Index/FASTER/FASTERImpl.cs | 6 +- cs/src/core/Index/Interfaces/FunctionsBase.cs | 7 +- .../core/Index/Interfaces/IFasterSession.cs | 8 +- cs/src/core/Index/Interfaces/IFunctions.cs | 35 +++-- cs/test/LockableUnsafeContextTests.cs | 2 - 12 files changed, 127 insertions(+), 199 deletions(-) diff --git a/cs/benchmark/Functions.cs b/cs/benchmark/Functions.cs index 52cfd27b9..734a0e32f 100644 --- a/cs/benchmark/Functions.cs +++ b/cs/benchmark/Functions.cs @@ -18,8 +18,6 @@ public Functions(bool locking, bool postOps) this.postOps = postOps; } - public bool SupportsPostOperations => this.postOps; - public void RMWCompletionCallback(ref Key key, ref Input input, ref Output output, Empty ctx, Status status, RecordMetadata recordMetadata) { } @@ -56,6 +54,8 @@ public bool ConcurrentReader(ref Key key, ref Input input, ref Value value, ref return true; } + public void SingleDeleter(ref Key key, ref Value value, ref RecordInfo recordInfo, long address) { value = default; } + public bool ConcurrentDeleter(ref Key key, 
ref Value value, ref RecordInfo recordInfo, long address) => true; // Upsert functions @@ -103,5 +103,13 @@ public void PostInitialUpdater(ref Key key, ref Input input, ref Value value, re public void PostSingleDeleter(ref Key key, ref RecordInfo recordInfo, long address) { } public void PostSingleWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address) { } + + public void DisposeKey(ref Key key) { } + + /// + /// Dispose the value; for example, in evicted log records. FASTER assumes deep-copy semantics such as cloning or refcounting. + /// + /// + public void DisposeValue(ref Value value) { } } } diff --git a/cs/remote/samples/FixedLenServer/Types.cs b/cs/remote/samples/FixedLenServer/Types.cs index 815b81857..54ee525f3 100644 --- a/cs/remote/samples/FixedLenServer/Types.cs +++ b/cs/remote/samples/FixedLenServer/Types.cs @@ -116,8 +116,6 @@ public void CopyUpdater(ref Key key, ref Input input, ref Value oldValue, ref Va output.value = newValue; } - public bool SupportsPostOperations => false; - [MethodImpl(MethodImplOptions.AggressiveInlining)] public bool PostCopyUpdater(ref Key key, ref Input input, ref Value oldValue, ref Value newValue, ref Output output, ref RecordInfo recordInfo, long address) => true; @@ -127,10 +125,16 @@ public void PostInitialUpdater(ref Key key, ref Input input, ref Value value, re public bool NeedCopyUpdate(ref Key key, ref Input input, ref Value oldValue, ref Output output) => true; + public void SingleDeleter(ref Key key, ref Value value, ref RecordInfo recordInfo, long address) { } + public void PostSingleDeleter(ref Key key, ref RecordInfo recordInfo, long address) { } public void PostSingleWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address) { } public bool ConcurrentDeleter(ref Key key, ref Value value, ref RecordInfo recordInfo, long address) => true; + + public void DisposeKey(ref Key 
key) { } + + public void DisposeValue(ref Value value) { } } } diff --git a/cs/remote/src/FASTER.server/ServerKVFunctions.cs b/cs/remote/src/FASTER.server/ServerKVFunctions.cs index 6f07f0d1b..c4f507b07 100644 --- a/cs/remote/src/FASTER.server/ServerKVFunctions.cs +++ b/cs/remote/src/FASTER.server/ServerKVFunctions.cs @@ -11,8 +11,6 @@ internal struct ServerKVFunctions : IFunct private readonly Functions functions; private readonly FasterKVServerSessionBase serverNetworkSession; - public bool SupportsPostOperations => true; - public ServerKVFunctions(Functions functions, FasterKVServerSessionBase serverNetworkSession) { this.functions = functions; @@ -22,6 +20,9 @@ public ServerKVFunctions(Functions functions, FasterKVServerSessionBase public void CheckpointCompletionCallback(string sessionId, CommitPoint commitPoint) => functions.CheckpointCompletionCallback(sessionId, commitPoint); + public void SingleDeleter(ref Key key, ref Value value, ref RecordInfo recordInfo, long address) + => functions.SingleDeleter(ref key, ref value, ref recordInfo, address); + public void PostSingleDeleter(ref Key key, ref RecordInfo recordInfo, long address) { } public bool ConcurrentDeleter(ref Key key, ref Value value, ref RecordInfo recordInfo, long address) @@ -78,5 +79,9 @@ public void PostSingleWriter(ref Key key, ref Input input, ref Value src, ref Va public void UpsertCompletionCallback(ref Key key, ref Input input, ref Value value, long ctx) => functions.UpsertCompletionCallback(ref key, ref input, ref value, ctx); + + public void DisposeKey(ref Key key) { functions.DisposeKey(ref key); } + + public void DisposeValue(ref Value value) { functions.DisposeValue(ref value); } } } diff --git a/cs/src/core/ClientSession/ClientSession.cs b/cs/src/core/ClientSession/ClientSession.cs index 4dcc47d5f..3845b77fc 100644 --- a/cs/src/core/ClientSession/ClientSession.cs +++ b/cs/src/core/ClientSession/ClientSession.cs @@ -819,8 +819,6 @@ public InternalFasterSession(ClientSession 
_clientSession.fht.SupportsLocking; - public bool SupportsPostOperations => _clientSession.functions.SupportsPostOperations; - public bool IsManualLocking => false; #endregion IFunctions - Optional features supported @@ -865,47 +863,12 @@ public void ReadCompletionCallback(ref Key key, ref Input input, ref Output outp #region IFunctions - Upserts [MethodImpl(MethodImplOptions.AggressiveInlining)] - public void SingleWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address) - { - _clientSession.functions.SingleWriter(ref key, ref input, ref src, ref dst, ref output, ref recordInfo, address); - - if (this.SupportsPostOperations && this.SupportsLocking) - { - // Lock ephemerally before we CAS into the log; Unlocked in PostSingleWriterLock. - recordInfo.SetLockExclusiveBit(); - } - } + public void SingleWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address) + => _clientSession.functions.SingleWriter(ref key, ref input, ref src, ref dst, ref output, ref recordInfo, address); [MethodImpl(MethodImplOptions.AggressiveInlining)] - public void PostSingleWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address) - { - if (!this.SupportsPostOperations) - return; - if (!this.SupportsLocking) - PostSingleWriterNoLock(ref key, ref input, ref src, ref dst, ref output, ref recordInfo, address); - else - PostSingleWriterLock(ref key, ref input, ref src, ref dst, ref output, ref recordInfo, address); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private void PostSingleWriterNoLock(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address) - { - _clientSession.functions.PostSingleWriter(ref key, ref input, ref src, ref dst, ref output, ref recordInfo, address); - } - - 
[MethodImpl(MethodImplOptions.AggressiveInlining)] - private void PostSingleWriterLock(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address) - { - // Lock was taken in SingleWriterLock - try - { - PostSingleWriterNoLock(ref key, ref input, ref src, ref dst, ref output, ref recordInfo, address); - } - finally - { - recordInfo.UnlockExclusive(); - } - } + public void PostSingleWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address) + => _clientSession.functions.PostSingleWriter(ref key, ref input, ref src, ref dst, ref output, ref recordInfo, address); [MethodImpl(MethodImplOptions.AggressiveInlining)] public bool ConcurrentWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address, out bool lockFailed) @@ -954,47 +917,12 @@ public bool NeedInitialUpdate(ref Key key, ref Input input, ref Output output) => _clientSession.functions.NeedInitialUpdate(ref key, ref input, ref output); [MethodImpl(MethodImplOptions.AggressiveInlining)] - public void InitialUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, long address) - { - _clientSession.functions.InitialUpdater(ref key, ref input, ref value, ref output, ref recordInfo, address); - - if (this.SupportsPostOperations && this.SupportsLocking) - { - // Lock ephemerally before we CAS into the log; Unlocked in PostInitialUpdaterLock. 
- recordInfo.SetLockExclusiveBit(); - } - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public void PostInitialUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, long address) - { - if (!this.SupportsPostOperations) - return; - if (!this.SupportsLocking) - PostInitialUpdaterNoLock(ref key, ref input, ref value, ref output, ref recordInfo, address); - else - PostInitialUpdaterLock(ref key, ref input, ref value, ref output, ref recordInfo, address); - } + public void InitialUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, long address) + => _clientSession.functions.InitialUpdater(ref key, ref input, ref value, ref output, ref recordInfo, address); [MethodImpl(MethodImplOptions.AggressiveInlining)] - private void PostInitialUpdaterNoLock(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, long address) - { - _clientSession.functions.PostInitialUpdater(ref key, ref input, ref value, ref output, ref recordInfo, address); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private void PostInitialUpdaterLock(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, long address) - { - // Lock was taken in InitialUpdaterLock - try - { - PostInitialUpdaterNoLock(ref key, ref input, ref value, ref output, ref recordInfo, address); - } - finally - { - recordInfo.UnlockExclusive(); - } - } + public void PostInitialUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, long address) + => _clientSession.functions.PostInitialUpdater(ref key, ref input, ref value, ref output, ref recordInfo, address); #endregion InitialUpdater #region CopyUpdater @@ -1003,47 +931,12 @@ public bool NeedCopyUpdate(ref Key key, ref Input input, ref Value oldValue, ref => _clientSession.functions.NeedCopyUpdate(ref key, ref input, ref oldValue, ref output); 
[MethodImpl(MethodImplOptions.AggressiveInlining)] - public void CopyUpdater(ref Key key, ref Input input, ref Value oldValue, ref Value newValue, ref Output output, ref RecordInfo recordInfo, long address) - { - _clientSession.functions.CopyUpdater(ref key, ref input, ref oldValue, ref newValue, ref output, ref recordInfo, address); - - if (this.SupportsPostOperations && this.SupportsLocking) - { - // Lock ephemerally before we CAS into the log. Unlocked in PostInitialUpdaterLock. - recordInfo.SetLockExclusiveBit(); - } - } + public void CopyUpdater(ref Key key, ref Input input, ref Value oldValue, ref Value newValue, ref Output output, ref RecordInfo recordInfo, long address) + => _clientSession.functions.CopyUpdater(ref key, ref input, ref oldValue, ref newValue, ref output, ref recordInfo, address); [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool PostCopyUpdater(ref Key key, ref Input input, ref Value oldValue, ref Value newValue, ref Output output, ref RecordInfo recordInfo, long address) - { - if (!this.SupportsPostOperations) - return true; - return !this.SupportsLocking - ? 
PostCopyUpdaterNoLock(ref key, ref input, ref output, ref oldValue, ref newValue, ref recordInfo, address) - : PostCopyUpdaterLock(ref key, ref input, ref output, ref oldValue, ref newValue, ref recordInfo, address); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private bool PostCopyUpdaterNoLock(ref Key key, ref Input input, ref Output output, ref Value oldValue, ref Value newValue, ref RecordInfo recordInfo, long address) - { - return _clientSession.functions.PostCopyUpdater(ref key, ref input, ref oldValue, ref newValue, ref output, ref recordInfo, address); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private bool PostCopyUpdaterLock(ref Key key, ref Input input, ref Output output, ref Value oldValue, ref Value newValue, ref RecordInfo recordInfo, long address) - { - // Lock was taken in CopyUpdaterLock - try - { - // KeyIndexes do not need notification of in-place updates because the key does not change. - return !recordInfo.Tombstone && PostCopyUpdaterNoLock(ref key, ref input, ref output, ref oldValue, ref newValue, ref recordInfo, address); - } - finally - { - recordInfo.UnlockExclusive(); - } - } + public bool PostCopyUpdater(ref Key key, ref Input input, ref Value oldValue, ref Value newValue, ref Output output, ref RecordInfo recordInfo, long address) + => _clientSession.functions.PostCopyUpdater(ref key, ref input, ref oldValue, ref newValue, ref output, ref recordInfo, address); #endregion CopyUpdater #region InPlaceUpdater @@ -1089,16 +982,15 @@ public void RMWCompletionCallback(ref Key key, ref Input input, ref Output outpu #endregion IFunctions - RMWs #region IFunctions - Deletes + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void SingleDeleter(ref Key key, ref Value value, ref RecordInfo recordInfo, long address) + => _clientSession.functions.SingleDeleter(ref key, ref value, ref recordInfo, address); + [MethodImpl(MethodImplOptions.AggressiveInlining)] public void PostSingleDeleter(ref Key key, ref 
RecordInfo recordInfo, long address) { - // There is no value to lock here, so we take a RecordInfo lock in InternalDelete and release it here. recordInfo.SetDirty(); - - if (this.SupportsPostOperations) - _clientSession.functions.PostSingleDeleter(ref key, ref recordInfo, address); - if (this.SupportsLocking) - recordInfo.UnlockExclusive(); + _clientSession.functions.PostSingleDeleter(ref key, ref recordInfo, address); } [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -1141,6 +1033,12 @@ public void DeleteCompletionCallback(ref Key key, Context ctx) => _clientSession.functions.DeleteCompletionCallback(ref key, ctx); #endregion IFunctions - Deletes + #region Key and Value management + public void DisposeKey(ref Key key) { _clientSession.functions.DisposeKey(ref key); } + + public void DisposeValue(ref Value value) { _clientSession.functions.DisposeValue(ref value); } + #endregion Key and Value management + #region IFunctions - Checkpointing public void CheckpointCompletionCallback(string guid, CommitPoint commitPoint) { diff --git a/cs/src/core/ClientSession/LockableUnsafeContext.cs b/cs/src/core/ClientSession/LockableUnsafeContext.cs index 64454c58c..357643f3c 100644 --- a/cs/src/core/ClientSession/LockableUnsafeContext.cs +++ b/cs/src/core/ClientSession/LockableUnsafeContext.cs @@ -485,8 +485,6 @@ public InternalFasterSession(ClientSession false; // We only lock explicitly in Lock/Unlock, which are longer-duration locks. 
- public bool SupportsPostOperations => true; // We need this for user record locking, but check for user's setting before calling user code - public bool IsManualLocking => true; #endregion IFunctions - Optional features supported @@ -513,17 +511,12 @@ public void ReadCompletionCallback(ref Key key, ref Input input, ref Output outp #region IFunctions - Upserts [MethodImpl(MethodImplOptions.AggressiveInlining)] - public void SingleWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address) - { - _clientSession.functions.SingleWriter(ref key, ref input, ref src, ref dst, ref output, ref recordInfo, address); - } + public void SingleWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address) + => _clientSession.functions.SingleWriter(ref key, ref input, ref src, ref dst, ref output, ref recordInfo, address); [MethodImpl(MethodImplOptions.AggressiveInlining)] - public void PostSingleWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address) - { - if (_clientSession.functions.SupportsPostOperations) - _clientSession.functions.PostSingleWriter(ref key, ref input, ref src, ref dst, ref output, ref recordInfo, address); - } + public void PostSingleWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address) + => _clientSession.functions.PostSingleWriter(ref key, ref input, ref src, ref dst, ref output, ref recordInfo, address); [MethodImpl(MethodImplOptions.AggressiveInlining)] public bool ConcurrentWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address, out bool lockFailed) @@ -544,17 +537,12 @@ public bool NeedInitialUpdate(ref Key key, ref Input input, ref Output output) => _clientSession.functions.NeedInitialUpdate(ref key, ref input, 
ref output); [MethodImpl(MethodImplOptions.AggressiveInlining)] - public void InitialUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, long address) - { - _clientSession.functions.InitialUpdater(ref key, ref input, ref value, ref output, ref recordInfo, address); - } + public void InitialUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, long address) + => _clientSession.functions.InitialUpdater(ref key, ref input, ref value, ref output, ref recordInfo, address); [MethodImpl(MethodImplOptions.AggressiveInlining)] - public void PostInitialUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, long address) - { - if (_clientSession.functions.SupportsPostOperations) - _clientSession.functions.PostInitialUpdater(ref key, ref input, ref value, ref output, ref recordInfo, address); - } + public void PostInitialUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, long address) + => _clientSession.functions.PostInitialUpdater(ref key, ref input, ref value, ref output, ref recordInfo, address); #endregion InitialUpdater #region CopyUpdater @@ -567,11 +555,8 @@ public void CopyUpdater(ref Key key, ref Input input, ref Value oldValue, ref Va => _clientSession.functions.CopyUpdater(ref key, ref input, ref oldValue, ref newValue, ref output, ref recordInfo, address); [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool PostCopyUpdater(ref Key key, ref Input input, ref Value oldValue, ref Value newValue, ref Output output, ref RecordInfo recordInfo, long address) - { - return !_clientSession.functions.SupportsPostOperations - || _clientSession.functions.PostCopyUpdater(ref key, ref input, ref oldValue, ref newValue, ref output, ref recordInfo, address); - } + public bool PostCopyUpdater(ref Key key, ref Input input, ref Value oldValue, ref Value newValue, ref Output output, 
ref RecordInfo recordInfo, long address) + => _clientSession.functions.PostCopyUpdater(ref key, ref input, ref oldValue, ref newValue, ref output, ref recordInfo, address); #endregion CopyUpdater #region InPlaceUpdater @@ -591,11 +576,11 @@ public void RMWCompletionCallback(ref Key key, ref Input input, ref Output outpu #region IFunctions - Deletes [MethodImpl(MethodImplOptions.AggressiveInlining)] - public void PostSingleDeleter(ref Key key, ref RecordInfo recordInfo, long address) - { - if (_clientSession.functions.SupportsPostOperations) - _clientSession.functions.PostSingleDeleter(ref key, ref recordInfo, address); - } + public void SingleDeleter(ref Key key, ref Value value, ref RecordInfo recordInfo, long address) { value = default; } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void PostSingleDeleter(ref Key key, ref RecordInfo recordInfo, long address) + => _clientSession.functions.PostSingleDeleter(ref key, ref recordInfo, address); [MethodImpl(MethodImplOptions.AggressiveInlining)] public bool ConcurrentDeleter(ref Key key, ref Value value, ref RecordInfo recordInfo, long address, out bool lockFailed) @@ -609,6 +594,12 @@ public void DeleteCompletionCallback(ref Key key, Context ctx) => _clientSession.functions.DeleteCompletionCallback(ref key, ctx); #endregion IFunctions - Deletes + #region Key and Value management + public void DisposeKey(ref Key key) { _clientSession.functions.DisposeKey(ref key); } + + public void DisposeValue(ref Value value) { _clientSession.functions.DisposeValue(ref value); } + #endregion Key and Value management + #region IFunctions - Checkpointing public void CheckpointCompletionCallback(string guid, CommitPoint commitPoint) { diff --git a/cs/src/core/ClientSession/UnsafeContext.cs b/cs/src/core/ClientSession/UnsafeContext.cs index 24b370754..9535a21e8 100644 --- a/cs/src/core/ClientSession/UnsafeContext.cs +++ b/cs/src/core/ClientSession/UnsafeContext.cs @@ -385,8 +385,6 @@ public 
InternalFasterSession(ClientSession _clientSession.fht.SupportsLocking; - public bool SupportsPostOperations => _clientSession.functions.SupportsPostOperations; - public bool IsManualLocking => false; #endregion IFunctions - Optional features supported @@ -435,7 +433,7 @@ public void SingleWriter(ref Key key, ref Input input, ref Value src, ref Value { _clientSession.functions.SingleWriter(ref key, ref input, ref src, ref dst, ref output, ref recordInfo, address); - if (this.SupportsPostOperations && this.SupportsLocking) + if (this.SupportsLocking) { // Lock ephemerally before we CAS into the log; Unlocked in PostSingleWriterLock. recordInfo.SetLockExclusiveBit(); @@ -445,8 +443,6 @@ public void SingleWriter(ref Key key, ref Input input, ref Value src, ref Value [MethodImpl(MethodImplOptions.AggressiveInlining)] public void PostSingleWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address) { - if (!this.SupportsPostOperations) - return; if (!this.SupportsLocking) PostSingleWriterNoLock(ref key, ref input, ref src, ref dst, ref output, ref recordInfo, address); else @@ -454,10 +450,8 @@ public void PostSingleWriter(ref Key key, ref Input input, ref Value src, ref Va } [MethodImpl(MethodImplOptions.AggressiveInlining)] - private void PostSingleWriterNoLock(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address) - { - _clientSession.functions.PostSingleWriter(ref key, ref input, ref src, ref dst, ref output, ref recordInfo, address); - } + private void PostSingleWriterNoLock(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address) + => _clientSession.functions.PostSingleWriter(ref key, ref input, ref src, ref dst, ref output, ref recordInfo, address); [MethodImpl(MethodImplOptions.AggressiveInlining)] private void PostSingleWriterLock(ref Key key, ref Input input, ref Value 
src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address) @@ -524,7 +518,7 @@ public void InitialUpdater(ref Key key, ref Input input, ref Value value, ref Ou { _clientSession.functions.InitialUpdater(ref key, ref input, ref value, ref output, ref recordInfo, address); - if (this.SupportsPostOperations && this.SupportsLocking) + if (this.SupportsLocking) { // Lock ephemerally before we CAS into the log; Unlocked in PostInitialUpdaterLock. recordInfo.SetLockExclusiveBit(); @@ -534,8 +528,6 @@ public void InitialUpdater(ref Key key, ref Input input, ref Value value, ref Ou [MethodImpl(MethodImplOptions.AggressiveInlining)] public void PostInitialUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, long address) { - if (!this.SupportsPostOperations) - return; if (!this.SupportsLocking) PostInitialUpdaterNoLock(ref key, ref input, ref value, ref output, ref recordInfo, address); else @@ -573,7 +565,7 @@ public void CopyUpdater(ref Key key, ref Input input, ref Value oldValue, ref Va { _clientSession.functions.CopyUpdater(ref key, ref input, ref oldValue, ref newValue, ref output, ref recordInfo, address); - if (this.SupportsPostOperations && this.SupportsLocking) + if (this.SupportsLocking) { // Lock ephemerally before we CAS into the log. Unlocked in PostInitialUpdaterLock. recordInfo.SetLockExclusiveBit(); @@ -583,8 +575,6 @@ public void CopyUpdater(ref Key key, ref Input input, ref Value oldValue, ref Va [MethodImpl(MethodImplOptions.AggressiveInlining)] public bool PostCopyUpdater(ref Key key, ref Input input, ref Value oldValue, ref Value newValue, ref Output output, ref RecordInfo recordInfo, long address) { - if (!this.SupportsPostOperations) - return true; return !this.SupportsLocking ? 
PostCopyUpdaterNoLock(ref key, ref input, ref output, ref oldValue, ref newValue, ref recordInfo, address) : PostCopyUpdaterLock(ref key, ref input, ref output, ref oldValue, ref newValue, ref recordInfo, address); @@ -661,12 +651,14 @@ public void PostSingleDeleter(ref Key key, ref RecordInfo recordInfo, long addre // There is no value to lock here, so we take a RecordInfo lock in InternalDelete and release it here. recordInfo.SetDirty(); - if (this.SupportsPostOperations) - _clientSession.functions.PostSingleDeleter(ref key, ref recordInfo, address); + _clientSession.functions.PostSingleDeleter(ref key, ref recordInfo, address); if (this.SupportsLocking) recordInfo.UnlockExclusive(); } + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void SingleDeleter(ref Key key, ref Value value, ref RecordInfo recordInfo, long address) { value = default; } + [MethodImpl(MethodImplOptions.AggressiveInlining)] public bool ConcurrentDeleter(ref Key key, ref Value value, ref RecordInfo recordInfo, long address, out bool lockFailed) { @@ -707,6 +699,12 @@ public void DeleteCompletionCallback(ref Key key, Context ctx) => _clientSession.functions.DeleteCompletionCallback(ref key, ctx); #endregion IFunctions - Deletes + #region Key and Value management + public void DisposeKey(ref Key key) { _clientSession.functions.DisposeKey(ref key); } + + public void DisposeValue(ref Value value) { _clientSession.functions.DisposeValue(ref value); } + #endregion Key and Value management + #region IFunctions - Checkpointing public void CheckpointCompletionCallback(string guid, CommitPoint commitPoint) { diff --git a/cs/src/core/Compaction/LogCompactionFunctions.cs b/cs/src/core/Compaction/LogCompactionFunctions.cs index 21e1c910e..039e33661 100644 --- a/cs/src/core/Compaction/LogCompactionFunctions.cs +++ b/cs/src/core/Compaction/LogCompactionFunctions.cs @@ -14,8 +14,6 @@ public LogCompactionFunctions(Functions functions) _functions = functions; } - public bool SupportsPostOperations 
=> false; - public void CheckpointCompletionCallback(string sessionId, CommitPoint commitPoint) { } /// @@ -23,6 +21,8 @@ public void CheckpointCompletionCallback(string sessionId, CommitPoint commitPoi /// public bool ConcurrentReader(ref Key key, ref Input input, ref Value value, ref Output dst, ref RecordInfo recordInfo, long address) => true; + public void SingleDeleter(ref Key key, ref Value value, ref RecordInfo recordInfo, long address) { value = default; } + public void PostSingleDeleter(ref Key key, ref RecordInfo recordInfo, long address) { } /// @@ -68,5 +68,9 @@ public void SingleWriter(ref Key key, ref Input input, ref Value src, ref Value public void PostSingleWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address) { } public void UpsertCompletionCallback(ref Key key, ref Input input, ref Value value, Context ctx) { } + + public void DisposeKey(ref Key key) { } + + public void DisposeValue(ref Value value) { } } } \ No newline at end of file diff --git a/cs/src/core/Index/FASTER/FASTERImpl.cs b/cs/src/core/Index/FASTER/FASTERImpl.cs index 8bd9ec8d0..9c07fa7d9 100644 --- a/cs/src/core/Index/FASTER/FASTERImpl.cs +++ b/cs/src/core/Index/FASTER/FASTERImpl.cs @@ -712,9 +712,10 @@ private OperationStatus CreateNewRecordUpsert( latestLogicalAddress); recordInfo.Tentative = true; hlog.Serialize(ref key, newPhysicalAddress); + fasterSession.SingleDeleter(ref key, ref hlog.GetValue(newPhysicalAddress), ref recordInfo, newLogicalAddress); bool success = true; if (lowestReadCachePhysicalAddress == Constants.kInvalidAddress) diff --git a/cs/src/core/Index/Interfaces/FunctionsBase.cs b/cs/src/core/Index/Interfaces/FunctionsBase.cs index 01d3fc398..c1fdfa07f 100644 --- a/cs/src/core/Index/Interfaces/FunctionsBase.cs +++ b/cs/src/core/Index/Interfaces/FunctionsBase.cs @@ -26,9 +26,6 @@ protected FunctionsBase(bool locking = false, bool postOps = false) this.postOps = postOps; } - /// - public 
virtual bool SupportsPostOperations => this.postOps; - /// public virtual bool ConcurrentReader(ref Key key, ref Input input, ref Value value, ref Output dst, ref RecordInfo recordInfo, long address) => true; /// @@ -57,9 +54,13 @@ public virtual void CopyUpdater(ref Key key, ref Input input, ref Value oldValue public virtual bool InPlaceUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, long address) => true; /// + public virtual void SingleDeleter(ref Key key, ref Value value, ref RecordInfo recordInfo, long address) { value = default; } public virtual void PostSingleDeleter(ref Key key, ref RecordInfo recordInfo, long address) { } public virtual bool ConcurrentDeleter(ref Key key, ref Value value, ref RecordInfo recordInfo, long address) => true; + public virtual void DisposeKey(ref Key key) { } + public virtual void DisposeValue(ref Value value) { } + /// public virtual void ReadCompletionCallback(ref Key key, ref Input input, ref Output output, Context ctx, Status status, RecordMetadata recordMetadata) { } /// diff --git a/cs/src/core/Index/Interfaces/IFasterSession.cs b/cs/src/core/Index/Interfaces/IFasterSession.cs index 132cd4da1..1fdea2939 100644 --- a/cs/src/core/Index/Interfaces/IFasterSession.cs +++ b/cs/src/core/Index/Interfaces/IFasterSession.cs @@ -28,8 +28,6 @@ internal interface IFasterSession : IFasterS #region Optional features supported by this implementation bool SupportsLocking { get; } - bool SupportsPostOperations { get; } - bool IsManualLocking { get; } #endregion Optional features supported by this implementation @@ -67,11 +65,17 @@ internal interface IFasterSession : IFasterS #endregion RMWs #region Deletes + void SingleDeleter(ref Key key, ref Value value, ref RecordInfo recordInfo, long address); void PostSingleDeleter(ref Key key, ref RecordInfo recordInfo, long address); bool ConcurrentDeleter(ref Key key, ref Value value, ref RecordInfo recordInfo, long address, out bool lockFailed); 
void DeleteCompletionCallback(ref Key key, Context ctx); #endregion Deletes + #region Key and Value management + void DisposeKey(ref Key key); + void DisposeValue(ref Value value); + #endregion Key and Value management + bool CompletePendingWithOutputs(out CompletedOutputIterator completedOutputs, bool wait = false, bool spinWaitForCommit = false); IHeapContainer GetHeapContainer(ref Input input); diff --git a/cs/src/core/Index/Interfaces/IFunctions.cs b/cs/src/core/Index/Interfaces/IFunctions.cs index 52162a29e..86632a3cb 100644 --- a/cs/src/core/Index/Interfaces/IFunctions.cs +++ b/cs/src/core/Index/Interfaces/IFunctions.cs @@ -13,16 +13,6 @@ namespace FASTER.core /// public interface IFunctions { - #region Optional features supported by this implementation - /// - /// Whether this Functions instance supports operations on records after they have been successfully appended to the log. For example, - /// after copies a list, - /// can add items to it. - /// - /// Once the record has been appended it is visible to other sessions, so locking will be done per - bool SupportsPostOperations { get; } - #endregion Optional features supported by this implementation - #region Reads /// /// Non-concurrent reader. @@ -205,6 +195,17 @@ public interface IFunctions #endregion RMWs #region Deletes + /// + /// Single deleter; called on an Delete that does not find the record in the mutable range and so inserts a new record. + /// + /// The key for the record to be deleted + /// The value for the record being deleted; because this method is called only for in-place updates, there is a previous value there. Usually this is ignored or assigned 'default'. + /// A reference to the header of the record + /// The logical address of the record being deleted; used as a RecordId by indexing + /// For Object Value types, Dispose() can be called here. If recordInfo.Invalid is true, this is called after the record was allocated and populated, but could not be appended at the end of the log. 
+ /// True if the value was successfully deleted, else false (e.g. the record was sealed) + void SingleDeleter(ref Key key, ref Value value, ref RecordInfo recordInfo, long address); + /// /// Called after a record marking a Delete (with Tombstone set) has been successfully inserted at the tail of the log. /// @@ -234,6 +235,20 @@ public interface IFunctions void DeleteCompletionCallback(ref Key key, Context ctx); #endregion Deletes + #region Key and Value management + /// + /// Dispose the key; for example, in evicted log records. FASTER assumes deep-copy semantics such as cloning or refcounting. + /// + /// + void DisposeKey(ref Key key); + + /// + /// Dispose the value; for example, in evicted log records. FASTER assumes deep-copy semantics such as cloning or refcounting. + /// + /// + void DisposeValue(ref Value value); + #endregion Key and Value management + #region Checkpointing /// /// Checkpoint completion callback (called per client session) diff --git a/cs/test/LockableUnsafeContextTests.cs b/cs/test/LockableUnsafeContextTests.cs index 180edd357..8bde8ab70 100644 --- a/cs/test/LockableUnsafeContextTests.cs +++ b/cs/test/LockableUnsafeContextTests.cs @@ -21,8 +21,6 @@ internal class LockableUnsafeFunctions : SimpleFunctions { internal long deletedRecordAddress; - public override bool SupportsPostOperations => true; - public override void PostSingleDeleter(ref int key, ref RecordInfo recordInfo, long address) { deletedRecordAddress = address; From a62e727c49a675d2db8866ae4da73225bce86644 Mon Sep 17 00:00:00 2001 From: TedHartMS <15467143+TedHartMS@users.noreply.github.com> Date: Tue, 18 Jan 2022 00:52:38 -0800 Subject: [PATCH 22/25] Add CopyWriter, to distinguish "maintentance" copying (CopyToTail or copy to ReadCache) from API-parameter copies --- cs/benchmark/Functions.cs | 6 ++++ cs/src/core/ClientSession/ClientSession.cs | 21 ++++++++++++++ .../ClientSession/LockableUnsafeContext.cs | 28 +++++++++++-------- cs/src/core/ClientSession/UnsafeContext.cs | 
21 ++++++++++++++ .../core/Compaction/LogCompactionFunctions.cs | 6 ++++ cs/src/core/Index/FASTER/FASTERImpl.cs | 12 ++++---- cs/src/core/Index/Interfaces/FunctionsBase.cs | 1 + .../core/Index/Interfaces/IFasterSession.cs | 2 ++ cs/src/core/Index/Interfaces/IFunctions.cs | 15 ++++++++-- cs/src/core/VarLen/MemoryFunctions.cs | 6 ++++ cs/test/AdvancedLockTests.cs | 13 +++++++-- cs/test/FunctionPerSessionTests.cs | 2 +- 12 files changed, 109 insertions(+), 24 deletions(-) diff --git a/cs/benchmark/Functions.cs b/cs/benchmark/Functions.cs index 52cfd27b9..f5dd609d0 100644 --- a/cs/benchmark/Functions.cs +++ b/cs/benchmark/Functions.cs @@ -65,6 +65,12 @@ public void SingleWriter(ref Key key, ref Input input, ref Value src, ref Value dst = src; } + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void CopyWriter(ref Key key, ref Value src, ref Value dst, ref RecordInfo recordInfo, long address) + { + dst = src; + } + [MethodImpl(MethodImplOptions.AggressiveInlining)] public bool ConcurrentWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address) { diff --git a/cs/src/core/ClientSession/ClientSession.cs b/cs/src/core/ClientSession/ClientSession.cs index 4dcc47d5f..ad34f768f 100644 --- a/cs/src/core/ClientSession/ClientSession.cs +++ b/cs/src/core/ClientSession/ClientSession.cs @@ -907,6 +907,27 @@ private void PostSingleWriterLock(ref Key key, ref Input input, ref Value src, r } } + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void CopyWriter(ref Key key, ref Value src, ref Value dst, ref RecordInfo recordInfo, long address) + { + _clientSession.functions.CopyWriter(ref key, ref src, ref dst, ref recordInfo, address); + + if (this.SupportsPostOperations && this.SupportsLocking) + { + // Lock ephemerally before we CAS into the log; Unlocked in PostSingleWriterLock. 
+ recordInfo.SetLockExclusiveBit(); + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void PostCopyWriter(ref Key key, ref Value src, ref Value dst, ref RecordInfo recordInfo, long address) + { + // TODO: Placeholder for indexing + + if (this.SupportsPostOperations && this.SupportsLocking) + recordInfo.UnlockExclusive(); + } + [MethodImpl(MethodImplOptions.AggressiveInlining)] public bool ConcurrentWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address, out bool lockFailed) { diff --git a/cs/src/core/ClientSession/LockableUnsafeContext.cs b/cs/src/core/ClientSession/LockableUnsafeContext.cs index 900498b94..6031c0e17 100644 --- a/cs/src/core/ClientSession/LockableUnsafeContext.cs +++ b/cs/src/core/ClientSession/LockableUnsafeContext.cs @@ -492,10 +492,8 @@ public InternalFasterSession(ClientSession _clientSession.functions.SingleReader(ref key, ref input, ref value, ref dst, ref recordInfo, address); [MethodImpl(MethodImplOptions.AggressiveInlining)] public bool ConcurrentReader(ref Key key, ref Input input, ref Value value, ref Output dst, ref RecordInfo recordInfo, long address, out bool lockFailed) @@ -513,10 +511,8 @@ public void ReadCompletionCallback(ref Key key, ref Input input, ref Output outp #region IFunctions - Upserts [MethodImpl(MethodImplOptions.AggressiveInlining)] - public void SingleWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address) - { - _clientSession.functions.SingleWriter(ref key, ref input, ref src, ref dst, ref output, ref recordInfo, address); - } + public void SingleWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address) + => _clientSession.functions.SingleWriter(ref key, ref input, ref src, ref dst, ref output, ref recordInfo, address); [MethodImpl(MethodImplOptions.AggressiveInlining)] public void 
PostSingleWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address) @@ -525,6 +521,16 @@ public void PostSingleWriter(ref Key key, ref Input input, ref Value src, ref Va _clientSession.functions.PostSingleWriter(ref key, ref input, ref src, ref dst, ref output, ref recordInfo, address); } + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void CopyWriter(ref Key key, ref Value src, ref Value dst, ref RecordInfo recordInfo, long address) + => _clientSession.functions.CopyWriter(ref key, ref src, ref dst, ref recordInfo, address); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void PostCopyWriter(ref Key key, ref Value src, ref Value dst, ref RecordInfo recordInfo, long address) + { + // TODO: Placeholder for indexing + } + [MethodImpl(MethodImplOptions.AggressiveInlining)] public bool ConcurrentWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address, out bool lockFailed) { @@ -544,10 +550,8 @@ public bool NeedInitialUpdate(ref Key key, ref Input input, ref Output output) => _clientSession.functions.NeedInitialUpdate(ref key, ref input, ref output); [MethodImpl(MethodImplOptions.AggressiveInlining)] - public void InitialUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, long address) - { - _clientSession.functions.InitialUpdater(ref key, ref input, ref value, ref output, ref recordInfo, address); - } + public void InitialUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, long address) + => _clientSession.functions.InitialUpdater(ref key, ref input, ref value, ref output, ref recordInfo, address); [MethodImpl(MethodImplOptions.AggressiveInlining)] public void PostInitialUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, long address) diff --git 
a/cs/src/core/ClientSession/UnsafeContext.cs b/cs/src/core/ClientSession/UnsafeContext.cs index 24b370754..c79007e5d 100644 --- a/cs/src/core/ClientSession/UnsafeContext.cs +++ b/cs/src/core/ClientSession/UnsafeContext.cs @@ -473,6 +473,27 @@ private void PostSingleWriterLock(ref Key key, ref Input input, ref Value src, r } } + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void CopyWriter(ref Key key, ref Value src, ref Value dst, ref RecordInfo recordInfo, long address) + { + _clientSession.functions.CopyWriter(ref key, ref src, ref dst, ref recordInfo, address); + + if (this.SupportsPostOperations && this.SupportsLocking) + { + // Lock ephemerally before we CAS into the log; Unlocked in PostSingleWriterLock. + recordInfo.SetLockExclusiveBit(); + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void PostCopyWriter(ref Key key, ref Value src, ref Value dst, ref RecordInfo recordInfo, long address) + { + // TODO: Placeholder for indexing + + if (this.SupportsLocking) + recordInfo.UnlockExclusive(); + } + [MethodImpl(MethodImplOptions.AggressiveInlining)] public bool ConcurrentWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address, out bool lockFailed) { diff --git a/cs/src/core/Compaction/LogCompactionFunctions.cs b/cs/src/core/Compaction/LogCompactionFunctions.cs index 21e1c910e..5cec64a29 100644 --- a/cs/src/core/Compaction/LogCompactionFunctions.cs +++ b/cs/src/core/Compaction/LogCompactionFunctions.cs @@ -65,8 +65,14 @@ public void RMWCompletionCallback(ref Key key, ref Input input, ref Output outpu /// public void SingleWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address) => _functions.SingleWriter(ref key, ref input, ref src, ref dst, ref output, ref recordInfo, address); + public void PostSingleWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref 
RecordInfo recordInfo, long address) { } + public void CopyWriter(ref Key key, ref Value src, ref Value dst, ref RecordInfo recordInfo, long address) + => _functions.CopyWriter(ref key, ref src, ref dst, ref recordInfo, address); + + public void PostCopyWriter(ref Key key, ref Value src, ref Value dst, ref RecordInfo recordInfo, long address) { } + public void UpsertCompletionCallback(ref Key key, ref Input input, ref Value value, Context ctx) { } } } \ No newline at end of file diff --git a/cs/src/core/Index/FASTER/FASTERImpl.cs b/cs/src/core/Index/FASTER/FASTERImpl.cs index 8bd9ec8d0..bda81fe20 100644 --- a/cs/src/core/Index/FASTER/FASTERImpl.cs +++ b/cs/src/core/Index/FASTER/FASTERImpl.cs @@ -2474,8 +2474,8 @@ internal OperationStatus InternalTryCopyToTail dst = src; /// public virtual void PostSingleWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address) { } + public virtual void CopyWriter(ref Key key, ref Value src, ref Value dst, ref RecordInfo recordInfo, long address) => dst = src; /// public virtual void InitialUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, long address) { } diff --git a/cs/src/core/Index/Interfaces/IFasterSession.cs b/cs/src/core/Index/Interfaces/IFasterSession.cs index 132cd4da1..dfe789f40 100644 --- a/cs/src/core/Index/Interfaces/IFasterSession.cs +++ b/cs/src/core/Index/Interfaces/IFasterSession.cs @@ -42,6 +42,8 @@ internal interface IFasterSession : IFasterS #region Upserts void SingleWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address); void PostSingleWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address); + void CopyWriter(ref Key key, ref Value src, ref Value dst, ref RecordInfo recordInfo, long address); + void PostCopyWriter(ref Key key, ref Value src, ref Value 
dst, ref RecordInfo recordInfo, long address); bool ConcurrentWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address, out bool lockFailed); void UpsertCompletionCallback(ref Key key, ref Input input, ref Value value, Context ctx); #endregion Upserts diff --git a/cs/src/core/Index/Interfaces/IFunctions.cs b/cs/src/core/Index/Interfaces/IFunctions.cs index 52162a29e..74e373ba3 100644 --- a/cs/src/core/Index/Interfaces/IFunctions.cs +++ b/cs/src/core/Index/Interfaces/IFunctions.cs @@ -62,8 +62,7 @@ public interface IFunctions #region Upserts /// - /// Non-concurrent writer; called on an Upsert that does not find the key so does an insert or finds the key's record in the immutable region so does a read/copy/update (RCU), - /// or when copying reads fetched from disk to either read cache or tail of log. + /// Non-concurrent writer; called on an Upsert that does not find the key so does an insert or finds the key's record in the immutable region so does a read/copy/update (RCU). /// /// The key for this record /// The user input to be used for computing @@ -95,10 +94,20 @@ public interface IFunctions /// The location where is to be copied; because this method is called only for in-place updates, there is a previous value there. /// The location where the result of the update may be placed /// A reference to the header of the record - /// The logical address of the record being copied to; used as a RecordId by indexing"/> + /// The logical address of the record being copied to; used as a RecordId by indexing /// True if the value was written, else false bool ConcurrentWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address); + /// + /// Basic copy operation used in maintenance operations such as copying reads fetched from disk to either read cache or tail of log. 
+ /// + /// The key for this record + /// The previous value to be copied/updated + /// The destination to be updated; because this is an copy to a new location, there is no previous value there. + /// A reference to the header of the record + /// The logical address of the record being copied to + void CopyWriter(ref Key key, ref Value src, ref Value dst, ref RecordInfo recordInfo, long address); + /// /// Upsert completion /// diff --git a/cs/src/core/VarLen/MemoryFunctions.cs b/cs/src/core/VarLen/MemoryFunctions.cs index 63c9ad16f..0e7555b1a 100644 --- a/cs/src/core/VarLen/MemoryFunctions.cs +++ b/cs/src/core/VarLen/MemoryFunctions.cs @@ -28,6 +28,12 @@ public override void SingleWriter(ref Key key, ref Memory input, ref Memory + public override void CopyWriter(ref Key key, ref Memory src, ref Memory dst, ref RecordInfo recordInfo, long address) + { + src.CopyTo(dst); + } + /// public override bool ConcurrentWriter(ref Key key, ref Memory input, ref Memory src, ref Memory dst, ref (IMemoryOwner, int) output, ref RecordInfo recordInfo, long address) { diff --git a/cs/test/AdvancedLockTests.cs b/cs/test/AdvancedLockTests.cs index 417236b9b..1fcffada8 100644 --- a/cs/test/AdvancedLockTests.cs +++ b/cs/test/AdvancedLockTests.cs @@ -36,10 +36,19 @@ internal class Functions : FunctionsBase internal readonly ManualResetEventSlim mres = new(); readonly Random rng = new(101); + // CopyWriter takes no Input + internal Input readCacheInput; + public Functions() : base(true) { } + public override void CopyWriter(ref int key, ref int src, ref int dst, ref RecordInfo recordInfo, long address) + { + int output = default; + SingleWriter(ref key, ref readCacheInput, ref src, ref dst, ref output, ref recordInfo, address); + } + public override void SingleWriter(ref int key, ref Input input, ref int src, ref int dst, ref int output, ref RecordInfo recordInfo, long address) { // In the wait case we are waiting for a signal that something else has completed, e.g. 
a pending Read, by the thread with SetEvent. @@ -137,12 +146,12 @@ public void SameKeyInsertAndCTTTest() key => { var sleepFlag = (iter % 5 == 0) ? LockFunctionFlags.None : LockFunctionFlags.SleepAfterEventOperation; - Input input = new() { flags = LockFunctionFlags.SetEvent | sleepFlag, sleepRangeMs = 10 }; + functions.readCacheInput = new() { flags = LockFunctionFlags.SetEvent | sleepFlag, sleepRangeMs = 10 }; int output = 0; RecordMetadata recordMetadata = default; // This will copy to ReadCache, and the test is trying to cause a race with the above Upsert. - var status = session.Read(ref key, ref input, ref output, ref recordMetadata); + var status = session.Read(ref key, ref functions.readCacheInput, ref output, ref recordMetadata); // If the Upsert completed before the Read started, we may Read() the Upserted value. if (status == Status.OK) diff --git a/cs/test/FunctionPerSessionTests.cs b/cs/test/FunctionPerSessionTests.cs index 3578c9801..fe9d39c2c 100644 --- a/cs/test/FunctionPerSessionTests.cs +++ b/cs/test/FunctionPerSessionTests.cs @@ -28,7 +28,7 @@ public override void InitialUpdater(ref int key, ref long input, ref RefCountedV value.ReferenceCount = 1; } - public override bool InPlaceUpdater(ref int key, ref long input, ref RefCountedValue value, ref Empty outpu, ref RecordInfo recordInfo, long addresst) + public override bool InPlaceUpdater(ref int key, ref long input, ref RefCountedValue value, ref Empty output, ref RecordInfo recordInfo, long address) { Interlocked.Increment(ref InPlaceCount); From 8a9d547354ab5e008f6e4b23f313e7660d5edd72 Mon Sep 17 00:00:00 2001 From: TedHartMS <15467143+TedHartMS@users.noreply.github.com> Date: Tue, 18 Jan 2022 01:30:24 -0800 Subject: [PATCH 23/25] merge fixes --- cs/src/core/ClientSession/ClientSession.cs | 15 +-- cs/src/core/ClientSession/UnsafeContext.cs | 136 ++------------------- 2 files changed, 14 insertions(+), 137 deletions(-) diff --git a/cs/src/core/ClientSession/ClientSession.cs 
b/cs/src/core/ClientSession/ClientSession.cs index f9bea1bd0..f0b503891 100644 --- a/cs/src/core/ClientSession/ClientSession.cs +++ b/cs/src/core/ClientSession/ClientSession.cs @@ -871,24 +871,13 @@ public void PostSingleWriter(ref Key key, ref Input input, ref Value src, ref Va => _clientSession.functions.PostSingleWriter(ref key, ref input, ref src, ref dst, ref output, ref recordInfo, address); [MethodImpl(MethodImplOptions.AggressiveInlining)] - public void CopyWriter(ref Key key, ref Value src, ref Value dst, ref RecordInfo recordInfo, long address) - { - _clientSession.functions.CopyWriter(ref key, ref src, ref dst, ref recordInfo, address); - - if (this.SupportsPostOperations && this.SupportsLocking) - { - // Lock ephemerally before we CAS into the log; Unlocked in PostSingleWriterLock. - recordInfo.SetLockExclusiveBit(); - } - } + public void CopyWriter(ref Key key, ref Value src, ref Value dst, ref RecordInfo recordInfo, long address) + => _clientSession.functions.CopyWriter(ref key, ref src, ref dst, ref recordInfo, address); [MethodImpl(MethodImplOptions.AggressiveInlining)] public void PostCopyWriter(ref Key key, ref Value src, ref Value dst, ref RecordInfo recordInfo, long address) { // TODO: Placeholder for indexing - - if (this.SupportsPostOperations && this.SupportsLocking) - recordInfo.UnlockExclusive(); } [MethodImpl(MethodImplOptions.AggressiveInlining)] diff --git a/cs/src/core/ClientSession/UnsafeContext.cs b/cs/src/core/ClientSession/UnsafeContext.cs index 200d4e6b7..45a669f1f 100644 --- a/cs/src/core/ClientSession/UnsafeContext.cs +++ b/cs/src/core/ClientSession/UnsafeContext.cs @@ -429,63 +429,21 @@ public void ReadCompletionCallback(ref Key key, ref Input input, ref Output outp #region IFunctions - Upserts [MethodImpl(MethodImplOptions.AggressiveInlining)] - public void SingleWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address) - { - 
_clientSession.functions.SingleWriter(ref key, ref input, ref src, ref dst, ref output, ref recordInfo, address); - - if (this.SupportsLocking) - { - // Lock ephemerally before we CAS into the log; Unlocked in PostSingleWriterLock. - recordInfo.SetLockExclusiveBit(); - } - } + public void SingleWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address) + => _clientSession.functions.SingleWriter(ref key, ref input, ref src, ref dst, ref output, ref recordInfo, address); [MethodImpl(MethodImplOptions.AggressiveInlining)] public void PostSingleWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address) - { - if (!this.SupportsLocking) - PostSingleWriterNoLock(ref key, ref input, ref src, ref dst, ref output, ref recordInfo, address); - else - PostSingleWriterLock(ref key, ref input, ref src, ref dst, ref output, ref recordInfo, address); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private void PostSingleWriterNoLock(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address) => _clientSession.functions.PostSingleWriter(ref key, ref input, ref src, ref dst, ref output, ref recordInfo, address); [MethodImpl(MethodImplOptions.AggressiveInlining)] - private void PostSingleWriterLock(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address) - { - // Lock was taken in SingleWriterLock - try - { - PostSingleWriterNoLock(ref key, ref input, ref src, ref dst, ref output, ref recordInfo, address); - } - finally - { - recordInfo.UnlockExclusive(); - } - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public void CopyWriter(ref Key key, ref Value src, ref Value dst, ref RecordInfo recordInfo, long address) - { - _clientSession.functions.CopyWriter(ref key, ref src, ref dst, ref recordInfo, address); 
- - if (this.SupportsPostOperations && this.SupportsLocking) - { - // Lock ephemerally before we CAS into the log; Unlocked in PostSingleWriterLock. - recordInfo.SetLockExclusiveBit(); - } - } + public void CopyWriter(ref Key key, ref Value src, ref Value dst, ref RecordInfo recordInfo, long address) + => _clientSession.functions.CopyWriter(ref key, ref src, ref dst, ref recordInfo, address); [MethodImpl(MethodImplOptions.AggressiveInlining)] public void PostCopyWriter(ref Key key, ref Value src, ref Value dst, ref RecordInfo recordInfo, long address) { // TODO: Placeholder for indexing - - if (this.SupportsLocking) - recordInfo.UnlockExclusive(); } [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -535,45 +493,12 @@ public bool NeedInitialUpdate(ref Key key, ref Input input, ref Output output) => _clientSession.functions.NeedInitialUpdate(ref key, ref input, ref output); [MethodImpl(MethodImplOptions.AggressiveInlining)] - public void InitialUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, long address) - { - _clientSession.functions.InitialUpdater(ref key, ref input, ref value, ref output, ref recordInfo, address); - - if (this.SupportsLocking) - { - // Lock ephemerally before we CAS into the log; Unlocked in PostInitialUpdaterLock. 
- recordInfo.SetLockExclusiveBit(); - } - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public void PostInitialUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, long address) - { - if (!this.SupportsLocking) - PostInitialUpdaterNoLock(ref key, ref input, ref value, ref output, ref recordInfo, address); - else - PostInitialUpdaterLock(ref key, ref input, ref value, ref output, ref recordInfo, address); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private void PostInitialUpdaterNoLock(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, long address) - { - _clientSession.functions.PostInitialUpdater(ref key, ref input, ref value, ref output, ref recordInfo, address); - } + public void InitialUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, long address) + => _clientSession.functions.InitialUpdater(ref key, ref input, ref value, ref output, ref recordInfo, address); [MethodImpl(MethodImplOptions.AggressiveInlining)] - private void PostInitialUpdaterLock(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, long address) - { - // Lock was taken in InitialUpdaterLock - try - { - PostInitialUpdaterNoLock(ref key, ref input, ref value, ref output, ref recordInfo, address); - } - finally - { - recordInfo.UnlockExclusive(); - } - } + public void PostInitialUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, long address) + => _clientSession.functions.PostInitialUpdater(ref key, ref input, ref value, ref output, ref recordInfo, address); #endregion InitialUpdater #region CopyUpdater @@ -582,45 +507,12 @@ public bool NeedCopyUpdate(ref Key key, ref Input input, ref Value oldValue, ref => _clientSession.functions.NeedCopyUpdate(ref key, ref input, ref oldValue, ref output); 
[MethodImpl(MethodImplOptions.AggressiveInlining)] - public void CopyUpdater(ref Key key, ref Input input, ref Value oldValue, ref Value newValue, ref Output output, ref RecordInfo recordInfo, long address) - { - _clientSession.functions.CopyUpdater(ref key, ref input, ref oldValue, ref newValue, ref output, ref recordInfo, address); - - if (this.SupportsLocking) - { - // Lock ephemerally before we CAS into the log. Unlocked in PostInitialUpdaterLock. - recordInfo.SetLockExclusiveBit(); - } - } + public void CopyUpdater(ref Key key, ref Input input, ref Value oldValue, ref Value newValue, ref Output output, ref RecordInfo recordInfo, long address) + => _clientSession.functions.CopyUpdater(ref key, ref input, ref oldValue, ref newValue, ref output, ref recordInfo, address); [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool PostCopyUpdater(ref Key key, ref Input input, ref Value oldValue, ref Value newValue, ref Output output, ref RecordInfo recordInfo, long address) - { - return !this.SupportsLocking - ? PostCopyUpdaterNoLock(ref key, ref input, ref output, ref oldValue, ref newValue, ref recordInfo, address) - : PostCopyUpdaterLock(ref key, ref input, ref output, ref oldValue, ref newValue, ref recordInfo, address); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private bool PostCopyUpdaterNoLock(ref Key key, ref Input input, ref Output output, ref Value oldValue, ref Value newValue, ref RecordInfo recordInfo, long address) - { - return _clientSession.functions.PostCopyUpdater(ref key, ref input, ref oldValue, ref newValue, ref output, ref recordInfo, address); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private bool PostCopyUpdaterLock(ref Key key, ref Input input, ref Output output, ref Value oldValue, ref Value newValue, ref RecordInfo recordInfo, long address) - { - // Lock was taken in CopyUpdaterLock - try - { - // KeyIndexes do not need notification of in-place updates because the key does not change. 
- return !recordInfo.Tombstone && PostCopyUpdaterNoLock(ref key, ref input, ref output, ref oldValue, ref newValue, ref recordInfo, address); - } - finally - { - recordInfo.UnlockExclusive(); - } - } + public bool PostCopyUpdater(ref Key key, ref Input input, ref Value oldValue, ref Value newValue, ref Output output, ref RecordInfo recordInfo, long address) + => _clientSession.functions.PostCopyUpdater(ref key, ref input, ref oldValue, ref newValue, ref output, ref recordInfo, address); #endregion CopyUpdater #region InPlaceUpdater @@ -669,12 +561,8 @@ public void RMWCompletionCallback(ref Key key, ref Input input, ref Output outpu [MethodImpl(MethodImplOptions.AggressiveInlining)] public void PostSingleDeleter(ref Key key, ref RecordInfo recordInfo, long address) { - // There is no value to lock here, so we take a RecordInfo lock in InternalDelete and release it here. recordInfo.SetDirty(); - _clientSession.functions.PostSingleDeleter(ref key, ref recordInfo, address); - if (this.SupportsLocking) - recordInfo.UnlockExclusive(); } [MethodImpl(MethodImplOptions.AggressiveInlining)] From 10ee16d325d3be9badadbf2665dfbf3e4ed011a6 Mon Sep 17 00:00:00 2001 From: TedHartMS <15467143+TedHartMS@users.noreply.github.com> Date: Wed, 19 Jan 2022 16:05:49 -0800 Subject: [PATCH 24/25] Remove obsolete Functions-level locking and postOps specifications --- .../FasterClientSessionYcsbBenchmark.cs | 7 ++- cs/benchmark/FasterSpanByteYcsbBenchmark.cs | 7 ++- cs/benchmark/FasterYcsbBenchmark.cs | 7 ++- cs/benchmark/Functions.cs | 9 ---- cs/benchmark/FunctionsSB.cs | 1 - cs/benchmark/Options.cs | 9 ++-- cs/benchmark/scripts/compare_runs.ps1 | 7 +-- cs/benchmark/scripts/run_benchmark.ps1 | 50 +++++++------------ cs/samples/StoreVarLenTypes/AsciiSumSample.cs | 9 ++-- .../AsciiSumSpanByteFunctions.cs | 2 +- .../StoreVarLenTypes/CustomMemoryFunctions.cs | 4 +- .../CustomSpanByteFunctions.cs | 2 +- cs/samples/StoreVarLenTypes/SpanByteSample.cs | 2 +- 
cs/src/core/Index/Interfaces/FunctionsBase.cs | 12 ----- cs/src/core/VarLen/MemoryFunctions.cs | 3 +- cs/src/core/VarLen/SpanByteFunctions.cs | 17 +------ cs/test/AdvancedLockTests.cs | 4 -- cs/test/BasicLockTests.cs | 4 -- cs/test/PostOperationsTests.cs | 2 +- 19 files changed, 45 insertions(+), 113 deletions(-) diff --git a/cs/benchmark/FasterClientSessionYcsbBenchmark.cs b/cs/benchmark/FasterClientSessionYcsbBenchmark.cs index 2c68520d5..bb17c72d8 100644 --- a/cs/benchmark/FasterClientSessionYcsbBenchmark.cs +++ b/cs/benchmark/FasterClientSessionYcsbBenchmark.cs @@ -44,8 +44,7 @@ internal FASTER_ClientSessionYcsbBenchmark(Key[] i_keys_, Key[] t_keys_, TestLoa txn_keys_ = t_keys_; numaStyle = testLoader.Options.NumaStyle; readPercent = testLoader.Options.ReadPercent; - var lockImpl = testLoader.LockImpl; - functions = new Functions(lockImpl != LockImpl.None, testLoader.Options.PostOps); + functions = new Functions(); input_ = new Input[8]; for (int i = 0; i < 8; i++) @@ -59,11 +58,11 @@ internal FASTER_ClientSessionYcsbBenchmark(Key[] i_keys_, Key[] t_keys_, TestLoa if (testLoader.Options.UseSmallMemoryLog) store = new FasterKV (testLoader.MaxKey / 4, new LogSettings { LogDevice = device, PreallocateLog = true, PageSizeBits = 25, SegmentSizeBits = 30, MemorySizeBits = 28 }, - new CheckpointSettings { CheckpointDir = testLoader.BackupPath }); + new CheckpointSettings { CheckpointDir = testLoader.BackupPath }, supportsLocking: testLoader.LockImpl == LockImpl.Ephemeral); else store = new FasterKV (testLoader.MaxKey / 2, new LogSettings { LogDevice = device, PreallocateLog = true }, - new CheckpointSettings { CheckpointDir = testLoader.BackupPath }); + new CheckpointSettings { CheckpointDir = testLoader.BackupPath }, supportsLocking: testLoader.LockImpl == LockImpl.Ephemeral); } internal void Dispose() diff --git a/cs/benchmark/FasterSpanByteYcsbBenchmark.cs b/cs/benchmark/FasterSpanByteYcsbBenchmark.cs index 9613dd994..63a57dd1a 100644 --- 
a/cs/benchmark/FasterSpanByteYcsbBenchmark.cs +++ b/cs/benchmark/FasterSpanByteYcsbBenchmark.cs @@ -50,8 +50,7 @@ internal FasterSpanByteYcsbBenchmark(KeySpanByte[] i_keys_, KeySpanByte[] t_keys txn_keys_ = t_keys_; numaStyle = testLoader.Options.NumaStyle; readPercent = testLoader.Options.ReadPercent; - var lockImpl = testLoader.LockImpl; - functions = new FunctionsSB(lockImpl != LockImpl.None, testLoader.Options.PostOps); + functions = new FunctionsSB(); #if DASHBOARD statsWritten = new AutoResetEvent[threadCount]; @@ -76,11 +75,11 @@ internal FasterSpanByteYcsbBenchmark(KeySpanByte[] i_keys_, KeySpanByte[] t_keys if (testLoader.Options.UseSmallMemoryLog) store = new FasterKV (testLoader.MaxKey / 2, new LogSettings { LogDevice = device, PreallocateLog = true, PageSizeBits = 22, SegmentSizeBits = 26, MemorySizeBits = 26 }, - new CheckpointSettings { CheckpointDir = testLoader.BackupPath }); + new CheckpointSettings { CheckpointDir = testLoader.BackupPath }, supportsLocking: testLoader.LockImpl == LockImpl.Ephemeral); else store = new FasterKV (testLoader.MaxKey / 2, new LogSettings { LogDevice = device, PreallocateLog = true, MemorySizeBits = 35 }, - new CheckpointSettings { CheckpointDir = testLoader.BackupPath }); + new CheckpointSettings { CheckpointDir = testLoader.BackupPath }, supportsLocking: testLoader.LockImpl == LockImpl.Ephemeral); } internal void Dispose() diff --git a/cs/benchmark/FasterYcsbBenchmark.cs b/cs/benchmark/FasterYcsbBenchmark.cs index 85de7adbe..238f91472 100644 --- a/cs/benchmark/FasterYcsbBenchmark.cs +++ b/cs/benchmark/FasterYcsbBenchmark.cs @@ -47,8 +47,7 @@ internal FASTER_YcsbBenchmark(Key[] i_keys_, Key[] t_keys_, TestLoader testLoade txn_keys_ = t_keys_; numaStyle = testLoader.Options.NumaStyle; readPercent = testLoader.Options.ReadPercent; - var lockImpl = testLoader.LockImpl; - functions = new Functions(lockImpl != LockImpl.None, testLoader.Options.PostOps); + functions = new Functions(); #if DASHBOARD statsWritten = new 
AutoResetEvent[threadCount]; @@ -76,11 +75,11 @@ internal FASTER_YcsbBenchmark(Key[] i_keys_, Key[] t_keys_, TestLoader testLoade if (testLoader.Options.UseSmallMemoryLog) store = new FasterKV (testLoader.MaxKey / 4, new LogSettings { LogDevice = device, PreallocateLog = true, PageSizeBits = 25, SegmentSizeBits = 30, MemorySizeBits = 28 }, - new CheckpointSettings { CheckpointDir = testLoader.BackupPath }); + new CheckpointSettings { CheckpointDir = testLoader.BackupPath }, supportsLocking: testLoader.LockImpl == LockImpl.Ephemeral); else store = new FasterKV (testLoader.MaxKey / 2, new LogSettings { LogDevice = device, PreallocateLog = true }, - new CheckpointSettings { CheckpointDir = testLoader.BackupPath }); + new CheckpointSettings { CheckpointDir = testLoader.BackupPath }, supportsLocking: testLoader.LockImpl == LockImpl.Ephemeral); } internal void Dispose() diff --git a/cs/benchmark/Functions.cs b/cs/benchmark/Functions.cs index c4857690c..bebefcd52 100644 --- a/cs/benchmark/Functions.cs +++ b/cs/benchmark/Functions.cs @@ -9,15 +9,6 @@ namespace FASTER.benchmark { public struct Functions : IFunctions { - readonly bool locking; - readonly bool postOps; - - public Functions(bool locking, bool postOps) - { - this.locking = locking; - this.postOps = postOps; - } - public void RMWCompletionCallback(ref Key key, ref Input input, ref Output output, Empty ctx, Status status, RecordMetadata recordMetadata) { } diff --git a/cs/benchmark/FunctionsSB.cs b/cs/benchmark/FunctionsSB.cs index 9ff3382a0..fb80c07cf 100644 --- a/cs/benchmark/FunctionsSB.cs +++ b/cs/benchmark/FunctionsSB.cs @@ -7,6 +7,5 @@ namespace FASTER.benchmark { public sealed class FunctionsSB : SpanByteFunctions { - public FunctionsSB(bool locking, bool postOps) : base(locking: locking) { } } } diff --git a/cs/benchmark/Options.cs b/cs/benchmark/Options.cs index a451616ac..b10c7fbd9 100644 --- a/cs/benchmark/Options.cs +++ b/cs/benchmark/Options.cs @@ -76,10 +76,6 @@ class Options HelpText = "Do not use 
thread affinitization in experiment")] public bool NoThreadAffinity { get; set; } - [Option("post", Required = false, Default = false, - HelpText = "Support post-append operations")] - public bool PostOps { get; set; } - [Option("chkptms", Required = false, Default = 0, HelpText = "If > 0, the number of milliseconds between checkpoints in experiment (else checkpointing is not done")] public int PeriodicCheckpointMilliseconds { get; set; } @@ -102,8 +98,9 @@ public string GetOptionsString() { static string boolStr(bool value) => value ? "y" : "n"; return $"d: {DistributionName.ToLower()}; n: {NumaStyle}; r: {ReadPercent}; t: {ThreadCount}; z: {LockImpl}; i: {IterationCount};" - + $" sd: {boolStr(UseSmallData)}; sm: {boolStr(UseSmallMemoryLog)}; sy: {boolStr(this.UseSyntheticData)}; noaff: {boolStr(this.NoThreadAffinity)}; post: {boolStr(this.PostOps)};" - + $" chkptms: {this.PeriodicCheckpointMilliseconds}; chkpttype: {(this.PeriodicCheckpointMilliseconds > 0 ? this.PeriodicCheckpointType.ToString() : "None")}; chkptincr: {boolStr(this.PeriodicCheckpointTryIncremental)}"; + + $" sd: {boolStr(UseSmallData)}; sm: {boolStr(UseSmallMemoryLog)}; sy: {boolStr(this.UseSyntheticData)}; noaff: {boolStr(this.NoThreadAffinity)};" + + $" chkptms: {this.PeriodicCheckpointMilliseconds}; chkpttype: {(this.PeriodicCheckpointMilliseconds > 0 ? 
this.PeriodicCheckpointType.ToString() : "None")};" + + $" chkptincr: {boolStr(this.PeriodicCheckpointTryIncremental)}"; } } } diff --git a/cs/benchmark/scripts/compare_runs.ps1 b/cs/benchmark/scripts/compare_runs.ps1 index 376e32009..9547312e8 100644 --- a/cs/benchmark/scripts/compare_runs.ps1 +++ b/cs/benchmark/scripts/compare_runs.ps1 @@ -57,7 +57,6 @@ class Result : System.IComparable, System.IEquatable[Object] { [int]$ReadPercent [uint]$ThreadCount [uint]$LockMode - [uint]$PostOpsMode [uint]$Iterations [bool]$SmallData [bool]$SmallMemory @@ -82,7 +81,6 @@ class Result : System.IComparable, System.IEquatable[Object] { "r" { $this.ReadPercent = $value } "t" { $this.ThreadCount = $value } "z" { $this.LockMode = $value } - "post" { $this.PostOpsMode = $value -eq "y" } "i" { $this.Iterations = $value } "sd" { $this.SmallData = $value -eq "y" } "sm" { $this.SmallMemory = $value -eq "y" } @@ -101,7 +99,6 @@ class Result : System.IComparable, System.IEquatable[Object] { $this.ReadPercent = $other.ReadPercent $this.ThreadCount = $other.ThreadCount $this.LockMode = $other.LockMode - $this.PostOpsMode = $other.PostOpsMode $this.Iterations = $other.Iterations $this.SmallData = $other.SmallData $this.SmallMemory = $other.SmallMemory @@ -169,7 +166,6 @@ class Result : System.IComparable, System.IEquatable[Object] { -and $this.ReadPercent -eq $other.ReadPercent -and $this.ThreadCount -eq $other.ThreadCount -and $this.LockMode -eq $other.LockMode - -and $this.PostOpsMode -eq $other.PostOpsMode -and $this.Iterations -eq $other.Iterations -and $this.SmallData -eq $other.SmallData -and $this.SmallMemory -eq $other.SmallMemory @@ -181,7 +177,7 @@ class Result : System.IComparable, System.IEquatable[Object] { } [int] GetHashCode() { - return ($this.Numa, $this.Distribution, $this.ReadPercent, $this.ThreadCount, $this.LockMode, $this.PostOpsMode, + return ($this.Numa, $this.Distribution, $this.ReadPercent, $this.ThreadCount, $this.LockMode, $this.Iterations, $this.SmallData, 
$this.SmallMemory, $this.SyntheticData, $this.NoAff, $this.ChkptMs, $this.ChkptType, $this.ChkptIncr).GetHashCode(); } @@ -284,7 +280,6 @@ function RenameProperties([System.Object[]]$results) { ReadPercent, ThreadCount, LockMode, - PostOpsMode, Iterations, SmallData, SmallMemory, diff --git a/cs/benchmark/scripts/run_benchmark.ps1 b/cs/benchmark/scripts/run_benchmark.ps1 index 8b84696ff..6120b7725 100644 --- a/cs/benchmark/scripts/run_benchmark.ps1 +++ b/cs/benchmark/scripts/run_benchmark.ps1 @@ -31,11 +31,7 @@ Used primarily to debug changes to this script or do a quick one-off run; the default is multiple counts as defined in the script. .PARAMETER LockMode - Locking mode to use: 0 = No locking, 1 = RecordInfo locking - Used primarily to debug changes to this script or do a quick one-off run; the default is multiple counts as defined in the script. - -.PARAMETER PostOpsMode - Post-append operations mode to use: 0 = No post ops, 1 = do post ops + Locking mode to use: 0 = No locking, 1 = RecordInfo locking, 2 = Manual locking Used primarily to debug changes to this script or do a quick one-off run; the default is multiple counts as defined in the script. .PARAMETER ReadPercentages @@ -85,9 +81,9 @@ Clones the master branch to the .\master folder, the branch_with_my_changes to the branch_with_my_changes folder, and runs those with any specified. .EXAMPLE - pwsh -c "./run_benchmark.ps1 master,branch_with_my_changes -CloneAndBuild -LockMode 0 -PostOpsMode 0" + pwsh -c "./run_benchmark.ps1 master,branch_with_my_changes -CloneAndBuild -LockMode 0" - Clones the master branch to the .\master folder, the branch_with_my_changes to the branch_with_my_changes folder, and runs those with no locking or post-append operations; + Clones the master branch to the .\master folder, the branch_with_my_changes to the branch_with_my_changes folder, and runs those with no locking operations; this is for best performance. 
#> param ( @@ -95,7 +91,6 @@ param ( [Parameter(Mandatory=$false)] [int]$RunSeconds = 30, [Parameter(Mandatory=$false)] [int]$ThreadCount = -1, [Parameter(Mandatory=$false)] [int]$LockMode = -1, - [Parameter(Mandatory=$false)] [int]$PostOpsMode = -1, [Parameter(Mandatory=$false)] [int[]]$ReadPercentages, [Parameter(Mandatory=$false)] [switch]$UseRecover, [Parameter(Mandatory=$false)] [switch]$CloneAndBuild, @@ -145,7 +140,6 @@ $distributions = ("uniform", "zipf") $readPercents = (0, 100) $threadCounts = (1, 20, 40, 60, 80) $lockModes = (0, 1) -$postOpsModes = (0, 1) $smallDatas = (0) #, 1) $smallMemories = (0) #, 1) $syntheticDatas = (0) #, 1) @@ -157,9 +151,6 @@ if ($ThreadCount -ge 0) { if ($LockMode -ge 0) { $lockModes = ($LockMode) } -if ($PostOpsMode -ge 0) { - $postOpsModes = ($PostOpsMode) -} if ($ReadPercentages) { $readPercents = $ReadPercentages } @@ -172,7 +163,6 @@ $permutations = $distributions.Count * $readPercents.Count * $threadCounts.Count * $lockModes.Count * - $postOpsModes.Count * $smallDatas.Count * $smallMemories.Count * $syntheticDatas.Count @@ -182,29 +172,27 @@ foreach ($d in $distributions) { foreach ($r in $readPercents) { foreach ($t in $threadCounts) { foreach ($z in $lockModes) { - foreach ($p in $postOpsModes) { - foreach ($sd in $smallDatas) { - foreach ($sm in $smallMemories) { - foreach ($sy in $syntheticDatas) { - Write-Host - Write-Host "Permutation $permutation of $permutations" + foreach ($sd in $smallDatas) { + foreach ($sm in $smallMemories) { + foreach ($sy in $syntheticDatas) { + Write-Host + Write-Host "Permutation $permutation of $permutations" - # Only certain combinations of Numa/Threads are supported - $n = ($t -lt 48) ? 0 : 1; + # Only certain combinations of Numa/Threads are supported + $n = ($t -lt 48) ? 
0 : 1; - for($ii = 0; $ii -lt $exeNames.Count; ++$ii) { - $exeName = $exeNames[$ii] - $resultDir = $resultDirs[$ii] + for($ii = 0; $ii -lt $exeNames.Count; ++$ii) { + $exeName = $exeNames[$ii] + $resultDir = $resultDirs[$ii] - Write-Host - Write-Host "Permutation $permutation/$permutations generating results $($ii + 1)/$($exeNames.Count) to $resultDir for: -n $n -d $d -r $r -t $t -z $z -post $p -i $iterations --runsec $RunSeconds $k" + Write-Host + Write-Host "Permutation $permutation/$permutations generating results $($ii + 1)/$($exeNames.Count) to $resultDir for: -n $n -d $d -r $r -t $t -z $z -post $p -i $iterations --runsec $RunSeconds $k" - # RunSec and Recover are for one-off operations and are not recorded in the filenames. - $post = $p -eq 0 ? "" : "--post" - & "$exeName" -b 0 -n $n -d $d -r $r -t $t -z $z $post -i $iterations --runsec $RunSeconds $k | Tee-Object "$resultDir/results_n-$($n)_d-$($d)_r-$($r)_t-$($t)_z-$($z)_post-$($p).txt" - } - ++$permutation + # RunSec and Recover are for one-off operations and are not recorded in the filenames. + $post = $p -eq 0 ? "" : "--post" + & "$exeName" -b 0 -n $n -d $d -r $r -t $t -z $z $post -i $iterations --runsec $RunSeconds $k | Tee-Object "$resultDir/results_n-$($n)_d-$($d)_r-$($r)_t-$($t)_z-$($z)_post-$($p).txt" } + ++$permutation } } } diff --git a/cs/samples/StoreVarLenTypes/AsciiSumSample.cs b/cs/samples/StoreVarLenTypes/AsciiSumSample.cs index 7520abe21..9c311eb18 100644 --- a/cs/samples/StoreVarLenTypes/AsciiSumSample.cs +++ b/cs/samples/StoreVarLenTypes/AsciiSumSample.cs @@ -24,15 +24,16 @@ public static void Run() using var log = Devices.CreateLogDevice("hlog.log", deleteOnClose: true); // Create store - // For custom varlen (not SpanByte), you need to provide IVariableLengthStructSettings and IFasterEqualityComparer + // For custom varlen (not SpanByte), you need to provide IVariableLengthStructSettings and IFasterEqualityComparer. 
+ // For this test we require record-level locking using var store = new FasterKV( size: 1L << 20, - logSettings: new LogSettings { LogDevice = log, MemorySizeBits = 15, PageSizeBits = 12 }); + logSettings: new LogSettings { LogDevice = log, MemorySizeBits = 15, PageSizeBits = 12 }, supportsLocking: true); // Create session for ASCII sums. We require two callback function types to be provided: - // AsciiSumSpanByteFunctions implements RMW callback functions; we require record-level locking + // AsciiSumSpanByteFunctions implements RMW callback functions // AsciiSumVLS implements the callback for computing the length of the result new value, given an old value and an input - using var s = store.For(new AsciiSumSpanByteFunctions(locking: true)).NewSession + using var s = store.For(new AsciiSumSpanByteFunctions()).NewSession (sessionVariableLengthStructSettings: new SessionVariableLengthStructSettings { valueLength = new AsciiSumVLS() }); // Create key diff --git a/cs/samples/StoreVarLenTypes/AsciiSumSpanByteFunctions.cs b/cs/samples/StoreVarLenTypes/AsciiSumSpanByteFunctions.cs index ab4a94efe..5814bf86f 100644 --- a/cs/samples/StoreVarLenTypes/AsciiSumSpanByteFunctions.cs +++ b/cs/samples/StoreVarLenTypes/AsciiSumSpanByteFunctions.cs @@ -14,7 +14,7 @@ namespace StoreVarLenTypes public sealed class AsciiSumSpanByteFunctions : SpanByteFunctions { /// - public AsciiSumSpanByteFunctions(MemoryPool memoryPool = null, bool locking = false) : base(memoryPool, locking) { } + public AsciiSumSpanByteFunctions(MemoryPool memoryPool = null) : base(memoryPool) { } /// public override void InitialUpdater(ref SpanByte key, ref SpanByte input, ref SpanByte value, ref SpanByteAndMemory output, ref RecordInfo recordInfo, long address) diff --git a/cs/samples/StoreVarLenTypes/CustomMemoryFunctions.cs b/cs/samples/StoreVarLenTypes/CustomMemoryFunctions.cs index 8c7459f9c..04d72118a 100644 --- a/cs/samples/StoreVarLenTypes/CustomMemoryFunctions.cs +++ 
b/cs/samples/StoreVarLenTypes/CustomMemoryFunctions.cs @@ -15,8 +15,8 @@ public sealed class CustomMemoryFunctions : MemoryFunctions where T : unmanaged { /// - public CustomMemoryFunctions(MemoryPool memoryPool = default, bool locking = false) - : base(memoryPool, locking) { } + public CustomMemoryFunctions(MemoryPool memoryPool = default) + : base(memoryPool) { } /// public override void ReadCompletionCallback(ref ReadOnlyMemory key, ref Memory input, ref (IMemoryOwner, int) output, T ctx, Status status, RecordMetadata recordMetadata) diff --git a/cs/samples/StoreVarLenTypes/CustomSpanByteFunctions.cs b/cs/samples/StoreVarLenTypes/CustomSpanByteFunctions.cs index 334f1a8a9..0914df2d6 100644 --- a/cs/samples/StoreVarLenTypes/CustomSpanByteFunctions.cs +++ b/cs/samples/StoreVarLenTypes/CustomSpanByteFunctions.cs @@ -13,7 +13,7 @@ namespace StoreVarLenTypes public sealed class CustomSpanByteFunctions : SpanByteFunctions_ByteArrayOutput { /// - public CustomSpanByteFunctions(bool locking = false) : base(locking) { } + public CustomSpanByteFunctions() : base() { } // Read completion callback public override void ReadCompletionCallback(ref SpanByte key, ref SpanByte input, ref byte[] output, byte ctx, Status status, RecordMetadata recordMetadata) diff --git a/cs/samples/StoreVarLenTypes/SpanByteSample.cs b/cs/samples/StoreVarLenTypes/SpanByteSample.cs index bc3a5a049..45a7a8640 100644 --- a/cs/samples/StoreVarLenTypes/SpanByteSample.cs +++ b/cs/samples/StoreVarLenTypes/SpanByteSample.cs @@ -30,7 +30,7 @@ public static void Run() logSettings: new LogSettings { LogDevice = log, MemorySizeBits = 15, PageSizeBits = 12 }); // Create session - var s = store.For(new CustomSpanByteFunctions(locking: false)).NewSession(); + var s = store.For(new CustomSpanByteFunctions()).NewSession(); Random r = new Random(100); diff --git a/cs/src/core/Index/Interfaces/FunctionsBase.cs b/cs/src/core/Index/Interfaces/FunctionsBase.cs index d3d486e99..fc2844055 100644 --- 
a/cs/src/core/Index/Interfaces/FunctionsBase.cs +++ b/cs/src/core/Index/Interfaces/FunctionsBase.cs @@ -17,15 +17,6 @@ namespace FASTER.core /// public abstract class FunctionsBase : IFunctions { - protected readonly bool locking; - protected readonly bool postOps; - - protected FunctionsBase(bool locking = false, bool postOps = false) - { - this.locking = locking; - this.postOps = postOps; - } - /// public virtual bool ConcurrentReader(ref Key key, ref Input input, ref Value value, ref Output dst, ref RecordInfo recordInfo, long address) => true; /// @@ -82,8 +73,6 @@ public virtual void CheckpointCompletionCallback(string sessionId, CommitPoint c /// public class SimpleFunctions : FunctionsBase { - public SimpleFunctions(bool locking = false, bool postOps = false) : base(locking, postOps) { } - private readonly Func merger; public SimpleFunctions() => merger = (l, r) => l; public SimpleFunctions(Func merger) => this.merger = merger; @@ -130,7 +119,6 @@ public override void CheckpointCompletionCallback(string sessionId, CommitPoint public class SimpleFunctions : SimpleFunctions { public SimpleFunctions() : base() { } - public SimpleFunctions(bool locking = false, bool postOps = false) : base(locking, postOps) { } public SimpleFunctions(Func merger) : base(merger) { } } } \ No newline at end of file diff --git a/cs/src/core/VarLen/MemoryFunctions.cs b/cs/src/core/VarLen/MemoryFunctions.cs index 0e7555b1a..95d042d42 100644 --- a/cs/src/core/VarLen/MemoryFunctions.cs +++ b/cs/src/core/VarLen/MemoryFunctions.cs @@ -16,8 +16,7 @@ public class MemoryFunctions : FunctionsBase, Me /// Constructor /// /// - /// Whether we lock values before concurrent operations (implemented using a spin lock on length header bit) - public MemoryFunctions(MemoryPool memoryPool = default, bool locking = false) : base(locking) + public MemoryFunctions(MemoryPool memoryPool = default) { this.memoryPool = memoryPool ?? 
MemoryPool.Shared; } diff --git a/cs/src/core/VarLen/SpanByteFunctions.cs b/cs/src/core/VarLen/SpanByteFunctions.cs index dbb6441eb..dde822402 100644 --- a/cs/src/core/VarLen/SpanByteFunctions.cs +++ b/cs/src/core/VarLen/SpanByteFunctions.cs @@ -10,13 +10,6 @@ namespace FASTER.core /// public class SpanByteFunctions : FunctionsBase { - /// - /// Constructor - /// - /// - /// - public SpanByteFunctions(bool locking = false, bool postOps = false) : base(locking, postOps) { } - /// public override void SingleWriter(ref Key key, ref SpanByte input, ref SpanByte src, ref SpanByte dst, ref Output output, ref RecordInfo recordInfo, long address) { @@ -73,9 +66,7 @@ public class SpanByteFunctions : SpanByteFunctions /// - /// - /// - public SpanByteFunctions(MemoryPool memoryPool = default, bool locking = false, bool postOps = false) : base(locking, postOps) + public SpanByteFunctions(MemoryPool memoryPool = default) { this.memoryPool = memoryPool ?? MemoryPool.Shared; } @@ -100,12 +91,6 @@ public unsafe override bool ConcurrentReader(ref SpanByte key, ref SpanByte inpu /// public class SpanByteFunctions_ByteArrayOutput : SpanByteFunctions { - /// - /// Constructor - /// - /// - public SpanByteFunctions_ByteArrayOutput(bool locking = false) : base(locking) { } - /// public override bool SingleReader(ref SpanByte key, ref SpanByte input, ref SpanByte value, ref byte[] dst, ref RecordInfo recordInfo, long address) { diff --git a/cs/test/AdvancedLockTests.cs b/cs/test/AdvancedLockTests.cs index 1fcffada8..9c18abfd5 100644 --- a/cs/test/AdvancedLockTests.cs +++ b/cs/test/AdvancedLockTests.cs @@ -39,10 +39,6 @@ internal class Functions : FunctionsBase // CopyWriter takes no Input internal Input readCacheInput; - public Functions() : base(true) - { - } - public override void CopyWriter(ref int key, ref int src, ref int dst, ref RecordInfo recordInfo, long address) { int output = default; diff --git a/cs/test/BasicLockTests.cs b/cs/test/BasicLockTests.cs index 
67dfcf1dc..40a5ae0ca 100644 --- a/cs/test/BasicLockTests.cs +++ b/cs/test/BasicLockTests.cs @@ -15,10 +15,6 @@ internal class BasicLockTests { internal class Functions : SimpleFunctions { - public Functions() : base(true) - { - } - static bool Increment(ref int dst) { ++dst; diff --git a/cs/test/PostOperationsTests.cs b/cs/test/PostOperationsTests.cs index 5cd581cc0..f64530d1a 100644 --- a/cs/test/PostOperationsTests.cs +++ b/cs/test/PostOperationsTests.cs @@ -24,7 +24,7 @@ internal void Clear() psdAddress = Constants.kInvalidAddress; } - internal PostFunctions() : base(locking: false, postOps: true) { } + internal PostFunctions() : base() { } public override void PostSingleWriter(ref int key, ref int input, ref int src, ref int dst, ref int output, ref RecordInfo recordInfo, long address) { this.pswAddress = address; } From 158928d50ace29cfaa558b802d518440dbbdc662 Mon Sep 17 00:00:00 2001 From: TedHartMS <15467143+TedHartMS@users.noreply.github.com> Date: Wed, 19 Jan 2022 16:12:18 -0800 Subject: [PATCH 25/25] Fix Remote build --- cs/remote/samples/FixedLenServer/Types.cs | 3 +++ cs/remote/src/FASTER.server/ServerKVFunctions.cs | 3 +++ cs/remote/src/FASTER.server/SpanByteFunctionsForServer.cs | 2 +- 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/cs/remote/samples/FixedLenServer/Types.cs b/cs/remote/samples/FixedLenServer/Types.cs index 54ee525f3..7c906362c 100644 --- a/cs/remote/samples/FixedLenServer/Types.cs +++ b/cs/remote/samples/FixedLenServer/Types.cs @@ -83,6 +83,9 @@ public bool ConcurrentReader(ref Key key, ref Input input, ref Value value, ref } // Upsert functions + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void CopyWriter(ref Key key, ref Value src, ref Value dst, ref RecordInfo recordInfo, long address) => dst = src; + [MethodImpl(MethodImplOptions.AggressiveInlining)] public void SingleWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address) => dst = 
src; diff --git a/cs/remote/src/FASTER.server/ServerKVFunctions.cs b/cs/remote/src/FASTER.server/ServerKVFunctions.cs index c4f507b07..ea4e534bc 100644 --- a/cs/remote/src/FASTER.server/ServerKVFunctions.cs +++ b/cs/remote/src/FASTER.server/ServerKVFunctions.cs @@ -77,6 +77,9 @@ public void SingleWriter(ref Key key, ref Input input, ref Value src, ref Value public void PostSingleWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, long address) { } + public void CopyWriter(ref Key key, ref Value src, ref Value dst, ref RecordInfo recordInfo, long address) + => functions.CopyWriter(ref key, ref src, ref dst, ref recordInfo, address); + public void UpsertCompletionCallback(ref Key key, ref Input input, ref Value value, long ctx) => functions.UpsertCompletionCallback(ref key, ref input, ref value, ctx); diff --git a/cs/remote/src/FASTER.server/SpanByteFunctionsForServer.cs b/cs/remote/src/FASTER.server/SpanByteFunctionsForServer.cs index 849f32542..2e370752f 100644 --- a/cs/remote/src/FASTER.server/SpanByteFunctionsForServer.cs +++ b/cs/remote/src/FASTER.server/SpanByteFunctionsForServer.cs @@ -20,7 +20,7 @@ public class SpanByteFunctionsForServer : SpanByteFunctions /// - public SpanByteFunctionsForServer(MemoryPool memoryPool = default) : base(true) + public SpanByteFunctionsForServer(MemoryPool memoryPool = default) { this.memoryPool = memoryPool ?? MemoryPool.Shared; }