diff --git a/README.md b/README.md index f5d37ef29..d62c57e32 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@

- FASTER logo + FASTER logo

[![NuGet](https://img.shields.io/nuget/v/Microsoft.FASTER.Core.svg)](https://www.nuget.org/packages/Microsoft.FASTER.Core/) diff --git a/cs/FASTER.sln b/cs/FASTER.sln index 1ab8db8c5..b724c1553 100644 --- a/cs/FASTER.sln +++ b/cs/FASTER.sln @@ -61,8 +61,6 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "SecondaryReaderStore", "sam EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "VersionedRead", "samples\ReadAddress\VersionedRead.csproj", "{33ED9E1B-1EF0-4984-A07A-7A26C205A446}" EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "MemOnlyCache", "samples\MemOnlyCache\MemOnlyCache.csproj", "{998D4C78-B0C5-40FF-9BDC-716BAC8CF864}" -EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "AsyncStress", "playground\AsyncStress\AsyncStress.csproj", "{9EFCF8C5-320B-473C-83DE-3815981D465B}" EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "FasterLogStress", "playground\FasterLogMLSDTest\FasterLogStress.csproj", "{E8C7FB0F-38B8-468A-B1CA-8793DF8F2693}" @@ -112,6 +110,8 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "root", "root", "{CEDB9572-7 ..\README.md = ..\README.md EndProjectSection EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ResizableCacheStore", "samples\ResizableCacheStore\ResizableCacheStore.csproj", "{B4A55211-5457-44B9-8BCB-A5488C994965}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -288,14 +288,6 @@ Global {33ED9E1B-1EF0-4984-A07A-7A26C205A446}.Release|Any CPU.Build.0 = Release|x64 {33ED9E1B-1EF0-4984-A07A-7A26C205A446}.Release|x64.ActiveCfg = Release|x64 {33ED9E1B-1EF0-4984-A07A-7A26C205A446}.Release|x64.Build.0 = Release|x64 - {998D4C78-B0C5-40FF-9BDC-716BAC8CF864}.Debug|Any CPU.ActiveCfg = Debug|x64 - {998D4C78-B0C5-40FF-9BDC-716BAC8CF864}.Debug|Any CPU.Build.0 = Debug|x64 - {998D4C78-B0C5-40FF-9BDC-716BAC8CF864}.Debug|x64.ActiveCfg = Debug|x64 - {998D4C78-B0C5-40FF-9BDC-716BAC8CF864}.Debug|x64.Build.0 = Debug|x64 - {998D4C78-B0C5-40FF-9BDC-716BAC8CF864}.Release|Any CPU.ActiveCfg = Release|x64 - {998D4C78-B0C5-40FF-9BDC-716BAC8CF864}.Release|Any CPU.Build.0 = Release|x64 - {998D4C78-B0C5-40FF-9BDC-716BAC8CF864}.Release|x64.ActiveCfg = Release|x64 - {998D4C78-B0C5-40FF-9BDC-716BAC8CF864}.Release|x64.Build.0 = Release|x64 {9EFCF8C5-320B-473C-83DE-3815981D465B}.Debug|Any CPU.ActiveCfg = Debug|x64 {9EFCF8C5-320B-473C-83DE-3815981D465B}.Debug|Any CPU.Build.0 = Debug|x64 {9EFCF8C5-320B-473C-83DE-3815981D465B}.Debug|x64.ActiveCfg = Debug|x64 @@ -335,6 +327,14 @@ Global {AF996720-DB6C-4ED7-9693-B9531F0B119A}.Release|Any CPU.Build.0 = Release|Any CPU {AF996720-DB6C-4ED7-9693-B9531F0B119A}.Release|x64.ActiveCfg = Release|Any CPU {AF996720-DB6C-4ED7-9693-B9531F0B119A}.Release|x64.Build.0 = Release|Any CPU + {B4A55211-5457-44B9-8BCB-A5488C994965}.Debug|Any CPU.ActiveCfg = Debug|x64 + {B4A55211-5457-44B9-8BCB-A5488C994965}.Debug|Any CPU.Build.0 = Debug|x64 + {B4A55211-5457-44B9-8BCB-A5488C994965}.Debug|x64.ActiveCfg = Debug|x64 + {B4A55211-5457-44B9-8BCB-A5488C994965}.Debug|x64.Build.0 = Debug|x64 + {B4A55211-5457-44B9-8BCB-A5488C994965}.Release|Any CPU.ActiveCfg = Release|x64 + {B4A55211-5457-44B9-8BCB-A5488C994965}.Release|Any CPU.Build.0 = Release|x64 + {B4A55211-5457-44B9-8BCB-A5488C994965}.Release|x64.ActiveCfg = Release|x64 + {B4A55211-5457-44B9-8BCB-A5488C994965}.Release|x64.Build.0 = Release|x64 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE @@ -363,12 +363,12 @@ Global 
{E2A1C205-4D35-448C-A72F-B9A4AE28EB4E} = {62BC1134-B6E1-476A-B894-7CA278A8B6DE} {EBE313E5-22D2-4C74-BA1F-16B60404B335} = {62BC1134-B6E1-476A-B894-7CA278A8B6DE} {33ED9E1B-1EF0-4984-A07A-7A26C205A446} = {62BC1134-B6E1-476A-B894-7CA278A8B6DE} - {998D4C78-B0C5-40FF-9BDC-716BAC8CF864} = {62BC1134-B6E1-476A-B894-7CA278A8B6DE} {9EFCF8C5-320B-473C-83DE-3815981D465B} = {E6026D6A-01C5-4582-B2C1-64751490DABE} {E8C7FB0F-38B8-468A-B1CA-8793DF8F2693} = {E6026D6A-01C5-4582-B2C1-64751490DABE} {A265D9D2-3FEA-48BB-B1CC-273ECFEA0611} = {E6026D6A-01C5-4582-B2C1-64751490DABE} {DC3E0640-9A36-43D0-AA37-A1B61B0BFBC9} = {62BC1134-B6E1-476A-B894-7CA278A8B6DE} {AF996720-DB6C-4ED7-9693-B9531F0B119A} = {5E4C9997-3350-4761-9FC9-F27649848B1D} + {B4A55211-5457-44B9-8BCB-A5488C994965} = {62BC1134-B6E1-476A-B894-7CA278A8B6DE} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {A0750637-2CCB-4139-B25E-F2CE740DCFAC} diff --git a/cs/benchmark/FasterSpanByteYcsbBenchmark.cs b/cs/benchmark/FasterSpanByteYcsbBenchmark.cs index 988442412..adb467b73 100644 --- a/cs/benchmark/FasterSpanByteYcsbBenchmark.cs +++ b/cs/benchmark/FasterSpanByteYcsbBenchmark.cs @@ -77,12 +77,12 @@ internal FasterSpanByteYcsbBenchmark(KeySpanByte[] i_keys_, KeySpanByte[] t_keys if (testLoader.Options.UseSmallMemoryLog) store = new FasterKV - (testLoader.MaxKey / 2, new LogSettings { LogDevice = device, PreallocateLog = true, PageSizeBits = 22, SegmentSizeBits = 26, MemorySizeBits = 26 }, - new CheckpointSettings { CheckpointDir = testLoader.BackupPath }, disableEphemeralLocking: testLoader.LockImpl != LockImpl.Ephemeral); + (testLoader.MaxKey / testLoader.Options.HashPacking, new LogSettings { LogDevice = device, PreallocateLog = true, PageSizeBits = 22, SegmentSizeBits = 26, MemorySizeBits = 26 }, + new CheckpointSettings { CheckpointDir = testLoader.BackupPath }, lockingMode: testLoader.LockingMode); else store = new FasterKV - (testLoader.MaxKey / 2, new LogSettings { LogDevice = device, PreallocateLog = true, MemorySizeBits = 35 }, - new CheckpointSettings { CheckpointDir = testLoader.BackupPath }, disableEphemeralLocking: testLoader.LockImpl != LockImpl.Ephemeral); + (testLoader.MaxKey / testLoader.Options.HashPacking, new LogSettings { LogDevice = device, PreallocateLog = true, MemorySizeBits = 35 }, + new CheckpointSettings { CheckpointDir = testLoader.BackupPath }, lockingMode: testLoader.LockingMode); } internal void Dispose() diff --git a/cs/benchmark/FasterYcsbBenchmark.cs b/cs/benchmark/FasterYcsbBenchmark.cs index fcde88918..5b2365dbc 100644 --- a/cs/benchmark/FasterYcsbBenchmark.cs +++ b/cs/benchmark/FasterYcsbBenchmark.cs @@ -77,12 +77,12 @@ internal FASTER_YcsbBenchmark(Key[] i_keys_, Key[] t_keys_, TestLoader testLoade if (testLoader.Options.UseSmallMemoryLog) store = new FasterKV - (testLoader.MaxKey / 4, new LogSettings { LogDevice = device, PreallocateLog = true, PageSizeBits = 25, SegmentSizeBits = 30, MemorySizeBits = 28 }, - new CheckpointSettings { CheckpointDir = testLoader.BackupPath }, disableEphemeralLocking: testLoader.LockImpl != LockImpl.Ephemeral); + (testLoader.MaxKey / testLoader.Options.HashPacking, new LogSettings { LogDevice = device, PreallocateLog = true, PageSizeBits = 25, SegmentSizeBits = 30, MemorySizeBits = 28 }, + new CheckpointSettings { CheckpointDir = testLoader.BackupPath }, lockingMode: testLoader.LockingMode); else store = new FasterKV - (testLoader.MaxKey / 2, new LogSettings { LogDevice = device, PreallocateLog = true }, - new CheckpointSettings { CheckpointDir = 
testLoader.BackupPath }, disableEphemeralLocking: testLoader.LockImpl != LockImpl.Ephemeral);
+                (testLoader.MaxKey / testLoader.Options.HashPacking, new LogSettings { LogDevice = device, PreallocateLog = true },
+                new CheckpointSettings { CheckpointDir = testLoader.BackupPath }, lockingMode: testLoader.LockingMode);
         }

         internal void Dispose()
@@ -310,22 +310,6 @@ internal unsafe (double, double) Run(TestLoader testLoader)
             dash.Start();
 #endif

-            ClientSession session = default;
-            LockableUnsafeContext luContext = default;
-
-            (Key key, LockType kind) xlock = (new Key { value = long.MaxValue }, LockType.Exclusive);
-            (Key key, LockType kind) slock = (new Key { value = long.MaxValue - 1 }, LockType.Shared);
-            if (testLoader.Options.LockImpl == (int)LockImpl.Manual)
-            {
-                session = store.For(functions).NewSession();
-                luContext = session.LockableUnsafeContext;
-                luContext.BeginLockable();
-
-                Console.WriteLine("Taking 2 manual locks");
-                luContext.Lock(xlock.key, xlock.kind);
-                luContext.Lock(slock.key, slock.kind);
-            }
-
             Thread[] workers = new Thread[testLoader.Options.ThreadCount];

             Console.WriteLine("Executing setup.");
@@ -432,14 +416,6 @@ internal unsafe (double, double) Run(TestLoader testLoader)
                 worker.Join();
             }

-            if (testLoader.Options.LockImpl == (int)LockImpl.Manual)
-            {
-                luContext.Unlock(xlock.key, xlock.kind);
-                luContext.Unlock(slock.key, slock.kind);
-                luContext.EndLockable();
-                session.Dispose();
-            }
-
             waiter.Reset();

 #if DASHBOARD
diff --git a/cs/benchmark/Options.cs b/cs/benchmark/Options.cs
index 0a0281fe0..02e35c00e 100644
--- a/cs/benchmark/Options.cs
+++ b/cs/benchmark/Options.cs
@@ -35,9 +35,8 @@ class Options
         [Option('z', "locking", Required = false, Default = 0,
             HelpText = "Locking Implementation:" +
                        "\n 0 = None (default)" +
-                       "\n 1 = Ephemeral locking using RecordInfo.SpinLock()" +
-                       "\n 2 = Manual locking using LockableUnsafeContext")]
-        public int LockImpl { get; set; }
+                       "\n 1 = Mixed-mode locking using main HashTable buckets")]
+        public int LockingMode { get; set; }

         [Option('i', "iterations", Required = false, Default = 1,
             HelpText = "Number of iterations of the test to run")]
@@ -71,6 +70,10 @@ class Options
             HelpText = "Use Small Memory log in experiment")]
         public bool UseSmallMemoryLog { get; set; }

+        [Option("hashpack", Required = false, Default = 2,
+            HelpText = "The hash table packing; divide the number of keys by this to cause hash collisions")]
+        public int HashPacking { get; set; }
+
         [Option("safectx", Required = false, Default = false,
             HelpText = "Use 'safe' context (slower, per-operation epoch control) in experiment")]
         public bool UseSafeContext { get; set; }
@@ -96,7 +99,7 @@ class Options
         public string GetOptionsString()
         {
             static string boolStr(bool value) => value ? "y" : "n";
-            return $"d: {DistributionName.ToLower()}; n: {NumaStyle}; r: {ReadPercent}; t: {ThreadCount}; z: {LockImpl}; i: {IterationCount};"
+            return $"d: {DistributionName.ToLower()}; n: {NumaStyle}; r: {ReadPercent}; t: {ThreadCount}; z: {LockingMode}; i: {IterationCount}; hp: {HashPacking};"
                  + $" sd: {boolStr(UseSmallData)}; sm: {boolStr(UseSmallMemoryLog)}; sy: {boolStr(this.UseSyntheticData)}; safectx: {boolStr(this.UseSafeContext)};"
                  + $" chkptms: {this.PeriodicCheckpointMilliseconds}; chkpttype: {(this.PeriodicCheckpointMilliseconds > 0 ? this.PeriodicCheckpointType.ToString() : "None")};"
                  + $" chkptincr: {boolStr(this.PeriodicCheckpointTryIncremental)}";
diff --git a/cs/benchmark/TestLoader.cs b/cs/benchmark/TestLoader.cs
index 8d73284bb..9e5f73739 100644
--- a/cs/benchmark/TestLoader.cs
+++ b/cs/benchmark/TestLoader.cs
@@ -28,7 +28,7 @@ class TestLoader
         internal KeySpanByte[] txn_span_keys = default;

         internal readonly BenchmarkType BenchmarkType;
-        internal readonly LockImpl LockImpl;
+        internal readonly LockingMode LockingMode;
         internal readonly long InitCount;
         internal readonly long TxnCount;
         internal readonly int MaxKey;
@@ -60,13 +60,21 @@ static bool verifyOption(bool isValid, string name)
             if (!verifyOption(Options.NumaStyle >= 0 && Options.NumaStyle <= 1, "NumaStyle"))
                 return;

-            this.LockImpl = (LockImpl)Options.LockImpl;
-            if (!verifyOption(Enum.IsDefined(typeof(LockImpl), this.LockImpl), "Lock Implementation"))
+            this.LockingMode = Options.LockingMode switch
+            {
+                0 => LockingMode.None,
+                1 => LockingMode.Standard,
+                _ => throw new InvalidOperationException($"Unknown Locking mode int: {Options.LockingMode}")
+            };
+            if (!verifyOption(Enum.IsDefined(typeof(LockingMode), this.LockingMode), "LockingMode"))
                 return;

             if (!verifyOption(Options.IterationCount > 0, "Iteration Count"))
                 return;

+            if (!verifyOption(Options.HashPacking > 0, "Hash Packing"))
+                return;
+
             if (!verifyOption(Options.ReadPercent >= -1 && Options.ReadPercent <= 100, "Read Percent"))
                 return;
diff --git a/cs/benchmark/YcsbConstants.cs b/cs/benchmark/YcsbConstants.cs
index 77a56b8c7..ab9a33c22 100644
--- a/cs/benchmark/YcsbConstants.cs
+++ b/cs/benchmark/YcsbConstants.cs
@@ -12,13 +12,6 @@ enum BenchmarkType : byte
         ConcurrentDictionaryYcsb
     };

-    enum LockImpl : byte
-    {
-        None = 0,
-        Ephemeral = 1,
-        Manual = 2
-    };
-
     enum AddressLineNum : int
     {
         Before = 1,
diff --git a/cs/remote/samples/FixedLenServer/Program.cs b/cs/remote/samples/FixedLenServer/Program.cs
index bc9285d5a..727dc8e7d 100644
--- a/cs/remote/samples/FixedLenServer/Program.cs
+++ b/cs/remote/samples/FixedLenServer/Program.cs
@@ -8,6 +8,7 @@
 using FASTER.server;
 using System.Diagnostics;
 using Microsoft.Extensions.Logging;
+using FASTER.core;

 namespace FasterFixedLenServer
 {
@@ -38,7 +39,7 @@ static void Main(string[] args)
                 builder.SetMinimumLevel(LogLevel.Error);
             });

-            using var server = new FixedLenServer(opts.GetServerOptions(), () => new Functions(), disableEphemeralLocking: true);
+            using var server = new FixedLenServer(opts.GetServerOptions(), () => new Functions(), lockingMode: LockingMode.Standard);
             server.Start();
             Console.WriteLine("Started server");
diff --git a/cs/remote/src/FASTER.server/Servers/FixedLenServer.cs b/cs/remote/src/FASTER.server/Servers/FixedLenServer.cs
index b0dee88de..e535b8dac 100644
--- a/cs/remote/src/FASTER.server/Servers/FixedLenServer.cs
+++ b/cs/remote/src/FASTER.server/Servers/FixedLenServer.cs
@@ -23,11 +23,11 @@ public sealed class FixedLenServer : Gener
         /// 
         /// 
         /// 
-        /// 
+        /// 
         /// 
         /// 
-        public FixedLenServer(ServerOptions opts, Func functionsGen, bool disableEphemeralLocking, MaxSizeSettings maxSizeSettings = default, ILoggerFactory loggerFactory = null)
-            : base(opts, functionsGen, new FixedLenSerializer(), new FixedLenKeySerializer(), disableEphemeralLocking, maxSizeSettings, loggerFactory)
+        public FixedLenServer(ServerOptions opts, Func functionsGen, LockingMode lockingMode, MaxSizeSettings maxSizeSettings = default, ILoggerFactory loggerFactory = null)
+            : base(opts, functionsGen, new FixedLenSerializer(), new FixedLenKeySerializer(),
lockingMode: lockingMode, maxSizeSettings, loggerFactory) { } } diff --git a/cs/remote/src/FASTER.server/Servers/GenericServer.cs b/cs/remote/src/FASTER.server/Servers/GenericServer.cs index ac28969b5..89bfb6207 100644 --- a/cs/remote/src/FASTER.server/Servers/GenericServer.cs +++ b/cs/remote/src/FASTER.server/Servers/GenericServer.cs @@ -31,11 +31,11 @@ public class GenericServer /// /// - /// + /// /// /// public GenericServer(ServerOptions opts, Func functionsGen, ParameterSerializer serializer, IKeyInputSerializer keyInputSerializer, - bool disableEphemeralLocking, MaxSizeSettings maxSizeSettings = default, ILoggerFactory loggerFactory = null) + LockingMode lockingMode, MaxSizeSettings maxSizeSettings = default, ILoggerFactory loggerFactory = null) { this.opts = opts; @@ -45,7 +45,7 @@ public GenericServer(ServerOptions opts, Func functionsGen, Parameter if (!Directory.Exists(opts.CheckpointDir)) Directory.CreateDirectory(opts.CheckpointDir); - store = new FasterKV(indexSize, logSettings, checkpointSettings, disableEphemeralLocking: disableEphemeralLocking, loggerFactory: loggerFactory); + store = new FasterKV(indexSize, logSettings, checkpointSettings, lockingMode: lockingMode, loggerFactory: loggerFactory); if (opts.Recover) { diff --git a/cs/remote/src/FASTER.server/Servers/VarLenServer.cs b/cs/remote/src/FASTER.server/Servers/VarLenServer.cs index da251d239..2c12b374c 100644 --- a/cs/remote/src/FASTER.server/Servers/VarLenServer.cs +++ b/cs/remote/src/FASTER.server/Servers/VarLenServer.cs @@ -37,7 +37,7 @@ public VarLenServer(ServerOptions opts, ILoggerFactory loggerFactory = null) if (!Directory.Exists(opts.CheckpointDir)) Directory.CreateDirectory(opts.CheckpointDir); - store = new FasterKV(indexSize, logSettings, checkpointSettings, disableEphemeralLocking: false, loggerFactory: loggerFactory); + store = new FasterKV(indexSize, logSettings, checkpointSettings, lockingMode: LockingMode.Standard, loggerFactory: loggerFactory); if (!opts.DisablePubSub) { diff --git a/cs/remote/test/FASTER.remote.test/TestUtils.cs b/cs/remote/test/FASTER.remote.test/TestUtils.cs index 511a70a8d..de8126a63 100644 --- a/cs/remote/test/FASTER.remote.test/TestUtils.cs +++ b/cs/remote/test/FASTER.remote.test/TestUtils.cs @@ -34,7 +34,7 @@ public static FixedLenServer>(opts, () => new SimpleFunctions(merger), disableEphemeralLocking: true); + return new FixedLenServer>(opts, () => new SimpleFunctions(merger), lockingMode: LockingMode.Standard); } /// diff --git a/cs/samples/MemOnlyCache/CacheSizeTracker.cs b/cs/samples/MemOnlyCache/CacheSizeTracker.cs deleted file mode 100644 index 93ee92cb7..000000000 --- a/cs/samples/MemOnlyCache/CacheSizeTracker.cs +++ /dev/null @@ -1,108 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT license. - -using FASTER.core; -using System; -using System.Threading; - -namespace MemOnlyCache -{ - /// - /// Cache size tracker - /// - public class CacheSizeTracker : IObserver> - { - readonly FasterKV store; - long storeHeapSize; - - /// - /// Target size request for FASTER - /// - public long TargetSizeBytes { get; private set; } - - /// - /// Total size (bytes) used by FASTER including index and log - /// - public long TotalSizeBytes => storeHeapSize + store.IndexSize * 64 + store.Log.MemorySizeBytes + (store.ReadCache != null ? 
store.ReadCache.MemorySizeBytes : 0) + store.OverflowBucketCount * 64; - - /// - /// Class to track and update cache size - /// - /// FASTER store instance - /// Initial target memory size of FASTER in bytes - public CacheSizeTracker(FasterKV store, long targetMemoryBytes = long.MaxValue) - { - this.store = store; - if (targetMemoryBytes < long.MaxValue) - Console.WriteLine("**** Setting initial target memory: {0,11:N2}KB", targetMemoryBytes / 1024.0); - this.TargetSizeBytes = targetMemoryBytes; - - Console.WriteLine("Index size: {0}", store.IndexSize * 64); - Console.WriteLine("Total store size: {0}", TotalSizeBytes); - - // Register subscriber to receive notifications of log evictions from memory - store.Log.SubscribeEvictions(this); - - // Include the separate read cache, if enabled - if (store.ReadCache != null) - store.ReadCache.SubscribeEvictions(this); - } - - /// - /// Set target total memory size (in bytes) for the FASTER store - /// - /// Target size - public void SetTargetSizeBytes(long newTargetSize) - { - if (newTargetSize < TargetSizeBytes) - { - TargetSizeBytes = newTargetSize; - store.Log.EmptyPageCount++; // trigger eviction to start the memory reduction process - } - else - TargetSizeBytes = newTargetSize; - } - - /// - /// Add to the tracked size of FASTER. This is called by IFunctions as well as the subscriber to evictions (OnNext) - /// - /// - public void AddTrackedSize(int size) => Interlocked.Add(ref storeHeapSize, size); - - /// - /// Subscriber to pages as they are getting evicted from main memory - /// - /// - public void OnNext(IFasterScanIterator iter) - { - int size = 0; - while (iter.GetNext(out RecordInfo info, out CacheKey key, out CacheValue value)) - { - size += key.GetSize; - if (!info.Tombstone) // ignore deleted values being evicted (they are accounted for by ConcurrentDeleter) - size += value.GetSize; - } - AddTrackedSize(-size); - - // Adjust empty page count to drive towards desired memory utilization - if (store.Log.PageAllocationStabilized()) - { - if (TotalSizeBytes > TargetSizeBytes) - store.Log.EmptyPageCount++; - else - store.Log.EmptyPageCount--; - } - } - - /// - /// OnCompleted - /// - public void OnCompleted() { } - - /// - /// OnError - /// - /// - public void OnError(Exception error) { } - } -} diff --git a/cs/samples/ResizableCacheStore/CacheSizeTracker.cs b/cs/samples/ResizableCacheStore/CacheSizeTracker.cs new file mode 100644 index 000000000..a49bbe063 --- /dev/null +++ b/cs/samples/ResizableCacheStore/CacheSizeTracker.cs @@ -0,0 +1,101 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +using FASTER.core; +using System; + +namespace ResizableCacheStore +{ + /// + /// Cache size tracker + /// + public class CacheSizeTracker + { + readonly FasterKV store; + + /// + /// Total size (bytes) used by FASTER including index and log + /// + public long TotalSizeBytes => + IndexSizeBytes + + mainLog.TotalSizeBytes + + (readCache != null ? readCache.TotalSizeBytes : 0); + + public long IndexSizeBytes => + store.IndexSize * 64 + + store.OverflowBucketCount * 64; + + public long LogSizeBytes => mainLog.TotalSizeBytes; + public long ReadCacheSizeBytes => readCache != null ? 
readCache.TotalSizeBytes : 0; + + readonly LogSizeTracker mainLog; + readonly LogSizeTracker readCache; + + public void PrintStats() + { + Console.WriteLine("Sizes: [store]: {0,8:N2}KB [index]: {1,9:N2}KB [hylog]: {2,8:N2}KB ({3,7} objs) [rcach]: {4,9:N2}KB ({5,7} objs)", + TotalSizeBytes / 1024.0, + IndexSizeBytes / 1024.0, + LogSizeBytes / 1024.0, + mainLog.NumRecords, + ReadCacheSizeBytes / 1024.0, + readCache != null ? readCache.NumRecords : 0 + ); + } + + /// + /// Class to track and update cache size + /// + /// FASTER store instance + /// Initial target memory size of FASTER in bytes + public CacheSizeTracker(FasterKV store, long targetMemoryBytes = long.MaxValue) + { + this.store = store; + this.mainLog = new LogSizeTracker(store.Log, "mnlog"); + if (store.ReadCache != null) + this.readCache = new LogSizeTracker(store.ReadCache, "readc"); + + if (targetMemoryBytes < long.MaxValue) + { + Console.WriteLine("**** Setting initial target memory: {0,11:N2}KB", targetMemoryBytes / 1024.0); + SetTargetSizeBytes(targetMemoryBytes); + } + + PrintStats(); + } + + /// + /// Set target total memory size (in bytes) for the FASTER store + /// + /// Target size + public void SetTargetSizeBytes(long newTargetSize) + { + // In this sample, we split the residual space equally between the log and the read cache + long residual = newTargetSize - IndexSizeBytes; + + if (residual > 0) + { + if (readCache == null) + mainLog.SetTargetSizeBytes(residual); + else + { + mainLog.SetTargetSizeBytes(residual / 2); + readCache.SetTargetSizeBytes(residual / 2); + } + } + } + + + /// + /// Add to the tracked size of FASTER. This is called by IFunctions as well as the subscriber to evictions (OnNext) + /// + /// + public void AddTrackedSize(int size, bool isReadCache = false) + { + if (isReadCache) + readCache.AddTrackedSize(size); + else + mainLog.AddTrackedSize(size); + } + } +} diff --git a/cs/samples/ResizableCacheStore/ISizeTracker.cs b/cs/samples/ResizableCacheStore/ISizeTracker.cs new file mode 100644 index 000000000..97b89e09d --- /dev/null +++ b/cs/samples/ResizableCacheStore/ISizeTracker.cs @@ -0,0 +1,10 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +namespace ResizableCacheStore +{ + public interface ISizeTracker + { + int GetSize { get; } + } +} diff --git a/cs/samples/ResizableCacheStore/LogSizeTracker.cs b/cs/samples/ResizableCacheStore/LogSizeTracker.cs new file mode 100644 index 000000000..72e4b0385 --- /dev/null +++ b/cs/samples/ResizableCacheStore/LogSizeTracker.cs @@ -0,0 +1,115 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +using FASTER.core; +using System; +using System.Threading; + +namespace ResizableCacheStore +{ + public class LogSizeTracker : IObserver> + where TCacheKey : ISizeTracker + where TCacheValue : ISizeTracker + { + readonly string name; + + /// + /// Number of records in the log + /// + public int NumRecords; + + /// + /// Total size occupied by log, including heap + /// + public long TotalSizeBytes => Log.MemorySizeBytes + heapSize; + + /// + /// Target size request for FASTER + /// + public long TargetSizeBytes { get; private set; } + + + int heapSize; + readonly LogAccessor Log; + + public LogSizeTracker(LogAccessor log, string name) + { + this.name = name; + Log = log; + + // Register subscriber to receive notifications of log evictions from memory + Log.SubscribeEvictions(this); + } + + /// + /// Add to the tracked size of FASTER. 
This is called by IFunctions as well as the subscriber to evictions (OnNext) + /// + /// + public void AddTrackedSize(int size) + { + Interlocked.Add(ref heapSize, size); + if (size > 0) Interlocked.Increment(ref NumRecords); + else Interlocked.Decrement(ref NumRecords); + } + + /// + /// Set target total memory size (in bytes) for the FASTER store + /// + /// Target size + public void SetTargetSizeBytes(long newTargetSize) + { + TargetSizeBytes = newTargetSize; + AdjustAllocation(); + } + public void OnNext(IFasterScanIterator iter) + { + int size = 0; + int count = 0; + while (iter.GetNext(out RecordInfo info, out TCacheKey key, out TCacheValue value)) + { + size += key.GetSize; + count++; + if (!info.Tombstone) // ignore deleted values being evicted (they are accounted for by ConcurrentDeleter) + size += value.GetSize; + } + Interlocked.Add(ref heapSize, -size); + Interlocked.Add(ref NumRecords, -count); + AdjustAllocation(); + } + + public void OnCompleted() { } + + public void OnError(Exception error) { } + + void AdjustAllocation() + { + const long Delta = 1L << 15; + if (TotalSizeBytes > TargetSizeBytes + Delta) + { + while (TotalSizeBytes > TargetSizeBytes + Delta) + { + if (Log.AllocatedPageCount > Log.BufferSize - Log.EmptyPageCount + 1) + { + // Console.WriteLine($"{name}: {Log.EmptyPageCount} (wait++)"); + return; // wait for allocation to stabilize + } + Log.EmptyPageCount++; + // Console.WriteLine($"{name}: {Log.EmptyPageCount} (++)"); + } + } + else if (TotalSizeBytes < TargetSizeBytes - Delta) + { + while (TotalSizeBytes < TargetSizeBytes - Delta) + { + if (Log.AllocatedPageCount < Log.BufferSize - Log.EmptyPageCount - 1) + { + // Console.WriteLine($"{name}: {Log.EmptyPageCount} (wait--)"); + return; // wait for allocation to stabilize + } + Log.EmptyPageCount--; + // Console.WriteLine($"{name}: {Log.EmptyPageCount} (--)"); + } + } + } + } +} diff --git a/cs/samples/MemOnlyCache/Program.cs b/cs/samples/ResizableCacheStore/Program.cs similarity index 96% rename from cs/samples/MemOnlyCache/Program.cs rename to cs/samples/ResizableCacheStore/Program.cs index 3453b78fc..29622981e 100644 --- a/cs/samples/MemOnlyCache/Program.cs +++ b/cs/samples/ResizableCacheStore/Program.cs @@ -10,7 +10,7 @@ #pragma warning disable IDE0079 // Remove unnecessary suppression #pragma warning disable CS0162 // Unreachable code detected -namespace MemOnlyCache +namespace ResizableCacheStore { class Program { @@ -85,7 +85,7 @@ class Program const string UseUniformArg = "-u"; /// - /// If true, create a log file in the {tempdir}\MemOnlyCacheSample + /// If true, create a log file in the {tempdir}\ResizableCacheStoreSample /// static bool UseLogFile = false; const string UseLogFileArg = "-l"; @@ -97,13 +97,13 @@ class Program const string QuietArg = "-q"; /// - /// Uniform random distribution (true) or Zipf distribution (false) of requests + /// Copy to tail on read /// static bool UseReadCTT = true; const string NoReadCTTArg = "--noreadctt"; /// - /// Uniform random distribution (true) or Zipf distribution (false) of requests + /// Copy to read cache on read /// static bool UseReadCache = false; const string UseReadCacheArg = "--readcache"; @@ -225,6 +225,11 @@ static bool GetArgs(string[] args) MaxKeySize = int.Parse(val); continue; } + if (arg == MaxValueSizeArg) + { + MaxValueSize = int.Parse(val); + continue; + } if (arg == MemorySizeBitsArg) { MemorySizeBits = int.Parse(val); @@ -303,7 +308,7 @@ static bool GetArgs(string[] args) return true; } - static string GetLogPath() => Path.GetTempPath() 
+ "MemOnlyCacheSample\\"; + static string GetLogPath() => Path.GetTempPath() + "ResizableCacheStoreSample\\"; static void Main(string[] args) { @@ -415,9 +420,11 @@ private static void ContinuousRandomWorkload() var ts = TimeSpan.FromSeconds(currentTimeMs / 1000); var totalElapsed = ts.ToString(); - Console.WriteLine("Throughput: {0,8:0.00}K ops/sec; Hit rate: {1:N2}; Memory footprint: {2,12:N2}KB, elapsed: {3:c}", - (currentReads - _lastReads) / (double)(currentElapsed), statusFound / (double)(statusFound + statusNotFound), - sizeTracker.TotalSizeBytes / 1024.0, totalElapsed); + Console.WriteLine("Throughput: {0,8:0.00}K ops/sec; Hit rate: {1:N2}; elapsed: {2:c}", + (currentReads - _lastReads) / (double)(currentElapsed), + statusFound / (double)(statusFound + statusNotFound), + totalElapsed); + sizeTracker.PrintStats(); Interlocked.Exchange(ref statusFound, 0); Interlocked.Exchange(ref statusNotFound, 0); diff --git a/cs/samples/MemOnlyCache/MemOnlyCache.csproj b/cs/samples/ResizableCacheStore/ResizableCacheStore.csproj similarity index 100% rename from cs/samples/MemOnlyCache/MemOnlyCache.csproj rename to cs/samples/ResizableCacheStore/ResizableCacheStore.csproj diff --git a/cs/samples/MemOnlyCache/Types.cs b/cs/samples/ResizableCacheStore/Types.cs similarity index 94% rename from cs/samples/MemOnlyCache/Types.cs rename to cs/samples/ResizableCacheStore/Types.cs index d2c4036b3..264e226d6 100644 --- a/cs/samples/MemOnlyCache/Types.cs +++ b/cs/samples/ResizableCacheStore/Types.cs @@ -5,9 +5,9 @@ using System; using System.Threading; -namespace MemOnlyCache +namespace ResizableCacheStore { - public class CacheKey : IFasterEqualityComparer + public class CacheKey : IFasterEqualityComparer, ISizeTracker { public long key; public byte[] extra; @@ -47,7 +47,7 @@ public override void Serialize(ref CacheKey obj) } } - public sealed class CacheValue + public sealed class CacheValue : ISizeTracker { public byte[] value; @@ -104,7 +104,7 @@ public override bool ConcurrentWriter(ref CacheKey key, ref CacheValue input, re public override void PostSingleWriter(ref CacheKey key, ref CacheValue input, ref CacheValue src, ref CacheValue dst, ref CacheValue output, ref UpsertInfo upsertInfo, WriteReason reason) { dst = src; - sizeTracker.AddTrackedSize(key.GetSize + src.GetSize); + sizeTracker.AddTrackedSize(key.GetSize + src.GetSize, reason == WriteReason.CopyToReadCache); } public override bool ConcurrentDeleter(ref CacheKey key, ref CacheValue value, ref DeleteInfo deleteInfo) diff --git a/cs/samples/MemOnlyCache/ZipfGenerator.cs b/cs/samples/ResizableCacheStore/ZipfGenerator.cs similarity index 97% rename from cs/samples/MemOnlyCache/ZipfGenerator.cs rename to cs/samples/ResizableCacheStore/ZipfGenerator.cs index 02932baa9..bd47d66d7 100644 --- a/cs/samples/MemOnlyCache/ZipfGenerator.cs +++ b/cs/samples/ResizableCacheStore/ZipfGenerator.cs @@ -3,7 +3,7 @@ using System; -namespace MemOnlyCache +namespace ResizableCacheStore { public class ZipfGenerator { diff --git a/cs/samples/StoreVarLenTypes/AsciiSumSample.cs b/cs/samples/StoreVarLenTypes/AsciiSumSample.cs index 31b3551c0..31788e995 100644 --- a/cs/samples/StoreVarLenTypes/AsciiSumSample.cs +++ b/cs/samples/StoreVarLenTypes/AsciiSumSample.cs @@ -28,7 +28,7 @@ public static void Run() // For this test we require record-level locking using var store = new FasterKV( size: 1L << 20, - logSettings: new LogSettings { LogDevice = log, MemorySizeBits = 15, PageSizeBits = 12 }, disableEphemeralLocking: false); + logSettings: new LogSettings { LogDevice = 
log, MemorySizeBits = 15, PageSizeBits = 12 }, lockingMode: LockingMode.Standard);

         // Create session for ASCII sums. We require two callback function types to be provided:
         //    AsciiSumSpanByteFunctions implements RMW callback functions
diff --git a/cs/src/core/Allocator/AllocatorBase.cs b/cs/src/core/Allocator/AllocatorBase.cs
index 402419d02..257244513 100644
--- a/cs/src/core/Allocator/AllocatorBase.cs
+++ b/cs/src/core/Allocator/AllocatorBase.cs
@@ -141,12 +141,14 @@ public abstract partial class AllocatorBase : IDisposable
         public long SafeReadOnlyAddress;

         /// 
-        /// Head address
+        /// The lowest in-memory address in the log. While we hold the epoch this may be changed by other threads as part of ShiftHeadAddress,
+        /// but as long as an address was >= HeadAddress while we held the epoch, it cannot be actually evicted until we release the epoch.
         /// 
         public long HeadAddress;

         /// 
-        /// Safe head address
+        /// The lowest reliable in-memory address. This is set by OnPagesClosed as the highest address of the range it is starting to close;
+        /// thus it leads ClosedUntilAddress. As long as we hold the epoch, records above this address will not be evicted.
         /// 
         public long SafeHeadAddress;

@@ -156,12 +158,13 @@ public abstract partial class AllocatorBase : IDisposable
         public long FlushedUntilAddress;

         /// 
-        /// Flushed until address
+        /// The highest address that has been closed by OnPagesClosed. It will catch up to
+        /// SafeHeadAddress when a region is closed.
         /// 
         public long ClosedUntilAddress;

         /// 
-        /// Begin address
+        /// The lowest valid address in the log
         /// 
         public long BeginAddress;

@@ -251,11 +254,6 @@ public override string ToString()
         /// 
         internal IObserver> OnEvictionObserver;

-        /// 
-        /// Observer for locked records getting evicted from memory (page closed)
-        /// 
-        internal IObserver> OnLockEvictionObserver;
-
         /// 
         /// The "event" to be waited on for flush completion by the initiator of an operation
         /// 
@@ -574,8 +572,7 @@ internal unsafe void ApplyDelta(DeltaLog log, long startPage, long endPage, long
                         // Clean up temporary bits when applying the delta log
                         ref var destInfo = ref GetInfo(destination);
-                        destInfo.ClearLocks();
-                        destInfo.Unseal();
+                        destInfo.ClearBitsForDiskImages();
                     }
                     physicalAddress += size;
                 }
@@ -1377,8 +1374,6 @@ private void OnPagesClosedWorker()
                 long start = closeStartAddress > closePageAddress ? closeStartAddress : closePageAddress;
                 long end = closeEndAddress < closePageAddress + PageSize ?
closeEndAddress : closePageAddress + PageSize; - if (OnLockEvictionObserver is not null) - MemoryPageScan(start, end, OnLockEvictionObserver); if (OnEvictionObserver is not null) MemoryPageScan(start, end, OnEvictionObserver); @@ -1948,7 +1943,7 @@ private unsafe void AsyncGetFromDiskCallback(uint errorCode, uint numBytes, obje { if (errorCode != 0) { - logger?.LogError("AsyncGetFromDiskCallback error: {errorCode}", errorCode); + logger?.LogError($"AsyncGetFromDiskCallback error: {errorCode}"); } var result = (AsyncGetFromDiskResult>)context; diff --git a/cs/src/core/Allocator/GenericAllocator.cs b/cs/src/core/Allocator/GenericAllocator.cs index 3f02085d0..fe1479ba4 100644 --- a/cs/src/core/Allocator/GenericAllocator.cs +++ b/cs/src/core/Allocator/GenericAllocator.cs @@ -424,7 +424,7 @@ private void WriteAsync(long flushPage, ulong alignedDestinationAddres if (!src[i].info.Invalid) { var address = (flushPage << LogPageSizeBits) + i * recordSize; - if (address < fuzzyStartLogicalAddress || !src[i].info.InNewVersion) + if (address < fuzzyStartLogicalAddress || !src[i].info.IsInNewVersion) { if (KeyHasObjects()) { diff --git a/cs/src/core/Allocator/LockEvictionObserver.cs b/cs/src/core/Allocator/LockEvictionObserver.cs deleted file mode 100644 index e50008533..000000000 --- a/cs/src/core/Allocator/LockEvictionObserver.cs +++ /dev/null @@ -1,47 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT license. - -using System; - -namespace FASTER.core -{ - /// - /// Observer for page-lock evictions - /// - public class LockEvictionObserver : IObserver> - { - readonly FasterKV store; - - /// - /// Class to manage lock eviction transfers to LockTable - /// - /// FASTER store instance - public LockEvictionObserver(FasterKV store) => this.store = store; - - /// - /// Subscriber to pages as they are getting evicted from main memory - /// - /// - public void OnNext(IFasterScanIterator iter) - { - while (iter.GetNext(out RecordInfo info)) - { - // Note: we do not have to worry about conflicts with other threads, because other operations - // (data operations and lock and unlock) stop at HeadAddress. 
- if (info.IsLocked) - this.store.LockTable.TransferFromLogRecord(ref iter.GetKey(), info); - } - } - - /// - /// OnCompleted - /// - public void OnCompleted() { } - - /// - /// OnError - /// - /// - public void OnError(Exception error) { } - } -} diff --git a/cs/src/core/Allocator/MemoryPageScanIterator.cs b/cs/src/core/Allocator/MemoryPageScanIterator.cs index e3aa62573..e0a026369 100644 --- a/cs/src/core/Allocator/MemoryPageScanIterator.cs +++ b/cs/src/core/Allocator/MemoryPageScanIterator.cs @@ -88,5 +88,8 @@ public ref Value GetValue() { return ref page[offset].value; } + + /// + public override string ToString() => $"BA {BeginAddress}, EA {EndAddress}, CA {CurrentAddress}, NA {NextAddress}, start {start}, end {end}, recSize {recordSize}, pageSA {pageStartAddress}"; } } diff --git a/cs/src/core/Async/AsyncOperationInternal.cs b/cs/src/core/Async/AsyncOperationInternal.cs index d5e2cdc78..dbea18bac 100644 --- a/cs/src/core/Async/AsyncOperationInternal.cs +++ b/cs/src/core/Async/AsyncOperationInternal.cs @@ -33,22 +33,20 @@ internal interface IAsyncOperation /// The instance the async call was made on /// The for the pending operation /// The for this operation - /// The for this operation /// The output to be populated by this operation /// Status DoFastOperation(FasterKV fasterKV, ref PendingContext pendingContext, IFasterSession fasterSession, - FasterExecutionContext currentCtx, out Output output); + out Output output); /// /// Performs the asynchronous operation. This may be a wait for either a page-flush or a disk-read IO. /// /// The instance the async call was made on /// The for this operation - /// The for this operation /// The for the pending operation /// The cancellation token, if any /// ValueTask DoSlowOperation(FasterKV fasterKV, IFasterSession fasterSession, - FasterExecutionContext currentCtx, PendingContext pendingContext, CancellationToken token); + PendingContext pendingContext, CancellationToken token); /// /// For RMW only, indicates whether there is a pending IO; no-op for other implementations. 
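The hunks that follow all extend one refactoring theme: the per-call currentCtx parameter disappears because the session now carries its execution context as fasterSession.Ctx. A minimal self-contained sketch of that pattern, using illustrative stand-in types rather than FASTER's real ones:

using System;

// Stand-in for FasterExecutionContext; only the field this sketch needs.
class SessionContext { public long serialNum; }

// Mirrors the Ctx property this patch relies on (assumed shape, not FASTER's actual interface).
interface ISessionLike { SessionContext Ctx { get; } }

class SessionLike : ISessionLike { public SessionContext Ctx { get; } = new SessionContext(); }

static class CtxRefactorSketch
{
    // Before: DoOperation(session, currentCtx, serialNo). After: the context travels with the session.
    static void DoOperation(ISessionLike session, long serialNo)
    {
        if (serialNo < session.Ctx.serialNum)
            throw new InvalidOperationException("Operation serial numbers must be non-decreasing");
        session.Ctx.serialNum = serialNo;
    }

    static void Main()
    {
        var session = new SessionLike();
        DoOperation(session, 1);
        DoOperation(session, 2);
        Console.WriteLine(session.Ctx.serialNum); // prints 2
    }
}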
@@ -64,19 +62,16 @@ internal sealed class AsyncOperationInternal _fasterKV; readonly IFasterSession _fasterSession; - readonly FasterExecutionContext _currentCtx; TAsyncOperation _asyncOperation; PendingContext _pendingContext; int CompletionComputeStatus; internal AsyncOperationInternal(FasterKV fasterKV, IFasterSession fasterSession, - FasterExecutionContext currentCtx, PendingContext pendingContext, - ExceptionDispatchInfo exceptionDispatchInfo, TAsyncOperation asyncOperation) + PendingContext pendingContext, ExceptionDispatchInfo exceptionDispatchInfo, TAsyncOperation asyncOperation) { _exception = exceptionDispatchInfo; _fasterKV = fasterKV; _fasterSession = fasterSession; - _currentCtx = currentCtx; _pendingContext = pendingContext; _asyncOperation = asyncOperation; CompletionComputeStatus = Pending; @@ -94,7 +89,7 @@ internal ValueTask CompleteAsync(CancellationToken token = default _exception.Throw(); // DoSlowOperation returns a new XxxAsyncResult, which contains a new UpdateAsyncInternal with a pendingContext with a default flushEvent - return _asyncOperation.DoSlowOperation(_fasterKV, _fasterSession, _currentCtx, _pendingContext, token); + return _asyncOperation.DoSlowOperation(_fasterKV, _fasterSession, _pendingContext, token); } internal TAsyncResult CompleteSync() @@ -139,9 +134,9 @@ private bool TryCompleteAsyncState(out TAsyncResult asyncResult) { if (hasPendingIO) { - _currentCtx.ioPendingRequests.Remove(pendingId); - _currentCtx.asyncPendingCount--; - _currentCtx.pendingReads.Remove(); + _fasterSession.Ctx.ioPendingRequests.Remove(pendingId); + _fasterSession.Ctx.asyncPendingCount--; + _fasterSession.Ctx.pendingReads.Remove(); } } } @@ -156,7 +151,7 @@ private bool TryCompleteSync(out TAsyncResult asyncResult) _fasterSession.UnsafeResumeThread(); try { - Status status = _asyncOperation.DoFastOperation(_fasterKV, ref _pendingContext, _fasterSession, _currentCtx, out Output output); + Status status = _asyncOperation.DoFastOperation(_fasterKV, ref _pendingContext, _fasterSession, out Output output); if (!status.IsPending) { @@ -244,7 +239,7 @@ private static async ValueTask WaitForFlushCompletionAsyn // This takes flushEvent as a parameter because we can't pass by ref to an async method. 
private static async ValueTask<(AsyncIOContext diskRequest, ExceptionDispatchInfo edi)> WaitForFlushOrIOCompletionAsync( - FasterKV @this, FasterExecutionContext currentCtx, + FasterKV @this, FasterExecutionContext sessionCtx, CompletionEvent flushEvent, AsyncIOContext diskRequest, CancellationToken token) { ExceptionDispatchInfo exceptionDispatchInfo = default; @@ -264,8 +259,8 @@ private static async ValueTask WaitForFlushCompletionAsyn else { Debug.Assert(flushEvent.IsDefault()); - currentCtx.asyncPendingCount++; - currentCtx.pendingReads.Add(); + sessionCtx.asyncPendingCount++; + sessionCtx.pendingReads.Add(); using (token.Register(() => diskRequest.asyncOperation.TrySetCanceled())) diskRequest = await diskRequest.asyncOperation.Task.WithCancellationAsync(token).ConfigureAwait(false); diff --git a/cs/src/core/Async/CompletePendingAsync.cs b/cs/src/core/Async/CompletePendingAsync.cs index 23802bfe0..237b67e58 100644 --- a/cs/src/core/Async/CompletePendingAsync.cs +++ b/cs/src/core/Async/CompletePendingAsync.cs @@ -16,38 +16,38 @@ public partial class FasterKV : FasterBase, IFasterKV /// /// Check if at least one (sync) request is ready for CompletePending to operate on /// - /// + /// /// /// - internal static ValueTask ReadyToCompletePendingAsync(FasterExecutionContext currentCtx, CancellationToken token = default) - => currentCtx.WaitPendingAsync(token); + internal static ValueTask ReadyToCompletePendingAsync(FasterExecutionContext sessionCtx, CancellationToken token = default) + => sessionCtx.WaitPendingAsync(token); /// /// Complete outstanding pending operations that were issued synchronously /// Async operations (e.g., ReadAsync) need to be completed individually /// /// - internal async ValueTask CompletePendingAsync(IFasterSession fasterSession, - FasterExecutionContext currentCtx, CancellationToken token, - CompletedOutputIterator completedOutputs) + internal async ValueTask CompletePendingAsync(FasterSession fasterSession, + CancellationToken token, CompletedOutputIterator completedOutputs) + where FasterSession : IFasterSession { while (true) { fasterSession.UnsafeResumeThread(); try { - InternalCompletePendingRequests(currentCtx, currentCtx, fasterSession, completedOutputs); + InternalCompletePendingRequests(fasterSession, completedOutputs); } finally { fasterSession.UnsafeSuspendThread(); } - await currentCtx.WaitPendingAsync(token).ConfigureAwait(false); + await fasterSession.Ctx.WaitPendingAsync(token).ConfigureAwait(false); - if (currentCtx.HasNoPendingRequests) return; + if (fasterSession.Ctx.HasNoPendingRequests) return; - InternalRefresh(currentCtx, fasterSession); + InternalRefresh(fasterSession); Thread.Yield(); } diff --git a/cs/src/core/Async/DeleteAsync.cs b/cs/src/core/Async/DeleteAsync.cs index edd267f3b..63e602ccd 100644 --- a/cs/src/core/Async/DeleteAsync.cs +++ b/cs/src/core/Async/DeleteAsync.cs @@ -18,21 +18,21 @@ internal struct DeleteAsyncOperation : IAsyncOperation public Status DoFastOperation(FasterKV fasterKV, ref PendingContext pendingContext, IFasterSession fasterSession, - FasterExecutionContext currentCtx, out Output output) + out Output output) { OperationStatus internalStatus; do { - internalStatus = fasterKV.InternalDelete(ref pendingContext.key.Get(), ref pendingContext.userContext, ref pendingContext, fasterSession, currentCtx, pendingContext.serialNum); - } while (fasterKV.HandleImmediateRetryStatus(internalStatus, currentCtx, currentCtx, fasterSession, ref pendingContext)); + internalStatus = fasterKV.InternalDelete(ref 
pendingContext.key.Get(), ref pendingContext.userContext, ref pendingContext, fasterSession, pendingContext.serialNum); + } while (fasterKV.HandleImmediateRetryStatus(internalStatus, fasterSession, ref pendingContext)); output = default; return TranslateStatus(internalStatus); } /// public ValueTask> DoSlowOperation(FasterKV fasterKV, IFasterSession fasterSession, - FasterExecutionContext currentCtx, PendingContext pendingContext, CancellationToken token) - => SlowDeleteAsync(fasterKV, fasterSession, currentCtx, pendingContext, token); + PendingContext pendingContext, CancellationToken token) + => SlowDeleteAsync(fasterKV, fasterSession, pendingContext, token); /// public bool HasPendingIO => false; @@ -55,11 +55,11 @@ internal DeleteAsyncResult(Status status) } internal DeleteAsyncResult(FasterKV fasterKV, IFasterSession fasterSession, - FasterExecutionContext currentCtx, PendingContext pendingContext, ExceptionDispatchInfo exceptionDispatchInfo) + PendingContext pendingContext, ExceptionDispatchInfo exceptionDispatchInfo) { this.Status = new(StatusCode.Pending); updateAsyncInternal = new AsyncOperationInternal, DeleteAsyncResult>( - fasterKV, fasterSession, currentCtx, pendingContext, exceptionDispatchInfo, new ()); + fasterKV, fasterSession, pendingContext, exceptionDispatchInfo, new ()); } /// Complete the Delete operation, issuing additional allocation asynchronously if needed. It is usually preferable to use Complete() instead of this. @@ -75,25 +75,20 @@ public ValueTask> CompleteAsync(Cancel } [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal ValueTask> DeleteAsync(IFasterSession fasterSession, - FasterExecutionContext currentCtx, ref Key key, Context userContext, long serialNo, CancellationToken token = default) + internal ValueTask> DeleteAsync(FasterSession fasterSession, + ref Key key, Context userContext, long serialNo, CancellationToken token = default) + where FasterSession : IFasterSession { var pcontext = new PendingContext { IsAsync = true }; - return DeleteAsync(fasterSession, currentCtx, ref pcontext, ref key, userContext, serialNo, token); - } - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal ValueTask> DeleteAsync(IFasterSession fasterSession, - FasterExecutionContext currentCtx, ref PendingContext pcontext, ref Key key, Context userContext, long serialNo, CancellationToken token) - { fasterSession.UnsafeResumeThread(); try { OperationStatus internalStatus; do { - internalStatus = InternalDelete(ref key, ref userContext, ref pcontext, fasterSession, currentCtx, serialNo); - } while (HandleImmediateRetryStatus(internalStatus, currentCtx, currentCtx, fasterSession, ref pcontext)); + internalStatus = InternalDelete(ref key, ref userContext, ref pcontext, fasterSession, serialNo); + } while (HandleImmediateRetryStatus(internalStatus, fasterSession, ref pcontext)); if (OperationStatusUtils.TryConvertToCompletedStatusCode(internalStatus, out Status status)) return new ValueTask>(new DeleteAsyncResult(status)); @@ -101,23 +96,22 @@ internal ValueTask> DeleteAsync= currentCtx.serialNum, "Operation serial numbers must be non-decreasing"); - currentCtx.serialNum = serialNo; + Debug.Assert(serialNo >= fasterSession.Ctx.serialNum, "Operation serial numbers must be non-decreasing"); + fasterSession.Ctx.serialNum = serialNo; fasterSession.UnsafeSuspendThread(); } - return SlowDeleteAsync(this, fasterSession, currentCtx, pcontext, token); + return SlowDeleteAsync(this, fasterSession, pcontext, token); } private static async ValueTask> SlowDeleteAsync( 
FasterKV @this, IFasterSession fasterSession, - FasterExecutionContext currentCtx, PendingContext pcontext, CancellationToken token = default) { ExceptionDispatchInfo exceptionDispatchInfo = await WaitForFlushCompletionAsync(@this, pcontext.flushEvent, token).ConfigureAwait(false); pcontext.flushEvent = default; - return new DeleteAsyncResult(@this, fasterSession, currentCtx, pcontext, exceptionDispatchInfo); + return new DeleteAsyncResult(@this, fasterSession, pcontext, exceptionDispatchInfo); } } } diff --git a/cs/src/core/Async/RMWAsync.cs b/cs/src/core/Async/RMWAsync.cs index 8d7b7e448..f0dfae835 100644 --- a/cs/src/core/Async/RMWAsync.cs +++ b/cs/src/core/Async/RMWAsync.cs @@ -21,11 +21,11 @@ internal struct RmwAsyncOperation : IAsyncOperation public Status DoFastOperation(FasterKV fasterKV, ref PendingContext pendingContext, IFasterSession fasterSession, - FasterExecutionContext currentCtx, out Output output) + out Output output) { Status status = !this.diskRequest.IsDefault() - ? fasterKV.InternalCompletePendingRequestFromContext(currentCtx, currentCtx, fasterSession, this.diskRequest, ref pendingContext, out AsyncIOContext newDiskRequest) - : fasterKV.CallInternalRMW(fasterSession, currentCtx, ref pendingContext, ref pendingContext.key.Get(), ref pendingContext.input.Get(), ref pendingContext.output, pendingContext.userContext, + ? fasterKV.InternalCompletePendingRequestFromContext(fasterSession, this.diskRequest, ref pendingContext, out AsyncIOContext newDiskRequest) + : fasterKV.CallInternalRMW(fasterSession, ref pendingContext, ref pendingContext.key.Get(), ref pendingContext.input.Get(), ref pendingContext.output, pendingContext.userContext, pendingContext.serialNum, out newDiskRequest); output = pendingContext.output; this.diskRequest = newDiskRequest; @@ -34,8 +34,8 @@ public Status DoFastOperation(FasterKV fasterKV, ref PendingContext< /// public ValueTask> DoSlowOperation(FasterKV fasterKV, IFasterSession fasterSession, - FasterExecutionContext currentCtx, PendingContext pendingContext, CancellationToken token) - => SlowRmwAsync(fasterKV, fasterSession, currentCtx, pendingContext, diskRequest, token); + PendingContext pendingContext, CancellationToken token) + => SlowRmwAsync(fasterKV, fasterSession, pendingContext, diskRequest, token); /// public bool HasPendingIO => !this.diskRequest.IsDefault(); @@ -67,14 +67,13 @@ internal RmwAsyncResult(Status status, TOutput output, RecordMetadata recordMeta } internal RmwAsyncResult(FasterKV fasterKV, IFasterSession fasterSession, - FasterExecutionContext currentCtx, PendingContext pendingContext, - AsyncIOContext diskRequest, ExceptionDispatchInfo exceptionDispatchInfo) + PendingContext pendingContext, AsyncIOContext diskRequest, ExceptionDispatchInfo exceptionDispatchInfo) { Status = new(StatusCode.Pending); this.Output = default; this.RecordMetadata = default; updateAsyncInternal = new AsyncOperationInternal, RmwAsyncResult>( - fasterKV, fasterSession, currentCtx, pendingContext, exceptionDispatchInfo, new (diskRequest)); + fasterKV, fasterSession, pendingContext, exceptionDispatchInfo, new (diskRequest)); } /// Complete the RMW operation, issuing additional (rare) I/O asynchronously if needed. It is usually preferable to use Complete() instead of this. 
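For orientation, the completion pattern these members support looks roughly as follows; this is a sketch against FASTER's public session API, with the key/value types and literal values chosen purely for illustration:

using FASTER.core;
using System.Threading.Tasks;

static class RmwCompletionSketch
{
    // RMWAsync usually completes inline; only in the rare pending case does the loop
    // below issue further I/O via CompleteAsync (or call Complete() to finish
    // synchronously, as the doc comment above recommends).
    static async Task DoRmw(ClientSession<long, long, long, long, Empty, SimpleFunctions<long, long>> session)
    {
        var result = await session.RMWAsync(23, 25);
        while (result.Status.IsPending)
            result = await result.CompleteAsync();
    }
}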
@@ -105,8 +104,9 @@ public ValueTask> CompleteAsync(Cancella } [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal ValueTask> RmwAsync(IFasterSession fasterSession, - FasterExecutionContext currentCtx, ref Key key, ref Input input, Context context, long serialNo, CancellationToken token = default) + internal ValueTask> RmwAsync(FasterSession fasterSession, + ref Key key, ref Input input, Context context, long serialNo, CancellationToken token = default) + where FasterSession : IFasterSession { var pcontext = new PendingContext { IsAsync = true }; var diskRequest = default(AsyncIOContext); @@ -115,43 +115,40 @@ internal ValueTask> RmwAsync>(new RmwAsyncResult(status, output, new RecordMetadata(pcontext.recordInfo, pcontext.logicalAddress))); } finally { - Debug.Assert(serialNo >= currentCtx.serialNum, "Operation serial numbers must be non-decreasing"); - currentCtx.serialNum = serialNo; + Debug.Assert(serialNo >= fasterSession.Ctx.serialNum, "Operation serial numbers must be non-decreasing"); + fasterSession.Ctx.serialNum = serialNo; fasterSession.UnsafeSuspendThread(); } - return SlowRmwAsync(this, fasterSession, currentCtx, pcontext, diskRequest, token); + return SlowRmwAsync(this, fasterSession, pcontext, diskRequest, token); } [MethodImpl(MethodImplOptions.AggressiveInlining)] - private Status CallInternalRMW(IFasterSession fasterSession, - FasterExecutionContext currentCtx, ref PendingContext pcontext, ref Key key, ref Input input, ref Output output, Context context, long serialNo, - out AsyncIOContext diskRequest) + private Status CallInternalRMW(IFasterSession fasterSession, ref PendingContext pcontext, + ref Key key, ref Input input, ref Output output, Context context, long serialNo, out AsyncIOContext diskRequest) { - diskRequest = default; OperationStatus internalStatus; do - internalStatus = InternalRMW(ref key, ref input, ref output, ref context, ref pcontext, fasterSession, currentCtx, serialNo); - while (HandleImmediateRetryStatus(internalStatus, currentCtx, currentCtx, fasterSession, ref pcontext)); + internalStatus = InternalRMW(ref key, ref input, ref output, ref context, ref pcontext, fasterSession, serialNo); + while (HandleImmediateRetryStatus(internalStatus, fasterSession, ref pcontext)); - return HandleOperationStatus(currentCtx, ref pcontext, internalStatus, out diskRequest); + return HandleOperationStatus(fasterSession.Ctx, ref pcontext, internalStatus, out diskRequest); } private static async ValueTask> SlowRmwAsync( FasterKV @this, IFasterSession fasterSession, - FasterExecutionContext currentCtx, PendingContext pcontext, - AsyncIOContext diskRequest, CancellationToken token = default) + PendingContext pcontext, AsyncIOContext diskRequest, CancellationToken token = default) { ExceptionDispatchInfo exceptionDispatchInfo; - (diskRequest, exceptionDispatchInfo) = await WaitForFlushOrIOCompletionAsync(@this, currentCtx, pcontext.flushEvent, diskRequest, token); + (diskRequest, exceptionDispatchInfo) = await WaitForFlushOrIOCompletionAsync(@this, fasterSession.Ctx, pcontext.flushEvent, diskRequest, token); pcontext.flushEvent = default; - return new RmwAsyncResult(@this, fasterSession, currentCtx, pcontext, diskRequest, exceptionDispatchInfo); + return new RmwAsyncResult(@this, fasterSession, pcontext, diskRequest, exceptionDispatchInfo); } } } diff --git a/cs/src/core/Async/ReadAsync.cs b/cs/src/core/Async/ReadAsync.cs index 8aa84636a..656dcd4f5 100644 --- a/cs/src/core/Async/ReadAsync.cs +++ b/cs/src/core/Async/ReadAsync.cs @@ -1,7 +1,6 @@ // Copyright (c) 
Microsoft Corporation. All rights reserved. // Licensed under the MIT license. -using System; using System.Diagnostics; using System.Runtime.CompilerServices; using System.Runtime.ExceptionServices; @@ -27,12 +26,12 @@ internal ReadAsyncOperation(AsyncIOContext diskRequest, ref ReadOpti public ReadAsyncResult CreateCompletedResult(Status status, Output output, RecordMetadata recordMetadata) => new(status, output, recordMetadata); /// - public Status DoFastOperation(FasterKV fasterKV, ref PendingContext pendingContext, IFasterSession fasterSession, - FasterExecutionContext currentCtx, out Output output) + public Status DoFastOperation(FasterKV fasterKV, ref PendingContext pendingContext, + IFasterSession fasterSession, out Output output) { Status status = !this.diskRequest.IsDefault() - ? fasterKV.InternalCompletePendingRequestFromContext(currentCtx, currentCtx, fasterSession, this.diskRequest, ref pendingContext, out var newDiskRequest) - : fasterKV.CallInternalRead(fasterSession, currentCtx, ref pendingContext, ref pendingContext.key.Get(), ref pendingContext.input.Get(), ref pendingContext.output, + ? fasterKV.InternalCompletePendingRequestFromContext(fasterSession, this.diskRequest, ref pendingContext, out var newDiskRequest) + : fasterKV.CallInternalRead(fasterSession, ref pendingContext, ref pendingContext.key.Get(), ref pendingContext.input.Get(), ref pendingContext.output, ref this.readOptions, pendingContext.userContext, pendingContext.serialNum, out newDiskRequest); output = pendingContext.output; this.diskRequest = newDiskRequest; @@ -41,8 +40,8 @@ public Status DoFastOperation(FasterKV fasterKV, ref PendingContext< /// public ValueTask> DoSlowOperation(FasterKV fasterKV, IFasterSession fasterSession, - FasterExecutionContext currentCtx, PendingContext pendingContext, CancellationToken token) - => SlowReadAsync(fasterKV, fasterSession, currentCtx, pendingContext, this.readOptions, this.diskRequest, token); + PendingContext pendingContext, CancellationToken token) + => SlowReadAsync(fasterKV, fasterSession, pendingContext, this.readOptions, this.diskRequest, token); /// public bool HasPendingIO => !this.diskRequest.IsDefault(); @@ -72,15 +71,14 @@ internal ReadAsyncResult(Status status, TOutput output, RecordMetadata recordMet this.updateAsyncInternal = default; } - internal ReadAsyncResult(FasterKV fasterKV, IFasterSession fasterSession, - FasterExecutionContext currentCtx, PendingContext pendingContext, - ref ReadOptions readOptions, AsyncIOContext diskRequest, ExceptionDispatchInfo exceptionDispatchInfo) + internal ReadAsyncResult(FasterKV fasterKV, IFasterSession fasterSession, PendingContext pendingContext, + ref ReadOptions readOptions, AsyncIOContext diskRequest, ExceptionDispatchInfo exceptionDispatchInfo) { Status = new(StatusCode.Pending); this.Output = default; this.RecordMetadata = default; updateAsyncInternal = new AsyncOperationInternal, ReadAsyncResult>( - fasterKV, fasterSession, currentCtx, pendingContext, exceptionDispatchInfo, new ReadAsyncOperation(diskRequest, ref readOptions)); + fasterKV, fasterSession, pendingContext, exceptionDispatchInfo, new ReadAsyncOperation(diskRequest, ref readOptions)); } /// Complete the RMW operation, issuing additional (rare) I/O synchronously if needed. 
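The read path's synchronous counterpart, sketched under the same illustrative session type as the RMW example above:

using FASTER.core;
using System;
using System.Threading.Tasks;

static class ReadCompletionSketch
{
    static async Task DoRead(ClientSession<long, long, long, long, Empty, SimpleFunctions<long, long>> session)
    {
        // The await covers any disk I/O; Complete() then finishes the operation
        // synchronously and returns the status and output.
        var readResult = await session.ReadAsync(23);
        var (status, output) = readResult.Complete();
        Console.WriteLine($"{status}: {output}");
    }
}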
@@ -105,11 +103,10 @@ internal ReadAsyncResult(FasterKV fasterKV, IFasterSession> ReadAsync(IFasterSession fasterSession, - FasterExecutionContext currentCtx, ref Key key, ref Input input, ref ReadOptions readOptions, Context context, long serialNo, CancellationToken token, bool noKey = false) { var pcontext = new PendingContext { IsAsync = true }; - var operationFlags = PendingContext.GetOperationFlags(MergeReadFlags(currentCtx.ReadFlags, readOptions.ReadFlags), noKey); + var operationFlags = PendingContext.GetOperationFlags(MergeReadFlags(fasterSession.Ctx.ReadFlags, readOptions.ReadFlags), noKey); pcontext.SetOperationFlags(operationFlags, readOptions.StopAddress); var diskRequest = default(AsyncIOContext); @@ -117,42 +114,41 @@ internal ValueTask> ReadAsync>(new ReadAsyncResult(status, output, new RecordMetadata(pcontext.recordInfo, pcontext.logicalAddress))); } finally { - Debug.Assert(serialNo >= currentCtx.serialNum, "Operation serial numbers must be non-decreasing"); - currentCtx.serialNum = serialNo; + Debug.Assert(serialNo >= fasterSession.Ctx.serialNum, "Operation serial numbers must be non-decreasing"); + fasterSession.Ctx.serialNum = serialNo; fasterSession.UnsafeSuspendThread(); } - return SlowReadAsync(this, fasterSession, currentCtx, pcontext, readOptions, diskRequest, token); + return SlowReadAsync(this, fasterSession, pcontext, readOptions, diskRequest, token); } [MethodImpl(MethodImplOptions.AggressiveInlining)] private Status CallInternalRead(IFasterSession fasterSession, - FasterExecutionContext currentCtx, ref PendingContext pcontext, ref Key key, ref Input input, ref Output output, ref ReadOptions readOptions, Context context, long serialNo, - out AsyncIOContext diskRequest) + ref PendingContext pcontext, ref Key key, ref Input input, ref Output output, ref ReadOptions readOptions, Context context, long serialNo, + out AsyncIOContext diskRequest) { OperationStatus internalStatus; do - internalStatus = InternalRead(ref key, ref input, ref output, readOptions.StartAddress, ref context, ref pcontext, fasterSession, currentCtx, serialNo); - while (HandleImmediateRetryStatus(internalStatus, currentCtx, currentCtx, fasterSession, ref pcontext)); + internalStatus = InternalRead(ref key, ref input, ref output, readOptions.StartAddress, ref context, ref pcontext, fasterSession, serialNo); + while (HandleImmediateRetryStatus(internalStatus, fasterSession, ref pcontext)); - return HandleOperationStatus(currentCtx, ref pcontext, internalStatus, out diskRequest); + return HandleOperationStatus(fasterSession.Ctx, ref pcontext, internalStatus, out diskRequest); } private static async ValueTask> SlowReadAsync( FasterKV @this, IFasterSession fasterSession, - FasterExecutionContext currentCtx, PendingContext pcontext, ReadOptions readOptions, AsyncIOContext diskRequest, CancellationToken token = default) { ExceptionDispatchInfo exceptionDispatchInfo; - (diskRequest, exceptionDispatchInfo) = await WaitForFlushOrIOCompletionAsync(@this, currentCtx, pcontext.flushEvent, diskRequest, token); + (diskRequest, exceptionDispatchInfo) = await WaitForFlushOrIOCompletionAsync(@this, fasterSession.Ctx, pcontext.flushEvent, diskRequest, token); pcontext.flushEvent = default; - return new ReadAsyncResult(@this, fasterSession, currentCtx, pcontext, ref readOptions, diskRequest, exceptionDispatchInfo); + return new ReadAsyncResult(@this, fasterSession, pcontext, ref readOptions, diskRequest, exceptionDispatchInfo); } } } diff --git a/cs/src/core/Async/UpsertAsync.cs b/cs/src/core/Async/UpsertAsync.cs 
index f94c0e751..f4b175796 100644 --- a/cs/src/core/Async/UpsertAsync.cs +++ b/cs/src/core/Async/UpsertAsync.cs @@ -18,22 +18,22 @@ internal struct UpsertAsyncOperation : IAsyncOperation public Status DoFastOperation(FasterKV fasterKV, ref PendingContext pendingContext, IFasterSession fasterSession, - FasterExecutionContext currentCtx, out Output output) + out Output output) { output = default; OperationStatus internalStatus; do { internalStatus = fasterKV.InternalUpsert(ref pendingContext.key.Get(), ref pendingContext.input.Get(), ref pendingContext.value.Get(), ref output, ref pendingContext.userContext, ref pendingContext, - fasterSession, currentCtx, pendingContext.serialNum); - } while (fasterKV.HandleImmediateRetryStatus(internalStatus, currentCtx, currentCtx, fasterSession, ref pendingContext)); + fasterSession, pendingContext.serialNum); + } while (fasterKV.HandleImmediateRetryStatus(internalStatus, fasterSession, ref pendingContext)); return TranslateStatus(internalStatus); } /// public ValueTask> DoSlowOperation(FasterKV fasterKV, IFasterSession fasterSession, - FasterExecutionContext currentCtx, PendingContext pendingContext, CancellationToken token) - => SlowUpsertAsync(fasterKV, fasterSession, currentCtx, pendingContext, token); + PendingContext pendingContext, CancellationToken token) + => SlowUpsertAsync(fasterKV, fasterSession, pendingContext, token); /// public bool HasPendingIO => false; @@ -64,13 +64,13 @@ internal UpsertAsyncResult(Status status, TOutput output, RecordMetadata recordM } internal UpsertAsyncResult(FasterKV fasterKV, IFasterSession fasterSession, - FasterExecutionContext currentCtx, PendingContext pendingContext, ExceptionDispatchInfo exceptionDispatchInfo) + PendingContext pendingContext, ExceptionDispatchInfo exceptionDispatchInfo) { this.Status = new(StatusCode.Pending); this.Output = default; this.RecordMetadata = default; updateAsyncInternal = new AsyncOperationInternal, UpsertAsyncResult>( - fasterKV, fasterSession, currentCtx, pendingContext, exceptionDispatchInfo, new ()); + fasterKV, fasterSession, pendingContext, exceptionDispatchInfo, new ()); } /// Complete the Upsert operation, issuing additional allocation asynchronously if needed. It is usually preferable to use Complete() instead of this. 
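As the doc comment above says, `Complete()` is usually preferable; when the upsert had to wait on a flush event (the `SlowUpsertAsync` path below), the fully asynchronous alternative is to loop on `CompleteAsync()` until the status is no longer pending. A sketch with illustrative names:

```csharp
// Sketch: draining an upsert that took the SlowUpsertAsync path.
long key = 42, value = 1000;
var r = await session.UpsertAsync(ref key, ref value);
while (r.Status.IsPending)
    r = await r.CompleteAsync(); // awaits pcontext.flushEvent, then retries
```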
@@ -100,17 +100,11 @@ public ValueTask> CompleteAsync(Cance } [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal ValueTask> UpsertAsync(IFasterSession fasterSession, - FasterExecutionContext currentCtx, ref Key key, ref Input input, ref Value value, Context userContext, long serialNo, CancellationToken token = default) + internal ValueTask> UpsertAsync(FasterSession fasterSession, + ref Key key, ref Input input, ref Value value, Context userContext, long serialNo, CancellationToken token = default) + where FasterSession : IFasterSession { var pcontext = new PendingContext { IsAsync = true }; - return UpsertAsync(fasterSession, currentCtx, ref pcontext, ref key, ref input, ref value, userContext, serialNo, token); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private ValueTask> UpsertAsync(IFasterSession fasterSession, - FasterExecutionContext currentCtx, ref PendingContext pcontext, ref Key key, ref Input input, ref Value value, Context userContext, long serialNo, CancellationToken token) - { Output output = default; fasterSession.UnsafeResumeThread(); @@ -119,8 +113,8 @@ private ValueTask> UpsertAsync>(new UpsertAsyncResult(status, output, new RecordMetadata(pcontext.recordInfo, pcontext.logicalAddress))); @@ -128,23 +122,21 @@ private ValueTask> UpsertAsync= currentCtx.serialNum, "Operation serial numbers must be non-decreasing"); - currentCtx.serialNum = serialNo; + Debug.Assert(serialNo >= fasterSession.Ctx.serialNum, "Operation serial numbers must be non-decreasing"); + fasterSession.Ctx.serialNum = serialNo; fasterSession.UnsafeSuspendThread(); } - return SlowUpsertAsync(this, fasterSession, currentCtx, pcontext, token); + return SlowUpsertAsync(this, fasterSession, pcontext, token); } private static async ValueTask> SlowUpsertAsync( - FasterKV @this, - IFasterSession fasterSession, - FasterExecutionContext currentCtx, + FasterKV @this, IFasterSession fasterSession, PendingContext pcontext, CancellationToken token = default) { ExceptionDispatchInfo exceptionDispatchInfo = await WaitForFlushCompletionAsync(@this, pcontext.flushEvent, token).ConfigureAwait(false); pcontext.flushEvent = default; - return new UpsertAsyncResult(@this, fasterSession, currentCtx, pcontext, exceptionDispatchInfo); + return new UpsertAsyncResult(@this, fasterSession, pcontext, exceptionDispatchInfo); } } } diff --git a/cs/src/core/ClientSession/ClientSession.cs b/cs/src/core/ClientSession/ClientSession.cs index 781e6a472..39ad23159 100644 --- a/cs/src/core/ClientSession/ClientSession.cs +++ b/cs/src/core/ClientSession/ClientSession.cs @@ -106,11 +106,15 @@ internal ClientSession( SessionVariableLengthStructSettings sessionVariableLengthStructSettings, ILoggerFactory loggerFactory = null) { - this.lContext = new(this); this.bContext = new(this); - this.luContext = new(this); this.uContext = new(this); + if (fht.LockTable.IsEnabled) + { + this.lContext = new(this); + this.luContext = new(this); + } + this.loggerFactory = loggerFactory; this.logger = loggerFactory?.CreateLogger($"ClientSession-{GetHashCode():X8}"); this.fht = fht; @@ -228,12 +232,28 @@ public void Dispose() /// /// Return a new interface to Faster operations that supports manual locking and epoch control. 
/// - public LockableUnsafeContext LockableUnsafeContext => luContext; + public LockableUnsafeContext LockableUnsafeContext + { + get + { + if (!this.fht.LockTable.IsEnabled) + throw new FasterException($"LockableUnsafeContext requires {nameof(LockingMode.Standard)}"); + return luContext; + } + } /// /// Return a session wrapper that supports manual locking. /// - public LockableContext LockableContext => lContext; + public LockableContext LockableContext + { + get + { + if (!this.fht.LockTable.IsEnabled) + throw new FasterException($"LockableContext requires {nameof(LockingMode.Standard)}"); + return lContext; + } + } /// /// Return a session wrapper struct that passes through to client session @@ -248,7 +268,7 @@ public Status Read(ref Key key, ref Input input, ref Output output, Context user UnsafeResumeThread(); try { - return fht.ContextRead(ref key, ref input, ref output, userContext, FasterSession, serialNo, ctx); + return fht.ContextRead(ref key, ref input, ref output, userContext, FasterSession, serialNo); } finally { @@ -297,7 +317,7 @@ public Status Read(ref Key key, ref Input input, ref Output output, ref ReadOpti UnsafeResumeThread(); try { - return fht.ContextRead(ref key, ref input, ref output, ref readOptions, out recordMetadata, userContext, FasterSession, serialNo, ctx); + return fht.ContextRead(ref key, ref input, ref output, ref readOptions, out recordMetadata, userContext, FasterSession, serialNo); } finally { @@ -312,7 +332,7 @@ public Status ReadAtAddress(ref Input input, ref Output output, ref ReadOptions UnsafeResumeThread(); try { - return fht.ContextReadAtAddress(ref input, ref output, ref readOptions, userContext, FasterSession, serialNo, ctx); + return fht.ContextReadAtAddress(ref input, ref output, ref readOptions, userContext, FasterSession, serialNo); } finally { @@ -325,7 +345,7 @@ public Status ReadAtAddress(ref Input input, ref Output output, ref ReadOptions public ValueTask.ReadAsyncResult> ReadAsync(ref Key key, ref Input input, Context userContext = default, long serialNo = 0, CancellationToken cancellationToken = default) { ReadOptions readOptions = default; - return fht.ReadAsync(this.FasterSession, this.ctx, ref key, ref input, ref readOptions, userContext, serialNo, cancellationToken); + return fht.ReadAsync(this.FasterSession, ref key, ref input, ref readOptions, userContext, serialNo, cancellationToken); } /// @@ -333,7 +353,7 @@ public ValueTask.ReadAsyncResult> R public ValueTask.ReadAsyncResult> ReadAsync(Key key, Input input, Context context = default, long serialNo = 0, CancellationToken token = default) { ReadOptions readOptions = default; - return fht.ReadAsync(this.FasterSession, this.ctx, ref key, ref input, ref readOptions, context, serialNo, token); + return fht.ReadAsync(this.FasterSession, ref key, ref input, ref readOptions, context, serialNo, token); } /// @@ -342,7 +362,7 @@ public ValueTask.ReadAsyncResult> R { Input input = default; ReadOptions readOptions = default; - return fht.ReadAsync(this.FasterSession, this.ctx, ref key, ref input, ref readOptions, userContext, serialNo, token); + return fht.ReadAsync(this.FasterSession, ref key, ref input, ref readOptions, userContext, serialNo, token); } /// @@ -351,14 +371,14 @@ public ValueTask.ReadAsyncResult> R { Input input = default; ReadOptions readOptions = default; - return fht.ReadAsync(this.FasterSession, this.ctx, ref key, ref input, ref readOptions, context, serialNo, token); + return fht.ReadAsync(this.FasterSession, ref key, ref input, ref readOptions, context, serialNo, 
token); } /// [MethodImpl(MethodImplOptions.AggressiveInlining)] public ValueTask.ReadAsyncResult> ReadAsync(ref Key key, ref Input input, ref ReadOptions readOptions, Context userContext = default, long serialNo = 0, CancellationToken cancellationToken = default) - => fht.ReadAsync(this.FasterSession, this.ctx, ref key, ref input, ref readOptions, userContext, serialNo, cancellationToken); + => fht.ReadAsync(this.FasterSession, ref key, ref input, ref readOptions, userContext, serialNo, cancellationToken); /// [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -366,7 +386,7 @@ public ValueTask.ReadAsyncResult> R Context userContext = default, long serialNo = 0, CancellationToken cancellationToken = default) { Key key = default; - return fht.ReadAsync(this.FasterSession, this.ctx, ref key, ref input, ref readOptions, userContext, serialNo, cancellationToken, noKey: true); + return fht.ReadAsync(this.FasterSession, ref key, ref input, ref readOptions, userContext, serialNo, cancellationToken, noKey: true); } /// @@ -385,7 +405,7 @@ public Status Upsert(ref Key key, ref Input input, ref Value desiredValue, ref O UnsafeResumeThread(); try { - return fht.ContextUpsert(ref key, ref input, ref desiredValue, ref output, userContext, FasterSession, serialNo, ctx); + return fht.ContextUpsert(ref key, ref input, ref desiredValue, ref output, userContext, FasterSession, serialNo); } finally { @@ -400,7 +420,7 @@ public Status Upsert(ref Key key, ref Input input, ref Value desiredValue, ref O UnsafeResumeThread(); try { - return fht.ContextUpsert(ref key, ref input, ref desiredValue, ref output, out recordMetadata, userContext, FasterSession, serialNo, ctx); + return fht.ContextUpsert(ref key, ref input, ref desiredValue, ref output, out recordMetadata, userContext, FasterSession, serialNo); } finally { @@ -429,7 +449,7 @@ public ValueTask.UpsertAsyncResult> /// [MethodImpl(MethodImplOptions.AggressiveInlining)] public ValueTask.UpsertAsyncResult> UpsertAsync(ref Key key, ref Input input, ref Value desiredValue, Context userContext = default, long serialNo = 0, CancellationToken token = default) - => fht.UpsertAsync(this.FasterSession, this.ctx, ref key, ref input, ref desiredValue, userContext, serialNo, token); + => fht.UpsertAsync(this.FasterSession, ref key, ref input, ref desiredValue, userContext, serialNo, token); /// [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -453,7 +473,7 @@ public Status RMW(ref Key key, ref Input input, ref Output output, out RecordMet UnsafeResumeThread(); try { - return fht.ContextRMW(ref key, ref input, ref output, out recordMetadata, userContext, FasterSession, serialNo, ctx); + return fht.ContextRMW(ref key, ref input, ref output, out recordMetadata, userContext, FasterSession, serialNo); } finally { @@ -488,7 +508,7 @@ public Status RMW(Key key, Input input, Context userContext = default, long seri /// [MethodImpl(MethodImplOptions.AggressiveInlining)] public ValueTask.RmwAsyncResult> RMWAsync(ref Key key, ref Input input, Context context = default, long serialNo = 0, CancellationToken token = default) - => fht.RmwAsync(this.FasterSession, this.ctx, ref key, ref input, context, serialNo, token); + => fht.RmwAsync(this.FasterSession, ref key, ref input, context, serialNo, token); /// [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -502,7 +522,7 @@ public Status Delete(ref Key key, Context userContext = default, long serialNo = UnsafeResumeThread(); try { - return fht.ContextDelete(ref key, userContext, FasterSession, serialNo, ctx); + return 
fht.ContextDelete(ref key, userContext, FasterSession, serialNo); } finally { @@ -518,7 +538,7 @@ public Status Delete(Key key, Context userContext = default, long serialNo = 0) /// [MethodImpl(MethodImplOptions.AggressiveInlining)] public ValueTask.DeleteAsyncResult> DeleteAsync(ref Key key, Context userContext = default, long serialNo = 0, CancellationToken token = default) - => fht.DeleteAsync(this.FasterSession, this.ctx, ref key, userContext, serialNo, token); + => fht.DeleteAsync(this.FasterSession, ref key, userContext, serialNo, token); /// [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -529,10 +549,49 @@ public ValueTask.DeleteAsyncResult> public void Refresh() { UnsafeResumeThread(); - fht.InternalRefresh(ctx, FasterSession); + fht.InternalRefresh(FasterSession); UnsafeSuspendThread(); } + /// + public void ResetModified(ref Key key) + { + UnsafeResumeThread(); + try + { + UnsafeResetModified(ref key); + } + finally + { + UnsafeSuspendThread(); + } + } + + /// + public bool NeedKeyLockCode => this.fht.LockTable.IsEnabled && this.fht.LockTable.NeedKeyLockCode; + + /// + public long GetLockCode(ref Key key, out long keyHash) + { + keyHash = this.fht.comparer.GetHashCode64(ref key); + return this.fht.LockTable.IsEnabled ? this.fht.LockTable.GetLockCode(ref key, keyHash) : keyHash; + } + + /// + public long GetLockCode(ref Key key, long keyHash) => this.fht.LockTable.IsEnabled ? this.fht.LockTable.GetLockCode(ref key, keyHash) : keyHash; + + /// + public int CompareLockCodes(TLockableKey key1, TLockableKey key2) where TLockableKey : ILockableKey => fht.LockTable.CompareLockCodes(key1, key2); + + /// + public int CompareLockCodes(ref TLockableKey key1, ref TLockableKey key2) where TLockableKey : ILockableKey => fht.LockTable.CompareLockCodes(ref key1, ref key2); + + /// + public void SortLockCodes(TLockableKey[] keys) where TLockableKey : ILockableKey => fht.LockTable.SortLockCodes(keys); + + /// + public void SortLockCodes(TLockableKey[] keys, int start, int count) where TLockableKey : ILockableKey => fht.LockTable.SortLockCodes(keys, start, count); + #endregion IFasterContext #region Pending Operations @@ -601,7 +660,7 @@ internal bool UnsafeCompletePending(FasterSession fasterSession, where FasterSession : IFasterSession { var requestedOutputs = getOutputs ? this.completedOutputs : default; - var result = fht.InternalCompletePending(ctx, fasterSession, wait, requestedOutputs); + var result = fht.InternalCompletePending(fasterSession, wait, requestedOutputs); if (spinWaitForCommit) { if (wait != true) @@ -610,10 +669,10 @@ internal bool UnsafeCompletePending(FasterSession fasterSession, } do { - fht.InternalCompletePending(ctx, fasterSession, wait, requestedOutputs); + fht.InternalCompletePending(fasterSession, wait, requestedOutputs); if (fht.InRestPhase()) { - fht.InternalCompletePending(ctx, fasterSession, wait, requestedOutputs); + fht.InternalCompletePending(fasterSession, wait, requestedOutputs); return true; } } while (wait); @@ -641,7 +700,7 @@ private async ValueTask CompletePendingAsync(bool getOutputs, bool waitForCommit throw new NotSupportedException("Async operations not supported over protected epoch"); // Complete all pending operations on session - await fht.CompletePendingAsync(this.FasterSession, this.ctx, token, getOutputs ? this.completedOutputs : null).ConfigureAwait(false); + await fht.CompletePendingAsync(this.FasterSession, token, getOutputs ? 
this.completedOutputs : null).ConfigureAwait(false); // Wait for commit if necessary if (waitForCommit) @@ -668,26 +727,12 @@ public async ValueTask ReadyToCompletePendingAsync(CancellationToken token = def #region Other Operations - /// - public void ResetModified(ref Key key) - { - UnsafeResumeThread(); - try - { - UnsafeResetModified(ref key); - } - finally - { - UnsafeSuspendThread(); - } - } - internal void UnsafeResetModified(ref Key key) { OperationStatus status; do status = fht.InternalModifiedBitOperation(ref key, out _); - while (fht.HandleImmediateNonPendingRetryStatus(status, ctx, FasterSession)); + while (fht.HandleImmediateNonPendingRetryStatus(status, FasterSession)); } /// @@ -713,7 +758,7 @@ internal bool UnsafeIsModified(ref Key key) OperationStatus status; do status = fht.InternalModifiedBitOperation(ref key, out modifiedInfo, false); - while (fht.HandleImmediateNonPendingRetryStatus(status, ctx, FasterSession)); + while (fht.HandleImmediateNonPendingRetryStatus(status, FasterSession)); return modifiedInfo.Modified; } @@ -823,7 +868,7 @@ internal OperationStatus CompactionCopyToTail(ref Key key, ref Input input, ref UnsafeResumeThread(); try { - return fht.InternalCopyToTailForCompaction(ref key, ref input, ref desiredValue, ref output, untilAddress, actualAddress, FasterSession, ctx); + return fht.InternalCopyToTailForCompaction(ref key, ref input, ref desiredValue, ref output, untilAddress, actualAddress, FasterSession); } finally { @@ -846,7 +891,7 @@ internal Status ContainsKeyInMemory(ref Key key, out long logicalAddress, long f UnsafeResumeThread(); try { - return fht.InternalContainsKeyInMemory(ref key, ctx, FasterSession, out logicalAddress, fromAddress); + return fht.InternalContainsKeyInMemory(ref key, FasterSession, out logicalAddress, fromAddress); } finally { @@ -877,7 +922,7 @@ internal void UnsafeResumeThread() // We do not track any "acquired" state here; if someone mixes calls between safe and unsafe contexts, they will // get the "trying to acquire already-acquired epoch" error. 
fht.epoch.Resume(); - fht.InternalRefresh(ctx, FasterSession); + fht.InternalRefresh(FasterSession); } /// @@ -908,7 +953,7 @@ internal bool IsInPreparePhase() #region IFasterSession [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal bool InPlaceUpdater(ref Key key, ref Input input, ref Output output, ref Value value, ref RecordInfo recordInfo, ref RMWInfo rmwInfo, out OperationStatus status) + internal bool InPlaceUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, ref RMWInfo rmwInfo, out OperationStatus status) { recordInfo.SetDirty(); @@ -931,8 +976,8 @@ internal bool InPlaceUpdater(ref Key key, ref Input input, ref Output output, re if (rmwInfo.Action == RMWAction.ExpireAndResume) { // This inserts the tombstone if appropriate - return this.fht.ReinitializeExpiredRecord(ref key, ref input, ref value, ref output, ref recordInfo, ref rmwInfo, - rmwInfo.Address, this.ctx, this.FasterSession, isIpu: true, out status); + return this.fht.ReinitializeExpiredRecord(ref key, ref input, ref value, ref output, ref recordInfo, + ref rmwInfo, rmwInfo.Address, this.FasterSession, isIpu: true, out status); } if (rmwInfo.Action == RMWAction.ExpireAndStop) { @@ -956,26 +1001,34 @@ public InternalFasterSession(ClientSession _clientSession.fht.DisableEphemeralLocking; - public bool IsManualLocking => false; - public SessionType SessionType => SessionType.BasicContext; - #endregion IFunctions - Optional features supported - #region IFunctions - Reads public bool SingleReader(ref Key key, ref Input input, ref Value value, ref Output dst, ref RecordInfo recordInfo, ref ReadInfo readInfo) => _clientSession.functions.SingleReader(ref key, ref input, ref value, ref dst, ref readInfo); [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool ConcurrentReader(ref Key key, ref Input input, ref Value value, ref Output dst, ref RecordInfo recordInfo, ref ReadInfo readInfo) + public bool ConcurrentReader(ref Key key, ref Input input, ref Value value, ref Output dst, ref RecordInfo recordInfo, ref ReadInfo readInfo, out EphemeralLockResult lockResult) { - if (_clientSession.functions.ConcurrentReader(ref key, ref input, ref value, ref dst, ref readInfo)) - return true; - if (readInfo.Action == ReadAction.Expire) - recordInfo.Tombstone = true; - return false; + lockResult = EphemeralLockResult.Success; + return _clientSession.fht.DoEphemeralLocking + ? ConcurrentReaderLockEphemeral(ref key, ref input, ref value, ref dst, ref recordInfo, ref readInfo, out lockResult) + : _clientSession.functions.ConcurrentReader(ref key, ref input, ref value, ref dst, ref readInfo); + } + + public bool ConcurrentReaderLockEphemeral(ref Key key, ref Input input, ref Value value, ref Output dst, ref RecordInfo recordInfo, ref ReadInfo readInfo, out EphemeralLockResult lockResult) + { + lockResult = recordInfo.TryLockShared() ? 
EphemeralLockResult.Success : EphemeralLockResult.Failed; + if (lockResult == EphemeralLockResult.Failed) + return false; + try + { + return _clientSession.functions.ConcurrentReader(ref key, ref input, ref value, ref dst, ref readInfo); + } + finally + { + recordInfo.UnlockShared(); + } } public void ReadCompletionCallback(ref Key key, ref Input input, ref Output output, Context ctx, Status status, RecordMetadata recordMetadata) @@ -989,19 +1042,72 @@ public bool SingleWriter(ref Key key, ref Input input, ref Value src, ref Value => _clientSession.functions.SingleWriter(ref key, ref input, ref src, ref dst, ref output, ref upsertInfo, reason); [MethodImpl(MethodImplOptions.AggressiveInlining)] - public void PostSingleWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, ref UpsertInfo upsertInfo, WriteReason reason) + public void PostSingleWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, ref UpsertInfo upsertInfo, WriteReason reason) + { + if (_clientSession.fht.DoEphemeralLocking) + PostSingleWriterLockEphemeral(ref key, ref input, ref src, ref dst, ref output, ref recordInfo, ref upsertInfo, reason); + else + PostSingleWriterNoLock(ref key, ref input, ref src, ref dst, ref output, ref recordInfo, ref upsertInfo, reason); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void PostSingleWriterNoLock(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, ref UpsertInfo upsertInfo, WriteReason reason) { recordInfo.SetDirtyAndModified(); _clientSession.functions.PostSingleWriter(ref key, ref input, ref src, ref dst, ref output, ref upsertInfo, reason); } + public void PostSingleWriterLockEphemeral(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, ref UpsertInfo upsertInfo, WriteReason reason) + { + try + { + PostSingleWriterNoLock(ref key, ref input, ref src, ref dst, ref output, ref recordInfo, ref upsertInfo, reason); + } + finally + { + if (reason == WriteReason.Upsert) + recordInfo.UnlockExclusive(); + else if (recordInfo.IsLockedShared) // readcache records are not locked + recordInfo.UnlockShared(); + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public bool ConcurrentWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, ref UpsertInfo upsertInfo, out EphemeralLockResult lockResult) + { + lockResult = EphemeralLockResult.Success; + return _clientSession.fht.DoEphemeralLocking + ? 
ConcurrentWriterLockEphemeral(ref key, ref input, ref src, ref dst, ref output, ref recordInfo, ref upsertInfo, out lockResult) + : ConcurrentWriterNoLock(ref key, ref input, ref src, ref dst, ref output, ref recordInfo, ref upsertInfo); + } + [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool ConcurrentWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, ref UpsertInfo upsertInfo) + private bool ConcurrentWriterNoLock(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, ref UpsertInfo upsertInfo) { + if (!_clientSession.functions.ConcurrentWriter(ref key, ref input, ref src, ref dst, ref output, ref upsertInfo)) + return false; recordInfo.SetDirtyAndModified(); + return true; + } - // Note: KeyIndexes do not need notification of in-place updates because the key does not change. - return _clientSession.functions.ConcurrentWriter(ref key, ref input, ref src, ref dst, ref output, ref upsertInfo); + public bool ConcurrentWriterLockEphemeral(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, ref UpsertInfo upsertInfo, out EphemeralLockResult lockResult) + { + lockResult = recordInfo.TryLockExclusive() ? EphemeralLockResult.Success : EphemeralLockResult.Failed; + if (lockResult == EphemeralLockResult.Failed) + return false; + try + { + if (ConcurrentWriterNoLock(ref key, ref input, ref src, ref dst, ref output, ref recordInfo, ref upsertInfo)) + return true; + if (upsertInfo.Action != UpsertAction.CancelOperation) + lockResult = EphemeralLockResult.HoldForSeal; + return false; + } + finally + { + if (lockResult != EphemeralLockResult.HoldForSeal) + recordInfo.UnlockExclusive(); + } } #endregion IFunctions - Upserts @@ -1016,11 +1122,33 @@ public bool InitialUpdater(ref Key key, ref Input input, ref Value value, ref Ou => _clientSession.functions.InitialUpdater(ref key, ref input, ref value, ref output, ref rmwInfo); [MethodImpl(MethodImplOptions.AggressiveInlining)] - public void PostInitialUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, ref RMWInfo rmwInfo) + public void PostInitialUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, ref RMWInfo rmwInfo) + { + if (_clientSession.fht.DoEphemeralLocking) + PostInitialUpdaterLockEphemeral(ref key, ref input, ref value, ref output, ref recordInfo, ref rmwInfo); + else + PostInitialUpdaterNoLock(ref key, ref input, ref value, ref output, ref recordInfo, ref rmwInfo); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void PostInitialUpdaterNoLock(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, ref RMWInfo rmwInfo) { recordInfo.SetDirtyAndModified(); _clientSession.functions.PostInitialUpdater(ref key, ref input, ref value, ref output, ref rmwInfo); } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void PostInitialUpdaterLockEphemeral(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, ref RMWInfo rmwInfo) + { + try + { + PostInitialUpdaterNoLock(ref key, ref input, ref value, ref output, ref recordInfo, ref rmwInfo); + } + finally + { + recordInfo.UnlockExclusive(); + } + } #endregion InitialUpdater #region CopyUpdater @@ -1033,19 +1161,78 @@ public bool CopyUpdater(ref Key key, ref Input input, ref Value oldValue, ref Va => 
_clientSession.functions.CopyUpdater(ref key, ref input, ref oldValue, ref newValue, ref output, ref rmwInfo); [MethodImpl(MethodImplOptions.AggressiveInlining)] - public void PostCopyUpdater(ref Key key, ref Input input, ref Value oldValue, ref Value newValue, ref Output output, ref RecordInfo recordInfo, ref RMWInfo rmwInfo) + public void PostCopyUpdater(ref Key key, ref Input input, ref Value oldValue, ref Value newValue, ref Output output, ref RecordInfo recordInfo, ref RMWInfo rmwInfo) + { + if (_clientSession.fht.DoEphemeralLocking) + PostCopyUpdaterLockEphemeral(ref key, ref input, ref oldValue, ref newValue, ref output, ref recordInfo, ref rmwInfo); + else + PostCopyUpdaterNoLock(ref key, ref input, ref oldValue, ref newValue, ref output, ref recordInfo, ref rmwInfo); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void PostCopyUpdaterNoLock(ref Key key, ref Input input, ref Value oldValue, ref Value newValue, ref Output output, ref RecordInfo recordInfo, ref RMWInfo rmwInfo) { recordInfo.SetDirtyAndModified(); _clientSession.functions.PostCopyUpdater(ref key, ref input, ref oldValue, ref newValue, ref output, ref rmwInfo); } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void PostCopyUpdaterLockEphemeral(ref Key key, ref Input input, ref Value oldValue, ref Value newValue, ref Output output, ref RecordInfo recordInfo, ref RMWInfo rmwInfo) + { + try + { + PostCopyUpdaterNoLock(ref key, ref input, ref oldValue, ref newValue, ref output, ref recordInfo, ref rmwInfo); + } + finally + { + recordInfo.UnlockExclusive(); + } + } #endregion CopyUpdater #region InPlaceUpdater [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool InPlaceUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, ref RMWInfo rmwInfo, out OperationStatus status) + public bool InPlaceUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, ref RMWInfo rmwInfo, out OperationStatus status, + out EphemeralLockResult lockResult) { + lockResult = EphemeralLockResult.Success; + return _clientSession.fht.DoEphemeralLocking + ? InPlaceUpdaterLockEphemeral(ref key, ref input, ref value, ref output, ref recordInfo, ref rmwInfo, out status, out lockResult) + : InPlaceUpdaterNoLock(ref key, ref input, ref value, ref output, ref recordInfo, ref rmwInfo, out status); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public bool InPlaceUpdaterNoLock(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, ref RMWInfo rmwInfo, out OperationStatus status) + { + if (!_clientSession.InPlaceUpdater(ref key, ref input, ref value, ref output, ref recordInfo, ref rmwInfo, out status)) + return false; recordInfo.SetDirtyAndModified(); - return _clientSession.InPlaceUpdater(ref key, ref input, ref output, ref value, ref recordInfo, ref rmwInfo, out status); + return true; + } + + public bool InPlaceUpdaterLockEphemeral(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, ref RMWInfo rmwInfo, out OperationStatus status, + out EphemeralLockResult lockResult) + { + lockResult = recordInfo.TryLockExclusive() ? 
EphemeralLockResult.Success : EphemeralLockResult.Failed; + if (lockResult == EphemeralLockResult.Failed) + { + status = OperationStatus.RETRY_LATER; + return false; + } + try + { + if (InPlaceUpdaterNoLock(ref key, ref input, ref value, ref output, ref recordInfo, ref rmwInfo, out status)) + return true; + // Expiration sets additional bits beyond SUCCESS, and Cancel does not set SUCCESS. + if (status == OperationStatus.SUCCESS) + lockResult = EphemeralLockResult.HoldForSeal; + return false; + } + finally + { + if (lockResult != EphemeralLockResult.HoldForSeal) + recordInfo.UnlockExclusive(); + } } public void RMWCompletionCallback(ref Key key, ref Input input, ref Output output, Context ctx, Status status, RecordMetadata recordMetadata) @@ -1060,18 +1247,65 @@ public bool SingleDeleter(ref Key key, ref Value value, ref RecordInfo recordInf => _clientSession.functions.SingleDeleter(ref key, ref value, ref deleteInfo); [MethodImpl(MethodImplOptions.AggressiveInlining)] - public void PostSingleDeleter(ref Key key, ref RecordInfo recordInfo, ref DeleteInfo deleteInfo) + public void PostSingleDeleter(ref Key key, ref RecordInfo recordInfo, ref DeleteInfo deleteInfo) + { + if (_clientSession.fht.DoEphemeralLocking) + PostSingleDeleterLockEphemeral(ref key, ref recordInfo, ref deleteInfo); + else + PostSingleDeleterNoLock(ref key, ref recordInfo, ref deleteInfo); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void PostSingleDeleterNoLock(ref Key key, ref RecordInfo recordInfo, ref DeleteInfo deleteInfo) { recordInfo.SetDirtyAndModified(); _clientSession.functions.PostSingleDeleter(ref key, ref deleteInfo); } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool ConcurrentDeleter(ref Key key, ref Value value, ref RecordInfo recordInfo, ref DeleteInfo deleteInfo) + public void PostSingleDeleterLockEphemeral(ref Key key, ref RecordInfo recordInfo, ref DeleteInfo deleteInfo) { + try + { + PostSingleDeleterNoLock(ref key, ref recordInfo, ref deleteInfo); + } + finally + { + recordInfo.UnlockExclusive(); + } + } + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public bool ConcurrentDeleter(ref Key key, ref Value value, ref RecordInfo recordInfo, ref DeleteInfo deleteInfo, out EphemeralLockResult lockResult) + { + lockResult = EphemeralLockResult.Success; + return _clientSession.fht.DoEphemeralLocking + ? ConcurrentDeleterLockEphemeral(ref key, ref value, ref recordInfo, ref deleteInfo, out lockResult) + : ConcurrentDeleterNoLock(ref key, ref value, ref recordInfo, ref deleteInfo); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public bool ConcurrentDeleterNoLock(ref Key key, ref Value value, ref RecordInfo recordInfo, ref DeleteInfo deleteInfo) + { + if (!_clientSession.functions.ConcurrentDeleter(ref key, ref value, ref deleteInfo)) + return false; recordInfo.SetDirtyAndModified(); recordInfo.SetTombstone(); - return _clientSession.functions.ConcurrentDeleter(ref key, ref value, ref deleteInfo); + return true; + } + + public bool ConcurrentDeleterLockEphemeral(ref Key key, ref Value value, ref RecordInfo recordInfo, ref DeleteInfo deleteInfo, out EphemeralLockResult lockResult) + { + lockResult = recordInfo.TryLockExclusive() ? 
EphemeralLockResult.Success : EphemeralLockResult.Failed; + if (lockResult == EphemeralLockResult.Failed) + return false; + try + { + return ConcurrentDeleterNoLock(ref key, ref value, ref recordInfo, ref deleteInfo); + } + finally + { + recordInfo.UnlockExclusive(); + } } #endregion IFunctions - Deletes @@ -1096,16 +1330,41 @@ public void CheckpointCompletionCallback(int sessionID, string sessionName, Comm } #endregion IFunctions - Checkpointing - #region Ephemeral locking - public bool TryLockEphemeralExclusive(ref RecordInfo recordInfo) => _clientSession.fht.DisableEphemeralLocking || recordInfo.TryLockExclusive(); - public bool TryLockEphemeralShared(ref RecordInfo recordInfo) => _clientSession.fht.DisableEphemeralLocking || recordInfo.TryLockShared(); - public void UnlockEphemeralExclusive(ref RecordInfo recordInfo) + #region Transient locking + public bool TryLockTransientExclusive(ref Key key, ref OperationStackContext stackCtx) { - if (!_clientSession.fht.DisableEphemeralLocking) - recordInfo.UnlockExclusive(); + if (!_clientSession.fht.DoTransientLocking) + return true; + if (!_clientSession.fht.LockTable.TryLockTransientExclusive(ref key, ref stackCtx.hei)) + return false; + return stackCtx.recSrc.HasTransientLock = true; } - public bool TryUnlockEphemeralShared(ref RecordInfo recordInfo) => _clientSession.fht.DisableEphemeralLocking || recordInfo.TryUnlockShared(); - #endregion Ephemeral locking + + public bool TryLockTransientShared(ref Key key, ref OperationStackContext stackCtx) + { + if (!_clientSession.fht.DoTransientLocking) + return true; + if (!_clientSession.fht.LockTable.TryLockTransientShared(ref key, ref stackCtx.hei)) + return false; + return stackCtx.recSrc.HasTransientLock = true; + } + + public void UnlockTransientExclusive(ref Key key, ref OperationStackContext stackCtx) + { + if (!_clientSession.fht.DoTransientLocking) + return; + _clientSession.fht.LockTable.UnlockExclusive(ref key, ref stackCtx.hei); + stackCtx.recSrc.HasTransientLock = false; + } + + public void UnlockTransientShared(ref Key key, ref OperationStackContext stackCtx) + { + if (!_clientSession.fht.DoTransientLocking) + return; + _clientSession.fht.LockTable.UnlockShared(ref key, ref stackCtx.hei); + stackCtx.recSrc.HasTransientLock = false; + } + #endregion Transient locking #region Internal utilities public int GetInitialLength(ref Input input) @@ -1127,6 +1386,8 @@ public IHeapContainer GetHeapContainer(ref Input input) public bool CompletePendingWithOutputs(out CompletedOutputIterator completedOutputs, bool wait = false, bool spinWaitForCommit = false) => _clientSession.CompletePendingWithOutputs(out completedOutputs, wait, spinWaitForCommit); + + public FasterKV.FasterExecutionContext Ctx => this._clientSession.ctx; #endregion Internal utilities } #endregion IFasterSession diff --git a/cs/src/core/ClientSession/IFasterContext.cs b/cs/src/core/ClientSession/IFasterContext.cs index d1806b011..164cf3c4f 100644 --- a/cs/src/core/ClientSession/IFasterContext.cs +++ b/cs/src/core/ClientSession/IFasterContext.cs @@ -510,7 +510,6 @@ ValueTask.ReadAsyncResult> ReadAtAd /// to complete the Upsert operation. Failure to complete the operation will result in leaked allocations. 
ValueTask.DeleteAsyncResult> DeleteAsync(Key key, Context userContext = default, long serialNo = 0, CancellationToken token = default); - /// /// Reset the modified bit of a record (for in memory records) /// diff --git a/cs/src/core/ClientSession/ILockableContext.cs b/cs/src/core/ClientSession/ILockableContext.cs index 2a7b11b9e..2ffc102bc 100644 --- a/cs/src/core/ClientSession/ILockableContext.cs +++ b/cs/src/core/ClientSession/ILockableContext.cs @@ -20,43 +20,119 @@ public interface ILockableContext void EndLockable(); /// - /// Lock the key with the specified , waiting until it is acquired + /// If true, then keys must use one of the overloads to obtain a code by which groups of keys will be sorted for manual locking, to avoid deadlocks. /// - /// The key to lock - /// The type of lock to take - void Lock(ref TKey key, LockType lockType); + /// Whether this returns true depends on the on , or passed to the FasterKV constructor. + bool NeedKeyLockCode { get; } /// - /// Lock the key with the specified , waiting until it is acquired + /// Obtain a code by which groups of keys will be sorted for manual locking, to avoid deadlocks. + /// The key to obtain a code for + /// The hashcode of the key; created and returned by . /// - /// The key to lock - /// The type of lock to take - void Lock(TKey key, LockType lockType); + /// If is true, this code is obtained by FASTER on method calls and is used in its locking scheme. + /// In that case the app must ensure that the keys in a group are sorted by this value, to avoid deadlock. + long GetLockCode(TKey key, out long keyHash); /// - /// Lock the key with the specified + /// Obtain a code by which groups of keys will be sorted for manual locking, to avoid deadlocks. + /// The key to obtain a code for + /// The hashcode of the key; created and returned by . /// - /// The key to lock - /// The type of lock to release - void Unlock(ref TKey key, LockType lockType); + /// If is true, this code is obtained by FASTER on method calls and is used in its locking scheme. + /// In that case the app must ensure that the keys in a group are sorted by this value, to avoid deadlock. + long GetLockCode(ref TKey key, out long keyHash); /// - /// Unlock the key with the specified + /// Obtain a code by which groups of keys will be sorted for manual locking, to avoid deadlocks. + /// The key to obtain a code for + /// The hashcode of the key; must be the value returned by . /// - /// The key to lock - /// The type of lock to release - void Unlock(TKey key, LockType lockType); + /// If is true, this code is obtained by FASTER on method calls and is used in its locking scheme. + /// In that case the app must ensure that the keys in a group are sorted by this value, to avoid deadlock. + long GetLockCode(TKey key, long keyHash); /// - /// Determines if the key is locked. Note this value may be obsolete as soon as it returns. + /// Obtain a code by which groups of keys will be sorted for manual locking, to avoid deadlocks. + /// The key to obtain a code for + /// The hashcode of the key; must be the value returned by . /// - /// The key to lock - (bool exclusive, byte shared) IsLocked(ref TKey key); + /// If is true, this code is obtained by FASTER on method calls and is used in its locking scheme. + /// In that case the app must ensure that the keys in a group are sorted by this value, to avoid deadlock. + long GetLockCode(ref TKey key, long keyHash); /// - /// Determines if the key is locked. Note this value may be obsolete as soon as it returns. 
+ /// Compare two structures that implement ILockableKey. /// - /// The key to lock - (bool exclusive, byte shared) IsLocked(TKey key); + /// The type of the app data struct or class containing key info + /// The first key to compare + /// The first key to compare + /// The result of key1.CompareTo(key2) + int CompareLockCodes(TLockableKey key1, TLockableKey key2) + where TLockableKey : ILockableKey; + + /// + /// Compare two structures that implement ILockableKey. + /// + /// The type of the app data struct or class containing key info + /// The first key to compare + /// The first key to compare + /// The result of key1.CompareTo(key2) + int CompareLockCodes(ref TLockableKey key1, ref TLockableKey key2) + where TLockableKey : ILockableKey; + + /// + /// Sort an array of app data structures (or classes) by lock code and lock type; these will be passed to Lockable*Session.Lock + /// + /// The type of the app data struct or class containing key info + /// The array of app key data + void SortLockCodes(TLockableKey[] keys) + where TLockableKey : ILockableKey; + + /// + /// Sort an array of app data structures (or classes) by lock code and lock type; these will be passed to Lockable*Session.Lock + /// + /// The type of the app data struct or class containing key info + /// The array of app key data + /// The starting index to sort + /// The number of keys to sort + void SortLockCodes(TLockableKey[] keys, int start, int count) + where TLockableKey : ILockableKey; + + /// + /// Locks the keys identified in the passed array. + /// + /// + /// keyCodes to be locked, and whether that locking is shared or exclusive; must be sorted by . + void Lock(TLockableKey[] keys) + where TLockableKey : ILockableKey; + + /// + /// Locks the keys identified in the passed array. + /// + /// + /// keyCodes to be locked, and whether that locking is shared or exclusive; must be sorted by . + /// The starting index to Lock + /// The number of keys to Lock + void Lock(TLockableKey[] keys, int start, int count) + where TLockableKey : ILockableKey; + + /// + /// Unlocks the keys identified in the passed array. + /// + /// + /// keyCodes to be unlocked, and whether that unlocking is shared or exclusive; must be sorted by . + void Unlock(TLockableKey[] keys) + where TLockableKey : ILockableKey; + + /// + /// Unlocks the keys identified in the passed array. + /// + /// + /// keyCodes to be unlocked, and whether that unlocking is shared or exclusive; must be sorted by . 
+ /// The starting index to Unlock + /// The number of keys to Unlock + void Unlock(TLockableKey[] keys, int start, int count) + where TLockableKey : ILockableKey; } } diff --git a/cs/src/core/ClientSession/LockableContext.cs b/cs/src/core/ClientSession/LockableContext.cs index 89351cec5..39a2c34fa 100644 --- a/cs/src/core/ClientSession/LockableContext.cs +++ b/cs/src/core/ClientSession/LockableContext.cs @@ -39,55 +39,99 @@ internal LockableContext(ClientSession - public unsafe void Lock(ref Key key, LockType lockType) + public bool NeedKeyLockCode => clientSession.NeedKeyLockCode; + + /// + public long GetLockCode(Key key, out long keyHash) => clientSession.GetLockCode(ref key, out keyHash); + + /// + public long GetLockCode(ref Key key, out long keyHash) => clientSession.GetLockCode(ref key, out keyHash); + + /// + public long GetLockCode(Key key, long keyHash) => clientSession.GetLockCode(ref key, keyHash); + + /// + public long GetLockCode(ref Key key, long keyHash) => clientSession.GetLockCode(ref key, keyHash); + + /// + public int CompareLockCodes(TLockableKey key1, TLockableKey key2) where TLockableKey : ILockableKey => clientSession.CompareLockCodes(key1, key2); + + /// + public int CompareLockCodes(ref TLockableKey key1, ref TLockableKey key2) where TLockableKey : ILockableKey => clientSession.CompareLockCodes(ref key1, ref key2); + + /// + public void SortLockCodes(TLockableKey[] keys) where TLockableKey : ILockableKey => clientSession.SortLockCodes(keys); + + /// + public void SortLockCodes(TLockableKey[] keys, int start, int count) where TLockableKey : ILockableKey => clientSession.SortLockCodes(keys, start, count); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static unsafe void DoInternalLockOp(FasterSession fasterSession, ClientSession clientSession, + TLockableKey[] keys, int start, int count, LockOperationType lockOpType) + where FasterSession : IFasterSession + where TLockableKey : ILockableKey { - clientSession.CheckIsAcquiredLockable(); - Debug.Assert(!clientSession.fht.epoch.ThisInstanceProtected()); - clientSession.UnsafeResumeThread(); - try + // The key codes are sorted, but there may be duplicates; the sorting is such that exclusive locks come first for each key code, + // which of course allows the session to do shared operations as well, so we take the first occurrence of each key code. + // Unlock has to be done in the reverse order of locking, so we take the *last* occurrence of each key there. + var end = start + count - 1; + if (lockOpType == LockOperationType.Lock) { - LockOperation lockOp = new(LockOperationType.Lock, lockType); - - OperationStatus status; - do - status = clientSession.fht.InternalLock(ref key, lockOp, out _); - while (clientSession.fht.HandleImmediateNonPendingRetryStatus(status, clientSession.ctx, FasterSession)); - Debug.Assert(status == OperationStatus.SUCCESS); + for (int ii = start; ii <= end; ++ii) + { + var lockType = DoLockOp(fasterSession, clientSession, keys, start, lockOpType, ii); + if (lockType == LockType.Exclusive) + ++clientSession.exclusiveLockCount; + else if (lockType == LockType.Shared) + ++clientSession.sharedLockCount; + } + return; + } + // LockOperationType.Unlock; go through the keys in reverse. 
+ for (int ii = end; ii >= start; --ii) + { + var lockType = DoLockOp(fasterSession, clientSession, keys, start, lockOpType, ii); if (lockType == LockType.Exclusive) - ++clientSession.exclusiveLockCount; - else - ++clientSession.sharedLockCount; + --clientSession.exclusiveLockCount; + else if (lockType == LockType.Shared) + --clientSession.sharedLockCount; } - finally + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static unsafe LockType DoLockOp(FasterSession fasterSession, ClientSession clientSession, + TLockableKey[] keys, int start, LockOperationType lockOpType, int idx) + where FasterSession : IFasterSession + where TLockableKey : ILockableKey + { + ref var key = ref keys[idx]; + if (idx == start || clientSession.fht.LockTable.GetBucketIndex(key.LockCode) != clientSession.fht.LockTable.GetBucketIndex(keys[idx - 1].LockCode)) { - clientSession.UnsafeSuspendThread(); + OperationStatus status; + do + status = clientSession.fht.InternalLock(key.LockCode, new(lockOpType, key.LockType)); + while (clientSession.fht.HandleImmediateNonPendingRetryStatus(status, fasterSession)); + Debug.Assert(status == OperationStatus.SUCCESS); + return key.LockType; } + return LockType.None; } /// - public unsafe void Lock(Key key, LockType lockType) => Lock(ref key, lockType); - + public void Lock(TLockableKey[] keys) where TLockableKey : ILockableKey => Lock(keys, 0, keys.Length); + /// - public void Unlock(ref Key key, LockType lockType) + public void Lock(TLockableKey[] keys, int start, int count) + where TLockableKey : ILockableKey { clientSession.CheckIsAcquiredLockable(); - Debug.Assert(!clientSession.fht.epoch.ThisInstanceProtected()); + Debug.Assert(!clientSession.fht.epoch.ThisInstanceProtected(), "Trying to protect an already-protected epoch for LockableUnsafeContext.Lock()"); + clientSession.UnsafeResumeThread(); try { - LockOperation lockOp = new(LockOperationType.Unlock, lockType); - - OperationStatus status; - do - status = clientSession.fht.InternalLock(ref key, lockOp, out _); - while (clientSession.fht.HandleImmediateNonPendingRetryStatus(status, clientSession.ctx, FasterSession)); - Debug.Assert(status == OperationStatus.SUCCESS); - - if (lockType == LockType.Exclusive) - --clientSession.exclusiveLockCount; - else - --clientSession.sharedLockCount; + DoInternalLockOp(FasterSession, clientSession, keys, start, count, LockOperationType.Lock); } finally { @@ -96,25 +140,19 @@ public void Unlock(ref Key key, LockType lockType) } /// - public void Unlock(Key key, LockType lockType) => Unlock(ref key, lockType); + public void Unlock(TLockableKey[] keys) where TLockableKey : ILockableKey => Unlock(keys, 0, keys.Length); /// - public (bool exclusive, byte shared) IsLocked(ref Key key) + public void Unlock(TLockableKey[] keys, int start, int count) + where TLockableKey : ILockableKey { clientSession.CheckIsAcquiredLockable(); - Debug.Assert(!clientSession.fht.epoch.ThisInstanceProtected()); + Debug.Assert(!clientSession.fht.epoch.ThisInstanceProtected(), "Trying to protect an already-protected epoch for LockableUnsafeContext.Unlock()"); + clientSession.UnsafeResumeThread(); try { - LockOperation lockOp = new(LockOperationType.IsLocked, LockType.None); - - OperationStatus status; - RecordInfo lockInfo; - do - status = clientSession.fht.InternalLock(ref key, lockOp, out lockInfo); - while (clientSession.fht.HandleImmediateNonPendingRetryStatus(status, clientSession.ctx, FasterSession)); - Debug.Assert(status == OperationStatus.SUCCESS); - return (lockInfo.IsLockedExclusive, 
lockInfo.NumLockedShared); + DoInternalLockOp(FasterSession, clientSession, keys, start, count, LockOperationType.Unlock); } finally { @@ -122,9 +160,6 @@ public void Unlock(ref Key key, LockType lockType) } } - /// - public (bool exclusive, byte shared) IsLocked(Key key) => IsLocked(ref key); - /// /// The session id of FasterSession /// @@ -180,7 +215,7 @@ public Status Read(ref Key key, ref Input input, ref Output output, Context user clientSession.UnsafeResumeThread(); try { - return clientSession.fht.ContextRead(ref key, ref input, ref output, userContext, FasterSession, serialNo, clientSession.ctx); + return clientSession.fht.ContextRead(ref key, ref input, ref output, userContext, FasterSession, serialNo); } finally { @@ -230,7 +265,7 @@ public Status Read(ref Key key, ref Input input, ref Output output, ref ReadOpti clientSession.UnsafeResumeThread(); try { - return clientSession.fht.ContextRead(ref key, ref input, ref output, ref readOptions, out recordMetadata, userContext, FasterSession, serialNo, clientSession.ctx); + return clientSession.fht.ContextRead(ref key, ref input, ref output, ref readOptions, out recordMetadata, userContext, FasterSession, serialNo); } finally { @@ -246,7 +281,7 @@ public Status ReadAtAddress(ref Input input, ref Output output, ref ReadOptions clientSession.UnsafeResumeThread(); try { - return clientSession.fht.ContextReadAtAddress(ref input, ref output, ref readOptions, userContext, FasterSession, serialNo, clientSession.ctx); + return clientSession.fht.ContextReadAtAddress(ref input, ref output, ref readOptions, userContext, FasterSession, serialNo); } finally { @@ -260,7 +295,7 @@ public ValueTask.ReadAsyncResult> R { Debug.Assert(!clientSession.fht.epoch.ThisInstanceProtected()); ReadOptions readOptions = default; - return clientSession.fht.ReadAsync(FasterSession, clientSession.ctx, ref key, ref input, ref readOptions, userContext, serialNo, cancellationToken); + return clientSession.fht.ReadAsync(FasterSession, ref key, ref input, ref readOptions, userContext, serialNo, cancellationToken); } /// @@ -269,7 +304,7 @@ public ValueTask.ReadAsyncResult> R { Debug.Assert(!clientSession.fht.epoch.ThisInstanceProtected()); ReadOptions readOptions = default; - return clientSession.fht.ReadAsync(FasterSession, clientSession.ctx, ref key, ref input, ref readOptions, context, serialNo, token); + return clientSession.fht.ReadAsync(FasterSession, ref key, ref input, ref readOptions, context, serialNo, token); } /// @@ -279,7 +314,7 @@ public ValueTask.ReadAsyncResult> R Debug.Assert(!clientSession.fht.epoch.ThisInstanceProtected()); Input input = default; ReadOptions readOptions = default; - return clientSession.fht.ReadAsync(FasterSession, clientSession.ctx, ref key, ref input, ref readOptions, userContext, serialNo, token); + return clientSession.fht.ReadAsync(FasterSession, ref key, ref input, ref readOptions, userContext, serialNo, token); } /// @@ -289,7 +324,7 @@ public ValueTask.ReadAsyncResult> R Debug.Assert(!clientSession.fht.epoch.ThisInstanceProtected()); Input input = default; ReadOptions readOptions = default; - return clientSession.fht.ReadAsync(FasterSession, clientSession.ctx, ref key, ref input, ref readOptions, context, serialNo, token); + return clientSession.fht.ReadAsync(FasterSession, ref key, ref input, ref readOptions, context, serialNo, token); } /// @@ -298,7 +333,7 @@ public ValueTask.ReadAsyncResult> R Context userContext = default, long serialNo = 0, CancellationToken cancellationToken = default) { 
Debug.Assert(!clientSession.fht.epoch.ThisInstanceProtected()); - return clientSession.fht.ReadAsync(FasterSession, clientSession.ctx, ref key, ref input, ref readOptions, userContext, serialNo, cancellationToken); + return clientSession.fht.ReadAsync(FasterSession, ref key, ref input, ref readOptions, userContext, serialNo, cancellationToken); } /// @@ -308,7 +343,7 @@ public ValueTask.ReadAsyncResult> R { Debug.Assert(!clientSession.fht.epoch.ThisInstanceProtected()); Key key = default; - return clientSession.fht.ReadAsync(FasterSession, clientSession.ctx, ref key, ref input, ref readOptions, userContext, serialNo, cancellationToken, noKey: true); + return clientSession.fht.ReadAsync(FasterSession, ref key, ref input, ref readOptions, userContext, serialNo, cancellationToken, noKey: true); } /// @@ -329,7 +364,7 @@ public Status Upsert(ref Key key, ref Input input, ref Value desiredValue, ref O clientSession.UnsafeResumeThread(); try { - return clientSession.fht.ContextUpsert(ref key, ref input, ref desiredValue, ref output, userContext, FasterSession, serialNo, clientSession.ctx); + return clientSession.fht.ContextUpsert(ref key, ref input, ref desiredValue, ref output, userContext, FasterSession, serialNo); } finally { @@ -345,7 +380,7 @@ public Status Upsert(ref Key key, ref Input input, ref Value desiredValue, ref O clientSession.UnsafeResumeThread(); try { - return clientSession.fht.ContextUpsert(ref key, ref input, ref desiredValue, ref output, out recordMetadata, userContext, FasterSession, serialNo, clientSession.ctx); + return clientSession.fht.ContextUpsert(ref key, ref input, ref desiredValue, ref output, out recordMetadata, userContext, FasterSession, serialNo); } finally { @@ -376,7 +411,7 @@ public ValueTask.UpsertAsyncResult> public ValueTask.UpsertAsyncResult> UpsertAsync(ref Key key, ref Input input, ref Value desiredValue, Context userContext = default, long serialNo = 0, CancellationToken token = default) { Debug.Assert(!clientSession.fht.epoch.ThisInstanceProtected()); - return clientSession.fht.UpsertAsync(FasterSession, clientSession.ctx, ref key, ref input, ref desiredValue, userContext, serialNo, token); + return clientSession.fht.UpsertAsync(FasterSession, ref key, ref input, ref desiredValue, userContext, serialNo, token); } /// @@ -402,7 +437,7 @@ public Status RMW(ref Key key, ref Input input, ref Output output, out RecordMet clientSession.UnsafeResumeThread(); try { - return clientSession.fht.ContextRMW(ref key, ref input, ref output, out recordMetadata, userContext, FasterSession, serialNo, clientSession.ctx); + return clientSession.fht.ContextRMW(ref key, ref input, ref output, out recordMetadata, userContext, FasterSession, serialNo); } finally { @@ -439,7 +474,7 @@ public Status RMW(Key key, Input input, Context userContext = default, long seri public ValueTask.RmwAsyncResult> RMWAsync(ref Key key, ref Input input, Context context = default, long serialNo = 0, CancellationToken token = default) { Debug.Assert(!clientSession.fht.epoch.ThisInstanceProtected()); - return clientSession.fht.RmwAsync(FasterSession, clientSession.ctx, ref key, ref input, context, serialNo, token); + return clientSession.fht.RmwAsync(FasterSession, ref key, ref input, context, serialNo, token); } /// @@ -455,7 +490,7 @@ public Status Delete(ref Key key, Context userContext = default, long serialNo = clientSession.UnsafeResumeThread(); try { - return clientSession.fht.ContextDelete(ref key, userContext, FasterSession, serialNo, clientSession.ctx); + return 
clientSession.fht.ContextDelete(ref key, userContext, FasterSession, serialNo); } finally { @@ -473,7 +508,7 @@ public Status Delete(Key key, Context userContext = default, long serialNo = 0) public ValueTask.DeleteAsyncResult> DeleteAsync(ref Key key, Context userContext = default, long serialNo = 0, CancellationToken token = default) { Debug.Assert(!clientSession.fht.epoch.ThisInstanceProtected()); - return clientSession.fht.DeleteAsync(FasterSession, clientSession.ctx, ref key, userContext, serialNo, token); + return clientSession.fht.DeleteAsync(FasterSession, ref key, userContext, serialNo, token); } /// @@ -498,7 +533,7 @@ public void Refresh() clientSession.UnsafeResumeThread(); try { - clientSession.fht.InternalRefresh(clientSession.ctx, FasterSession); + clientSession.fht.InternalRefresh(FasterSession); } finally { @@ -520,27 +555,20 @@ public InternalFasterSession(ClientSession true; // We only lock in Lock/Unlock, explicitly; these are longer-duration locks. + public bool DisableTransientLocking => true; // We only lock in Lock/Unlock, explicitly; these are longer-duration locks. public bool IsManualLocking => true; - public SessionType SessionType => SessionType.LockableContext; - #endregion IFunctions - Optional features supported - #region IFunctions - Reads [MethodImpl(MethodImplOptions.AggressiveInlining)] public bool SingleReader(ref Key key, ref Input input, ref Value value, ref Output dst, ref RecordInfo recordInfo, ref ReadInfo readInfo) => _clientSession.functions.SingleReader(ref key, ref input, ref value, ref dst, ref readInfo); [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool ConcurrentReader(ref Key key, ref Input input, ref Value value, ref Output dst, ref RecordInfo recordInfo, ref ReadInfo readInfo) + public bool ConcurrentReader(ref Key key, ref Input input, ref Value value, ref Output dst, ref RecordInfo recordInfo, ref ReadInfo readInfo, out EphemeralLockResult lockResult) { - if (_clientSession.functions.ConcurrentReader(ref key, ref input, ref value, ref dst, ref readInfo)) - return true; - if (readInfo.Action == ReadAction.Expire) - recordInfo.Tombstone = true; - return false; + lockResult = EphemeralLockResult.Success; // Ephemeral locking is not used with Lockable contexts + return _clientSession.functions.ConcurrentReader(ref key, ref input, ref value, ref dst, ref readInfo); } public void ReadCompletionCallback(ref Key key, ref Input input, ref Output output, Context ctx, Status status, RecordMetadata recordMetadata) @@ -561,12 +589,13 @@ public void PostSingleWriter(ref Key key, ref Input input, ref Value src, ref Va } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool ConcurrentWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, ref UpsertInfo upsertInfo) + public bool ConcurrentWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, ref UpsertInfo upsertInfo, out EphemeralLockResult lockResult) { + lockResult = EphemeralLockResult.Success; // Ephemeral locking is not used with Lockable contexts + if (!_clientSession.functions.ConcurrentWriter(ref key, ref input, ref src, ref dst, ref output, ref upsertInfo)) + return false; recordInfo.SetDirtyAndModified(); - - // Note: KeyIndexes do not need notification of in-place updates because the key does not change. 
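Throughout these hunks the trailing `clientSession.ctx` argument disappears from every ContextRead/ContextUpsert/ContextRMW/ContextDelete and *Async call: the execution context now travels with the IFasterSession wrapper itself, via the `Ctx` property added to `InternalFasterSession` later in this file. A minimal sketch of the call-shape change (the `Ctx`-lookup line inside FasterKV is an assumption, not code from this diff):

    // Before: the context was threaded explicitly through every operation.
    //   clientSession.fht.ContextRead(ref key, ref input, ref output, userContext,
    //                                 FasterSession, serialNo, clientSession.ctx);
    // After: FasterKV recovers it from the session wrapper.
    //   clientSession.fht.ContextRead(ref key, ref input, ref output, userContext,
    //                                 FasterSession, serialNo);
    //   ...and internally, something like: var ctx = fasterSession.Ctx;
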
- return _clientSession.functions.ConcurrentWriter(ref key, ref input, ref src, ref dst, ref output, ref upsertInfo); + return true; } #endregion IFunctions - Upserts @@ -598,7 +627,7 @@ public bool CopyUpdater(ref Key key, ref Input input, ref Value oldValue, ref Va => _clientSession.functions.CopyUpdater(ref key, ref input, ref oldValue, ref newValue, ref output, ref rmwInfo); [MethodImpl(MethodImplOptions.AggressiveInlining)] - public void PostCopyUpdater(ref Key key, ref Input input, ref Value oldValue, ref Value newValue, ref Output output, ref RecordInfo recordInfo, ref RMWInfo rmwInfo) + public void PostCopyUpdater(ref Key key, ref Input input, ref Value oldValue, ref Value newValue, ref Output output, ref RecordInfo recordInfo, ref RMWInfo rmwInfo) { recordInfo.SetDirtyAndModified(); _clientSession.functions.PostCopyUpdater(ref key, ref input, ref oldValue, ref newValue, ref output, ref rmwInfo); @@ -607,10 +636,13 @@ public void PostCopyUpdater(ref Key key, ref Input input, ref Value oldValue, re #region InPlaceUpdater [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool InPlaceUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, ref RMWInfo rmwInfo, out OperationStatus status) + public bool InPlaceUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, ref RMWInfo rmwInfo, out OperationStatus status, out EphemeralLockResult lockResult) { + lockResult = EphemeralLockResult.Success; // Ephemeral locking is not used with Lockable contexts + if (!_clientSession.InPlaceUpdater(ref key, ref input, ref value, ref output, ref recordInfo, ref rmwInfo, out status)) + return false; recordInfo.SetDirtyAndModified(); - return _clientSession.InPlaceUpdater(ref key, ref input, ref output, ref value, ref recordInfo, ref rmwInfo, out status); + return true; } public void RMWCompletionCallback(ref Key key, ref Input input, ref Output output, Context ctx, Status status, RecordMetadata recordMetadata) @@ -625,18 +657,21 @@ public bool SingleDeleter(ref Key key, ref Value value, ref RecordInfo recordInf => _clientSession.functions.SingleDeleter(ref key, ref value, ref deleteInfo); [MethodImpl(MethodImplOptions.AggressiveInlining)] - public void PostSingleDeleter(ref Key key, ref RecordInfo recordInfo, ref DeleteInfo deleteInfo) + public void PostSingleDeleter(ref Key key, ref RecordInfo recordInfo, ref DeleteInfo deleteInfo) { recordInfo.SetDirtyAndModified(); _clientSession.functions.PostSingleDeleter(ref key, ref deleteInfo); } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool ConcurrentDeleter(ref Key key, ref Value value, ref RecordInfo recordInfo, ref DeleteInfo deleteInfo) + public bool ConcurrentDeleter(ref Key key, ref Value value, ref RecordInfo recordInfo, ref DeleteInfo deleteInfo, out EphemeralLockResult lockResult) { + lockResult = EphemeralLockResult.Success; // Ephemeral locking is not used with Lockable contexts + if (!_clientSession.functions.ConcurrentDeleter(ref key, ref value, ref deleteInfo)) + return false; recordInfo.SetDirtyAndModified(); recordInfo.SetTombstone(); - return _clientSession.functions.ConcurrentDeleter(ref key, ref value, ref deleteInfo); + return true; } #endregion IFunctions - Deletes @@ -661,21 +696,40 @@ public void CheckpointCompletionCallback(int sessionID, string sessionName, Comm } #endregion IFunctions - Checkpointing - #region Ephemeral locking - public bool TryLockEphemeralExclusive(ref RecordInfo recordInfo) + #region Transient 
locking
+        public bool TryLockTransientExclusive(ref Key key, ref OperationStackContext<Key, Value> stackCtx)
         {
-            Debug.Assert(recordInfo.IsLockedExclusive, $"Attempting to use a non-XLocked key in a Lockable context (requesting XLock): XLocked {recordInfo.IsLockedExclusive}, Slocked {recordInfo.NumLockedShared}");
+            Debug.Assert(_clientSession.fht.LockTable.IsLockedExclusive(ref key, ref stackCtx.hei),
+                $"Attempting to use a non-XLocked key in a Lockable context (requesting XLock):" +
+                $" XLocked {_clientSession.fht.LockTable.IsLockedExclusive(ref key, ref stackCtx.hei)}," +
+                $" Slocked {_clientSession.fht.LockTable.IsLockedShared(ref key, ref stackCtx.hei)}");
             return true;
         }

-        public bool TryLockEphemeralShared(ref RecordInfo recordInfo)
+        public bool TryLockTransientShared(ref Key key, ref OperationStackContext<Key, Value> stackCtx)
         {
-            Debug.Assert(recordInfo.IsLocked, $"Attempting to use a non-Locked (S or X) key in a Lockable context (requesting SLock): XLocked {recordInfo.IsLockedExclusive}, Slocked {recordInfo.NumLockedShared}");
+            Debug.Assert(_clientSession.fht.LockTable.IsLocked(ref key, ref stackCtx.hei),
+                $"Attempting to use a non-Locked (S or X) key in a Lockable context (requesting SLock):" +
+                $" XLocked {_clientSession.fht.LockTable.IsLockedExclusive(ref key, ref stackCtx.hei)}," +
+                $" Slocked {_clientSession.fht.LockTable.IsLockedShared(ref key, ref stackCtx.hei)}");
             return true;
         }

-        public void UnlockEphemeralExclusive(ref RecordInfo recordInfo) { }
-        public bool TryUnlockEphemeralShared(ref RecordInfo recordInfo) => true;
+        public void UnlockTransientExclusive(ref Key key, ref OperationStackContext<Key, Value> stackCtx)
+        {
+            Debug.Assert(_clientSession.fht.LockTable.IsLockedExclusive(ref key, ref stackCtx.hei),
+                $"Attempting to unlock a non-XLocked key in a Lockable context (releasing XLock):" +
+                $" XLocked {_clientSession.fht.LockTable.IsLockedExclusive(ref key, ref stackCtx.hei)}," +
+                $" Slocked {_clientSession.fht.LockTable.IsLockedShared(ref key, ref stackCtx.hei)}");
+        }
+
+        public void UnlockTransientShared(ref Key key, ref OperationStackContext<Key, Value> stackCtx)
+        {
+            Debug.Assert(_clientSession.fht.LockTable.IsLockedShared(ref key, ref stackCtx.hei),
+                $"Attempting to unlock a non-SLocked key in a Lockable context (releasing SLock):" +
+                $" XLocked {_clientSession.fht.LockTable.IsLockedExclusive(ref key, ref stackCtx.hei)}," +
+                $" Slocked {_clientSession.fht.LockTable.IsLockedShared(ref key, ref stackCtx.hei)}");
+        }
         #endregion

         #region Internal utilities
@@ -698,6 +752,8 @@ public IHeapContainer<Input> GetHeapContainer(ref Input input)
             public bool CompletePendingWithOutputs(out CompletedOutputIterator<Key, Value, Input, Output, Context> completedOutputs, bool wait = false, bool spinWaitForCommit = false)
                 => _clientSession.CompletePendingWithOutputs(out completedOutputs, wait, spinWaitForCommit);
+
+            public FasterKV<Key, Value>.FasterExecutionContext<Input, Output, Context> Ctx => this._clientSession.ctx;
             #endregion Internal utilities
         }
         #endregion IFasterSession
diff --git a/cs/src/core/ClientSession/LockableUnsafeContext.cs b/cs/src/core/ClientSession/LockableUnsafeContext.cs
index 85bb87cdf..4ffc23493 100644
--- a/cs/src/core/ClientSession/LockableUnsafeContext.cs
+++ b/cs/src/core/ClientSession/LockableUnsafeContext.cs
@@ -15,7 +15,7 @@ namespace FASTER.core
         where Functions : IFunctions<Key, Value, Input, Output, Context>
     {
         readonly ClientSession<Key, Value, Input, Output, Context, Functions> clientSession;
-        internal readonly InternalFasterSession FasterSession;
+        internal readonly LockableContext<Key, Value, Input, Output, Context, Functions>.InternalFasterSession FasterSession;

         /// Indicates whether this struct has been initialized
         public bool IsNull => this.clientSession is null;
@@ -23,7 +23,7 
@@ namespace FASTER.core internal LockableUnsafeContext(ClientSession clientSession) { this.clientSession = clientSession; - FasterSession = new InternalFasterSession(clientSession); + FasterSession = new LockableContext.InternalFasterSession(clientSession); } #region Begin/EndUnsafe @@ -48,72 +48,59 @@ internal LockableUnsafeContext(ClientSession - public unsafe void Lock(ref Key key, LockType lockType) - { - clientSession.CheckIsAcquiredLockable(); - Debug.Assert(clientSession.fht.epoch.ThisInstanceProtected(), "Epoch protection required for Lock()"); + public bool NeedKeyLockCode => clientSession.NeedKeyLockCode; - LockOperation lockOp = new(LockOperationType.Lock, lockType); + /// + public long GetLockCode(Key key, out long keyHash) => clientSession.GetLockCode(ref key, out keyHash); - OperationStatus status; - do - status = clientSession.fht.InternalLock(ref key, lockOp, out _); - while (clientSession.fht.HandleImmediateNonPendingRetryStatus(status, clientSession.ctx, FasterSession)); - Debug.Assert(status == OperationStatus.SUCCESS); + /// + public long GetLockCode(ref Key key, out long keyHash) => clientSession.GetLockCode(ref key, out keyHash); - if (lockType == LockType.Exclusive) - ++clientSession.exclusiveLockCount; - else - ++clientSession.sharedLockCount; - } + /// + public long GetLockCode(Key key, long keyHash) => clientSession.GetLockCode(ref key, keyHash); /// - public unsafe void Lock(Key key, LockType lockType) => Lock(ref key, lockType); + public long GetLockCode(ref Key key, long keyHash) => clientSession.GetLockCode(ref key, keyHash); /// - public void Unlock(ref Key key, LockType lockType) - { - clientSession.CheckIsAcquiredLockable(); - Debug.Assert(clientSession.fht.epoch.ThisInstanceProtected(), "Epoch protection required for Unlock()"); + public int CompareLockCodes(TLockableKey key1, TLockableKey key2) where TLockableKey : ILockableKey => clientSession.CompareLockCodes(key1, key2); - LockOperation lockOp = new(LockOperationType.Unlock, lockType); + /// + public int CompareLockCodes(ref TLockableKey key1, ref TLockableKey key2) where TLockableKey : ILockableKey => clientSession.CompareLockCodes(ref key1, ref key2); - OperationStatus status; - do - status = clientSession.fht.InternalLock(ref key, lockOp, out _); - while (clientSession.fht.HandleImmediateNonPendingRetryStatus(status, clientSession.ctx, FasterSession)); - Debug.Assert(status == OperationStatus.SUCCESS); + /// + public void SortLockCodes(TLockableKey[] keys) where TLockableKey : ILockableKey => clientSession.SortLockCodes(keys); - if (lockType == LockType.Exclusive) - --clientSession.exclusiveLockCount; - else - --clientSession.sharedLockCount; - } + /// + public void SortLockCodes(TLockableKey[] keys, int start, int count) where TLockableKey : ILockableKey => clientSession.SortLockCodes(keys, start, count); /// - public void Unlock(Key key, LockType lockType) => Unlock(ref key, lockType); + public void Lock(TLockableKey[] keys) where TLockableKey : ILockableKey => Lock(keys, 0, keys.Length); /// - public (bool exclusive, byte shared) IsLocked(ref Key key) + public void Lock(TLockableKey[] keys, int start, int count) + where TLockableKey : ILockableKey { clientSession.CheckIsAcquiredLockable(); - Debug.Assert(clientSession.fht.epoch.ThisInstanceProtected(), "Epoch protection required for IsLocked()"); - - LockOperation lockOp = new(LockOperationType.IsLocked, LockType.None); + Debug.Assert(clientSession.fht.epoch.ThisInstanceProtected(), "Epoch protection required for LockableUnsafeContext.Lock()"); 
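The locking surface here becomes array-based and deadlock-safe: single-key Lock/Unlock (and IsLocked) give way to batch Lock/Unlock over keys that carry a lock code, with SortLockCodes establishing one global acquisition order. A usage sketch; only SortLockCodes, Lock, and Unlock come from this diff, while the `FixedLengthLockableKeyStruct` wrapper name and the Begin/End acquire calls are assumptions:

    var luContext = session.LockableUnsafeContext;
    luContext.BeginUnsafe();       // epoch protection, per the Begin/EndUnsafe region above
    luContext.BeginLockable();     // assumed acquire call, guarded by CheckIsAcquiredLockable()
    var keys = new[]
    {
        new FixedLengthLockableKeyStruct<long>(24, LockType.Shared, luContext),     // hypothetical wrapper
        new FixedLengthLockableKeyStruct<long>(51, LockType.Exclusive, luContext),
    };
    try
    {
        luContext.SortLockCodes(keys);   // one global order, so concurrent sessions cannot deadlock
        luContext.Lock(keys);
        // ... operate on the locked keys ...
        luContext.Unlock(keys);
    }
    finally
    {
        luContext.EndLockable();
        luContext.EndUnsafe();
    }
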
- OperationStatus status; - RecordInfo lockInfo; - do - status = clientSession.fht.InternalLock(ref key, lockOp, out lockInfo); - while (clientSession.fht.HandleImmediateNonPendingRetryStatus(status, clientSession.ctx, FasterSession)); - Debug.Assert(status == OperationStatus.SUCCESS); - return (lockInfo.IsLockedExclusive, lockInfo.NumLockedShared); + LockableContext.DoInternalLockOp(FasterSession, clientSession, keys, start, count, LockOperationType.Lock); } /// - public (bool exclusive, byte shared) IsLocked(Key key) => IsLocked(ref key); + public void Unlock(TLockableKey[] keys) where TLockableKey : ILockableKey => Unlock(keys, 0, keys.Length); + + /// + public void Unlock(TLockableKey[] keys, int start, int count) + where TLockableKey : ILockableKey + { + clientSession.CheckIsAcquiredLockable(); + Debug.Assert(clientSession.fht.epoch.ThisInstanceProtected(), "Epoch protection required for LockableUnsafeContext.Unlock()"); + + LockableContext.DoInternalLockOp(FasterSession, clientSession, keys, start, count, LockOperationType.Unlock); + } /// /// The session id of FasterSession @@ -150,7 +137,7 @@ public ValueTask> Co public Status Read(ref Key key, ref Input input, ref Output output, Context userContext = default, long serialNo = 0) { Debug.Assert(clientSession.fht.epoch.ThisInstanceProtected()); - return clientSession.fht.ContextRead(ref key, ref input, ref output, userContext, FasterSession, serialNo, clientSession.ctx); + return clientSession.fht.ContextRead(ref key, ref input, ref output, userContext, FasterSession, serialNo); } /// @@ -192,7 +179,7 @@ public Status Read(Key key, out Output output, Context userContext = default, lo public Status Read(ref Key key, ref Input input, ref Output output, ref ReadOptions readOptions, out RecordMetadata recordMetadata, Context userContext = default, long serialNo = 0) { Debug.Assert(clientSession.fht.epoch.ThisInstanceProtected()); - return clientSession.fht.ContextRead(ref key, ref input, ref output, ref readOptions, out recordMetadata, userContext, FasterSession, serialNo, clientSession.ctx); + return clientSession.fht.ContextRead(ref key, ref input, ref output, ref readOptions, out recordMetadata, userContext, FasterSession, serialNo); } /// @@ -200,7 +187,7 @@ public Status Read(ref Key key, ref Input input, ref Output output, ref ReadOpti public Status ReadAtAddress(ref Input input, ref Output output, ref ReadOptions readOptions, Context userContext = default, long serialNo = 0) { Debug.Assert(clientSession.fht.epoch.ThisInstanceProtected()); - return clientSession.fht.ContextReadAtAddress(ref input, ref output, ref readOptions, userContext, FasterSession, serialNo, clientSession.ctx); + return clientSession.fht.ContextReadAtAddress(ref input, ref output, ref readOptions, userContext, FasterSession, serialNo); } /// @@ -209,7 +196,7 @@ public ValueTask.ReadAsyncResult> R { Debug.Assert(!clientSession.fht.epoch.ThisInstanceProtected()); ReadOptions readOptions = default; - return clientSession.fht.ReadAsync(FasterSession, clientSession.ctx, ref key, ref input, ref readOptions, userContext, serialNo, cancellationToken); + return clientSession.fht.ReadAsync(FasterSession, ref key, ref input, ref readOptions, userContext, serialNo, cancellationToken); } /// @@ -218,7 +205,7 @@ public ValueTask.ReadAsyncResult> R { Debug.Assert(!clientSession.fht.epoch.ThisInstanceProtected()); ReadOptions readOptions = default; - return clientSession.fht.ReadAsync(FasterSession, clientSession.ctx, ref key, ref input, ref readOptions, context, serialNo, 
token); + return clientSession.fht.ReadAsync(FasterSession, ref key, ref input, ref readOptions, context, serialNo, token); } /// @@ -228,7 +215,7 @@ public ValueTask.ReadAsyncResult> R Debug.Assert(!clientSession.fht.epoch.ThisInstanceProtected()); Input input = default; ReadOptions readOptions = default; - return clientSession.fht.ReadAsync(FasterSession, clientSession.ctx, ref key, ref input, ref readOptions, userContext, serialNo, token); + return clientSession.fht.ReadAsync(FasterSession, ref key, ref input, ref readOptions, userContext, serialNo, token); } /// @@ -238,7 +225,7 @@ public ValueTask.ReadAsyncResult> R Debug.Assert(!clientSession.fht.epoch.ThisInstanceProtected()); Input input = default; ReadOptions readOptions = default; - return clientSession.fht.ReadAsync(FasterSession, clientSession.ctx, ref key, ref input, ref readOptions, context, serialNo, token); + return clientSession.fht.ReadAsync(FasterSession, ref key, ref input, ref readOptions, context, serialNo, token); } /// @@ -247,7 +234,7 @@ public ValueTask.ReadAsyncResult> R Context userContext = default, long serialNo = 0, CancellationToken cancellationToken = default) { Debug.Assert(!clientSession.fht.epoch.ThisInstanceProtected()); - return clientSession.fht.ReadAsync(FasterSession, clientSession.ctx, ref key, ref input, ref readOptions, userContext, serialNo, cancellationToken); + return clientSession.fht.ReadAsync(FasterSession, ref key, ref input, ref readOptions, userContext, serialNo, cancellationToken); } /// @@ -257,7 +244,7 @@ public ValueTask.ReadAsyncResult> R { Debug.Assert(!clientSession.fht.epoch.ThisInstanceProtected()); Key key = default; - return clientSession.fht.ReadAsync(FasterSession, clientSession.ctx, ref key, ref input, ref readOptions, userContext, serialNo, cancellationToken, noKey: true); + return clientSession.fht.ReadAsync(FasterSession, ref key, ref input, ref readOptions, userContext, serialNo, cancellationToken, noKey: true); } /// @@ -275,7 +262,7 @@ public Status Upsert(ref Key key, ref Value desiredValue, Context userContext = public Status Upsert(ref Key key, ref Input input, ref Value desiredValue, ref Output output, Context userContext = default, long serialNo = 0) { Debug.Assert(clientSession.fht.epoch.ThisInstanceProtected()); - return clientSession.fht.ContextUpsert(ref key, ref input, ref desiredValue, ref output, userContext, FasterSession, serialNo, clientSession.ctx); + return clientSession.fht.ContextUpsert(ref key, ref input, ref desiredValue, ref output, userContext, FasterSession, serialNo); } /// @@ -283,7 +270,7 @@ public Status Upsert(ref Key key, ref Input input, ref Value desiredValue, ref O public Status Upsert(ref Key key, ref Input input, ref Value desiredValue, ref Output output, out RecordMetadata recordMetadata, Context userContext = default, long serialNo = 0) { Debug.Assert(clientSession.fht.epoch.ThisInstanceProtected()); - return clientSession.fht.ContextUpsert(ref key, ref input, ref desiredValue, ref output, out recordMetadata, userContext, FasterSession, serialNo, clientSession.ctx); + return clientSession.fht.ContextUpsert(ref key, ref input, ref desiredValue, ref output, out recordMetadata, userContext, FasterSession, serialNo); } /// @@ -309,7 +296,8 @@ public ValueTask.UpsertAsyncResult> public ValueTask.UpsertAsyncResult> UpsertAsync(ref Key key, ref Input input, ref Value desiredValue, Context userContext = default, long serialNo = 0, CancellationToken token = default) { Debug.Assert(!clientSession.fht.epoch.ThisInstanceProtected()); - return 
clientSession.fht.UpsertAsync(FasterSession, clientSession.ctx, ref key, ref input, ref desiredValue, userContext, serialNo, token); + return clientSession.fht.UpsertAsync.InternalFasterSession>( + FasterSession, ref key, ref input, ref desiredValue, userContext, serialNo, token); } /// @@ -332,7 +320,7 @@ public Status RMW(ref Key key, ref Input input, ref Output output, Context userC public Status RMW(ref Key key, ref Input input, ref Output output, out RecordMetadata recordMetadata, Context userContext = default, long serialNo = 0) { Debug.Assert(clientSession.fht.epoch.ThisInstanceProtected()); - return clientSession.fht.ContextRMW(ref key, ref input, ref output, out recordMetadata, userContext, FasterSession, serialNo, clientSession.ctx); + return clientSession.fht.ContextRMW(ref key, ref input, ref output, out recordMetadata, userContext, FasterSession, serialNo); } /// @@ -364,7 +352,8 @@ public Status RMW(Key key, Input input, Context userContext = default, long seri public ValueTask.RmwAsyncResult> RMWAsync(ref Key key, ref Input input, Context context = default, long serialNo = 0, CancellationToken token = default) { Debug.Assert(!clientSession.fht.epoch.ThisInstanceProtected()); - return clientSession.fht.RmwAsync(FasterSession, clientSession.ctx, ref key, ref input, context, serialNo, token); + return clientSession.fht.RmwAsync.InternalFasterSession>( + FasterSession, ref key, ref input, context, serialNo, token); } /// @@ -377,7 +366,8 @@ public ValueTask.RmwAsyncResult> RM public Status Delete(ref Key key, Context userContext = default, long serialNo = 0) { Debug.Assert(clientSession.fht.epoch.ThisInstanceProtected()); - return clientSession.fht.ContextDelete(ref key, userContext, FasterSession, serialNo, clientSession.ctx); + return clientSession.fht.ContextDelete.InternalFasterSession>( + ref key, userContext, FasterSession, serialNo); } /// @@ -390,7 +380,8 @@ public Status Delete(Key key, Context userContext = default, long serialNo = 0) public ValueTask.DeleteAsyncResult> DeleteAsync(ref Key key, Context userContext = default, long serialNo = 0, CancellationToken token = default) { Debug.Assert(!clientSession.fht.epoch.ThisInstanceProtected()); - return clientSession.fht.DeleteAsync(FasterSession, clientSession.ctx, ref key, userContext, serialNo, token); + return clientSession.fht.DeleteAsync.InternalFasterSession>( + FasterSession, ref key, userContext, serialNo, token); } /// @@ -412,203 +403,9 @@ internal bool IsModified(Key key) public void Refresh() { Debug.Assert(clientSession.fht.epoch.ThisInstanceProtected()); - clientSession.fht.InternalRefresh(clientSession.ctx, FasterSession); + clientSession.fht.InternalRefresh.InternalFasterSession>(FasterSession); } #endregion IFasterContext - - #region IFasterSession - - // This is a struct to allow JIT to inline calls (and bypass default interface call mechanism) - internal readonly struct InternalFasterSession : IFasterSession - { - private readonly ClientSession _clientSession; - - public InternalFasterSession(ClientSession clientSession) - { - _clientSession = clientSession; - } - - #region IFunctions - Optional features supported - public bool DisableEphemeralLocking => true; // We only lock in Lock/Unlock, explicitly; these are longer-duration locks. 
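The entire nested `InternalFasterSession` struct removed below is not rewritten; it is replaced by reuse. As the constructor hunk at the top of this file shows, LockableUnsafeContext now borrows LockableContext's wrapper, so the manual-locking callback logic lives in exactly one place:

    // LockableUnsafeContext.cs after this change (generic arguments restored here;
    // they are stripped in the hunk text above)
    internal readonly LockableContext<Key, Value, Input, Output, Context, Functions>.InternalFasterSession FasterSession;

    internal LockableUnsafeContext(ClientSession<Key, Value, Input, Output, Context, Functions> clientSession)
    {
        this.clientSession = clientSession;
        FasterSession = new LockableContext<Key, Value, Input, Output, Context, Functions>.InternalFasterSession(clientSession);
    }
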
- - public bool IsManualLocking => true; - - public SessionType SessionType => SessionType.LockableUnsafeContext; - #endregion IFunctions - Optional features supported - - #region IFunctions - Reads - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool SingleReader(ref Key key, ref Input input, ref Value value, ref Output dst, ref RecordInfo recordInfo, ref ReadInfo readInfo) - => _clientSession.functions.SingleReader(ref key, ref input, ref value, ref dst, ref readInfo); - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool ConcurrentReader(ref Key key, ref Input input, ref Value value, ref Output dst, ref RecordInfo recordInfo, ref ReadInfo readInfo) - { - if (_clientSession.functions.ConcurrentReader(ref key, ref input, ref value, ref dst, ref readInfo)) - return true; - if (readInfo.Action == ReadAction.Expire) - recordInfo.Tombstone = true; - return false; - } - - public void ReadCompletionCallback(ref Key key, ref Input input, ref Output output, Context ctx, Status status, RecordMetadata recordMetadata) - => _clientSession.functions.ReadCompletionCallback(ref key, ref input, ref output, ctx, status, recordMetadata); - - #endregion IFunctions - Reads - - #region IFunctions - Upserts - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool SingleWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, ref UpsertInfo upsertInfo, WriteReason reason) - => _clientSession.functions.SingleWriter(ref key, ref input, ref src, ref dst, ref output, ref upsertInfo, reason); - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public void PostSingleWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, ref UpsertInfo upsertInfo, WriteReason reason) - { - recordInfo.SetDirtyAndModified(); - _clientSession.functions.PostSingleWriter(ref key, ref input, ref src, ref dst, ref output, ref upsertInfo, reason); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool ConcurrentWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, ref UpsertInfo upsertInfo) - { - recordInfo.SetDirtyAndModified(); - - // Note: KeyIndexes do not need notification of in-place updates because the key does not change. 
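Note that the deleted `ConcurrentReader` above also takes the expire-on-read handling with it (the `readInfo.Action == ReadAction.Expire` branch that set `recordInfo.Tombstone`); the replacement wrappers earlier in this diff simply forward to `functions.ConcurrentReader`, so expiration appears to be centralized in the core operation path rather than duplicated per context (an inference from the removed code, not stated in this diff).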
- return _clientSession.functions.ConcurrentWriter(ref key, ref input, ref src, ref dst, ref output, ref upsertInfo); - } -#endregion IFunctions - Upserts - - #region IFunctions - RMWs - #region InitialUpdater - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool NeedInitialUpdate(ref Key key, ref Input input, ref Output output, ref RMWInfo rmwInfo) - => _clientSession.functions.NeedInitialUpdate(ref key, ref input, ref output, ref rmwInfo); - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool InitialUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, ref RMWInfo rmwInfo) - => _clientSession.functions.InitialUpdater(ref key, ref input, ref value, ref output, ref rmwInfo); - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public void PostInitialUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, ref RMWInfo rmwInfo) - { - recordInfo.SetDirtyAndModified(); - _clientSession.functions.PostInitialUpdater(ref key, ref input, ref value, ref output, ref rmwInfo); - } - #endregion InitialUpdater - - #region CopyUpdater - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool NeedCopyUpdate(ref Key key, ref Input input, ref Value oldValue, ref Output output, ref RMWInfo rmwInfo) - => _clientSession.functions.NeedCopyUpdate(ref key, ref input, ref oldValue, ref output, ref rmwInfo); - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool CopyUpdater(ref Key key, ref Input input, ref Value oldValue, ref Value newValue, ref Output output, ref RecordInfo recordInfo, ref RMWInfo rmwInfo) - => _clientSession.functions.CopyUpdater(ref key, ref input, ref oldValue, ref newValue, ref output, ref rmwInfo); - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public void PostCopyUpdater(ref Key key, ref Input input, ref Value oldValue, ref Value newValue, ref Output output, ref RecordInfo recordInfo, ref RMWInfo rmwInfo) - { - recordInfo.SetDirtyAndModified(); - _clientSession.functions.PostCopyUpdater(ref key, ref input, ref oldValue, ref newValue, ref output, ref rmwInfo); - } - #endregion CopyUpdater - - #region InPlaceUpdater - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool InPlaceUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, ref RMWInfo rmwInfo, out OperationStatus status) - { - recordInfo.SetDirtyAndModified(); - return _clientSession.InPlaceUpdater(ref key, ref input, ref output, ref value, ref recordInfo, ref rmwInfo, out status); - } - - public void RMWCompletionCallback(ref Key key, ref Input input, ref Output output, Context ctx, Status status, RecordMetadata recordMetadata) - => _clientSession.functions.RMWCompletionCallback(ref key, ref input, ref output, ctx, status, recordMetadata); - - #endregion InPlaceUpdater - #endregion IFunctions - RMWs - - #region IFunctions - Deletes - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool SingleDeleter(ref Key key, ref Value value, ref RecordInfo recordInfo, ref DeleteInfo deleteInfo) - => _clientSession.functions.SingleDeleter(ref key, ref value, ref deleteInfo); - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public void PostSingleDeleter(ref Key key, ref RecordInfo recordInfo, ref DeleteInfo deleteInfo) - { - recordInfo.SetDirtyAndModified(); - _clientSession.functions.PostSingleDeleter(ref key, ref deleteInfo); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool 
ConcurrentDeleter(ref Key key, ref Value value, ref RecordInfo recordInfo, ref DeleteInfo deleteInfo) - { - recordInfo.SetDirtyAndModified(); - recordInfo.SetTombstone(); - return _clientSession.functions.ConcurrentDeleter(ref key, ref value, ref deleteInfo); - } - #endregion IFunctions - Deletes - - #region IFunctions - Dispose - public void DisposeSingleWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, ref UpsertInfo upsertInfo, WriteReason reason) - => _clientSession.functions.DisposeSingleWriter(ref key, ref input, ref src, ref dst, ref output, ref upsertInfo, reason); - public void DisposeCopyUpdater(ref Key key, ref Input input, ref Value oldValue, ref Value newValue, ref Output output, ref RecordInfo recordInfo, ref RMWInfo rmwInfo) - => _clientSession.functions.DisposeCopyUpdater(ref key, ref input, ref oldValue, ref newValue, ref output, ref rmwInfo); - public void DisposeInitialUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, ref RMWInfo rmwInfo) - => _clientSession.functions.DisposeInitialUpdater(ref key, ref input, ref value, ref output, ref rmwInfo); - public void DisposeSingleDeleter(ref Key key, ref Value value, ref RecordInfo recordInfo, ref DeleteInfo deleteInfo) - => _clientSession.functions.DisposeSingleDeleter(ref key, ref value, ref deleteInfo); - public void DisposeDeserializedFromDisk(ref Key key, ref Value value, ref RecordInfo recordInfo) - => _clientSession.functions.DisposeDeserializedFromDisk(ref key, ref value); - #endregion IFunctions - Dispose - - #region IFunctions - Checkpointing - public void CheckpointCompletionCallback(int sessionID, string sessionName, CommitPoint commitPoint) - { - _clientSession.functions.CheckpointCompletionCallback(sessionID, sessionName, commitPoint); - _clientSession.LatestCommitPoint = commitPoint; - } - #endregion IFunctions - Checkpointing - - #region Ephemeral locking - public bool TryLockEphemeralExclusive(ref RecordInfo recordInfo) - { - Debug.Assert(recordInfo.IsLockedExclusive, $"Attempting to use a non-XLocked key in a Lockable context (requesting XLock): XLocked {recordInfo.IsLockedExclusive}, Slocked {recordInfo.NumLockedShared}"); - return true; - } - - public bool TryLockEphemeralShared(ref RecordInfo recordInfo) - { - Debug.Assert(recordInfo.IsLocked, $"Attempting to use a non-Locked (S or X) key in a Lockable context (requesting SLock): XLocked {recordInfo.IsLockedExclusive}, Slocked {recordInfo.NumLockedShared}"); - return true; - } - - public void UnlockEphemeralExclusive(ref RecordInfo recordInfo) { } - public bool TryUnlockEphemeralShared(ref RecordInfo recordInfo) => true; - #endregion - - #region Internal utilities - public int GetInitialLength(ref Input input) - => _clientSession.variableLengthStruct.GetInitialLength(ref input); - - public int GetLength(ref Value t, ref Input input) - => _clientSession.variableLengthStruct.GetLength(ref t, ref input); - - public IHeapContainer GetHeapContainer(ref Input input) - { - if (_clientSession.inputVariableLengthStruct == default) - return new StandardHeapContainer(ref input); - return new VarLenHeapContainer(ref input, _clientSession.inputVariableLengthStruct, _clientSession.fht.hlog.bufferPool); - } - - public void UnsafeResumeThread() => _clientSession.UnsafeResumeThread(); - - public void UnsafeSuspendThread() => _clientSession.UnsafeSuspendThread(); - - public bool CompletePendingWithOutputs(out CompletedOutputIterator completedOutputs, bool wait = 
false, bool spinWaitForCommit = false) - => _clientSession.CompletePendingWithOutputs(out completedOutputs, wait, spinWaitForCommit); - #endregion Internal utilities - } -#endregion IFasterSession } } diff --git a/cs/src/core/ClientSession/UnsafeContext.cs b/cs/src/core/ClientSession/UnsafeContext.cs index 2a28aae79..d35872d2b 100644 --- a/cs/src/core/ClientSession/UnsafeContext.cs +++ b/cs/src/core/ClientSession/UnsafeContext.cs @@ -15,7 +15,7 @@ namespace FASTER.core where Functions : IFunctions { readonly ClientSession clientSession; - internal readonly InternalFasterSession FasterSession; + internal readonly ClientSession.InternalFasterSession FasterSession; /// Indicates whether this struct has been initialized public bool IsNull => this.clientSession is null; @@ -23,7 +23,7 @@ namespace FASTER.core internal UnsafeContext(ClientSession clientSession) { this.clientSession = clientSession; - FasterSession = new InternalFasterSession(clientSession); + FasterSession = new ClientSession.InternalFasterSession(clientSession); } #region Begin/EndUnsafe @@ -67,7 +67,7 @@ public ValueTask> Co public Status Read(ref Key key, ref Input input, ref Output output, Context userContext = default, long serialNo = 0) { Debug.Assert(clientSession.fht.epoch.ThisInstanceProtected()); - return clientSession.fht.ContextRead(ref key, ref input, ref output, userContext, FasterSession, serialNo, clientSession.ctx); + return clientSession.fht.ContextRead(ref key, ref input, ref output, userContext, FasterSession, serialNo); } /// @@ -109,7 +109,7 @@ public Status Read(Key key, out Output output, Context userContext = default, lo public Status Read(ref Key key, ref Input input, ref Output output, ref ReadOptions readOptions, out RecordMetadata recordMetadata, Context userContext = default, long serialNo = 0) { Debug.Assert(clientSession.fht.epoch.ThisInstanceProtected()); - return clientSession.fht.ContextRead(ref key, ref input, ref output, ref readOptions, out recordMetadata, userContext, FasterSession, serialNo, clientSession.ctx); + return clientSession.fht.ContextRead(ref key, ref input, ref output, ref readOptions, out recordMetadata, userContext, FasterSession, serialNo); } /// @@ -117,7 +117,7 @@ public Status Read(ref Key key, ref Input input, ref Output output, ref ReadOpti public Status ReadAtAddress(ref Input input, ref Output output, ref ReadOptions readOptions, Context userContext = default, long serialNo = 0) { Debug.Assert(clientSession.fht.epoch.ThisInstanceProtected()); - return clientSession.fht.ContextReadAtAddress(ref input, ref output, ref readOptions, userContext, FasterSession, serialNo, clientSession.ctx); + return clientSession.fht.ContextReadAtAddress(ref input, ref output, ref readOptions, userContext, FasterSession, serialNo); } /// @@ -126,7 +126,7 @@ public ValueTask.ReadAsyncResult> R { Debug.Assert(!clientSession.fht.epoch.ThisInstanceProtected()); ReadOptions readOptions = default; - return clientSession.fht.ReadAsync(FasterSession, clientSession.ctx, ref key, ref input, ref readOptions, userContext, serialNo, cancellationToken); + return clientSession.fht.ReadAsync(FasterSession, ref key, ref input, ref readOptions, userContext, serialNo, cancellationToken); } /// @@ -135,7 +135,7 @@ public ValueTask.ReadAsyncResult> R { Debug.Assert(!clientSession.fht.epoch.ThisInstanceProtected()); ReadOptions readOptions = default; - return clientSession.fht.ReadAsync(FasterSession, clientSession.ctx, ref key, ref input, ref readOptions, context, serialNo, token); + return 
clientSession.fht.ReadAsync(FasterSession, ref key, ref input, ref readOptions, context, serialNo, token); } /// @@ -145,7 +145,7 @@ public ValueTask.ReadAsyncResult> R Debug.Assert(!clientSession.fht.epoch.ThisInstanceProtected()); Input input = default; ReadOptions readOptions = default; - return clientSession.fht.ReadAsync(FasterSession, clientSession.ctx, ref key, ref input, ref readOptions, userContext, serialNo, token); + return clientSession.fht.ReadAsync(FasterSession, ref key, ref input, ref readOptions, userContext, serialNo, token); } /// @@ -155,7 +155,7 @@ public ValueTask.ReadAsyncResult> R Debug.Assert(!clientSession.fht.epoch.ThisInstanceProtected()); Input input = default; ReadOptions readOptions = default; - return clientSession.fht.ReadAsync(FasterSession, clientSession.ctx, ref key, ref input, ref readOptions, context, serialNo, token); + return clientSession.fht.ReadAsync(FasterSession, ref key, ref input, ref readOptions, context, serialNo, token); } /// @@ -164,7 +164,7 @@ public ValueTask.ReadAsyncResult> R Context userContext = default, long serialNo = 0, CancellationToken cancellationToken = default) { Debug.Assert(!clientSession.fht.epoch.ThisInstanceProtected()); - return clientSession.fht.ReadAsync(FasterSession, clientSession.ctx, ref key, ref input, ref readOptions, userContext, serialNo, cancellationToken); + return clientSession.fht.ReadAsync(FasterSession, ref key, ref input, ref readOptions, userContext, serialNo, cancellationToken); } /// @@ -174,7 +174,7 @@ public ValueTask.ReadAsyncResult> R { Debug.Assert(!clientSession.fht.epoch.ThisInstanceProtected()); Key key = default; - return clientSession.fht.ReadAsync(FasterSession, clientSession.ctx, ref key, ref input, ref readOptions, userContext, serialNo, cancellationToken, noKey: true); + return clientSession.fht.ReadAsync(FasterSession, ref key, ref input, ref readOptions, userContext, serialNo, cancellationToken, noKey: true); } /// @@ -192,7 +192,7 @@ public Status Upsert(ref Key key, ref Value desiredValue, Context userContext = public Status Upsert(ref Key key, ref Input input, ref Value desiredValue, ref Output output, Context userContext = default, long serialNo = 0) { Debug.Assert(clientSession.fht.epoch.ThisInstanceProtected()); - return clientSession.fht.ContextUpsert(ref key, ref input, ref desiredValue, ref output, userContext, FasterSession, serialNo, clientSession.ctx); + return clientSession.fht.ContextUpsert(ref key, ref input, ref desiredValue, ref output, userContext, FasterSession, serialNo); } /// @@ -200,7 +200,7 @@ public Status Upsert(ref Key key, ref Input input, ref Value desiredValue, ref O public Status Upsert(ref Key key, ref Input input, ref Value desiredValue, ref Output output, out RecordMetadata recordMetadata, Context userContext = default, long serialNo = 0) { Debug.Assert(clientSession.fht.epoch.ThisInstanceProtected()); - return clientSession.fht.ContextUpsert(ref key, ref input, ref desiredValue, ref output, out recordMetadata, userContext, FasterSession, serialNo, clientSession.ctx); + return clientSession.fht.ContextUpsert(ref key, ref input, ref desiredValue, ref output, out recordMetadata, userContext, FasterSession, serialNo); } /// @@ -226,7 +226,8 @@ public ValueTask.UpsertAsyncResult> public ValueTask.UpsertAsyncResult> UpsertAsync(ref Key key, ref Input input, ref Value desiredValue, Context userContext = default, long serialNo = 0, CancellationToken token = default) { Debug.Assert(!clientSession.fht.epoch.ThisInstanceProtected()); - return 
clientSession.fht.UpsertAsync(FasterSession, clientSession.ctx, ref key, ref input, ref desiredValue, userContext, serialNo, token); + return clientSession.fht.UpsertAsync.InternalFasterSession>( + FasterSession, ref key, ref input, ref desiredValue, userContext, serialNo, token); } /// @@ -249,7 +250,7 @@ public Status RMW(ref Key key, ref Input input, ref Output output, Context userC public Status RMW(ref Key key, ref Input input, ref Output output, out RecordMetadata recordMetadata, Context userContext = default, long serialNo = 0) { Debug.Assert(clientSession.fht.epoch.ThisInstanceProtected()); - return clientSession.fht.ContextRMW(ref key, ref input, ref output, out recordMetadata, userContext, FasterSession, serialNo, clientSession.ctx); + return clientSession.fht.ContextRMW(ref key, ref input, ref output, out recordMetadata, userContext, FasterSession, serialNo); } /// @@ -281,7 +282,8 @@ public Status RMW(Key key, Input input, Context userContext = default, long seri public ValueTask.RmwAsyncResult> RMWAsync(ref Key key, ref Input input, Context context = default, long serialNo = 0, CancellationToken token = default) { Debug.Assert(!clientSession.fht.epoch.ThisInstanceProtected()); - return clientSession.fht.RmwAsync(FasterSession, clientSession.ctx, ref key, ref input, context, serialNo, token); + return clientSession.fht.RmwAsync.InternalFasterSession>( + FasterSession, ref key, ref input, context, serialNo, token); } /// @@ -294,7 +296,8 @@ public ValueTask.RmwAsyncResult> RM public Status Delete(ref Key key, Context userContext = default, long serialNo = 0) { Debug.Assert(clientSession.fht.epoch.ThisInstanceProtected()); - return clientSession.fht.ContextDelete(ref key, userContext, FasterSession, serialNo, clientSession.ctx); + return clientSession.fht.ContextDelete.InternalFasterSession>( + ref key, userContext, FasterSession, serialNo); } /// @@ -307,7 +310,8 @@ public Status Delete(Key key, Context userContext = default, long serialNo = 0) public ValueTask.DeleteAsyncResult> DeleteAsync(ref Key key, Context userContext = default, long serialNo = 0, CancellationToken token = default) { Debug.Assert(!clientSession.fht.epoch.ThisInstanceProtected()); - return clientSession.fht.DeleteAsync(FasterSession, clientSession.ctx, ref key, userContext, serialNo, token); + return clientSession.fht.DeleteAsync.InternalFasterSession>( + FasterSession, ref key, userContext, serialNo, token); } /// @@ -329,195 +333,8 @@ internal bool IsModified(Key key) public void Refresh() { Debug.Assert(clientSession.fht.epoch.ThisInstanceProtected()); - clientSession.fht.InternalRefresh(clientSession.ctx, FasterSession); + clientSession.fht.InternalRefresh.InternalFasterSession>(FasterSession); } - #endregion IFasterContext - - #region IFasterSession - - // This is a struct to allow JIT to inline calls (and bypass default interface call mechanism) - internal readonly struct InternalFasterSession : IFasterSession - { - private readonly ClientSession _clientSession; - - public InternalFasterSession(ClientSession clientSession) - { - _clientSession = clientSession; - } - - #region IFunctions - Optional features supported - public bool DisableEphemeralLocking => _clientSession.fht.DisableEphemeralLocking; - - public bool IsManualLocking => false; - - public SessionType SessionType => SessionType.UnsafeContext; - #endregion IFunctions - Optional features supported - - #region IFunctions - Reads - public bool SingleReader(ref Key key, ref Input input, ref Value value, ref Output dst, ref RecordInfo 
recordInfo, ref ReadInfo readInfo) - => _clientSession.functions.SingleReader(ref key, ref input, ref value, ref dst, ref readInfo); - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool ConcurrentReader(ref Key key, ref Input input, ref Value value, ref Output dst, ref RecordInfo recordInfo, ref ReadInfo readInfo) - { - if (_clientSession.functions.ConcurrentReader(ref key, ref input, ref value, ref dst, ref readInfo)) - return true; - if (readInfo.Action == ReadAction.Expire) - recordInfo.Tombstone = true; - return false; - } - - public void ReadCompletionCallback(ref Key key, ref Input input, ref Output output, Context ctx, Status status, RecordMetadata recordMetadata) - => _clientSession.functions.ReadCompletionCallback(ref key, ref input, ref output, ctx, status, recordMetadata); - - #endregion IFunctions - Reads - - #region IFunctions - Upserts - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool SingleWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, ref UpsertInfo upsertInfo, WriteReason reason) - => _clientSession.functions.SingleWriter(ref key, ref input, ref src, ref dst, ref output, ref upsertInfo, reason); - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public void PostSingleWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, ref UpsertInfo upsertInfo, WriteReason reason) - { - recordInfo.SetDirtyAndModified(); - _clientSession.functions.PostSingleWriter(ref key, ref input, ref src, ref dst, ref output, ref upsertInfo, reason); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool ConcurrentWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, ref UpsertInfo upsertInfo) - { - recordInfo.SetDirtyAndModified(); - // Note: KeyIndexes do not need notification of in-place updates because the key does not change. 
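The same deduplication happens here: this deleted `InternalFasterSession` is superseded by `ClientSession<...>.InternalFasterSession`, per the constructor hunk at the top of this file's diff, so the ephemeral (now transient) lock fallbacks visible at the end of this deleted region also move into the shared wrapper. A sketch of the surviving wiring (generic arguments restored by assumption):

    internal readonly ClientSession<Key, Value, Input, Output, Context, Functions>.InternalFasterSession FasterSession;

    internal UnsafeContext(ClientSession<Key, Value, Input, Output, Context, Functions> clientSession)
    {
        this.clientSession = clientSession;
        FasterSession = new ClientSession<Key, Value, Input, Output, Context, Functions>.InternalFasterSession(clientSession);
    }
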
- return _clientSession.functions.ConcurrentWriter(ref key, ref input, ref src, ref dst, ref output, ref upsertInfo); - } - #endregion IFunctions - Upserts - - #region IFunctions - RMWs - #region InitialUpdater - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool NeedInitialUpdate(ref Key key, ref Input input, ref Output output, ref RMWInfo rmwInfo) - => _clientSession.functions.NeedInitialUpdate(ref key, ref input, ref output, ref rmwInfo); - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool InitialUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, ref RMWInfo rmwInfo) - => _clientSession.functions.InitialUpdater(ref key, ref input, ref value, ref output, ref rmwInfo); - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public void PostInitialUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, ref RMWInfo rmwInfo) - { - recordInfo.SetDirtyAndModified(); - _clientSession.functions.PostInitialUpdater(ref key, ref input, ref value, ref output, ref rmwInfo); - } - #endregion InitialUpdater - - #region CopyUpdater - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool NeedCopyUpdate(ref Key key, ref Input input, ref Value oldValue, ref Output output, ref RMWInfo rmwInfo) - => _clientSession.functions.NeedCopyUpdate(ref key, ref input, ref oldValue, ref output, ref rmwInfo); - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool CopyUpdater(ref Key key, ref Input input, ref Value oldValue, ref Value newValue, ref Output output, ref RecordInfo recordInfo, ref RMWInfo rmwInfo) - => _clientSession.functions.CopyUpdater(ref key, ref input, ref oldValue, ref newValue, ref output, ref rmwInfo); - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public void PostCopyUpdater(ref Key key, ref Input input, ref Value oldValue, ref Value newValue, ref Output output, ref RecordInfo recordInfo, ref RMWInfo rmwInfo) - { - recordInfo.SetDirtyAndModified(); - _clientSession.functions.PostCopyUpdater(ref key, ref input, ref oldValue, ref newValue, ref output, ref rmwInfo); - } - #endregion CopyUpdater - - #region InPlaceUpdater - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool InPlaceUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, ref RMWInfo rmwInfo, out OperationStatus status) - { - recordInfo.SetDirtyAndModified(); - return _clientSession.InPlaceUpdater(ref key, ref input, ref output, ref value, ref recordInfo, ref rmwInfo, out status); - } - - public void RMWCompletionCallback(ref Key key, ref Input input, ref Output output, Context ctx, Status status, RecordMetadata recordMetadata) - => _clientSession.functions.RMWCompletionCallback(ref key, ref input, ref output, ctx, status, recordMetadata); - - #endregion InPlaceUpdater - #endregion IFunctions - RMWs - - #region IFunctions - Deletes - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool SingleDeleter(ref Key key, ref Value value, ref RecordInfo recordInfo, ref DeleteInfo deleteInfo) - => _clientSession.functions.SingleDeleter(ref key, ref value, ref deleteInfo); - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public void PostSingleDeleter(ref Key key, ref RecordInfo recordInfo, ref DeleteInfo deleteInfo) - { - recordInfo.SetDirtyAndModified(); - _clientSession.functions.PostSingleDeleter(ref key, ref deleteInfo); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool 
ConcurrentDeleter(ref Key key, ref Value value, ref RecordInfo recordInfo, ref DeleteInfo deleteInfo) - { - recordInfo.SetDirtyAndModified(); - recordInfo.SetTombstone(); - return _clientSession.functions.ConcurrentDeleter(ref key, ref value, ref deleteInfo); - } - #endregion IFunctions - Deletes - - #region IFunctions - Dispose - public void DisposeSingleWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, ref UpsertInfo upsertInfo, WriteReason reason) - => _clientSession.functions.DisposeSingleWriter(ref key, ref input, ref src, ref dst, ref output, ref upsertInfo, reason); - public void DisposeCopyUpdater(ref Key key, ref Input input, ref Value oldValue, ref Value newValue, ref Output output, ref RecordInfo recordInfo, ref RMWInfo rmwInfo) - => _clientSession.functions.DisposeCopyUpdater(ref key, ref input, ref oldValue, ref newValue, ref output, ref rmwInfo); - public void DisposeInitialUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, ref RMWInfo rmwInfo) - => _clientSession.functions.DisposeInitialUpdater(ref key, ref input, ref value, ref output, ref rmwInfo); - public void DisposeSingleDeleter(ref Key key, ref Value value, ref RecordInfo recordInfo, ref DeleteInfo deleteInfo) - => _clientSession.functions.DisposeSingleDeleter(ref key, ref value, ref deleteInfo); - public void DisposeDeserializedFromDisk(ref Key key, ref Value value, ref RecordInfo recordInfo) - => _clientSession.functions.DisposeDeserializedFromDisk(ref key, ref value); - #endregion IFunctions - Dispose - - #region IFunctions - Checkpointing - public void CheckpointCompletionCallback(int sessionID, string sessionName, CommitPoint commitPoint) - { - _clientSession.functions.CheckpointCompletionCallback(sessionID, sessionName, commitPoint); - _clientSession.LatestCommitPoint = commitPoint; - } - #endregion IFunctions - Checkpointing - - #region Ephemeral locking - public bool TryLockEphemeralExclusive(ref RecordInfo recordInfo) => _clientSession.fht.DisableEphemeralLocking || recordInfo.TryLockExclusive(); - public bool TryLockEphemeralShared(ref RecordInfo recordInfo) => _clientSession.fht.DisableEphemeralLocking || recordInfo.TryLockShared(); - public void UnlockEphemeralExclusive(ref RecordInfo recordInfo) - { - if (!_clientSession.fht.DisableEphemeralLocking) - recordInfo.UnlockExclusive(); - } - public bool TryUnlockEphemeralShared(ref RecordInfo recordInfo) => _clientSession.fht.DisableEphemeralLocking || recordInfo.TryUnlockShared(); - #endregion Ephemeral locking - - #region Internal utilities - public int GetInitialLength(ref Input input) - => _clientSession.variableLengthStruct.GetInitialLength(ref input); - - public int GetLength(ref Value t, ref Input input) - => _clientSession.variableLengthStruct.GetLength(ref t, ref input); - - public IHeapContainer GetHeapContainer(ref Input input) - { - if (_clientSession.inputVariableLengthStruct == default) - return new StandardHeapContainer(ref input); - return new VarLenHeapContainer(ref input, _clientSession.inputVariableLengthStruct, _clientSession.fht.hlog.bufferPool); - } - - public void UnsafeResumeThread() => _clientSession.UnsafeResumeThread(); - - public void UnsafeSuspendThread() => _clientSession.UnsafeSuspendThread(); - - public bool CompletePendingWithOutputs(out CompletedOutputIterator completedOutputs, bool wait = false, bool spinWaitForCommit = false) - => _clientSession.CompletePendingWithOutputs(out completedOutputs, wait, 
spinWaitForCommit);
-            #endregion Internal utilities
-        }
-        #endregion IFasterSession
    }
}
diff --git a/cs/src/core/Device/LocalStorageDevice.cs b/cs/src/core/Device/LocalStorageDevice.cs
index 401b6bfe1..03559b5c0 100644
--- a/cs/src/core/Device/LocalStorageDevice.cs
+++ b/cs/src/core/Device/LocalStorageDevice.cs
@@ -60,6 +60,7 @@ public unsafe class LocalStorageDevice : StorageDeviceBase
        /// Whether to recover device metadata from existing files
        /// Whether we use IO completion port with polling
        public LocalStorageDevice(string filename,
+                                  bool preallocateFile = false,
                                   bool deleteOnClose = false,
                                   bool disableFileBuffering = true,
@@ -106,6 +107,9 @@ protected internal LocalStorageDevice(string filename,
                throw new FasterException("Cannot use LocalStorageDevice from non-Windows OS platform, use ManagedLocalStorageDevice instead.");
            }

+            if (filename.Length > Native32.WIN32_MAX_PATH - 11) // -11 to allow for the ".<segmentId>" suffix
+                throw new FasterException($"Path {filename} is too long");
+
            ThrottleLimit = 120;
            this.useIoCompletionPort = useIoCompletionPort;
            this._disposed = false;
@@ -275,10 +279,10 @@ public override unsafe void WriteAsync(IntPtr sourceAddress,
            try
            {
-                var logHandle = GetOrAddHandle(segmentId);
-
                Interlocked.Increment(ref numPending);
+
+                var logHandle = GetOrAddHandle(segmentId);

                bool _result = Native32.WriteFile(logHandle,
                    sourceAddress,
                    numBytesToWrite,
@@ -400,8 +404,9 @@ protected internal static SafeFileHandle CreateHandle(int segmentId, bool disabl
                fileShare = fileShare | Native32.FILE_SHARE_DELETE;
            }

+            string segmentFileName = GetSegmentName(fileName, segmentId);
            var logHandle = Native32.CreateFileW(
-                GetSegmentName(fileName, segmentId),
+                segmentFileName,
                fileAccess, fileShare,
                IntPtr.Zero, fileCreation,
                fileFlags, IntPtr.Zero);
@@ -409,7 +414,10 @@ protected internal static SafeFileHandle CreateHandle(int segmentId, bool disabl
            if (logHandle.IsInvalid)
            {
                var error = Marshal.GetLastWin32Error();
-                throw new IOException($"Error creating log file for {GetSegmentName(fileName, segmentId)}, error: {error}", Native32.MakeHRFromErrorCode(error));
+                var message = $"Error creating log file for {segmentFileName}, error: {error} (0x{Native32.MakeHRFromErrorCode(error):X})";
+                if (error == Native32.ERROR_PATH_NOT_FOUND)
+                    message += $" (Path not found; name length = {segmentFileName.Length}, MAX_PATH = {Native32.WIN32_MAX_PATH})";
+                throw new IOException(message);
            }

            if (preallocateFile && segmentSize != -1)
@@ -428,7 +436,7 @@ protected internal static SafeFileHandle CreateHandle(int segmentId, bool disabl
                }
                catch (Exception e)
                {
-                    throw new FasterException("Error binding log handle for " + GetSegmentName(fileName, segmentId) + ": " + e.ToString());
+                    throw new FasterException("Error binding log handle for " + segmentFileName + ": " + e.ToString());
                }
            }
            return logHandle;
diff --git a/cs/src/core/FASTER.core.csproj b/cs/src/core/FASTER.core.csproj
index dbc6d1edb..fd57409b7 100644
--- a/cs/src/core/FASTER.core.csproj
+++ b/cs/src/core/FASTER.core.csproj
@@ -36,8 +36,7 @@
    bin\$(Platform)\Release\
-
+    ;NU1605
diff --git a/cs/src/core/Index/Common/Contexts.cs b/cs/src/core/Index/Common/Contexts.cs
index 81ec26d1b..b6e536fec 100644
--- a/cs/src/core/Index/Common/Contexts.cs
+++ b/cs/src/core/Index/Common/Contexts.cs
@@ -77,7 +77,7 @@ internal enum OperationStatus
        ///
        /// Allocation failed, due to a need to flush pages. Clients do not see this status directly; they see .
        ///
-        /// For Sync operations we retry this as part of .
+        /// For Sync operations we retry this as part of .
/// For Async operations we retry this as part of the ".Complete(...)" or ".CompleteAsync(...)" operation on the appropriate "*AsyncResult{}" object. /// /// @@ -199,12 +199,11 @@ static PendingContext() Debug.Assert((ushort)ReadFlags.DisableReadCacheReads >> 1 == kDisableReadCacheReads); Debug.Assert((ushort)ReadFlags.CopyReadsToTail >> 1 == kCopyReadsToTail); Debug.Assert((ushort)ReadFlags.CopyFromDeviceOnly >> 1 == kCopyFromDeviceOnly); - Debug.Assert((ushort)ReadFlags.ResetModifiedBit >> 1 == kResetModifiedBit); } [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static ushort GetOperationFlags(ReadFlags readFlags) - => (ushort)((int)(readFlags & (ReadFlags.DisableReadCacheUpdates | ReadFlags.DisableReadCacheReads | ReadFlags.CopyReadsToTail | ReadFlags.CopyFromDeviceOnly | ReadFlags.ResetModifiedBit)) >> 1); + => (ushort)((int)(readFlags & (ReadFlags.DisableReadCacheUpdates | ReadFlags.DisableReadCacheReads | ReadFlags.CopyReadsToTail | ReadFlags.CopyFromDeviceOnly)) >> 1); [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static ushort GetOperationFlags(ReadFlags readFlags, bool noKey) @@ -248,8 +247,6 @@ internal bool NoKey internal bool CopyFromDeviceOnly => (operationFlags & kCopyFromDeviceOnly) != 0; - internal bool ResetModifiedBit => (operationFlags & kResetModifiedBit) != 0; - internal bool HasMinAddress => this.minAddress != Constants.kInvalidAddress; internal bool IsAsync @@ -258,16 +255,16 @@ internal bool IsAsync set => operationFlags = value ? (ushort)(operationFlags | kIsAsync) : (ushort)(operationFlags & ~kIsAsync); } - internal long PrevHighestKeyHashAddress + internal long InitialEntryAddress { get => recordInfo.PreviousAddress; set => recordInfo.PreviousAddress = value; } - internal long PrevLatestLogicalAddress + internal long InitialLatestLogicalAddress { - get => entry.word; - set => entry.word = value; + get => entry.Address; + set => entry.Address = value; } public void Dispose() diff --git a/cs/src/core/Index/Common/FasterKVSettings.cs b/cs/src/core/Index/Common/FasterKVSettings.cs index d78eed100..960433128 100644 --- a/cs/src/core/Index/Common/FasterKVSettings.cs +++ b/cs/src/core/Index/Common/FasterKVSettings.cs @@ -3,7 +3,6 @@ using Microsoft.Extensions.Logging; using System; -using System.Diagnostics; using System.IO; namespace FASTER.core @@ -23,9 +22,9 @@ public sealed class FasterKVSettings : IDisposable public long IndexSize = 1L << 26; /// - /// Whether non-Lockable FASTER contexts take read and write locks on records internally as part of operations + /// How FASTER should do record locking /// - public bool DisableEphemeralLocking = false; + public LockingMode LockingMode; /// /// Device used for main hybrid log @@ -139,11 +138,6 @@ public sealed class FasterKVSettings : IDisposable /// Whether we should throttle the disk IO for checkpoints (one write at a time, wait between each write) and issue IO from separate task (-1 = throttling disabled) /// public int ThrottleCheckpointFlushDelayMs = -1; - - /// - /// Number of buckets in the lock table. - /// - public int LockTableSize = Constants.kDefaultLockTableSize; /// /// Create default configuration settings for FasterKV. You need to create and specify LogDevice @@ -200,7 +194,7 @@ public override string ToString() var retStr = $"index: {Utility.PrettySize(IndexSize)}; log memory: {Utility.PrettySize(MemorySize)}; log page: {Utility.PrettySize(PageSize)}; log segment: {Utility.PrettySize(SegmentSize)}"; retStr += $"; log device: {(LogDevice == null ? 
"null" : LogDevice.GetType().Name)}"; retStr += $"; obj log device: {(ObjectLogDevice == null ? "null" : ObjectLogDevice.GetType().Name)}"; - retStr += $"; mutable fraction: {MutableFraction}; supports locking: {(DisableEphemeralLocking ? "no" : "yes")}"; + retStr += $"; mutable fraction: {MutableFraction}; locking mode: {this.LockingMode}"; retStr += $"; read cache (rc): {(ReadCacheEnabled ? "yes" : "no")}"; if (ReadCacheEnabled) retStr += $"; rc memory: {Utility.PrettySize(ReadCacheMemorySize)}; rc page: {Utility.PrettySize(ReadCachePageSize)}"; diff --git a/cs/src/core/Index/Common/RecordInfo.cs b/cs/src/core/Index/Common/RecordInfo.cs index e8d4de639..46e2628a0 100644 --- a/cs/src/core/Index/Common/RecordInfo.cs +++ b/cs/src/core/Index/Common/RecordInfo.cs @@ -12,8 +12,8 @@ namespace FASTER.core { // RecordInfo layout (64 bits total): - // [-][Modified][InNewVersion][Filler][Dirty][Tentative][Sealed] [Valid][Tombstone][X][SSSSSS] [RAAAAAAA] [AAAAAAAA] [AAAAAAAA] [AAAAAAAA] [AAAAAAAA] [AAAAAAAA] - // where X = exclusive lock, S = shared lock, R = readcache, A = address, - = unused + // [Unused3][Modified][InNewVersion][Filler][Dirty][Unused2][Unused1][Valid][Tombstone][X][SSSSSS] [RAAAAAAA] [AAAAAAAA] [AAAAAAAA] [AAAAAAAA] [AAAAAAAA] [AAAAAAAA] + // where X = exclusive lock, S = shared lock, R = readcache, A = address [StructLayout(LayoutKind.Explicit, Size = 8)] public struct RecordInfo { @@ -40,87 +40,69 @@ public struct RecordInfo const long kExclusiveLockBitMask = 1L << kExclusiveLockBitOffset; const long kLockBitMask = kSharedLockMaskInWord | kExclusiveLockBitMask; - // Other marker bits + // Other marker bits. Unused* means bits not yet assigned; use the highest number when assigning const int kTombstoneBitOffset = kExclusiveLockBitOffset + 1; const int kValidBitOffset = kTombstoneBitOffset + 1; - const int kTentativeBitOffset = kValidBitOffset + 1; - const int kSealedBitOffset = kTentativeBitOffset + 1; - const int kDirtyBitOffset = kSealedBitOffset + 1; + const int kSealedBitOffset = kValidBitOffset + 1; + const int kUnused2BitOffset = kSealedBitOffset + 1; + const int kDirtyBitOffset = kUnused2BitOffset + 1; const int kFillerBitOffset = kDirtyBitOffset + 1; const int kInNewVersionBitOffset = kFillerBitOffset + 1; const int kModifiedBitOffset = kInNewVersionBitOffset + 1; - // If these become used, start with the highest number - internal const int kUnusedBitOffset = kModifiedBitOffset + 1; + internal const int kUnused1BitOffset = kModifiedBitOffset + 1; const long kTombstoneBitMask = 1L << kTombstoneBitOffset; const long kValidBitMask = 1L << kValidBitOffset; - const long kTentativeBitMask = 1L << kTentativeBitOffset; const long kSealedBitMask = 1L << kSealedBitOffset; + const long kUnused2BitMask = 1L << kUnused2BitOffset; const long kDirtyBitMask = 1L << kDirtyBitOffset; const long kFillerBitMask = 1L << kFillerBitOffset; const long kInNewVersionBitMask = 1L << kInNewVersionBitOffset; const long kModifiedBitMask = 1L << kModifiedBitOffset; - internal const long kUnused1BitMask = 1L << kUnusedBitOffset; + internal const long kUnused1BitMask = 1L << kUnused1BitOffset; [FieldOffset(0)] private long word; - public static void WriteInfo(ref RecordInfo info, bool inNewVersion, bool tombstone, long previousAddress) + public void WriteInfo(bool inNewVersion, bool tombstone, long previousAddress) { - info.word = default; - info.Tombstone = tombstone; - info.SetValid(); - info.Dirty = false; - info.PreviousAddress = previousAddress; - info.InNewVersion = inNewVersion; - info.Modified = 
false; + this.word = default; + this.Tombstone = tombstone; + this.SetValid(); + this.PreviousAddress = previousAddress; + this.IsInNewVersion = inNewVersion; } public bool Equals(RecordInfo other) => this.word == other.word; public long GetHashCode64() => Utility.GetHashCode(this.word); - public bool IsLocked => (word & (kExclusiveLockBitMask | kSharedLockMaskInWord)) != 0; - public bool IsLockedExclusive => (word & kExclusiveLockBitMask) != 0; public bool IsLockedShared => NumLockedShared != 0; + public bool IsLocked => IsLockedExclusive || IsLockedShared; public byte NumLockedShared => (byte)((word & kSharedLockMaskInWord) >> kLockShiftInWord); - public void ClearLocks() => word &= ~(kExclusiveLockBitMask | kSharedLockMaskInWord); - - public bool IsIntermediate => IsIntermediateWord(word); - - private static bool IsIntermediateWord(long word) => (word & (kTentativeBitMask | kSealedBitMask)) != 0; - - private static bool IsIntermediateOrInvalidWord(long word) => (word & (kTentativeBitMask | kSealedBitMask | kValidBitMask)) != kValidBitMask; - - private static bool IsInvalidOrSealedWord(long word) => (word & (kSealedBitMask | kValidBitMask)) != kValidBitMask; - - public void CleanDiskImage() + // We ignore locks and temp bits for disk images + public void ClearBitsForDiskImages() { - // We ignore locks and temp bits for disk images - this.word &= ~(kExclusiveLockBitMask | kSharedLockMaskInWord | kTentativeBitMask | kSealedBitMask); + // Locked records can be evicted even with ephemeral locking, if the record is locked when BlockAllocate allows an epoch refresh + // that sends it below HeadAddress. Sealed records are normal. In Pending IO completions, Ephemeral locking does not + // lock records read from disk (they should be found in memory). But a Sealed record may become current again during + // recovery, if the RCU-inserted record was not written to disk during a crash, etc. So clear these bits here. + word &= ~(kLockBitMask | kDirtyBitMask | kSealedBitMask); } - public bool TryLock(LockType lockType) - { - if (lockType == LockType.Shared) - return this.TryLockShared(); - if (lockType == LockType.Exclusive) - return this.TryLockExclusive(); - else - Debug.Fail($"Unexpected LockType: {lockType}"); - return false; - } + private static bool IsClosedWord(long word) => (word & (kValidBitMask | kSealedBitMask)) != kValidBitMask; - public bool TryUnlock(LockType lockType) - { - if (lockType != LockType.Exclusive) - return TryUnlockShared(); - UnlockExclusive(); - return true; - } + public bool IsClosed => IsClosedWord(word); + private bool IsSealed => (this.word & kSealedBitMask) != 0; + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal void InitializeLockShared() => this.word += kSharedLockIncrement; + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal void InitializeLockExclusive() => this.word |= kExclusiveLockBitMask; /// /// Unlock RecordInfo that was previously locked for exclusive access, via @@ -130,28 +112,39 @@ public void UnlockExclusive() { Debug.Assert(!IsLockedShared, "Trying to X unlock an S locked record"); Debug.Assert(IsLockedExclusive, "Trying to X unlock an unlocked record"); + Debug.Assert(!IsSealed, "Trying to X unlock a Sealed record"); word &= ~kExclusiveLockBitMask; // Safe because there should be no other threads (e.g., readers) updating the word at this point } + /// + /// Unlock RecordInfo that was previously locked for exclusive access, via , and which is a source that has become Sealed.
+ /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void UnlockExclusiveAndSeal() + { + Debug.Assert(!IsLockedShared, "Trying to X unlock an S locked record"); + Debug.Assert(IsLockedExclusive, "Trying to X unlock an unlocked record"); + Debug.Assert(!IsSealed, "Trying to X unlock a Sealed record"); + word = (word & ~kExclusiveLockBitMask) | kSealedBitMask; // Safe because there should be no other threads (e.g., readers) updating the word at this point + } + /// /// Try to take an exclusive (write) lock on RecordInfo /// /// Whether lock was acquired successfully [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool TryLockExclusive(bool tentative = false) + public bool TryLockExclusive() { int spinCount = Constants.kMaxLockSpins; - long tentativeBit = tentative ? kTentativeBitMask : 0; // Acquire exclusive lock (readers may still be present; we'll drain them later) for (; ; Thread.Yield()) { long expected_word = word; - if (IsIntermediateOrInvalidWord(expected_word)) - return false; + Debug.Assert(!IsClosedWord(expected_word), "Should not be X locking readcache records, pt 1"); if ((expected_word & kExclusiveLockBitMask) == 0) { - if (expected_word == Interlocked.CompareExchange(ref word, expected_word | kExclusiveLockBitMask | tentativeBit, expected_word)) + if (expected_word == Interlocked.CompareExchange(ref word, expected_word | kExclusiveLockBitMask, expected_word)) break; } if (spinCount > 0 && --spinCount <= 0) @@ -163,11 +156,9 @@ public bool TryLockExclusive(bool tentative = false) { if ((word & kSharedLockMaskInWord) == 0) { - // Someone else may have transferred/invalidated the record while we were draining reads. *Don't* check for Tentative here; - // we may have set it above, and no record should be set to tentative after it's been inserted into the hash chain. - if ((this.word & (kSealedBitMask | kValidBitMask)) == kValidBitMask) - return true; - break; + // Someone else may have closed the record while we were draining reads. + Debug.Assert(!IsClosedWord(this.word), "Should not be X locking readcache records, pt 2"); + return true; } Thread.Yield(); } @@ -177,24 +168,21 @@ public bool TryLockExclusive(bool tentative = false) for (; ; Thread.Yield()) { long expected_word = word; - if (Interlocked.CompareExchange(ref word, expected_word & ~(kExclusiveLockBitMask | tentativeBit), expected_word) == expected_word) + if (Interlocked.CompareExchange(ref word, expected_word & ~kExclusiveLockBitMask, expected_word) == expected_word) break; } return false; } /// Unlock RecordInfo that was previously locked for shared access, via - /// Whether the record is still valid and unsealed (otherwise it was probably transferred, e.g. from the readcache or compaction). [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool TryUnlockShared() + public void UnlockShared() { - // X and S locks means an X lock is still trying to drain readers, like this one. + // X *and* S locks mean an X lock is still trying to drain readers, like this one. Debug.Assert((word & kLockBitMask) != kExclusiveLockBitMask, "Trying to S unlock an X-only locked record"); Debug.Assert(IsLockedShared, "Trying to S unlock an unlocked record"); - var current_word = Interlocked.Add(ref word, -kSharedLockIncrement); - - // An invalid or Sealed record means we have to retry.
- return (current_word & (kValidBitMask | kSealedBitMask)) == kValidBitMask; + Debug.Assert(!IsSealed, "Trying to S unlock a Sealed record"); + Interlocked.Add(ref word, -kSharedLockIncrement); } /// @@ -202,7 +190,7 @@ public bool TryUnlockShared() /// /// Whether lock was acquired successfully [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool TryLockShared(bool tentative = false) + public bool TryLockShared() { int spinCount = Constants.kMaxLockSpins; @@ -210,38 +198,18 @@ public bool TryLockShared(bool tentative = false) for (; ; Thread.Yield()) { long expected_word = word; - if (IsIntermediateOrInvalidWord(expected_word)) - return false; + Debug.Assert(!IsClosedWord(expected_word), "Should not be S locking readcache records"); if (((expected_word & kExclusiveLockBitMask) == 0) // not exclusively locked && (expected_word & kSharedLockMaskInWord) != kSharedLockMaskInWord) // shared lock is not full { - // If there are no shared locks, this one will be tentative if requested. Otherwise, do not force existing locks to be tentative. - long tentativeBit = tentative && ((expected_word & kSharedLockMaskInWord) == 0) ? kTentativeBitMask : 0; - if (expected_word == Interlocked.CompareExchange(ref word, (expected_word + kSharedLockIncrement) | tentativeBit, expected_word)) - break; + if (expected_word == Interlocked.CompareExchange(ref word, expected_word + kSharedLockIncrement, expected_word)) + return true; } if (spinCount > 0 && --spinCount <= 0) return false; } - return true; } - // For new records, which don't need the Interlocked overhead. - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal void InitializeLock(LockType lockType, bool tentative) - { - if (lockType == LockType.Shared) - this.InitializeLockShared(tentative); - else - this.InitializeLockExclusive(tentative); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal void InitializeLockShared(bool tentative = false) => this.word += kSharedLockIncrement | (tentative ? kTentativeBitMask : 0); - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal void InitializeLockExclusive(bool tentative = false) => this.word |= kExclusiveLockBitMask | (tentative ? kTentativeBitMask : 0); - /// /// Try to reset the modified bit of the RecordInfo /// @@ -250,76 +218,22 @@ internal void InitializeLock(LockType lockType, bool tentative) internal bool TryResetModifiedAtomic() { int spinCount = Constants.kMaxLockSpins; - while (true) + for (; ; Thread.Yield()) { long expected_word = word; - if (IsIntermediateOrInvalidWord(expected_word)) + if (IsClosedWord(expected_word)) return false; if ((expected_word & kModifiedBitMask) == 0) return true; if (expected_word == Interlocked.CompareExchange(ref word, expected_word & (~kModifiedBitMask), expected_word)) - break; + return true; if (spinCount > 0 && --spinCount <= 0) return false; - Thread.Yield(); } - return true; } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public void TransferLocksFrom(ref RecordInfo source) - { - // This is called only in the Lock Table, when we have an exclusive bucket lock, so no interlock is needed and we clear the locks - // to make it InActive and to ensure that ReadCache won't double-copy if there is a CAS failure during ReadCacheEvict. 
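// The ephemeral-locking scheme above packs the exclusive bit and a 6-bit shared-reader count into
// the same 64-bit word as the record's address bits. Below is a minimal standalone sketch of that
// two-phase pattern (CAS the X bit in, then drain readers), with illustrative bit positions; it is
// not FASTER's actual RecordInfo, and the placement of kSealedBit here is assumed.
using System.Threading;

struct PackedLockWordSketch
{
    const long kXBit = 1L << 63;            // exclusive lock
    const long kSharedIncrement = 1L << 57; // one reader
    const long kSharedMask = 0x3FL << 57;   // 6-bit reader count (bits 57-62)
    const long kSealedBit = 1L << 56;       // assumed position, illustration only
    long word;

    // Two-phase exclusive lock: claim the X bit, then wait for in-flight readers to drain.
    public bool TryLockExclusive(int maxSpins = 10, int maxDrainSpins = 100)
    {
        for (; ; Thread.Yield())
        {
            long expected = Volatile.Read(ref word);
            if ((expected & kXBit) == 0
                && expected == Interlocked.CompareExchange(ref word, expected | kXBit, expected))
                break;
            if (--maxSpins <= 0) return false;
        }
        while ((Volatile.Read(ref word) & kSharedMask) != 0)
        {
            if (--maxDrainSpins > 0) { Thread.Yield(); continue; }
            for (; ; Thread.Yield()) // drain failed: CAS the X bit back out (Interlocked.And is not in netstandard2.1)
            {
                long expected = Volatile.Read(ref word);
                if (Interlocked.CompareExchange(ref word, expected & ~kXBit, expected) == expected)
                    return false;
            }
        }
        return true;
    }

    // Shared lock: bump the reader count unless X-locked or the count is saturated.
    public bool TryLockShared(int maxSpins = 10)
    {
        for (; ; Thread.Yield())
        {
            long expected = Volatile.Read(ref word);
            if ((expected & kXBit) == 0 && (expected & kSharedMask) != kSharedMask
                && expected == Interlocked.CompareExchange(ref word, expected + kSharedIncrement, expected))
                return true;
            if (--maxSpins <= 0) return false;
        }
    }

    public void UnlockShared() => Interlocked.Add(ref word, -kSharedIncrement);

    // Mirrors UnlockExclusiveAndSeal above: clear X and set Sealed in one store, safe while X is held.
    public void UnlockExclusiveAndSeal() => word = (word & ~kXBit) | kSealedBit;
}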
- this.word &= ~(kExclusiveLockBitMask | kSharedLockMaskInWord); - this.word |= source.word & (kExclusiveLockBitMask | kSharedLockMaskInWord); - source.word &= ~(kExclusiveLockBitMask | kSharedLockMaskInWord); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool CopyReadLocksFromAndMarkSourceAtomic(ref RecordInfo source, bool allowXLock, bool seal, bool removeEphemeralLock) - { - // This is called when transferring read locks from the read cache or Lock Table to a tentative log record. This does not remove - // locks from the source record because if they exist it means other threads have the record locked and must be allowed to - // unlock it (and observe the 'false' return of that unlock due to the Seal/Invalid, and then go chase the record where it is now). - Debug.Assert(this.Tentative, "Must only transfer locks to a tentative recordInfo"); - Debug.Assert((word & (kExclusiveLockBitMask | kSharedLockMaskInWord)) != kExclusiveLockBitMask, "Must only transfer readlocks"); - for (; ; Thread.Yield()) - { - long expected_word = source.word; - - // If this is invalid or sealed, someone else won the race. - if (IsInvalidOrSealedWord(expected_word)) - return false; - var new_word = expected_word; - - // Fail if there is an established XLock. Having both X and S locks means the other thread is still in the read-lock draining portion - // of TryLockExclusive, so we can remove the exclusive bit, and TryLockExclusive will see the "invalid" mark bits after the SLocks are - // drained, and will return false. If there is only an XLock, we cannot proceed. - if (!allowXLock && (word & (kExclusiveLockBitMask | kSharedLockMaskInWord)) == kExclusiveLockBitMask) - return false; - new_word &= ~kExclusiveLockBitMask; - - // Mark the source record atomically with the transfer. - if (seal) - new_word |= kSealedBitMask; - else - new_word &= ~kValidBitMask; - - // If the source record has an ephemeral lock, remove it now. (Check this *after* the "established XLock" test above.) - if (removeEphemeralLock) - new_word -= kSharedLockIncrement; - - // Update the source record; this ensures we atomically copy the lock count while setting the mark bit. - // If that succeeds, then we update our own word. - if (expected_word == Interlocked.CompareExchange(ref source.word, new_word, expected_word)) - { - this.word &= ~(kExclusiveLockBitMask | kSharedLockMaskInWord); - this.word |= new_word & (kExclusiveLockBitMask | kSharedLockMaskInWord); - return true; - } - } - } + public void CloseAtomic() => SetInvalidAtomic(); [MethodImpl(MethodImplOptions.AggressiveInlining)] public bool TryUpdateAddress(long expectedPrevAddress, long newPrevAddress) @@ -354,46 +268,13 @@ public bool Valid } } - public bool Tentative - { - get => (word & kTentativeBitMask) > 0; - set - { - if (value) word |= kTentativeBitMask; - else word &= ~kTentativeBitMask; - } - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public void ClearTentativeBitAtomic() - { - Debug.Assert(this.Tentative, "Should only ClearTentative a tentative record"); - - // Call this when locking or splicing may be done simultaneously - while (true) - { - long expected_word = word; // TODO: Interlocked.And is not supported in netstandard2.1 - if (expected_word == Interlocked.CompareExchange(ref word, expected_word & ~kTentativeBitMask, expected_word)) - return; - - // Tentative records should not be operated on by other threads. 
- Debug.Assert((word & kSealedBitMask) == 0 && !this.Invalid); - Thread.Yield(); - } - } - - public bool Sealed => (word & kSealedBitMask) > 0; - public void Seal() => word |= kSealedBitMask; - public void Unseal() => word &= ~kSealedBitMask; - public void ClearDirtyAtomic() { - while (true) + for (; ; Thread.Yield()) { long expected_word = word; // TODO: Interlocked.And is not supported in netstandard2.1 if (expected_word == Interlocked.CompareExchange(ref word, expected_word & ~kDirtyBitMask, expected_word)) break; - Thread.Yield(); } } @@ -427,7 +308,7 @@ public bool Filler } } - public bool InNewVersion + public bool IsInNewVersion { get => (word & kInNewVersionBitMask) > 0; set @@ -441,49 +322,22 @@ public bool InNewVersion public void SetDirty() => word |= kDirtyBitMask; public void SetTombstone() => word |= kTombstoneBitMask; public void SetValid() => word |= kValidBitMask; - public void SetInvalid() => word &= ~(kValidBitMask | kTentativeBitMask); + public void SetInvalid() => word &= ~(kValidBitMask | kExclusiveLockBitMask); [MethodImpl(MethodImplOptions.AggressiveInlining)] public void SetInvalidAtomic() { - while (true) + for (; ; Thread.Yield()) { long expected_word = word; // TODO: Interlocked.And is not supported in netstandard2.1 - if (expected_word == Interlocked.CompareExchange(ref word, expected_word & ~(kValidBitMask | kTentativeBitMask), expected_word)) + if (expected_word == Interlocked.CompareExchange(ref word, expected_word & ~kValidBitMask, expected_word)) return; - Thread.Yield(); - } - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool SetInvalidAtomicIfNoLocks() - { - while (!this.Invalid) - { - long expected_word = word; - - if ((expected_word & (kSealedBitMask | kTentativeBitMask | kExclusiveLockBitMask | kSharedLockMaskInWord)) != 0) - return false; - - long new_word = expected_word & ~kValidBitMask; - long current_word = Interlocked.CompareExchange(ref word, new_word, expected_word); - if (expected_word == current_word) - return true; - Thread.Yield(); } - - // If we got here, someone else set it Invalid--that means we cannot rely on a consistent state in the caller, so return false. - return false; } public bool Invalid => (word & kValidBitMask) == 0; - public bool SkipOnScan => Invalid || (word & (kSealedBitMask | kTentativeBitMask)) != 0; - - /// - /// Indicates whether this RecordInfo is a valid source for updates or record locks. - /// - public bool IsValidUpdateOrLockSource => (word & (kValidBitMask | kTentativeBitMask | kSealedBitMask)) == kValidBitMask; + public bool SkipOnScan => IsClosedWord(word); public long PreviousAddress { @@ -502,18 +356,24 @@ public long PreviousAddress public static int GetLength() => kTotalSizeInBytes; internal bool Unused1 - { - get => (word & kUnused1BitMask) != 0; + { + get => (word & kUnused1BitMask) != 0; set => word = value ? word | kUnused1BitMask : word & ~kUnused1BitMask; } + internal bool Unused2 + { + get => (word & kUnused2BitMask) != 0; + set => word = value ? word | kUnused2BitMask : word & ~kUnused2BitMask; + } + public override string ToString() { var paRC = this.PreviousAddressIsReadCache ? "(rc)" : string.Empty; var locks = $"{(this.IsLockedExclusive ? "x" : string.Empty)}{this.NumLockedShared}"; static string bstr(bool value) => value ? 
"T" : "F"; - return $"prev {this.AbsolutePreviousAddress}{paRC}, locks {locks}, valid {bstr(Valid)}, mod {bstr(Modified)}," - + $" tomb {bstr(Tombstone)}, tent {bstr(Tentative)}, seal {bstr(Sealed)}, fill {bstr(Filler)}, dirty {bstr(Dirty)}, Un1 {bstr(Unused1)}"; + return $"prev {this.AbsolutePreviousAddress}{paRC}, locks {locks}, valid {bstr(Valid)}, tomb {bstr(Tombstone)}, seal {bstr(this.IsSealed)}," + + $" mod {bstr(Modified)}, dirty {bstr(Dirty)}, fill {bstr(Filler)}, Un1 {bstr(Unused1)}, Un2 {bstr(Unused2)}"; } } } diff --git a/cs/src/core/Index/FASTER/FASTER.cs b/cs/src/core/Index/FASTER/FASTER.cs index 742464ab8..c01abbcd6 100644 --- a/cs/src/core/Index/FASTER/FASTER.cs +++ b/cs/src/core/Index/FASTER/FASTER.cs @@ -64,8 +64,9 @@ public partial class FasterKV : FasterBase, ConcurrentDictionary _recoveredSessionNameMap; int maxSessionID; - internal readonly bool DisableEphemeralLocking; - internal readonly LockTable LockTable; + internal readonly bool DoTransientLocking; // uses LockTable + internal readonly bool DoEphemeralLocking; // uses RecordInfo + internal readonly OverflowBucketLockTable LockTable; internal void IncrementNumLockingSessions() { @@ -85,7 +86,7 @@ public FasterKV(FasterKVSettings fasterKVSettings) : fasterKVSettings.GetIndexSizeCacheLines(), fasterKVSettings.GetLogSettings(), fasterKVSettings.GetCheckpointSettings(), fasterKVSettings.GetSerializerSettings(), fasterKVSettings.EqualityComparer, fasterKVSettings.GetVariableLengthStructSettings(), - fasterKVSettings.TryRecoverLatest, fasterKVSettings.DisableEphemeralLocking, null, fasterKVSettings.logger, fasterKVSettings.LockTableSize) + fasterKVSettings.TryRecoverLatest, fasterKVSettings.LockingMode, null, fasterKVSettings.logger) { } /// @@ -98,18 +99,19 @@ public FasterKV(FasterKVSettings fasterKVSettings) : /// FASTER equality comparer for key /// /// Try to recover from latest checkpoint, if any - /// Whether FASTER takes ephemeral read and write locks on records + /// How FASTER should do record locking /// Logger factory to create an ILogger, if one is not passed in (e.g. from ). /// Logger to use. /// Number of buckets in the lock table public FasterKV(long size, LogSettings logSettings, CheckpointSettings checkpointSettings = null, SerializerSettings serializerSettings = null, IFasterEqualityComparer comparer = null, - VariableLengthStructSettings variableLengthStructSettings = null, bool tryRecoverLatest = false, bool disableEphemeralLocking = false, + VariableLengthStructSettings variableLengthStructSettings = null, bool tryRecoverLatest = false, LockingMode lockingMode = LockingMode.Standard, ILoggerFactory loggerFactory = null, ILogger logger = null, int lockTableSize = Constants.kDefaultLockTableSize) { this.loggerFactory = loggerFactory; this.logger = logger ?? 
this.loggerFactory?.CreateLogger("FasterKV Constructor"); + if (comparer != null) this.comparer = comparer; else @@ -131,7 +133,8 @@ public FasterKV(long size, LogSettings logSettings, } } - this.DisableEphemeralLocking = disableEphemeralLocking; + this.DoTransientLocking = lockingMode == LockingMode.Standard; + this.DoEphemeralLocking = lockingMode == LockingMode.Ephemeral; if (checkpointSettings is null) checkpointSettings = new CheckpointSettings(); @@ -222,12 +225,11 @@ public FasterKV(long size, LogSettings logSettings, } hlog.Initialize(); - hlog.OnLockEvictionObserver = new LockEvictionObserver(this); sectorSize = (int)logSettings.LogDevice.SectorSize; Initialize(size, sectorSize); - this.LockTable = new LockTable(lockTableSize, this.comparer, keyLen); + this.LockTable = new OverflowBucketLockTable(lockingMode == LockingMode.Standard ? this : null); systemState = SystemState.Make(Phase.REST, 1); @@ -575,149 +577,140 @@ internal static ReadFlags MergeReadFlags(ReadFlags upper, ReadFlags lower) } [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal Status ContextRead(ref Key key, ref Input input, ref Output output, Context context, FasterSession fasterSession, long serialNo, - FasterExecutionContext sessionCtx) + internal Status ContextRead(ref Key key, ref Input input, ref Output output, Context context, FasterSession fasterSession, long serialNo) where FasterSession : IFasterSession { var pcontext = default(PendingContext); - pcontext.SetOperationFlags(sessionCtx.ReadFlags); + pcontext.SetOperationFlags(fasterSession.Ctx.ReadFlags); OperationStatus internalStatus; do - internalStatus = InternalRead(ref key, ref input, ref output, Constants.kInvalidAddress, ref context, ref pcontext, fasterSession, sessionCtx, serialNo); - while (HandleImmediateRetryStatus(internalStatus, sessionCtx, sessionCtx, fasterSession, ref pcontext)); + internalStatus = InternalRead(ref key, ref input, ref output, Constants.kInvalidAddress, ref context, ref pcontext, fasterSession, serialNo); + while (HandleImmediateRetryStatus(internalStatus, fasterSession, ref pcontext)); - var status = HandleOperationStatus(sessionCtx, ref pcontext, internalStatus); + var status = HandleOperationStatus(fasterSession.Ctx, ref pcontext, internalStatus); - Debug.Assert(serialNo >= sessionCtx.serialNum, "Operation serial numbers must be non-decreasing"); - sessionCtx.serialNum = serialNo; + Debug.Assert(serialNo >= fasterSession.Ctx.serialNum, "Operation serial numbers must be non-decreasing"); + fasterSession.Ctx.serialNum = serialNo; return status; } [MethodImpl(MethodImplOptions.AggressiveInlining)] internal Status ContextRead(ref Key key, ref Input input, ref Output output, ref ReadOptions readOptions, out RecordMetadata recordMetadata, Context context, - FasterSession fasterSession, long serialNo, FasterExecutionContext sessionCtx) + FasterSession fasterSession, long serialNo) where FasterSession : IFasterSession { var pcontext = default(PendingContext); - pcontext.SetOperationFlags(MergeReadFlags(sessionCtx.ReadFlags, readOptions.ReadFlags), ref readOptions); + pcontext.SetOperationFlags(MergeReadFlags(fasterSession.Ctx.ReadFlags, readOptions.ReadFlags), ref readOptions); OperationStatus internalStatus; do - internalStatus = InternalRead(ref key, ref input, ref output, readOptions.StartAddress, ref context, ref pcontext, fasterSession, sessionCtx, serialNo); - while (HandleImmediateRetryStatus(internalStatus, sessionCtx, sessionCtx, fasterSession, ref pcontext)); + internalStatus = InternalRead(ref key, ref 
input, ref output, readOptions.StartAddress, ref context, ref pcontext, fasterSession, serialNo); + while (HandleImmediateRetryStatus(internalStatus, fasterSession, ref pcontext)); - var status = HandleOperationStatus(sessionCtx, ref pcontext, internalStatus); - recordMetadata = status.IsCompletedSuccessfully ? recordMetadata = new(pcontext.recordInfo, pcontext.logicalAddress) : default; + var status = HandleOperationStatus(fasterSession.Ctx, ref pcontext, internalStatus); + recordMetadata = status.IsCompletedSuccessfully ? new(pcontext.recordInfo, pcontext.logicalAddress) : default; - Debug.Assert(serialNo >= sessionCtx.serialNum, "Operation serial numbers must be non-decreasing"); - sessionCtx.serialNum = serialNo; + Debug.Assert(serialNo >= fasterSession.Ctx.serialNum, "Operation serial numbers must be non-decreasing"); + fasterSession.Ctx.serialNum = serialNo; return status; } [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal Status ContextReadAtAddress(ref Input input, ref Output output, ref ReadOptions readOptions, Context context, FasterSession fasterSession, long serialNo, - FasterExecutionContext sessionCtx) + internal Status ContextReadAtAddress(ref Input input, ref Output output, ref ReadOptions readOptions, Context context, FasterSession fasterSession, long serialNo) where FasterSession : IFasterSession { var pcontext = default(PendingContext); - pcontext.SetOperationFlags(MergeReadFlags(sessionCtx.ReadFlags, readOptions.ReadFlags), ref readOptions, noKey: true); + pcontext.SetOperationFlags(MergeReadFlags(fasterSession.Ctx.ReadFlags, readOptions.ReadFlags), ref readOptions, noKey: true); Key key = default; OperationStatus internalStatus; do - internalStatus = InternalRead(ref key, ref input, ref output, readOptions.StartAddress, ref context, ref pcontext, fasterSession, sessionCtx, serialNo); - while (HandleImmediateRetryStatus(internalStatus, sessionCtx, sessionCtx, fasterSession, ref pcontext)); + internalStatus = InternalRead(ref key, ref input, ref output, readOptions.StartAddress, ref context, ref pcontext, fasterSession, serialNo); + while (HandleImmediateRetryStatus(internalStatus, fasterSession, ref pcontext)); - var status = HandleOperationStatus(sessionCtx, ref pcontext, internalStatus); + var status = HandleOperationStatus(fasterSession.Ctx, ref pcontext, internalStatus); - Debug.Assert(serialNo >= sessionCtx.serialNum, "Operation serial numbers must be non-decreasing"); - sessionCtx.serialNum = serialNo; + Debug.Assert(serialNo >= fasterSession.Ctx.serialNum, "Operation serial numbers must be non-decreasing"); + fasterSession.Ctx.serialNum = serialNo; return status; } [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal Status ContextUpsert(ref Key key, ref Input input, ref Value value, ref Output output, - Context context, FasterSession fasterSession, long serialNo, FasterExecutionContext sessionCtx) + internal Status ContextUpsert(ref Key key, ref Input input, ref Value value, ref Output output, Context context, FasterSession fasterSession, long serialNo) where FasterSession : IFasterSession { var pcontext = default(PendingContext); OperationStatus internalStatus; do - internalStatus = InternalUpsert(ref key, ref input, ref value, ref output, ref context, ref pcontext, fasterSession, sessionCtx, serialNo); - while (HandleImmediateRetryStatus(internalStatus, sessionCtx, sessionCtx, fasterSession, ref pcontext)); + internalStatus = InternalUpsert(ref key, ref input, ref value, ref output, ref context, ref pcontext, fasterSession, serialNo); + 
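// Every Context* wrapper in this hunk follows the same driver shape: run the Internal* operation,
// loop while HandleImmediateRetryStatus asks for an immediate retry, then map the final
// OperationStatus to a public Status. A hedged sketch of just that shape (the delegates stand in
// for the real methods, which also thread the PendingContext and session through each call):
using System;

static class OperationDriverSketch
{
    enum OpStatus { Success, RetryNow, RecordOnDisk } // illustrative subset

    static TStatus Run<TStatus>(
        Func<OpStatus> internalOp,             // e.g. () => InternalUpsert(...)
        Func<OpStatus, bool> retryImmediately, // e.g. HandleImmediateRetryStatus(...)
        Func<OpStatus, TStatus> finalize)      // e.g. HandleOperationStatus(...)
    {
        OpStatus internalStatus;
        do
            internalStatus = internalOp();
        while (retryImmediately(internalStatus));
        return finalize(internalStatus);
    }
}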
while (HandleImmediateRetryStatus(internalStatus, fasterSession, ref pcontext)); - var status = HandleOperationStatus(sessionCtx, ref pcontext, internalStatus); + var status = HandleOperationStatus(fasterSession.Ctx, ref pcontext, internalStatus); - Debug.Assert(serialNo >= sessionCtx.serialNum, "Operation serial numbers must be non-decreasing"); - sessionCtx.serialNum = serialNo; + Debug.Assert(serialNo >= fasterSession.Ctx.serialNum, "Operation serial numbers must be non-decreasing"); + fasterSession.Ctx.serialNum = serialNo; return status; } [MethodImpl(MethodImplOptions.AggressiveInlining)] internal Status ContextUpsert(ref Key key, ref Input input, ref Value value, ref Output output, out RecordMetadata recordMetadata, - Context context, FasterSession fasterSession, long serialNo, FasterExecutionContext sessionCtx) + Context context, FasterSession fasterSession, long serialNo) where FasterSession : IFasterSession { var pcontext = default(PendingContext); OperationStatus internalStatus; do - internalStatus = InternalUpsert(ref key, ref input, ref value, ref output, ref context, ref pcontext, fasterSession, sessionCtx, serialNo); - while (HandleImmediateRetryStatus(internalStatus, sessionCtx, sessionCtx, fasterSession, ref pcontext)); + internalStatus = InternalUpsert(ref key, ref input, ref value, ref output, ref context, ref pcontext, fasterSession, serialNo); + while (HandleImmediateRetryStatus(internalStatus, fasterSession, ref pcontext)); - var status = HandleOperationStatus(sessionCtx, ref pcontext, internalStatus); - recordMetadata = status.IsCompletedSuccessfully ? recordMetadata = new(pcontext.recordInfo, pcontext.logicalAddress) : default; + var status = HandleOperationStatus(fasterSession.Ctx, ref pcontext, internalStatus); + recordMetadata = status.IsCompletedSuccessfully ? 
new(pcontext.recordInfo, pcontext.logicalAddress) : default; - Debug.Assert(serialNo >= sessionCtx.serialNum, "Operation serial numbers must be non-decreasing"); - sessionCtx.serialNum = serialNo; + Debug.Assert(serialNo >= fasterSession.Ctx.serialNum, "Operation serial numbers must be non-decreasing"); + fasterSession.Ctx.serialNum = serialNo; return status; } [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal Status ContextRMW(ref Key key, ref Input input, ref Output output, Context context, FasterSession fasterSession, long serialNo, - FasterExecutionContext sessionCtx) + internal Status ContextRMW(ref Key key, ref Input input, ref Output output, Context context, FasterSession fasterSession, long serialNo) where FasterSession : IFasterSession - => ContextRMW(ref key, ref input, ref output, out _, context, fasterSession, serialNo, sessionCtx); + => ContextRMW(ref key, ref input, ref output, out _, context, fasterSession, serialNo); [MethodImpl(MethodImplOptions.AggressiveInlining)] internal Status ContextRMW(ref Key key, ref Input input, ref Output output, out RecordMetadata recordMetadata, - Context context, FasterSession fasterSession, long serialNo, FasterExecutionContext sessionCtx) + Context context, FasterSession fasterSession, long serialNo) where FasterSession : IFasterSession { var pcontext = default(PendingContext); OperationStatus internalStatus; do - internalStatus = InternalRMW(ref key, ref input, ref output, ref context, ref pcontext, fasterSession, sessionCtx, serialNo); - while (HandleImmediateRetryStatus(internalStatus, sessionCtx, sessionCtx, fasterSession, ref pcontext)); + internalStatus = InternalRMW(ref key, ref input, ref output, ref context, ref pcontext, fasterSession, serialNo); + while (HandleImmediateRetryStatus(internalStatus, fasterSession, ref pcontext)); - var status = HandleOperationStatus(sessionCtx, ref pcontext, internalStatus); - recordMetadata = status.IsCompletedSuccessfully ? recordMetadata = new(pcontext.recordInfo, pcontext.logicalAddress) : default; + var status = HandleOperationStatus(fasterSession.Ctx, ref pcontext, internalStatus); + recordMetadata = status.IsCompletedSuccessfully ? 
new(pcontext.recordInfo, pcontext.logicalAddress) : default; - Debug.Assert(serialNo >= sessionCtx.serialNum, "Operation serial numbers must be non-decreasing"); - sessionCtx.serialNum = serialNo; + Debug.Assert(serialNo >= fasterSession.Ctx.serialNum, "Operation serial numbers must be non-decreasing"); + fasterSession.Ctx.serialNum = serialNo; return status; } [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal Status ContextDelete( - ref Key key, - Context context, - FasterSession fasterSession, - long serialNo, - FasterExecutionContext sessionCtx) + internal Status ContextDelete(ref Key key, Context context, FasterSession fasterSession, long serialNo) where FasterSession : IFasterSession { var pcontext = default(PendingContext); OperationStatus internalStatus; do - internalStatus = InternalDelete(ref key, ref context, ref pcontext, fasterSession, sessionCtx, serialNo); - while (HandleImmediateRetryStatus(internalStatus, sessionCtx, sessionCtx, fasterSession, ref pcontext)); + internalStatus = InternalDelete(ref key, ref context, ref pcontext, fasterSession, serialNo); + while (HandleImmediateRetryStatus(internalStatus, fasterSession, ref pcontext)); - var status = HandleOperationStatus(sessionCtx, ref pcontext, internalStatus); + var status = HandleOperationStatus(fasterSession.Ctx, ref pcontext, internalStatus); - Debug.Assert(serialNo >= sessionCtx.serialNum, "Operation serial numbers must be non-decreasing"); - sessionCtx.serialNum = serialNo; + Debug.Assert(serialNo >= fasterSession.Ctx.serialNum, "Operation serial numbers must be non-decreasing"); + fasterSession.Ctx.serialNum = serialNo; return status; } @@ -769,6 +762,7 @@ public void Dispose() Free(); hlog.Dispose(); readcache?.Dispose(); + LockTable.Dispose(); _lastSnapshotCheckpoint.Dispose(); if (disposeCheckpointManager) checkpointManager?.Dispose(); diff --git a/cs/src/core/Index/FASTER/FASTERBase.cs b/cs/src/core/Index/FASTER/FASTERBase.cs index c12a1de8c..a2b384464 100644 --- a/cs/src/core/Index/FASTER/FASTERBase.cs +++ b/cs/src/core/Index/FASTER/FASTERBase.cs @@ -6,6 +6,7 @@ using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using System.Threading; +using FASTER.core; using Microsoft.Extensions.Logging; namespace FASTER.core @@ -50,7 +51,7 @@ internal static class Constants public const int kDefaultLockTableSize = 16 * 1024; public const int kMaxLockSpins = 10; // TODO verify these - public const int kMaxReaderLockDrainSpins = 100; + public const int kMaxReaderLockDrainSpins = kMaxLockSpins * 10; /// Invalid entry value public const int kInvalidEntrySlot = kEntriesPerBucket; @@ -106,9 +107,12 @@ internal unsafe struct HashBucket public fixed long bucket_entries[Constants.kEntriesPerBucket]; [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static bool TryAcquireSharedLatch(ref HashEntryInfo hei) + public static bool TryAcquireSharedLatch(ref HashEntryInfo hei) => TryAcquireSharedLatch(hei.firstBucket); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static bool TryAcquireSharedLatch(HashBucket* bucket) { - ref long entry_word = ref hei.firstBucket->bucket_entries[Constants.kOverflowBucketIndex]; + ref long entry_word = ref bucket->bucket_entries[Constants.kOverflowBucketIndex]; int spinCount = Constants.kMaxLockSpins; for (; ; Thread.Yield()) @@ -126,9 +130,12 @@ public static bool TryAcquireSharedLatch(ref HashEntryInfo hei) } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static void ReleaseSharedLatch(ref HashEntryInfo hei) + public static void 
ReleaseSharedLatch(ref HashEntryInfo hei) => ReleaseSharedLatch(hei.firstBucket); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static void ReleaseSharedLatch(HashBucket* bucket) { - ref long entry_word = ref hei.firstBucket->bucket_entries[Constants.kOverflowBucketIndex]; + ref long entry_word = ref bucket->bucket_entries[Constants.kOverflowBucketIndex]; // X and S latches means an X latch is still trying to drain readers, like this one. Debug.Assert((entry_word & kLatchBitMask) != kExclusiveLatchBitMask, "Trying to S unlatch an X-only latched record"); Debug.Assert((entry_word & kSharedLatchBitMask) != 0, "Trying to S unlatch an unlatched record"); @@ -136,9 +143,12 @@ public static void ReleaseSharedLatch(ref HashEntryInfo hei) } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static bool TryAcquireExclusiveLatch(ref HashEntryInfo hei) + public static bool TryAcquireExclusiveLatch(ref HashEntryInfo hei) => TryAcquireExclusiveLatch(hei.firstBucket); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static bool TryAcquireExclusiveLatch(HashBucket* bucket) { - ref long entry_word = ref hei.firstBucket->bucket_entries[Constants.kOverflowBucketIndex]; + ref long entry_word = ref bucket->bucket_entries[Constants.kOverflowBucketIndex]; int spinCount = Constants.kMaxLockSpins; // Acquire exclusive lock (readers may still be present; we'll drain them later) @@ -173,9 +183,12 @@ public static bool TryAcquireExclusiveLatch(ref HashEntryInfo hei) } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static void ReleaseExclusiveLatch(ref HashEntryInfo hei) + public static void ReleaseExclusiveLatch(ref HashEntryInfo hei) => ReleaseExclusiveLatch(hei.firstBucket); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static void ReleaseExclusiveLatch(HashBucket* bucket) { - ref long entry_word = ref hei.firstBucket->bucket_entries[Constants.kOverflowBucketIndex]; + ref long entry_word = ref bucket->bucket_entries[Constants.kOverflowBucketIndex]; // We should not be calling this method unless we have successfully acquired the latch (all existing readers were drained). Debug.Assert((entry_word & kSharedLatchBitMask) == 0, "Trying to X unlatch an S latched record"); @@ -189,6 +202,24 @@ public static void ReleaseExclusiveLatch(ref HashEntryInfo hei) break; } } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static ushort NumLatchedShared(HashBucket* bucket) + => (ushort)((bucket->bucket_entries[Constants.kOverflowBucketIndex] & kSharedLatchBitMask) >> kSharedLatchBitOffset); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static bool IsLatchedExclusive(HashBucket* bucket) + => (bucket->bucket_entries[Constants.kOverflowBucketIndex] & kExclusiveLatchBitMask) != 0; + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static bool IsLatched(HashBucket* bucket) + => (bucket->bucket_entries[Constants.kOverflowBucketIndex] & kLatchBitMask) != 0; + + public static string ToString(HashBucket* bucket) + { + var locks = $"{(IsLatchedExclusive(bucket) ? 
"x" : string.Empty)}{NumLatchedShared(bucket)}"; + return $"locks {locks}"; + } } // Long value layout: [1-bit tentative][15-bit TAG][48-bit address] @@ -200,11 +231,7 @@ internal struct HashBucketEntry public long word; public long Address { - readonly get - { - return word & Constants.kAddressMask; - } - + readonly get => word & Constants.kAddressMask; set { word &= ~Constants.kAddressMask; @@ -216,11 +243,7 @@ readonly get public ushort Tag { - readonly get - { - return (ushort)((word & Constants.kTagPositionMask) >> Constants.kTagShift); - } - + readonly get => (ushort)((word & Constants.kTagPositionMask) >> Constants.kTagShift); set { word &= ~Constants.kTagPositionMask; @@ -230,61 +253,37 @@ readonly get public bool Pending { - readonly get - { - return (word & Constants.kPendingBitMask) != 0; - } - + readonly get => (word & Constants.kPendingBitMask) != 0; set { if (value) - { word |= Constants.kPendingBitMask; - } else - { word &= ~Constants.kPendingBitMask; - } } } public bool Tentative { - readonly get - { - return (word & Constants.kTentativeBitMask) != 0; - } - + readonly get => (word & Constants.kTentativeBitMask) != 0; set { if (value) - { word |= Constants.kTentativeBitMask; - } else - { word &= ~Constants.kTentativeBitMask; - } } } public bool ReadCache { - readonly get - { - return (word & Constants.kReadCacheBitMask) != 0; - } - + readonly get => (word & Constants.kReadCacheBitMask) != 0; set { if (value) - { word |= Constants.kReadCacheBitMask; - } else - { word &= ~Constants.kReadCacheBitMask; - } } } @@ -421,222 +420,211 @@ internal void Initialize(int version, long size, int sector_size) /// A helper function that is used to find the slot corresponding to a /// key in the specified version of the hash table /// - /// true if such a slot exists, false otherwise + /// true if such a slot exists, and populates , else returns false [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal bool FindTag(long hash, ushort tag, ref HashBucket* firstBucket, ref HashBucket* bucket, ref int slot, ref HashBucketEntry entry) + internal bool FindTag(ref HashEntryInfo hei) { var target_entry_word = default(long); var entry_slot_bucket = default(HashBucket*); var version = resizeInfo.version; - var masked_entry_word = hash & state[version].size_mask; - firstBucket = bucket = state[version].tableAligned + masked_entry_word; - slot = Constants.kInvalidEntrySlot; + var masked_entry_word = hei.hash & state[version].size_mask; + hei.firstBucket = hei.bucket = state[version].tableAligned + masked_entry_word; + hei.slot = Constants.kInvalidEntrySlot; + hei.entry = default; +#if DEBUG + hei.LockCode = masked_entry_word; +#endif // DEBUG do { - // Search through the bucket looking for our key. Last entry is reserved - // for the overflow pointer. + // Search through the bucket looking for our key. Last entry is reserved for the overflow pointer. 
for (int index = 0; index < Constants.kOverflowBucketIndex; ++index) { - target_entry_word = *(((long*)bucket) + index); + target_entry_word = *(((long*)hei.bucket) + index); if (0 == target_entry_word) - { continue; - } - entry.word = target_entry_word; - if (tag == entry.Tag) + hei.entry.word = target_entry_word; + if (hei.tag == hei.entry.Tag && !hei.entry.Tentative) { - slot = index; - if (!entry.Tentative) - return true; + hei.slot = index; + return true; } } - target_entry_word = *(((long*)bucket) + Constants.kOverflowBucketIndex) & Constants.kAddressMask; - // Go to next bucket in the chain - - + // Go to next bucket in the chain (if it is a nonzero overflow allocation) + target_entry_word = *(((long*)hei.bucket) + Constants.kOverflowBucketIndex) & Constants.kAddressMask; if (target_entry_word == 0) { - entry = default; + // We lock the firstBucket, so it can't be cleared. + hei.bucket = default; + hei.entry = default; return false; } - bucket = (HashBucket*)overflowBucketsAllocator.GetPhysicalAddress(target_entry_word); + hei.bucket = (HashBucket*)overflowBucketsAllocator.GetPhysicalAddress(target_entry_word); } while (true); } [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal void FindOrCreateTag(long hash, ushort tag, ref HashBucket* firstBucket, ref HashBucket* bucket, ref int slot, ref HashBucketEntry entry, long BeginAddress) + internal void FindOrCreateTag(ref HashEntryInfo hei, long BeginAddress) { var version = resizeInfo.version; - var masked_entry_word = hash & state[version].size_mask; + var masked_entry_word = hei.hash & state[version].size_mask; +#if DEBUG + hei.LockCode = masked_entry_word; +#endif // DEBUG while (true) { - firstBucket = bucket = state[version].tableAligned + masked_entry_word; - slot = Constants.kInvalidEntrySlot; + hei.firstBucket = hei.bucket = state[version].tableAligned + masked_entry_word; + hei.slot = Constants.kInvalidEntrySlot; - if (FindTagOrFreeInternal(hash, tag, ref bucket, ref slot, ref entry, BeginAddress)) + if (FindTagOrFreeInternal(ref hei, BeginAddress)) return; // Install tentative tag in free slot - entry = default; - entry.Tag = tag; - entry.Address = Constants.kTempInvalidAddress; - entry.Pending = false; - entry.Tentative = true; - - if (0 == Interlocked.CompareExchange(ref bucket->bucket_entries[slot], entry.word, 0)) + hei.entry = default; + hei.entry.Tag = hei.tag; + hei.entry.Address = Constants.kTempInvalidAddress; + hei.entry.Pending = false; + hei.entry.Tentative = true; + + // Insert the tag into this slot. Failure means another session inserted a key into that slot, so continue the loop to find another free slot. + if (0 == Interlocked.CompareExchange(ref hei.bucket->bucket_entries[hei.slot], hei.entry.word, 0)) { - var orig_bucket = state[version].tableAligned + masked_entry_word; - var orig_slot = Constants.kInvalidEntrySlot; + // Make sure this tag isn't in a different slot already; if it is, make this slot 'available' and continue the search loop. 
+ var orig_bucket = state[version].tableAligned + masked_entry_word; // TODO local var not used; use or change to byval param + var orig_slot = Constants.kInvalidEntrySlot; // TODO local var not used; use or change to byval param - if (FindOtherTagMaybeTentativeInternal(hash, tag, ref orig_bucket, ref orig_slot, bucket, slot)) + if (FindOtherSlotForThisTagMaybeTentativeInternal(hei.tag, ref orig_bucket, ref orig_slot, hei.bucket, hei.slot)) { - bucket->bucket_entries[slot] = 0; + // We own the slot per CAS above, so it is OK to non-CAS the 0 back in + hei.bucket->bucket_entries[hei.slot] = 0; + // TODO: Why not return orig_bucket and orig_slot if it's not Tentative? } else { - entry.Tentative = false; - *((long*)bucket + slot) = entry.word; - break; + hei.entry.Tentative = false; + *((long*)hei.bucket + hei.slot) = hei.entry.word; + return; } } } } /// - /// Find existing entry (tenative or otherwise) - /// If not found, return pointer to some empty slot + /// Find an existing (non-tentative) entry. /// - /// - /// - /// - /// - /// - /// - /// + /// If found, return the slot it is in, else return a pointer to some empty slot (which we may have allocated) [MethodImpl(MethodImplOptions.AggressiveInlining)] - private bool FindTagOrFreeInternal(long hash, ushort tag, ref HashBucket* bucket, ref int slot, ref HashBucketEntry entry, long BeginAddress = 0) + private bool FindTagOrFreeInternal(ref HashEntryInfo hei, long BeginAddress = 0) { var target_entry_word = default(long); var entry_slot_bucket = default(HashBucket*); do { - // Search through the bucket looking for our key. Last entry is reserved - // for the overflow pointer. + // Search through the bucket looking for our key. Last entry is reserved for the overflow pointer. for (int index = 0; index < Constants.kOverflowBucketIndex; ++index) { - target_entry_word = *(((long*)bucket) + index); + target_entry_word = *(((long*)hei.bucket) + index); if (0 == target_entry_word) { - if (slot == Constants.kInvalidEntrySlot) + if (hei.slot == Constants.kInvalidEntrySlot) { - slot = index; - entry_slot_bucket = bucket; + // Record the free slot and continue to search for the key + hei.slot = index; + entry_slot_bucket = hei.bucket; } continue; } - entry.word = target_entry_word; - if (entry.Address < BeginAddress && entry.Address != Constants.kTempInvalidAddress) + // If the entry points to an address that has been truncated, it's free; try to reclaim it by setting its word to 0. + hei.entry.word = target_entry_word; + if (hei.entry.Address < BeginAddress && hei.entry.Address != Constants.kTempInvalidAddress) { - if (entry.word == Interlocked.CompareExchange(ref bucket->bucket_entries[index], Constants.kInvalidAddress, target_entry_word)) + if (hei.entry.word == Interlocked.CompareExchange(ref hei.bucket->bucket_entries[index], Constants.kInvalidAddress, target_entry_word)) { - if (slot == Constants.kInvalidEntrySlot) + if (hei.slot == Constants.kInvalidEntrySlot) { - slot = index; - entry_slot_bucket = bucket; + // Record the free slot and continue to search for the key + hei.slot = index; + entry_slot_bucket = hei.bucket; } continue; } } - if (tag == entry.Tag && !entry.Tentative) + if (hei.tag == hei.entry.Tag && !hei.entry.Tentative) { - slot = index; + hei.slot = index; return true; } } - // Go to next bucket in the chain - target_entry_word = *(((long*)bucket) + Constants.kOverflowBucketIndex); - + // Go to next bucket in the chain (if it is a nonzero overflow allocation). Don't mask off the non-address bits here; they're needed for CAS.
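// FindOrCreateTag's insert path above is a two-phase protocol: first CAS a Tentative placeholder
// entry (address kTempInvalidAddress) into a free slot, where Tentative entries are invisible to
// FindTag; then re-scan for the same tag elsewhere, backing out if another session won the race,
// else clearing the Tentative bit to publish. A condensed sketch of the protocol; the
// tagExistsElsewhere delegate stands in for FindOtherSlotForThisTagMaybeTentativeInternal:
using System;
using System.Threading;

static class TentativeInsertSketch
{
    static bool TryInsertTag(ref long slotWord, long tentativeEntry, long finalEntry,
                             Func<bool> tagExistsElsewhere)
    {
        // Phase 1: claim the free slot; failure means another session took it, so the caller re-probes.
        if (Interlocked.CompareExchange(ref slotWord, tentativeEntry, 0) != 0)
            return false;

        // Phase 2: if the tag is already installed elsewhere, back out.
        // We own the slot after the CAS above, so plain stores are safe from here on.
        if (tagExistsElsewhere())
        {
            slotWord = 0;
            return false;
        }

        // Commit: overwrite with the non-Tentative entry; FindTag can now see it.
        slotWord = finalEntry;
        return true;
    }
}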
+ target_entry_word = *(((long*)hei.bucket) + Constants.kOverflowBucketIndex); while ((target_entry_word & Constants.kAddressMask) == 0) { - if (slot == Constants.kInvalidEntrySlot) + // There is no next bucket. If slot is Constants.kInvalidEntrySlot then we did not find an empty slot, so must allocate a new bucket. + if (hei.slot == Constants.kInvalidEntrySlot) { // Allocate new bucket var logicalBucketAddress = overflowBucketsAllocator.Allocate(); var physicalBucketAddress = (HashBucket*)overflowBucketsAllocator.GetPhysicalAddress(logicalBucketAddress); long compare_word = target_entry_word; target_entry_word = logicalBucketAddress; - target_entry_word |= (compare_word & ~Constants.kAddressMask); + target_entry_word |= compare_word & ~Constants.kAddressMask; long result_word = Interlocked.CompareExchange( - ref bucket->bucket_entries[Constants.kOverflowBucketIndex], + ref hei.bucket->bucket_entries[Constants.kOverflowBucketIndex], target_entry_word, compare_word); if (compare_word != result_word) { - // Install failed, undo allocation; use the winner's entry + // Install of new bucket failed; free the allocation and continue the search using the winner's entry overflowBucketsAllocator.Free(logicalBucketAddress); target_entry_word = result_word; continue; } - else - { - // Install of new overflow bucket succeeded; tag was not found, so return the first slot of the new bucket - bucket = physicalBucketAddress; - slot = 0; - entry = default; - return false; - } - } - else - { - // Tag was not found and an empty slot was found, so return the empty slot - bucket = entry_slot_bucket; - entry = default; - return false; + + // Install of new overflow bucket succeeded; the tag was not found, so return the first slot of the new bucket + hei.bucket = physicalBucketAddress; + hei.slot = 0; + hei.entry = default; + return false; // tag was not found } + + // Tag was not found and an empty slot was found, so return the empty slot + hei.bucket = entry_slot_bucket; + hei.entry = default; + return false; // tag was not found } - bucket = (HashBucket*)overflowBucketsAllocator.GetPhysicalAddress(target_entry_word & Constants.kAddressMask); + // The next bucket was there or was allocated. Move to it. + hei.bucket = (HashBucket*)overflowBucketsAllocator.GetPhysicalAddress(target_entry_word & Constants.kAddressMask); } while (true); } /// - /// Find existing entry (tenative or otherwise) other than the specified "exception" slot - /// If not found, return false. Does not return a free slot. + /// Look for an existing entry (tentative or otherwise) for this hash/tag, other than the specified "except for this" bucket/slot. /// - /// - /// - /// - /// - /// - /// - /// + /// True if found, else false. Does not return a free slot. [MethodImpl(MethodImplOptions.AggressiveInlining)] - private bool FindOtherTagMaybeTentativeInternal(long hash, ushort tag, ref HashBucket* bucket, ref int slot, HashBucket* except_bucket, int except_entry_slot) + private bool FindOtherSlotForThisTagMaybeTentativeInternal(ushort tag, ref HashBucket* bucket, ref int slot, HashBucket* except_bucket, int except_entry_slot) { var target_entry_word = default(long); var entry_slot_bucket = default(HashBucket*); do { - // Search through the bucket looking for our key. Last entry is reserved - // for the overflow pointer. + // Search through the bucket looking for our key. Last entry is reserved for the overflow pointer.
for (int index = 0; index < Constants.kOverflowBucketIndex; ++index) { target_entry_word = *(((long*)bucket) + index); if (0 == target_entry_word) - { continue; - } HashBucketEntry entry = default; entry.word = target_entry_word; @@ -644,25 +632,19 @@ private bool FindOtherTagMaybeTentativeInternal(long hash, ushort tag, ref HashB { if ((except_entry_slot == index) && (except_bucket == bucket)) continue; - slot = index; return true; } } + // Go to next bucket in the chain (if it is a nonzero overflow allocation). target_entry_word = *(((long*)bucket) + Constants.kOverflowBucketIndex) & Constants.kAddressMask; - // Go to next bucket in the chain - - if (target_entry_word == 0) - { return false; - } bucket = (HashBucket*)overflowBucketsAllocator.GetPhysicalAddress(target_entry_word); } while (true); } - /// /// Helper function used to update the slot atomically with the /// new offset value using the CAS operation @@ -676,12 +658,8 @@ private bool FindOtherTagMaybeTentativeInternal(long hash, ushort tag, ref HashB [MethodImpl(MethodImplOptions.AggressiveInlining)] internal bool UpdateSlot(HashBucket* bucket, int entrySlot, long expected, long desired, out long found) { - found = Interlocked.CompareExchange( - ref bucket->bucket_entries[entrySlot], - desired, - expected); - - return (found == expected); + found = Interlocked.CompareExchange(ref bucket->bucket_entries[entrySlot], desired, expected); + return found == expected; } } } diff --git a/cs/src/core/Index/FASTER/FASTERIterator.cs b/cs/src/core/Index/FASTER/FASTERIterator.cs index e6fa3e27c..f97ed8a38 100644 --- a/cs/src/core/Index/FASTER/FASTERIterator.cs +++ b/cs/src/core/Index/FASTER/FASTERIterator.cs @@ -116,13 +116,8 @@ public unsafe bool GetNext(out RecordInfo recordInfo) ref var key = ref iter1.GetKey(); ref var value = ref iter1.GetValue(); - var bucket = default(HashBucket*); - var firstBucket = default(HashBucket*); - var slot = default(int); - var entry = default(HashBucketEntry); - var hash = fht.Comparer.GetHashCode64(ref key); - var tag = (ushort)((ulong)hash >> Constants.kHashTagShift); - if (fht.FindTag(hash, tag, ref firstBucket, ref bucket, ref slot, ref entry) && entry.Address == iter1.CurrentAddress) + HashEntryInfo hei = new(fht.Comparer.GetHashCode64(ref key)); + if (fht.FindTag(ref hei) && hei.entry.Address == iter1.CurrentAddress) { if (recordInfo.PreviousAddress >= fht.Log.BeginAddress) { diff --git a/cs/src/core/Index/FASTER/FASTERThread.cs b/cs/src/core/Index/FASTER/FASTERThread.cs index 97ec48321..e24fbd52c 100644 --- a/cs/src/core/Index/FASTER/FASTERThread.cs +++ b/cs/src/core/Index/FASTER/FASTERThread.cs @@ -61,19 +61,17 @@ public partial class FasterKV : FasterBase, IFasterKV } [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal void InternalRefresh(FasterExecutionContext ctx, FasterSession fasterSession) - where FasterSession : IFasterSession + internal void InternalRefresh(FasterSession fasterSession) + where FasterSession : IFasterSession { epoch.ProtectAndDrain(); // We check if we are in normal mode var newPhaseInfo = SystemState.Copy(ref systemState); - if (ctx.phase == Phase.REST && newPhaseInfo.Phase == Phase.REST && ctx.version == newPhaseInfo.Version) - { + if (fasterSession.Ctx.phase == Phase.REST && newPhaseInfo.Phase == Phase.REST && fasterSession.Ctx.version == newPhaseInfo.Version) return; - } - ThreadStateMachineStep(ctx, fasterSession, default); + ThreadStateMachineStep(fasterSession.Ctx, fasterSession, default); } internal static void InitContext(FasterExecutionContext ctx, int 
sessionID, string sessionName, long lsn = -1) @@ -112,20 +110,18 @@ internal static void CopyContext(FasterExecutionContext< } } - internal bool InternalCompletePending( - FasterExecutionContext ctx, - FasterSession fasterSession, - bool wait = false, CompletedOutputIterator completedOutputs = null) + internal bool InternalCompletePending(FasterSession fasterSession, bool wait = false, + CompletedOutputIterator completedOutputs = null) where FasterSession : IFasterSession { while (true) { - InternalCompletePendingRequests(ctx, ctx, fasterSession, completedOutputs); - if (wait) ctx.WaitPending(epoch); + InternalCompletePendingRequests(fasterSession, completedOutputs); + if (wait) fasterSession.Ctx.WaitPending(epoch); - if (ctx.HasNoPendingRequests) return true; + if (fasterSession.Ctx.HasNoPendingRequests) return true; - InternalRefresh(ctx, fasterSession); + InternalRefresh(fasterSession); if (!wait) return false; Thread.Yield(); @@ -135,34 +131,29 @@ internal bool InternalCompletePending( internal bool InRestPhase() => systemState.Phase == Phase.REST; #region Complete Pending Requests - internal void InternalCompletePendingRequests( - FasterExecutionContext opCtx, - FasterExecutionContext currentCtx, - FasterSession fasterSession, CompletedOutputIterator completedOutputs) + internal void InternalCompletePendingRequests(FasterSession fasterSession, + CompletedOutputIterator completedOutputs) where FasterSession : IFasterSession { hlog.TryComplete(); - if (opCtx.readyResponses.Count == 0) return; + if (fasterSession.Ctx.readyResponses.Count == 0) return; - while (opCtx.readyResponses.TryDequeue(out AsyncIOContext request)) + while (fasterSession.Ctx.readyResponses.TryDequeue(out AsyncIOContext request)) { - InternalCompletePendingRequest(opCtx, currentCtx, fasterSession, request, completedOutputs); + InternalCompletePendingRequest(fasterSession, request, completedOutputs); } } - internal void InternalCompletePendingRequest( - FasterExecutionContext opCtx, - FasterExecutionContext currentCtx, - FasterSession fasterSession, - AsyncIOContext request, CompletedOutputIterator completedOutputs) + internal void InternalCompletePendingRequest(FasterSession fasterSession, AsyncIOContext request, + CompletedOutputIterator completedOutputs) where FasterSession : IFasterSession { - if (opCtx.ioPendingRequests.TryGetValue(request.id, out var pendingContext)) + if (fasterSession.Ctx.ioPendingRequests.TryGetValue(request.id, out var pendingContext)) { // Remove from pending dictionary - opCtx.ioPendingRequests.Remove(request.id); - var status = InternalCompletePendingRequestFromContext(opCtx, currentCtx, fasterSession, request, ref pendingContext, out _); + fasterSession.Ctx.ioPendingRequests.Remove(request.id); + var status = InternalCompletePendingRequestFromContext(fasterSession, request, ref pendingContext, out _); if (completedOutputs is not null && status.IsCompletedSuccessfully) { // Transfer things to outputs from pendingContext before we dispose it. 
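The reworked InternalCompletePending above is a drain loop: dequeue completed I/O responses, refresh the epoch, and optionally wait until no requests remain. A minimal sketch of that shape follows; the ConcurrentQueue and placeholder callbacks stand in for the session context's readyResponses queue and FASTER's internal helpers, and are not FASTER APIs.

```csharp
using System.Collections.Concurrent;
using System.Threading;

// Sketch only: the complete-pending drain-loop shape, with simplified bookkeeping.
class CompletePendingSketch
{
    readonly ConcurrentQueue<int> readyResponses = new();
    int pendingCount;

    public bool CompletePending(bool wait)
    {
        while (true)
        {
            // Drain everything whose I/O has already completed
            // (placeholder for InternalCompletePendingRequests).
            while (readyResponses.TryDequeue(out int requestId))
                Interlocked.Decrement(ref pendingCount);

            if (Volatile.Read(ref pendingCount) == 0)
                return true;

            // Placeholder for InternalRefresh(fasterSession), which lets the epoch advance.
            if (!wait)
                return false;
            Thread.Yield();
        }
    }
}
```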
@@ -176,12 +167,8 @@ internal void InternalCompletePendingRequest /// Caller is expected to dispose pendingContext after this method completes /// - internal Status InternalCompletePendingRequestFromContext( - FasterExecutionContext opCtx, - FasterExecutionContext currentCtx, - FasterSession fasterSession, - AsyncIOContext request, - ref PendingContext pendingContext, out AsyncIOContext newRequest) + internal Status InternalCompletePendingRequestFromContext(FasterSession fasterSession, AsyncIOContext request, + ref PendingContext pendingContext, out AsyncIOContext newRequest) where FasterSession : IFasterSession { Debug.Assert(epoch.ThisInstanceProtected(), "InternalCompletePendingRequestFromContext requires epoch acquision"); @@ -194,10 +181,10 @@ internal Status InternalCompletePendingRequestFromContext : FasterBase, IFasterKV { - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private bool BlockAllocate( - int recordSize, - out long logicalAddress, - ref PendingContext pendingContext, - out OperationStatus internalStatus) - => TryBlockAllocate(hlog, recordSize, out logicalAddress, ref pendingContext, out internalStatus); - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private bool BlockAllocateReadCache( - int recordSize, - out long logicalAddress, - ref PendingContext pendingContext, - out OperationStatus internalStatus) - => TryBlockAllocate(readcache, recordSize, out logicalAddress, ref pendingContext, out internalStatus); - [MethodImpl(MethodImplOptions.AggressiveInlining)] private static bool TryBlockAllocate( AllocatorBase allocator, @@ -54,6 +39,75 @@ private static bool TryBlockAllocate( return false; } + [MethodImpl(MethodImplOptions.AggressiveInlining)] + bool TryAllocateRecord(ref PendingContext pendingContext, ref OperationStackContext stackCtx, + int allocatedSize, bool recycle, out long newLogicalAddress, out long newPhysicalAddress, out OperationStatus status) + { + status = OperationStatus.SUCCESS; + if (recycle && GetAllocationForRetry(ref pendingContext, stackCtx.hei.Address, allocatedSize, out newLogicalAddress, out newPhysicalAddress)) + return true; + + // Spin to make sure newLogicalAddress is > recSrc.LatestLogicalAddress (the .PreviousAddress and CAS comparison value). + for (; ; Thread.Yield() ) + { + if (!TryBlockAllocate(hlog, allocatedSize, out newLogicalAddress, ref pendingContext, out status)) + break; + + newPhysicalAddress = hlog.GetPhysicalAddress(newLogicalAddress); + if (VerifyInMemoryAddresses(ref stackCtx)) + { + if (newLogicalAddress > stackCtx.recSrc.LatestLogicalAddress) + return true; + + // This allocation is below the necessary address so abandon it and repeat the loop. TODO potential reuse + hlog.GetInfo(newPhysicalAddress).SetInvalid(); // Skip on log scan + continue; + } + + // In-memory source dropped below HeadAddress during BlockAllocate. + if (recycle) + SaveAllocationForRetry(ref pendingContext, newLogicalAddress, newPhysicalAddress, allocatedSize); + else + hlog.GetInfo(newPhysicalAddress).SetInvalid(); // Skip on log scan + status = OperationStatus.RETRY_LATER; + break; + } + + newPhysicalAddress = 0; + return false; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + bool TryAllocateRecordReadCache(ref PendingContext pendingContext, ref OperationStackContext stackCtx, + int allocatedSize, out long newLogicalAddress, out long newPhysicalAddress, out OperationStatus status) + { + // Spin to make sure the start of the tag chain is not readcache, or that newLogicalAddress is > the first address in the tag chain. 
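The new TryAllocateRecord above encapsulates a spin: allocate, re-verify that the in-memory sources compared against are still in memory, and either accept the address (it must sort above the latest logical address used for the CAS comparison) or abandon the allocation and retry. A minimal sketch of that control flow, assuming a simple atomic-counter allocator and a caller-supplied verify delegate rather than FASTER's hybrid log:

```csharp
using System;
using System.Threading;

// Sketch only: the allocate/verify/retry loop shape of TryAllocateRecord.
class AllocateRecordSketch
{
    long tail = 64;

    long BlockAllocate(int size) => Interlocked.Add(ref tail, size) - size;

    public bool TryAllocateRecord(int size, long latestLogicalAddress,
        Func<bool> verifyInMemoryAddresses, out long newAddress)
    {
        for (; ; Thread.Yield())
        {
            newAddress = BlockAllocate(size);
            if (verifyInMemoryAddresses())
            {
                if (newAddress > latestLogicalAddress)
                    return true;
                // Allocation is below the needed address: the real code marks the
                // record Invalid so log scans skip it, then repeats the loop.
                continue;
            }
            // An in-memory source fell below HeadAddress: caller must RETRY_LATER.
            newAddress = 0;
            return false;
        }
    }
}
```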
+ for (; ; Thread.Yield()) + { + if (!TryBlockAllocate(readcache, allocatedSize, out newLogicalAddress, ref pendingContext, out status)) + break; + + newPhysicalAddress = readcache.GetPhysicalAddress(newLogicalAddress); + if (VerifyInMemoryAddresses(ref stackCtx)) + { + if (!stackCtx.hei.IsReadCache || newLogicalAddress > stackCtx.hei.AbsoluteAddress) + return true; + + // This allocation is below the necessary address so abandon it and repeat the loop. + ReadCacheAbandonRecord(newPhysicalAddress); + continue; + } + + // In-memory source dropped below HeadAddress during BlockAllocate. + ReadCacheAbandonRecord(newPhysicalAddress); + status = OperationStatus.RETRY_LATER; + break; + } + + newPhysicalAddress = 0; + return false; + } + [MethodImpl(MethodImplOptions.AggressiveInlining)] void SaveAllocationForRetry(ref PendingContext pendingContext, long logicalAddress, long physicalAddress, int allocatedSize) { diff --git a/cs/src/core/Index/FASTER/Implementation/ContainsKeyInMemory.cs b/cs/src/core/Index/FASTER/Implementation/ContainsKeyInMemory.cs index 264c5f32d..bdb1a947c 100644 --- a/cs/src/core/Index/FASTER/Implementation/ContainsKeyInMemory.cs +++ b/cs/src/core/Index/FASTER/Implementation/ContainsKeyInMemory.cs @@ -9,30 +9,27 @@ public unsafe partial class FasterKV : FasterBase, IFasterKV( - ref Key key, - FasterExecutionContext sessionCtx, - FasterSession fasterSession, out long logicalAddress, long fromAddress = -1) - where FasterSession : IFasterSession + ref Key key, FasterSession fasterSession, out long logicalAddress, long fromAddress = -1) + where FasterSession : IFasterSession { - if (fromAddress < hlog.HeadAddress) - fromAddress = hlog.HeadAddress; + OperationStackContext stackCtx = new(comparer.GetHashCode64(ref key)); - long physicalAddress; - HashEntryInfo hei = new (comparer.GetHashCode64(ref key)); + if (fasterSession.Ctx.phase != Phase.REST) + HeavyEnter(stackCtx.hei.hash, fasterSession.Ctx, fasterSession); - if (sessionCtx.phase != Phase.REST) - HeavyEnter(hei.hash, sessionCtx, fasterSession); - - if (FindTag(ref hei)) + if (FindTag(ref stackCtx.hei)) { - logicalAddress = hei.Address; + stackCtx.SetRecordSourceToHashEntry(hlog); if (UseReadCache) - SkipReadCache(ref hei, ref logicalAddress); + SkipReadCache(ref stackCtx, out _); + + if (fromAddress < hlog.HeadAddress) + fromAddress = hlog.HeadAddress; - if (logicalAddress >= fromAddress) + if (stackCtx.recSrc.LogicalAddress >= fromAddress) { - physicalAddress = hlog.GetPhysicalAddress(logicalAddress); + var physicalAddress = stackCtx.recSrc.SetPhysicalAddress(); ref RecordInfo recordInfo = ref hlog.GetInfo(physicalAddress); if (recordInfo.Invalid || !comparer.Equals(ref key, ref hlog.GetKey(physicalAddress))) { @@ -40,11 +37,12 @@ internal Status InternalContainsKeyInMemory : FasterBase, IFasterKV - /// The thread (or session) context to execute operation in. /// Async response from disk. /// Pending context corresponding to operation. /// Callback functions. 
- /// /// /// /// @@ -29,16 +27,12 @@ public unsafe partial class FasterKV : FasterBase, IFasterKV /// /// - internal OperationStatus InternalContinuePendingRead( - FasterExecutionContext ctx, - AsyncIOContext request, - ref PendingContext pendingContext, - FasterSession fasterSession, - FasterExecutionContext currentCtx) + internal OperationStatus InternalContinuePendingRead(AsyncIOContext request, + ref PendingContext pendingContext, FasterSession fasterSession) where FasterSession : IFasterSession { ref RecordInfo srcRecordInfo = ref hlog.GetInfoFromBytePointer(request.record.GetValidPointer()); - srcRecordInfo.CleanDiskImage(); + srcRecordInfo.ClearBitsForDiskImages(); if (request.logicalAddress >= hlog.BeginAddress) { @@ -50,21 +44,28 @@ internal OperationStatus InternalContinuePendingRead(fasterSession, ref key, ref stackCtx, LockType.Shared, pendingContext.PrevHighestKeyHashAddress); - if (status != OperationStatus.SUCCESS) + // During the pending operation, a record for the key may have been added to the log or readcache. + ref var value = ref hlog.GetContextRecordValue(ref request); + if (TryFindRecordInMemory(ref key, ref stackCtx, ref pendingContext)) { - if (HandleImmediateRetryStatus(status, currentCtx, currentCtx, fasterSession, ref pendingContext)) + srcRecordInfo = ref stackCtx.recSrc.GetInfo(); + + // V threads cannot access V+1 records. Use the latest logical address rather than the traced address (logicalAddress) per comments in AcquireCPRLatchRMW. + if (fasterSession.Ctx.phase == Phase.PREPARE && IsEntryVersionNew(ref stackCtx.hei.entry)) + return OperationStatus.CPR_SHIFT_DETECTED; // Pivot thread; retry + value = ref stackCtx.recSrc.GetValue(); + } + + if (!TryTransientSLock(fasterSession, ref key, ref stackCtx, out var status)) + { + if (HandleImmediateRetryStatus(status, fasterSession, ref pendingContext)) continue; return status; } - if (stackCtx.recSrc.HasInMemorySrc) - srcRecordInfo = ref stackCtx.recSrc.GetSrcRecordInfo(); try { @@ -74,41 +75,45 @@ internal OperationStatus InternalContinuePendingRead(fasterSession, ref key, ref stackCtx, ref srcRecordInfo); + } + + // Must do this *after* Unlocking. Status was set by InternalTryCopyToTail. + if (!HandleImmediateRetryStatus(status, fasterSession, ref pendingContext)) + { + // If no copy to tail was done. + if (status == OperationStatus.NOTFOUND || status == OperationStatus.RECORD_ON_DISK) + return OperationStatus.SUCCESS; + return status; } + } // end while (true) } @@ -132,11 +147,9 @@ internal OperationStatus InternalContinuePendingRead /// Continue a pending RMW operation with the record retrieved from disk. /// - /// thread (or session) context under which operation must be executed. /// record read from the disk. /// internal context for the pending RMW operation /// Callback functions. - /// Session context /// /// /// @@ -147,22 +160,10 @@ internal OperationStatus InternalContinuePendingReadSUCCESS /// The value has been successfully updated(or inserted). /// - /// - /// RECORD_ON_DISK - /// The record corresponding to 'key' is on disk. Issue async IO to retrieve record and retry later. - /// - /// - /// RETRY_LATER - /// Cannot be processed immediately due to system state. Add to pending list and retry later. 
- /// /// /// - internal OperationStatus InternalContinuePendingRMW( - FasterExecutionContext opCtx, - AsyncIOContext request, - ref PendingContext pendingContext, - FasterSession fasterSession, - FasterExecutionContext sessionCtx) + internal OperationStatus InternalContinuePendingRMW(AsyncIOContext request, + ref PendingContext pendingContext, FasterSession fasterSession) where FasterSession : IFasterSession { ref Key key = ref pendingContext.key.Get(); @@ -170,66 +171,59 @@ internal OperationStatus InternalContinuePendingRMW stackCtx = new(comparer.GetHashCode64(ref key)); OperationStatus status; while (true) { - FindOrCreateTag(ref stackCtx.hei); + FindOrCreateTag(ref stackCtx.hei, hlog.BeginAddress); stackCtx.SetRecordSourceToHashEntry(hlog); - // A 'ref' variable must be initialized. If we find a record for the key, we reassign the reference. - RecordInfo dummyRecordInfo = default; - ref RecordInfo srcRecordInfo = ref dummyRecordInfo; - - // During the pending operation, the record may have been added to any of the possible locations. - status = TryFindAndEphemeralLockRecord(fasterSession, ref key, ref stackCtx, LockType.Exclusive, pendingContext.PrevHighestKeyHashAddress); - if (status != OperationStatus.SUCCESS) - { - if (HandleImmediateRetryStatus(status, sessionCtx, sessionCtx, fasterSession, ref pendingContext)) - continue; - return status; + // During the pending operation, a record for the key may have been added to the log. If so, go through the full InternalRMW + // sequence; the record in 'request' is stale. + if (TryFindRecordInMemory(ref key, ref stackCtx, ref pendingContext)) + break; + + // We didn't find a record for the key in memory, but if recSrc.LogicalAddress (which is the .PreviousAddress of the lowest record + // above InitialLatestLogicalAddress we could reach) is > InitialLatestLogicalAddress, then it means InitialLatestLogicalAddress is + // now below HeadAddress and there is at least one record below HeadAddress but above InitialLatestLogicalAddress. We must do InternalRMW. + if (stackCtx.recSrc.LogicalAddress > pendingContext.InitialLatestLogicalAddress) + { + Debug.Assert(pendingContext.InitialLatestLogicalAddress < hlog.HeadAddress, "Failed to search all in-memory records"); + break; } - if (stackCtx.recSrc.HasInMemorySrc) - srcRecordInfo = ref stackCtx.recSrc.GetSrcRecordInfo(); + + if (!TryTransientXLock(fasterSession, ref key, ref stackCtx, out status)) + goto CheckRetry; try { - // pendingContext.entry.Address is the previous latestLogicalAddress; if recSrc.LatestLogicalAddress (set by FindRecordInReadCacheOrLockTable) - // is greater than the previous latestLogicalAddress, then another thread inserted or spliced in a new record and we must do InternalRMW. - if (stackCtx.recSrc.LatestLogicalAddress > pendingContext.entry.Address) - break; - - // Here, the input* data for 'doingCU' is the from the request, so create a RecordSource copy for that. - RecordSource inputSrc = new() - { - LogicalAddress = request.logicalAddress, - PhysicalAddress = (long)recordPointer, - HasMainLogSrc = (request.logicalAddress >= hlog.BeginAddress) && !inputRecordInfo.Tombstone, - Log = hlog - }; + // Here, the input data for 'doingCU' is from the request, so populate the RecordSource copy from that, preserving LowestReadCache*.
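The revalidation logic above has a simple shape: after the disk read completes, check whether a record for the key appeared in memory while the operation was pending; if so the disk image is stale and the full RMW path must rerun. A minimal sketch under those assumptions, where the find delegate and the outcome enum are hypothetical stand-ins for TryFindRecordInMemory and FASTER's status handling:

```csharp
using System;

// Sketch only: the staleness check performed when continuing a pending RMW.
enum RmwOutcome { UsedDiskRecord, RetriedFullRmw }

static class PendingRmwSketch
{
    public static RmwOutcome ContinuePendingRmw(string key, long initialLatestLogicalAddress,
        Func<string, (bool found, long traceStopAddress)> tryFindRecordInMemory)
    {
        var (found, traceStopAddress) = tryFindRecordInMemory(key);
        if (found || traceStopAddress > initialLatestLogicalAddress)
        {
            // A newer record exists (or may exist below HeadAddress): the disk
            // image in 'request' is stale, so drop down to the full RMW sequence.
            return RmwOutcome.RetriedFullRmw;
        }
        // No newer record: create the new record from the disk image (the 'doingCU' path).
        return RmwOutcome.UsedDiskRecord;
    }
}
```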
+ stackCtx.recSrc.LogicalAddress = request.logicalAddress; + stackCtx.recSrc.PhysicalAddress = (long)recordPointer; status = CreateNewRecordRMW(ref key, ref pendingContext.input.Get(), ref hlog.GetContextRecordValue(ref request), ref pendingContext.output, - ref pendingContext, fasterSession, sessionCtx, ref stackCtx, ref srcRecordInfo, ref inputSrc, inputRecordInfo, fromPending: true); - - // Retries should drop down to InternalRMW - if (!HandleImmediateRetryStatus(status, sessionCtx, sessionCtx, fasterSession, ref pendingContext)) - return status; + ref pendingContext, fasterSession, ref stackCtx, ref srcRecordInfo, + doingCU: request.logicalAddress >= hlog.BeginAddress && !srcRecordInfo.Tombstone); } finally { - stackCtx.HandleNewRecordOnError(this); - EphemeralXUnlockAfterUpdate(fasterSession, ref key, ref stackCtx, ref srcRecordInfo); + stackCtx.HandleNewRecordOnException(this); + TransientXUnlock(fasterSession, ref key, ref stackCtx); } + + // Must do this *after* Unlocking. Retries should drop down to InternalRMW + CheckRetry: + if (!HandleImmediateRetryStatus(status, fasterSession, ref pendingContext)) + return status; } // end while (true) do - status = InternalRMW(ref key, ref pendingContext.input.Get(), ref pendingContext.output, ref pendingContext.userContext, ref pendingContext, fasterSession, opCtx, pendingContext.serialNum); - while (HandleImmediateRetryStatus(status, sessionCtx, sessionCtx, fasterSession, ref pendingContext)); + status = InternalRMW(ref key, ref pendingContext.input.Get(), ref pendingContext.output, ref pendingContext.userContext, ref pendingContext, fasterSession, pendingContext.serialNum); + while (HandleImmediateRetryStatus(status, fasterSession, ref pendingContext)); return status; } @@ -247,13 +241,11 @@ internal OperationStatus InternalContinuePendingRMWLower-bound address (addresses are searched from tail (high) to head (low); do not search for "future records" earlier than this) /// Actual address of existing key record /// - /// /// [MethodImpl(MethodImplOptions.AggressiveInlining)] internal OperationStatus InternalCopyToTailForCompaction( ref Key key, ref Input input, ref Value value, ref Output output, - long untilAddress, long actualAddress, FasterSession fasterSession, - FasterExecutionContext currentCtx) + long untilAddress, long actualAddress, FasterSession fasterSession) where FasterSession : IFasterSession { Debug.Assert(epoch.ThisInstanceProtected(), "This is currently only called from Compaction so the epoch should be protected"); @@ -267,12 +259,18 @@ internal OperationStatus InternalCopyToTailForCompaction= hlog.BeginAddress) { @@ -281,17 +279,14 @@ internal OperationStatus InternalCopyToTailForCompaction(fasterSession, ref key, ref stackCtx, LockType.Shared); - if (status == OperationStatus.SUCCESS && stackCtx.recSrc.HasInMemorySrc) - srcRecordInfo = ref stackCtx.recSrc.GetSrcRecordInfo(); + if (TryFindRecordInMemory(ref key, ref stackCtx, ref pendingContext)) + srcRecordInfo = ref stackCtx.recSrc.GetInfo(); } } else @@ -306,52 +301,31 @@ internal OperationStatus InternalCopyToTailForCompaction(fasterSession, ref key, ref stackCtx, ref srcRecordInfo); } } - } while (HandleImmediateRetryStatus(status, currentCtx, currentCtx, fasterSession, ref pendingContext)); + } while (HandleImmediateRetryStatus(status, fasterSession, ref pendingContext)); return status; } /// - /// Helper function for trying to copy existing immutable records (at foundLogicalAddress) to the tail, used in: - /// - /// - /// , - /// - /// - /// Succeeds only if the 
record for the same key hasn't changed. + /// Helper function for trying to copy existing immutable records (at foundLogicalAddress) to the tail. /// - /// The thread(or session) context to execute operation in /// /// /// @@ -369,7 +343,6 @@ internal OperationStatus InternalCopyToTailForCompaction /// /// The reason for this operation. - /// If true, this is called to append an expired (Tombstoned) record /// /// /// RETRY_NOW: failed CAS, so no copy done. This routine deals entirely with new records, so will not encounter Sealed records @@ -378,52 +351,41 @@ internal OperationStatus InternalCopyToTailForCompactionSUCCESS: no record found beyond expectedLogicalAddress, so copy was done /// /// - internal OperationStatus InternalTryCopyToTail( - FasterExecutionContext currentCtx, ref PendingContext pendingContext, + internal OperationStatus InternalTryCopyToTail(ref PendingContext pendingContext, ref Key key, ref Input input, ref Value recordValue, ref Output output, ref OperationStackContext stackCtx, - ref RecordInfo srcRecordInfo, long untilLogicalAddress, FasterSession fasterSession, WriteReason reason, bool expired = false) + ref RecordInfo srcRecordInfo, long untilLogicalAddress, FasterSession fasterSession, WriteReason reason) where FasterSession : IFasterSession { #region Trace back for newly-inserted record in HybridLog if (stackCtx.recSrc.LatestLogicalAddress > untilLogicalAddress) { // Entries exist in the log above our last-checked address; another session inserted them after our FindTag. See if there is a newer entry for this key. - long logicalAddress = stackCtx.recSrc.LatestLogicalAddress; - long physicalAddress = hlog.GetPhysicalAddress(logicalAddress); - ref RecordInfo ri = ref hlog.GetInfo(physicalAddress); - if (ri.Invalid || !comparer.Equals(ref key, ref hlog.GetKey(physicalAddress))) - { - logicalAddress = ri.PreviousAddress; - TraceBackForKeyMatch(ref key, logicalAddress, hlog.HeadAddress, out logicalAddress, out physicalAddress); - } - - if (logicalAddress > untilLogicalAddress) + var minAddress = untilLogicalAddress < hlog.HeadAddress ? hlog.HeadAddress : untilLogicalAddress; + TraceBackForKeyMatch(ref key, stackCtx.recSrc.LatestLogicalAddress, minAddress, out long foundLogicalAddress, out _); + if (foundLogicalAddress > untilLogicalAddress) { // Note: ReadAtAddress bails here by design; we assume anything in the readcache is the latest version. // Any loop to retrieve prior versions should set ReadFlags.DisableReadCache*; see ReadAddressTests. - return logicalAddress < hlog.HeadAddress ? OperationStatus.RECORD_ON_DISK : OperationStatus.NOTFOUND; + return foundLogicalAddress < hlog.HeadAddress ? OperationStatus.RECORD_ON_DISK : OperationStatus.NOTFOUND; } - } - // Update untilLogicalAddress to the latest address we've checked; recSrc.LatestLogicalAddress can be updated by VerifyReadCacheSplicePoint. - untilLogicalAddress = stackCtx.recSrc.LatestLogicalAddress; + // Update untilLogicalAddress to the latest address we've checked; recSrc.LatestLogicalAddress can be updated by VerifyReadCacheSplicePoint. + untilLogicalAddress = stackCtx.recSrc.LatestLogicalAddress; + } #endregion #region Create new copy in mutable region - Value defaultValue = default; - ref Value value = ref (expired ? 
ref defaultValue : ref recordValue); - var (actualSize, allocatedSize) = hlog.GetRecordSize(ref key, ref value); + var (actualSize, allocatedSize) = hlog.GetRecordSize(ref key, ref recordValue); UpsertInfo upsertInfo = new() { - SessionType = fasterSession.SessionType, - Version = currentCtx.version, - SessionID = currentCtx.sessionID, + Version = fasterSession.Ctx.version, + SessionID = fasterSession.Ctx.sessionID, Address = stackCtx.recSrc.HasInMemorySrc ? stackCtx.recSrc.LogicalAddress : Constants.kInvalidAddress, KeyHash = stackCtx.hei.hash }; - StatusCode advancedStatusCode = expired ? StatusCode.Expired : StatusCode.Found; + StatusCode advancedStatusCode = StatusCode.Found; // A 'ref' variable must be initialized; we'll assign it to the new record we allocate. RecordInfo dummyRecordInfo = default; @@ -439,25 +401,10 @@ internal OperationStatus InternalTryCopyToTail hei.Address (the .PreviousAddress and CAS comparison value). - do - { - if (!BlockAllocateReadCache(allocatedSize, out newLogicalAddress, ref pendingContext, out _)) - return OperationStatus.SUCCESS; // We don't slow down Reads to handle allocation failure in the read cache, but don't return StatusCode.CopiedRecordToReadCache - newPhysicalAddress = readcache.GetPhysicalAddress(newLogicalAddress); - - if (!VerifyInMemoryAddresses(ref stackCtx)) - { - // We don't save readcache addresses (they'll eventually be evicted) - ref var ri = ref readcache.GetInfo(newPhysicalAddress); - ri.SetInvalid(); // We haven't yet set stackCtx.newLogicalAddress, so do this directly here - ri.PreviousAddress = Constants.kTempInvalidAddress; // Necessary for ReadCacheEvict, but cannot be kInvalidAddress or we have recordInfo.IsNull - return OperationStatus.RETRY_LATER; - } - } while (stackCtx.hei.IsReadCache && newLogicalAddress < stackCtx.hei.AbsoluteAddress); + if (!TryAllocateRecordReadCache(ref pendingContext, ref stackCtx, allocatedSize, out newLogicalAddress, out newPhysicalAddress, out OperationStatus status)) + return status; - newRecordInfo = ref WriteTentativeInfo(ref key, readcache, newPhysicalAddress, inNewVersion: false, tombstone: false, stackCtx.hei.Address); - stackCtx.newLogicalAddress = newLogicalAddress | readcacheNewAddressBit; + newRecordInfo = ref WriteNewRecordInfo(ref key, readcache, newPhysicalAddress, inNewVersion: false, tombstone: false, stackCtx.hei.Address); upsertInfo.Address = Constants.kInvalidAddress; // We do not expose readcache addresses advancedStatusCode |= StatusCode.CopiedRecordToReadCache; @@ -465,39 +412,24 @@ internal OperationStatus InternalTryCopyToTail recSrc.LatestLogicalAddress (the .PreviousAddress and CAS comparison value). 
TODO: save record for reuse - do - { - if (!BlockAllocate(allocatedSize, out newLogicalAddress, ref pendingContext, out OperationStatus status)) - return status; // For CopyToTail, we do want to make sure the record is appended to the tail, so return the failing status for retry - newPhysicalAddress = hlog.GetPhysicalAddress(newLogicalAddress); - - if (!VerifyInMemoryAddresses(ref stackCtx)) - { - SaveAllocationForRetry(ref pendingContext, newLogicalAddress, newPhysicalAddress, allocatedSize); - return OperationStatus.RETRY_LATER; - } - } while (newLogicalAddress < stackCtx.recSrc.LatestLogicalAddress); - } + if (!TryAllocateRecord(ref pendingContext, ref stackCtx, allocatedSize, recycle: true, out newLogicalAddress, out newPhysicalAddress, out OperationStatus status)) + return status; - newRecordInfo = ref WriteTentativeInfo(ref key, hlog, newPhysicalAddress, inNewVersion: currentCtx.InNewVersion, tombstone: false, stackCtx.recSrc.LatestLogicalAddress); - stackCtx.newLogicalAddress = newLogicalAddress; + newRecordInfo = ref WriteNewRecordInfo(ref key, hlog, newPhysicalAddress, inNewVersion: fasterSession.Ctx.InNewVersion, tombstone: false, stackCtx.recSrc.LatestLogicalAddress); - newRecordInfo.Tombstone = expired; upsertInfo.Address = newLogicalAddress; advancedStatusCode |= StatusCode.CopiedRecord; if (reason == WriteReason.CopyToReadCache) reason = WriteReason.CopyToTail; } + stackCtx.SetNewRecord(newLogicalAddress | readcacheNewAddressBit); upsertInfo.RecordInfo = newRecordInfo; - if (!fasterSession.SingleWriter(ref key, ref input, ref value, ref localLog.GetValue(newPhysicalAddress, newPhysicalAddress + actualSize), + if (!fasterSession.SingleWriter(ref key, ref input, ref recordValue, ref localLog.GetValue(newPhysicalAddress, newPhysicalAddress + actualSize), ref output, ref newRecordInfo, ref upsertInfo, reason)) { - // No SaveAlloc here, but TODO this record could be reused later if not a readcache record. + // No SaveAlloc here, but TODO this record could be reused later. stackCtx.SetNewRecordInvalid(ref newRecordInfo); return (upsertInfo.Action == UpsertAction.CancelOperation) ? OperationStatus.CANCELED : OperationStatus.SUCCESS; } @@ -507,7 +439,9 @@ internal OperationStatus InternalTryCopyToTail(fasterSession, ref key, ref stackCtx, ref srcRecordInfo, ref newRecordInfo); - - if (!success) + { + if (!copyToReadCache) + PostInsertAtTail(ref key, ref stackCtx, ref srcRecordInfo); + } + else { stackCtx.SetNewRecordInvalid(ref newRecordInfo); if (!casSuccess) { // Let user dispose similar to a deleted record, and save for retry, *only* if CAS failed; otherwise we must preserve it in the chain. 
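The insert-at-tail path above publishes the new record with a single CAS on the hash-bucket word, and on failure invalidates and disposes the record before retrying. A minimal sketch of that publish/cleanup shape, with a single long standing in for the bucket entry and a caller-supplied dispose callback standing in for DisposeSingleWriter:

```csharp
using System;
using System.Threading;

// Sketch only: CAS-publish of a new record, with dispose-on-failure cleanup.
class CasInsertSketch
{
    long bucketEntryWord;

    public bool TryInsertAtTail(long expectedAddress, long newLogicalAddress, Action disposeNewRecord)
    {
        long found = Interlocked.CompareExchange(ref bucketEntryWord, newLogicalAddress, expectedAddress);
        if (found == expectedAddress)
            return true;

        // CAS failed: another session changed the chain since we traced it.
        // Invalidate/dispose the new record (as with a deleted record) and let
        // the caller save the allocation for retry.
        disposeNewRecord();
        return false;
    }
}
```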
- fasterSession.DisposeSingleWriter(ref localLog.GetKey(newPhysicalAddress), ref input, ref value, ref localLog.GetValue(newPhysicalAddress), ref output, ref newRecordInfo, ref upsertInfo, reason); + fasterSession.DisposeSingleWriter(ref localLog.GetKey(newPhysicalAddress), ref input, ref recordValue, ref localLog.GetValue(newPhysicalAddress), ref output, ref newRecordInfo, ref upsertInfo, reason); newRecordInfo.PreviousAddress = Constants.kTempInvalidAddress; // Necessary for ReadCacheEvict, but cannot be kInvalidAddress or we have recordInfo.IsNull if (!copyToReadCache) SaveAllocationForRetry(ref pendingContext, newLogicalAddress, newPhysicalAddress, allocatedSize); @@ -550,18 +487,12 @@ internal OperationStatus InternalTryCopyToTail : FasterBase, IFasterKV - { - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private OperationStatus TryFindAndEphemeralLockAuxiliaryRecord( - FasterSession fasterSession, ref Key key, ref OperationStackContext stackCtx, - LockType lockType, long prevHighestKeyHashAddress = Constants.kInvalidAddress) - where FasterSession : IFasterSession - { - if (UseReadCache && FindInReadCache(ref key, ref stackCtx, - untilAddress: (prevHighestKeyHashAddress & Constants.kReadCacheBitMask) != 0 ? prevHighestKeyHashAddress : Constants.kInvalidAddress)) - return TryLockInMemoryRecord(fasterSession, ref stackCtx, lockType); - - if (LockTable.IsActive && !fasterSession.DisableEphemeralLocking && !LockTable.TryLockEphemeral(ref key, stackCtx.hei.hash, lockType, out stackCtx.recSrc.HasLockTableLock)) - return OperationStatus.RETRY_LATER; - return OperationStatus.SUCCESS; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private OperationStatus TryFindAndEphemeralLockRecord( - FasterSession fasterSession, ref Key key, ref OperationStackContext stackCtx, - LockType lockType, long prevHighestKeyHashAddress = Constants.kInvalidAddress) - where FasterSession : IFasterSession - { - var internalStatus = TryFindAndEphemeralLockAuxiliaryRecord(fasterSession, ref key, ref stackCtx, lockType, prevHighestKeyHashAddress); - if (stackCtx.recSrc.HasSrc) - return internalStatus; - - if (!TryFindRecordInMainLog(ref key, ref stackCtx, minOffset: hlog.HeadAddress, waitForTentative: true)) - return OperationStatus.SUCCESS; - return TryLockInMemoryRecord(fasterSession, ref stackCtx, lockType); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static OperationStatus TryLockInMemoryRecord(FasterSession fasterSession, ref OperationStackContext stackCtx, LockType lockType) where FasterSession : IFasterSession - { - ref var recordInfo = ref stackCtx.recSrc.GetSrcRecordInfo(); - var ok = lockType == LockType.Shared - ? 
fasterSession.TryLockEphemeralShared(ref recordInfo) - : fasterSession.TryLockEphemeralExclusive(ref recordInfo); - if (!ok) - return OperationStatus.RETRY_LATER; - stackCtx.recSrc.HasInMemoryLock = !fasterSession.DisableEphemeralLocking; - return OperationStatus.SUCCESS; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - static bool TryEphemeralXLock(FasterSession fasterSession, ref RecordSource recSrc, ref RecordInfo recordInfo, out OperationStatus status) - where FasterSession : IFasterSession - { - status = OperationStatus.SUCCESS; - if (fasterSession.DisableEphemeralLocking) - { - Debug.Assert(!fasterSession.IsManualLocking || recordInfo.IsLockedExclusive, $"Attempting to use a non-XLocked key in a Manual Locking context (requesting XLock): XLocked {recordInfo.IsLockedExclusive}, Slocked {recordInfo.NumLockedShared}"); - return true; - } - - // A failed lockOp means this is an intermediate record, e.g. Tentative or Sealed, or we exhausted the spin count. All these must RETRY_LATER. - if (!fasterSession.TryLockEphemeralExclusive(ref recordInfo)) - status = OperationStatus.RETRY_LATER; - else if (!IsRecordValid(recordInfo, out status)) - fasterSession.UnlockEphemeralExclusive(ref recordInfo); - else - recSrc.HasInMemoryLock = true; - return recSrc.HasInMemoryLock; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - static bool TryEphemeralSLock(FasterSession fasterSession, ref RecordSource recSrc, ref RecordInfo recordInfo, out OperationStatus status) - where FasterSession : IFasterSession - { - status = OperationStatus.SUCCESS; - if (fasterSession.DisableEphemeralLocking) - { - Debug.Assert(!fasterSession.IsManualLocking || recordInfo.IsLocked, $"Attempting to use a non-Locked (S or X) key in a Manual Locking context (requesting SLock): XLocked {recordInfo.IsLockedExclusive}, Slocked {recordInfo.NumLockedShared}"); - return true; - } - - // A failed lockOp means this is an intermediate record, e.g. Tentative or Sealed, or we exhausted the spin count. All these must RETRY_LATER. - if (!fasterSession.TryLockEphemeralShared(ref recordInfo)) - status = OperationStatus.RETRY_LATER; - else if (!IsRecordValid(recordInfo, out status)) - fasterSession.TryUnlockEphemeralShared(ref recordInfo); - else - recSrc.HasInMemoryLock = true; - return recSrc.HasInMemoryLock; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - void EphemeralSUnlock(FasterSession fasterSession, FasterExecutionContext currentCtx, - ref PendingContext pendingContext, - ref Key key, ref OperationStackContext stackCtx, ref RecordInfo recordInfo) - where FasterSession : IFasterSession - { - if (!stackCtx.recSrc.HasLock) - return; - - // This is called on recovery from Pending Read, in which case we may have a LockTable lock. First try to unlock the in-memory record. - // If on recovery in either immediate or pending read, then we may have evicted the source address or it may have been Read/CopyToTail. - // So we fall through on a failed unlock or non-InMemory lock. - if (!stackCtx.recSrc.HasInMemoryLock || stackCtx.recSrc.LogicalAddress < stackCtx.recSrc.Log.HeadAddress || !recordInfo.TryUnlockShared()) - { - // Updaters (Upsert, RMW, Delete) XLock records. Readers do not, including anything calling InternalTryCopyToTail. This means the record may - // be transferred from the readcache to the main log (or even to the LockTable, if the record was in the (SafeHeadAddress, ClosedUntilAddress) - // interval when a Read started). 
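For readers following this removal: the deleted TryEphemeralSLock/TryEphemeralXLock helpers all had the same shape, which is try to take the lock, re-validate the record, and unlock again (returning RETRY_LATER) if validation fails. A minimal sketch of that shape, using ReaderWriterLockSlim and a simple Invalid flag as stand-ins for FASTER's RecordInfo lock bits and record state:

```csharp
using System.Threading;

// Sketch only: the lock-then-validate pattern of the removed ephemeral-lock helpers.
class EphemeralSLockSketch
{
    readonly ReaderWriterLockSlim recordLock = new();
    public volatile bool Invalid;

    // Returns false to signal RETRY_LATER, as the removed TryEphemeralSLock did.
    public bool TryEphemeralSLock()
    {
        if (!recordLock.TryEnterReadLock(millisecondsTimeout: 0))
            return false;                    // intermediate record or spin exhausted
        if (Invalid)
        {
            recordLock.ExitReadLock();       // validated too late; caller must retry
            return false;
        }
        return true;                         // caller now holds the shared lock
    }
}
```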
- - if (stackCtx.recSrc.HasLockTableLock && LockTable.Unlock(ref key, stackCtx.hei.hash, LockType.Shared)) - return; - - // If the record dived below HeadAddress, we must wait for it to enter the lock table before unlocking; InternalLock does this (and starts - // by searching the in-memory space first, which is good because the record may have been transferred). - // If RecordInfo unlock fails, the locks were transferred to another recordInfo; do InternalLock to chase the key through the full process. - OperationStatus status; - do - { - status = InternalLock(ref key, new(LockOperationType.Unlock, LockType.Shared), out _); - } while (HandleImmediateRetryStatus(status, currentCtx, currentCtx, fasterSession, ref pendingContext)); - } - - stackCtx.recSrc.HasInMemoryLock = false; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private void EphemeralXUnlockAfterUpdate(FasterSession fasterSession, ref Key key, - ref OperationStackContext stackCtx, ref RecordInfo srcRecordInfo) - where FasterSession : IFasterSession - { - if (fasterSession.DisableEphemeralLocking) - { - Debug.Assert(!stackCtx.recSrc.HasLock, "HasLock should only be true if we are doing ephemeral locking"); - return; - } - - // Unlock exclusive locks, if any. Exclusive locks are different from shared locks, in that Shared locks can be transferred - // (due to CopyToTail or ReadCache) while the lock is held. Exclusive locks pin the lock in place: - // - The owning thread ensures that no epoch refresh is done, so there is no eviction if it is in memory - // - Other threads will attempt (and fail) to lock it in memory or in the locktable, until we release it. - // - This means there can be no transfer *from* the locktable while the XLock is held - if (stackCtx.recSrc.HasInMemoryLock) - { - // This unlocks the source (old) record; the new record may already be operated on by other threads, which is fine. - if (stackCtx.recSrc.LogicalAddress >= stackCtx.recSrc.Log.HeadAddress) - { - // The record is now Invalid or Sealed, but we have to unlock it so any threads waiting on it can continue. - fasterSession.UnlockEphemeralExclusive(ref srcRecordInfo); - } - else - { - // We must always wait until the lock table entry is in place; it will be orphaned because we've transferred the record, - // so we must remove it from the LockTable. 
- SpinWaitUntilRecordIsClosed(ref key, stackCtx.hei.hash, stackCtx.recSrc.LogicalAddress, stackCtx.recSrc.Log); - LockTable.Remove(ref key, stackCtx.hei.hash); - } - stackCtx.recSrc.HasInMemoryLock = false; - return; - } - - if (stackCtx.recSrc.HasLockTableLock) - { - LockTable.Unlock(ref key, stackCtx.hei.hash, LockType.Exclusive); - stackCtx.recSrc.HasLockTableLock = false; - } - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private void EphemeralXUnlockAndAbandonUpdate(FasterSession fasterSession, ref RecordSource recSrc, ref RecordInfo srcRecordInfo) - where FasterSession : IFasterSession - { - if (!fasterSession.DisableEphemeralLocking) - fasterSession.UnlockEphemeralExclusive(ref srcRecordInfo); - recSrc.ClearSrc(); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private bool CompleteTwoPhaseUpdate(FasterSession fasterSession, ref Key key, ref OperationStackContext stackCtx, - ref RecordInfo srcRecordInfo, ref RecordInfo newRecordInfo, out OperationStatus status) - where FasterSession : IFasterSession - { - // We don't check for ephemeral xlocking here; we know we had that lock, but we don't need to actually lock the new record because - // we know this is the last step and we are going to unlock it immediately; it is protected until we remove the Tentative bit. - - if (fasterSession.IsManualLocking) - { - // For manual locking, we should already have made sure there is an XLock for this, and must preserve it on the new record. - // If we do not have an in-memory source there should be a LockTable entry; transfer from it (which will remove it from the LockTable). - // Otherwise (we do have an in-memory source) just set the bit directly; we'll mark and clear the IM source below. - bool transferred = false; - if (!stackCtx.recSrc.HasInMemorySrc) - { - bool found = this.LockTable.TryGet(ref key, stackCtx.hei.hash, out var ltriLT); - Debug.Assert(found && ltriLT.IsLocked && !ltriLT.Tentative, "Error--non-InMemorySrc expected to find a non-tentative locked locktable entry"); - - transferred = LockTable.IsActive && LockTable.TransferToLogRecord(ref key, stackCtx.hei.hash, ref newRecordInfo); - Debug.Assert(transferred, "ManualLocking Non-InMemory source should find a LockTable entry to transfer locks from in CompleteTwoPhaseUpdate"); - } -#if DEBUG - if (this.LockTable.TryGet(ref key, stackCtx.hei.hash, out var ltri)) - { - // If !recSrc.HasInMemorySrc, then we just did a LockTable transfer to an existing tentative log record, and that tentative record should have - // prevented anyone from making a non-tentative LockTable entry. If we recSrc.HasInMemorySrc, then there should never be LockTable entry. - Debug.Assert(!ltri.IsLocked || ltri.Tentative, $"Error--existing non-tentative LT entry in CompleteTwoPhaseUpdate transfer; HasInMemSrc = {stackCtx.recSrc.HasInMemorySrc}"); - } -#endif - if (!transferred) - newRecordInfo.InitializeLockExclusive(); - } - else if ((LockTable.IsActive && !LockTable.CompleteTwoPhaseUpdate(ref key, stackCtx.hei.hash)) - || (UseReadCache && !ReadCacheCompleteTwoPhaseUpdate(ref key, ref stackCtx.hei))) - { - // A permanent LockTable entry or a ReadCache entry with a lock was added before we inserted the tentative record, so we must invalidate the new record and retry. - // We cannot reuse the allocation because it's in the hash chain. 
// TODO consider eliding similar to InternalDelete - stackCtx.SetNewRecordInvalidAtomic(ref newRecordInfo); - status = OperationStatus.RETRY_LATER; - return false; - } - - status = OperationStatus.SUCCESS; - stackCtx.recSrc.MarkSourceRecordAfterSuccessfulCopyUpdate(fasterSession, ref srcRecordInfo); - return true; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private bool CompleteTwoPhaseCopyToTail(FasterSession fasterSession, ref Key key, ref OperationStackContext stackCtx, - ref RecordInfo srcRecordInfo, ref RecordInfo newRecordInfo) - where FasterSession : IFasterSession - { - // Transfer locks; these will always be read locks and include the caller's read lock if we've not disabled ephemeral locking. - bool success = true; - if (stackCtx.recSrc.HasInMemorySrc) - { - stackCtx.recSrc.AssertInMemorySourceWasNotEvicted(); - - // Unlock the ephemeral lock here; we mark the source, so we *know* we will have an invalid unlock on srcRecordInfo and would have to chase - // through InternalLock to unlock it, so we save the time by not transferring our ephemeral lock. 'Tentative' still protects the new record. - success = newRecordInfo.CopyReadLocksFromAndMarkSourceAtomic(ref srcRecordInfo, allowXLock: fasterSession.IsManualLocking, - seal: stackCtx.recSrc.HasMainLogSrc, removeEphemeralLock: stackCtx.recSrc.HasInMemoryLock); - if (success) - stackCtx.recSrc.HasInMemoryLock = false; - } - else - { - if (fasterSession.IsManualLocking) - { - // For manual locking, we should already have made sure there is at least an SLock for this; since there is no HasInMemorySrc, it is in the Lock Table. - if (LockTable.IsActive) - { - if (!LockTable.TransferToLogRecord(ref key, stackCtx.hei.hash, ref newRecordInfo)) - Debug.Fail("ManualLocking Non-InMemory source should find a LockTable entry to transfer locks from in CompleteTwoPhaseCopyToTail"); - } - } - else - { - // XLocks are not allowed here in the ephemeral section, because another thread owns them (for operations that end up here, ephemeral locking only takes a read lock). - var lt_success = !LockTable.IsActive || LockTable.CompleteTwoPhaseCopyToTail(ref key, stackCtx.hei.hash, ref newRecordInfo, allowXLock: fasterSession.IsManualLocking, - removeEphemeralLock: stackCtx.recSrc.HasLockTableLock); - - // We must check readcache even if we don't have an in-memory source, because another thread may have transferred from the LockTable to the ReadCache. - var rc_success = !UseReadCache || ReadCacheCompleteTwoPhaseCopyToTail(ref key, ref stackCtx.hei, ref newRecordInfo, allowXLock: fasterSession.IsManualLocking, - removeEphemeralLock: stackCtx.recSrc.HasLockTableLock); // HasLockTableLock because we checked HasInMemorySrc above - - // We don't have to worry about checking the main log, because a CAS by another thread would fail due to our Tentative record being there - // for both mainhash entry (CAS compares to entry.word) and readcache (splice CAS compares to LowestLogicalAddress). 
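The removed CompleteTwoPhaseUpdate above followed a two-phase protocol: insert a tentative record, then verify no competing lock appeared in the lock table first; if one did, the tentative record is invalidated and the operation retried, because the allocation is already in the hash chain and cannot be reused. A minimal sketch of that second phase, with a ConcurrentDictionary standing in for the lock table and a bool for the Tentative bit (both simplifications, not FASTER types):

```csharp
using System.Collections.Concurrent;

// Sketch only: the second phase of the removed two-phase update protocol.
class TwoPhaseUpdateSketch
{
    readonly ConcurrentDictionary<string, bool> lockTable = new();

    public bool CompleteTwoPhaseUpdate(string key, ref bool newRecordTentative, out bool retryLater)
    {
        if (lockTable.ContainsKey(key))
        {
            // A lock was taken before our tentative insert: invalidate and retry.
            // The allocation cannot be reused because it is already in the hash chain.
            retryLater = true;
            return false;
        }
        newRecordTentative = false;   // publish: clear the Tentative bit
        retryLater = false;
        return true;
    }
}
```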
- success = lt_success && rc_success; - Debug.Assert(success, "Expected success releasing LockTable lock in Complete2pCTT"); - - if (success) - stackCtx.recSrc.HasLockTableLock = false; - } - } - return success; - } - } -} diff --git a/cs/src/core/Index/FASTER/Implementation/EpochOperations.cs b/cs/src/core/Index/FASTER/Implementation/EpochOperations.cs index d30873157..a8fecc0c5 100644 --- a/cs/src/core/Index/FASTER/Implementation/EpochOperations.cs +++ b/cs/src/core/Index/FASTER/Implementation/EpochOperations.cs @@ -11,24 +11,23 @@ public unsafe partial class FasterKV : FasterBase, IFasterKV( - FasterExecutionContext opCtx, - FasterExecutionContext currentCtx, + FasterExecutionContext sessionCtx, ref PendingContext pendingContext, FasterSession fasterSession) - where FasterSession : IFasterSession + where FasterSession : IFasterSession { - var version = opCtx.version; - Debug.Assert(currentCtx.version == version); - Debug.Assert(currentCtx.phase == Phase.PREPARE); - InternalRefresh(currentCtx, fasterSession); - Debug.Assert(currentCtx.version > version); + var version = sessionCtx.version; + Debug.Assert(sessionCtx.version == version); + Debug.Assert(sessionCtx.phase == Phase.PREPARE); + InternalRefresh(fasterSession); + Debug.Assert(sessionCtx.version > version); - pendingContext.version = currentCtx.version; + pendingContext.version = sessionCtx.version; } [MethodImpl(MethodImplOptions.AggressiveInlining)] private void HeavyEnter(long hash, FasterExecutionContext ctx, FasterSession session) - where FasterSession : IFasterSession + where FasterSession : IFasterSession { if (ctx.phase == Phase.PREPARE_GROW) { @@ -36,7 +35,7 @@ private void HeavyEnter(long hash, Faster // Could instead do a "heavy operation" here while (systemState.Phase != Phase.IN_PROGRESS_GROW) Thread.SpinWait(100); - InternalRefresh(ctx, session); + InternalRefresh(session); } if (ctx.phase == Phase.IN_PROGRESS_GROW) { @@ -50,14 +49,14 @@ void SpinWaitUntilClosed(long address) // Unlike HeadAddress, ClosedUntilAddress is a high-water mark; a record that is == to ClosedUntilAddress has *not* been closed yet. while (address >= this.hlog.ClosedUntilAddress) { - Debug.Assert(address < hlog.HeadAddress); + Debug.Assert(address < hlog.HeadAddress, "expected address < hlog.HeadAddress"); epoch.ProtectAndDrain(); Thread.Yield(); } } [MethodImpl(MethodImplOptions.AggressiveInlining)] - void SpinWaitUntilRecordIsClosed(ref Key key, long keyHash, long logicalAddress, AllocatorBase log) + void SpinWaitUntilRecordIsClosed(long logicalAddress, AllocatorBase log) { Debug.Assert(logicalAddress < log.HeadAddress, "SpinWaitUntilRecordIsClosed should not be called for addresses above HeadAddress"); @@ -84,23 +83,5 @@ void SpinWaitUntilRecordIsClosed(ref Key key, long keyHash, long logicalAddress, // Note: We cannot jump out here if the Lock Table contains the key, because it may be an older version of the record. } } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - void SpinWaitUntilAddressIsClosed(long logicalAddress, AllocatorBase log) - { - Debug.Assert(logicalAddress < log.HeadAddress, "SpinWaitUntilAddressIsClosed should not be called for addresses above HeadAddress"); - - // This is nearly identical to SpinWaitUntilRecordIsClosed (see comments there), but here we are called during chain traversal - // (e.g. ReadCacheEvict) and thus do not want to short-circuit if the key is found (the address may be for a colliding key). 
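The spin-until-closed helpers in EpochOperations.cs share one idea: wait until the target address drops below ClosedUntilAddress, refreshing the epoch on each iteration so the closing thread can make progress. A minimal sketch of that loop, with an atomic long and a placeholder method standing in for the allocator's ClosedUntilAddress and epoch.ProtectAndDrain():

```csharp
using System.Threading;

// Sketch only: the epoch-refreshing spin-wait shape of SpinWaitUntilRecordIsClosed.
class SpinUntilClosedSketch
{
    long closedUntilAddress;

    void ProtectAndDrain() { /* placeholder for epoch.ProtectAndDrain() */ }

    public void SpinWaitUntilAddressIsClosed(long logicalAddress)
    {
        // ClosedUntilAddress is a high-water mark: an address equal to it is NOT yet closed.
        while (logicalAddress >= Volatile.Read(ref closedUntilAddress))
        {
            ProtectAndDrain();
            Thread.Yield();
        }
    }
}
```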
- while (true) - { - epoch.ProtectAndDrain(); - Thread.Yield(); - - // Unlike HeadAddress, ClosedUntilAddress is a high-water mark; a record that is == to ClosedUntilAddress has *not* been closed yet. - if (logicalAddress < log.ClosedUntilAddress) - break; - } - } } } diff --git a/cs/src/core/Index/FASTER/Implementation/FindRecord.cs b/cs/src/core/Index/FASTER/Implementation/FindRecord.cs index 93cdb4c09..a6c62317e 100644 --- a/cs/src/core/Index/FASTER/Implementation/FindRecord.cs +++ b/cs/src/core/Index/FASTER/Implementation/FindRecord.cs @@ -3,49 +3,64 @@ using System.Diagnostics; using System.Runtime.CompilerServices; -using static FASTER.core.LockUtility; +using static FASTER.core.Utility; namespace FASTER.core { public unsafe partial class FasterKV : FasterBase, IFasterKV { [MethodImpl(MethodImplOptions.AggressiveInlining)] - private bool TryFindRecordInMemory(ref Key key, ref OperationStackContext stackCtx, long minOffset, bool waitForTentative = true) + private bool TryFindRecordInMemory(ref Key key, ref OperationStackContext stackCtx, long minAddress, bool stopAtHeadAddress = true) { - if (!UseReadCache || !FindInReadCache(ref key, ref stackCtx, untilAddress: Constants.kInvalidAddress)) - { - TryFindRecordInMainLog(ref key, ref stackCtx, minOffset, waitForTentative); + if (UseReadCache && FindInReadCache(ref key, ref stackCtx, minAddress: Constants.kInvalidAddress)) + return true; + if (minAddress < hlog.HeadAddress && stopAtHeadAddress) + minAddress = hlog.HeadAddress; + return TryFindRecordInMainLog(ref key, ref stackCtx, minAddress: minAddress); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private bool TryFindRecordInMemory(ref Key key, ref OperationStackContext stackCtx, + ref PendingContext pendingContext) + { + // Add 1 to the pendingContext minAddresses because we don't want an inclusive search; we're looking to see if it was added *after*. + if (UseReadCache) + { + var minRC = IsReadCache(pendingContext.InitialEntryAddress) ? pendingContext.InitialEntryAddress + 1 : Constants.kInvalidAddress; + if (FindInReadCache(ref key, ref stackCtx, minAddress: minRC)) + return true; } - return stackCtx.recSrc.HasInMemorySrc; + var minLog = pendingContext.InitialLatestLogicalAddress < hlog.HeadAddress ? 
hlog.HeadAddress : pendingContext.InitialLatestLogicalAddress + 1; + return TryFindRecordInMainLog(ref key, ref stackCtx, minAddress: minLog); } - private bool TryFindRecordInMainLog(ref Key key, ref OperationStackContext stackCtx, long minOffset, bool waitForTentative) + internal bool TryFindRecordInMainLog(ref Key key, ref OperationStackContext stackCtx, long minAddress) { Debug.Assert(!stackCtx.recSrc.HasInMemorySrc, "Should not have found record before this call"); - if (stackCtx.recSrc.LogicalAddress >= hlog.HeadAddress) + if (stackCtx.recSrc.LogicalAddress >= minAddress) { - stackCtx.recSrc.PhysicalAddress = hlog.GetPhysicalAddress(stackCtx.recSrc.LogicalAddress); - TraceBackForKeyMatch(ref key, ref stackCtx.recSrc, minOffset, waitForTentative); + stackCtx.recSrc.SetPhysicalAddress(); + TraceBackForKeyMatch(ref key, ref stackCtx.recSrc, minAddress); } return stackCtx.recSrc.HasInMemorySrc; } [MethodImpl(MethodImplOptions.AggressiveInlining)] - private bool TraceBackForKeyMatch(ref Key key, ref RecordSource recSrc, long minOffset, bool waitForTentative = true) + private bool TraceBackForKeyMatch(ref Key key, ref RecordSource recSrc, long minAddress) { - ref var recordInfo = ref hlog.GetInfo(recSrc.PhysicalAddress); - if (!recordInfo.Invalid && comparer.Equals(ref key, ref hlog.GetKey(recSrc.PhysicalAddress))) - { - if (!waitForTentative || SpinWaitWhileTentativeAndReturnValidity(ref recordInfo)) - return recSrc.HasMainLogSrc = true; - } + // PhysicalAddress must already be populated by callers. + ref var recordInfo = ref recSrc.GetInfo(); + if (!recordInfo.Invalid && comparer.Equals(ref key, ref recSrc.GetKey())) + return recSrc.HasMainLogSrc = true; + recSrc.LogicalAddress = recordInfo.PreviousAddress; - return recSrc.HasMainLogSrc = TraceBackForKeyMatch(ref key, recSrc.LogicalAddress, minOffset, out recSrc.LogicalAddress, out recSrc.PhysicalAddress, waitForTentative); + return recSrc.HasMainLogSrc = TraceBackForKeyMatch(ref key, recSrc.LogicalAddress, minAddress, out recSrc.LogicalAddress, out recSrc.PhysicalAddress); } [MethodImpl(MethodImplOptions.AggressiveInlining)] - private bool TraceBackForKeyMatch(ref Key key, long fromLogicalAddress, long minAddress, out long foundLogicalAddress, out long foundPhysicalAddress, bool waitForTentative = true) + private bool TraceBackForKeyMatch(ref Key key, long fromLogicalAddress, long minAddress, out long foundLogicalAddress, out long foundPhysicalAddress) { + // This overload is called when the record at the "current" logical address does not match 'key'; fromLogicalAddress is its .PreviousAddress. 
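The TraceBackForKeyMatch overloads above walk the record chain through .PreviousAddress links, stopping at minAddress, and return the first valid record whose key matches. A minimal sketch of that traversal, where a dictionary keyed by logical address and a small Record class stand in for FASTER's physical-address accessors:

```csharp
using System.Collections.Generic;

// Sketch only: the PreviousAddress chain walk of TraceBackForKeyMatch.
sealed class Record
{
    public string Key = "";
    public bool Invalid;
    public long PreviousAddress;
}

static class TraceBackSketch
{
    public static bool TraceBackForKeyMatch(IReadOnlyDictionary<long, Record> log,
        string key, long fromAddress, long minAddress, out long foundAddress)
    {
        for (foundAddress = fromAddress; foundAddress >= minAddress; )
        {
            var record = log[foundAddress];
            if (!record.Invalid && record.Key == key)
                return true;
            foundAddress = record.PreviousAddress;  // keep walking toward minAddress
        }
        return false;   // chain exhausted below minAddress without a match
    }
}
```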
foundLogicalAddress = fromLogicalAddress; while (foundLogicalAddress >= minAddress) { @@ -53,10 +68,7 @@ private bool TraceBackForKeyMatch(ref Key key, long fromLogicalAddress, long min ref var recordInfo = ref hlog.GetInfo(foundPhysicalAddress); if (!recordInfo.Invalid && comparer.Equals(ref key, ref hlog.GetKey(foundPhysicalAddress))) - { - if (!waitForTentative || SpinWaitWhileTentativeAndReturnValidity(ref recordInfo)) - return true; - } + return true; foundLogicalAddress = recordInfo.PreviousAddress; } diff --git a/cs/src/core/Index/FASTER/Implementation/HandleOperationStatus.cs b/cs/src/core/Index/FASTER/Implementation/HandleOperationStatus.cs index c6b724dfd..7849a76dc 100644 --- a/cs/src/core/Index/FASTER/Implementation/HandleOperationStatus.cs +++ b/cs/src/core/Index/FASTER/Implementation/HandleOperationStatus.cs @@ -13,20 +13,18 @@ public unsafe partial class FasterKV : FasterBase, IFasterKV( OperationStatus internalStatus, - FasterExecutionContext opCtx, - FasterExecutionContext currentCtx, FasterSession fasterSession, ref PendingContext pendingContext) - where FasterSession : IFasterSession + where FasterSession : IFasterSession => (internalStatus & OperationStatus.BASIC_MASK) > OperationStatus.MAX_MAP_TO_COMPLETED_STATUSCODE - && HandleRetryStatus(internalStatus, opCtx, currentCtx, fasterSession, ref pendingContext); + && HandleRetryStatus(internalStatus, fasterSession, ref pendingContext); /// /// Handle retry for operations that will not go pending (e.g., InternalLock) /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal bool HandleImmediateNonPendingRetryStatus(OperationStatus internalStatus, FasterExecutionContext currentCtx, FasterSession fasterSession) - where FasterSession : IFasterSession + internal bool HandleImmediateNonPendingRetryStatus(OperationStatus internalStatus, FasterSession fasterSession) + where FasterSession : IFasterSession { Debug.Assert(epoch.ThisInstanceProtected()); switch (internalStatus) @@ -35,7 +33,7 @@ internal bool HandleImmediateNonPendingRetryStatus(fasterSession); Thread.Yield(); return true; default: @@ -45,11 +43,9 @@ internal bool HandleImmediateNonPendingRetryStatus( OperationStatus internalStatus, - FasterExecutionContext opCtx, - FasterExecutionContext currentCtx, FasterSession fasterSession, ref PendingContext pendingContext) - where FasterSession : IFasterSession + where FasterSession : IFasterSession { Debug.Assert(epoch.ThisInstanceProtected()); switch (internalStatus) @@ -58,13 +54,13 @@ private bool HandleRetryStatus( Thread.Yield(); return true; case OperationStatus.RETRY_LATER: - InternalRefresh(currentCtx, fasterSession); - pendingContext.version = currentCtx.version; + InternalRefresh(fasterSession); + pendingContext.version = fasterSession.Ctx.version; Thread.Yield(); return true; case OperationStatus.CPR_SHIFT_DETECTED: // Retry as (v+1) Operation - SynchronizeEpoch(opCtx, currentCtx, ref pendingContext, fasterSession); + SynchronizeEpoch(fasterSession.Ctx, ref pendingContext, fasterSession); return true; case OperationStatus.ALLOCATE_FAILED: // Async handles this in its own way, as part of the *AsyncResult.Complete*() sequence. @@ -90,32 +86,32 @@ private bool HandleRetryStatus( /// /// Performs appropriate handling based on the internal failure status of the trial. /// - /// Thread (or session) context under which operation was tried to execute. + /// Thread (or session) context under which operation was tried to execute. /// Internal context of the operation. /// Internal status of the trial. 
/// Operation status [MethodImpl(MethodImplOptions.AggressiveInlining)] internal Status HandleOperationStatus( - FasterExecutionContext opCtx, + FasterExecutionContext sessionCtx, ref PendingContext pendingContext, OperationStatus operationStatus) { if (OperationStatusUtils.TryConvertToCompletedStatusCode(operationStatus, out Status status)) return status; - return HandleOperationStatus(opCtx, ref pendingContext, operationStatus, out _); + return HandleOperationStatus(sessionCtx, ref pendingContext, operationStatus, out _); } /// /// Performs appropriate handling based on the internal failure status of the trial. /// - /// Thread (or session) context under which operation was tried to execute. + /// Thread (or session) context under which operation was tried to execute. /// Internal context of the operation. /// Internal status of the trial. /// IO request, if operation went pending /// Operation status [MethodImpl(MethodImplOptions.AggressiveInlining)] internal Status HandleOperationStatus( - FasterExecutionContext opCtx, + FasterExecutionContext sessionCtx, ref PendingContext pendingContext, OperationStatus operationStatus, out AsyncIOContext request) @@ -139,8 +135,8 @@ internal Status HandleOperationStatus( { Debug.Assert(pendingContext.flushEvent.IsDefault(), "Cannot have flushEvent with RECORD_ON_DISK"); // Add context to dictionary - pendingContext.id = opCtx.totalPending++; - opCtx.ioPendingRequests.Add(pendingContext.id, pendingContext); + pendingContext.id = sessionCtx.totalPending++; + sessionCtx.ioPendingRequests.Add(pendingContext.id, pendingContext); // Issue asynchronous I/O request request.id = pendingContext.id; @@ -151,7 +147,7 @@ internal Status HandleOperationStatus( if (pendingContext.IsAsync) request.asyncOperation = new TaskCompletionSource>(TaskCreationOptions.RunContinuationsAsynchronously); else - request.callbackQueue = opCtx.readyResponses; + request.callbackQueue = sessionCtx.readyResponses; hlog.AsyncGetFromDisk(pendingContext.logicalAddress, hlog.GetAverageRecordSize(), request); return new(StatusCode.Pending); diff --git a/cs/src/core/Index/FASTER/Implementation/HashEntryInfo.cs b/cs/src/core/Index/FASTER/Implementation/HashEntryInfo.cs index a110b3a10..0b0aa9d8e 100644 --- a/cs/src/core/Index/FASTER/Implementation/HashEntryInfo.cs +++ b/cs/src/core/Index/FASTER/Implementation/HashEntryInfo.cs @@ -3,6 +3,7 @@ using System.Runtime.CompilerServices; using System.Threading; +using static FASTER.core.Utility; namespace FASTER.core { @@ -27,6 +28,10 @@ internal unsafe struct HashEntryInfo /// The hash tag for this key internal ushort tag; +#if DEBUG + internal long LockCode; +#endif // DEBUG + [MethodImpl(MethodImplOptions.AggressiveInlining)] internal HashEntryInfo(long hash) { @@ -61,9 +66,9 @@ internal HashEntryInfo(long hash) internal bool IsReadCache => entry.ReadCache; /// - /// Whether the original address for this hash entry (at the time of FindTag, etc.) is a readcache address. + /// Whether the current address for this hash entry (possibly modified after FindTag, etc.) is a readcache address. /// - internal bool IsCurrentReadCache => (this.bucket->bucket_entries[this.slot] & Constants.kReadCacheBitMask) != 0; + internal bool IsCurrentReadCache => IsReadCache(this.bucket->bucket_entries[this.slot]); /// /// Set members to the current entry (which may have been updated (via CAS) in the bucket after FindTag, etc.) 
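// Illustrative sketch (not part of the patch): the pending-I/O bookkeeping above in
// miniature -- give the operation a monotonically increasing id, park its context in a
// per-session dictionary, and let the I/O completion path post the id to a ready queue
// that CompletePending later drains. PendingIoTable and its members are hypothetical.
using System.Collections.Concurrent;
using System.Collections.Generic;

sealed class PendingIoTable<TContext>
{
    long totalPending;                                     // like sessionCtx.totalPending
    readonly Dictionary<long, TContext> ioPendingRequests = new();
    readonly ConcurrentQueue<long> readyResponses = new(); // like sessionCtx.readyResponses

    // Session thread: the operation must go to disk, so record it and get an id
    // that the asynchronous read will carry back on completion.
    public long Register(TContext pendingContext)
    {
        long id = totalPending++;
        ioPendingRequests.Add(id, pendingContext);
        return id;
    }

    // I/O completion callback: hand the finished id back to the session.
    public void Complete(long id) => readyResponses.Enqueue(id);

    // Session thread, inside CompletePending: retrieve and remove the parked context.
    public bool TryDrain(out TContext pendingContext)
    {
        if (readyResponses.TryDequeue(out long id))
            return ioPendingRequests.Remove(id, out pendingContext);
        pendingContext = default!;
        return false;
    }
}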
@@ -97,42 +102,25 @@ internal bool TryCAS(long newLogicalAddress, ushort tag) public override string ToString() { + // The debugger often can't call the Globalization NegativeSign property so ToString() would just display the class name + var hashSign = hash < 0 ? "-" : string.Empty; + var absHash = this.hash >= 0 ? this.hash : -this.hash; + var hashStr = $"{hashSign}{absHash}"; + if (bucket == null) - return $"hash {this.hash} "; + return $"hash {hashStr} "; var isRC = "(rc)"; var addrRC = this.IsReadCache ? isRC : string.Empty; var currAddrRC = this.IsCurrentReadCache ? isRC : string.Empty; var isNotCurr = this.Address == this.CurrentAddress ? string.Empty : "*"; - // The debugger often can't call the Globalization NegativeSign property so ToString() would just display the class name - var hashSign = hash < 0 ? "-" : string.Empty; - var absHash = this.hash >= 0 ? this.hash : -this.hash; - return $"addr {this.AbsoluteAddress}{addrRC}, currAddr {this.AbsoluteCurrentAddress}{currAddrRC}{isNotCurr}, hash {hashSign}{absHash}, tag {this.tag}, slot {this.slot}"; - } - } - - public unsafe partial class FasterKV : FasterBase, IFasterKV - { - // Wrappers to call and populate. - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal unsafe bool FindTag(ref HashEntryInfo hei) - { - hei.firstBucket = default; - hei.bucket = default; - hei.slot = default; - hei.entry = default; - return FindTag(hei.hash, hei.tag, ref hei.firstBucket, ref hei.bucket, ref hei.slot, ref hei.entry); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal unsafe void FindOrCreateTag(ref HashEntryInfo hei) - { - hei.firstBucket = default; - hei.bucket = default; - hei.slot = default; - hei.entry = default; - FindOrCreateTag(hei.hash, hei.tag, ref hei.firstBucket, ref hei.bucket, ref hei.slot, ref hei.entry, hlog.BeginAddress); + var result = $"addr {this.AbsoluteAddress}{addrRC}, currAddr {this.AbsoluteCurrentAddress}{currAddrRC}{isNotCurr}, hash {hashStr}, tag {this.tag}, slot {this.slot}, Bkt1 ["; +#if DEBUG + result += $"code {LockCode}, "; +#endif // DEBUG + result += $"{HashBucket.ToString(firstBucket)}]"; + return result; } } } diff --git a/cs/src/core/Index/FASTER/Implementation/Helpers.cs b/cs/src/core/Index/FASTER/Implementation/Helpers.cs index 31e8e4416..e0015a232 100644 --- a/cs/src/core/Index/FASTER/Implementation/Helpers.cs +++ b/cs/src/core/Index/FASTER/Implementation/Helpers.cs @@ -18,11 +18,10 @@ private enum LatchDestination } [MethodImpl(MethodImplOptions.AggressiveInlining)] - static ref RecordInfo WriteTentativeInfo(ref Key key, AllocatorBase log, long newPhysicalAddress, bool inNewVersion, bool tombstone, long previousAddress) + static ref RecordInfo WriteNewRecordInfo(ref Key key, AllocatorBase log, long newPhysicalAddress, bool inNewVersion, bool tombstone, long previousAddress) { ref RecordInfo recordInfo = ref log.GetInfo(newPhysicalAddress); - RecordInfo.WriteInfo(ref recordInfo, inNewVersion, tombstone, previousAddress); - recordInfo.Tentative = true; + recordInfo.WriteInfo(inNewVersion, tombstone, previousAddress); log.Serialize(ref key, newPhysicalAddress); return ref recordInfo; } @@ -44,10 +43,10 @@ internal void MarkPage(long logicalAddress, FasterExecut /// The logical address of the traced record for the key /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - private bool CheckEntryVersionNew(long logicalAddress) + private bool IsRecordVersionNew(long logicalAddress) { HashBucketEntry entry = new() { word = logicalAddress }; - return CheckBucketVersionNew(ref 
entry); + return IsEntryVersionNew(ref entry); } /// @@ -57,9 +56,9 @@ private bool CheckEntryVersionNew(long logicalAddress) /// the last entry of a bucket /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - private bool CheckBucketVersionNew(ref HashBucketEntry entry) + private bool IsEntryVersionNew(ref HashBucketEntry entry) { - // A version shift can only in an address after the checkpoint starts, as v_new threads RCU entries to the tail. + // A version shift can only happen in an address after the checkpoint starts, as v_new threads RCU entries to the tail. if (entry.Address < _hybridLogCheckpoint.info.startLogicalAddress) return false; @@ -67,12 +66,10 @@ private bool CheckBucketVersionNew(ref HashBucketEntry entry) if (UseReadCache && entry.ReadCache) return false; - // Check if record has the new version bit set - var _addr = hlog.GetPhysicalAddress(entry.Address); - if (entry.Address >= hlog.HeadAddress) - return hlog.GetInfo(_addr).InNewVersion; - else + // If the record is in memory, check if it has the new version bit set + if (entry.Address < hlog.HeadAddress) return false; + return hlog.GetInfo(hlog.GetPhysicalAddress(entry.Address)).IsInNewVersion; } internal enum LatchOperation : byte @@ -82,91 +79,56 @@ internal enum LatchOperation : byte Exclusive } - [MethodImpl(MethodImplOptions.AggressiveInlining)] - static bool IsRecordValid(RecordInfo recordInfo, out OperationStatus status) - { - if (recordInfo.Valid) - { - status = OperationStatus.SUCCESS; - return true; - } - - Debug.Assert(!recordInfo.Tentative, "Tentative bit should have been removed when record was invalidated"); - status = OperationStatus.RETRY_LATER; - return false; - } - internal void SetRecordInvalid(long logicalAddress) { - // This is called on exception recovery for a tentative record. + // This is called on exception recovery for a newly-inserted record. var localLog = IsReadCache(logicalAddress) ? readcache : hlog; ref var recordInfo = ref localLog.GetInfo(localLog.GetPhysicalAddress(AbsoluteAddress(logicalAddress))); - Debug.Assert(recordInfo.Tentative, "Expected tentative record in SetRecordInvalid"); recordInfo.SetInvalid(); } [MethodImpl(MethodImplOptions.AggressiveInlining)] - private bool CASRecordIntoChain(ref OperationStackContext stackCtx, long newLogicalAddress) + private bool CASRecordIntoChain(ref Key key, ref OperationStackContext stackCtx, long newLogicalAddress, ref RecordInfo newRecordInfo) { + // If Ephemeral locking, we consider this insertion to the mutable portion of the log as a "concurrent" operation, and + // we don't want other threads accessing this record until we complete Post* (which unlock if doing Ephemeral locking). + if (DoEphemeralLocking) + newRecordInfo.InitializeLockExclusive(); + return stackCtx.recSrc.LowestReadCachePhysicalAddress == Constants.kInvalidAddress ? stackCtx.hei.TryCAS(newLogicalAddress) - : SpliceIntoHashChainAtReadCacheBoundary(ref stackCtx.recSrc, newLogicalAddress); + : SpliceIntoHashChainAtReadCacheBoundary(ref key, ref stackCtx, newLogicalAddress); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private void PostInsertAtTail(ref Key key, ref OperationStackContext stackCtx, ref RecordInfo srcRecordInfo) + { + if (stackCtx.recSrc.HasReadCacheSrc) + srcRecordInfo.CloseAtomic(); + + // If we are not using the LockTable, then ElideAndReinsertReadCacheChain ensured no conflict between the readcache
Otherwise we spliced it in directly, in which case a competing readcache record may + // have been inserted; if so, invalidate it. + if (UseReadCache && LockTable.IsEnabled) + ReadCacheCheckTailAfterSplice(ref key, ref stackCtx.hei); } // Called after BlockAllocate or anything else that could shift HeadAddress, to adjust addresses or return false for RETRY as needed. // This refreshes the HashEntryInfo, so the caller needs to recheck to confirm the BlockAllocated address is still > hei.Address. [MethodImpl(MethodImplOptions.AggressiveInlining)] - private bool VerifyInMemoryAddresses(ref OperationStackContext stackCtx, long skipReadCacheStartAddress = Constants.kInvalidAddress) + private bool VerifyInMemoryAddresses(ref OperationStackContext stackCtx) { - // If we have an in-memory source that is pending eviction, return false and the caller will RETRY. - if (stackCtx.recSrc.InMemorySourceIsBelowHeadAddress()) + // If we have an in-memory source that fell below HeadAddress, return false and the caller will RETRY_LATER. + if (stackCtx.recSrc.HasInMemorySrc && stackCtx.recSrc.LogicalAddress < stackCtx.recSrc.Log.HeadAddress) return false; - // If we're not using readcache or the splice point is still above readcache.HeadAddress, we're good. - if (!UseReadCache || stackCtx.recSrc.LowestReadCacheLogicalAddress >= readcache.HeadAddress) + // If we're not using readcache or we don't have a splice point or it is still above readcache.HeadAddress, we're good. + if (!UseReadCache || stackCtx.recSrc.LowestReadCacheLogicalAddress == Constants.kInvalidAddress || stackCtx.recSrc.LowestReadCacheLogicalAddress >= readcache.HeadAddress) return true; - // Make sure skipReadCacheStartAddress is a readcache address (it likely is not only in the case where there are no readcache records). - // This also ensures the comparison to readcache.HeadAddress below works correctly. - if ((skipReadCacheStartAddress & Constants.kReadCacheBitMask) == 0) - skipReadCacheStartAddress = Constants.kInvalidAddress; - else - skipReadCacheStartAddress &= ~Constants.kReadCacheBitMask; - - // The splice-point readcache record was evicted, so re-get it. - while (true) - { - stackCtx.hei.SetToCurrent(); - stackCtx.recSrc.LatestLogicalAddress = stackCtx.hei.Address; - if (!stackCtx.hei.IsReadCache) - { - stackCtx.recSrc.LowestReadCacheLogicalAddress = Constants.kInvalidAddress; - stackCtx.recSrc.LowestReadCachePhysicalAddress = 0; - Debug.Assert(!stackCtx.recSrc.HasReadCacheSrc, "ReadCacheSrc should not be evicted before SpinWaitUntilAddressIsClosed is called"); - return true; - } - - // Skip from the start address if it's valid, but do not overwrite the Has*Src information in recSrc. - // We stripped the readcache bit from it above, so add it back here (if it's valid). 
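// Illustrative sketch (not part of the patch): CASRecordIntoChain above uses a classic
// publication trick -- the new record is created already exclusively locked, and only
// then made reachable with a compare-and-swap, so no other thread can touch it before
// the Post* fixups complete and unlock it. A toy version of that publish/unlock pair
// (ChainRecord and ChainPublisher are hypothetical):
using System.Threading;

sealed class ChainRecord
{
    public ChainRecord Previous;
    public int LockWord; // 1 = exclusively locked; set before the record is published
}

static class ChainPublisher
{
    // Returns false if another thread won the CAS; the caller re-traces the chain and retries.
    public static bool TryPublish(ref ChainRecord head, ChainRecord newRecord, ChainRecord expectedHead)
    {
        newRecord.LockWord = 1;            // lock BEFORE the record becomes reachable
        newRecord.Previous = expectedHead; // chain back to the observed tail
        return Interlocked.CompareExchange(ref head, newRecord, expectedHead) == expectedHead;
    }

    // The Post* step: fixups done, release the record to other threads.
    public static void Unlock(ChainRecord record) => Volatile.Write(ref record.LockWord, 0);
}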
- if (skipReadCacheStartAddress < readcache.HeadAddress) - skipReadCacheStartAddress = Constants.kInvalidAddress; - else - stackCtx.recSrc.LatestLogicalAddress = skipReadCacheStartAddress | Constants.kReadCacheBitMask; - - if (UseReadCache && SkipReadCache(ref stackCtx.recSrc.LatestLogicalAddress, out stackCtx.recSrc.LowestReadCacheLogicalAddress, out stackCtx.recSrc.LowestReadCachePhysicalAddress)) - { - Debug.Assert(stackCtx.hei.IsReadCache || stackCtx.hei.Address == stackCtx.recSrc.LatestLogicalAddress, "For non-readcache chains, recSrc.LatestLogicalAddress should == hei.Address"); - return true; - } - - // A false return from SkipReadCache means we traversed to where recSrc.LatestLogicalAddress is still in - // the readcache but is < readcache.HeadAddress, so wait until it is evicted. - SpinWaitUntilAddressIsClosed(stackCtx.recSrc.LatestLogicalAddress, readcache); - - // If we have an in-memory source that is pending eviction, return false and the caller will RETRY. - if (stackCtx.recSrc.InMemorySourceIsBelowHeadAddress()) - return false; - } + // If the splice point went below readcache.HeadAddress, we would have to wait for the chain to be fixed up by eviction, + // so just return RETRY_LATER and restart the operation. + return false; } } } diff --git a/cs/src/core/Index/FASTER/Implementation/InternalDelete.cs b/cs/src/core/Index/FASTER/Implementation/InternalDelete.cs index 93002b586..fd849c916 100644 --- a/cs/src/core/Index/FASTER/Implementation/InternalDelete.cs +++ b/cs/src/core/Index/FASTER/Implementation/InternalDelete.cs @@ -16,7 +16,6 @@ public unsafe partial class FasterKV : FasterBase, IFasterKVUser context for the operation, in case it goes pending. /// Pending context used internally to store the context of the operation. /// Callback functions. - /// Session context /// Operation serial number /// /// @@ -39,95 +38,76 @@ public unsafe partial class FasterKV : FasterBase, IFasterKV /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal OperationStatus InternalDelete( - ref Key key, - ref Context userContext, - ref PendingContext pendingContext, - FasterSession fasterSession, - FasterExecutionContext sessionCtx, - long lsn) + internal OperationStatus InternalDelete(ref Key key, ref Context userContext, + ref PendingContext pendingContext, FasterSession fasterSession, long lsn) where FasterSession : IFasterSession { - OperationStatus status = default; var latchOperation = LatchOperation.None; var latchDestination = LatchDestination.NormalProcessing; OperationStackContext stackCtx = new(comparer.GetHashCode64(ref key)); - if (sessionCtx.phase != Phase.REST) - HeavyEnter(stackCtx.hei.hash, sessionCtx, fasterSession); - - // A 'ref' variable must be initialized. If we find a record for the key, we reassign the reference. We don't copy from this source, but we do lock it. 
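// Illustrative sketch (not part of the patch): the simplified VerifyInMemoryAddresses
// above trades spin-and-repair for "validate, else restart" -- if a captured in-memory
// address has fallen below HeadAddress (its page was evicted mid-operation), the
// operation reports RETRY_LATER and the caller re-runs it from the hash lookup.
// ToyStatus and RetryLoop are hypothetical.
enum ToyStatus { Success, NotFound, RetryLater }

static class RetryLoop
{
    // tryOnce re-traces the hash chain on every attempt, so a retry observes the
    // post-eviction state instead of waiting for eviction to finish.
    public static ToyStatus Run(System.Func<ToyStatus> tryOnce, int maxAttempts = 8)
    {
        for (int attempt = 0; attempt < maxAttempts; attempt++)
        {
            ToyStatus status = tryOnce();
            if (status != ToyStatus.RetryLater)
                return status;
            System.Threading.Thread.Yield(); // let eviction and other threads make progress
        }
        return ToyStatus.RetryLater;
    }
}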
- RecordInfo dummyRecordInfo = default; - ref RecordInfo srcRecordInfo = ref dummyRecordInfo; + if (fasterSession.Ctx.phase != Phase.REST) + HeavyEnter(stackCtx.hei.hash, fasterSession.Ctx, fasterSession); var tagExists = FindTag(ref stackCtx.hei); if (!tagExists) { - Debug.Assert(!fasterSession.IsManualLocking || LockTable.IsLockedExclusive(ref key, stackCtx.hei.hash), "A Lockable-session Delete() of a non-existent key requires a LockTable lock"); + Debug.Assert(!fasterSession.IsManualLocking || LockTable.IsLockedExclusive(ref key, ref stackCtx.hei), "A Lockable-session Delete() of a non-existent key requires a LockTable lock"); return OperationStatus.NOTFOUND; } stackCtx.SetRecordSourceToHashEntry(hlog); - // We must always scan to HeadAddress; a Lockable*Context could be activated and lock the record in the immutable region while we're scanning. - TryFindRecordInMemory(ref key, ref stackCtx, hlog.HeadAddress); + // Always scan to HeadAddress; this lets us find a tombstoned record in the immutable region, avoiding unnecessarily adding one. + RecordInfo dummyRecordInfo = new() { Valid = true }; + ref RecordInfo srcRecordInfo = ref TryFindRecordInMemory(ref key, ref stackCtx, hlog.HeadAddress) + ? ref stackCtx.recSrc.GetInfo() + : ref dummyRecordInfo; + if (srcRecordInfo.IsClosed) + return OperationStatus.RETRY_LATER; + + // If we already have a deleted record, there's nothing to do. + if (srcRecordInfo.Tombstone) + return OperationStatus.NOTFOUND; DeleteInfo deleteInfo = new() { - SessionType = fasterSession.SessionType, - Version = sessionCtx.version, - SessionID = sessionCtx.sessionID, + Version = fasterSession.Ctx.version, + SessionID = fasterSession.Ctx.sessionID, Address = stackCtx.recSrc.LogicalAddress, KeyHash = stackCtx.hei.hash }; - #region Entry latch operation - if (sessionCtx.phase != Phase.REST) - { - latchDestination = AcquireLatchDelete(sessionCtx, ref stackCtx.hei, ref status, ref latchOperation, stackCtx.recSrc.LogicalAddress); - if (latchDestination == LatchDestination.Retry) - goto LatchRelease; - } - #endregion + if (!TryTransientXLock(fasterSession, ref key, ref stackCtx, out OperationStatus status)) + return status; // We must use try/finally to ensure unlocking even in the presence of exceptions. try { - #region Address and source record checks + #region Address and source record checks if (stackCtx.recSrc.HasReadCacheSrc) { // Use the readcache record as the CopyUpdater source. - goto LockSourceRecord; + goto CreateNewRecord; } - else if (stackCtx.recSrc.LogicalAddress >= hlog.ReadOnlyAddress && latchDestination == LatchDestination.NormalProcessing) + // Check for CPR consistency after checking if source is readcache. 
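// Illustrative sketch (not part of the patch): two behaviors of the rewritten
// InternalDelete above, in miniature. Scanning down to HeadAddress lets an existing
// tombstone short-circuit the delete (NOTFOUND, nothing appended), and a successful
// delete of a non-mutable record appends a tombstoned version rather than erasing
// anything. ToyVersion/ToyStore are hypothetical, not FASTER's API.
using System.Collections.Generic;

sealed class ToyVersion { public string Value = ""; public bool Tombstone; }

sealed class ToyStore
{
    readonly Dictionary<string, List<ToyVersion>> chains = new();

    public bool Delete(string key)
    {
        if (!chains.TryGetValue(key, out var chain) || chain.Count == 0)
            return false;                               // no record: NOTFOUND
        if (chain[^1].Tombstone)
            return false;                               // already deleted: NOTFOUND, no new record
        chain.Add(new ToyVersion { Tombstone = true }); // the "create new record" path
        return true;
    }
}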
+ if (fasterSession.Ctx.phase != Phase.REST) { - // Mutable Region: Update the record in-place - srcRecordInfo = ref hlog.GetInfo(stackCtx.recSrc.PhysicalAddress); - if (!TryEphemeralXLock(fasterSession, ref stackCtx.recSrc, ref srcRecordInfo, out status)) + latchDestination = CheckCPRConsistencyDelete(fasterSession.Ctx.phase, ref stackCtx, ref status, ref latchOperation); + if (latchDestination == LatchDestination.Retry) goto LatchRelease; + } - if (srcRecordInfo.Tombstone) - { - EphemeralXUnlockAndAbandonUpdate(fasterSession, ref stackCtx.recSrc, ref srcRecordInfo); - srcRecordInfo = ref dummyRecordInfo; - status = OperationStatus.NOTFOUND; - goto LatchRelease; - } - - if (!srcRecordInfo.IsValidUpdateOrLockSource) - { - EphemeralXUnlockAndAbandonUpdate(fasterSession, ref stackCtx.recSrc, ref srcRecordInfo); - srcRecordInfo = ref dummyRecordInfo; - goto CreateNewRecord; - } - + if (stackCtx.recSrc.LogicalAddress >= hlog.ReadOnlyAddress && latchDestination == LatchDestination.NormalProcessing) + { + // Mutable Region: Update the record in-place deleteInfo.RecordInfo = srcRecordInfo; - ref Value recordValue = ref hlog.GetValue(stackCtx.recSrc.PhysicalAddress); - if (fasterSession.ConcurrentDeleter(ref hlog.GetKey(stackCtx.recSrc.PhysicalAddress), ref recordValue, ref srcRecordInfo, ref deleteInfo)) + ref Value recordValue = ref stackCtx.recSrc.GetValue(); + if (fasterSession.ConcurrentDeleter(ref stackCtx.recSrc.GetKey(), ref recordValue, ref srcRecordInfo, ref deleteInfo, out stackCtx.recSrc.ephemeralLockResult)) { - this.MarkPage(stackCtx.recSrc.LogicalAddress, sessionCtx); + this.MarkPage(stackCtx.recSrc.LogicalAddress, fasterSession.Ctx); if (WriteDefaultOnDelete) recordValue = default; @@ -143,66 +123,39 @@ internal OperationStatus InternalDelete( status = OperationStatusUtils.AdvancedOpCode(OperationStatus.SUCCESS, StatusCode.InPlaceUpdatedRecord); goto LatchRelease; } + if (stackCtx.recSrc.ephemeralLockResult == EphemeralLockResult.Failed) + { + status = OperationStatus.RETRY_LATER; + goto LatchRelease; + } if (deleteInfo.Action == DeleteAction.CancelOperation) { status = OperationStatus.CANCELED; goto LatchRelease; } - stackCtx.recSrc.HasMainLogSrc = true; + // Could not delete in place for some reason - create new record. goto CreateNewRecord; } else if (stackCtx.recSrc.LogicalAddress >= hlog.HeadAddress) { - // Only need to go below ReadOnly for locking and Sealing. - stackCtx.recSrc.HasMainLogSrc = true; - goto LockSourceRecord; + goto CreateNewRecord; } else { - // Either on-disk or no record exists - check for lock before creating new record. First ensure any record lock has transitioned to the LockTable. - SpinWaitUntilRecordIsClosed(ref key, stackCtx.hei.hash, stackCtx.recSrc.LogicalAddress, hlog); - Debug.Assert(!fasterSession.IsManualLocking || LockTable.IsLockedExclusive(ref key, stackCtx.hei.hash), "A Lockable-session Delete() of an on-disk or non-existent key requires a LockTable lock"); - if (LockTable.IsActive && !fasterSession.DisableEphemeralLocking && !LockTable.TryLockEphemeral(ref key, stackCtx.hei.hash, LockType.Exclusive, out stackCtx.recSrc.HasLockTableLock)) - { - status = OperationStatus.RETRY_LATER; - goto LatchRelease; - } + // Either on-disk or no record exists - create new record. 
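// Illustrative sketch (not part of the patch): the branch structure above is driven by
// where the traced address falls in the hybrid log. At or above ReadOnlyAddress the
// record may be updated (or tombstoned) in place; between HeadAddress and
// ReadOnlyAddress it is in memory but immutable, so a new version is appended; below
// HeadAddress it is on disk or absent. A toy classifier of that rule (names hypothetical):
enum LogRegion { Mutable, ImmutableInMemory, OnDisk, NoRecord }

static class HybridLogRegions
{
    public static LogRegion Classify(long address, long readOnlyAddress, long headAddress, long beginAddress)
    {
        if (address >= readOnlyAddress) return LogRegion.Mutable;           // in-place update allowed
        if (address >= headAddress)     return LogRegion.ImmutableInMemory; // copy a new version to the tail
        if (address >= beginAddress)    return LogRegion.OnDisk;            // may need pending I/O
        return LogRegion.NoRecord;                                          // create a fresh record
    }
}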
+ Debug.Assert(!fasterSession.IsManualLocking || LockTable.IsLockedExclusive(ref key, ref stackCtx.hei), "A Lockable-session Delete() of an on-disk or non-existent key requires a LockTable lock"); goto CreateNewRecord; } - #endregion Address and source record checks - #region Lock source record - LockSourceRecord: - // This would be a local function to reduce "goto", but 'ref' variables and parameters aren't supported on local functions. - srcRecordInfo = ref stackCtx.recSrc.GetSrcRecordInfo(); - if (!TryEphemeralXLock(fasterSession, ref stackCtx.recSrc, ref srcRecordInfo, out status)) - goto LatchRelease; - - if (srcRecordInfo.Tombstone) - { - EphemeralXUnlockAndAbandonUpdate(fasterSession, ref stackCtx.recSrc, ref srcRecordInfo); - srcRecordInfo = ref dummyRecordInfo; - status = OperationStatus.NOTFOUND; - goto LatchRelease; - } - - if (!srcRecordInfo.IsValidUpdateOrLockSource) - { - EphemeralXUnlockAndAbandonUpdate(fasterSession, ref stackCtx.recSrc, ref srcRecordInfo); - srcRecordInfo = ref dummyRecordInfo; - } - goto CreateNewRecord; - #endregion Lock source record - #region Create new record in the mutable region CreateNewRecord: { if (latchDestination != LatchDestination.CreatePendingContext) { // Immutable region or new record - status = CreateNewRecordDelete(ref key, ref pendingContext, fasterSession, sessionCtx, ref stackCtx, ref srcRecordInfo); + status = CreateNewRecordDelete(ref key, ref pendingContext, fasterSession, ref stackCtx, ref srcRecordInfo); if (!OperationStatusUtils.IsAppend(status)) { // We should never return "SUCCESS" for a new record operation: it returns NOTFOUND on success. @@ -220,8 +173,8 @@ internal OperationStatus InternalDelete( } finally { - stackCtx.HandleNewRecordOnError(this); - EphemeralXUnlockAfterUpdate(fasterSession, ref key, ref stackCtx, ref srcRecordInfo); + stackCtx.HandleNewRecordOnException(this); + TransientXUnlock(fasterSession, ref key, ref stackCtx); } #region Create pending context @@ -232,7 +185,7 @@ internal OperationStatus InternalDelete( pendingContext.userContext = userContext; pendingContext.entry.word = stackCtx.recSrc.LatestLogicalAddress; pendingContext.logicalAddress = stackCtx.recSrc.LogicalAddress; - pendingContext.version = sessionCtx.version; + pendingContext.version = fasterSession.Ctx.version; pendingContext.serialNum = lsn; } #endregion @@ -257,61 +210,10 @@ internal OperationStatus InternalDelete( return status; } - private LatchDestination AcquireLatchDelete(FasterExecutionContext sessionCtx, ref HashEntryInfo hei, ref OperationStatus status, - ref LatchOperation latchOperation, long logicalAddress) + private LatchDestination CheckCPRConsistencyDelete(Phase phase, ref OperationStackContext stackCtx, ref OperationStatus status, ref LatchOperation latchOperation) { - switch (sessionCtx.phase) - { - case Phase.PREPARE: - { - if (HashBucket.TryAcquireSharedLatch(ref hei)) - { - // Set to release shared latch (default) - latchOperation = LatchOperation.Shared; - if (CheckBucketVersionNew(ref hei.entry)) - { - status = OperationStatus.CPR_SHIFT_DETECTED; - return LatchDestination.Retry; // Pivot Thread, retry - } - break; // Normal Processing - } - else - { - status = OperationStatus.CPR_SHIFT_DETECTED; - return LatchDestination.Retry; // Pivot Thread, retry - } - } - case Phase.IN_PROGRESS: - { - if (!CheckEntryVersionNew(logicalAddress)) - { - if (HashBucket.TryAcquireExclusiveLatch(ref hei)) - { - // Set to release exclusive latch (default) - latchOperation = LatchOperation.Exclusive; - return 
LatchDestination.CreateNewRecord; // Create a (v+1) record - } - else - { - status = OperationStatus.RETRY_LATER; - return LatchDestination.Retry; // Retry after refresh - } - } - break; // Normal Processing - } - case Phase.WAIT_INDEX_CHECKPOINT: - case Phase.WAIT_FLUSH: - { - if (!CheckEntryVersionNew(logicalAddress)) - { - return LatchDestination.CreateNewRecord; // Create a (v+1) record - } - break; // Normal Processing - } - default: - break; - } - return LatchDestination.NormalProcessing; + // This is the same logic as Upsert; neither goes pending. + return CheckCPRConsistencyUpsert(phase, ref stackCtx, ref status, ref latchOperation); } /// @@ -320,43 +222,27 @@ private LatchDestination AcquireLatchDelete(FasterExecut /// The record Key /// Information about the operation context /// The current session - /// The current session context /// Contains the and structures for this operation, /// and allows passing back the newLogicalAddress for invalidation in the case of exceptions. /// If ., /// this is the for private OperationStatus CreateNewRecordDelete(ref Key key, ref PendingContext pendingContext, - FasterSession fasterSession, FasterExecutionContext sessionCtx, - ref OperationStackContext stackCtx, ref RecordInfo srcRecordInfo) + FasterSession fasterSession, ref OperationStackContext stackCtx, ref RecordInfo srcRecordInfo) where FasterSession : IFasterSession { var value = default(Value); var (_, allocatedSize) = hlog.GetRecordSize(ref key, ref value); - if (!GetAllocationForRetry(ref pendingContext, stackCtx.hei.Address, allocatedSize, out long newLogicalAddress, out long newPhysicalAddress)) - { - // Spin to make sure newLogicalAddress is > recSrc.LatestLogicalAddress (the .PreviousAddress and CAS comparison value). - do - { - if (!BlockAllocate(allocatedSize, out newLogicalAddress, ref pendingContext, out var status)) - return status; - newPhysicalAddress = hlog.GetPhysicalAddress(newLogicalAddress); - if (!VerifyInMemoryAddresses(ref stackCtx)) - { - // Don't save allocation because we did not allocate a full Value. 
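// Illustrative sketch (not part of the patch): the do/while being deleted here (now
// folded into TryAllocateRecord) enforces one invariant -- the new record's address must
// be higher than the chain tail it will point back to, so PreviousAddress links always
// decrease and the CAS comparison value stays meaningful. ToyAllocator is a hypothetical
// bump allocator standing in for BlockAllocate.
using System.Threading;

sealed class ToyAllocator
{
    long tail;
    public long Allocate(int size) => Interlocked.Add(ref tail, size) - size;
}

static class RecordAllocation
{
    // A fresh bump allocation normally satisfies this immediately; the loop matters when
    // an allocation recycled from a failed earlier attempt may sit too low in the log.
    public static long AllocateAbove(ToyAllocator allocator, int size, long latestLogicalAddress)
    {
        long address;
        do
        {
            address = allocator.Allocate(size);
        } while (address < latestLogicalAddress);
        return address;
    }
}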
- return OperationStatus.RETRY_LATER; - } - } while (newLogicalAddress < stackCtx.recSrc.LatestLogicalAddress); - } + if (!TryAllocateRecord(ref pendingContext, ref stackCtx, allocatedSize, recycle: false, out long newLogicalAddress, out long newPhysicalAddress, out OperationStatus status)) + return status; - ref RecordInfo newRecordInfo = ref WriteTentativeInfo(ref key, hlog, newPhysicalAddress, inNewVersion: sessionCtx.InNewVersion, tombstone: true, stackCtx.recSrc.LatestLogicalAddress); - stackCtx.newLogicalAddress = newLogicalAddress; + ref RecordInfo newRecordInfo = ref WriteNewRecordInfo(ref key, hlog, newPhysicalAddress, inNewVersion: fasterSession.Ctx.InNewVersion, tombstone: true, stackCtx.recSrc.LatestLogicalAddress); + stackCtx.SetNewRecord(newLogicalAddress); DeleteInfo deleteInfo = new() { - SessionType = fasterSession.SessionType, - Version = sessionCtx.version, - SessionID = sessionCtx.sessionID, + Version = fasterSession.Ctx.version, + SessionID = fasterSession.Ctx.sessionID, Address = newLogicalAddress, KeyHash = stackCtx.hei.hash, RecordInfo = newRecordInfo @@ -372,16 +258,15 @@ private OperationStatus CreateNewRecordDelete(fasterSession, ref key, ref stackCtx, ref srcRecordInfo, ref newRecordInfo, out var lockStatus)) - return lockStatus; + PostInsertAtTail(ref key, ref stackCtx, ref srcRecordInfo); // Note that this is the new logicalAddress; we have not retrieved the old one if it was below HeadAddress, and thus // we do not know whether 'logicalAddress' belongs to 'key' or is a collision. fasterSession.PostSingleDeleter(ref key, ref newRecordInfo, ref deleteInfo); - stackCtx.ClearNewRecordTentativeBitAtomic(ref newRecordInfo); + stackCtx.ClearNewRecord(); pendingContext.recordInfo = newRecordInfo; pendingContext.logicalAddress = newLogicalAddress; return OperationStatusUtils.AdvancedOpCode(OperationStatus.NOTFOUND, StatusCode.CreatedRecord); diff --git a/cs/src/core/Index/FASTER/Implementation/InternalLock.cs b/cs/src/core/Index/FASTER/Implementation/InternalLock.cs index 5b25ddac9..a6dead3bd 100644 --- a/cs/src/core/Index/FASTER/Implementation/InternalLock.cs +++ b/cs/src/core/Index/FASTER/Implementation/InternalLock.cs @@ -13,149 +13,57 @@ public unsafe partial class FasterKV : FasterBase, IFasterKV /// key of the record. /// Lock operation being done. - /// Receives the recordInfo of the record being locked [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal OperationStatus InternalLock(ref Key key, LockOperation lockOp, out RecordInfo lockInfo) + internal OperationStatus InternalLock(ref Key key, LockOperation lockOp) { Debug.Assert(epoch.ThisInstanceProtected(), "InternalLock must have protected epoch"); + Debug.Assert(this.LockTable.IsEnabled, "ManualLockTable must be enabled for InternalLock"); OperationStackContext stackCtx = new(comparer.GetHashCode64(ref key)); FindTag(ref stackCtx.hei); stackCtx.SetRecordSourceToHashEntry(hlog); - // If the record is in memory, then there can't be a LockTable lock. - if (TryFindAndLockRecordInMemory(ref key, lockOp, out lockInfo, ref stackCtx, out OperationStatus lockStatus)) - return lockStatus; - - // Not in memory. First make sure the record has been transferred to the lock table if we did not find it because it was in the eviction region. - var prevLogHA = hlog.HeadAddress; - var prevReadCacheHA = UseReadCache ? 
readcache.HeadAddress : 0; - Debug.Assert(stackCtx.recSrc.LogicalAddress < stackCtx.recSrc.Log.HeadAddress, "Expected record to be below HeadAddress as we did not find it in-memory"); - SpinWaitUntilRecordIsClosed(ref key, stackCtx.hei.hash, stackCtx.recSrc.LogicalAddress, stackCtx.recSrc.Log); - - // Do LockTable operations - if (lockOp.LockOperationType == LockOperationType.IsLocked) - return (!this.LockTable.IsActive || this.LockTable.TryGet(ref key, stackCtx.hei.hash, out lockInfo)) ? OperationStatus.SUCCESS : OperationStatus.RETRY_LATER; - - if (lockOp.LockOperationType == LockOperationType.Unlock) + switch (lockOp.LockOperationType) { - if (this.LockTable.Unlock(ref key, stackCtx.hei.hash, lockOp.LockType)) - return OperationStatus.SUCCESS; - - // We may need to recheck in-memory, due to a race where, when T1 started this InternalLock call, the key was not in the hash table - // (it was a nonexistent key) but was in the LockTable: - // T1 did TryFindAndUnlockRecordInMemory above, and did not find the key in the hash table - // T2 did an Upsert of the key, which inserted a tentative entry into the log, then transferred the lock from the LockTable to that log record - // Or, T2 completed a pending Read and did CopyToTail or CopyToReadCache - // T1 would fail LockTable.Unlock and leave a locked record in the log - // If the address in the HashEntryInfo has changed, or if hei has a readcache address and either we can't navigate from the lowest readcache - // address (due to it being below HeadAddress) or its previous address does not point to the same address as when we started (which means a - // new log entry was spliced in, then we retry in-memory. - if (stackCtx.hei.IsNotCurrent || - (stackCtx.hei.IsReadCache - && (stackCtx.recSrc.LowestReadCachePhysicalAddress < readcache.HeadAddress - || readcache.GetInfo(stackCtx.recSrc.LowestReadCachePhysicalAddress).PreviousAddress != stackCtx.recSrc.LatestLogicalAddress))) - { - stackCtx.hei.SetToCurrent(); - stackCtx.SetRecordSourceToHashEntry(hlog); - if (TryFindAndLockRecordInMemory(ref key, lockOp, out lockInfo, ref stackCtx, out lockStatus)) - return lockStatus; - - // If the HeadAddresses have changed, then the key may have dropped below it and was/will be evicted back to the LockTable. - if (hlog.HeadAddress != prevLogHA || (UseReadCache && readcache.HeadAddress != prevReadCacheHA)) + case LockOperationType.Lock: + if (!this.LockTable.TryLockManual(ref key, ref stackCtx.hei, lockOp.LockType)) return OperationStatus.RETRY_LATER; - } - - Debug.Fail("Trying to unlock a nonexistent key"); - return OperationStatus.SUCCESS; // SUCCEED so we don't continue the loop; TODO change to OperationStatus.NOTFOUND and return false from Lock API - } - - // Try to lock. One of the following things can happen here: - // - We find a record in the LockTable and: - // - It is tentative; we fail the lock and return RETRY_LATER - // - It is not tentative; we either: - // - Succeed with the lock (probably an additional S lock) and return SUCCESS - // - Fail the lock and return RETRY_LATER - // - The LockTable failed to insert a record - // - We did not find a record so we added one, so proceed with two-phase insert protocol below. - if (!this.LockTable.TryLockManual(ref key, stackCtx.hei.hash, lockOp.LockType, out bool tentativeLock)) - return OperationStatus.RETRY_LATER; - - // We got the lock. If a new record with this key was inserted into the main log or readcache after we started, remove the lock we just added and RETRY. 
- OperationStackContext stackCtx2 = new(stackCtx.hei.hash); - if (FindTag(ref stackCtx2.hei)) - { - stackCtx2.SetRecordSourceToHashEntry(hlog); - - // First look in the readcache, then in memory. If there's any record there, Tentative or not, we back off this lock and retry. - // The way two-phase insertion to the log (or readcache) works, the inserters will see our LockTable record and wait for it to become - // non-tentative, which means the lock is permanent. If so, we won the race here, and it must be assumed our caller proceeded under - // the assumption they had the lock. (Otherwise, we remove the lock table entry here, and the other thread proceeds). That means we - // can't wait for tentative records here; that would deadlock (we wait for them to become non-tentative and they wait for us to become - // non-tentative). So we must bring the records back here even if they are tentative, then bail on them. - // Note: We don't use TryFindRecordInMemory here because we only want to scan the tail portion of the hash chain; we've already searched - // below that, with the TryFindAndLockRecordInMemory call above. - var found = false; - if (stackCtx2.hei.IsReadCache && (!stackCtx.hei.IsReadCache || stackCtx2.hei.Address > stackCtx.hei.Address)) - { - // stackCtx2 has readcache records. If stackCtx.hei is a readcache record, then we just have to search down to that record; - // otherwise we search the entire readcache. We only need to find the latest logical address if stackCtx.hei is *not* a readcache record. - var untilAddress = stackCtx.hei.IsReadCache ? stackCtx.hei.Address : Constants.kInvalidAddress; - found = FindInReadCache(ref key, ref stackCtx2, untilAddress, alwaysFindLatestLA: !stackCtx.hei.IsReadCache, waitForTentative: false); - } - - if (!found) - { - // Search the main log. Since we did not find the key in the readcache, we have either: - // - stackCtx.hei is not a readcache record: we have the most current LowestReadCache info in stackCtx2 (which may be none, if there are no readcache records) - // - stackCtx.hei is a readcache record: stackCtx2 stopped searching before that, so stackCtx1 has the most recent readcache info - var lowestRcPhysicalAddress = stackCtx.hei.IsReadCache ? stackCtx.recSrc.LowestReadCachePhysicalAddress : stackCtx2.recSrc.LowestReadCachePhysicalAddress; - var latestlogicalAddress = lowestRcPhysicalAddress != 0 ? readcache.GetInfo(lowestRcPhysicalAddress).PreviousAddress : stackCtx2.hei.Address; - if (latestlogicalAddress > stackCtx.recSrc.LatestLogicalAddress) - { - var minAddress = stackCtx.recSrc.LatestLogicalAddress > hlog.HeadAddress ? stackCtx.recSrc.LatestLogicalAddress : hlog.HeadAddress; - found = TraceBackForKeyMatch(ref key, stackCtx2.hei.Address, minAddress + 1, out _, out _, waitForTentative: false); - } - } - - if (found) - { - LockTable.UnlockOrRemoveTentativeEntry(ref key, stackCtx.hei.hash, lockOp.LockType, tentativeLock); - return OperationStatus.RETRY_LATER; - } + return OperationStatus.SUCCESS; + case LockOperationType.Unlock: + this.LockTable.Unlock(ref key, ref stackCtx.hei, lockOp.LockType); + return OperationStatus.SUCCESS; + default: + Debug.Fail($"Unexpected {nameof(LockOperationType)}: {lockOp.LockOperationType}"); + break; } - - // Success - if (tentativeLock && !this.LockTable.ClearTentativeBit(ref key, stackCtx.hei.hash)) - return OperationStatus.RETRY_LATER; // The tentative record was not found, so the lock has not been done; retry return OperationStatus.SUCCESS; } - /// Locks the record if it can find it in memory. 
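// Illustrative sketch (not part of the patch): the rewritten InternalLock no longer
// locks log records at all -- it takes shared/exclusive counts in a hash-indexed lock
// table, and a failed acquire surfaces as RETRY_LATER rather than blocking. A toy
// striped table with that TryLock shape (StripedLockTable is hypothetical):
using System.Threading;

sealed class StripedLockTable
{
    readonly int[] words; // per stripe: 0 = free, -1 = exclusive, n > 0 = n shared holders

    public StripedLockTable(int stripes) => words = new int[stripes];

    int Index(long hash) => (int)((ulong)hash % (ulong)words.Length);

    public bool TryLockExclusive(long hash)
    {
        ref int w = ref words[Index(hash)];
        return Interlocked.CompareExchange(ref w, -1, 0) == 0;
    }

    public bool TryLockShared(long hash)
    {
        ref int w = ref words[Index(hash)];
        int observed = Volatile.Read(ref w);
        return observed >= 0 && Interlocked.CompareExchange(ref w, observed + 1, observed) == observed;
    }

    public void UnlockExclusive(long hash) => Volatile.Write(ref words[Index(hash)], 0);
    public void UnlockShared(long hash)    => Interlocked.Decrement(ref words[Index(hash)]);
}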
- /// True if the key was found in memory, else false. 'lockStatus' returns the lock status, if found, else should be ignored. - private bool TryFindAndLockRecordInMemory(ref Key key, LockOperation lockOp, out RecordInfo lockInfo, ref OperationStackContext stackCtx, out OperationStatus lockStatus) + /// + /// Manual Lock operation for locking . Locks the buckets corresponding to 'keys'. + /// + /// Lock code of the key () to be locked or unlocked. + /// Lock operation being done. + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal OperationStatus InternalLock(long keyLockCode, LockOperation lockOp) { - lockInfo = default; - if (TryFindRecordInMemory(ref key, ref stackCtx, minOffset: hlog.HeadAddress)) + Debug.Assert(epoch.ThisInstanceProtected(), "InternalLock must have protected epoch"); + Debug.Assert(this.LockTable.IsEnabled, "ManualLockTable must be enabled for InternalLock"); + + switch (lockOp.LockOperationType) { - ref RecordInfo recordInfo = ref stackCtx.recSrc.GetSrcRecordInfo(); - if (!recordInfo.IsIntermediate(out lockStatus)) - { - if (lockOp.LockOperationType == LockOperationType.IsLocked) - lockStatus = OperationStatus.SUCCESS; - else if (!recordInfo.TryLockOperation(lockOp)) - { - // TODO: Consider eliding the record (as in InternalRMW) from the hash table if we are X-unlocking a Tombstoned record. - lockStatus = OperationStatus.RETRY_LATER; - return true; - } - } - if (lockOp.LockOperationType == LockOperationType.IsLocked) - lockInfo = recordInfo; - return true; + case LockOperationType.Lock: + if (!this.LockTable.TryLockManual(keyLockCode, lockOp.LockType)) + return OperationStatus.RETRY_LATER; + return OperationStatus.SUCCESS; + case LockOperationType.Unlock: + this.LockTable.Unlock(keyLockCode, lockOp.LockType); + return OperationStatus.SUCCESS; + default: + Debug.Fail($"Unexpected {nameof(LockOperationType)}: {lockOp.LockOperationType}"); + break; } - lockStatus = OperationStatus.SUCCESS; - return false; + return OperationStatus.SUCCESS; } } } diff --git a/cs/src/core/Index/FASTER/Implementation/InternalRMW.cs b/cs/src/core/Index/FASTER/Implementation/InternalRMW.cs index 91669b4d8..1c700c1b0 100644 --- a/cs/src/core/Index/FASTER/Implementation/InternalRMW.cs +++ b/cs/src/core/Index/FASTER/Implementation/InternalRMW.cs @@ -3,75 +3,11 @@ using System.Diagnostics; using System.Runtime.CompilerServices; -using System.Threading; namespace FASTER.core { public unsafe partial class FasterKV : FasterBase, IFasterKV { - internal bool ReinitializeExpiredRecord(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, ref RMWInfo rmwInfo, - long logicalAddress, FasterExecutionContext sessionCtx, FasterSession fasterSession, - bool isIpu, out OperationStatus status) - where FasterSession : IFasterSession - { - // This is called for InPlaceUpdater or CopyUpdater only; CopyUpdater however does not copy an expired record, so we return CreatedRecord. - var advancedStatusCode = isIpu ? StatusCode.InPlaceUpdatedRecord : StatusCode.CreatedRecord; - advancedStatusCode |= StatusCode.Expired; - if (!fasterSession.NeedInitialUpdate(ref key, ref input, ref output, ref rmwInfo)) - { - if (rmwInfo.Action == RMWAction.CancelOperation) - { - status = OperationStatus.CANCELED; - return false; - } - else - { - // Expiration with no insertion. 
- recordInfo.Tombstone = true; - status = OperationStatusUtils.AdvancedOpCode(OperationStatus.NOTFOUND, advancedStatusCode); - return true; - } - } - - // Try to reinitialize in place - (var currentSize, _) = hlog.GetRecordSize(ref key, ref value); - (var requiredSize, _) = hlog.GetInitialRecordSize(ref key, ref input, fasterSession); - - if (currentSize >= requiredSize) - { - if (fasterSession.InitialUpdater(ref key, ref input, ref value, ref output, ref recordInfo, ref rmwInfo)) - { - // If IPU path, we need to complete PostInitialUpdater as well - if (isIpu) - fasterSession.PostInitialUpdater(ref key, ref input, ref value, ref output, ref recordInfo, ref rmwInfo); - - status = OperationStatusUtils.AdvancedOpCode(OperationStatus.SUCCESS, advancedStatusCode); - return true; - } - else - { - if (rmwInfo.Action == RMWAction.CancelOperation) - { - status = OperationStatus.CANCELED; - return false; - } - else - { - // Expiration with no insertion. - recordInfo.Tombstone = true; - status = OperationStatusUtils.AdvancedOpCode(OperationStatus.NOTFOUND, advancedStatusCode); - return true; - } - } - } - - // Reinitialization in place was not possible. InternalRMW will do the following based on who called this: - // IPU: move to the NIU->allocate->IU path - // CU: caller invalidates allocation, retries operation as NIU->allocate->IU - status = OperationStatus.SUCCESS; - return false; - } - /// /// Read-Modify-Write Operation. Updates value of 'key' using 'input' and current value. /// Pending operations are processed either using InternalRetryPendingRMW or @@ -83,7 +19,6 @@ internal bool ReinitializeExpiredRecord(r /// user context corresponding to operation used during completion callback. /// pending context created when the operation goes pending. /// Callback functions. - /// Session context /// Operation serial number /// /// @@ -110,103 +45,88 @@ internal bool ReinitializeExpiredRecord(r /// /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal OperationStatus InternalRMW( - ref Key key, ref Input input, ref Output output, - ref Context userContext, - ref PendingContext pendingContext, - FasterSession fasterSession, - FasterExecutionContext sessionCtx, - long lsn) + internal OperationStatus InternalRMW(ref Key key, ref Input input, ref Output output, ref Context userContext, + ref PendingContext pendingContext, FasterSession fasterSession, long lsn) where FasterSession : IFasterSession { - OperationStatus status = default; var latchOperation = LatchOperation.None; var latchDestination = LatchDestination.NormalProcessing; OperationStackContext stackCtx = new(comparer.GetHashCode64(ref key)); - if (sessionCtx.phase != Phase.REST) - HeavyEnter(stackCtx.hei.hash, sessionCtx, fasterSession); - - // A 'ref' variable must be initialized. If we find a record for the key, we reassign the reference. - RecordInfo dummyRecordInfo = default; - ref RecordInfo srcRecordInfo = ref dummyRecordInfo; + if (fasterSession.Ctx.phase != Phase.REST) + HeavyEnter(stackCtx.hei.hash, fasterSession.Ctx, fasterSession); - FindOrCreateTag(ref stackCtx.hei); + FindOrCreateTag(ref stackCtx.hei, hlog.BeginAddress); stackCtx.SetRecordSourceToHashEntry(hlog); - // This tracks the address pointed to by the hash bucket; it may or may not be in the readcache, in-memory, on-disk, or < BeginAddress. - // InternalContinuePendingRMW can stop comparing keys immediately above this address. 
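// Illustrative sketch (not part of the patch): ReinitializeExpiredRecord (removed here
// and re-added further down) makes a size-based decision -- an expired record may be
// reborn in place only if its already-allocated space can hold the freshly initialized
// value; otherwise the caller falls back to allocating a new record. That decision in
// isolation (ExpiredRecordPolicy is hypothetical):
static class ExpiredRecordPolicy
{
    public enum Outcome { TombstoneOnly, ReinitializeInPlace, NeedNewRecord }

    public static Outcome Decide(bool clientWantsReinsert, int currentAllocatedSize, int requiredSize)
    {
        if (!clientWantsReinsert)          // NeedInitialUpdate returned false
            return Outcome.TombstoneOnly;  // "expiration with no insertion"
        return currentAllocatedSize >= requiredSize
            ? Outcome.ReinitializeInPlace  // reuse the existing record's space
            : Outcome.NeedNewRecord;       // too small: allocate, then InitialUpdater
    }
}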
- long prevHighestKeyHashAddress = stackCtx.hei.Address; + RecordInfo dummyRecordInfo = new() { Valid = true }; + ref RecordInfo srcRecordInfo = ref TryFindRecordInMemory(ref key, ref stackCtx, hlog.HeadAddress) + ? ref stackCtx.recSrc.GetInfo() + : ref dummyRecordInfo; + if (srcRecordInfo.IsClosed) + return OperationStatus.RETRY_LATER; - TryFindRecordInMemory(ref key, ref stackCtx, hlog.HeadAddress); + // These track the latest main-log address in the tag chain; InternalContinuePendingRMW uses them to check for new inserts. + pendingContext.InitialEntryAddress = stackCtx.hei.Address; + pendingContext.InitialLatestLogicalAddress = stackCtx.recSrc.LatestLogicalAddress; RMWInfo rmwInfo = new() { - SessionType = fasterSession.SessionType, - Version = sessionCtx.version, - SessionID = sessionCtx.sessionID, + Version = fasterSession.Ctx.version, + SessionID = fasterSession.Ctx.sessionID, Address = stackCtx.recSrc.LogicalAddress, KeyHash = stackCtx.hei.hash }; - #region Entry latch operation if necessary - if (sessionCtx.phase != Phase.REST) - { - latchDestination = AcquireLatchRMW(pendingContext, sessionCtx, ref stackCtx.hei, ref status, ref latchOperation, stackCtx.recSrc.LogicalAddress); - if (latchDestination == LatchDestination.Retry) - goto LatchRelease; - } - #endregion Entry latch operation if necessary + if (!TryTransientXLock(fasterSession, ref key, ref stackCtx, out OperationStatus status)) + return status; // We must use try/finally to ensure unlocking even in the presence of exceptions. try { - #region Address and source record checks - + #region Address and source record checks + if (stackCtx.recSrc.HasReadCacheSrc) { // Use the readcache record as the CopyUpdater source. - goto LockSourceRecord; + goto CreateNewRecord; } - else if (stackCtx.recSrc.LogicalAddress >= hlog.ReadOnlyAddress && latchDestination == LatchDestination.NormalProcessing) + + // Check for CPR consistency after checking if source is readcache. + if (fasterSession.Ctx.phase != Phase.REST) { - // Mutable Region: Update the record in-place - srcRecordInfo = ref hlog.GetInfo(stackCtx.recSrc.PhysicalAddress); - if (!TryEphemeralXLock(fasterSession, ref stackCtx.recSrc, ref srcRecordInfo, out status)) + latchDestination = CheckCPRConsistencyRMW(fasterSession.Ctx.phase, ref stackCtx, ref status, ref latchOperation); + if (latchDestination == LatchDestination.Retry) goto LatchRelease; + } + if (stackCtx.recSrc.LogicalAddress >= hlog.ReadOnlyAddress && latchDestination == LatchDestination.NormalProcessing) + { + // Mutable Region: Update the record in-place. 
We perform mutable updates only if we are in normal processing phase of checkpointing if (srcRecordInfo.Tombstone) goto CreateNewRecord; - if (!srcRecordInfo.IsValidUpdateOrLockSource) - { - EphemeralXUnlockAndAbandonUpdate(fasterSession, ref stackCtx.recSrc, ref srcRecordInfo); - srcRecordInfo = ref dummyRecordInfo; - goto CreateNewRecord; - } - rmwInfo.RecordInfo = srcRecordInfo; - if (fasterSession.InPlaceUpdater(ref key, ref input, ref hlog.GetValue(stackCtx.recSrc.PhysicalAddress), ref output, ref srcRecordInfo, ref rmwInfo, out status) + if (fasterSession.InPlaceUpdater(ref key, ref input, ref stackCtx.recSrc.GetValue(), ref output, ref srcRecordInfo, ref rmwInfo, out status, out stackCtx.recSrc.ephemeralLockResult) || (rmwInfo.Action == RMWAction.ExpireAndStop)) { - this.MarkPage(stackCtx.recSrc.LogicalAddress, sessionCtx); + this.MarkPage(stackCtx.recSrc.LogicalAddress, fasterSession.Ctx); // ExpireAndStop means to override default Delete handling (which is to go to InitialUpdater) by leaving the tombstoned record as current. - // Our IFasterSession.InPlaceUpdater implementation has already reinitialized-in-place or set Tombstone as appropriate (inside the ephemeral lock) - // and marked the record. + // Our IFasterSession.InPlaceUpdater implementation has already reinitialized-in-place or set Tombstone as appropriate and marked the record. pendingContext.recordInfo = srcRecordInfo; pendingContext.logicalAddress = stackCtx.recSrc.LogicalAddress; goto LatchRelease; } + // Note: stackCtx.recSrc.ephemeralLockResult == Failed was already handled by 'out status' above if (OperationStatusUtils.BasicOpCode(status) != OperationStatus.SUCCESS) goto LatchRelease; // InPlaceUpdater failed (e.g. insufficient space, another thread set Tombstone, etc). Use this record as the CopyUpdater source. - stackCtx.recSrc.HasMainLogSrc = true; goto CreateNewRecord; } - else if (stackCtx.recSrc.LogicalAddress >= hlog.SafeReadOnlyAddress && !hlog.GetInfo(stackCtx.recSrc.PhysicalAddress).Tombstone && latchDestination == LatchDestination.NormalProcessing) + else if (stackCtx.recSrc.LogicalAddress >= hlog.SafeReadOnlyAddress && !stackCtx.recSrc.GetInfo().Tombstone && latchDestination == LatchDestination.NormalProcessing) { // Fuzzy Region: Must retry after epoch refresh, due to lost-update anomaly status = OperationStatus.RETRY_LATER; @@ -215,8 +135,7 @@ internal OperationStatus InternalRMW( else if (stackCtx.recSrc.LogicalAddress >= hlog.HeadAddress) { // Safe Read-Only Region: CopyUpdate to create a record in the mutable region - stackCtx.recSrc.HasMainLogSrc = true; - goto LockSourceRecord; + goto CreateNewRecord; } else if (stackCtx.recSrc.LogicalAddress >= hlog.BeginAddress) { @@ -227,44 +146,22 @@ internal OperationStatus InternalRMW( } else { - // No record exists - check for lock before creating new record. First ensure any record lock has transitioned to the LockTable. - SpinWaitUntilRecordIsClosed(ref key, stackCtx.hei.hash, stackCtx.recSrc.LogicalAddress, hlog); - Debug.Assert(!fasterSession.IsManualLocking || LockTable.IsLockedExclusive(ref key, stackCtx.hei.hash), "A Lockable-session RMW() of an on-disk or non-existent key requires a LockTable lock"); - if (LockTable.IsActive && !fasterSession.DisableEphemeralLocking - && !LockTable.TryLockEphemeral(ref key, stackCtx.hei.hash, LockType.Exclusive, out stackCtx.recSrc.HasLockTableLock)) - { - status = OperationStatus.RETRY_LATER; - goto LatchRelease; - } + // No record exists - create new record. 
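// Illustrative sketch (not part of the patch): RMW distinguishes one more region than
// the classifier sketched earlier -- between SafeReadOnlyAddress and ReadOnlyAddress
// (the "fuzzy region"), threads under older epochs may still update the record in
// place, so copy-updating from it risks the lost-update anomaly, and the only safe
// answer is RETRY_LATER after an epoch refresh. Names hypothetical:
enum RmwRegion { MutableInPlace, FuzzyMustRetry, SafeReadOnlyCopyUpdate, OnDiskPending, NoRecordInitialUpdate }

static class RmwRegions
{
    public static RmwRegion Classify(long address, long readOnlyAddress, long safeReadOnlyAddress,
                                     long headAddress, long beginAddress)
    {
        if (address >= readOnlyAddress)     return RmwRegion.MutableInPlace;        // try InPlaceUpdater
        if (address >= safeReadOnlyAddress) return RmwRegion.FuzzyMustRetry;        // lost-update hazard
        if (address >= headAddress)         return RmwRegion.SafeReadOnlyCopyUpdate;// CopyUpdater to tail
        if (address >= beginAddress)        return RmwRegion.OnDiskPending;         // go pending for I/O
        return RmwRegion.NoRecordInitialUpdate;                                     // InitialUpdater
    }
}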
+ Debug.Assert(!fasterSession.IsManualLocking || LockTable.IsLockedExclusive(ref key, ref stackCtx.hei), "A Lockable-session RMW() of an on-disk or non-existent key requires a LockTable lock"); goto CreateNewRecord; } - #endregion Address and source record checks - #region Lock source record - LockSourceRecord: - // This would be a local function to reduce "goto", but 'ref' variables and parameters aren't supported on local functions. - srcRecordInfo = ref stackCtx.recSrc.GetSrcRecordInfo(); - if (!TryEphemeralXLock(fasterSession, ref stackCtx.recSrc, ref srcRecordInfo, out status)) - goto LatchRelease; - if (!srcRecordInfo.IsValidUpdateOrLockSource) - { - EphemeralXUnlockAndAbandonUpdate(fasterSession, ref stackCtx.recSrc, ref srcRecordInfo); - srcRecordInfo = ref dummyRecordInfo; - } - goto CreateNewRecord; - #endregion Lock source record - #region Create new record CreateNewRecord: if (latchDestination != LatchDestination.CreatePendingContext) { Value tempValue = default; - ref var value = ref (stackCtx.recSrc.HasInMemorySrc ? ref stackCtx.recSrc.GetSrcValue() : ref tempValue); + ref var value = ref (stackCtx.recSrc.HasInMemorySrc ? ref stackCtx.recSrc.GetValue() : ref tempValue); // Here, the input* data for 'doingCU' is the same as recSrc. - status = CreateNewRecordRMW(ref key, ref input, ref value, ref output, ref pendingContext, fasterSession, sessionCtx, ref stackCtx, ref srcRecordInfo, - inputSrc: ref stackCtx.recSrc, inputRecordInfo: srcRecordInfo); + status = CreateNewRecordRMW(ref key, ref input, ref value, ref output, ref pendingContext, fasterSession, ref stackCtx, ref srcRecordInfo, + doingCU: stackCtx.recSrc.HasInMemorySrc && !srcRecordInfo.Tombstone); if (!OperationStatusUtils.IsAppend(status)) { // OperationStatus.SUCCESS is OK here; it means NeedCopyUpdate or NeedInitialUpdate returned false @@ -280,8 +177,12 @@ internal OperationStatus InternalRMW( } finally { - stackCtx.HandleNewRecordOnError(this); - EphemeralXUnlockAfterUpdate(fasterSession, ref key, ref stackCtx, ref srcRecordInfo); + // On success, we call UnlockAndSeal. Non-success includes the source address going below HeadAddress, in which case we rely on + // recordInfo.ClearBitsForDiskImages clearing locks and Seal. 
+ if (stackCtx.recSrc.ephemeralLockResult == EphemeralLockResult.HoldForSeal && stackCtx.recSrc.LogicalAddress >= hlog.HeadAddress && srcRecordInfo.IsLocked) + srcRecordInfo.UnlockExclusive(); + stackCtx.HandleNewRecordOnException(this); + TransientXUnlock(fasterSession, ref key, ref stackCtx); } #region Create pending context @@ -299,11 +200,9 @@ internal OperationStatus InternalRMW( heapConvertible.ConvertToHeap(); pendingContext.userContext = userContext; - pendingContext.PrevLatestLogicalAddress = stackCtx.recSrc.LatestLogicalAddress; // InternalContinuePendingRMW compares to this to see if a new record was spliced in pendingContext.logicalAddress = stackCtx.recSrc.LogicalAddress; - pendingContext.version = sessionCtx.version; + pendingContext.version = fasterSession.Ctx.version; pendingContext.serialNum = lsn; - pendingContext.PrevHighestKeyHashAddress = prevHighestKeyHashAddress; } #endregion @@ -328,59 +227,98 @@ internal OperationStatus InternalRMW( return status; } - private LatchDestination AcquireLatchRMW(PendingContext pendingContext, FasterExecutionContext sessionCtx, - ref HashEntryInfo hei, ref OperationStatus status, ref LatchOperation latchOperation, long logicalAddress) + private LatchDestination CheckCPRConsistencyRMW(Phase phase, ref OperationStackContext stackCtx, ref OperationStatus status, ref LatchOperation latchOperation) + { + if (!this.DoTransientLocking) + return AcquireCPRLatchRMW(phase, ref stackCtx, ref status, ref latchOperation); + + // This is AcquireCPRLatchRMW without the bucket latching, since we already have a latch on either the bucket or the recordInfo. + // See additional comments in AcquireCPRLatchRMW. + + switch (phase) + { + case Phase.PREPARE: // Thread is in V + if (!IsEntryVersionNew(ref stackCtx.hei.entry)) + break; // Normal Processing; thread is in V, record is in V + + status = OperationStatus.CPR_SHIFT_DETECTED; + return LatchDestination.Retry; // Pivot Thread for retry (do not operate on v+1 record when thread is in V) + + case Phase.IN_PROGRESS: // Thread is in v+1 + case Phase.WAIT_INDEX_CHECKPOINT: + case Phase.WAIT_FLUSH: + if (IsRecordVersionNew(stackCtx.recSrc.LogicalAddress)) + break; // Normal Processing; V+1 thread encountered a record in V+1 + + if (stackCtx.recSrc.LogicalAddress >= hlog.HeadAddress) + return LatchDestination.CreateNewRecord; // Record is in memory so force creation of a (V+1) record + break; // Normal Processing; the record is below HeadAddress so the operation will go pending + + default: + break; + } + return LatchDestination.NormalProcessing; + } + + private LatchDestination AcquireCPRLatchRMW(Phase phase, ref OperationStackContext stackCtx, ref OperationStatus status, ref LatchOperation latchOperation) { - switch (sessionCtx.phase) + // The idea of CPR is that if a thread in version V tries to perform an operation and notices a record in V+1, it needs to back off and run CPR_SHIFT_DETECTED. + // Similarly, a V+1 thread cannot update a V record; it needs to do a read-copy-update (or upsert at tail) instead of an in-place update. + // 1. V threads take shared lock on bucket + // 2. V+1 threads take exclusive lock on bucket, refreshing until they can + // 3. If V thread cannot take shared lock, that means the system is in V+1 so we can immediately refresh and go to V+1 (do CPR_SHIFT_DETECTED) + // 4. 
If V thread manages to get shared lock, but encounters a V+1 record, it knows the system is in V+1 so it will do CPR_SHIFT_DETECTED + + switch (phase) { - case Phase.PREPARE: + case Phase.PREPARE: // Thread is in V + if (HashBucket.TryAcquireSharedLatch(ref stackCtx.hei)) { - if (HashBucket.TryAcquireSharedLatch(ref hei)) - { - // Set to release shared latch (default) - latchOperation = LatchOperation.Shared; - if (CheckBucketVersionNew(ref hei.entry)) - { - status = OperationStatus.CPR_SHIFT_DETECTED; - return LatchDestination.Retry; // Pivot Thread for retry - } - break; // Normal Processing - } - else + // Set to release shared latch (default) + latchOperation = LatchOperation.Shared; + + // Here (and in InternalRead, AcquireLatchUpsert, and AcquireLatchDelete) we still check the tail record of the bucket (entry.Address) + // rather than the traced record (logicalAddress), because allowing in-place updates for version V when the bucket has arrived at V+1 may have + // complications we haven't investigated yet. This is safer but potentially unnecessary, and this case is so rare that the potential + // inefficiency is not a concern. + if (IsEntryVersionNew(ref stackCtx.hei.entry)) { status = OperationStatus.CPR_SHIFT_DETECTED; - return LatchDestination.Retry; // Pivot Thread for retry + return LatchDestination.Retry; // Pivot Thread for retry (do not operate on v+1 record when thread is in V) } + break; // Normal Processing; thread is in V, record is in V } - case Phase.IN_PROGRESS: + + // Could not acquire Shared latch; system must be in V+1 (or we have too many shared latches). + status = OperationStatus.CPR_SHIFT_DETECTED; + return LatchDestination.Retry; // Pivot Thread for retry + + case Phase.IN_PROGRESS: // Thread is in v+1 + if (IsRecordVersionNew(stackCtx.recSrc.LogicalAddress)) + break; // Normal Processing; V+1 thread encountered a record in V+1 + + if (HashBucket.TryAcquireExclusiveLatch(ref stackCtx.hei)) { - if (!CheckEntryVersionNew(logicalAddress)) - { - if (HashBucket.TryAcquireExclusiveLatch(ref hei)) - { - // Set to release exclusive latch (default) - latchOperation = LatchOperation.Exclusive; - if (logicalAddress >= hlog.HeadAddress) - return LatchDestination.CreateNewRecord; // Create a (v+1) record - } - else - { - status = OperationStatus.RETRY_LATER; - return LatchDestination.Retry; // Refresh and retry - } - } - break; // Normal Processing + // Set to release exclusive latch (default) + latchOperation = LatchOperation.Exclusive; + if (stackCtx.recSrc.LogicalAddress >= hlog.HeadAddress) + return LatchDestination.CreateNewRecord; // Record is in memory so force creation of a (V+1) record + break; // Normal Processing; the record is below HeadAddress so the operation will go pending } - case Phase.WAIT_INDEX_CHECKPOINT: + + // Could not acquire exclusive latch; likely a conflict on the bucket. 
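// Illustrative sketch (not part of the patch): the numbered protocol in the comment
// above, reduced to its two decision tables. A V thread backs off (CPR_SHIFT_DETECTED)
// if it cannot take the shared bucket latch or sees a V+1 record; a V+1 thread never
// updates a V record in place, it forces a new (V+1) copy at the tail when the record
// is in memory. CprGate and its members are hypothetical.
static class CprGate
{
    public enum Action { ProceedInPlace, BackOffAndShiftVersion, CopyToNewVersion, GoPending }

    // Thread in version V (Phase.PREPARE).
    public static Action DecideForVThread(bool gotSharedLatch, bool sawNewVersionRecord)
        => gotSharedLatch && !sawNewVersionRecord
            ? Action.ProceedInPlace          // thread in V, record in V: normal processing
            : Action.BackOffAndShiftVersion; // system has (or may have) moved to V+1: refresh, retry as V+1

    // Thread in version V+1 (IN_PROGRESS / WAIT_INDEX_CHECKPOINT / WAIT_FLUSH), assuming
    // it holds whatever latch its phase requires.
    public static Action DecideForVPlus1Thread(bool recordIsNewVersion, bool recordInMemory)
    {
        if (recordIsNewVersion) return Action.ProceedInPlace; // V+1 thread, V+1 record
        return recordInMemory ? Action.CopyToNewVersion       // force an RCU into V+1
                              : Action.GoPending;             // below HeadAddress: operation goes pending
    }
}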
+ status = OperationStatus.RETRY_LATER; + return LatchDestination.Retry; // Refresh and retry + + case Phase.WAIT_INDEX_CHECKPOINT: // Thread is in V+1 case Phase.WAIT_FLUSH: - { - if (!CheckEntryVersionNew(logicalAddress)) - { - if (logicalAddress >= hlog.HeadAddress) - return LatchDestination.CreateNewRecord; // Create a (v+1) record - } - break; // Normal Processing - } + if (IsRecordVersionNew(stackCtx.recSrc.LogicalAddress)) + break; // Normal Processing; V+1 thread encountered a record in V+1 + + if (stackCtx.recSrc.LogicalAddress >= hlog.HeadAddress) + return LatchDestination.CreateNewRecord; // Record is in memory so force creation of a (V+1) record + break; // Normal Processing; the record is below HeadAddress so the operation will go pending + default: break; } @@ -400,57 +338,35 @@ private LatchDestination AcquireLatchRMW(PendingContext< /// The result of IFunctions.SingleWriter /// Information about the operation context /// The current session - /// The current session context // TODO can this be replaced with fasterSession.clientSession.ctx? /// Contains the and structures for this operation, - /// and allows passing back the newLogicalAddress for invalidation in the case of exceptions. + /// and allows passing back the newLogicalAddress for invalidation in the case of exceptions. If called from pending IO, + /// this is populated from the data read from disk. /// If ., - /// this is the for - /// If , this is populated from the request record; otherwise it is . - /// If , this is the for the request record. - /// Otherwise it is a copy of . - /// Whether we are being called from pending path - /// vs. is a critically important distinction for pending RMW: - /// - /// NonPending: is the usual source for locking and copying; is an alias - /// Pending: is the source for locking *only*; contains the property valus for actual data read from disk. - /// In particular: - /// - /// . always is hlog; . - /// may be the readcache. Cross-log address accesses are a Bad Thing. - /// . reflects the request's logicalAddress rather than the locking record's - /// has no readcache or LockTable information. - /// - /// Therefore, for Pending RMW it is important to use rather than for all operations on input data. - /// - /// + /// this is the for . Otherwise, if called from pending IO, + /// this is the read from disk. If neither of these, it is a default . + /// Whether we are doing a CopyUpdate, either from in-memory or pending IO /// private OperationStatus CreateNewRecordRMW(ref Key key, ref Input input, ref Value value, ref Output output, ref PendingContext pendingContext, FasterSession fasterSession, - FasterExecutionContext sessionCtx, - ref OperationStackContext stackCtx, ref RecordInfo srcRecordInfo, - ref RecordSource inputSrc, RecordInfo inputRecordInfo, bool fromPending = false) + ref OperationStackContext stackCtx, ref RecordInfo srcRecordInfo, bool doingCU) where FasterSession : IFasterSession { bool forExpiration = false; - // Alias this here - var doingCU = inputSrc.HasInMemorySrc && !srcRecordInfo.Tombstone; - RetryNow: RMWInfo rmwInfo = new() { - SessionType = fasterSession.SessionType, - Version = sessionCtx.version, - SessionID = sessionCtx.sessionID, - Address = inputSrc.HasMainLogSrc ? inputSrc.LogicalAddress : Constants.kInvalidAddress, + Version = fasterSession.Ctx.version, + SessionID = fasterSession.Ctx.sessionID, + Address = doingCU && !stackCtx.recSrc.HasReadCacheSrc ? 
stackCtx.recSrc.LogicalAddress : Constants.kInvalidAddress, KeyHash = stackCtx.hei.hash }; // Perform Need* if (doingCU) { - rmwInfo.RecordInfo = inputRecordInfo; + rmwInfo.RecordInfo = srcRecordInfo; if (!fasterSession.NeedCopyUpdate(ref key, ref input, ref value, ref output, ref rmwInfo)) { if (rmwInfo.Action == RMWAction.CancelOperation) @@ -474,29 +390,14 @@ private OperationStatus CreateNewRecordRMW recSrc.LatestLogicalAddress (the .PreviousAddress and CAS comparison value). - do - { - if (!BlockAllocate(allocatedSize, out newLogicalAddress, ref pendingContext, out status)) - return status; - newPhysicalAddress = hlog.GetPhysicalAddress(newLogicalAddress); - - if (!VerifyInMemoryAddresses(ref stackCtx, stackCtx.recSrc.HasReadCacheSrc ? stackCtx.recSrc.LogicalAddress | Constants.kReadCacheBitMask : Constants.kInvalidAddress)) - { - SaveAllocationForRetry(ref pendingContext, newLogicalAddress, newPhysicalAddress, allocatedSize); - return OperationStatus.RETRY_LATER; - } - } while (newLogicalAddress < stackCtx.recSrc.LatestLogicalAddress); - } + if (!TryAllocateRecord(ref pendingContext, ref stackCtx, allocatedSize, recycle: true, out long newLogicalAddress, out long newPhysicalAddress, out OperationStatus status)) + return status; - ref RecordInfo newRecordInfo = ref WriteTentativeInfo(ref key, hlog, newPhysicalAddress, inNewVersion: sessionCtx.InNewVersion, tombstone: false, stackCtx.recSrc.LatestLogicalAddress); - stackCtx.newLogicalAddress = newLogicalAddress; + ref RecordInfo newRecordInfo = ref WriteNewRecordInfo(ref key, hlog, newPhysicalAddress, inNewVersion: fasterSession.Ctx.InNewVersion, tombstone: false, stackCtx.recSrc.LatestLogicalAddress); + stackCtx.SetNewRecord(newLogicalAddress); rmwInfo.Address = newLogicalAddress; rmwInfo.KeyHash = stackCtx.hei.hash; @@ -543,8 +444,8 @@ private OperationStatus CreateNewRecordRMW(ref key, ref input, ref newRecordValue, ref output, ref newRecordInfo, + ref rmwInfo, newLogicalAddress, fasterSession, isIpu: false, out status)) { // An IPU was not (or could not) be done. Cancel if requested, else invalidate the allocated record and retry. if (status == OperationStatus.CANCELED) @@ -563,11 +464,10 @@ private OperationStatus CreateNewRecordRMW(fasterSession, ref key, ref stackCtx, ref srcRecordInfo, ref newRecordInfo, out var lockStatus)) - return lockStatus; + PostInsertAtTail(ref key, ref stackCtx, ref srcRecordInfo); // If IU, status will be NOTFOUND; return that. if (!doingCU) @@ -580,11 +480,13 @@ private OperationStatus CreateNewRecordRMW(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, ref RMWInfo rmwInfo, + long logicalAddress, FasterSession fasterSession, bool isIpu, out OperationStatus status) + where FasterSession : IFasterSession + { + // This is called for InPlaceUpdater or CopyUpdater only; CopyUpdater however does not copy an expired record, so we return CreatedRecord. + var advancedStatusCode = isIpu ? StatusCode.InPlaceUpdatedRecord : StatusCode.CreatedRecord; + advancedStatusCode |= StatusCode.Expired; + if (!fasterSession.NeedInitialUpdate(ref key, ref input, ref output, ref rmwInfo)) + { + if (rmwInfo.Action == RMWAction.CancelOperation) + { + status = OperationStatus.CANCELED; + return false; + } + + // Expiration with no insertion. 
+ recordInfo.Tombstone = true; + status = OperationStatusUtils.AdvancedOpCode(OperationStatus.NOTFOUND, advancedStatusCode); + return true; + } + + // Try to reinitialize in place + (var currentSize, _) = hlog.GetRecordSize(ref key, ref value); + (var requiredSize, _) = hlog.GetInitialRecordSize(ref key, ref input, fasterSession); + + if (currentSize >= requiredSize) + { + if (fasterSession.InitialUpdater(ref key, ref input, ref value, ref output, ref recordInfo, ref rmwInfo)) + { + // If IPU path, we need to complete PostInitialUpdater as well + if (isIpu) + fasterSession.PostInitialUpdater(ref key, ref input, ref value, ref output, ref recordInfo, ref rmwInfo); + + status = OperationStatusUtils.AdvancedOpCode(OperationStatus.SUCCESS, advancedStatusCode); + return true; + } + else + { + if (rmwInfo.Action == RMWAction.CancelOperation) + { + status = OperationStatus.CANCELED; + return false; + } + else + { + // Expiration with no insertion. + recordInfo.Tombstone = true; + status = OperationStatusUtils.AdvancedOpCode(OperationStatus.NOTFOUND, advancedStatusCode); + return true; + } + } + } + + // Reinitialization in place was not possible. InternalRMW will do the following based on who called this: + // IPU: move to the NIU->allocate->IU path + // CU: caller invalidates allocation, retries operation as NIU->allocate->IU + status = OperationStatus.SUCCESS; + return false; + } } } diff --git a/cs/src/core/Index/FASTER/Implementation/InternalRead.cs b/cs/src/core/Index/FASTER/Implementation/InternalRead.cs index 3ebaf3b00..52f92da07 100644 --- a/cs/src/core/Index/FASTER/Implementation/InternalRead.cs +++ b/cs/src/core/Index/FASTER/Implementation/InternalRead.cs @@ -20,7 +20,6 @@ public unsafe partial class FasterKV : FasterBase, IFasterKVUser context for the operation, in case it goes pending. /// Pending context used internally to store the context of the operation. /// Callback functions. - /// Session context /// Operation serial number /// /// @@ -43,35 +42,23 @@ public unsafe partial class FasterKV : FasterBase, IFasterKV /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal OperationStatus InternalRead( - ref Key key, - ref Input input, - ref Output output, - long startAddress, - ref Context userContext, - ref PendingContext pendingContext, - FasterSession fasterSession, - FasterExecutionContext sessionCtx, - long lsn) + internal OperationStatus InternalRead(ref Key key, ref Input input, ref Output output, + long startAddress, ref Context userContext, ref PendingContext pendingContext, + FasterSession fasterSession, long lsn) where FasterSession : IFasterSession { OperationStackContext stackCtx = new(comparer.GetHashCode64(ref key)); - if (sessionCtx.phase != Phase.REST) - HeavyEnter(stackCtx.hei.hash, sessionCtx, fasterSession); + if (fasterSession.Ctx.phase != Phase.REST) + HeavyEnter(stackCtx.hei.hash, fasterSession.Ctx, fasterSession); #region Trace back for record in readcache and in-memory HybridLog - // This tracks the address pointed to by the hash bucket; it may or may not be in the readcache, in-memory, on-disk, or < BeginAddress (in which - // case we return NOTFOUND and this value is not used). InternalContinuePendingRead can stop comparing keys immediately above this address. 
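// ====================================================================================================
// [Editor's note] Illustrative sketch, not part of this diff: the expired-record helper above has
// three outcomes (cancellation elided for brevity). All names below are hypothetical stand-ins.
enum ExpiredOutcome { TombstoneNoReinsert, ReinitializedInPlace, NeedsNewRecord }

static ExpiredOutcome HandleExpiredRecord(bool needInitialUpdate, int currentRecordSize, int requiredInitialSize, bool initialUpdaterSucceeded)
{
    if (!needInitialUpdate)
        return ExpiredOutcome.TombstoneNoReinsert;      // expiration with no insertion: Tombstone it, report NOTFOUND | Expired
    if (currentRecordSize >= requiredInitialSize && initialUpdaterSucceeded)
        return ExpiredOutcome.ReinitializedInPlace;     // the new initial value fits in the existing allocation
    return ExpiredOutcome.NeedsNewRecord;               // caller falls back to the NeedInitialUpdate -> allocate -> InitialUpdater path
}
// ==== [end editor's sketch] =========================================================================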
- long prevHighestKeyHashAddress = Constants.kInvalidAddress; - var useStartAddress = startAddress != Constants.kInvalidAddress && !pendingContext.HasMinAddress; if (!useStartAddress) { if (!FindTag(ref stackCtx.hei) || (!stackCtx.hei.IsReadCache && stackCtx.hei.Address < pendingContext.minAddress)) return OperationStatus.NOTFOUND; - prevHighestKeyHashAddress = stackCtx.hei.Address; } else { @@ -87,14 +74,13 @@ internal OperationStatus InternalRead( OperationStatus status; if (UseReadCache) { - // TODO doc: DisableReadCacheReads is used by readAtAddress, e.g. to backtrack to previous versions. - // Verify this can be done outside the locking scheme (maybe skip ephemeral locking entirely for readAtAddress) + // DisableReadCacheReads is used by readAtAddress, e.g. to backtrack to previous versions. if (pendingContext.DisableReadCacheReads || pendingContext.NoKey) { - SkipReadCache(ref stackCtx.hei, ref stackCtx.recSrc.LogicalAddress); + SkipReadCache(ref stackCtx, out _); // This may refresh, but we haven't examined HeadAddress yet stackCtx.SetRecordSourceToHashEntry(hlog); } - else if (ReadFromCache(ref key, ref input, ref output, ref stackCtx, ref pendingContext, fasterSession, sessionCtx, out status) + else if (ReadFromCache(ref key, ref input, ref output, ref stackCtx, ref pendingContext, fasterSession, out status) || status != OperationStatus.SUCCESS) { return status; @@ -104,73 +90,57 @@ internal OperationStatus InternalRead( // Traceback for key match if (stackCtx.recSrc.LogicalAddress >= hlog.HeadAddress) { - stackCtx.recSrc.PhysicalAddress = hlog.GetPhysicalAddress(stackCtx.recSrc.LogicalAddress); + stackCtx.recSrc.SetPhysicalAddress(); if (!pendingContext.NoKey) { var minAddress = pendingContext.minAddress > hlog.HeadAddress ? pendingContext.minAddress : hlog.HeadAddress; TraceBackForKeyMatch(ref key, ref stackCtx.recSrc, minAddress); } else - key = ref hlog.GetKey(stackCtx.recSrc.PhysicalAddress); // We do not have the key in the call and must use the key from the record. + key = ref stackCtx.recSrc.GetKey(); // We do not have the key in the call and must use the key from the record. } #endregion - if (sessionCtx.phase == Phase.PREPARE && CheckBucketVersionNew(ref stackCtx.hei.entry)) - { + // These track the latest main-log address in the tag chain; InternalContinuePendingRead uses them to check for new inserts. + pendingContext.InitialEntryAddress = stackCtx.hei.Address; + pendingContext.InitialLatestLogicalAddress = stackCtx.recSrc.LatestLogicalAddress; + + // V threads cannot access V+1 records. Use the latest logical address rather than the traced address (logicalAddress) per comments in AcquireCPRLatchRMW. 
+ if (fasterSession.Ctx.phase == Phase.PREPARE && IsEntryVersionNew(ref stackCtx.hei.entry)) return OperationStatus.CPR_SHIFT_DETECTED; // Pivot thread; retry - } #region Normal processing - // Mutable region (even fuzzy region is included here) if (stackCtx.recSrc.LogicalAddress >= hlog.SafeReadOnlyAddress) { - return ReadFromMutableRegion(ref key, ref input, ref output, ref stackCtx, ref pendingContext, fasterSession, sessionCtx); + // Mutable region (even fuzzy region is included here) + return ReadFromMutableRegion(ref key, ref input, ref output, useStartAddress, ref stackCtx, ref pendingContext, fasterSession); } - - // Immutable region else if (stackCtx.recSrc.LogicalAddress >= hlog.HeadAddress) { - status = ReadFromImmutableRegion(ref key, ref input, ref output, useStartAddress, ref stackCtx, ref pendingContext, fasterSession, sessionCtx); + // Immutable region + status = ReadFromImmutableRegion(ref key, ref input, ref output, useStartAddress, ref stackCtx, ref pendingContext, fasterSession); if (status == OperationStatus.ALLOCATE_FAILED && pendingContext.IsAsync) // May happen due to CopyToTailFromReadOnly goto CreatePendingContext; return status; } - - // On-Disk Region - else + else if (stackCtx.recSrc.LogicalAddress >= hlog.BeginAddress) { - SpinWaitUntilAddressIsClosed(stackCtx.recSrc.LogicalAddress, hlog); - - if (stackCtx.recSrc.LogicalAddress >= hlog.BeginAddress) - { - Debug.Assert(!fasterSession.IsManualLocking || LockTable.IsLocked(ref key, stackCtx.hei.hash), "A Lockable-session Read() of an on-disk key requires a LockTable lock"); + // On-Disk Region + Debug.Assert(!fasterSession.IsManualLocking || LockTable.IsLocked(ref key, ref stackCtx.hei), "A Lockable-session Read() of an on-disk key requires a LockTable lock"); - // Note: we do not lock here; we wait until reading from disk, then lock in the InternalContinuePendingRead chain. - if (hlog.IsNullDevice) - return OperationStatus.NOTFOUND; - - status = OperationStatus.RECORD_ON_DISK; - if (sessionCtx.phase == Phase.PREPARE) - { - if (!useStartAddress) - { - // Failure to latch indicates CPR_SHIFT, but don't hold on to shared latch during IO - if (HashBucket.TryAcquireSharedLatch(ref stackCtx.hei)) - HashBucket.ReleaseSharedLatch(ref stackCtx.hei); - else - return OperationStatus.CPR_SHIFT_DETECTED; - } - } - - goto CreatePendingContext; - } - else - { - // No record found - Debug.Assert(!fasterSession.IsManualLocking || LockTable.IsLocked(ref key, stackCtx.hei.hash), "A Lockable-session Read() of a non-existent key requires a LockTable lock"); + // Note: we do not lock here; we wait until reading from disk, then lock in the InternalContinuePendingRead chain. 
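// ====================================================================================================
// [Editor's note] Illustrative sketch, not part of this diff: InternalRead above dispatches on where
// the traced address falls in the log. Self-contained summary; 'ReadRegion' and 'Classify' are
// hypothetical names.
enum ReadRegion { Mutable, Immutable, OnDisk, NotFound }

static ReadRegion Classify(long address, long safeReadOnlyAddress, long headAddress, long beginAddress)
{
    if (address >= safeReadOnlyAddress) return ReadRegion.Mutable;      // in-place readable (the fuzzy region is included here)
    if (address >= headAddress)         return ReadRegion.Immutable;    // in memory but read-only; may copy to tail
    if (address >= beginAddress)        return ReadRegion.OnDisk;       // go pending with RECORD_ON_DISK
    return ReadRegion.NotFound;                                         // below BeginAddress: NOTFOUND
}
// ==== [end editor's sketch] =========================================================================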
+ if (hlog.IsNullDevice) return OperationStatus.NOTFOUND;
- }
+
+ status = OperationStatus.RECORD_ON_DISK;
+ goto CreatePendingContext;
+ }
+ else
+ {
+ // No record found
+ Debug.Assert(!fasterSession.IsManualLocking || LockTable.IsLocked(ref key, ref stackCtx.hei), "A Lockable-session Read() of a non-existent key requires a LockTable lock");
+ return OperationStatus.NOTFOUND;
}
#endregion
@@ -189,148 +159,135 @@ internal OperationStatus InternalRead(
heapConvertible.ConvertToHeap();
pendingContext.userContext = userContext;
- pendingContext.PrevLatestLogicalAddress = stackCtx.recSrc.LatestLogicalAddress;
pendingContext.logicalAddress = stackCtx.recSrc.LogicalAddress;
- pendingContext.version = sessionCtx.version;
+ pendingContext.version = fasterSession.Ctx.version;
pendingContext.serialNum = lsn;
- pendingContext.PrevHighestKeyHashAddress = prevHighestKeyHashAddress;
}
#endregion
return status;
}
- private bool ReadFromCache(ref Key key, ref Input input, ref Output output,
- ref OperationStackContext stackCtx,
- ref PendingContext pendingContext, FasterSession fasterSession,
- FasterExecutionContext sessionCtx, out OperationStatus status)
+ private bool ReadFromCache(ref Key key, ref Input input, ref Output output, ref OperationStackContext stackCtx,
+ ref PendingContext pendingContext, FasterSession fasterSession, out OperationStatus status)
where FasterSession : IFasterSession
{
status = OperationStatus.SUCCESS;
- if (FindInReadCache(ref key, ref stackCtx, untilAddress: Constants.kInvalidAddress, alwaysFindLatestLA: false))
+ if (FindInReadCache(ref key, ref stackCtx, minAddress: Constants.kInvalidAddress, alwaysFindLatestLA: false))
{
// Note: When session is in PREPARE phase, a read-cache record cannot be new-version. This is because a new-version record
// insertion would have invalidated the read-cache entry, and before the new-version record can go to disk and become eligible
// to enter the read-cache, the PREPARE phase for that session will be over due to an epoch refresh.
// This is not called when looking up by address, so we can set pendingContext.recordInfo.
- ref RecordInfo srcRecordInfo = ref stackCtx.recSrc.GetSrcRecordInfo();
+ ref RecordInfo srcRecordInfo = ref stackCtx.recSrc.GetInfo();
pendingContext.recordInfo = srcRecordInfo;
ReadInfo readInfo = new()
{
- SessionType = fasterSession.SessionType,
- Version = sessionCtx.version,
+ Version = fasterSession.Ctx.version,
Address = Constants.kInvalidAddress, // ReadCache addresses are not valid for indexing etc. so pass kInvalidAddress.
RecordInfo = srcRecordInfo
};
- if (!TryEphemeralSLock(fasterSession, ref stackCtx.recSrc, ref srcRecordInfo, out status))
+ if (!TryTransientSLock(fasterSession, ref key, ref stackCtx, out status))
return false;
try
{
- if (fasterSession.SingleReader(ref key, ref input, ref stackCtx.recSrc.GetSrcValue(), ref output, ref srcRecordInfo, ref readInfo))
+ if (fasterSession.SingleReader(ref key, ref input, ref stackCtx.recSrc.GetValue(), ref output, ref srcRecordInfo, ref readInfo))
return true;
status = readInfo.Action == ReadAction.CancelOperation ?
OperationStatus.CANCELED : OperationStatus.NOTFOUND; return false; } finally { - EphemeralSUnlock(fasterSession, sessionCtx, ref pendingContext, ref key, ref stackCtx, ref srcRecordInfo); + TransientSUnlock(fasterSession, ref key, ref stackCtx, ref srcRecordInfo); } } return false; } private OperationStatus ReadFromMutableRegion(ref Key key, ref Input input, ref Output output, - ref OperationStackContext stackCtx, - ref PendingContext pendingContext, FasterSession fasterSession, - FasterExecutionContext sessionCtx) + bool useStartAddress, ref OperationStackContext stackCtx, + ref PendingContext pendingContext, FasterSession fasterSession) where FasterSession : IFasterSession { // We don't copy from this source, but we do lock it. - ref var srcRecordInfo = ref stackCtx.recSrc.GetSrcRecordInfo(); + ref var srcRecordInfo = ref stackCtx.recSrc.GetInfo(); pendingContext.recordInfo = srcRecordInfo; pendingContext.logicalAddress = stackCtx.recSrc.LogicalAddress; ReadInfo readInfo = new() { - SessionType = fasterSession.SessionType, - Version = sessionCtx.version, + Version = fasterSession.Ctx.version, Address = stackCtx.recSrc.LogicalAddress, RecordInfo = srcRecordInfo }; - if (!TryEphemeralSLock(fasterSession, ref stackCtx.recSrc, ref srcRecordInfo, out var status)) + // If we are starting from a specified address in the immutable region, we may have a Sealed record from a previous RCW. + // For this case, do not try to lock, TransientSUnlock will see that we do not have a lock so will not try to update it. + OperationStatus status = OperationStatus.SUCCESS; + if (!useStartAddress && !TryTransientSLock(fasterSession, ref key, ref stackCtx, out status)) return status; try { - if (pendingContext.ResetModifiedBit && !srcRecordInfo.TryResetModifiedAtomic()) - return OperationStatus.RETRY_LATER; if (srcRecordInfo.Tombstone) return OperationStatus.NOTFOUND; - if (fasterSession.ConcurrentReader(ref key, ref input, ref hlog.GetValue(stackCtx.recSrc.PhysicalAddress), ref output, ref srcRecordInfo, ref readInfo)) + if (fasterSession.ConcurrentReader(ref key, ref input, ref stackCtx.recSrc.GetValue(), ref output, ref srcRecordInfo, ref readInfo, out stackCtx.recSrc.ephemeralLockResult)) return OperationStatus.SUCCESS; + if (stackCtx.recSrc.ephemeralLockResult == EphemeralLockResult.Failed) + return OperationStatus.RETRY_LATER; if (readInfo.Action == ReadAction.CancelOperation) return OperationStatus.CANCELED; if (readInfo.Action == ReadAction.Expire) - { - // Our IFasterSession.ConcurrentReader implementation has already set Tombstone if appropriate. - this.MarkPage(stackCtx.recSrc.LogicalAddress, sessionCtx); - return OperationStatusUtils.AdvancedOpCode(OperationStatus.NOTFOUND, StatusCode.InPlaceUpdatedRecord | StatusCode.Expired); - } + return OperationStatusUtils.AdvancedOpCode(OperationStatus.NOTFOUND, StatusCode.Expired); return OperationStatus.NOTFOUND; } finally { - EphemeralSUnlock(fasterSession, sessionCtx, ref pendingContext, ref key, ref stackCtx, ref srcRecordInfo); + TransientSUnlock(fasterSession, ref key, ref stackCtx, ref srcRecordInfo); } } private OperationStatus ReadFromImmutableRegion(ref Key key, ref Input input, ref Output output, bool useStartAddress, ref OperationStackContext stackCtx, - ref PendingContext pendingContext, FasterSession fasterSession, - FasterExecutionContext sessionCtx) + ref PendingContext pendingContext, FasterSession fasterSession) where FasterSession : IFasterSession { // We don't copy from this source, but we do lock it. 
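// ====================================================================================================
// [Editor's note] Illustrative sketch, not part of this diff: the read paths above share one locking
// shape -- TryTransientSLock, read under try, TransientSUnlock in finally. A hypothetical generic
// distillation of that shape (requires 'using System;'):
static TResult ReadUnderSharedLock<TResult>(Func<bool> tryLock, Action unlock, Func<TResult> read, TResult retryLater)
{
    if (!tryLock())
        return retryLater;      // bucket latch contended: the caller refreshes the epoch and retries
    try
    {
        return read();          // SingleReader/ConcurrentReader against the source record
    }
    finally
    {
        unlock();               // released even if the reader throws
    }
}
// ==== [end editor's sketch] =========================================================================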
- ref var srcRecordInfo = ref stackCtx.recSrc.GetSrcRecordInfo(); + ref var srcRecordInfo = ref stackCtx.recSrc.GetInfo(); pendingContext.recordInfo = srcRecordInfo; pendingContext.logicalAddress = stackCtx.recSrc.LogicalAddress; ReadInfo readInfo = new() { - SessionType = fasterSession.SessionType, - Version = sessionCtx.version, + Version = fasterSession.Ctx.version, Address = stackCtx.recSrc.LogicalAddress, RecordInfo = srcRecordInfo }; // If we are starting from a specified address in the immutable region, we may have a Sealed record from a previous RCW. - // For this case, do not try to lock, EphemeralSUnlock will see that we do not have a lock so will not try to update it. + // For this case, do not try to lock, TransientSUnlock will see that we do not have a lock so will not try to update it. OperationStatus status = OperationStatus.SUCCESS; - if (!useStartAddress && !TryEphemeralSLock(fasterSession, ref stackCtx.recSrc, ref srcRecordInfo, out status)) + if (!useStartAddress && !TryTransientSLock(fasterSession, ref key, ref stackCtx, out status)) return status; try { - if (pendingContext.ResetModifiedBit && !srcRecordInfo.TryResetModifiedAtomic()) - return OperationStatus.RETRY_LATER; if (srcRecordInfo.Tombstone) return OperationStatus.NOTFOUND; - ref Value recordValue = ref stackCtx.recSrc.GetSrcValue(); + ref Value recordValue = ref stackCtx.recSrc.GetValue(); - if (fasterSession.SingleReader(ref key, ref input, ref recordValue, ref output, ref srcRecordInfo, ref readInfo) - || readInfo.Action == ReadAction.Expire) + if (fasterSession.SingleReader(ref key, ref input, ref recordValue, ref output, ref srcRecordInfo, ref readInfo)) { - if (pendingContext.CopyReadsToTailFromReadOnly || readInfo.Action == ReadAction.Expire) // Expire adds a tombstoned record to tail + if (pendingContext.CopyReadsToTailFromReadOnly) { - status = InternalTryCopyToTail(sessionCtx, ref pendingContext, ref key, ref input, ref recordValue, ref output, ref stackCtx, + status = InternalTryCopyToTail(ref pendingContext, ref key, ref input, ref recordValue, ref output, ref stackCtx, ref srcRecordInfo, untilLogicalAddress: stackCtx.recSrc.LatestLogicalAddress, fasterSession, - reason: WriteReason.CopyToTail, expired: readInfo.Action == ReadAction.Expire); + reason: WriteReason.CopyToTail); // status != SUCCESS means no copy to tail was done if (status == OperationStatus.NOTFOUND || status == OperationStatus.RECORD_ON_DISK) return readInfo.Action == ReadAction.Expire @@ -340,14 +297,20 @@ private OperationStatus ReadFromImmutableRegion(fasterSession, ref key, ref stackCtx, ref srcRecordInfo); } } + } } diff --git a/cs/src/core/Index/FASTER/Implementation/InternalUpsert.cs b/cs/src/core/Index/FASTER/Implementation/InternalUpsert.cs index e3101025d..92aaeb1af 100644 --- a/cs/src/core/Index/FASTER/Implementation/InternalUpsert.cs +++ b/cs/src/core/Index/FASTER/Implementation/InternalUpsert.cs @@ -19,7 +19,6 @@ public unsafe partial class FasterKV : FasterBase, IFasterKVUser context for the operation, in case it goes pending. /// Pending context used internally to store the context of the operation. /// Callback functions. 
- /// Session context /// Operation serial number /// /// @@ -42,90 +41,82 @@ public unsafe partial class FasterKV : FasterBase, IFasterKV /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal OperationStatus InternalUpsert( - ref Key key, ref Input input, ref Value value, ref Output output, - ref Context userContext, - ref PendingContext pendingContext, - FasterSession fasterSession, - FasterExecutionContext sessionCtx, - long lsn) + internal OperationStatus InternalUpsert(ref Key key, ref Input input, ref Value value, ref Output output, + ref Context userContext, ref PendingContext pendingContext, FasterSession fasterSession, long lsn) where FasterSession : IFasterSession { - OperationStatus status = default; var latchOperation = LatchOperation.None; var latchDestination = LatchDestination.NormalProcessing; OperationStackContext stackCtx = new(comparer.GetHashCode64(ref key)); - if (sessionCtx.phase != Phase.REST) - HeavyEnter(stackCtx.hei.hash, sessionCtx, fasterSession); + if (fasterSession.Ctx.phase != Phase.REST) + HeavyEnter(stackCtx.hei.hash, fasterSession.Ctx, fasterSession); - // A 'ref' variable must be initialized. If we find a record for the key, we reassign the reference. We don't copy from this source, but we do lock it. - RecordInfo dummyRecordInfo = default; - ref RecordInfo srcRecordInfo = ref dummyRecordInfo; - - FindOrCreateTag(ref stackCtx.hei); + FindOrCreateTag(ref stackCtx.hei, hlog.BeginAddress); stackCtx.SetRecordSourceToHashEntry(hlog); - // We must always scan to HeadAddress; a Lockable*Context could be activated and lock the record in the immutable region while we're scanning. - TryFindRecordInMemory(ref key, ref stackCtx, hlog.HeadAddress); + // We blindly insert if we don't find the record in the mutable region. + RecordInfo dummyRecordInfo = new() { Valid = true }; + ref RecordInfo srcRecordInfo = ref TryFindRecordInMemory(ref key, ref stackCtx, hlog.ReadOnlyAddress) + ? ref stackCtx.recSrc.GetInfo() + : ref dummyRecordInfo; + if (srcRecordInfo.IsClosed) + return OperationStatus.RETRY_LATER; + + // Note: we do not track pendingContext.Initial*Address because we don't have an InternalContinuePendingUpsert UpsertInfo upsertInfo = new() { - SessionType = fasterSession.SessionType, - Version = sessionCtx.version, - SessionID = sessionCtx.sessionID, + Version = fasterSession.Ctx.version, + SessionID = fasterSession.Ctx.sessionID, Address = stackCtx.recSrc.LogicalAddress, KeyHash = stackCtx.hei.hash }; - #region Entry latch operation - if (sessionCtx.phase != Phase.REST) - { - latchDestination = AcquireLatchUpsert(sessionCtx, ref stackCtx.hei, ref status, ref latchOperation, stackCtx.recSrc.LogicalAddress); - if (latchDestination == LatchDestination.Retry) - goto LatchRelease; - } - #endregion + if (!TryTransientXLock(fasterSession, ref key, ref stackCtx, out OperationStatus status)) + return status; // We must use try/finally to ensure unlocking even in the presence of exceptions. try { - #region Address and source record checks + #region Address and source record checks if (stackCtx.recSrc.HasReadCacheSrc) { // Use the readcache record as the CopyUpdater source. - goto LockSourceRecord; + goto CreateNewRecord; } - else if (stackCtx.recSrc.LogicalAddress >= hlog.ReadOnlyAddress && latchDestination == LatchDestination.NormalProcessing) + + // Check for CPR consistency after checking if source is readcache. 
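// ====================================================================================================
// [Editor's note] Illustrative sketch, not part of this diff: the 'ref' conditional above binds
// srcRecordInfo either to the found record's header or to a stack-local dummy, so the rest of the
// method has a single code path. Minimal self-contained demo (hypothetical types; C# 7.2+ ref ternary):
struct Header { public bool Valid; }

static class RefConditionalDemo
{
    static Header[] store = { new Header { Valid = true } };

    static void Demo()
    {
        var dummy = new Header { Valid = true };
        bool found = store.Length > 0;
        ref Header src = ref found ? ref store[0] : ref dummy;  // one ref local, two possible targets
        System.Console.WriteLine(src.Valid);
    }
}
// ==== [end editor's sketch] =========================================================================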
+ if (fasterSession.Ctx.phase != Phase.REST) { - // Mutable Region: Update the record in-place - // We perform mutable updates only if we are in normal processing phase of checkpointing - srcRecordInfo = ref hlog.GetInfo(stackCtx.recSrc.PhysicalAddress); - if (!TryEphemeralXLock(fasterSession, ref stackCtx.recSrc, ref srcRecordInfo, out status)) + latchDestination = CheckCPRConsistencyUpsert(fasterSession.Ctx.phase, ref stackCtx, ref status, ref latchOperation); + if (latchDestination == LatchDestination.Retry) goto LatchRelease; + } + if (stackCtx.recSrc.LogicalAddress >= hlog.ReadOnlyAddress && latchDestination == LatchDestination.NormalProcessing) + { + // Mutable Region: Update the record in-place. We perform mutable updates only if we are in normal processing phase of checkpointing if (srcRecordInfo.Tombstone) goto CreateNewRecord; - if (!srcRecordInfo.IsValidUpdateOrLockSource) - { - EphemeralXUnlockAndAbandonUpdate(fasterSession, ref stackCtx.recSrc, ref srcRecordInfo); - srcRecordInfo = ref dummyRecordInfo; - goto CreateNewRecord; - } - upsertInfo.RecordInfo = srcRecordInfo; - ref Value recordValue = ref hlog.GetValue(stackCtx.recSrc.PhysicalAddress); - if (fasterSession.ConcurrentWriter(ref key, ref input, ref value, ref recordValue, ref output, ref srcRecordInfo, ref upsertInfo)) + ref Value recordValue = ref stackCtx.recSrc.GetValue(); + if (fasterSession.ConcurrentWriter(ref key, ref input, ref value, ref recordValue, ref output, ref srcRecordInfo, ref upsertInfo, out stackCtx.recSrc.ephemeralLockResult)) { - this.MarkPage(stackCtx.recSrc.LogicalAddress, sessionCtx); + this.MarkPage(stackCtx.recSrc.LogicalAddress, fasterSession.Ctx); pendingContext.recordInfo = srcRecordInfo; pendingContext.logicalAddress = stackCtx.recSrc.LogicalAddress; status = OperationStatusUtils.AdvancedOpCode(OperationStatus.SUCCESS, StatusCode.InPlaceUpdatedRecord); goto LatchRelease; } + if (stackCtx.recSrc.ephemeralLockResult == EphemeralLockResult.Failed) + { + status = OperationStatus.RETRY_LATER; + goto LatchRelease; + } if (upsertInfo.Action == UpsertAction.CancelOperation) { status = OperationStatus.CANCELED; @@ -133,51 +124,26 @@ internal OperationStatus InternalUpsert( } // ConcurrentWriter failed (e.g. insufficient space, another thread set Tombstone, etc). Write a new record, but track that we have to seal and unlock this one. - stackCtx.recSrc.HasMainLogSrc = true; goto CreateNewRecord; } else if (stackCtx.recSrc.LogicalAddress >= hlog.HeadAddress) { - // Only need to go below ReadOnly for locking and Sealing. - stackCtx.recSrc.HasMainLogSrc = true; - goto LockSourceRecord; + goto CreateNewRecord; } else { - // Either on-disk or no record exists - check for lock before creating new record. First ensure any record lock has transitioned to the LockTable. - SpinWaitUntilRecordIsClosed(ref key, stackCtx.hei.hash, stackCtx.recSrc.LogicalAddress, hlog); - Debug.Assert(!fasterSession.IsManualLocking || LockTable.IsLockedExclusive(ref key, stackCtx.hei.hash), "A Lockable-session Upsert() of an on-disk or non-existent key requires a LockTable lock"); - if (LockTable.IsActive && !fasterSession.DisableEphemeralLocking - && !LockTable.TryLockEphemeral(ref key, stackCtx.hei.hash, LockType.Exclusive, out stackCtx.recSrc.HasLockTableLock)) - { - status = OperationStatus.RETRY_LATER; - goto LatchRelease; - } + // Either on-disk or no record exists - create new record. 
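// ====================================================================================================
// [Editor's note] Illustrative sketch, not part of this diff: the outcomes of the in-place update
// attempt above, as a self-contained decision ladder with hypothetical names.
enum UpsertStep { DoneInPlace, RetryLater, Canceled, AppendNewRecord }

static UpsertStep AfterConcurrentWriter(bool writerSucceeded, bool ephemeralLockFailed, bool canceled)
{
    if (writerSucceeded)        return UpsertStep.DoneInPlace;      // mark the page dirty, report InPlaceUpdatedRecord
    if (ephemeralLockFailed)    return UpsertStep.RetryLater;       // lost the ephemeral lock: refresh and retry
    if (canceled)               return UpsertStep.Canceled;         // user requested UpsertAction.CancelOperation
    return UpsertStep.AppendNewRecord;                              // e.g. insufficient space: append a new record at the tail
}
// ==== [end editor's sketch] =========================================================================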
+ Debug.Assert(!fasterSession.IsManualLocking || LockTable.IsLockedExclusive(ref key, ref stackCtx.hei), "A Lockable-session Upsert() of an on-disk or non-existent key requires a LockTable lock"); goto CreateNewRecord; } #endregion Address and source record checks - #region Lock source record - LockSourceRecord: - // This would be a local function to reduce "goto", but 'ref' variables and parameters aren't supported on local functions. - srcRecordInfo = ref stackCtx.recSrc.GetSrcRecordInfo(); - if (!TryEphemeralXLock(fasterSession, ref stackCtx.recSrc, ref srcRecordInfo, out status)) - goto LatchRelease; - if (!srcRecordInfo.IsValidUpdateOrLockSource) - { - EphemeralXUnlockAndAbandonUpdate(fasterSession, ref stackCtx.recSrc, ref srcRecordInfo); - srcRecordInfo = ref dummyRecordInfo; - } - goto CreateNewRecord; - #endregion Lock source record - #region Create new record in the mutable region CreateNewRecord: if (latchDestination != LatchDestination.CreatePendingContext) { // Immutable region or new record - status = CreateNewRecordUpsert(ref key, ref input, ref value, ref output, ref pendingContext, fasterSession, - sessionCtx, ref stackCtx, ref srcRecordInfo); + status = CreateNewRecordUpsert(ref key, ref input, ref value, ref output, ref pendingContext, fasterSession, ref stackCtx, ref srcRecordInfo); if (!OperationStatusUtils.IsAppend(status)) { // We should never return "SUCCESS" for a new record operation: it returns NOTFOUND on success. @@ -194,8 +160,12 @@ internal OperationStatus InternalUpsert( } finally { - stackCtx.HandleNewRecordOnError(this); - EphemeralXUnlockAfterUpdate(fasterSession, ref key, ref stackCtx, ref srcRecordInfo); + // On success, we call UnlockAndSeal. Non-success includes the source address going below HeadAddress, in which case we rely on + // recordInfo.ClearBitsForDiskImages clearing locks and Seal. + if (stackCtx.recSrc.ephemeralLockResult == EphemeralLockResult.HoldForSeal && stackCtx.recSrc.LogicalAddress >= hlog.HeadAddress && srcRecordInfo.IsLocked) + srcRecordInfo.UnlockExclusive(); + stackCtx.HandleNewRecordOnException(this); + TransientXUnlock(fasterSession, ref key, ref stackCtx); } #region Create pending context @@ -212,9 +182,8 @@ internal OperationStatus InternalUpsert( heapConvertible.ConvertToHeap(); pendingContext.userContext = userContext; - pendingContext.PrevLatestLogicalAddress = stackCtx.recSrc.LatestLogicalAddress; pendingContext.logicalAddress = stackCtx.recSrc.LogicalAddress; - pendingContext.version = sessionCtx.version; + pendingContext.version = fasterSession.Ctx.version; pendingContext.serialNum = lsn; } #endregion @@ -242,61 +211,80 @@ internal OperationStatus InternalUpsert( return status; } - private LatchDestination AcquireLatchUpsert(FasterExecutionContext sessionCtx, ref HashEntryInfo hei, ref OperationStatus status, - ref LatchOperation latchOperation, long logicalAddress) + private LatchDestination CheckCPRConsistencyUpsert(Phase phase, ref OperationStackContext stackCtx, ref OperationStatus status, ref LatchOperation latchOperation) { - switch (sessionCtx.phase) + if (!this.DoTransientLocking) + return AcquireCPRLatchUpsert(phase, ref stackCtx, ref status, ref latchOperation); + + // This is AcquireCPRLatchUpsert without the bucket latching, since we already have a latch on either the bucket or the recordInfo. + // See additional comments in AcquireCPRLatchRMW. 
+ + switch (phase) { - case Phase.PREPARE: + case Phase.PREPARE: // Thread is in V + if (!IsEntryVersionNew(ref stackCtx.hei.entry)) + break; // Normal Processing; thread is in V, record is in V + + status = OperationStatus.CPR_SHIFT_DETECTED; + return LatchDestination.Retry; // Pivot Thread for retry (do not operate on V+1 record when thread is in V) + + case Phase.IN_PROGRESS: // Thread is in V+1 + case Phase.WAIT_INDEX_CHECKPOINT: + case Phase.WAIT_FLUSH: + if (IsRecordVersionNew(stackCtx.recSrc.LogicalAddress)) + break; // Normal Processing; V+1 thread encountered a record in V+1 + return LatchDestination.CreateNewRecord; // Upsert never goes pending; always force creation of a (V+1) record + + default: + break; + } + return LatchDestination.NormalProcessing; + } + + private LatchDestination AcquireCPRLatchUpsert(Phase phase, ref OperationStackContext stackCtx, ref OperationStatus status, ref LatchOperation latchOperation) + { + // See additional comments in AcquireCPRLatchRMW. + + switch (phase) + { + case Phase.PREPARE: // Thread is in V + if (HashBucket.TryAcquireSharedLatch(ref stackCtx.hei)) { - if (HashBucket.TryAcquireSharedLatch(ref hei)) - { - // Set to release shared latch (default) - latchOperation = LatchOperation.Shared; - // Here (and in InternalRead, AcquireLatchRMW, and AcquireLatchDelete) we still check the tail record of the bucket (entry.Address) - // rather than the traced record (logicalAddress), because I'm worried that the implementation - // may not allow in-place updates for version v when the bucket arrives v+1. - // This is safer but potentially unnecessary. - if (CheckBucketVersionNew(ref hei.entry)) - { - status = OperationStatus.CPR_SHIFT_DETECTED; - return LatchDestination.Retry; // Pivot Thread on retry - } - break; // Normal Processing - } - else + // Set to release shared latch (default) + latchOperation = LatchOperation.Shared; + if (IsEntryVersionNew(ref stackCtx.hei.entry)) { status = OperationStatus.CPR_SHIFT_DETECTED; - return LatchDestination.Retry; // Pivot Thread on retry + return LatchDestination.Retry; // Pivot Thread for retry (do not operate on V+1 record when thread is in V) } + break; // Normal Processing; thread is in V, record is in V } - case Phase.IN_PROGRESS: + + // Could not acquire Shared latch; system must be in V+1 (or we have too many shared latches). + status = OperationStatus.CPR_SHIFT_DETECTED; + return LatchDestination.Retry; // Pivot Thread for retry + + case Phase.IN_PROGRESS: // Thread is in V+1 + if (IsRecordVersionNew(stackCtx.recSrc.LogicalAddress)) + break; // Normal Processing; V+1 thread encountered a record in V+1 + + if (HashBucket.TryAcquireExclusiveLatch(ref stackCtx.hei)) { - if (!CheckEntryVersionNew(logicalAddress)) - { - if (HashBucket.TryAcquireExclusiveLatch(ref hei)) - { - // Set to release exclusive latch (default) - latchOperation = LatchOperation.Exclusive; - return LatchDestination.CreateNewRecord; // Create a (v+1) record - } - else - { - status = OperationStatus.RETRY_LATER; - return LatchDestination.Retry; // Refresh and retry operation - } - } - break; // Normal Processing + // Set to release exclusive latch (default) + latchOperation = LatchOperation.Exclusive; + return LatchDestination.CreateNewRecord; // Upsert never goes pending; always force creation of a (v+1) record } - case Phase.WAIT_INDEX_CHECKPOINT: + + // Could not acquire exclusive latch; likely a conflict on the bucket. 
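// ====================================================================================================
// [Editor's note] Illustrative sketch, not part of this diff: the one behavioral difference between
// the Upsert CPR check above and the RMW variant earlier. For a V+1 thread over a V record, RMW may go
// pending when the source is below HeadAddress; Upsert never goes pending. Hypothetical summary:
static LatchDestination VPlusOneThreadOverVRecord(bool isUpsert, bool recordInMemory)
    => (isUpsert || recordInMemory)
        ? LatchDestination.CreateNewRecord      // force a (V+1) copy at the tail
        : LatchDestination.NormalProcessing;    // RMW only: fall through; the operation goes pending
// ==== [end editor's sketch] =========================================================================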
+ status = OperationStatus.RETRY_LATER; + return LatchDestination.Retry; // Retry after refresh + + case Phase.WAIT_INDEX_CHECKPOINT: // Thread is in v+1 case Phase.WAIT_FLUSH: - { - if (!CheckEntryVersionNew(logicalAddress)) - { - return LatchDestination.CreateNewRecord; // Create a (v+1) record - } - break; // Normal Processing - } + if (IsRecordVersionNew(stackCtx.recSrc.LogicalAddress)) + break; // Normal Processing; V+1 thread encountered a record in V+1 + return LatchDestination.CreateNewRecord; // Upsert never goes pending; always force creation of a (V+1) record + default: break; } @@ -312,43 +300,27 @@ private LatchDestination AcquireLatchUpsert(FasterExecut /// The result of IFunctions.SingleWriter /// Information about the operation context /// The current session - /// The current session context /// Contains the and structures for this operation, /// and allows passing back the newLogicalAddress for invalidation in the case of exceptions. /// If ., /// this is the for private OperationStatus CreateNewRecordUpsert(ref Key key, ref Input input, ref Value value, ref Output output, ref PendingContext pendingContext, FasterSession fasterSession, - FasterExecutionContext sessionCtx, ref OperationStackContext stackCtx, ref RecordInfo srcRecordInfo) where FasterSession : IFasterSession { var (actualSize, allocatedSize) = hlog.GetRecordSize(ref key, ref value); - if (!GetAllocationForRetry(ref pendingContext, stackCtx.hei.Address, allocatedSize, out long newLogicalAddress, out long newPhysicalAddress)) - { - // Spin to make sure newLogicalAddress is > recSrc.LatestLogicalAddress (the .PreviousAddress and CAS comparison value). - do - { - if (!BlockAllocate(allocatedSize, out newLogicalAddress, ref pendingContext, out OperationStatus status)) - return status; - newPhysicalAddress = hlog.GetPhysicalAddress(newLogicalAddress); - if (!VerifyInMemoryAddresses(ref stackCtx)) - { - SaveAllocationForRetry(ref pendingContext, newLogicalAddress, newPhysicalAddress, allocatedSize); - return OperationStatus.RETRY_LATER; - } - } while (newLogicalAddress < stackCtx.recSrc.LatestLogicalAddress); - } + if (!TryAllocateRecord(ref pendingContext, ref stackCtx, allocatedSize, recycle: true, out long newLogicalAddress, out long newPhysicalAddress, out OperationStatus status)) + return status; - ref RecordInfo newRecordInfo = ref WriteTentativeInfo(ref key, hlog, newPhysicalAddress, inNewVersion: sessionCtx.InNewVersion, tombstone: false, stackCtx.recSrc.LatestLogicalAddress); - stackCtx.newLogicalAddress = newLogicalAddress; + ref RecordInfo newRecordInfo = ref WriteNewRecordInfo(ref key, hlog, newPhysicalAddress, inNewVersion: fasterSession.Ctx.InNewVersion, tombstone: false, stackCtx.recSrc.LatestLogicalAddress); + stackCtx.SetNewRecord(newLogicalAddress); UpsertInfo upsertInfo = new() { - SessionType = fasterSession.SessionType, - Version = sessionCtx.version, - SessionID = sessionCtx.sessionID, + Version = fasterSession.Ctx.version, + SessionID = fasterSession.Ctx.sessionID, Address = newLogicalAddress, KeyHash = stackCtx.hei.hash, RecordInfo = newRecordInfo @@ -365,14 +337,15 @@ private OperationStatus CreateNewRecordUpsert(fasterSession, ref key, ref stackCtx, ref srcRecordInfo, ref newRecordInfo, out var lockStatus)) - return lockStatus; + PostInsertAtTail(ref key, ref stackCtx, ref srcRecordInfo); fasterSession.PostSingleWriter(ref key, ref input, ref value, ref newValue, ref output, ref newRecordInfo, ref upsertInfo, WriteReason.Upsert); - stackCtx.ClearNewRecordTentativeBitAtomic(ref 
newRecordInfo);
+ if (stackCtx.recSrc.ephemeralLockResult == EphemeralLockResult.HoldForSeal)
+ srcRecordInfo.UnlockExclusiveAndSeal();
+ stackCtx.ClearNewRecord();
pendingContext.recordInfo = newRecordInfo;
pendingContext.logicalAddress = newLogicalAddress;
return OperationStatusUtils.AdvancedOpCode(OperationStatus.NOTFOUND, StatusCode.CreatedRecord);
diff --git a/cs/src/core/Index/FASTER/Implementation/Locking/ILockTable.cs b/cs/src/core/Index/FASTER/Implementation/Locking/ILockTable.cs
new file mode 100644
index 000000000..b042f4134
--- /dev/null
+++ b/cs/src/core/Index/FASTER/Implementation/Locking/ILockTable.cs
@@ -0,0 +1,93 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT license.
+
+using System;
+
+namespace FASTER.core
+{
+ ///
+ /// Manual-enabled (both manual and transient) LockTable interface definition
+ ///
+ ///
+ internal interface ILockTable : IDisposable
+ {
+ ///
+ /// Try to acquire a manual lock for the key.
+ ///
+ /// The key to lock
+ /// The hash table entry info of the key to lock
+ /// The lock type to acquire
+ /// True if the lock was acquired; false if lock acquisition failed
+ /// There are no variations of this call specific to Shared vs. Exclusive, because this is
+ /// called only from InternalLock, which takes the argument.
+ public bool TryLockManual(ref TKey key, ref HashEntryInfo hei, LockType lockType);
+
+ ///
+ /// Try to acquire a transient lock for the key.
+ ///
+ /// The key to lock
+ /// The hash table entry info of the key to lock
+ /// The lock type to acquire--shared or exclusive
+ public bool TryLockTransient(ref TKey key, ref HashEntryInfo hei, LockType lockType);
+
+ ///
+ /// Try to acquire a shared transient lock for the key.
+ ///
+ /// The key to lock
+ /// The hash table entry info of the key to lock
+ public bool TryLockTransientShared(ref TKey key, ref HashEntryInfo hei);
+
+ ///
+ /// Try to acquire an exclusive transient lock for the key.
+ ///
+ /// The key to lock
+ /// The hash table entry info of the key to lock
+ public bool TryLockTransientExclusive(ref TKey key, ref HashEntryInfo hei);
+
+ ///
+ /// Release the lock on the key.
+ ///
+ /// The key to unlock
+ /// The hash table entry info of the key to unlock
+ /// The lock type to release--shared or exclusive
+ public void Unlock(ref TKey key, ref HashEntryInfo hei, LockType lockType);
+
+ ///
+ /// Release a shared lock on the key.
+ ///
+ /// The key to unlock
+ /// The hash table entry info of the key to unlock
+ public void UnlockShared(ref TKey key, ref HashEntryInfo hei);
+
+ ///
+ /// Release an exclusive lock on the key.
+ ///
+ /// The key to unlock
+ /// The hash table entry info of the key to unlock
+ public void UnlockExclusive(ref TKey key, ref HashEntryInfo hei);
+
+ ///
+ /// Return whether the key is S locked
+ ///
+ public bool IsLockedShared(ref TKey key, ref HashEntryInfo hei);
+
+ ///
+ /// Return whether the key is X locked
+ ///
+ public bool IsLockedExclusive(ref TKey key, ref HashEntryInfo hei);
+
+ ///
+ /// Return whether the key is S or X locked
+ ///
+ public bool IsLocked(ref TKey key, ref HashEntryInfo hei);
+
+ ///
+ /// Return the Lock state of the key.
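// ====================================================================================================
// [Editor's note] Illustrative sketch, not part of this diff: when several keys are manually locked
// through this interface, acquisition must follow a canonical order or two sessions can deadlock; the
// OverflowBucketLockTable below sorts by bucket index for exactly this reason (see SortLockCodes). A
// minimal self-contained rendition of the idea, with hypothetical names (requires 'using System;'):
static void LockAllInOrder(long[] lockCodes, long sizeMask, Action<long> lockBucket)
{
    // Sort by bucket index so every session acquires bucket latches in the same global order.
    Array.Sort(lockCodes, (a, b) => (a & sizeMask).CompareTo(b & sizeMask));
    long previousBucket = -1;
    foreach (var code in lockCodes)
    {
        var bucket = code & sizeMask;
        if (bucket == previousBucket)
            continue;               // keys that hash to the same bucket share one latch; don't re-acquire it
        lockBucket(bucket);
        previousBucket = bucket;
    }
}
// ==== [end editor's sketch] =========================================================================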
+ /// + public LockState GetLockState(ref TKey key, ref HashEntryInfo hei); + + public bool NeedKeyLockCode { get; } + + public long GetLockCode(ref TKey key, long keyHash); + } +} diff --git a/cs/src/core/Index/FASTER/Implementation/Locking/OverflowBucketLockTable.cs b/cs/src/core/Index/FASTER/Implementation/Locking/OverflowBucketLockTable.cs new file mode 100644 index 000000000..4a075da88 --- /dev/null +++ b/cs/src/core/Index/FASTER/Implementation/Locking/OverflowBucketLockTable.cs @@ -0,0 +1,224 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Runtime.CompilerServices; + +namespace FASTER.core +{ + internal struct OverflowBucketLockTable : ILockTable + { + FasterKV fht; + + internal long NumBuckets => IsEnabled ? fht.state[fht.resizeInfo.version].size_mask + 1 : 0; + + internal bool IsEnabled => this.fht is not null; + + internal OverflowBucketLockTable(FasterKV f) => this.fht = f; + + [Conditional("DEBUG")] + void AssertLockAllowed() => Debug.Assert(IsEnabled, $"Attempt to do Manual-locking lock when locking mode is not {LockingMode.Standard}"); + + [Conditional("DEBUG")] + void AssertUnlockAllowed() => Debug.Assert(IsEnabled, $"Attempt to do Manual-locking unlock when locking mode is not {LockingMode.Standard}"); + + [Conditional("DEBUG")] + void AssertQueryAllowed() => Debug.Assert(IsEnabled, $"Attempt to do Manual-locking query when locking mode is not {LockingMode.Standard}"); + + internal long GetSize() => fht.state[fht.resizeInfo.version].size_mask; + + public bool NeedKeyLockCode => IsEnabled; + + static OverflowBucketLockTable() + { + Debug.Assert(LockType.Exclusive < LockType.Shared, "LockType.Exclusive must be < LockType.Shared, or LockCodeComparer must be changed accordingly"); + } + + /// + public long GetLockCode(ref TKey key, long hash) => IsEnabled ? hash : 0; + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static long GetBucketIndex(long keyCode, long size_mask) + => keyCode & size_mask; + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal long GetBucketIndex(long keyCode) + => GetBucketIndex(keyCode, fht.state[fht.resizeInfo.version].size_mask); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal unsafe HashBucket* GetBucket(long keyCode) + => fht.state[fht.resizeInfo.version].tableAligned + GetBucketIndex(keyCode); + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public unsafe bool TryLockManual(ref TKey key, ref HashEntryInfo hei, LockType lockType) + => TryLockManual(hei.firstBucket, lockType); + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public unsafe bool TryLockManual(long keyCode, LockType lockType) + => TryLockManual(GetBucket(keyCode), lockType); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private unsafe bool TryLockManual(HashBucket* bucket, LockType lockType) + { + AssertLockAllowed(); + return lockType switch + { + LockType.Shared => HashBucket.TryAcquireSharedLatch(bucket), + LockType.Exclusive => HashBucket.TryAcquireExclusiveLatch(bucket), + _ => throw new FasterException("Attempt to lock with unknown LockType") + }; + } + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public unsafe bool TryLockTransient(ref TKey key, ref HashEntryInfo hei, LockType lockType) + => lockType == LockType.Shared ? 
TryLockTransientShared(ref key, ref hei) : TryLockTransientExclusive(ref key, ref hei); + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public unsafe bool TryLockTransientShared(ref TKey key, ref HashEntryInfo hei) + { + AssertLockAllowed(); + return HashBucket.TryAcquireSharedLatch(hei.firstBucket); + } + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public unsafe bool TryLockTransientExclusive(ref TKey key, ref HashEntryInfo hei) + { + AssertLockAllowed(); + return HashBucket.TryAcquireExclusiveLatch(hei.firstBucket); + } + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public unsafe void Unlock(ref TKey key, ref HashEntryInfo hei, LockType lockType) + { + AssertUnlockAllowed(); + if (lockType == LockType.Shared) + UnlockShared(ref key, ref hei); + else + { + Debug.Assert(lockType == LockType.Exclusive, "Attempt to unlock with unknown LockType"); + UnlockExclusive(ref key, ref hei); + } + } + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public unsafe void Unlock(long keyCode, LockType lockType) + { + AssertUnlockAllowed(); + HashBucket* bucket = GetBucket(keyCode); + if (lockType == LockType.Shared) + HashBucket.ReleaseSharedLatch(bucket); + else + { + Debug.Assert(lockType == LockType.Exclusive, "Attempt to unlock with unknown LockType"); + HashBucket.ReleaseExclusiveLatch(bucket); + } + } + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public unsafe void UnlockShared(ref TKey key, ref HashEntryInfo hei) + { + AssertUnlockAllowed(); + HashBucket.ReleaseSharedLatch(ref hei); + } + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public unsafe void UnlockExclusive(ref TKey key, ref HashEntryInfo hei) + { + AssertUnlockAllowed(); + HashBucket.ReleaseExclusiveLatch(ref hei); + } + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public unsafe bool IsLockedShared(ref TKey key, ref HashEntryInfo hei) + { + AssertQueryAllowed(); + return HashBucket.NumLatchedShared(hei.firstBucket) > 0; + } + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public unsafe bool IsLockedExclusive(ref TKey key, ref HashEntryInfo hei) + { + AssertQueryAllowed(); + return HashBucket.IsLatchedExclusive(hei.firstBucket); + } + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public unsafe bool IsLocked(ref TKey key, ref HashEntryInfo hei) + { + AssertQueryAllowed(); + return HashBucket.IsLatched(hei.firstBucket); + } + + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public unsafe LockState GetLockState(ref TKey key, ref HashEntryInfo hei) + { + AssertQueryAllowed(); + return new() + { + IsFound = true, // Always true for OverflowBucketLockTable + NumLockedShared = HashBucket.NumLatchedShared(hei.firstBucket), + IsLockedExclusive = HashBucket.IsLatchedExclusive(hei.firstBucket) + }; + } + + private static int LockCodeComparer(TLockableKey key1, TLockableKey key2, long size_mask) + where TLockableKey : ILockableKey + { + var idx1 = GetBucketIndex(key1.LockCode, size_mask); + var idx2 = GetBucketIndex(key2.LockCode, size_mask); + return (idx1 != idx2) ? 
idx1.CompareTo(idx2) : key1.LockType.CompareTo(key2.LockType); + } + + /// + internal int CompareLockCodes(TLockableKey key1, TLockableKey key2) + where TLockableKey : ILockableKey + => LockCodeComparer(key1, key2, fht.state[fht.resizeInfo.version].size_mask); + + /// + internal int CompareLockCodes(ref TLockableKey key1, ref TLockableKey key2) + where TLockableKey : ILockableKey + => LockCodeComparer(key1, key2, fht.state[fht.resizeInfo.version].size_mask); + + /// + internal void SortLockCodes(TLockableKey[] keys) + where TLockableKey : ILockableKey + => Array.Sort(keys, new KeyComparer(fht.state[fht.resizeInfo.version].size_mask)); + + /// + internal void SortLockCodes(TLockableKey[] keys, int start, int count) + where TLockableKey : ILockableKey + => Array.Sort(keys, start, count, new KeyComparer(fht.state[fht.resizeInfo.version].size_mask)); + + /// + /// Need this struct because the Comparison{T} form of Array.Sort is not available with start and length arguments. + /// + struct KeyComparer : IComparer + where TLockableKey : ILockableKey + { + readonly long size_mask; + + internal KeyComparer(long s) => size_mask = s; + + public int Compare(TLockableKey key1, TLockableKey key2) => LockCodeComparer(key1, key2, size_mask); + } + + /// + public void Dispose() { } + } +} + diff --git a/cs/src/core/Index/FASTER/Implementation/Locking/TransientLocking.cs b/cs/src/core/Index/FASTER/Implementation/Locking/TransientLocking.cs new file mode 100644 index 000000000..aca63200d --- /dev/null +++ b/cs/src/core/Index/FASTER/Implementation/Locking/TransientLocking.cs @@ -0,0 +1,51 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +using System.Runtime.CompilerServices; + +namespace FASTER.core +{ + public unsafe partial class FasterKV : FasterBase, IFasterKV + { + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private bool TryTransientXLock(FasterSession fasterSession, ref Key key, ref OperationStackContext stackCtx, + out OperationStatus status) + where FasterSession : IFasterSession + { + status = OperationStatus.SUCCESS; + if (!this.LockTable.IsEnabled || fasterSession.TryLockTransientExclusive(ref key, ref stackCtx)) + return true; + status = OperationStatus.RETRY_LATER; + return false; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private bool TryTransientSLock(FasterSession fasterSession, ref Key key, ref OperationStackContext stackCtx, + out OperationStatus status) + where FasterSession : IFasterSession + { + status = OperationStatus.SUCCESS; + if (!this.LockTable.IsEnabled || fasterSession.TryLockTransientShared(ref key, ref stackCtx)) + return true; + status = OperationStatus.RETRY_LATER; + return false; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + void TransientSUnlock(FasterSession fasterSession, ref Key key, + ref OperationStackContext stackCtx, ref RecordInfo srcRecordInfo) + where FasterSession : IFasterSession + { + if (stackCtx.recSrc.HasTransientLock) + fasterSession.UnlockTransientShared(ref key, ref stackCtx); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private void TransientXUnlock(FasterSession fasterSession, ref Key key, ref OperationStackContext stackCtx) + where FasterSession : IFasterSession + { + if (stackCtx.recSrc.HasTransientLock) + fasterSession.UnlockTransientExclusive(ref key, ref stackCtx); + } + } +} diff --git a/cs/src/core/Index/FASTER/Implementation/ModifiedBitOperation.cs b/cs/src/core/Index/FASTER/Implementation/ModifiedBitOperation.cs index 
b4e8edfd1..e0e548717 100644 --- a/cs/src/core/Index/FASTER/Implementation/ModifiedBitOperation.cs +++ b/cs/src/core/Index/FASTER/Implementation/ModifiedBitOperation.cs @@ -42,16 +42,14 @@ internal OperationStatus InternalModifiedBitOperation(ref Key key, out RecordInf if (logicalAddress >= hlog.HeadAddress) { ref RecordInfo recordInfo = ref hlog.GetInfo(physicalAddress); - if (!recordInfo.IsIntermediate(out OperationStatus status)) + if (reset) { - if (!reset) - status = OperationStatus.SUCCESS; - else if (!recordInfo.TryResetModifiedAtomic()) + if (!recordInfo.TryResetModifiedAtomic()) return OperationStatus.RETRY_LATER; } - if (!reset && !recordInfo.Tombstone) + else if (!recordInfo.Tombstone) modifiedInfo = recordInfo; - return status; + return OperationStatus.SUCCESS; } // If the record does not exist we return unmodified; if it is on the disk we return modified diff --git a/cs/src/core/Index/FASTER/Implementation/OperationStackContext.cs b/cs/src/core/Index/FASTER/Implementation/OperationStackContext.cs index 0c4280627..8437f6da0 100644 --- a/cs/src/core/Index/FASTER/Implementation/OperationStackContext.cs +++ b/cs/src/core/Index/FASTER/Implementation/OperationStackContext.cs @@ -1,7 +1,6 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT license. -using System; using System.Runtime.CompilerServices; namespace FASTER.core @@ -23,38 +22,34 @@ internal struct OperationStackContext /// /// Sets to the current ., which is the current address - /// in the hash table. This is the same effect as calling . + /// in the hash table. This is the same effect as calling . /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal void UpdateRecordSourceToCurrentHashEntry() + internal void UpdateRecordSourceToCurrentHashEntry(AllocatorBase hlog) { this.hei.SetToCurrent(); - this.SetRecordSourceToHashEntry(this.recSrc.Log); + this.SetRecordSourceToHashEntry(hlog); } /// /// If this is not , it is the logical Address allocated by CreateNewRecord*; if an exception /// occurs, this needs to be set invalid and non-tentative by the caller's 'finally' (to avoid another try/finally overhead). /// - internal long newLogicalAddress; + private long newLogicalAddress; /// - /// Called during normal operations when a record insertion fails, to set the new record invalid and non-tentative. + /// Sets the new record to be handled on error recovery. /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal void SetNewRecordInvalid(ref RecordInfo newRecordInfo) - { - newRecordInfo.SetInvalid(); - this.newLogicalAddress = Constants.kInvalidAddress; - } + internal void SetNewRecord(long newRecordLogicalAddress) => this.newLogicalAddress = newRecordLogicalAddress; /// /// Called during normal operations when a record insertion fails, to set the new record invalid and non-tentative. /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal void SetNewRecordInvalidAtomic(ref RecordInfo newRecordInfo) + internal void SetNewRecordInvalid(ref RecordInfo newRecordInfo) { - newRecordInfo.SetInvalidAtomic(); + newRecordInfo.SetInvalid(); this.newLogicalAddress = Constants.kInvalidAddress; } @@ -62,17 +57,13 @@ internal void SetNewRecordInvalidAtomic(ref RecordInfo newRecordInfo) /// Called during normal operations when a record insertion succeeds, to set the new record non-tentative (permanent). 
diff --git a/cs/src/core/Index/FASTER/Implementation/OperationStackContext.cs b/cs/src/core/Index/FASTER/Implementation/OperationStackContext.cs
index 0c4280627..8437f6da0 100644
--- a/cs/src/core/Index/FASTER/Implementation/OperationStackContext.cs
+++ b/cs/src/core/Index/FASTER/Implementation/OperationStackContext.cs
@@ -1,7 +1,6 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT license.
 
-using System;
 using System.Runtime.CompilerServices;
 
 namespace FASTER.core
@@ -23,38 +22,34 @@ internal struct OperationStackContext<Key, Value>
         /// <summary>
         /// Sets <see cref="HashEntryInfo.Address"/> to the current <see cref="HashEntryInfo.CurrentAddress"/>, which is the current address
-        /// in the hash table. This is the same effect as calling <see cref="SetRecordSourceToHashEntry"/>.
+        /// in the hash table. This is the same effect as calling <see cref="SetRecordSourceToHashEntry(AllocatorBase{Key, Value})"/>.
         /// </summary>
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        internal void UpdateRecordSourceToCurrentHashEntry()
+        internal void UpdateRecordSourceToCurrentHashEntry(AllocatorBase<Key, Value> hlog)
         {
             this.hei.SetToCurrent();
-            this.SetRecordSourceToHashEntry(this.recSrc.Log);
+            this.SetRecordSourceToHashEntry(hlog);
         }
 
         /// <summary>
         /// If this is not <see cref="Constants.kInvalidAddress"/>, it is the logical Address allocated by CreateNewRecord*; if an exception
         /// occurs, this needs to be set invalid and non-tentative by the caller's 'finally' (to avoid another try/finally overhead).
         /// </summary>
-        internal long newLogicalAddress;
+        private long newLogicalAddress;
 
         /// <summary>
-        /// Called during normal operations when a record insertion fails, to set the new record invalid and non-tentative.
+        /// Sets the new record to be handled on error recovery.
         /// </summary>
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        internal void SetNewRecordInvalid(ref RecordInfo newRecordInfo)
-        {
-            newRecordInfo.SetInvalid();
-            this.newLogicalAddress = Constants.kInvalidAddress;
-        }
+        internal void SetNewRecord(long newRecordLogicalAddress) => this.newLogicalAddress = newRecordLogicalAddress;
 
         /// <summary>
         /// Called during normal operations when a record insertion fails, to set the new record invalid and non-tentative.
         /// </summary>
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        internal void SetNewRecordInvalidAtomic(ref RecordInfo newRecordInfo)
+        internal void SetNewRecordInvalid(ref RecordInfo newRecordInfo)
         {
-            newRecordInfo.SetInvalidAtomic();
+            newRecordInfo.SetInvalid();
             this.newLogicalAddress = Constants.kInvalidAddress;
         }
 
@@ -62,17 +57,13 @@ internal void SetNewRecordInvalidAtomic(ref RecordInfo newRecordInfo)
         /// Called during normal operations when a record insertion succeeds, to set the new record non-tentative (permanent).
         /// </summary>
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        internal void ClearNewRecordTentativeBitAtomic(ref RecordInfo newRecordInfo)
-        {
-            newRecordInfo.ClearTentativeBitAtomic();
-            this.newLogicalAddress = Constants.kInvalidAddress;
-        }
+        internal void ClearNewRecord() => this.newLogicalAddress = Constants.kInvalidAddress;
 
         /// <summary>
-        /// Called during InternalXxx 'finally' handler, to set the new record invalid and non-tentative if an exception or other error occurred.
+        /// Called during InternalXxx 'finally' handler, to set the new record invalid if an exception or other error occurred.
         /// </summary>
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        internal void HandleNewRecordOnError(FasterKV<Key, Value> fkv)
+        internal void HandleNewRecordOnException(FasterKV<Key, Value> fkv)
         {
             if (this.newLogicalAddress != Constants.kInvalidAddress)
             {
@@ -80,5 +71,12 @@ internal void HandleNewRecordOnError(FasterKV<Key, Value> fkv)
                 this.newLogicalAddress = Constants.kInvalidAddress;
             }
         }
+
+        /// <inheritdoc/>
+        public override string ToString()
+        {
+            var delimiter = " ";    // Environment.NewLine doesn't display in VS
+            return $"hei: {hei}{delimiter}newLA {newLogicalAddress}{delimiter}recSrc: {recSrc}";
+        }
     }
 }
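The `SetNewRecord` / `ClearNewRecord` / `HandleNewRecordOnException` trio above is designed around a single try/finally per InternalXxx operation: record the speculative allocation, clear it on success, and let the finally block invalidate it only if an exception escaped. A toy model of that calling pattern (the types and names below are ours, not FASTER's):

```csharp
using System;

// Toy model: track a speculative record allocation and undo it unless explicitly committed.
class NewRecordGuard
{
    public const long kInvalidAddress = 0;
    long newLogicalAddress = kInvalidAddress;

    public void SetNewRecord(long address) => newLogicalAddress = address;
    public void ClearNewRecord() => newLogicalAddress = kInvalidAddress;

    public void HandleNewRecordOnException(Action<long> invalidate)
    {
        if (newLogicalAddress != kInvalidAddress)
        {
            invalidate(newLogicalAddress);          // e.g., SetInvalid() on the half-built record
            newLogicalAddress = kInvalidAddress;
        }
    }
}

static class GuardExample
{
    public static void Operation(NewRecordGuard guard, bool failCas)
    {
        try
        {
            guard.SetNewRecord(123);                // speculative allocation
            if (failCas)
                throw new Exception("simulated failure after allocation");
            guard.ClearNewRecord();                 // committed: the finally becomes a no-op
        }
        finally
        {
            guard.HandleNewRecordOnException(addr => Console.WriteLine($"invalidated {addr}"));
        }
    }
}
```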
diff --git a/cs/src/core/Index/FASTER/Implementation/ReadCache.cs b/cs/src/core/Index/FASTER/Implementation/ReadCache.cs
index 31ede8af8..46008b7a1 100644
--- a/cs/src/core/Index/FASTER/Implementation/ReadCache.cs
+++ b/cs/src/core/Index/FASTER/Implementation/ReadCache.cs
@@ -3,7 +3,7 @@
 
 using System.Diagnostics;
 using System.Runtime.CompilerServices;
-using static FASTER.core.LockUtility;
+using System.Threading;
 using static FASTER.core.Utility;
 
 namespace FASTER.core
@@ -12,177 +12,196 @@ namespace FASTER.core
     public unsafe partial class FasterKV<Key, Value> : FasterBase, IFasterKV<Key, Value>
     {
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        private bool FindInReadCache(ref Key key, ref OperationStackContext<Key, Value> stackCtx, long untilAddress, bool alwaysFindLatestLA = true, bool waitForTentative = true)
+        internal bool FindInReadCache(ref Key key, ref OperationStackContext<Key, Value> stackCtx, long minAddress = Constants.kInvalidAddress, bool alwaysFindLatestLA = true)
         {
             Debug.Assert(UseReadCache, "Should not call FindInReadCache if !UseReadCache");
+
+            // minAddress, if present, comes from the pre-pendingIO entry.Address; there may have been no readcache entries then.
+            minAddress = IsReadCache(minAddress) ? AbsoluteAddress(minAddress) : readcache.HeadAddress;
+
         RestartChain:
+            // 'recSrc' has already been initialized to the address in 'hei'.
             if (!stackCtx.hei.IsReadCache)
                 return false;
 
+            // This is also part of the initialization process for stackCtx.recSrc for each API/InternalXxx call.
             stackCtx.recSrc.LogicalAddress = Constants.kInvalidAddress;
             stackCtx.recSrc.PhysicalAddress = 0;
 
+            // LatestLogicalAddress is the "leading" pointer and will end up as the highest logical address in the main log for this tag chain.
             stackCtx.recSrc.LatestLogicalAddress &= ~Constants.kReadCacheBitMask;
 
-            if (stackCtx.recSrc.LatestLogicalAddress < readcache.HeadAddress)
-            {
-                // The first entry in the hash chain is a readcache entry that is targeted for eviction.
-                SpinWaitUntilAddressIsClosed(stackCtx.recSrc.LatestLogicalAddress, readcache);
-                stackCtx.UpdateRecordSourceToCurrentHashEntry();
-                goto RestartChain;
-            }
-
-            stackCtx.recSrc.LowestReadCacheLogicalAddress = stackCtx.recSrc.LatestLogicalAddress;
-            stackCtx.recSrc.LowestReadCachePhysicalAddress = readcache.GetPhysicalAddress(stackCtx.recSrc.LowestReadCacheLogicalAddress);
-
-            // untilAddress, if present, comes from the pre-pendingIO entry.Address; there may have been no readcache entries then.
-            Debug.Assert((untilAddress & Constants.kReadCacheBitMask) != 0 || untilAddress == Constants.kInvalidAddress, "untilAddress must be readcache or kInvalidAddress");
-            untilAddress &= ~Constants.kReadCacheBitMask;
             while (true)
             {
-                // Use a non-ref local, because we update it below to remove the readcache bit.
+                if (ReadCacheNeedToWaitForEviction(ref stackCtx))
+                    goto RestartChain;
+
+                // Increment the trailing "lowest read cache" address (for the splice point). We'll look ahead from this to examine the next record.
+                stackCtx.recSrc.LowestReadCacheLogicalAddress = stackCtx.recSrc.LatestLogicalAddress;
+                stackCtx.recSrc.LowestReadCachePhysicalAddress = readcache.GetPhysicalAddress(stackCtx.recSrc.LowestReadCacheLogicalAddress);
+
+                // Use a non-ref local, because we don't need to update.
                 RecordInfo recordInfo = readcache.GetInfo(stackCtx.recSrc.LowestReadCachePhysicalAddress);
 
-                // Return true if we find a read cache entry matching the key. Skip Invalid but *not* Intermediate; that's tested as part of lock acquisition.
-                if (!recordInfo.Invalid && stackCtx.recSrc.LatestLogicalAddress > untilAddress && !stackCtx.recSrc.HasReadCacheSrc
+                // When traversing the readcache, we skip Invalid (Closed) records. We don't have Sealed records in the readcache because they cause
+                // the operation to be retried, so we'd never get past them. Return true if we find a Valid read cache entry matching the key.
+                if (!recordInfo.Invalid && stackCtx.recSrc.LatestLogicalAddress >= minAddress && !stackCtx.recSrc.HasReadCacheSrc
                     && comparer.Equals(ref key, ref readcache.GetKey(stackCtx.recSrc.LowestReadCachePhysicalAddress)))
                 {
-                    // When traversing the readcache, we skip Invalid records. The semantics of Seal are that the operation is retried, so if we leave
-                    // Sealed records in the readcache, we'll never get past them. Therefore, we go from Tentative to Invalid if the Tentative record
-                    // has to be invalidated. There is only one scenario where we go Tentative -> Invalid in the readcache: when an updated record was
-                    // added to the main log. This record is *after* the Invalidated one, so it is safe to proceed. We don't go Tentative -> Invalid for
-                    // Read/CopyToReadCache; InternalContinuePendingRead makes sure there is not already a record in the readcache for a record just read
-                    // from disk, and the usual CAS-into-hashbucket operation to add a new readcache record will catch the case a subsequent one was added.
-                    bool valid = true;
-                    if (recordInfo.Tentative && waitForTentative)
-                    {
-                        // This is not a ref, so we have to re-get it.
-                        ref var ri = ref readcache.GetInfo(stackCtx.recSrc.LowestReadCachePhysicalAddress);
-                        valid = SpinWaitWhileTentativeAndReturnValidity(ref ri);
-                        recordInfo = ri;
-                    }
-
-                    if (valid)
-                    {
-                        // Keep these at the current readcache location; they'll be the caller's source record.
-                        stackCtx.recSrc.LogicalAddress = stackCtx.recSrc.LowestReadCacheLogicalAddress;
-                        stackCtx.recSrc.PhysicalAddress = stackCtx.recSrc.LowestReadCachePhysicalAddress;
-                        stackCtx.recSrc.HasReadCacheSrc = true;
-                        stackCtx.recSrc.Log = readcache;
-
-                        // Read() does not need to continue past the found record; updaters need to continue to find latestLogicalAddress and lowestReadCache*Address.
-                        if (!alwaysFindLatestLA)
-                            return true;
-                    }
+                    // Keep these at the current readcache location; they'll be the caller's source record.
+                    stackCtx.recSrc.LogicalAddress = stackCtx.recSrc.LowestReadCacheLogicalAddress;
+                    stackCtx.recSrc.PhysicalAddress = stackCtx.recSrc.LowestReadCachePhysicalAddress;
+                    stackCtx.recSrc.HasReadCacheSrc = true;
+                    stackCtx.recSrc.Log = readcache;
+
+                    // Read() does not need to continue past the found record; updaters need to continue to find latestLogicalAddress and lowestReadCache*Address.
+                    if (!alwaysFindLatestLA)
+                        return true;
                 }
 
-                // Is the previous record a main log record? If so, break out.
+                // Update the leading LatestLogicalAddress to recordInfo.PreviousAddress, and if that is a main log record, break out.
+                stackCtx.recSrc.LatestLogicalAddress = recordInfo.PreviousAddress & ~Constants.kReadCacheBitMask;
                 if (!recordInfo.PreviousAddressIsReadCache)
-                {
-                    Debug.Assert(recordInfo.PreviousAddress >= hlog.BeginAddress, "Read cache chain should always end with a main-log entry");
-                    stackCtx.recSrc.LatestLogicalAddress = recordInfo.PreviousAddress;
                     goto InMainLog;
-                }
-
-                recordInfo.PreviousAddress &= ~Constants.kReadCacheBitMask;
-                if (recordInfo.PreviousAddress < readcache.HeadAddress)
-                {
-                    // We must wait until possible locks are transferred to the lock table by ReadCacheEvict. Due to address ordering, waiting for
-                    // latestLogicalAddress also waits for any lower-address readcache records. This wait also ensures that ReadCacheEvict is complete
-                    // for this chain, so the returned "lowest readcache addresses" are correct.
-                    var prevHeadAddress = readcache.HeadAddress;
-                    SpinWaitUntilAddressIsClosed(recordInfo.PreviousAddress, readcache);
-                    if (readcache.HeadAddress == prevHeadAddress)
-                    {
-                        // HeadAddress is the same, so ReadCacheEvict should have updated the lowest readcache entry's .PreviousAddress to point to the main log.
-                        recordInfo = readcache.GetInfo(stackCtx.recSrc.LowestReadCachePhysicalAddress);    // refresh the local copy
-                        Debug.Assert(!recordInfo.PreviousAddressIsReadCache, "SpinWaitUntilRecordIsClosed should set recordInfo.PreviousAddress to a main-log entry if Headaddress is unchanged");
-                        stackCtx.recSrc.LatestLogicalAddress = recordInfo.PreviousAddress;
-                        goto InMainLog;
-                    }
-
-                    // SpinWaitUntilRecordIsClosed updated readcache.HeadAddress, so we must restart the chain.
-                    stackCtx.UpdateRecordSourceToCurrentHashEntry();
-                    goto RestartChain;
-                }
-
-                stackCtx.recSrc.LatestLogicalAddress = recordInfo.PreviousAddress;
-                stackCtx.recSrc.LowestReadCacheLogicalAddress = stackCtx.recSrc.LatestLogicalAddress;
-                stackCtx.recSrc.LowestReadCachePhysicalAddress = readcache.GetPhysicalAddress(stackCtx.recSrc.LowestReadCacheLogicalAddress);
             }
 
         InMainLog:
             if (stackCtx.recSrc.HasReadCacheSrc)
+            {
+                Debug.Assert(object.ReferenceEquals(stackCtx.recSrc.Log, readcache), "Expected Log == readcache");
                 return true;
+            }
 
             // We did not find the record in the readcache, so set these to the start of the main log entries, and the caller will call TracebackForKeyMatch
+            Debug.Assert(object.ReferenceEquals(stackCtx.recSrc.Log, hlog), "Expected Log == hlog");
+            Debug.Assert(stackCtx.recSrc.LatestLogicalAddress > Constants.kTempInvalidAddress, "Must have a main-log address after readcache");
             stackCtx.recSrc.LogicalAddress = stackCtx.recSrc.LatestLogicalAddress;
             stackCtx.recSrc.PhysicalAddress = 0; // do *not* call hlog.GetPhysicalAddress(); LogicalAddress may be below hlog.HeadAddress. Let the caller decide when to do this.
             return false;
         }
 
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        private bool SpliceIntoHashChainAtReadCacheBoundary(ref RecordSource<Key, Value> recSrc, long newLogicalAddress)
+        bool ReadCacheNeedToWaitForEviction(ref OperationStackContext<Key, Value> stackCtx)
         {
-            // Splice into the gap of the last readcache/first main log entries.
-            Debug.Assert(recSrc.LowestReadCachePhysicalAddress >= readcache.HeadAddress, "LowestReadCachePhysicalAddress must be >= readcache.HeadAddress; caller should have called VerifyReadCacheSplicePoint");
-            ref RecordInfo rcri = ref readcache.GetInfo(recSrc.LowestReadCachePhysicalAddress);
-            return rcri.TryUpdateAddress(recSrc.LatestLogicalAddress, newLogicalAddress);
+            if (stackCtx.recSrc.LatestLogicalAddress < readcache.HeadAddress)
+            {
+                SpinWaitUntilRecordIsClosed(stackCtx.recSrc.LatestLogicalAddress, readcache);
+
+                // Restore to hlog; we may have set readcache into Log and continued the loop, had to restart, and the matching readcache record was evicted.
+                stackCtx.UpdateRecordSourceToCurrentHashEntry(hlog);
+                return true;
+            }
+            return false;
         }
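`FindInReadCache` relies on the convention that a hash-chain address word carries a readcache tag in a high bit, stripped with `~Constants.kReadCacheBitMask` before use. A toy version of that tagging scheme (the bit position is illustrative):

```csharp
// Toy version of the readcache address tagging used by FindInReadCache/SkipReadCache.
// FASTER reserves a high bit of the address word; the exact position here is illustrative.
static class AddressTag
{
    const long kReadCacheBitMask = 1L << 47;

    public static long TagAsReadCache(long address) => address | kReadCacheBitMask;
    public static bool IsReadCache(long address) => (address & kReadCacheBitMask) != 0;
    public static long AbsoluteAddress(long address) => address & ~kReadCacheBitMask;
}

// Chain walk in terms of the tag: follow PreviousAddress while IsReadCache(addr), indexing the
// readcache allocator with AbsoluteAddress(addr); the first untagged address is the highest
// main-log address in the tag chain (the splice point tracked by LowestReadCache*Address).
```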
 
-        // Skip over all readcache records in this key's chain (advancing logicalAddress to the first non-readcache record we encounter).
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        private void SkipReadCache(ref HashEntryInfo hei, ref long logicalAddress)
+        private bool SpliceIntoHashChainAtReadCacheBoundary(ref Key key, ref OperationStackContext<Key, Value> stackCtx, long newLogicalAddress)
         {
-            while (!SkipReadCache(ref logicalAddress, out _, out _))
+            // Splice into the gap of the last readcache/first main log entries.
+            Debug.Assert(stackCtx.recSrc.LowestReadCacheLogicalAddress >= readcache.ClosedUntilAddress,
+                $"{nameof(VerifyInMemoryAddresses)} should have ensured LowestReadCacheLogicalAddress ({stackCtx.recSrc.LowestReadCacheLogicalAddress}) >= readcache.ClosedUntilAddress ({readcache.ClosedUntilAddress})");
+
+            // If the LockTable is enabled, then we either have an exclusive lock and thus cannot have a competing insert to the readcache, or we are doing a
+            // Read() so we allow a momentary overlap of records because they're the same value (no update is being done). Otherwise, we must do a more expensive
+            // detach-and-reattach operation.
+            if (LockTable.IsEnabled)
             {
-                hei.SetToCurrent();
-                logicalAddress = hei.Address;
+                ref RecordInfo rcri = ref readcache.GetInfo(stackCtx.recSrc.LowestReadCachePhysicalAddress);
+                return rcri.TryUpdateAddress(stackCtx.recSrc.LatestLogicalAddress, newLogicalAddress);
             }
+            return DetachAndReattachReadCacheChain(ref key, ref stackCtx, newLogicalAddress);
         }
 
-        // Skip over all readcache records in this key's chain (advancing logicalAddress to the first non-readcache record we encounter
-        // and returning the lowest readcache logical and phyical addresses). If we go below readcache.HeadAddress we can't find the
-        // 'lowestReadCache*' output params, so return false and let the caller issue a retry. Otherwise, we reached the end of the
-        // readcache chain (if any), so return true.
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        private bool SkipReadCache(ref long logicalAddress, out long lowestReadCacheLogicalAddress, out long lowestReadCachePhysicalAddress)
+        private bool DetachAndReattachReadCacheChain(ref Key key, ref OperationStackContext<Key, Value> stackCtx, long newLogicalAddress)
         {
-            Debug.Assert(UseReadCache, "Should not call SkipReadCache if !UseReadCache");
-            var entry = new HashBucketEntry() { word = logicalAddress };
-            logicalAddress = entry.AbsoluteAddress;
-            if (!entry.ReadCache || logicalAddress < readcache.HeadAddress)
+            // We are not doing LockTable-based locking, so the only place non-ReadCacheEvict code updates the chain membership is at the HashBucketEntry;
+            // no thread will try to splice at the readcache/log boundary. hei.Address is the highest readcache address.
+            HashBucketEntry entry = new() { word = stackCtx.hei.Address };
+            long highestRcAddress = entry.Address, lowestRcAddress = highestRcAddress;
+
+            // First detach the chain by CAS'ing in the new log record (whose .PreviousAddress = recSrc.LatestLogicalAddress).
+            if (!stackCtx.hei.TryCAS(newLogicalAddress))
+                return false;
+            if (entry.AbsoluteAddress < readcache.HeadAddress)
+                goto Success;
+
+            // Traverse from the old address at hash entry to the lowestReadCacheLogicalAddress, invalidating any record matching the key.
+            for (bool found = false; entry.ReadCache && entry.AbsoluteAddress >= readcache.HeadAddress; /* incremented in loop */)
             {
-                lowestReadCacheLogicalAddress = Constants.kInvalidAddress;
-                lowestReadCachePhysicalAddress = 0;
-                return !entry.ReadCache;
+                lowestRcAddress = entry.Address;
+                var physicalAddress = readcache.GetPhysicalAddress(entry.AbsoluteAddress);
+                ref RecordInfo recordInfo = ref readcache.GetInfo(physicalAddress);
+                if (!found && !recordInfo.Invalid && comparer.Equals(ref key, ref readcache.GetKey(physicalAddress)))
+                {
+                    found = true;
+                    recordInfo.SetInvalidAtomic();  // Atomic needed due to other threads (e.g. ReadCacheEvict) possibly being in this chain before we detached it.
+                }
+                entry.word = recordInfo.PreviousAddress;
             }
 
-            var physicalAddress = readcache.GetPhysicalAddress(logicalAddress);
+            if (AbsoluteAddress(highestRcAddress) >= readcache.HeadAddress)
+            {
+                // Splice the new recordInfo into the local chain. Atomic is needed due to other threads (e.g. ReadCacheEvict) possibly being in this chain
+                // before we detached it, and setting the record to Invalid (no other thread will be updating anything else in the chain, though).
+                ref RecordInfo rcri = ref readcache.GetInfo(readcache.GetPhysicalAddress(AbsoluteAddress(lowestRcAddress)));
+                while (!rcri.TryUpdateAddress(rcri.PreviousAddress, newLogicalAddress))
+                    Thread.Yield();
+
+                // Now try to CAS the chain into the HashBucketEntry. If it fails, give up; we lose those readcache records.
+                // Trying to handle conflicts would require evaluating whether other threads had inserted keys in our chain, and it's too rare to worry about.
+                if (stackCtx.hei.TryCAS(highestRcAddress))
+                {
+                    // If we go below readcache.HeadAddress ReadCacheEvict may race past us, so make sure the lowest address is still in range.
+                    while (lowestRcAddress < readcache.HeadAddress)
+                        lowestRcAddress = ReadCacheEvictChain(readcache.HeadAddress, ref stackCtx.hei);
+                }
+            }
 
+        Success:
+            stackCtx.UpdateRecordSourceToCurrentHashEntry(hlog);
+            return true;
+        }
+
+        // Skip over all readcache records in this key's chain, advancing stackCtx.recSrc to the first non-readcache record we encounter.
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal void SkipReadCache(ref OperationStackContext<Key, Value> stackCtx, out bool didRefresh)
+        {
+            Debug.Assert(UseReadCache, "Should not call SkipReadCache if !UseReadCache");
+            didRefresh = false;
+
+        RestartChain:
+            // 'recSrc' has already been initialized to the address in 'hei'.
+            if (!stackCtx.hei.IsReadCache)
+                return;
+
+            // This is FindInReadCache without the key comparison or untilAddress.
+            stackCtx.recSrc.LogicalAddress = Constants.kInvalidAddress;
+            stackCtx.recSrc.PhysicalAddress = 0;
+
+            stackCtx.recSrc.LatestLogicalAddress = AbsoluteAddress(stackCtx.recSrc.LatestLogicalAddress);
+
             while (true)
             {
-                lowestReadCacheLogicalAddress = logicalAddress;
-                lowestReadCachePhysicalAddress = physicalAddress;
-
-                var recordInfo = readcache.GetInfo(physicalAddress);
-                if (recordInfo.Tentative)
+                if (ReadCacheNeedToWaitForEviction(ref stackCtx))
                 {
-                    // This is not a ref, so we have to re-get it.
-                    ref var ri = ref readcache.GetInfo(physicalAddress);
-                    SpinWaitWhileTentativeAndReturnValidity(ref ri);
-                    recordInfo = ri;
+                    didRefresh = true;
+                    goto RestartChain;
                 }
 
-                // Look ahead to see if we're at the end of the readcache chain.
-                entry.word = recordInfo.PreviousAddress;
-                logicalAddress = entry.AbsoluteAddress;
-
-                if (!entry.ReadCache || logicalAddress < readcache.HeadAddress)
-                    return !entry.ReadCache;
+                // Increment the trailing "lowest read cache" address (for the splice point). We'll look ahead from this to examine the next record.
+                stackCtx.recSrc.LowestReadCacheLogicalAddress = stackCtx.recSrc.LatestLogicalAddress;
+                stackCtx.recSrc.LowestReadCachePhysicalAddress = readcache.GetPhysicalAddress(stackCtx.recSrc.LowestReadCacheLogicalAddress);
 
-                physicalAddress = readcache.GetPhysicalAddress(logicalAddress);
+                RecordInfo recordInfo = readcache.GetInfo(stackCtx.recSrc.LowestReadCachePhysicalAddress);
+                if (!recordInfo.PreviousAddressIsReadCache)
+                {
+                    stackCtx.recSrc.LatestLogicalAddress = recordInfo.PreviousAddress;
+                    stackCtx.recSrc.LogicalAddress = stackCtx.recSrc.LatestLogicalAddress;
+                    stackCtx.recSrc.PhysicalAddress = 0;
+                    return;
+                }
+                stackCtx.recSrc.LatestLogicalAddress = AbsoluteAddress(recordInfo.PreviousAddress);
             }
         }
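`DetachAndReattachReadCacheChain` above is a detach/edit/reattach protocol: CAS the bucket straight to the new log record so the readcache prefix becomes private, invalidate the stale key in that prefix, then CAS the prefix back, abandoning it if that CAS fails (readcache records are only a cache). A self-contained toy model of the same protocol on a simple linked chain:

```csharp
using System.Threading;

// Toy model of the detach/edit/reattach protocol. A bucket head points at a singly-linked
// readcache prefix that ends at a main-log node; addresses are modeled as object references.
class Node { public long Key; public bool Invalid; public bool IsReadCache; public Node Next; }

class Bucket
{
    public Node Head;

    // newLogRecord.Next must already point at the first main-log node (the splice target).
    public bool SpliceAtBoundary(long key, Node newLogRecord)
    {
        Node oldHead = Volatile.Read(ref Head);

        // 1. Detach: CAS the bucket straight to the new record; the prefix is now private to us.
        if (Interlocked.CompareExchange(ref Head, newLogRecord, oldHead) != oldHead)
            return false;                                   // lost the race: caller retries
        if (oldHead is null || !oldHead.IsReadCache)
            return true;                                    // there was no readcache prefix

        // 2. Invalidate any stale cached copy of 'key' and find the last readcache node.
        Node tail = oldHead;
        for (Node n = oldHead; n is not null && n.IsReadCache; n = n.Next)
        {
            if (n.Key == key) n.Invalid = true;
            tail = n;
        }

        // 3. Reattach: point the prefix tail at the new record and CAS the prefix back in.
        //    If the CAS fails (another insert won), abandon the prefix -- it is only a cache.
        tail.Next = newLogRecord;
        Interlocked.CompareExchange(ref Head, oldHead, newLogRecord);
        return true;
    }
}
```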
@@ -213,7 +232,7 @@ private void SkipReadCacheBucket(HashBucket* bucket)
 
         // Called after a readcache insert, to make sure there was no race with another session that added a main-log record at the same time.
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        private bool EnsureNoMainLogRecordWasAddedDuringReadCacheInsert(ref Key key, RecordSource<Key, Value> recSrc, long untilLogicalAddress, ref OperationStatus failStatus)
+        private bool EnsureNoNewMainLogRecordWasSpliced(ref Key key, RecordSource<Key, Value> recSrc, long untilLogicalAddress, ref OperationStatus failStatus)
         {
             bool success = true;
             ref RecordInfo lowest_rcri = ref readcache.GetInfo(recSrc.LowestReadCachePhysicalAddress);
@@ -221,9 +240,8 @@ private bool EnsureNoMainLogRecordWasAddedDuringReadCacheInsert(ref Key key, Rec
             if (lowest_rcri.PreviousAddress > untilLogicalAddress)
             {
                 // Someone added a new record in the splice region. It won't be readcache; that would've been added at tail. See if it's our key.
-                // We want this whether it's Tentative or not, so don't wait for Tentative.
                 var minAddress = untilLogicalAddress > hlog.HeadAddress ? untilLogicalAddress : hlog.HeadAddress;
-                if (TraceBackForKeyMatch(ref key, lowest_rcri.PreviousAddress, minAddress + 1, out long prevAddress, out _, waitForTentative: false))
+                if (TraceBackForKeyMatch(ref key, lowest_rcri.PreviousAddress, minAddress + 1, out long prevAddress, out _))
                     success = false;
                 else if (prevAddress > untilLogicalAddress && prevAddress < hlog.HeadAddress)
                 {
@@ -232,26 +250,32 @@ private bool EnsureNoMainLogRecordWasAddedDuringReadCacheInsert(ref Key key, Rec
                     // hlog.HeadAddress -> [prevAddress is somewhere in here] -> untilLogicalAddress
                     // (If prevAddress is == untilLogicalAddress, we know there is nothing more recent, so the new readcache record should stay.)
                     // recSrc.HasLockTableLock may or may not be true. The new readcache record must be invalidated; then we return ON_DISK;
-                    // this abandons the attempt to CopyToTail, and the caller proceeds with the possibly-stale value that was read (and any
-                    // LockTable lock is released, with the LockTable entry remaining).
+                    // this abandons the attempt to CopyToTail, and the caller proceeds with the possibly-stale value that was read.
                     success = false;
                     failStatus = OperationStatus.RECORD_ON_DISK;
                 }
             }
             return success;
         }
-
-        // Called to check if another session added a readcache entry from a pending read while we were inserting an updated record. If so, then if
-        // it is not locked, it is obsolete and can be Invalidated, and the update continues. Otherwise, the inserted record is either obsolete or
-        // its update is disallowed because a read lock or better exists on that key, and so the inserted record must be invalidated.
+
+        // Called to check if another session added a readcache entry from a pending read while we were inserting a record at the tail of the log.
+        // If so, then it must be invalidated, and its *read* locks must be transferred to the new record. Why not X locks?
+        // - There can be only one X lock so we optimize its handling in CompleteUpdate, rather than transfer them like S locks (because there
+        //   can be multiple S locks).
+        // - The thread calling this has "won the CAS" if it has gotten this far; that is, it has CAS'd in a new record at the tail of the log
+        //   (or spliced it at the end of the readcache prefix chain).
+        // - It is still holding its "tentative" X lock on the newly-inserted log-tail record while calling this.
+        // - If there is another thread holding an X lock on this readcache record, it will fail its CAS, give up its X lock, and RETRY_LATER.
         // Note: The caller will do no epoch-refreshing operations after re-verifying the readcache chain following record allocation, so it is not
-        // possible for the chain to be disrupted and the new insertion lost, or for hei.Address to be below readcache.HeadAddress.
+        // possible for the chain to be disrupted and the new insertion lost, even if readcache.HeadAddress is raised above hei.Address.
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        private bool ReadCacheCompleteTwoPhaseUpdate(ref Key key, ref HashEntryInfo hei)
+        private void ReadCacheCheckTailAfterSplice(ref Key key, ref HashEntryInfo hei)
         {
-            Debug.Assert(UseReadCache, "Should not call ReadCacheCompleteTwoPhaseUpdate if !UseReadCache");
-            HashBucketEntry entry = new() { word = hei.CurrentAddress };
-            HashBucketEntry untilEntry = new() { word = hei.Address };
+            Debug.Assert(UseReadCache, "Should not call ReadCacheCheckTailAfterSplice if !UseReadCache");
+
+            // We already searched from hei.Address down; so now we search from hei.CurrentAddress down to just above hei.Address.
+            HashBucketEntry entry = new() { word = hei.CurrentAddress | (hei.IsCurrentReadCache ? Constants.kReadCacheBitMask : 0) };
+            HashBucketEntry untilEntry = new() { word = hei.Address | (hei.IsReadCache ? Constants.kReadCacheBitMask : 0) };
 
             // Traverse for the key above untilAddress (which may not be in the readcache if there were no readcache records when it was retrieved).
             while (entry.ReadCache && (entry.Address > untilEntry.Address || !untilEntry.ReadCache))
@@ -260,40 +284,23 @@ private bool ReadCacheCompleteTwoPhaseUpdate(ref Key key, ref HashEntryInfo hei)
                 ref RecordInfo recordInfo = ref readcache.GetInfo(physicalAddress);
                 if (!recordInfo.Invalid && comparer.Equals(ref key, ref readcache.GetKey(physicalAddress)))
                 {
-                    if (SpinWaitWhileTentativeAndReturnValidity(ref recordInfo))
-                        return recordInfo.SetInvalidAtomicIfNoLocks();
+                    recordInfo.SetInvalidAtomic();
+                    return;
                 }
                 entry.word = recordInfo.PreviousAddress;
             }
 
-            // If we're here, no record for 'key' was found.
-            return true;
+            // If we're here, no (valid) record for 'key' was found.
+            return;
         }
 
-        // Called to check if another session added a readcache entry from a pending read while we were doing CopyToTail of a pending read.
-        // If so and it is unlocked or has only Read locks, we can transfer any locks to the new record. If it is XLocked, we fail.
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        private bool ReadCacheCompleteTwoPhaseCopyToTail(ref Key key, ref HashEntryInfo hei, ref RecordInfo newRecordInfo, bool allowXLock, bool removeEphemeralLock)
+        void ReadCacheAbandonRecord(long physicalAddress)
        {
-            Debug.Assert(UseReadCache, "Should not call ReadCacheCompleteTwoPhaseCopyToTail if !UseReadCache");
-            HashBucketEntry entry = new() { word = hei.CurrentAddress };
-            HashBucketEntry untilEntry = new() { word = hei.Address };
-
-            // Traverse for the key above untilAddress (which may not be in the readcache if there were no readcache records when it was retrieved).
-            while (entry.ReadCache && (entry.Address > untilEntry.Address || !untilEntry.ReadCache))
-            {
-                var physicalAddress = readcache.GetPhysicalAddress(entry.AbsoluteAddress);
-                ref RecordInfo recordInfo = ref readcache.GetInfo(physicalAddress);
-                if (!recordInfo.Invalid && comparer.Equals(ref key, ref readcache.GetKey(physicalAddress)))
-                {
-                    if (SpinWaitWhileTentativeAndReturnValidity(ref recordInfo))
-                        return newRecordInfo.CopyReadLocksFromAndMarkSourceAtomic(ref recordInfo, allowXLock, seal: false, removeEphemeralLock);
-                }
-                entry.word = recordInfo.PreviousAddress;
-            }
-
-            // If we're here, no (valid, non-Tentative) record for 'key' was found.
-            return true;
+            // TODO: We currently don't save readcache allocations for retry, but we could
+            ref var ri = ref readcache.GetInfo(physicalAddress);
+            ri.SetInvalid();
+            ri.PreviousAddress = Constants.kTempInvalidAddress;  // Necessary for ReadCacheEvict, but cannot be kInvalidAddress or we have recordInfo.IsNull
         }
 
         internal void ReadCacheEvict(long rcLogicalAddress, long rcToLogicalAddress)
@@ -328,64 +335,68 @@ internal void ReadCacheEvict(long rcLogicalAddress, long rcToLogicalAddress)
                 if (!FindTag(ref hei))
                     goto NextRecord;
 
-                // Traverse the chain of readcache entries for this key, looking "ahead" to .PreviousAddress to see if it is less than readcache.HeadAddress.
-                // nextPhysicalAddress remains Constants.kInvalidAddress if hei.Address is < HeadAddress; othrwise, it is the lowest-address readcache record
-                // remaining following this eviction, and its .PreviousAddress is updated to each lower record in turn until we hit a non-readcache record.
-                long nextPhysicalAddress = Constants.kInvalidAddress;
-                HashBucketEntry entry = new() { word = hei.entry.word };
-                while (entry.ReadCache)
+                ReadCacheEvictChain(rcToLogicalAddress, ref hei);
+
+            NextRecord:
+                if ((rcLogicalAddress & readcache.PageSizeMask) + rcAllocatedSize > readcache.PageSize)
                 {
-                    var la = entry.AbsoluteAddress;
-                    var pa = readcache.GetPhysicalAddress(la);
-                    ref RecordInfo ri = ref readcache.GetInfo(pa);
+                    rcLogicalAddress = (1 + (rcLogicalAddress >> readcache.LogPageSizeBits)) << readcache.LogPageSizeBits;
+                    continue;
+                }
+                rcLogicalAddress += rcAllocatedSize;
+            }
+        }
+
+        private long ReadCacheEvictChain(long rcToLogicalAddress, ref HashEntryInfo hei)
+        {
+            // Traverse the chain of readcache entries for this key, looking "ahead" to .PreviousAddress to see if it is less than readcache.HeadAddress.
+            // nextPhysicalAddress remains Constants.kInvalidAddress if hei.Address is < HeadAddress; otherwise, it is the lowest-address readcache record
+            // remaining following this eviction, and its .PreviousAddress is updated to each lower record in turn until we hit a non-readcache record.
+            long nextPhysicalAddress = Constants.kInvalidAddress;
+            HashBucketEntry entry = new() { word = hei.entry.word };
+            long lowestAddress = entry.Address;
+            while (entry.ReadCache)
+            {
+                var la = entry.AbsoluteAddress;
+                var pa = readcache.GetPhysicalAddress(la);
+                ref RecordInfo ri = ref readcache.GetInfo(pa);
 
 #if DEBUG
-                    // Due to collisions, we can compare the hash code *mask* (i.e. the hash bucket index), not the key
-                    var mask = state[resizeInfo.version].size_mask;
-                    var rc_mask = hei.hash & mask;
-                    var pa_mask = comparer.GetHashCode64(ref readcache.GetKey(pa)) & mask;
-                    Debug.Assert(rc_mask == pa_mask, "The keyHash mask of the hash-chain ReadCache entry does not match the one obtained from the initial readcache address");
+                // Due to collisions, we can compare the hash code *mask* (i.e. the hash bucket index), not the key
+                var mask = state[resizeInfo.version].size_mask;
+                var rc_mask = hei.hash & mask;
+                var pa_mask = comparer.GetHashCode64(ref readcache.GetKey(pa)) & mask;
+                Debug.Assert(rc_mask == pa_mask, "The keyHash mask of the hash-chain ReadCache entry does not match the one obtained from the initial readcache address");
#endif
 
-                    // If the record's address is above the eviction range, leave it there and track nextPhysicalAddress.
-                    if (la >= rcToLogicalAddress)
-                    {
-                        nextPhysicalAddress = pa;
-                        entry.word = ri.PreviousAddress;
-                        continue;
-                    }
-
-                    // The record is being evicted. First transfer any locks. Other threads do not conflict; traversal, lock, and unlock operations all
-                    // check for readcache addresses below readcache.HeadAddress and call SpinWaitUntilRecordIsClosed() as needed.
-                    if (!ri.Invalid && ri.IsLocked)
-                        this.LockTable.TransferFromLogRecord(ref readcache.GetKey(pa), ri);
-
-                    // If we have a higher readcache record that is not being evicted, unlink 'la' by setting (nextPhysicalAddress).PreviousAddress to (la).PreviousAddress.
-                    if (nextPhysicalAddress != Constants.kInvalidAddress)
-                    {
-                        ref RecordInfo nextri = ref readcache.GetInfo(nextPhysicalAddress);
-                        if (nextri.TryUpdateAddress(entry.Address, ri.PreviousAddress))
-                            ri.PreviousAddress = Constants.kTempInvalidAddress;    // The record is no longer in the chain
-                        entry.word = nextri.PreviousAddress;
-                        continue;
-                    }
-
-                    // We are evicting the record whose address is in the hash bucket; unlink 'la' by setting the hash bucket to point to (la).PreviousAddress.
-                    if (hei.TryCAS(ri.PreviousAddress))
-                        ri.PreviousAddress = Constants.kTempInvalidAddress;    // The record is no longer in the chain
-                    else
-                        hei.SetToCurrent();
-                    entry.word = hei.entry.word;
+                // If the record's address is above the eviction range, leave it there and track nextPhysicalAddress.
+                if (la >= rcToLogicalAddress)
+                {
+                    nextPhysicalAddress = pa;
+                    entry.word = ri.PreviousAddress;
+                    continue;
                 }
 
-            NextRecord:
-                if ((rcLogicalAddress & readcache.PageSizeMask) + rcAllocatedSize > readcache.PageSize)
+                // The record is being evicted. If we have a higher readcache record that is not being evicted, unlink 'la' by setting
+                // (nextPhysicalAddress).PreviousAddress to (la).PreviousAddress.
+                if (nextPhysicalAddress != Constants.kInvalidAddress)
                 {
-                    rcLogicalAddress = (1 + (rcLogicalAddress >> readcache.LogPageSizeBits)) << readcache.LogPageSizeBits;
+                    ref RecordInfo nextri = ref readcache.GetInfo(nextPhysicalAddress);
+                    if (nextri.TryUpdateAddress(entry.Address, ri.PreviousAddress))
+                        ri.PreviousAddress = Constants.kTempInvalidAddress;    // The record is no longer in the chain
+                    entry.word = nextri.PreviousAddress;
                     continue;
                 }
-                rcLogicalAddress += rcAllocatedSize;
+
+                // We are evicting the record whose address is in the hash bucket; unlink 'la' by setting the hash bucket to point to (la).PreviousAddress.
+                if (hei.TryCAS(ri.PreviousAddress))
+                    ri.PreviousAddress = Constants.kTempInvalidAddress;    // The record is no longer in the chain
+                else
+                    hei.SetToCurrent();
+                lowestAddress = entry.Address;
+                entry.word = hei.entry.word;
             }
+            return lowestAddress;
         }
     }
 }
\ No newline at end of file
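`ReadCacheEvictChain` unlinks evicted records either by rewriting the surviving predecessor's pointer or, for the record at the bucket head, by CAS'ing the hash bucket past it and restarting if the CAS loses. A toy version of that unlink loop, reusing the `Node`/`Bucket` types from the previous sketch:

```csharp
using System;
using System.Threading;

// Reuses the Node/Bucket toy types from the previous sketch.
static class EvictExample
{
    public static void EvictBelow(Bucket bucket, Func<Node, bool> isEvicted)
    {
        Node survivor = null;                               // plays the role of nextPhysicalAddress
        for (Node n = Volatile.Read(ref bucket.Head); n is not null && n.IsReadCache; )
        {
            Node next = n.Next;
            if (!isEvicted(n)) { survivor = n; n = next; continue; }

            if (survivor is not null)
            {
                survivor.Next = next;                       // unlink below a surviving node
            }
            else if (Interlocked.CompareExchange(ref bucket.Head, next, n) != n)
            {
                n = Volatile.Read(ref bucket.Head);         // bucket changed: restart from the new head
                continue;
            }
            n = next;
        }
    }
}
```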
diff --git a/cs/src/core/Index/FASTER/Implementation/RecordSource.cs b/cs/src/core/Index/FASTER/Implementation/RecordSource.cs
index a662251fc..354620db5 100644
--- a/cs/src/core/Index/FASTER/Implementation/RecordSource.cs
+++ b/cs/src/core/Index/FASTER/Implementation/RecordSource.cs
@@ -1,7 +1,6 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT license.
 
-using System.Diagnostics;
 using System.Runtime.CompilerServices;
 using static FASTER.core.Utility;
 
@@ -61,35 +60,24 @@ internal struct RecordSource<Key, Value>
         internal bool HasReadCacheSrc;
 
         /// <summary>
-        /// Set by caller to indicate whether it has an ephemeral lock on the InMemorySrc record's <see cref="RecordInfo"/> (this may be mainlog or readcache).
+        /// Set by caller to indicate whether it has a transient lock in the LockTable for the operation Key.
         /// </summary>
-        internal bool HasInMemoryLock;
+        internal bool HasTransientLock;
 
         /// <summary>
-        /// Set by caller to indicate whether it has an ephemeral lock in the LockTable for the operation Key.
+        /// Status of ephemeral locking, if applicable.
         /// </summary>
-        internal bool HasLockTableLock;
+        internal EphemeralLockResult ephemeralLockResult;
 
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        internal void ClearSrc()
-        {
-            this.LogicalAddress = Constants.kInvalidAddress;
-            this.PhysicalAddress = 0;
-            this.Log = default;
-            this.HasMainLogSrc = false;
-            this.HasReadCacheSrc = false;
-            this.HasInMemoryLock = false;
-            this.HasLockTableLock = false;
-        }
-
+        internal ref RecordInfo GetInfo() => ref Log.GetInfo(PhysicalAddress);
+        internal ref Key GetKey() => ref Log.GetKey(PhysicalAddress);
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        internal ref RecordInfo GetSrcRecordInfo() => ref Log.GetInfo(PhysicalAddress);
+        internal ref Value GetValue() => ref Log.GetValue(PhysicalAddress);
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        internal ref Value GetSrcValue() => ref Log.GetValue(PhysicalAddress);
+        internal long SetPhysicalAddress() => this.PhysicalAddress = Log.GetPhysicalAddress(LogicalAddress);
 
-        internal bool HasSrc => HasInMemorySrc || HasLockTableLock;
         internal bool HasInMemorySrc => HasMainLogSrc || HasReadCacheSrc;
-        internal bool HasLock => HasInMemoryLock || HasLockTableLock;
 
         /// <summary>
         /// Initialize to the latest logical address from the caller.
@@ -102,82 +90,28 @@ internal void Set(long latestLogicalAddress, AllocatorBase<Key, Value> srcLog)
             LowestReadCachePhysicalAddress = default;
             HasMainLogSrc = false;
             HasReadCacheSrc = default;
-            this.HasInMemoryLock = false;
-            HasLockTableLock = false;
+
+            // HasTransientLock = ...; Do not clear this; it is in the LockTable and must be preserved until unlocked
 
             this.LatestLogicalAddress = this.LogicalAddress = AbsoluteAddress(latestLogicalAddress);
             this.Log = srcLog;
         }
 
-        /// <summary>
-        /// After a successful CopyUpdate or other replacement of a source record, this marks the source record as Sealed or Invalid.
-        /// </summary>
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        internal void MarkSourceRecordAfterSuccessfulCopyUpdate<Input, Output, Context, FasterSession>(FasterSession fasterSession, ref RecordInfo srcRecordInfo)
-            where FasterSession : IFasterSession<Key, Value, Input, Output, Context>
-        {
-            if (this.HasInMemorySrc)
-            {
-                this.AssertInMemorySourceWasNotEvicted();
-                if (this.HasReadCacheSrc)
-                {
-                    // If the record was evicted, it won't be accessed, so we do not need to worry about setting it invalid.
-                    // Even though we should be called with an XLock (unless ephemeral locking is disabled) we need to do this atomically;
-                    // otherwise this could race with ReadCacheEvict unlinking records.
-                    srcRecordInfo.SetInvalidAtomic();
-                }
-                else
-                {
-                    // Another thread may come along to do this update in-place, or use this record as a copy source, once we've released our lock;
-                    // Seal it to prevent that. This will cause the other thread to RETRY_NOW (unlike Invalid which ignores the record).
-                    // If the record was evicted, then we cannot Seal it, but we have already inserted the later record, so any attempt to retrieve
-                    // the old record from disk will find the newer version instead due to normal verification when completing pending I/O.
-                    // Because we have an XLock (unless ephemeral locking is disabled), we don't need an atomic operation here.
-                    srcRecordInfo.Seal();
-                }
-
-                // if fasterSession.DisableEphemeralLocking, the "finally" handler won't unlock it, so we do that here.
-                // For ephemeral locks, we don't clear the locks here (defer that to the "finally").
-                if (fasterSession.DisableEphemeralLocking)
-                    srcRecordInfo.ClearLocks();
-            }
-
-            // We successfully transferred the source recordInfo, and will unlock it in UnlockAfterUpdate, but if there was a LockTable entry,
-            // it was transferred and doesn't exist in the LockTable anymore. A new LockTable entry may be written if the source record fell
-            // below HeadAddress due to BlockAllocate, but we handle that case in UnlockAfter*() below (by spinwaiting until it's in the LockTable).
-            this.HasLockTableLock = false;
-        }
-
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        internal bool InMemorySourceIsBelowHeadAddress() => this.HasInMemorySrc && this.LogicalAddress < this.Log.HeadAddress;
-
-        [Conditional("DEBUG")]
-        internal void AssertInMemorySourceWasNotEvicted()
-        {
-            if (this.HasInMemorySrc)
-            {
-                // We should have called VerifyInMemoryAddresses when starting this operation to verify we were above HeadAddress.
-                // After that, HeadAddress may be increased by another session, but we should always hold the epoch here and thus
-                // OnPagesClosed (which does the actual eviction) cannot be called.
-
-                // This should not be called on failure/retry, or it will fire spuriously. For example:
-                // - Lock a record that is on the next page to be evicted
-                // - Call BlockAllocate, which evicts that page
-                // - This will then fail the subsequent VerifyInMemoryAddresses call, because the record is now below HeadAddress
-                // In this case, the record has been legitimately evicted.
-                Debug.Assert(this.LogicalAddress >= this.Log.ClosedUntilAddress, "Record should always be in memory at this point, regardless of HeadAddress");
-            }
-        }
-
         public override string ToString()
         {
             var isRC = "(rc)";
             var llaRC = IsReadCache(LatestLogicalAddress) ? isRC : string.Empty;
             var laRC = IsReadCache(LogicalAddress) ? isRC : string.Empty;
             static string bstr(bool value) => value ? "T" : "F";
-            return $"lla {AbsoluteAddress(LatestLogicalAddress)}{llaRC}, la {AbsoluteAddress(LogicalAddress)}{laRC}, pa {PhysicalAddress:x}"
-                 + $" lrcla {AbsoluteAddress(LowestReadCacheLogicalAddress)}, lrcpa {LowestReadCachePhysicalAddress:x}"
-                 + $" hasMLsrc {bstr(HasMainLogSrc)}, hasRCsrc {bstr(HasReadCacheSrc)}, hasIMlock {bstr(HasInMemoryLock)}, hasLTlock {bstr(HasLockTableLock)}";
+            string ephLockResult = this.ephemeralLockResult switch
+            {
+                EphemeralLockResult.Success => "S",
+                EphemeralLockResult.Failed => "F",
+                EphemeralLockResult.HoldForSeal => "H",
+                _ => "unknown"
+            };
+            return $"lla {AbsoluteAddress(LatestLogicalAddress)}{llaRC}, la {AbsoluteAddress(LogicalAddress)}{laRC}, lrcla {AbsoluteAddress(LowestReadCacheLogicalAddress)},"
+                 + $" logSrc {bstr(HasMainLogSrc)}, rcSrc {bstr(HasReadCacheSrc)}, tLock {bstr(HasTransientLock)}, eLock {ephLockResult}";
         }
     }
 }
diff --git a/cs/src/core/Index/FASTER/LogAccessor.cs b/cs/src/core/Index/FASTER/LogAccessor.cs
index 044df2764..8875847bd 100644
--- a/cs/src/core/Index/FASTER/LogAccessor.cs
+++ b/cs/src/core/Index/FASTER/LogAccessor.cs
@@ -97,11 +97,9 @@ public void SetEmptyPageCount(int pageCount, bool wait = false)
         public long MemorySizeBytes => ((long)(allocator.AllocatedPageCount + allocator.OverflowPageCount)) << allocator.LogPageSizeBits;
 
         /// <summary>
-        /// Whether we have allocated exactly the requested number of pages on the log (based on BufferSize and EmptyPageCount).
-        /// This can take some time between increasing EmptyPageCount and the actual page eviction (decrementing AllocatedPageCount),
-        /// or between decreasing EmptyPageCount and the time the page is allocated (increasing AllocatedPageCount).
+        /// Number of pages allocated
         /// </summary>
-        public bool PageAllocationStabilized() => allocator.AllocatedPageCount == allocator.BufferSize - allocator.EmptyPageCount + 1;
+        public int AllocatedPageCount => allocator.AllocatedPageCount;
 
         /// <summary>
         /// Shift begin address to the provided untilAddress. Make sure address corresponds to record boundary if snapToPageStart is set to
-
-        /// <summary>
-        /// Reset the modified bit on Read() (even if it does CopyToTail).
-        /// </summary>
-        ResetModifiedBit = 0x20,
     }
 }
\ No newline at end of file
diff --git a/cs/src/core/Index/Interfaces/FunctionsBase.cs b/cs/src/core/Index/Interfaces/FunctionsBase.cs
index 81b45a86b..2e9c6b769 100644
--- a/cs/src/core/Index/Interfaces/FunctionsBase.cs
+++ b/cs/src/core/Index/Interfaces/FunctionsBase.cs
@@ -94,6 +94,22 @@ public override bool SingleReader(ref Key key, ref Value input, ref Value value,
             return true;
         }
 
+        public override bool SingleWriter(ref Key key, ref Value input, ref Value src, ref Value dst, ref Value output, ref UpsertInfo upsertInfo, WriteReason reason)
+        {
+            var result = base.SingleWriter(ref key, ref input, ref src, ref dst, ref output, ref upsertInfo, reason);
+            if (result)
+                output = dst;
+            return result;
+        }
+
+        public override bool ConcurrentWriter(ref Key key, ref Value input, ref Value src, ref Value dst, ref Value output, ref UpsertInfo upsertInfo)
+        {
+            var result = base.ConcurrentWriter(ref key, ref input, ref src, ref dst, ref output, ref upsertInfo);
+            if (result)
+                output = dst;
+            return result;
+        }
+
         /// <inheritdoc/>
         public override bool InitialUpdater(ref Key key, ref Value input, ref Value value, ref Value output, ref RMWInfo rmwInfo) { value = output = input; return true; }
         /// <inheritdoc/>
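The two writer overrides added above (to what appears to be the SimpleFunctions-style default implementation) publish the written value through `output`, but only when the write succeeded. A toy illustration of that copy-on-success pattern, independent of FASTER's types:

```csharp
// Toy illustration: only publish 'output' when the write actually happened, so a failed
// or canceled writer leaves the caller's output untouched.
class BaseWriter
{
    public virtual bool ConcurrentWriter(ref int src, ref int dst, ref int output)
    {
        dst = src;
        return true;
    }
}

class OutputtingWriter : BaseWriter
{
    public override bool ConcurrentWriter(ref int src, ref int dst, ref int output)
    {
        var result = base.ConcurrentWriter(ref src, ref dst, ref output);
        if (result)
            output = dst;   // surface the value actually stored, which may differ from src
        return result;
    }
}
```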
diff --git a/cs/src/core/Index/Interfaces/IFasterSession.cs b/cs/src/core/Index/Interfaces/IFasterSession.cs
index c220e894f..758d4432c 100644
--- a/cs/src/core/Index/Interfaces/IFasterSession.cs
+++ b/cs/src/core/Index/Interfaces/IFasterSession.cs
@@ -6,7 +6,8 @@ namespace FASTER.core
     /// <summary>
     /// Provides thread management and callback to checkpoint completion (called state machine).
     /// </summary>
-    // This is split to two interfaces just to limit infection of type parameters
+    /// This is broken out into a non-generic base interface to allow the use of <see cref="..."/>
+    /// in <see cref="..."/>.
     internal interface IFasterSession
     {
         void UnsafeResumeThread();
@@ -25,24 +26,18 @@ internal interface IFasterSession
     /// </summary>
     internal interface IFasterSession<Key, Value, Input, Output, Context> : IFasterSession, IVariableLengthStruct<Value, Input>
     {
-        #region Optional features supported by this implementation
-        bool DisableEphemeralLocking { get; }
-        bool IsManualLocking { get; }
-        SessionType SessionType { get; }
-        #endregion Optional features supported by this implementation
-
         #region Reads
         bool SingleReader(ref Key key, ref Input input, ref Value value, ref Output dst, ref RecordInfo recordInfo, ref ReadInfo readInfo);
-        bool ConcurrentReader(ref Key key, ref Input input, ref Value value, ref Output dst, ref RecordInfo recordInfo, ref ReadInfo readInfo);
+        bool ConcurrentReader(ref Key key, ref Input input, ref Value value, ref Output dst, ref RecordInfo recordInfo, ref ReadInfo readInfo, out EphemeralLockResult lockResult);
         void ReadCompletionCallback(ref Key key, ref Input input, ref Output output, Context ctx, Status status, RecordMetadata recordMetadata);
         #endregion reads
 
         #region Upserts
         bool SingleWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, ref UpsertInfo upsertInfo, WriteReason reason);
         void PostSingleWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, ref UpsertInfo upsertInfo, WriteReason reason);
-        bool ConcurrentWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, ref UpsertInfo upsertInfo);
+        bool ConcurrentWriter(ref Key key, ref Input input, ref Value src, ref Value dst, ref Output output, ref RecordInfo recordInfo, ref UpsertInfo upsertInfo, out EphemeralLockResult lockResult);
         #endregion Upserts
 
         #region RMWs
@@ -59,7 +54,7 @@ internal interface IFasterSession<Key, Value, Input, Output, Context> : IFasterS
         #endregion CopyUpdater
 
         #region InPlaceUpdater
-        bool InPlaceUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, ref RMWInfo rmwInfo, out OperationStatus status);
+        bool InPlaceUpdater(ref Key key, ref Input input, ref Value value, ref Output output, ref RecordInfo recordInfo, ref RMWInfo rmwInfo, out OperationStatus status, out EphemeralLockResult lockResult);
         #endregion InPlaceUpdater
 
         void RMWCompletionCallback(ref Key key, ref Input input, ref Output output, Context ctx, Status status, RecordMetadata recordMetadata);
@@ -68,7 +63,7 @@ internal interface IFasterSession<Key, Value, Input, Output, Context> : IFasterS
         #region Deletes
         bool SingleDeleter(ref Key key, ref Value value, ref RecordInfo recordInfo, ref DeleteInfo deleteInfo);
         void PostSingleDeleter(ref Key key, ref RecordInfo recordInfo, ref DeleteInfo deleteInfo);
-        bool ConcurrentDeleter(ref Key key, ref Value value, ref RecordInfo recordInfo, ref DeleteInfo deleteInfo);
+        bool ConcurrentDeleter(ref Key key, ref Value value, ref RecordInfo recordInfo, ref DeleteInfo deleteInfo, out EphemeralLockResult lockResult);
         #endregion Deletes
 
         #region Disposal
@@ -79,15 +74,17 @@ internal interface IFasterSession<Key, Value, Input, Output, Context> : IFasterS
         void DisposeDeserializedFromDisk(ref Key key, ref Value value, ref RecordInfo recordInfo);
         #endregion Disposal
 
-        #region Ephemeral locking
-        bool TryLockEphemeralExclusive(ref RecordInfo recordInfo);
-        bool TryLockEphemeralShared(ref RecordInfo recordInfo);
-        void UnlockEphemeralExclusive(ref RecordInfo recordInfo);
-        bool TryUnlockEphemeralShared(ref RecordInfo recordInfo);
+        #region Transient locking
+        bool TryLockTransientExclusive(ref Key key, ref OperationStackContext<Key, Value> stackCtx);
+        bool TryLockTransientShared(ref Key key, ref OperationStackContext<Key, Value> stackCtx);
+        void UnlockTransientExclusive(ref Key key, ref OperationStackContext<Key, Value> stackCtx);
+        void UnlockTransientShared(ref Key key, ref OperationStackContext<Key, Value> stackCtx);
         #endregion
 
         bool CompletePendingWithOutputs(out CompletedOutputIterator<Key, Value, Input, Output, Context> completedOutputs, bool wait = false, bool spinWaitForCommit = false);
 
+        public FasterKV<Key, Value>.FasterExecutionContext<Input, Output, Context> Ctx { get; }
+
         IHeapContainer<Input> GetHeapContainer(ref Input input);
     }
 }
\ No newline at end of file
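The new `out EphemeralLockResult lockResult` parameters refer to an enum whose members can be inferred from the `RecordSource.ToString` switch earlier in this diff. The definition below is shown for orientation only and is not copied from the FASTER sources:

```csharp
// NOT copied from the FASTER sources -- member names inferred from the ToString switch
// in RecordSource above; shown only so the new out-parameters read in context.
public enum EphemeralLockResult
{
    Success,      // lock taken (and released) around the in-place operation
    Failed,       // lock unavailable; the operation falls back (e.g., retry or copy to tail)
    HoldForSeal,  // lock retained so the source record can be Sealed after a successful copy
}
```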
diff --git a/cs/src/core/Index/Interfaces/UpdateInfo.cs b/cs/src/core/Index/Interfaces/UpdateInfo.cs
index d51a4f167..2863946ee 100644
--- a/cs/src/core/Index/Interfaces/UpdateInfo.cs
+++ b/cs/src/core/Index/Interfaces/UpdateInfo.cs
@@ -3,34 +3,6 @@
 
 namespace FASTER.core
 {
-    /// <summary>
-    /// The type of session being used for this operation
-    /// </summary>
-    public enum SessionType : byte
-    {
-        /// <summary>
-        /// Direct calls through to the standard client session, which does ephemeral locking and epoch protection on a per-operation basis.
-        /// </summary>
-        BasicContext,
-
-        /// <summary>
-        /// An unsafe context which does ephemeral locking but allows the user to do coarse-grained epoch protection,
-        /// which can improve speed.
-        /// </summary>
-        UnsafeContext,
-
-        /// <summary>
-        /// An unsafe context that does no ephemeral locking; the application must lock and unlock records manually and
-        /// make its own epoch protection calls.
-        /// </summary>
-        LockableUnsafeContext,
-
-        /// <summary>
-        /// A context that does no ephemeral locking; the application must lock and unlock records manually.
-        /// </summary>
-        LockableContext
-    }
-
     /// <summary>
     /// What actions to take following the RMW IFunctions method call, such as cancellation or record expiration.
     /// </summary>
@@ -41,11 +13,6 @@ public enum UpsertAction
         /// </summary>
         Default,
 
-        /// <summary>
-        /// The operation cannot not be completed in-place because it would exceed available space.
-        /// </summary>
-        NeedMoreSpace,
-
         /// <summary>
         /// Stop the operation immediately and return.
         /// </summary>
@@ -57,11 +24,6 @@ public enum UpsertAction
     /// </summary>
     public struct UpsertInfo
     {
-        /// <summary>
-        /// The type of session context executing the operation
-        /// </summary>
-        public SessionType SessionType { get; internal set; }
-
         /// <summary>
         /// The FASTER execution context version of the operation
         /// </summary>
@@ -97,7 +59,6 @@ public struct UpsertInfo
         /// </summary>
         public UpsertInfo(ref RMWInfo rmwInfo)
         {
-            this.SessionType = rmwInfo.SessionType;
             this.Version = rmwInfo.Version;
             this.SessionID = rmwInfo.SessionID;
             this.Address = rmwInfo.Address;
@@ -117,11 +78,6 @@ public enum RMWAction
         /// </summary>
         Default,
 
-        /// <summary>
-        /// The operation cannot not be completed in-place because it would exceed available space.
-        /// </summary>
-        NeedMoreSpace,
-
         /// <summary>
         /// Expire the record, including continuing actions to reinsert a new record with initial state.
         /// </summary>
@@ -143,11 +99,6 @@ public enum RMWAction
     /// </summary>
     public struct RMWInfo
     {
-        /// <summary>
-        /// The type of session context executing the operation
-        /// </summary>
-        public SessionType SessionType { get; internal set; }
-
         /// <summary>
         /// The FASTER execution context version of the operation
         /// </summary>
@@ -200,11 +151,6 @@ public enum DeleteAction
     /// </summary>
     public struct DeleteInfo
     {
-        /// <summary>
-        /// The type of session context executing the operation
-        /// </summary>
-        public SessionType SessionType { get; internal set; }
-
         /// <summary>
         /// The FASTER execution context version of the operation
         /// </summary>
@@ -262,11 +208,6 @@ public enum ReadAction
     /// </summary>
     public struct ReadInfo
     {
-        /// <summary>
-        /// The type of session context executing the operation
-        /// </summary>
-        public SessionType SessionType { get; internal set; }
-
         /// <summary>
         /// The FASTER execution context version of the operation
         /// </summary>
diff --git a/cs/src/core/Index/Recovery/Recovery.cs b/cs/src/core/Index/Recovery/Recovery.cs
index b74803a80..6db8d6aa2 100644
--- a/cs/src/core/Index/Recovery/Recovery.cs
+++ b/cs/src/core/Index/Recovery/Recovery.cs
@@ -856,8 +856,7 @@ private unsafe void ClearLocksOnPage(long page, RecoveryOptions options)
                 {
                     long recordStart = physicalAddress + pointer;
                     ref RecordInfo info = ref hlog.GetInfo(recordStart);
-                    info.ClearLocks();
-                    info.Unseal();
+                    info.ClearBitsForDiskImages();
 
                     if (info.IsNull())
                         pointer += RecordInfo.GetLength();
@@ -882,14 +881,8 @@ private unsafe bool RecoverFromPage(long startRecoveryAddress,
         {
             bool touched = false;
 
-            var hash = default(long);
-            var tag = default(ushort);
             var pointer = default(long);
             var recordStart = default(long);
-            var firstBucket = default(HashBucket*);
-            var bucket = default(HashBucket*);
-            var entry = default(HashBucketEntry);
-            var slot = default(int);
 
             pointer = fromLogicalAddressInPage;
             while (pointer < untilLogicalAddressInPage)
@@ -905,22 +898,19 @@ private unsafe bool RecoverFromPage(long startRecoveryAddress,
 
                 if (!info.Invalid)
                 {
-                    hash = comparer.GetHashCode64(ref hlog.GetKey(recordStart));
-                    tag = (ushort)((ulong)hash >> Constants.kHashTagShift);
+                    HashEntryInfo hei = new(comparer.GetHashCode64(ref hlog.GetKey(recordStart)));
+                    FindOrCreateTag(ref hei, hlog.BeginAddress);
 
-                    entry = default;
-                    FindOrCreateTag(hash, tag, ref firstBucket, ref bucket, ref slot, ref entry, hlog.BeginAddress);
-
-                    bool ignoreRecord = ((pageLogicalAddress + pointer) >= options.fuzzyRegionStartAddress) && info.InNewVersion;
+                    bool ignoreRecord = ((pageLogicalAddress + pointer) >= options.fuzzyRegionStartAddress) && info.IsInNewVersion;
                     if (!options.undoNextVersion)
                         ignoreRecord = false;
 
                     if (!ignoreRecord)
                     {
-                        entry.Address = pageLogicalAddress + pointer;
-                        entry.Tag = tag;
-                        entry.Pending = false;
-                        entry.Tentative = false;
-                        bucket->bucket_entries[slot] = entry.word;
+                        hei.entry.Address = pageLogicalAddress + pointer;
+                        hei.entry.Tag = hei.tag;
+                        hei.entry.Pending = false;
+                        hei.entry.Tentative = false;
+                        hei.bucket->bucket_entries[hei.slot] = hei.entry.word;
                     }
                     else
                     {
@@ -928,11 +918,11 @@ private unsafe bool RecoverFromPage(long startRecoveryAddress,
                         info.SetInvalid();
                         if (info.PreviousAddress < startRecoveryAddress)
                         {
-                            entry.Address = info.PreviousAddress;
-                            entry.Tag = tag;
-                            entry.Pending = false;
-                            entry.Tentative = false;
-                            bucket->bucket_entries[slot] = entry.word;
+                            hei.entry.Address = info.PreviousAddress;
+                            hei.entry.Tag = hei.tag;
+                            hei.entry.Pending = false;
+                            hei.entry.Tentative = false;
+                            hei.bucket->bucket_entries[hei.slot] = hei.entry.word;
                         }
                     }
                 }
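The Recovery changes above fold five locals (hash, tag, bucket, slot, entry) into one `HashEntryInfo` passed to `FindOrCreateTag`. A toy model of that consolidation; the field set and the tag shift are illustrative, and the real struct is unsafe and also carries the bucket pointer and entry word:

```csharp
// Toy model only: one struct instead of five threaded-through locals.
struct HashEntryInfoSketch
{
    public long hash;
    public ushort tag;   // top bits of the hash, stored alongside the address in the bucket entry
    public int slot;     // slot within the hash bucket, filled in by FindTag/FindOrCreateTag

    public HashEntryInfoSketch(long hash)
    {
        this.hash = hash;
        this.tag = (ushort)((ulong)hash >> 48);  // real code uses Constants.kHashTagShift
        this.slot = 0;
    }
}
```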
diff --git a/cs/src/core/Index/Synchronization/FasterStateMachine.cs b/cs/src/core/Index/Synchronization/FasterStateMachine.cs
index 0f876d60a..52ab7e821 100644
--- a/cs/src/core/Index/Synchronization/FasterStateMachine.cs
+++ b/cs/src/core/Index/Synchronization/FasterStateMachine.cs
@@ -199,11 +199,11 @@ private void ThreadStateMachineStep(
             }
             #endregion
 
-            var currentState = ctx == null ? targetState : SystemState.Make(ctx.phase, ctx.version);
+            var currentState = ctx is null ? targetState : SystemState.Make(ctx.phase, ctx.version);
             var targetStartState = StartOfCurrentCycle(targetState);
 
             #region Get returning thread to start of current cycle, issuing completion callbacks if needed
-            if (ctx != null)
+            if (ctx is not null)
             {
                 if (ctx.version < targetStartState.Version)
                 {
@@ -226,7 +226,7 @@ private void ThreadStateMachineStep(
                         fasterSession?.CheckpointCompletionCallback(ctx.sessionID, ctx.sessionName, commitPoint);
                 }
             }
-            if ((ctx.version == targetStartState.Version) && (ctx.phase < Phase.REST) && !(ctx.threadStateMachine is IndexSnapshotStateMachine))
+            if ((ctx.version == targetStartState.Version) && (ctx.phase < Phase.REST) && ctx.threadStateMachine is not IndexSnapshotStateMachine)
             {
                 IssueCompletionCallback(ctx, fasterSession);
             }
@@ -237,7 +237,7 @@ private void ThreadStateMachineStep(
             // we can directly fast forward session to target state
             if (currentTask == null || targetState.Phase == Phase.REST)
             {
-                if (ctx != null)
+                if (ctx is not null)
                 {
                     ctx.phase = targetState.Phase;
                     ctx.version = targetState.Version;
@@ -252,7 +252,7 @@ private void ThreadStateMachineStep(
             // a checkpoint to complete on a client app thread), we start at current system state
             var threadState = targetState;
 
-            if (ctx != null)
+            if (ctx is not null)
             {
                 if (ctx.threadStateMachine == currentTask)
                 {
@@ -276,7 +276,7 @@ private void ThreadStateMachineStep(
                 currentTask.OnThreadEnteringState(threadState, previousState, this, ctx, fasterSession, valueTasks, token);
 
-                if (ctx != null)
+                if (ctx is not null)
                 {
                     ctx.phase = threadState.Phase;
                     ctx.version = threadState.Version;
diff --git a/cs/src/core/Index/Synchronization/HybridLogCheckpointTask.cs b/cs/src/core/Index/Synchronization/HybridLogCheckpointTask.cs
index 484e5b362..980ccf6a4 100644
--- a/cs/src/core/Index/Synchronization/HybridLogCheckpointTask.cs
+++ b/cs/src/core/Index/Synchronization/HybridLogCheckpointTask.cs
@@ -113,7 +113,7 @@ public virtual void OnThreadState(
             switch (current.Phase)
             {
                 case Phase.PREPARE:
-                    if (ctx != null)
-                    {
-                        if (!ctx.markers[EpochPhaseIdx.Prepare])
-                        {
-                            ctx.markers[EpochPhaseIdx.Prepare] = true;
-                        }
-                    }
+                    if (ctx is not null)
+                        ctx.markers[EpochPhaseIdx.Prepare] = true;
 
                     faster.epoch.Mark(EpochPhaseIdx.Prepare, current.Version);
                     if (faster.epoch.CheckIsComplete(EpochPhaseIdx.Prepare, current.Version) && faster.hlog.NumActiveLockingSessions == 0)
diff --git a/cs/src/core/Utilities/FasterException.cs b/cs/src/core/Utilities/FasterException.cs
index 75f9dbe14..3369fbbc2 100644
--- a/cs/src/core/Utilities/FasterException.cs
+++ b/cs/src/core/Utilities/FasterException.cs
@@ -44,4 +44,19 @@ public FasterException(SerializationInfo info, StreamingContext context) : base(
         {
         }
     }
+
+    /// <summary>
+    /// FASTER IO exception type
+    /// </summary>
+    public class FasterIOException : FasterException
+    {
+        /// <summary>
+        /// Throw FASTER exception
+        /// </summary>
+        /// <param name="message"></param>
+        /// <param name="innerException"></param>
+        public FasterIOException(string message, Exception innerException) : base(message, innerException)
+        {
+        }
+    }
 }
\ No newline at end of file
a/cs/src/core/Utilities/InMemKV.cs +++ /dev/null @@ -1,813 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT license. - -using System; -using System.Collections.Concurrent; -using System.Diagnostics; -using System.Runtime.CompilerServices; -using System.Threading; - -namespace FASTER.core.Utilities -{ - /// - /// A static class to provide constants witout having to use the full type specifications. - /// - internal static class InMemKV - { - internal const int kChunkSize = 16; - - // Parameterless struct ctors are silently ignored (in C# 10) for array allocation, so use a "default". - // If this changes we can use "invalid" and set it to kChunkSize + 1. - internal const int kDefaultOverflowIndex = 0; - } - - /// - /// User functions that replace to allow comparison of TKey and THeapKey, as well as other methods. - /// - internal interface IInMemKVUserFunctions - { - void Dispose(ref THeapKey key, ref TValue value); - - THeapKey CreateHeapKey(ref TKey key); - - ref TKey GetHeapKeyRef(THeapKey heapKey); - - bool IsActive(ref TValue value); - - bool Equals(ref TKey key, THeapKey heapKey); - - long GetHashCode64(ref TKey key); - } - - /// - /// A single key's entry in the store. - /// - internal struct InMemKVEntry - where TUserFunctions : IInMemKVUserFunctions - { - // This is a transient structure that contains information about the location of a key entry. - internal struct Location - { - internal InMemKVChunk chunk; // 'default' if the target is the initial entry - internal int chunkEntryIndex; - internal InMemKV kv; - int bucketIndex; - - internal Location(InMemKV kv, int bucketIndex) - { - this.kv = kv; - this.bucketIndex = bucketIndex; - this.chunk = default; - this.chunkEntryIndex = InMemKV.kDefaultOverflowIndex; - } - - internal void SetToInitialEntry() - { - this.chunk = default; - this.chunkEntryIndex = InMemKV.kDefaultOverflowIndex; - } - - internal void Set(InMemKVChunk chunk, int entryIndex) - { - this.chunk = chunk; - this.chunkEntryIndex = entryIndex; - } - - internal bool IsInOverflow => this.chunk is not null; - - internal ref InMemKVEntry EntryRef - => ref (IsInOverflow ? ref this.chunk[this.chunkEntryIndex] : ref kv.buckets[bucketIndex].InitialEntry); - - public override string ToString() => $"entry {{{EntryRef}}}; IsInOverflow {IsInOverflow}; entryIndex {chunkEntryIndex}}}"; - } - - internal THeapKey heapKey; - internal TValue value; - - internal void Initialize(THeapKey key) - { - this.heapKey = key; - this.value = default; - } - - internal void Initialize() => Initialize(default); - - internal void Set(ref InMemKVEntry other, IInMemKVUserFunctions userFunctions) - { - userFunctions.Dispose(ref this.heapKey, ref this.value); - this.heapKey = other.heapKey; - this.value = other.value; - } - - internal void Dispose(TUserFunctions userFunctions) - { - userFunctions.Dispose(ref this.heapKey, ref this.value); - Initialize(); - } - - internal bool IsActive(TUserFunctions userFunctions) => userFunctions.IsActive(ref this.value); - - internal bool IsDefault => this.heapKey is null; - - public override string ToString() => $"key {{{heapKey}}}; value {{{value}}}; isDef {IsDefault}"; - } - - /// - /// A single chunk (C# array) of structs. 
- /// - internal class InMemKVChunk - where TUserFunctions : IInMemKVUserFunctions - { - internal InMemKVChunk prev; - internal InMemKVChunk next; - - private readonly InMemKVEntry[] entries = new InMemKVEntry[InMemKV.kChunkSize]; - - internal ref InMemKVEntry this[int index] => ref this.entries[index]; - - internal void LinkAfter(InMemKVChunk prior) - { - Debug.Assert(prior.next is null, "We should only append to the end of the linked list"); - this.prev = prior; - prior.next = this; - } - - internal InMemKVChunk UnlinkAfter() - { - Debug.Assert(this.next is null, "We should only remove from the end of the linked list"); - var prev = this.prev; - if (this.prev is not null) - { - this.prev.next = null; - this.prev = null; - } - return prev; - } - } - - /// - /// A vertebra of the hash table spine, containing the single initial , - /// the final if any are allocated, and methods for shared and exclusive - /// locking of the bucket. - /// - internal struct InMemKVBucket - where TUserFunctions : IInMemKVUserFunctions - { - #region Bucket Constants - // We use our own locking etc. here as we do not use Tentative in the same way, we have lock promotion, and we have an ExclusiveGeneration value. - // We reuse the part of the PreviousAddress space as LastChunkEntryIndex, but only sizeof(int) * 8 bits, because we are limiting it to the offset - // within a single chunk. So the layout of our bucket word is: - // [GGGGGGGG] [GGGGGGGG] [GGGGGGGG][G][X][SSSSSS] [AAAAAAAA] [AAAAAAAA] [AAAAAAAA] [AAAAAAAA] - // where X = exclusive lock, S = shared lock, R = readcache, A = address, G = ExclusiveGeneration - - const int kLastActiveChunkEntryIndexBits = 32; - const long kLastActiveChunkEntryIndexBitMask = (1L << kLastActiveChunkEntryIndexBits) - 1; - - // Shift position of lock in word - const int kSharedLockBitOffset = kLastActiveChunkEntryIndexBits; - - // Use the same lock bit count as RecordInfo--7 lock bits: 6 shared lock bits + 1 exclusive lock bit - const int kSharedLockBits = RecordInfo.kSharedLockBits; - const int kExclusiveLockBits = RecordInfo.kExclusiveLockBits; - - // Shared lock constants - const long kSharedLockBitMask = ((1L << kSharedLockBits) - 1) << kSharedLockBitOffset; - const long kSharedLockIncrement = 1L << kSharedLockBitOffset; - - // Exclusive lock constants - const int kExclusiveLockBitOffset = kSharedLockBitOffset + kSharedLockBits; - const long kExclusiveLockBitMask = 1L << kExclusiveLockBitOffset; - - // ExclusiveGeneration constants - const int kExclusiveGenerationBitOffset = kExclusiveLockBitOffset + kExclusiveLockBits; - const int kExclusiveGenerationBits = 25; - const long kExclusiveGenerationBitMask = ((1L << kExclusiveGenerationBits) - 1) << kExclusiveGenerationBitOffset; - const long kExclusiveGenerationIncrement = 1L << kExclusiveGenerationBitOffset; - #endregion Bucket Constants - - #region Bucket Data - // The single word that implements locking etc. using the above constants. - private long word; - - // The initial entry. Often we will have only one, so no allocation is needed. - internal InMemKVEntry InitialEntry; - - // The last overflow chunk, if any are allocated. We store the last one instead of the first to facilitate compaction, - // which accesses the end of the chunk list. 
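Editor's note: the bucket below packs four fields into one 64-bit word so lock state can be updated with a single CAS. This standalone sketch reproduces the layout using the constants above, assuming RecordInfo.kSharedLockBits is 6 as the layout comment states; it is an illustration, not FASTER code.

```csharp
using System;

static class BucketWordSketch
{
    // 32 index bits + 6 shared-lock bits + 1 exclusive bit + 25 generation bits = 64.
    const int IndexBits = 32, SharedBits = 6, ExclusiveBits = 1, GenerationBits = 25;
    const int SharedOffset = IndexBits;                           // 32
    const int ExclusiveOffset = SharedOffset + SharedBits;        // 38
    const int GenerationOffset = ExclusiveOffset + ExclusiveBits; // 39

    const long IndexMask = (1L << IndexBits) - 1;
    const long SharedMask = ((1L << SharedBits) - 1) << SharedOffset;
    const long ExclusiveMask = 1L << ExclusiveOffset;
    const long GenerationMask = ((1L << GenerationBits) - 1) << GenerationOffset;

    public static int Index(long word) => (int)(word & IndexMask);
    public static long SharedCount(long word) => (word & SharedMask) >> SharedOffset;
    public static bool IsXLocked(long word) => (word & ExclusiveMask) != 0;
    public static int Generation(long word) => (int)((word & GenerationMask) >> GenerationOffset);

    public static void Main()
    {
        long word = 5;                 // LastActiveChunkEntryIndex = 5
        word += 2L << SharedOffset;    // two shared locks held
        word |= ExclusiveMask;         // exclusive bit set (writer draining readers)
        Console.WriteLine($"idx={Index(word)} s={SharedCount(word)} x={IsXLocked(word)} gen={Generation(word)}");
    }
}
```

The generation counter is what lets a thread that lost the XLock race detect that another exclusive holder intervened (the wasImmediate logic further down).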
- internal InMemKVChunk LastOverflowChunk; - #endregion Bucket Data - - internal bool HasEntries => !this.InitialEntry.IsDefault; - internal bool HasOverflow => this.LastOverflowChunk is not null; - - internal bool IsXLocked => (word & kExclusiveLockBitMask) != 0; - internal long NumSLocks => (word & kSharedLockBitMask) >> kSharedLockBitOffset; - - // The index of the last active entry in the overflow chunks - internal int LastActiveChunkEntryIndex - { - get => (int)(word & kLastActiveChunkEntryIndexBitMask); - set => word = (word & ~kLastActiveChunkEntryIndexBitMask) | (value & kLastActiveChunkEntryIndexBitMask); - } - - // The ExclusiveGeneration value enables optimization of Compact(); it wraps around on overflow (currently at 33m). - internal int ExclusiveGeneration => (int)((word & kExclusiveGenerationBitMask) >> kExclusiveGenerationBitOffset); - internal void IncrementExclusiveGeneration() => word = (word & ~kExclusiveGenerationBitMask) | IncrementExclusiveGeneration(word & kExclusiveGenerationBitMask); - internal static long IncrementExclusiveGeneration(long value) => value == kExclusiveGenerationBitMask ? 0 : value + kExclusiveGenerationIncrement; - - public InMemKVBucket() - { - this.word = default; - this.InitialEntry = default; - this.LastOverflowChunk = default; - - // Parameterless struct ctors are silently ignored (in C# 10) for array allocation, so use a "default". - // If this changes we can use "invalid" set to kChunkSize + 1 here (and when removing the last overflow chunk) and Assert() a stronger consistency check. - this.LastActiveChunkEntryIndex = InMemKV.kDefaultOverflowIndex; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal void XLock() - { - // Acquire exclusive lock (readers may still be present; we'll drain them below) - for (; ; Thread.Yield()) - { - long expected_word = word; - if ((expected_word & kExclusiveLockBitMask) == 0 && Interlocked.CompareExchange(ref word, expected_word | kExclusiveLockBitMask, expected_word) == expected_word) - break; - } - - // Wait for readers to drain - while ((word & kSharedLockBitMask) != 0) - Thread.Yield(); - this.IncrementExclusiveGeneration(); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal void XUnlock() - { - Debug.Assert(IsXLocked, "Trying to X unlock an unlocked bucket"); - word &= ~kExclusiveLockBitMask; // Safe because there should be no other threads (e.g., readers) updating the word at this point - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal void SLock() - { - for (; ; Thread.Yield()) - { - long expected_word = word; - if ((expected_word & kExclusiveLockBitMask) == 0 // not exclusively locked - && (expected_word & kSharedLockBitMask) != kSharedLockBitMask // shared lock is not full - && Interlocked.CompareExchange(ref word, expected_word + kSharedLockIncrement, expected_word) == expected_word) - break; - } - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal void SUnlock() - { - Debug.Assert(NumSLocks > 0, "Trying to S unlock an unlocked bucket"); - Interlocked.Add(ref word, -kSharedLockIncrement); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal bool SUnlockAndTryXLock(out bool wasImmediate) - { - Debug.Assert(NumSLocks > 0, $"Bucket SLock count should not be 0 when promoting"); - var prevGen = this.word & kExclusiveGenerationBitMask; - - // Acquire exclusive lock in place of this thread's shared lock (other readers may still be present; we'll drain them below). 
- // We must SUnlock first to avoid deadlock in the "draining other readers" phase if are multiple threads in here. - wasImmediate = true; - SUnlock(); - - int maxxRetries = 10; - - while (true) - { - for (; ; Thread.Yield()) - { - long expected_word = word; - bool acquired = (expected_word & kExclusiveLockBitMask) == 0 // not exclusively locked - && Interlocked.CompareExchange(ref word, expected_word | kExclusiveLockBitMask, expected_word) == expected_word; - if ((this.word & kExclusiveGenerationBitMask) != prevGen) - wasImmediate = false; - if (acquired) - break; - } - - // Wait for readers to drain. In the LockTable scenario, we have no key locks on this thread, but a second thread may have a - // key lock and a third thread may have a read lock trying to acquire that key lock; let go of our bucket XLock to let the second - // thread acquire the read lock to unlock it, and retry the lock acquisition. - for (var ii = 0; ii < Constants.kMaxReaderLockDrainSpins / 2; ++ii) // Make this shorter than RecordInfo's XLock drain - { - if ((word & kSharedLockBitMask) == 0) - break; - Thread.Yield(); - } - - if ((word & kSharedLockBitMask) == 0) - break; - - // Release the exclusive bit and return false so an Unlock has a chance to get into the bucket. - // To reset this bit while spinning to drain readers, we must use CAS to avoid losing a reader unlock. - for (; ; Thread.Yield()) - { - long expected_word = word; - if (Interlocked.CompareExchange(ref word, expected_word & ~kExclusiveLockBitMask, expected_word) == expected_word) - break; - } - - if (--maxxRetries <= 0) - return false; - Thread.Yield(); - } - - this.IncrementExclusiveGeneration(); - return true; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal void SUnlockAndTryXLockAndCompaction(TUserFunctions userFunctions, ref InMemKVEntry.Location entryLocation) - { - // We don't need try{} because we don't allocate. - if (this.SUnlockAndTryXLock(out bool wasImmediate)) - { - DoCompaction(userFunctions, ref entryLocation, wasImmediate); - this.XUnlock(); - } - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal void DoCompaction(TUserFunctions userFunctions, ref InMemKVEntry.Location entryLocation, bool wasImmediateXlock) - { - // Some other thread may've removed all entries before we got the lock. - if (!this.HasEntries) - return; - - if (this.LastOverflowChunk is null) - { - // There are no overflow chunks; see if initialEntry is active. (Note that we may have gotten here even though entryLocation - // may be in the overflow chunks, if other threads trimmed away trailing InActive entries). - if (!this.InitialEntry.IsActive(userFunctions)) - { - this.InitialEntry.Dispose(userFunctions); - entryLocation.kv.DecrementActiveBuckets(); - } - return; - } - - if (wasImmediateXlock) - { - // Need to recheck IsActive because even if wasImmediateXLock, another thread may have already had an SLock on the bucket, - // or gotten it, and added a lock between the time we previously checked IsActive and the time we got the XLock. - ref var targetEntry = ref entryLocation.EntryRef; - if (!targetEntry.IsDefault && !targetEntry.IsActive(userFunctions)) - this.CompactInto(userFunctions, ref entryLocation); - } - else - { - // We did not acquire the Xlock immediately (before another thread got it), so make sure the target entry is still valid: - // The chunk must be in our assigned list, and the entry at the index must be inactive (but not default). Some notes about this: - // 1. 
We only insert new entries at the end of the list, never in the middle. This means our target entry will never be at a - // position "later" than it was when we made this call. - // 2. Every CompactInto() removes InActive entries at the end of the list (an InActive entry will never be swapped into a - // target entry). Since another thread got the XLock before us, our target entry was removed if it was at the end of the list. - for (var chunk = this.LastOverflowChunk; chunk is not null; chunk = chunk.prev) - { - if (object.ReferenceEquals(chunk, entryLocation.chunk)) - { - ref var targetEntry = ref entryLocation.EntryRef; - if (!targetEntry.IsDefault && !targetEntry.IsActive(userFunctions)) - this.CompactInto(userFunctions, ref entryLocation); - break; - } - } - } - - if (!this.HasEntries) - entryLocation.kv.DecrementActiveBuckets(); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private void CompactInto(TUserFunctions userFunctions, ref InMemKVEntry.Location entryLocation) - { -#if DEBUG - Debug.Assert(this.HasOverflow, "Should only Compact when there are overflow allocations"); - ref var targetEntry = ref entryLocation.EntryRef; - Debug.Assert(!targetEntry.IsActive(userFunctions), "Should only CompactInto an InActive entry"); - Debug.Assert(!targetEntry.IsDefault, "Should only CompactInto a non-IsDefault) entry"); -#endif - - // Other threads may have final-unlocked one or more entries at the end of the active set; if so, back - // up over them, removing them as we go. Note that this may back up over entryLocation; we don't stop if so. - var removedTargetChunk = false; - ref var lastEntry = ref this.LastOverflowChunk[this.LastActiveChunkEntryIndex]; - while (!lastEntry.IsDefault && !lastEntry.IsActive(userFunctions)) - { - // We must always clear lastEntry, even if freeing the chunk. - lastEntry.Dispose(userFunctions); - removedTargetChunk = RemoveLastActiveChunkEntry(userFunctions, ref entryLocation, ref lastEntry); - - if (this.LastOverflowChunk is null) - { - // We've freed all chunks. If InitialEntry is InActive, Clear() it, then return. - if (!this.InitialEntry.IsActive(userFunctions)) - this.InitialEntry.Dispose(userFunctions); - return; - } - lastEntry = ref this.LastOverflowChunk[this.LastActiveChunkEntryIndex]; - } - - if (!removedTargetChunk) - { - // We did not back up over toEntry, so transfer the entry data, then remove the last entry (that we just transferred from). - if (entryLocation.IsInOverflow) - { - ref var toEntry = ref entryLocation.EntryRef; - if (toEntry.IsDefault) - return; - toEntry.Set(ref lastEntry, userFunctions); - } - else - { - // Note that we can never back up over InitialEntry (removedChunkTarget will always be false because we never free a 'null' chunk). - this.InitialEntry.Set(ref lastEntry, userFunctions); - } - - // Because we copied lastEntry's key and value to a new location, we can't Dispose() them - lastEntry.Initialize(); - RemoveLastActiveChunkEntry(userFunctions, ref entryLocation, ref lastEntry); - } - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private bool RemoveLastActiveChunkEntry(TUserFunctions userFunctions, ref InMemKVEntry.Location entryLocation, - ref InMemKVEntry lastEntry) - { - if (this.LastActiveChunkEntryIndex == 0) - { - // Back up to the previous chunk's last entry--or perhaps to kInvalidOverflowIndex, which means the previous "chunk" is InitialEntry. - var result = this.RemoveLastChunk(entryLocation); - this.LastActiveChunkEntryIndex = this.LastOverflowChunk is null ? 
InMemKV.kDefaultOverflowIndex : InMemKV.kChunkSize - 1; - return result; - } - - // Not the first entry on the chunk so just decrement - --this.LastActiveChunkEntryIndex; - return false; - } - - private bool RemoveLastChunk(InMemKVEntry.Location entryLocation) - { - var freed = this.LastOverflowChunk; - this.LastOverflowChunk = this.LastOverflowChunk.UnlinkAfter(); - entryLocation.kv.OnChunkFreed(freed); - return object.ReferenceEquals(freed, entryLocation.chunk); - } - - internal ref InMemKVEntry GetNextFreeChunkEntry() - { - Debug.Assert(this.IsXLocked, "Must have bucket XLock before getting next free chunk entry"); - Debug.Assert(!this.InitialEntry.IsDefault, "Should not be getting a free chunk entry when InitialEntry.IsDefault"); - Debug.Assert(this.HasOverflow || this.LastActiveChunkEntryIndex == InMemKV.kDefaultOverflowIndex, "If there are no overflow chunks, LastActiveChunkEntryIndex should be kDefaultOverflowIndex"); - if (this.HasOverflow && this.LastActiveChunkEntryIndex < InMemKV.kChunkSize - 1) - return ref this.LastOverflowChunk[++LastActiveChunkEntryIndex]; - return ref AddChunk(); - } - - private ref InMemKVEntry AddChunk() - { - var chunk = new InMemKVChunk(); - if (this.LastOverflowChunk is not null) - chunk.LinkAfter(this.LastOverflowChunk); - this.LastOverflowChunk = chunk; - this.LastActiveChunkEntryIndex = 0; - return ref chunk[this.LastActiveChunkEntryIndex]; - } - - internal void Dispose(TUserFunctions userFunctions) - { - // Back up through the overflow chunks. - var lastActiveEntryIndexOnChunk = this.LastActiveChunkEntryIndex; - while (this.HasOverflow) - { - for (var iEntry = 0; iEntry < lastActiveEntryIndexOnChunk; ++iEntry) - { - ref var entry = ref this.LastOverflowChunk[iEntry]; - Debug.Assert(!entry.IsDefault, "Entry should not be default"); - entry.Dispose(userFunctions); - } - lastActiveEntryIndexOnChunk = InMemKV.kChunkSize - 1; - this.LastOverflowChunk = this.LastOverflowChunk.prev; - } - if (!this.InitialEntry.IsDefault) - InitialEntry.Dispose(userFunctions); - } - - public override string ToString() - { - var locks = $"{(this.IsXLocked ? "x" : string.Empty)}{this.NumSLocks}"; - return $"initEntry {{{InitialEntry}}}, locks {locks}, LastACEI {LastActiveChunkEntryIndex}, XGen {ExclusiveGeneration}"; - } - } - - /// - /// The high-level In-Memory Key/Value store. Contains the list of buckets, the freelist, and user functions. - /// - internal class InMemKV : IDisposable - where TUserFunctions : IInMemKVUserFunctions - { - internal readonly InMemKVBucket[] buckets; - - private readonly TUserFunctions userFunctions; - - private readonly ConcurrentStack> freeList = new(); - internal int FreeListCount => freeList.Count; - private readonly int maxFreeChunks; - - internal InMemKV(int numBuckets, int maxFreeChunks, TUserFunctions userFunctions) - { - this.buckets = new InMemKVBucket[numBuckets]; - this.maxFreeChunks = maxFreeChunks; - this.userFunctions = userFunctions; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal ref InMemKVBucket GetBucket(ref TKey key, out int index) => ref GetBucket(this.userFunctions.GetHashCode64(ref key), out index); - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal ref InMemKVBucket GetBucket(long hash, out int index) - { - index = (int)(hash >= 0 ? 
hash % buckets.Length : -hash % buckets.Length); - return ref buckets[index]; - } - - internal bool HasEntries(ref TKey key) => GetBucket(ref key, out _).HasEntries; - internal bool HasEntries(long hash) => GetBucket(hash, out _).HasEntries; - - long NumActiveBuckets = 0; - - void IncrementActiveBuckets() => Interlocked.Increment(ref NumActiveBuckets); - internal void DecrementActiveBuckets() => Interlocked.Decrement(ref NumActiveBuckets); - - public bool IsActive => this.NumActiveBuckets > 0; - -#region Individual operation callbacks - internal interface IFindEntryFunctions - { - void NotFound(ref TKey key); - void FoundEntry(ref TKey key, ref TValue value); - } - - internal interface IAddEntryFunctions - { - void AddedEntry(ref TKey key, ref TValue value); - } - - internal interface IFindOrAddEntryFunctions : IAddEntryFunctions - { - void FoundEntry(ref TKey key, ref TValue value); - } -#endregion - - // This function is here instead of on the bucket due to: CS8170 Struct members cannot return 'this' or other instance members by reference - private ref InMemKVEntry TryToFindEntry(ref InMemKVBucket bucket, ref TKey key, - ref InMemKVEntry.Location entryLocation, out bool found) - { - if (!bucket.InitialEntry.IsDefault && this.userFunctions.Equals(ref key, bucket.InitialEntry.heapKey)) - { - found = true; - entryLocation.SetToInitialEntry(); - return ref bucket.InitialEntry; - } - - // Back up through the overflow chunks. - var lastActiveEntryIndexOnChunk = bucket.LastActiveChunkEntryIndex; - for (var chunk = bucket.LastOverflowChunk; chunk is not null; chunk = chunk.prev) - { - for (var iEntry = 0; iEntry <= lastActiveEntryIndexOnChunk; ++iEntry) - { - ref var entry = ref chunk[iEntry]; - Debug.Assert(!entry.IsDefault, "Entry should not be default"); - if (this.userFunctions.Equals(ref key, entry.heapKey)) - { - found = true; - entryLocation.Set(chunk, iEntry); - return ref entry; - } - } - lastActiveEntryIndexOnChunk = InMemKV.kChunkSize - 1; - } - - // If we're here, it was not found. - found = false; - return ref bucket.InitialEntry; // We have to return a reference to something - } - - /// - /// Try to find the entry, and call the user functions - /// - /// The key to lock - /// The hash code of the key, to avoid recalculating - /// The caller's implementation of the appropriate functions (see the type constraint) - /// true if the key was found (even if it was rendered InActive and removed), else false - /// InMemKV does not have a Remove method, because its compaction is based on the InActive property; - /// use a implementation that renderes the entry InActive in FoundEntry. 
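Editor's note: all of the per-operation callbacks in this file (IFindEntryFunctions, IAddEntryFunctions, IFindOrAddEntryFunctions) follow one allocation-free pattern: a struct implementing the callback interface is passed by ref under a generic constraint, so calls devirtualize and results flow back through the struct's fields rather than through delegates. A standalone sketch with purely illustrative names:

```csharp
using System.Collections.Generic;

interface IFindEntryFunctions<TKey, TValue>
{
    void NotFound(ref TKey key);
    void FoundEntry(ref TKey key, ref TValue value);
}

class TinyStore<TKey, TValue>
{
    readonly Dictionary<TKey, TValue> map = new();

    public void Add(TKey key, TValue value) => map[key] = value;

    public bool FindEntry<TFuncs>(TKey key, ref TFuncs funcs)
        where TFuncs : IFindEntryFunctions<TKey, TValue>
    {
        if (map.TryGetValue(key, out var value))
        {
            funcs.FoundEntry(ref key, ref value);  // mutations flow back through the refs
            map[key] = value;
            return true;
        }
        funcs.NotFound(ref key);
        return false;
    }
}

struct IncrementIfFound : IFindEntryFunctions<string, int>
{
    public bool wasFound;
    public void NotFound(ref string key) => wasFound = false;
    public void FoundEntry(ref string key, ref int value) { value++; wasFound = true; }
}

// Usage:
//   var store = new TinyStore<string, int>(); store.Add("a", 1);
//   var funcs = new IncrementIfFound();
//   store.FindEntry("a", ref funcs);   // funcs.wasFound == true; "a" now maps to 2
```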
- [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool FindEntry(ref TKey key, long hash, ref TFuncs funcs) - where TFuncs : IFindEntryFunctions - { - ref var bucket = ref GetBucket(hash, out var bucketIndex); - bucket.SLock(); - - if (!bucket.InitialEntry.IsDefault) - { - InMemKVEntry.Location entryLocation = new(this, bucketIndex); - ref var entry = ref TryToFindEntry(ref bucket, ref key, ref entryLocation, out bool found); - if (found) - { - funcs.FoundEntry(ref key, ref entry.value); - if (!entry.IsActive(userFunctions)) - bucket.SUnlockAndTryXLockAndCompaction(userFunctions, ref entryLocation); - else - bucket.SUnlock(); - return true; - } - } - funcs.NotFound(ref key); - bucket.SUnlock(); - return false; - } - - /// - /// Try to find the entry, and call the user functions - /// - /// The key to lock - /// The hash code of the key, to avoid recalculating - /// The caller's implementation of the appropriate functions (see the type constraint) - /// true if the key was found (even if it was rendered InActive and removed), else false - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool FindOrAddEntry(ref TKey key, long hash, ref TFuncs funcs) - where TFuncs : IFindOrAddEntryFunctions - { - ref var bucket = ref GetBucket(hash, out var bucketIndex); - bucket.SLock(); - - InMemKVEntry.Location entryLocation = new(this, bucketIndex); - ref var entry = ref TryToFindEntry(ref bucket, ref key, ref entryLocation, out bool found); - if (found) - { - funcs.FoundEntry(ref key, ref entry.value); - if (!entry.IsActive(userFunctions)) - bucket.SUnlockAndTryXLockAndCompaction(userFunctions, ref entryLocation); - else - bucket.SUnlock(); - return true; - } - - // This has try/catch so can't be inlined; make it a separate function. - return SUnlockAndTryXLockAndFindOrAddEntry(ref key, ref bucket, bucketIndex, ref funcs); - } - - private bool SUnlockAndTryXLockAndFindOrAddEntry(ref TKey key, ref InMemKVBucket bucket, int bucketIndex, ref TFuncs funcs) - where TFuncs : IFindOrAddEntryFunctions - { - // Add a new entry. We XLock whenever we change the element membership. Use try/finally due to allocation - if (!bucket.SUnlockAndTryXLock(out bool wasImmediateXlock)) - return false; - - bool wasActive = bucket.HasEntries; - try - { - // If someone else acquired the xlock before we did, we must recheck once we acquire it to make sure the key wasn't added by another thread. - if (!wasImmediateXlock) - { - InMemKVEntry.Location entryLocation = new(this, bucketIndex); - ref var entry = ref TryToFindEntry(ref bucket, ref key, ref entryLocation, out bool found); - if (found) - { - funcs.FoundEntry(ref key, ref entry.value); - if (!entry.IsActive(userFunctions)) - { - // Here the wasImmediateXLock is true because we held the XLock when performing the user operation. - bucket.DoCompaction(userFunctions, ref entryLocation, wasImmediateXlock: true); - } - return true; - } - } - - AddEntry(ref key, ref funcs, ref bucket, wasActive); - return true; - } - catch (Exception) - { - // Allocation threw, so remove the pre-increment if we added it. 
- if (!wasActive) - DecrementActiveBuckets(); - throw; - } - finally - { - bucket.XUnlock(); - } - } - - /// - /// Add the entry, and call the user functions - /// - /// The key to lock - /// The hash code of the key, to avoid recalculating - /// The caller's implementation of the appropriate functions (see the type constraint) - public void AddEntry(ref TKey key, long hash, ref TFuncs funcs) - where TFuncs : IAddEntryFunctions - { - ref var bucket = ref GetBucket(hash, out var bucketIndex); - - // This is like FindOrAddEntry, except we know the key is not in the table, so we XLock directly. - bucket.XLock(); - -#if DEBUG - InMemKVEntry.Location entryLocation = new(this, bucketIndex); - _ = ref TryToFindEntry(ref bucket, ref key, ref entryLocation, out bool found); - Debug.Assert(!found, "Should not find key in AddEntry()"); -#endif - - bool wasActive = bucket.HasEntries; - try - { - AddEntry(ref key, ref funcs, ref bucket, wasActive); - } - catch (Exception) - { - // Allocation threw, so remove the pre-increment if we added it. - if (!wasActive) - DecrementActiveBuckets(); - throw; - } - finally - { - bucket.XUnlock(); - } - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private void AddEntry(ref TKey key, ref TFuncs funcs, ref InMemKVBucket bucket, bool wasActive) - where TFuncs : IAddEntryFunctions - { - // At this point we know we will add an entry, so pre-increment the active-bucket count to avoid a race where we add the entry but - // another thread does not see it immediately because this will become the first active bucket. The XLock on the bucket during membership - // changes means this will only be done by the correct thread. - if (!wasActive) - IncrementActiveBuckets(); - - // Find the next free entry - ref var entry = ref bucket.InitialEntry.IsDefault ? ref bucket.InitialEntry : ref bucket.GetNextFreeChunkEntry(); - entry.Initialize(userFunctions.CreateHeapKey(ref key)); - funcs.AddedEntry(ref key, ref entry.value); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal bool TryGet(ref TKey key, out TValue value) => TryGet(ref key, this.userFunctions.GetHashCode64(ref key), out value); - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal bool TryGet(ref TKey key, long hash, out TValue value) - { - ref var bucket = ref GetBucket(hash, out var bucketIndex); - bucket.SLock(); - InMemKVEntry.Location entryLocation = new(this, bucketIndex); - ref var entry = ref TryToFindEntry(ref bucket, ref key, ref entryLocation, out bool found); - value = found ? 
entry.value : default; - bucket.SUnlock(); - return found; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal void OnChunkFreed(InMemKVChunk chunk) - { - if (this.freeList.Count < this.maxFreeChunks) - this.freeList.Push(chunk); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool ContainsKey(ref TKey key, long hash) - { - ref var bucket = ref GetBucket(hash, out var bucketIndex); - bucket.SLock(); - InMemKVEntry.Location entryLocation = new(this, bucketIndex); - _ = ref TryToFindEntry(ref bucket, ref key, ref entryLocation, out bool found); - bucket.SUnlock(); - return found; - } - - public void Dispose() - { - if (this.buckets != null) - { - foreach (var bucket in this.buckets) - bucket.Dispose(this.userFunctions); - } - this.freeList.Clear(); - } - } -} diff --git a/cs/src/core/Utilities/LockTable.cs b/cs/src/core/Utilities/LockTable.cs deleted file mode 100644 index 3a5f31c38..000000000 --- a/cs/src/core/Utilities/LockTable.cs +++ /dev/null @@ -1,426 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT license. - -using FASTER.core.Utilities; -using System; -using System.Diagnostics; -using System.Runtime.CompilerServices; -using System.Threading; - -namespace FASTER.core -{ - internal class LockTable : IDisposable - { - #region IInMemKVUserFunctions implementation - internal class LockTableFunctions : IInMemKVUserFunctions, RecordInfo>, IDisposable - { - private readonly IFasterEqualityComparer keyComparer; - private readonly IVariableLengthStruct keyLen; - private readonly SectorAlignedBufferPool bufferPool; - - internal LockTableFunctions(IFasterEqualityComparer keyComparer, IVariableLengthStruct keyLen) - { - this.keyComparer = keyComparer; - this.keyLen = keyLen; - if (keyLen is not null) - this.bufferPool = new SectorAlignedBufferPool(1, 1); - } - - public IHeapContainer CreateHeapKey(ref TKey key) - => bufferPool is null ? new StandardHeapContainer(ref key) : new VarLenHeapContainer(ref key, keyLen, bufferPool); - - public ref TKey GetHeapKeyRef(IHeapContainer heapKey) => ref heapKey.Get(); - - public bool Equals(ref TKey key, IHeapContainer heapKey) => keyComparer.Equals(ref key, ref heapKey.Get()); - - public long GetHashCode64(ref TKey key) => keyComparer.GetHashCode64(ref key); - - public bool IsActive(ref RecordInfo recordInfo) => recordInfo.IsLocked; - - public void Dispose(ref IHeapContainer key, ref RecordInfo recordInfo) - { - key?.Dispose(); - key = default; - recordInfo = default; - } - - public void Dispose() - { - this.bufferPool?.Free(); - } - } - #endregion IInMemKVUserFunctions implementation - - internal readonly InMemKV, RecordInfo, LockTableFunctions> kv; - internal readonly LockTableFunctions functions; - - internal LockTable(int numBuckets, IFasterEqualityComparer keyComparer, IVariableLengthStruct keyLen) - { - this.functions = new(keyComparer, keyLen); - this.kv = new InMemKV, RecordInfo, LockTableFunctions>(numBuckets, numBuckets >> 4, this.functions); - } - - public bool IsActive => kv.IsActive; - - /// - /// Try to lock the key for an ephemeral operation; if there is no lock, return without locking and let 2p insert handle it. 
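Editor's note: a restatement of the TryLockEphemeral semantics as a standalone sketch. When no entry exists, the method reports success without locking, because the two-phase insertion protocol protects the not-present case; when an entry exists, success requires actually winning the lock. Types here are illustrative, and this sketch makes a single CAS attempt where the deleted code retries under bucket latches.

```csharp
using System.Collections.Concurrent;
using System.Threading;

enum SketchLockType { Shared, Exclusive }

sealed class LockEntry
{
    int word; // 0 = free, >0 = shared count, -1 = exclusive

    public bool TryLock(SketchLockType t)
    {
        int w = Volatile.Read(ref word);
        return t == SketchLockType.Shared
            ? w >= 0 && Interlocked.CompareExchange(ref word, w + 1, w) == w
            : w == 0 && Interlocked.CompareExchange(ref word, -1, 0) == 0;
    }
}

sealed class EphemeralLockTableSketch<TKey> where TKey : notnull
{
    readonly ConcurrentDictionary<TKey, LockEntry> table = new();

    // True if the lock was acquired OR no entry exists; gotLock reports which case.
    public bool TryLockEphemeral(TKey key, SketchLockType t, out bool gotLock)
    {
        if (!table.TryGetValue(key, out var entry))
        {
            gotLock = false;                   // nothing to lock: 2-phase insert covers absence
            return true;
        }
        return gotLock = entry.TryLock(t);     // entry exists: succeed only by winning the lock
    }
}
```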
- /// - /// The key to lock - /// The hash code of the key to lock, to avoid recalculating - /// The lock type to acquire, if the key is found - /// Returns true if we got the requested lock; the caller must unlock - /// True if either lock was acquired or the entry was not found; false if lock acquisition failed - /// Ephemeral locks only lock if an entry exists; two-phase insertion/locking will ensure we don't have a conflict - /// if there is not already an entry there. - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool TryLockEphemeral(ref TKey key, long hash, LockType lockType, out bool gotLock) - { - var funcs = new FindEntryFunctions_EphemeralLock(lockType); - kv.FindEntry(ref key, hash, ref funcs); - gotLock = funcs.gotLock; - return funcs.success; - } - - internal struct FindEntryFunctions_EphemeralLock : InMemKV, RecordInfo, LockTableFunctions>.IFindEntryFunctions - { - readonly LockType lockType; - internal bool success, gotLock; - - internal FindEntryFunctions_EphemeralLock(LockType lockType) - { - this.lockType = lockType; - this.success = this.gotLock = false; - } - - public void NotFound(ref TKey key) => success = true; - - public void FoundEntry(ref TKey key, ref RecordInfo recordInfo) => success = gotLock = recordInfo.TryLock(lockType); - } - - /// - /// Try to acquire a manual lock, either by locking existing LockTable record or adding a new record. - /// - /// The key to lock - /// The hash code of the key to lock, to avoid recalculating - /// The lock type to acquire, if the key is found - /// If true, the lock should be acquired tentatively, as part of two-phase insertion/locking - /// True if the lock was acquired; false if lock acquisition failed - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool TryLockManual(ref TKey key, long hash, LockType lockType, out bool isTentative) - { - var funcs = new FindOrAddEntryFunctions_ManualLock(lockType, isTentative: true); - kv.FindOrAddEntry(ref key, hash, ref funcs); - isTentative = funcs.isTentative; // funcs.isTentative is in/out - return funcs.success; - } - - internal struct FindOrAddEntryFunctions_ManualLock : InMemKV, RecordInfo, LockTableFunctions>.IFindOrAddEntryFunctions - { - readonly LockType lockType; - internal bool success, isTentative; // isTentative is in/out - - internal FindOrAddEntryFunctions_ManualLock(LockType lockType, bool isTentative) - { - this.lockType = lockType; - this.isTentative = isTentative; - this.success = false; - } - - public void FoundEntry(ref TKey key, ref RecordInfo recordInfo) - { - // If the entry is already there, we lock it, tentatively if specified - success = (lockType == LockType.Shared) - ? 
recordInfo.TryLockShared(isTentative) // This will only be tentative if there are no other locks at the time we finally acquire the lock - : recordInfo.TryLockExclusive(isTentative); - isTentative = recordInfo.Tentative; - } - - public void AddedEntry(ref TKey key, ref RecordInfo recordInfo) - { - recordInfo.InitializeLock(lockType, isTentative); - isTentative = recordInfo.Tentative; - recordInfo.Valid = true; - success = true; - } - } - - /// - /// Unlock the key with the specified lock type - /// - /// The key to unlock - /// The hash code of the key to lock, to avoid recalculating - /// The lock type--shared or exclusive - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool Unlock(ref TKey key, long hash, LockType lockType) - { - var funcs = new FindEntryFunctions_Unlock(lockType); - kv.FindEntry(ref key, hash, ref funcs); - - // success is false if the key was not found, or if the record was marked invalid. - return funcs.success; - } - - internal struct FindEntryFunctions_Unlock : InMemKV, RecordInfo, LockTableFunctions>.IFindEntryFunctions - { - readonly LockType lockType; - readonly bool wasTentative; - internal bool success; - - internal FindEntryFunctions_Unlock(LockType lockType, bool wasTentative = false) - { - this.lockType = lockType; - this.wasTentative = wasTentative; - success = false; - } - - public void NotFound(ref TKey key) => success = false; - - public void FoundEntry(ref TKey key, ref RecordInfo recordInfo) - { - // If the record is xlocked and tentative, it means RecordInfo.TryLockExclusive is spinning in its "drain the readers" loop. -#if DEBUG - var ri = recordInfo; // Need a local for atomic comparisons; "ref recordInfo" state can change between the "&&" - Debug.Assert(wasTentative == ri.Tentative || lockType == LockType.Shared && ri.Tentative && ri.IsLockedExclusive, "Entry.recordInfo.Tentative was not as expected"); -#endif - success = recordInfo.TryUnlock(lockType); - } - } - - /// - /// Remove the key from the lockTable, if it exists. Called after a record is transferred. - /// - /// The key to unlock - /// The hash code of the key to lock, to avoid recalculating - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool Remove(ref TKey key, long hash) - { - var funcs = new FindEntryFunctions_Remove(); - kv.FindEntry(ref key, hash, ref funcs); - return funcs.wasFound; - } - - internal struct FindEntryFunctions_Remove : InMemKV, RecordInfo, LockTableFunctions>.IFindEntryFunctions - { - internal bool wasFound; - - public void NotFound(ref TKey key) => wasFound = false; - - public void FoundEntry(ref TKey key, ref RecordInfo recordInfo) - { - recordInfo.ClearLocks(); - wasFound = true; - } - } - - /// - /// Transfer locks from the Log record into the lock table. Called on eviction from main log or readcache. - /// - /// The key to unlock - /// The log record to copy from - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public void TransferFromLogRecord(ref TKey key, RecordInfo logRecordInfo) - { - Debug.Assert(!logRecordInfo.IsIntermediate, "Should not have a transfer from an intermediate log record"); - - // This is called from record eviction, which doesn't have a hashcode available, so we have to calculate it here. 
- long hash = functions.GetHashCode64(ref key); - var funcs = new AddEntryFunctions_TransferFromLogRecord(logRecordInfo); - kv.AddEntry(ref key, hash, ref funcs); - } - - internal struct AddEntryFunctions_TransferFromLogRecord : InMemKV, RecordInfo, LockTableFunctions>.IAddEntryFunctions - { - internal RecordInfo fromRecordInfo; - - internal AddEntryFunctions_TransferFromLogRecord(RecordInfo fromRI) => fromRecordInfo = fromRI; - - public void AddedEntry(ref TKey key, ref RecordInfo recordInfo) - { - recordInfo.TransferLocksFrom(ref fromRecordInfo); - recordInfo.Valid = true; - } - } - - /// - /// Transfer locks from the lock table into the Log record. - /// - /// The key to unlock - /// The hash code of the key to lock, to avoid recalculating - /// The log record to copy from - /// Returns whether the entry was found - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool TransferToLogRecord(ref TKey key, long hash, ref RecordInfo logRecordInfo) - { - Debug.Assert(logRecordInfo.Tentative, "Caller must retain tentative flag in log record until locks are transferred"); - var funcs = new FindEntryFunctions_TransferToLogRecord(logRecordInfo); - kv.FindEntry(ref key, hash, ref funcs); - logRecordInfo = funcs.toRecordInfo; - return funcs.wasFound; - } - - internal struct FindEntryFunctions_TransferToLogRecord : InMemKV, RecordInfo, LockTableFunctions>.IFindEntryFunctions - { - internal RecordInfo toRecordInfo; - internal bool wasFound; - - internal FindEntryFunctions_TransferToLogRecord(RecordInfo toRI) - { - this.toRecordInfo = toRI; - wasFound = false; - } - - public void NotFound(ref TKey key) { } - - public void FoundEntry(ref TKey key, ref RecordInfo recordInfo) - { - Debug.Assert(!recordInfo.Tentative, "Should not transfer from a tentative LT record"); - toRecordInfo.TransferLocksFrom(ref recordInfo); - wasFound = true; - } - } - - /// - /// Clear the Tentative bit from the key's lock--make it "real" - /// - /// The key to unlock - /// The hash code of the key to lock, to avoid recalculating - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool ClearTentativeBit(ref TKey key, long hash) - { - var funcs = new FindEntryFunctions_ClearTentative(); - kv.FindEntry(ref key, hash, ref funcs); - Debug.Assert(funcs.success, "ClearTentativeBit should always find the entry"); - return funcs.success; - } - - internal struct FindEntryFunctions_ClearTentative : InMemKV, RecordInfo, LockTableFunctions>.IFindEntryFunctions - { - internal bool success; - - public void NotFound(ref TKey key) => success = false; - - public void FoundEntry(ref TKey key, ref RecordInfo recordInfo) - { - Debug.Assert(recordInfo.Tentative, "ClearTentative should only be called for a tentative record"); - recordInfo.ClearTentativeBitAtomic(); - success = true; - } - } - - /// - /// Unlock the key, or remove a tentative entry that was added. This is called when the caller is abandoning the current attempt and will retry. 
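Editor's note: the two tentative-bit protocols that follow (CompleteTwoPhaseUpdate and CompleteTwoPhaseCopyToTail) share one waiting rule, distilled here into a hypothetical sketch: a tentative entry is owned by an in-flight two-phase operation that is guaranteed to either finalize it (clear the bit) or abandon it (remove the entry), so a waiter spins only on Tentative and bails out on a real lock.

```csharp
using System;
using System.Threading;

enum Probe { NotFound, Tentative, Locked }

static class TwoPhaseSketch
{
    // Mirrors the deleted CompleteTwoPhaseUpdate loop shape; probeLockTable stands in
    // for the FindEntry call against the lock table.
    public static bool CompleteTwoPhaseUpdate(Func<Probe> probeLockTable)
    {
        while (true)
        {
            var result = probeLockTable();
            if (result == Probe.NotFound) return true;   // no competing lock: proceed
            if (result == Probe.Locked) return false;    // real holder: caller must RETRY_LATER
            Thread.Yield();                              // tentative: owner will resolve it
        }
    }
}
```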
- /// - /// The key to unlock - /// The hash code of the key to lock, to avoid recalculating - /// The lock type--shared or exclusive - /// Whether or not we set a Tentative lock for it - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public void UnlockOrRemoveTentativeEntry(ref TKey key, long hash, LockType lockType, bool wasTentative) - { - var funcs = new FindEntryFunctions_Unlock(lockType, wasTentative); - kv.FindEntry(ref key, hash, ref funcs); - Debug.Assert(funcs.success, "UnlockOrRemoveTentative should always find the entry"); - } - - /// - /// Returns whether the two-phase Update protocol completes successfully: wait for any tentative lock on the key (it will either be cleared by - /// the owner or the record will be removed), and return false if a non-tentative lock was found. - /// - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool CompleteTwoPhaseUpdate(ref TKey key, long hash) - { - var funcs = new FindEntryFunctions_CompleteTwoPhaseUpdate(); - while (true) - { - funcs.tentativeFound = false; - kv.FindEntry(ref key, hash, ref funcs); - if (funcs.notFound) - return true; - if (!funcs.tentativeFound) - break; - Thread.Yield(); - } - - // A non-tentative lock was found. - return false; - } - - internal struct FindEntryFunctions_CompleteTwoPhaseUpdate : InMemKV, RecordInfo, LockTableFunctions>.IFindEntryFunctions - { - internal bool notFound, tentativeFound; - - public void NotFound(ref TKey key) => notFound = true; - - public void FoundEntry(ref TKey key, ref RecordInfo recordInfo) => tentativeFound = recordInfo.Tentative; - } - - /// - /// Returns whether the two-phase CopyToTail protocol completes successfully: wait for any tentative lock on the key (it will either be cleared by - /// the owner or the record will be removed), return false if an exclusive lock is found, or transfer any read locks. - /// - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool CompleteTwoPhaseCopyToTail(ref TKey key, long hash, ref RecordInfo logRecordInfo, bool allowXLock, bool removeEphemeralLock) - { - Debug.Assert(logRecordInfo.Tentative, "Must retain tentative flag until locks are transferred"); - var funcs = new FindEntryFunctions_CompleteTwoPhaseCopyToTail(logRecordInfo, allowXLock, removeEphemeralLock); - kv.FindEntry(ref key, hash, ref funcs); - logRecordInfo = funcs.toRecordInfo; - return funcs.success; - } - - internal struct FindEntryFunctions_CompleteTwoPhaseCopyToTail : InMemKV, RecordInfo, LockTableFunctions>.IFindEntryFunctions - { - internal RecordInfo toRecordInfo; - private readonly bool allowXLock, removeEphemeralLock; - internal bool success; - - internal FindEntryFunctions_CompleteTwoPhaseCopyToTail(RecordInfo toRecordInfo, bool allowXLock, bool removeEphemeralLock) - { - this.toRecordInfo = toRecordInfo; - this.allowXLock = allowXLock; - this.removeEphemeralLock = removeEphemeralLock; - success = false; - } - - public void NotFound(ref TKey key) { /* If there is no entry, somone else moved it */ } - - public void FoundEntry(ref TKey key, ref RecordInfo recordInfo) - => success = toRecordInfo.CopyReadLocksFromAndMarkSourceAtomic(ref recordInfo, allowXLock, seal: false, this.removeEphemeralLock); - } - - /// - /// Test whether a key is present in the Lock Table. In production code, this is used to implement - /// . 
- /// - /// The key to unlock - /// The hash code of the key to lock, to avoid recalculating - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public bool ContainsKey(ref TKey key, long hash) => kv.ContainsKey(ref key, hash); - - public void Dispose() - { - kv.Dispose(); - functions.Dispose(); - } - - #region Internal methods for Test - internal bool HasEntries(ref TKey key) => kv.HasEntries(ref key); - internal bool HasEntries(long hash) => kv.HasEntries(hash); - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal bool TryGet(ref TKey key, out RecordInfo recordInfo) => TryGet(ref key, this.functions.GetHashCode64(ref key), out recordInfo); - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal bool TryGet(ref TKey key, long hash, out RecordInfo recordInfo) => kv.TryGet(ref key, hash, out recordInfo); - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal bool IsLocked(ref TKey key, long hash) => TryGet(ref key, hash, out var recordInfo) && recordInfo.IsLocked; - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal bool IsLockedShared(ref TKey key, long hash) => TryGet(ref key, hash, out var recordInfo) && recordInfo.IsLockedShared; - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal bool IsLockedExclusive(ref TKey key, long hash) => TryGet(ref key, hash, out var recordInfo) && recordInfo.IsLockedExclusive; - #endregion Internal methods for Test - } -} diff --git a/cs/src/core/Utilities/LockType.cs b/cs/src/core/Utilities/LockType.cs index 0b273ca68..89ff7e4ee 100644 --- a/cs/src/core/Utilities/LockType.cs +++ b/cs/src/core/Utilities/LockType.cs @@ -13,23 +13,158 @@ public enum LockType : byte /// None, + /// + /// Exclusive lock, taken on Upsert, RMW, or Delete + /// + Exclusive, + /// /// Shared lock, taken on Read /// - Shared, + Shared + } + /// + /// How FASTER should do record locking + /// + public enum LockingMode : byte + { /// - /// Exclusive lock, taken on Upsert, RMW, or Delete + /// Keys are locked based upon the session type, using hash buckets, and support manual locking. + /// + Standard, + + /// + /// Keys are locked only for the duration of a concurrent IFunctions call (one that operates on data in the mutable region of the log). + /// + Ephemeral, + + /// + /// Locking is not done in FASTER. + /// + None + } + + internal enum EphemeralLockResult + { + Success, // Lock succeeded + Failed, // Lock failed due to timeout; must do RETRY_LATER + HoldForSeal // Lock succeeded, but was not unlocked because the user's IFunctions method requires a read-copy-update + } + + /// + /// Interface that must be implemented to participate in keycode-based locking. 
+ /// + public interface ILockableKey + { + /// + /// The lock code for a specific key, obtained from + /// + public long LockCode { get; } + + /// + /// The lock type for a specific key /// - Exclusive + public LockType LockType { get; } + } + + /// + /// A utility class to carry a fixed-length key (blittable or object type) and its assciated info for Locking + /// + /// + public struct FixedLengthLockableKeyStruct : ILockableKey + { + /// + /// The key that is acquiring or releasing a lock + /// + public TKey Key; + + /// + /// The hash code of the key that is acquiring or releasing a lock + /// + public long KeyHash; + + #region ILockableKey + /// + public long LockCode { get; set; } + + /// + public LockType LockType { get; set; } + #endregion ILockableKey + + /// + /// Constructor + /// + public FixedLengthLockableKeyStruct(TKey key, LockType lockType, ILockableContext context) : this(ref key, lockType, context) { } + + /// + /// Constructor + /// + public FixedLengthLockableKeyStruct(ref TKey key, LockType lockType, ILockableContext context) + { + Key = key; + LockType = lockType; + LockCode = context.GetLockCode(ref key, out KeyHash); + } + /// + /// Constructor + /// + public FixedLengthLockableKeyStruct(TKey key, long keyHash, LockType lockType, ILockableContext context) : this(ref key, keyHash, lockType, context) { } + + /// + /// Constructor + /// + public FixedLengthLockableKeyStruct(ref TKey key, long keyHash, LockType lockType, ILockableContext context) + { + Key = key; + KeyHash = keyHash; + LockType = lockType; + LockCode = context.GetLockCode(ref key, keyHash); + } + + /// + /// Sort the passed key array for use in + /// and + /// + /// + /// + public static void Sort(FixedLengthLockableKeyStruct[] keys, ILockableContext context) => context.SortLockCodes(keys); + + /// + public override string ToString() + { + // The debugger often can't call the Globalization NegativeSign property so ToString() would just display the class name + var hashSign = KeyHash < 0 ? "-" : string.Empty; + var absHash = this.KeyHash >= 0 ? this.KeyHash : -this.KeyHash; + return $"key {Key}, hash {hashSign}{absHash}, lockCode {LockCode}, {LockType}"; + } + + } + + /// + /// Lock state of a record + /// + internal struct LockState + { + internal bool IsLockedExclusive; + internal bool IsFound; + internal ushort NumLockedShared; + internal bool IsLockedShared => NumLockedShared > 0; + + internal bool IsLocked => IsLockedExclusive || NumLockedShared > 0; + + public override string ToString() + { + var locks = $"{(this.IsLockedExclusive ? "x" : string.Empty)}{this.NumLockedShared}"; + return $"found {IsFound}, locks {locks}"; + } } internal enum LockOperationType : byte { None, Lock, - Unlock, - IsLocked + Unlock } internal struct LockOperation diff --git a/cs/src/core/Utilities/LockUtility.cs b/cs/src/core/Utilities/LockUtility.cs deleted file mode 100644 index b190fb8aa..000000000 --- a/cs/src/core/Utilities/LockUtility.cs +++ /dev/null @@ -1,65 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT license. 
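Editor's note: a usage sketch for the FixedLengthLockableKeyStruct added above, assuming the Microsoft.FASTER.Core package, a long-keyed store, and the lockable-context surface this diff's stress tests exercise (BeginUnsafe/EndUnsafe and array-based Lock/Unlock). Treat the exact member names (LockableUnsafeContext, BeginLockable) as unverified assumptions rather than confirmed API.

```csharp
using FASTER.core;

static class ManualLockingSketch
{
    public static void LockTwoKeys(
        ClientSession<long, long, long, long, Empty, SimpleFunctions<long, long>> session)
    {
        var luContext = session.LockableUnsafeContext;

        var keys = new[]
        {
            new FixedLengthLockableKeyStruct<long>(17, LockType.Exclusive, luContext),
            new FixedLengthLockableKeyStruct<long>(42, LockType.Shared, luContext),
        };

        // Sort by lock code first, so every session acquires its locks in the same
        // global order (deadlock avoidance), as the Sort doc comment above requires.
        FixedLengthLockableKeyStruct<long>.Sort(keys, luContext);

        luContext.BeginUnsafe();     // retain epoch protection across lock/operate/unlock
        luContext.BeginLockable();
        try
        {
            luContext.Lock(keys);
            // ... operate on keys 17 and 42 under the locks ...
            luContext.Unlock(keys);
        }
        finally
        {
            luContext.EndLockable();
            luContext.EndUnsafe();
        }
    }
}
```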
- -using System.Diagnostics; -using System.Runtime.CompilerServices; -using System.Threading; - -namespace FASTER.core -{ - internal static class LockUtility - { - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static bool IsIntermediate(this ref RecordInfo recordInfo, out OperationStatus internalStatus, bool isReadingAtAddress = false) - { - // First a fast check so there is only one "if" - internalStatus = OperationStatus.SUCCESS; - if (!recordInfo.IsIntermediate) - return false; - - // Separate routine to reduce impact on inlining decision. - return HandleIntermediate(ref recordInfo, out internalStatus, isReadingAtAddress); - } - - internal static bool HandleIntermediate(this ref RecordInfo recordInfo, out OperationStatus internalStatus, bool isReadingAtAddress = false) - { - SpinWaitWhileTentativeAndReturnValidity(ref recordInfo); - - // We don't want to jump out on Sealed/Invalid and restart if we are traversing the "read by address" chain - if ((recordInfo.Sealed || recordInfo.Invalid) && !isReadingAtAddress) - { - Thread.Yield(); - - // A record is only Sealed or Invalidated in the hash chain after the new record has been successfully inserted. - internalStatus = OperationStatus.RETRY_LATER; - return true; - } - internalStatus = OperationStatus.SUCCESS; - return false; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static bool SpinWaitWhileTentativeAndReturnValidity(ref RecordInfo recordInfo) - { - // This is called for Tentative records encountered in the hash chain, and no epoch-changing allocations should be done after they have been - // added to the hash chain. Therefore, it is safe to spin. This routine centralizes this, in the event it needs to change, e.g. by limiting spin - // count and returning bool. 
- while (recordInfo.Tentative) - Thread.Yield(); - return recordInfo.Valid; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static bool TryLockOperation(this ref RecordInfo recordInfo, LockOperation lockOp) - { - if (lockOp.LockOperationType == LockOperationType.Lock) - return recordInfo.TryLock(lockOp.LockType); - - if (lockOp.LockOperationType == LockOperationType.Unlock) - recordInfo.TryUnlock(lockOp.LockType); - else - Debug.Fail($"Unexpected LockOperation {lockOp.LockOperationType}"); - return true; - } - } -} diff --git a/cs/src/core/Utilities/Native32.cs b/cs/src/core/Utilities/Native32.cs index 6c192d633..66c4d11be 100644 --- a/cs/src/core/Utilities/Native32.cs +++ b/cs/src/core/Utilities/Native32.cs @@ -87,6 +87,8 @@ internal struct FILE_STORAGE_INFO #region io constants and flags internal const int ERROR_IO_PENDING = 997; + internal const int ERROR_PATH_NOT_FOUND = 3; + internal const int WIN32_MAX_PATH = 260; internal const uint GENERIC_READ = 0x80000000; internal const uint GENERIC_WRITE = 0x40000000; internal const uint FILE_FLAG_DELETE_ON_CLOSE = 0x04000000; diff --git a/cs/stress/IValueTester.cs b/cs/stress/IValueTester.cs index bd851c582..861592904 100644 --- a/cs/stress/IValueTester.cs +++ b/cs/stress/IValueTester.cs @@ -22,7 +22,9 @@ internal interface IValueTester : IValueTester, IDisposable void AddRecord(int keyOrdinal, ref TKey key); - void TestRecord(int keyOrdinal, int keyCount, TKey[] keys); - Task TestRecordAsync(int keyOrdinal, int keyCount, TKey[] keys); + ILockableContext LockableContext { get; } + + void TestRecord(int keyOrdinal, int keyCount, FixedLengthLockableKeyStruct[] keys); + Task TestRecordAsync(int keyOrdinal, int keyCount, FixedLengthLockableKeyStruct[] keys); } } diff --git a/cs/stress/LongKeyTester.cs b/cs/stress/LongKeyTester.cs index 0614f1177..94c3f53fb 100644 --- a/cs/stress/LongKeyTester.cs +++ b/cs/stress/LongKeyTester.cs @@ -12,7 +12,7 @@ internal class LongKeyTester : IKeyTester readonly IValueTester valueTester; readonly Random rng; readonly int[] lockOrdinals; - readonly long[] lockKeys; + readonly FixedLengthLockableKeyStruct[] lockKeys; internal LongKeyTester(int tid, TestLoader testLoader) { @@ -27,7 +27,7 @@ internal LongKeyTester(int tid, TestLoader testLoader) }; rng = new Random(tid * testLoader.Options.RandomSeed); lockOrdinals = new int[testLoader.LockKeyArraySize]; - lockKeys = new long[testLoader.LockKeyArraySize]; + lockKeys = new FixedLengthLockableKeyStruct[testLoader.LockKeyArraySize]; } public long GetAverageRecordSize() => sizeof(long) + valueTester.GetAverageSize(); @@ -65,15 +65,9 @@ public void Populate(int hashTableCacheLines, LogSettings logSettings, Checkpoin } } - class SpanByteSortComparer : IComparer - { - public int Compare(long x, long y) => x.CompareTo(y); - } - readonly SpanByteSortComparer sortComparer = new(); - - public void Test() => testLoader.Test(tid, rng, lockOrdinals, lockKeys, ordinal => ordinal, sortComparer, valueTester); + public void Test() => testLoader.Test(tid, rng, lockOrdinals, lockKeys, ordinal => ordinal, valueTester); - public Task TestAsync() => testLoader.TestAsync(tid, rng, lockOrdinals, lockKeys, ordinal => ordinal, sortComparer, valueTester); + public Task TestAsync() => testLoader.TestAsync(tid, rng, lockOrdinals, lockKeys, ordinal => ordinal, valueTester); public void Dispose() { } } diff --git a/cs/stress/LongValueTester.cs b/cs/stress/LongValueTester.cs index d4d092090..c39fc2dc6 100644 --- a/cs/stress/LongValueTester.cs +++ b/cs/stress/LongValueTester.cs 
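Editor's note: the ERROR_PATH_NOT_FOUND and WIN32_MAX_PATH constants just added to Native32 pair naturally with the FasterIOException type introduced earlier in this diff. The ValidatePath helper below is hypothetical (it is not code from this PR) and re-declares the internal constants locally; only FasterIOException and the two constant values come from the diff.

```csharp
using System;
using System.IO;
using FASTER.core;

static class DeviceOpenSketch
{
    // Local copies of the internal Native32 constants added above.
    const int ERROR_PATH_NOT_FOUND = 3;
    const int WIN32_MAX_PATH = 260;

    static void ValidatePath(string path)
    {
        if (path.Length >= WIN32_MAX_PATH)
            throw new FasterIOException(
                $"Path length {path.Length} exceeds Win32 MAX_PATH ({WIN32_MAX_PATH})",
                new PathTooLongException(path));

        if (!Directory.Exists(Path.GetDirectoryName(path)))
            throw new FasterIOException(
                $"Win32 error {ERROR_PATH_NOT_FOUND} (ERROR_PATH_NOT_FOUND) for: {path}",
                new DirectoryNotFoundException(path));
    }
}
```

Wrapping the Win32 error detail in the inner exception while keeping a FASTER-specific outer type lets callers catch FasterException broadly or FasterIOException specifically.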
@@ -26,6 +26,8 @@ internal LongValueTester(int tid, TestLoader testLoader, TFunctions functions) this.session = new(testLoader, output => (int)(output % testLoader.ValueIncrement), rng); } + public ILockableContext LockableContext => this.session.LockableContext; + public long GetAverageSize() => sizeof(long); public void Create(int hashTableCacheLines, LogSettings logSettings, CheckpointSettings checkpointSettings, IFasterEqualityComparer comparer) @@ -63,7 +65,7 @@ public bool CompactStore() return true; } - public void TestRecord(int keyOrdinal, int keyCount, TKey[] keys) + public void TestRecord(int keyOrdinal, int keyCount, FixedLengthLockableKeyStruct[] lockKeys) { var opType = testLoader.GetOperationType(rng); @@ -74,21 +76,21 @@ public void TestRecord(int keyOrdinal, int keyCount, TKey[] keys) switch (opType) { case OperationType.READ: - this.session.Read(keyOrdinal, keyCount, keys); + this.session.Read(keyOrdinal, keyCount, lockKeys); return; case OperationType.DELETE: - session.Delete(keys); + session.Delete(lockKeys); return; case OperationType.RMW: - session.RMW(keyOrdinal, keyCount, keys, value); + session.RMW(keyOrdinal, keyCount, lockKeys, value); return; default: - session.Upsert(keys, value); + session.Upsert(lockKeys, value); return; } } - public Task TestRecordAsync(int keyOrdinal, int keyCount, TKey[] keys) + public Task TestRecordAsync(int keyOrdinal, int keyCount, FixedLengthLockableKeyStruct[] lockKeys) { var opType = testLoader.GetOperationType(rng); @@ -98,10 +100,10 @@ public Task TestRecordAsync(int keyOrdinal, int keyCount, TKey[] keys) // Read and Delete do not take a value return opType switch { - OperationType.READ => session.ReadAsync(keyOrdinal, keyCount, keys), - OperationType.DELETE => session.DeleteAsync(keys), - OperationType.RMW => session.RMWAsync(keyOrdinal, keyCount, keys, value), - _ => session.UpsertAsync(keys, value) + OperationType.READ => session.ReadAsync(keyOrdinal, keyCount, lockKeys), + OperationType.DELETE => session.DeleteAsync(lockKeys), + OperationType.RMW => session.RMWAsync(keyOrdinal, keyCount, lockKeys, value), + _ => session.UpsertAsync(lockKeys, value) }; } diff --git a/cs/stress/SessionWrapper.cs b/cs/stress/SessionWrapper.cs index eed169c21..bc239adee 100644 --- a/cs/stress/SessionWrapper.cs +++ b/cs/stress/SessionWrapper.cs @@ -32,22 +32,24 @@ internal void PrepareTest(ClientSession LockableContext => luContext; + internal ClientSession> FkvSession => this.session; internal bool IsLUC => !this.luContext.IsNull; #region Read - internal void Read(int keyOrdinal, int keyCount, TKey[] keys) + internal void Read(int keyOrdinal, int keyCount, FixedLengthLockableKeyStruct[] lockKeys) { if (this.IsLUC) - ReadLUC(keyOrdinal, keyCount, keys); + ReadLUC(keyOrdinal, keyCount, lockKeys); else - this.Read(keyOrdinal, keys[0]); + this.Read(keyOrdinal, lockKeys[0].Key); } - internal Task ReadAsync(int keyOrdinal, int keyCount, TKey[] keys) - => this.IsLUC ? this.ReadLUCAsync(keyOrdinal, keyCount, keys) : this.ReadAsync(keyOrdinal, keys[0]); + internal Task ReadAsync(int keyOrdinal, int keyCount, FixedLengthLockableKeyStruct[] keys) + => this.IsLUC ? 
this.ReadLUCAsync(keyOrdinal, keyCount, keys) : this.ReadAsync(keyOrdinal, keys[0].Key); private void Read(int keyOrdinal, TKey key) { @@ -77,14 +79,14 @@ private async Task ReadAsync(int keyOrdinal, TKey key) disposer(output); } - private void ReadLUC(int keyOrdinal, int keyCount, TKey[] keys) + private void ReadLUC(int keyOrdinal, int keyCount, FixedLengthLockableKeyStruct[] lockKeys) { try { luContext.BeginUnsafe(); // Retain epoch control through lock, the operation, and unlock - testLoader.MaybeLock(luContext, keyCount, keys, isRmw: false, isAsyncTest: false); + testLoader.MaybeLock(luContext, keyCount, lockKeys, isRmw: false, isAsyncTest: false); TOutput output = default; - var status = luContext.Read(ref keys[0], ref output); + var status = luContext.Read(ref lockKeys[0].Key, ref output); if (status.IsPending) { luContext.CompletePendingWithOutputs(out var completedOutputs, wait: true); @@ -100,19 +102,19 @@ private void ReadLUC(int keyOrdinal, int keyCount, TKey[] keys) } finally { - testLoader.MaybeUnlock(luContext, keyCount, keys, isRmw: false, isAsyncTest: false); + testLoader.MaybeUnlock(luContext, keyCount, lockKeys, isRmw: false, isAsyncTest: false); luContext.EndUnsafe(); } } - private async Task ReadLUCAsync(int keyOrdinal, int keyCount, TKey[] keys) + private async Task ReadLUCAsync(int keyOrdinal, int keyCount, FixedLengthLockableKeyStruct[] lockKeys) { try { - testLoader.MaybeLock(luContext, keyCount, keys, isRmw: false, isAsyncTest: true); + testLoader.MaybeLock(luContext, keyCount, lockKeys, isRmw: false, isAsyncTest: true); // Do not resume epoch for Async operations - var (status, output) = (await luContext.ReadAsync(ref keys[0])).Complete(); + var (status, output) = (await luContext.ReadAsync(ref lockKeys[0].Key)).Complete(); if (status.Found) Assert.AreEqual(keyOrdinal, GetResultKeyOrdinal(output)); else @@ -121,22 +123,22 @@ private async Task ReadLUCAsync(int keyOrdinal, int keyCount, TKey[] keys) } finally { - testLoader.MaybeUnlock(luContext, keyCount, keys, isRmw: false, isAsyncTest: true); + testLoader.MaybeUnlock(luContext, keyCount, lockKeys, isRmw: false, isAsyncTest: true); } } #endregion Read #region RMW - internal void RMW(int keyOrdinal, int keyCount, TKey[] keys, TInput input) + internal void RMW(int keyOrdinal, int keyCount, FixedLengthLockableKeyStruct[] lockKeys, TInput input) { if (this.IsLUC) - this.RMWLUC(keyOrdinal, keyCount, keys, input); + this.RMWLUC(keyOrdinal, keyCount, lockKeys, input); else - this.RMW(keyOrdinal, keys[0], input); + this.RMW(keyOrdinal, lockKeys[0].Key, input); } - internal Task RMWAsync(int keyOrdinal, int keyCount, TKey[] keys, TInput input) - => this.IsLUC ? this.RMWLUCAsync(keyOrdinal, keyCount, keys, input) : this.RMWAsync(keyOrdinal, keys[0], input); + internal Task RMWAsync(int keyOrdinal, int keyCount, FixedLengthLockableKeyStruct[] keys, TInput input) + => this.IsLUC ? 
this.RMWLUCAsync(keyOrdinal, keyCount, keys, input) : this.RMWAsync(keyOrdinal, keys[0].Key, input); private void RMW(int keyOrdinal, TKey key, TInput input) { @@ -145,7 +147,7 @@ private void RMW(int keyOrdinal, TKey key, TInput input) if (status.IsPending) { session.CompletePendingWithOutputs(out var completedOutputs, wait: true); - (status, output) = TestLoader.GetSinglePendingResult(completedOutputs, out var recordMetadata); + (status, output) = TestLoader.GetSinglePendingResult(completedOutputs); Assert.AreEqual(status.Found, status.Record.CopyUpdated | status.Record.InPlaceUpdated, $"keyOrdinal {keyOrdinal}: {status}"); } if (status.Found) @@ -165,14 +167,14 @@ private async Task RMWAsync(int keyOrdinal, TKey key, TInput input) disposer(output); } - private void RMWLUC(int keyOrdinal, int keyCount, TKey[] keys, TInput input) + private void RMWLUC(int keyOrdinal, int keyCount, FixedLengthLockableKeyStruct[] lockKeys, TInput input) { try { luContext.BeginUnsafe(); // Retain epoch control through lock, the operation, and unlock - testLoader.MaybeLock(luContext, keyCount, keys, isRmw: true, isAsyncTest: false); + testLoader.MaybeLock(luContext, keyCount, lockKeys, isRmw: true, isAsyncTest: false); TOutput output = default; - var status = luContext.RMW(ref keys[0], ref input, ref output); + var status = luContext.RMW(ref lockKeys[0].Key, ref input, ref output); if (status.IsPending) { luContext.CompletePendingWithOutputs(out var completedOutputs, wait: true); @@ -187,19 +189,19 @@ private void RMWLUC(int keyOrdinal, int keyCount, TKey[] keys, TInput input) } finally { - testLoader.MaybeUnlock(luContext, keyCount, keys, isRmw: true, isAsyncTest: false); + testLoader.MaybeUnlock(luContext, keyCount, lockKeys, isRmw: true, isAsyncTest: false); luContext.EndUnsafe(); } } - private async Task RMWLUCAsync(int keyOrdinal, int keyCount, TKey[] keys, TInput input) + private async Task RMWLUCAsync(int keyOrdinal, int keyCount, FixedLengthLockableKeyStruct[] lockKeys, TInput input) { try { - testLoader.MaybeLock(luContext, keyCount, keys, isRmw: true, isAsyncTest: true); + testLoader.MaybeLock(luContext, keyCount, lockKeys, isRmw: true, isAsyncTest: true); // Do not resume epoch for Async operations - var (status, output) = (await luContext.RMWAsync(ref keys[0], ref input)).Complete(); + var (status, output) = (await luContext.RMWAsync(ref lockKeys[0].Key, ref input)).Complete(); if (status.Found) Assert.AreEqual(keyOrdinal, GetResultKeyOrdinal(output)); else @@ -208,21 +210,21 @@ private async Task RMWLUCAsync(int keyOrdinal, int keyCount, TKey[] keys, TInput } finally { - testLoader.MaybeUnlock(luContext, keyCount, keys, isRmw: true, isAsyncTest: true); + testLoader.MaybeUnlock(luContext, keyCount, lockKeys, isRmw: true, isAsyncTest: true); } } #endregion RMW #region Upsert - internal void Upsert(TKey[] keys, TValue value) + internal void Upsert(FixedLengthLockableKeyStruct[] lockKeys, TValue value) { if (this.IsLUC) - this.UpsertLUC(ref keys[0], ref value); + this.UpsertLUC(ref lockKeys[0].Key, ref value); else - this.Upsert(ref keys[0], ref value); + this.Upsert(ref lockKeys[0].Key, ref value); } - internal Task UpsertAsync(TKey[] keys, TValue value) => this.IsLUC ? this.UpsertLUCAsync(keys[0], value) : this.UpsertAsync(keys[0], value); + internal Task UpsertAsync(FixedLengthLockableKeyStruct[] lockKeys, TValue value) => this.IsLUC ? 
this.UpsertLUCAsync(lockKeys[0].Key, value) : this.UpsertAsync(lockKeys[0].Key, value); internal void Upsert(ref TKey key, ref TValue value) { @@ -251,15 +253,15 @@ private void UpsertLUC(ref TKey key, ref TValue value) #endregion Upsert #region Delete - internal void Delete(TKey[] keys) + internal void Delete(FixedLengthLockableKeyStruct[] lockKeys) { if (this.IsLUC) - this.DeleteLUC(keys[0]); + this.DeleteLUC(lockKeys[0].Key); else - this.Delete(keys[0]); + this.Delete(lockKeys[0].Key); } - internal Task DeleteAsync(TKey[] keys) => this.IsLUC ? this.DeleteLUCAsync(keys[0]) : this.DeleteAsync(keys[0]); + internal Task DeleteAsync(FixedLengthLockableKeyStruct[] keys) => this.IsLUC ? this.DeleteLUCAsync(keys[0].Key) : this.DeleteAsync(keys[0].Key); private void Delete(TKey key) { diff --git a/cs/stress/SpanByteKeyTester.cs b/cs/stress/SpanByteKeyTester.cs index d8c83828a..540f7578d 100644 --- a/cs/stress/SpanByteKeyTester.cs +++ b/cs/stress/SpanByteKeyTester.cs @@ -39,7 +39,7 @@ internal class SpanByteKeyTester : IKeyTester readonly IValueTester valueTester; readonly Random rng; readonly int[] lockOrdinals; - readonly SpanByte[] lockKeys; + readonly FixedLengthLockableKeyStruct[] lockKeys; static PinnedByteArray[] keys; @@ -56,7 +56,7 @@ internal SpanByteKeyTester(int tid, TestLoader testLoader) }; rng = new Random(tid * testLoader.Options.RandomSeed); lockOrdinals = new int[testLoader.LockKeyArraySize]; - lockKeys = new SpanByte[testLoader.LockKeyArraySize]; + lockKeys = new FixedLengthLockableKeyStruct[testLoader.LockKeyArraySize]; keys = new PinnedByteArray[testLoader.Options.KeyCount]; } @@ -93,15 +93,9 @@ public void Populate(int hashTableCacheLines, LogSettings logSettings, Checkpoin } } - class SpanByteSortComparer : IComparer - { - public int Compare(SpanByte x, SpanByte y) => x.AsReadOnlySpan().SequenceCompareTo(y.AsReadOnlySpan()); - } - readonly SpanByteSortComparer sortComparer = new(); - - public void Test() => testLoader.Test(tid, rng, lockOrdinals, lockKeys, ordinal => keys[ordinal].GetSpanByte(), sortComparer, valueTester); + public void Test() => testLoader.Test(tid, rng, lockOrdinals, lockKeys, ordinal => keys[ordinal].GetSpanByte(), valueTester); - public Task TestAsync() => testLoader.TestAsync(tid, rng, lockOrdinals, lockKeys, ordinal => keys[ordinal].GetSpanByte(), sortComparer, valueTester); + public Task TestAsync() => testLoader.TestAsync(tid, rng, lockOrdinals, lockKeys, ordinal => keys[ordinal].GetSpanByte(), valueTester); public void Dispose() { diff --git a/cs/stress/SpanByteValueTester.cs b/cs/stress/SpanByteValueTester.cs index f20c997b9..3af9445f5 100644 --- a/cs/stress/SpanByteValueTester.cs +++ b/cs/stress/SpanByteValueTester.cs @@ -28,6 +28,8 @@ internal SpanByteValueTester(int tid, TestLoader testLoader, TFunctions function this.session = new(testLoader, output => BitConverter.ToInt32(output.Memory.Memory.Span) % testLoader.ValueIncrement, rng, o => o.Memory?.Dispose()); } + public ILockableContext LockableContext => this.session.LockableContext; + public long GetAverageSize() => sizeof(int) + (testLoader.Options.ValueLength / (testLoader.UseRandom ? 
2 : 1)); public void Create(int hashTableCacheLines, LogSettings logSettings, CheckpointSettings checkpointSettings, IFasterEqualityComparer comparer) @@ -67,7 +69,7 @@ public bool CompactStore() return true; } - public void TestRecord(int keyOrdinal, int keyCount, TKey[] keys) + public void TestRecord(int keyOrdinal, int keyCount, FixedLengthLockableKeyStruct[] lockKeys) { var opType = testLoader.GetOperationType(rng); @@ -77,21 +79,21 @@ public void TestRecord(int keyOrdinal, int keyCount, TKey[] keys) switch (opType) { case OperationType.READ: - this.session.Read(keyOrdinal, keyCount, keys); + this.session.Read(keyOrdinal, keyCount, lockKeys); return; case OperationType.DELETE: - session.Delete(keys); + session.Delete(lockKeys); return; case OperationType.RMW: - session.RMW(keyOrdinal, keyCount, keys, values[keyOrdinal].GetSpanByte()); + session.RMW(keyOrdinal, keyCount, lockKeys, values[keyOrdinal].GetSpanByte()); return; default: - session.Upsert(keys, values[keyOrdinal].GetSpanByte()); + session.Upsert(lockKeys, values[keyOrdinal].GetSpanByte()); return; } } - public Task TestRecordAsync(int keyOrdinal, int keyCount, TKey[] keys) + public Task TestRecordAsync(int keyOrdinal, int keyCount, FixedLengthLockableKeyStruct[] lockKeys) { var opType = testLoader.GetOperationType(rng); @@ -100,10 +102,10 @@ public Task TestRecordAsync(int keyOrdinal, int keyCount, TKey[] keys) // Read and Delete do not take a value return opType switch { - OperationType.READ => session.ReadAsync(keyOrdinal, keyCount, keys), - OperationType.DELETE => session.DeleteAsync(keys), - OperationType.RMW => session.RMWAsync(keyOrdinal, keyCount, keys, values[keyOrdinal].GetSpanByte()), - _ => session.UpsertAsync(keys, values[keyOrdinal].GetSpanByte()) + OperationType.READ => session.ReadAsync(keyOrdinal, keyCount, lockKeys), + OperationType.DELETE => session.DeleteAsync(lockKeys), + OperationType.RMW => session.RMWAsync(keyOrdinal, keyCount, lockKeys, values[keyOrdinal].GetSpanByte()), + _ => session.UpsertAsync(lockKeys, values[keyOrdinal].GetSpanByte()) }; } diff --git a/cs/stress/StringKeyTester.cs b/cs/stress/StringKeyTester.cs index fb9d49acf..17a3ff5d1 100644 --- a/cs/stress/StringKeyTester.cs +++ b/cs/stress/StringKeyTester.cs @@ -12,7 +12,7 @@ internal class StringKeyTester : IKeyTester readonly IValueTester valueTester; readonly Random rng; readonly int[] lockOrdinals; - readonly string[] lockKeys; + readonly FixedLengthLockableKeyStruct[] lockKeys; static string[] keys; // Keep these so we don't invoke the GC @@ -29,7 +29,7 @@ internal StringKeyTester(int tid, TestLoader testLoader) }; rng = new Random(tid * testLoader.Options.RandomSeed); lockOrdinals = new int[testLoader.LockKeyArraySize]; - lockKeys = new string[testLoader.LockKeyArraySize]; + lockKeys = new FixedLengthLockableKeyStruct[testLoader.LockKeyArraySize]; keys = new string[testLoader.Options.KeyCount]; } @@ -68,15 +68,9 @@ public void Populate(int hashTableCacheLines, LogSettings logSettings, Checkpoin } } - class StringSortComparer : IComparer - { - public int Compare(string x, string y) => String.CompareOrdinal(x, y); - } - readonly StringSortComparer sortComparer = new(); - - public void Test() => testLoader.Test(tid, rng, lockOrdinals, lockKeys, ordinal => keys[ordinal], sortComparer, valueTester); + public void Test() => testLoader.Test(tid, rng, lockOrdinals, lockKeys, ordinal => keys[ordinal], valueTester); - public Task TestAsync() => testLoader.TestAsync(tid, rng, lockOrdinals, lockKeys, ordinal => keys[ordinal], sortComparer, 
valueTester); + public Task TestAsync() => testLoader.TestAsync(tid, rng, lockOrdinals, lockKeys, ordinal => keys[ordinal], valueTester); public void Dispose() { } } diff --git a/cs/stress/StringValueTester.cs b/cs/stress/StringValueTester.cs index 77aa37ecc..6b792f958 100644 --- a/cs/stress/StringValueTester.cs +++ b/cs/stress/StringValueTester.cs @@ -28,6 +28,8 @@ internal StringValueTester(int tid, TestLoader testLoader, TFunctions functions) values = new string[testLoader.Options.KeyCount]; } + public ILockableContext LockableContext => this.session.LockableContext; + public long GetAverageSize() => testLoader.AverageStringLength; public void Create(int hashTableCacheLines, LogSettings logSettings, CheckpointSettings checkpointSettings, IFasterEqualityComparer comparer) @@ -65,7 +67,7 @@ public bool CompactStore() return true; } - public void TestRecord(int keyOrdinal, int keyCount, TKey[] keys) + public void TestRecord(int keyOrdinal, int keyCount, FixedLengthLockableKeyStruct[] lockKeys) { var opType = testLoader.GetOperationType(rng); @@ -75,21 +77,21 @@ public void TestRecord(int keyOrdinal, int keyCount, TKey[] keys) switch (opType) { case OperationType.READ: - this.session.Read(keyOrdinal, keyCount, keys); + this.session.Read(keyOrdinal, keyCount, lockKeys); return; case OperationType.DELETE: - session.Delete(keys); + session.Delete(lockKeys); return; case OperationType.RMW: - session.RMW(keyOrdinal, keyCount, keys, values[keyOrdinal]); + session.RMW(keyOrdinal, keyCount, lockKeys, values[keyOrdinal]); return; default: - session.Upsert(keys, values[keyOrdinal]); + session.Upsert(lockKeys, values[keyOrdinal]); return; } } - public Task TestRecordAsync(int keyOrdinal, int keyCount, TKey[] keys) + public Task TestRecordAsync(int keyOrdinal, int keyCount, FixedLengthLockableKeyStruct[] lockKeys) { var opType = testLoader.GetOperationType(rng); @@ -98,10 +100,10 @@ public Task TestRecordAsync(int keyOrdinal, int keyCount, TKey[] keys) // Read and Delete do not take a value return opType switch { - OperationType.READ => session.ReadAsync(keyOrdinal, keyCount, keys), - OperationType.DELETE => session.DeleteAsync(keys), - OperationType.RMW => session.RMWAsync(keyOrdinal, keyCount, keys, values[keyOrdinal]), - _ => session.UpsertAsync(keys, values[keyOrdinal]) + OperationType.READ => session.ReadAsync(keyOrdinal, keyCount, lockKeys), + OperationType.DELETE => session.DeleteAsync(lockKeys), + OperationType.RMW => session.RMWAsync(keyOrdinal, keyCount, lockKeys, values[keyOrdinal]), + _ => session.UpsertAsync(lockKeys, values[keyOrdinal]) }; } diff --git a/cs/stress/TestLoader.cs b/cs/stress/TestLoader.cs index be04f0baa..8bf99d54a 100644 --- a/cs/stress/TestLoader.cs +++ b/cs/stress/TestLoader.cs @@ -219,7 +219,7 @@ internal void Status(Verbose level, string message) Console.WriteLine(message); } - internal void MaybeLock(ILockableContext luContext, int keyCount, TKey[] keys, bool isRmw, bool isAsyncTest) + internal void MaybeLock(ILockableContext luContext, int keyCount, FixedLengthLockableKeyStruct[] keys, bool isRmw, bool isAsyncTest) { if (!UseLocks) return; @@ -229,22 +229,22 @@ internal void MaybeLock(ILockableContext luContext, int keyCount, TK uContext.BeginUnsafe(); try { - for (var ii = 0; ii < keyCount; ++ii) - { - if (isRmw && ii == 0) - luContext.Lock(ref keys[ii], LockType.Exclusive); - else - luContext.Lock(ref keys[ii], LockType.Shared); - } + luContext.BeginLockable(); + + // For RMW, simulate "putting the result" into keys[0] + if (isRmw) + keys[0].LockType = 
LockType.Exclusive; + luContext.Lock(keys); } finally { + luContext.EndLockable(); if (isAsyncTest) uContext.EndUnsafe(); } } - internal void MaybeUnlock(ILockableContext luContext, int keyCount, TKey[] keys, bool isRmw, bool isAsyncTest) + internal void MaybeUnlock(ILockableContext luContext, int keyCount, FixedLengthLockableKeyStruct[] lockKeys, bool isRmw, bool isAsyncTest) { if (!UseLocks) return; @@ -254,16 +254,14 @@ internal void MaybeUnlock(ILockableContext luContext, int keyCount, uContext.BeginUnsafe(); try { - for (var ii = 0; ii < keyCount; ++ii) - { - if (isRmw && ii == 0) - luContext.Unlock(ref keys[ii], LockType.Exclusive); - else - luContext.Unlock(ref keys[ii], LockType.Shared); - } + luContext.BeginLockable(); + luContext.Unlock(lockKeys); } finally { + // Undo the setting from RMW + lockKeys[0].LockType = LockType.Shared; + luContext.EndLockable(); if (isAsyncTest) uContext.EndUnsafe(); } @@ -282,7 +280,7 @@ internal static (Status status, TOutput output) GetSinglePendingResult(int tid, Random rng, int[] lockOrdinals, TKey[] lockKeys, Func getOrdinalKey, IComparer sortComparer, IValueTester valueTester) + internal void Test(int tid, Random rng, int[] lockOrdinals, FixedLengthLockableKeyStruct[] lockKeys, Func getOrdinalKey, IValueTester valueTester) { this.Status(Verbose.Low, $"Thread {tid}/{Environment.CurrentManagedThreadId} starting Sync Test"); for (var iter = 0; iter < this.Options.IterationCount; ++iter) @@ -291,8 +289,8 @@ internal void Test(int tid, Random rng, int[] lockOrdinals, TKey[] lockKey { var lockKeyCount = this.GetKeysToLock(rng, ii, lockOrdinals); for (var jj = 0; jj < lockOrdinals.Length; ++jj) - lockKeys[jj] = getOrdinalKey(lockOrdinals[jj]); - Array.Sort(lockKeys, sortComparer); // Sort to avoid deadlocks + lockKeys[jj] = new(getOrdinalKey(lockOrdinals[jj]), LockType.Shared, valueTester.LockableContext); + valueTester.LockableContext.SortLockCodes(lockKeys); // Sort to avoid deadlocks valueTester.TestRecord(lockOrdinals[0], lockKeyCount, lockKeys); } this.Status(iter > 0 && iter % 100 == 0 ? Verbose.Low : Verbose.High, $"Thread {tid}/{Environment.CurrentManagedThreadId} completed Sync iteration {iter}"); @@ -300,7 +298,7 @@ internal void Test(int tid, Random rng, int[] lockOrdinals, TKey[] lockKey this.Status(Verbose.Low, $"Thread {tid}/{Environment.CurrentManagedThreadId} completed Sync Test"); } - internal async Task TestAsync(int tid, Random rng, int[] lockOrdinals, TKey[] lockKeys, Func getOrdinalKey, IComparer sortComparer, IValueTester valueTester) + internal async Task TestAsync(int tid, Random rng, int[] lockOrdinals, FixedLengthLockableKeyStruct[] lockKeys, Func getOrdinalKey, IValueTester valueTester) { this.Status(Verbose.Low, $"Thread {tid}/{Environment.CurrentManagedThreadId} starting Async Test"); await Task.Delay(50); // Make sure the test doesn't start by executing synchronously for a while @@ -310,8 +308,8 @@ internal async Task TestAsync(int tid, Random rng, int[] lockOrdinals, TKe { var lockKeyCount = this.GetKeysToLock(rng, ii, lockOrdinals); for (var jj = 0; jj < lockOrdinals.Length; ++jj) - lockKeys[jj] = getOrdinalKey(lockOrdinals[jj]); - Array.Sort(lockKeys, sortComparer); // Sort to avoid deadlocks + lockKeys[jj] = new(getOrdinalKey(lockOrdinals[jj]), LockType.Shared, valueTester.LockableContext); + valueTester.LockableContext.SortLockCodes(lockKeys); // Sort to avoid deadlocks await valueTester.TestRecordAsync(lockOrdinals[0], lockKeyCount, lockKeys); } this.Status(iter > 0 && iter % 100 == 0 ? 
Verbose.Low : Verbose.High, $"Thread {tid}/{Environment.CurrentManagedThreadId} completed Async iteration {iter}"); diff --git a/cs/test/AdvancedLockTests.cs b/cs/test/AdvancedLockTests.cs index 7a8256c0d..a2a61c882 100644 --- a/cs/test/AdvancedLockTests.cs +++ b/cs/test/AdvancedLockTests.cs @@ -5,9 +5,12 @@ using NUnit.Framework; using System; using System.Threading; -using FASTER.test.ReadCacheTests; +using FASTER.test.LockTable; using static FASTER.test.TestUtils; using System.Threading.Tasks; +using System.Diagnostics; + +#pragma warning disable IDE0060 // Remove unused parameter: used by Setup namespace FASTER.test.LockTests { @@ -77,6 +80,17 @@ public override bool ConcurrentReader(ref int key, ref Input input, ref int valu } } + internal class ChainComparer : IFasterEqualityComparer + { + readonly int mod; + + internal ChainComparer(int mod) => this.mod = mod; + + public bool Equals(ref int k1, ref int k2) => k1 == k2; + + public long GetHashCode64(ref int k) => k % mod; + } + private FasterKV fkv; private ClientSession session; private IDevice log; @@ -87,8 +101,19 @@ public void Setup() DeleteDirectory(MethodTestDir, wait: true); log = Devices.CreateLogDevice(MethodTestDir + "/GenericStringTests.log", deleteOnClose: true); var readCacheSettings = new ReadCacheSettings { MemorySizeBits = 15, PageSizeBits = 9 }; + + var lockingMode = LockingMode.None; + foreach (var arg in TestContext.CurrentContext.Test.Arguments) + { + if (arg is LockingMode lm) + { + lockingMode = lm; + continue; + } + } + fkv = new FasterKV(1L << 20, new LogSettings { LogDevice = log, ObjectLogDevice = null, ReadCacheSettings = readCacheSettings }, - comparer: new ChainTests.ChainComparer(mod), disableEphemeralLocking: false); + comparer: new ChainComparer(mod), lockingMode: lockingMode); session = fkv.For(new Functions()).NewSession(); } @@ -120,8 +145,10 @@ void Populate(bool evict = false) [Category(FasterKVTestCategory)] [Category(LockTestCategory)] //[Repeat(100)] - public async ValueTask SameKeyInsertAndCTTTest() + public async ValueTask SameKeyInsertAndCTTTest([Values(LockingMode.None, LockingMode.Ephemeral /* Standard will hang */)] LockingMode lockingMode) { + if (TestContext.CurrentContext.CurrentRepeatCount > 0) + Debug.WriteLine($"*** Current test iteration: {TestContext.CurrentContext.CurrentRepeatCount + 1} ***"); Populate(evict: true); Functions functions = new(); using var readSession = fkv.NewSession(functions); @@ -174,120 +201,47 @@ await DoTwoThreadRandomKeyTest(numKeys, ); } - [TestFixture] - class LockRecoveryTests - { - const int numKeys = 5000; - - string checkpointDir; - - private FasterKV fht1; - private FasterKV fht2; - private IDevice log; - - - [SetUp] - public void Setup() - { - DeleteDirectory(MethodTestDir, wait: true); - checkpointDir = MethodTestDir + $"/checkpoints"; - log = Devices.CreateLogDevice(MethodTestDir + "/test.log", deleteOnClose: true); - - fht1 = new FasterKV(128, - logSettings: new LogSettings { LogDevice = log, MutableFraction = 0.1, MemorySizeBits = 29 }, - checkpointSettings: new CheckpointSettings { CheckpointDir = checkpointDir } - ); - - fht2 = new FasterKV(128, - logSettings: new LogSettings { LogDevice = log, MutableFraction = 0.1, MemorySizeBits = 29 }, - checkpointSettings: new CheckpointSettings { CheckpointDir = checkpointDir } - ); - } - - [TearDown] - public void TearDown() + [TestFixture] + class LockRecoveryTests { - fht1?.Dispose(); - fht1 = null; - fht2?.Dispose(); - fht2 = null; - log?.Dispose(); - log = null; - - DeleteDirectory(MethodTestDir); 
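[Editor's note] The TestLoader hunks above replace the old per-key Lock/Unlock loops with a batch protocol: build an array of FixedLengthLockableKeyStruct entries, sort once to establish a global lock order, then lock and unlock the whole array through the lockable context. A minimal sketch of that flow, assuming a hypothetical FasterKV<long, long> store named fkv and FASTER's SimpleFunctions (session wiring abbreviated, not the tests' actual setup):

using var session = fkv.For(new SimpleFunctions<long, long>()).NewSession<SimpleFunctions<long, long>>();
var luContext = session.LockableUnsafeContext;

// Build the batch; the third constructor argument is the context used to compute lock codes.
var lockKeys = new[]
{
    new FixedLengthLockableKeyStruct<long>(24, LockType.Exclusive, luContext),
    new FixedLengthLockableKeyStruct<long>(51, LockType.Shared, luContext),
};

luContext.BeginUnsafe();    // epoch protection (LockableUnsafeContext only)
luContext.BeginLockable();  // enter the manual-locking region
try
{
    luContext.SortLockCodes(lockKeys);  // sort to avoid deadlocks, as Test()/TestAsync() do above
    luContext.Lock(lockKeys);
    // ... Read/Upsert/RMW against the locked keys ...
    luContext.Unlock(lockKeys);
}
finally
{
    luContext.EndLockable();
    luContext.EndUnsafe();
}

One array-level Lock call replaces the old loop, so the lock-ordering policy now lives in SortLockCodes rather than in each caller.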
- } - - [Test] - [Category(FasterKVTestCategory), Category(CheckpointRestoreCategory), Category(LockTestCategory)] - [Ignore("Should not hold LUC while calling sync checkpoint")] - public async ValueTask NoLocksAfterRestoreTest([Values] CheckpointType checkpointType, [Values] SyncMode syncMode, [Values] bool incremental) - { - if (incremental && checkpointType != CheckpointType.Snapshot) - Assert.Ignore(); - const int lockKeyInterval = 10; - - static LockType getLockType(int key) => ((key / lockKeyInterval) & 0x1) == 0 ? LockType.Shared : LockType.Exclusive; - static int getValue(int key) => key + numKeys * 10; - Guid token; - - { // Populate and Lock - using var session = fht1.NewSession(new SimpleFunctions()); - var luContext = session.LockableUnsafeContext; - var firstKeyEnd = incremental ? numKeys / 2 : numKeys; - - luContext.BeginUnsafe(); - for (int key = 0; key < firstKeyEnd; key++) - { - luContext.Upsert(key, getValue(key)); - if ((key % lockKeyInterval) == 0) - luContext.Lock(key, getLockType(key)); - } - luContext.EndUnsafe(); + const int numKeys = 5000; - fht1.TryInitiateFullCheckpoint(out token, checkpointType); - await fht1.CompleteCheckpointAsync(); + string checkpointDir; - if (incremental) - { - luContext.BeginUnsafe(); - for (int key = firstKeyEnd; key < numKeys; key++) - { - luContext.Upsert(key, getValue(key)); - if ((key % lockKeyInterval) == 0) - luContext.Lock(key, getLockType(key)); - } - luContext.EndUnsafe(); - - var _result1 = fht1.TryInitiateHybridLogCheckpoint(out var _token1, checkpointType, tryIncremental: true); - await fht1.CompleteCheckpointAsync(); - } + private FasterKV fht1; + private FasterKV fht2; + private IDevice log; - luContext.BeginUnsafe(); - for (int key = 0; key < numKeys; key += lockKeyInterval) - { - // This also verifies the locks are there--otherwise (in Debug) we'll AssertFail trying to unlock an unlocked record - luContext.Unlock(key, getLockType(key)); - } - luContext.EndUnsafe(); - } - if (syncMode == SyncMode.Async) - await fht2.RecoverAsync(token); - else - fht2.Recover(token); + [SetUp] + public void Setup() + { + DeleteDirectory(MethodTestDir, wait: true); + checkpointDir = MethodTestDir + $"/checkpoints"; + log = Devices.CreateLogDevice(MethodTestDir + "/test.log", deleteOnClose: true); + + fht1 = new FasterKV(128, + logSettings: new LogSettings { LogDevice = log, MutableFraction = 0.1, MemorySizeBits = 29 }, + checkpointSettings: new CheckpointSettings { CheckpointDir = checkpointDir } + ); + + fht2 = new FasterKV(128, + logSettings: new LogSettings { LogDevice = log, MutableFraction = 0.1, MemorySizeBits = 29 }, + checkpointSettings: new CheckpointSettings { CheckpointDir = checkpointDir } + ); + } - { // Ensure there are no locks - using var session = fht2.NewSession(new SimpleFunctions()); - var luContext = session.LockableUnsafeContext; - luContext.BeginUnsafe(); - for (int key = 0; key < numKeys; key++) - { - (bool isExclusive, byte isShared) = luContext.IsLocked(key); - Assert.IsFalse(isExclusive); - Assert.AreEqual(0, isShared); - } - luContext.EndUnsafe(); - } + [TearDown] + public void TearDown() + { + fht1?.Dispose(); + fht1 = null; + fht2?.Dispose(); + fht2 = null; + log?.Dispose(); + log = null; + + DeleteDirectory(MethodTestDir); } } } diff --git a/cs/test/BasicFASTERTests.cs b/cs/test/BasicFASTERTests.cs index efabe4e94..331fde5cb 100644 --- a/cs/test/BasicFASTERTests.cs +++ b/cs/test/BasicFASTERTests.cs @@ -60,19 +60,10 @@ private void AssertCompleted(Status expected, Status actual) private (Status status, 
OutputStruct output) CompletePendingResult() { - session.CompletePendingWithOutputs(out var completedOutputs); + session.CompletePendingWithOutputs(out var completedOutputs, wait: true); return TestUtils.GetSinglePendingResult(completedOutputs); } - private static (Status status, OutputStruct output) CompletePendingResult(CompletedOutputIterator<KeyStruct, ValueStruct, InputStruct, OutputStruct, Empty> completedOutputs) - { - Assert.IsTrue(completedOutputs.Next()); - var result = (completedOutputs.Current.Status, completedOutputs.Current.Output); - Assert.IsFalse(completedOutputs.Next()); - completedOutputs.Dispose(); - return result; - } - [Test] [Category("FasterKV")] [Category("Smoke")] @@ -809,6 +800,15 @@ public static void KVBasicsSampleEndToEndInDocs() Assert.AreEqual(10, output); } + [Test] + [Category("FasterKV")] + public static void LogPathTooLong() + { + string testDir = new string('x', Native32.WIN32_MAX_PATH - 11); // As in LSD, -11 for "." + using var log = Devices.CreateLogDevice($"{testDir}", deleteOnClose: true); // Should succeed + Assert.Throws(typeof(FasterException), () => Devices.CreateLogDevice($"{testDir}y", deleteOnClose: true)); + } + [Test] [Category("FasterKV")] public static void UshortKeyByteValueTest() diff --git a/cs/test/BasicLockTests.cs b/cs/test/BasicLockTests.cs index b037912f6..308c5c689 100644 --- a/cs/test/BasicLockTests.cs +++ b/cs/test/BasicLockTests.cs @@ -82,7 +82,7 @@ public void Setup() { TestUtils.DeleteDirectory(TestUtils.MethodTestDir, wait: true); log = Devices.CreateLogDevice(TestUtils.MethodTestDir + "/GenericStringTests.log", deleteOnClose: true); - fkv = new FasterKV<long, long>(1L << 20, new LogSettings { LogDevice = log, ObjectLogDevice = null }, comparer: new LocalComparer(), disableEphemeralLocking: false ); + fkv = new FasterKV<long, long>(1L << 20, new LogSettings { LogDevice = log, ObjectLogDevice = null }, comparer: new LocalComparer(), lockingMode: LockingMode.Ephemeral); session = fkv.For(new Functions()).NewSession<Functions>(); } @@ -109,14 +109,14 @@ public unsafe void RecordInfoLockTest([Values(1, 50)] int numThreads) RecordInfo* ri = &recordInfo; #pragma warning disable IDE0200 // The lambdas cannot be simplified as it causes struct temporaries - XLockTest(numThreads, () => ri->TryLockExclusive(), () => { ri->UnlockExclusive(); return true; }); - SLockTest(numThreads, () => ri->TryLockShared(), () => ri->TryUnlockShared()); - XSLockTest(numThreads, () => ri->TryLockExclusive(), () => { ri->UnlockExclusive(); return true; }, () => ri->TryLockShared(), () => ri->TryUnlockShared()); + XLockTest(numThreads, () => ri->TryLockExclusive(), () => ri->UnlockExclusive()); + SLockTest(numThreads, () => ri->TryLockShared(), () => ri->UnlockShared()); + XSLockTest(numThreads, () => ri->TryLockExclusive(), () => ri->UnlockExclusive(), () => ri->TryLockShared(), () => ri->UnlockShared()); #pragma warning restore IDE0200 } } - private void XLockTest(int numThreads, Func<bool> locker, Func<bool> unlocker) + private void XLockTest(int numThreads, Func<bool> locker, Action unlocker) { long lockTestValue = 0; const int numIters = 1000; @@ -136,12 +136,12 @@ void XLockTestFunc() var temp = lockTestValue; Thread.Yield(); lockTestValue = temp + 1; - Assert.IsTrue(unlocker()); + unlocker(); } } } - private void SLockTest(int numThreads, Func<bool> locker, Func<bool> unlocker) + private void SLockTest(int numThreads, Func<bool> locker, Action unlocker) { long lockTestValue = 1; long lockTestValueResult = 0; @@ -162,12 +162,12 @@ void SLockTestFunc() sw.SpinOnce(-1); Interlocked.Add(ref lockTestValueResult, Interlocked.Read(ref lockTestValue)); Thread.Yield(); - Assert.IsTrue(unlocker()); + unlocker(); } } } - private void XSLockTest(int numThreads, Func<bool> xlocker, Func<bool> xunlocker, Func<bool> slocker, Func<bool> sunlocker) + private void XSLockTest(int numThreads, Func<bool> xlocker, Action xunlocker, Func<bool> slocker, Action sunlocker) { long lockTestValue = 0; long lockTestValueResult = 0; @@ -191,7 +191,7 @@ void XLockTestFunc() var temp = lockTestValue; Thread.Yield(); lockTestValue = temp + 1; - Assert.IsTrue(xunlocker()); + xunlocker(); } } @@ -203,7 +203,7 @@ void SLockTestFunc() sw.SpinOnce(-1); Interlocked.Add(ref lockTestValueResult, 1); Thread.Yield(); - Assert.IsTrue(sunlocker()); + sunlocker(); } } } @@ -253,7 +253,7 @@ void UpdateFunc(bool useRMW, int numRecords, int numIters) [Test] [Category("FasterKV")] - public unsafe void SealDeletedRecordTest([Values(UpdateOp.RMW, UpdateOp.Upsert)] UpdateOp updateOp, [Values(FlushMode.NoFlush, FlushMode.OnDisk)] FlushMode flushMode) + public unsafe void CollidingDeletedRecordTest([Values(UpdateOp.RMW, UpdateOp.Upsert)] UpdateOp updateOp, [Values(FlushMode.NoFlush, FlushMode.OnDisk)] FlushMode flushMode) { // Populate for (int key = 0; key < numRecords; key++) @@ -300,7 +300,6 @@ public unsafe void SealDeletedRecordTest([Values(UpdateOp.RMW, UpdateOp.Upsert)] Assert.IsFalse(status.IsPending); Assert.IsTrue(recordInfo.Tombstone, "Tombstone should be true after Update"); - Assert.IsTrue(recordInfo.Sealed, "Sealed should be true after Update"); } [Test] @@ -351,7 +350,6 @@ public unsafe void SetInvalidOnException([Values] UpdateOp updateOp) long physicalAddress = this.fkv.hlog.GetPhysicalAddress(expectedThrowAddress); ref var recordInfo = ref this.fkv.hlog.GetInfo(physicalAddress); Assert.IsTrue(recordInfo.Invalid, "Expected Invalid record"); - Assert.IsFalse(recordInfo.Tentative, "Expected non-Tentative record"); } } } \ No newline at end of file diff --git a/cs/test/BlittableLogCompactionTests.cs b/cs/test/BlittableLogCompactionTests.cs index 9e936acfa..ebf2c5fdd 100644 --- a/cs/test/BlittableLogCompactionTests.cs +++ b/cs/test/BlittableLogCompactionTests.cs @@ -3,6 +3,9 @@ using FASTER.core; using NUnit.Framework; +using System.Diagnostics; + +#pragma warning disable IDE0060 // Remove unused parameter: some parameters are just to let [Setup] know what to do namespace FASTER.test { @@ -17,8 +20,19 @@ public void Setup() { TestUtils.DeleteDirectory(TestUtils.MethodTestDir, wait:true); log = Devices.CreateLogDevice(TestUtils.MethodTestDir + "/BlittableLogCompactionTests.log", deleteOnClose: true); + + var lockingMode = LockingMode.Standard; + foreach (var arg in TestContext.CurrentContext.Test.Arguments) + { + if (arg is LockingMode locking_mode) + { + lockingMode = locking_mode; + break; + } + } + fht = new FasterKV<KeyStruct, ValueStruct> - (1L << 20, new LogSettings { LogDevice = log, MemorySizeBits = 15, PageSizeBits = 9 }); + (1L << 20, new LogSettings { LogDevice = log, MemorySizeBits = 15, PageSizeBits = 9 }, lockingMode: lockingMode); } [TearDown] @@ -36,7 +50,8 @@ public void TearDown() [Category("Compaction")] [Category("Smoke")] - public void BlittableLogCompactionTest1([Values] CompactionType compactionType) + public void BlittableLogCompactionTest1([Values] CompactionType compactionType, + [Values(LockingMode.Standard)] LockingMode lockingMode) { using var session = fht.For(new FunctionsCompaction()).NewSession<FunctionsCompaction>(); @@ -85,7 +100,8 @@ public void BlittableLogCompactionTest1([Values] CompactionType compactionType) [Test] [Category("FasterKV")] [Category("Compaction")] - public void BlittableLogCompactionTest2([Values] CompactionType
compactionType) + public void BlittableLogCompactionTest2([Values] CompactionType compactionType, + [Values(LockingMode.Standard)] LockingMode lockingMode) { using var session = fht.For(new FunctionsCompaction()).NewSession(); @@ -145,7 +161,8 @@ public void BlittableLogCompactionTest2([Values] CompactionType compactionType) [Test] [Category("FasterKV")] [Category("Compaction")] - public void BlittableLogCompactionTest3([Values] CompactionType compactionType) + public void BlittableLogCompactionTest3([Values] CompactionType compactionType, + [Values(LockingMode.Standard)] LockingMode lockingMode) { using var session = fht.For(new FunctionsCompaction()).NewSession(); @@ -211,7 +228,9 @@ public void BlittableLogCompactionTest3([Values] CompactionType compactionType) [Category("Compaction")] [Category("Smoke")] - public void BlittableLogCompactionCustomFunctionsTest1([Values] CompactionType compactionType) + public void BlittableLogCompactionCustomFunctionsTest1([Values] CompactionType compactionType, + [Values(LockingMode.Standard)] + LockingMode lockingMode) { using var session = fht.For(new FunctionsCompaction()).NewSession(); @@ -270,8 +289,9 @@ public void BlittableLogCompactionCustomFunctionsTest1([Values] CompactionType c [Test] [Category("FasterKV")] [Category("Compaction")] - - public void BlittableLogCompactionCustomFunctionsTest2([Values] CompactionType compactionType, [Values]bool flushAndEvict) + [System.Diagnostics.CodeAnalysis.SuppressMessage("Style", "IDE0060:Remove unused parameter", Justification = "lockingMode is used by Setup")] + public void BlittableLogCompactionCustomFunctionsTest2([Values] CompactionType compactionType, [Values]bool flushAndEvict, + [Values(LockingMode.Standard)] LockingMode lockingMode) { // Update: irrelevant as session compaction no longer uses Copy/CopyInPlace // This test checks if CopyInPlace returning false triggers call to Copy @@ -280,13 +300,19 @@ public void BlittableLogCompactionCustomFunctionsTest2([Values] CompactionType c var key = new KeyStruct { kfield1 = 100, kfield2 = 101 }; var value = new ValueStruct { vfield1 = 10, vfield2 = 20 }; + var input = default(InputStruct); + var output = default(OutputStruct); session.Upsert(ref key, ref value, 0, 0); + var status = session.Read(ref key, ref input, ref output, 0, 0); + Debug.Assert(status.Found); fht.Log.Flush(true); value = new ValueStruct { vfield1 = 11, vfield2 = 21 }; session.Upsert(ref key, ref value, 0, 0); + status = session.Read(ref key, ref input, ref output, 0, 0); + Debug.Assert(status.Found); if (flushAndEvict) fht.Log.FlushAndEvict(true); @@ -296,9 +322,7 @@ public void BlittableLogCompactionCustomFunctionsTest2([Values] CompactionType c var compactUntil = session.Compact(fht.Log.TailAddress, compactionType); fht.Log.Truncate(); - var input = default(InputStruct); - var output = default(OutputStruct); - var status = session.Read(ref key, ref input, ref output, 0, 0); + status = session.Read(ref key, ref input, ref output, 0, 0); if (status.IsPending) { Assert.IsTrue(session.CompletePendingWithOutputs(out var outputs, wait: true)); diff --git a/cs/test/ComponentRecoveryTests.cs b/cs/test/ComponentRecoveryTests.cs index c4f00731a..dc58fa635 100644 --- a/cs/test/ComponentRecoveryTests.cs +++ b/cs/test/ComponentRecoveryTests.cs @@ -104,22 +104,15 @@ private static unsafe void Setup_FuzzyIndexRecoveryTest(out int seed, out int si hash_table1.Initialize(size, 512); //do something - var firstBucket = default(HashBucket*); - var bucket = default(HashBucket*); - var slot = 
default(int); - var keyGenerator1 = new Random(seed); var valueGenerator = new Random(seed + 1); for (int i = 0; i < numAdds; i++) { long key = keyGenerator1.Next(); - var hash = Utility.GetHashCode(key); - var tag = (ushort)((ulong)hash >> Constants.kHashTagShift); - - var entry = default(HashBucketEntry); - hash_table1.FindOrCreateTag(hash, tag, ref firstBucket, ref bucket, ref slot, ref entry, 0); + HashEntryInfo hei = new(Utility.GetHashCode(key)); + hash_table1.FindOrCreateTag(ref hei, 0); - hash_table1.UpdateSlot(bucket, slot, entry.word, valueGenerator.Next(), out long found_word); + hash_table1.UpdateSlot(hei.bucket, hei.slot, hei.entry.word, valueGenerator.Next(), out long found_word); } //issue checkpoint call @@ -134,29 +127,20 @@ private static unsafe void Finish_FuzzyIndexRecoveryTest(int seed, long numAdds, { var keyGenerator2 = new Random(seed); - var bucket1 = default(HashBucket*); - var firstBucket1 = default(HashBucket*); - var bucket2 = default(HashBucket*); - var firstBucket2 = default(HashBucket*); - var slot1 = default(int); - var slot2 = default(int); - - var entry1 = default(HashBucketEntry); - var entry2 = default(HashBucketEntry); for (int i = 0; i < 2 * numAdds; i++) { long key = keyGenerator2.Next(); - var hash = Utility.GetHashCode(key); - var tag = (ushort)((ulong)hash >> Constants.kHashTagShift); + HashEntryInfo hei1 = new(Utility.GetHashCode(key)); + HashEntryInfo hei2 = new(hei1.hash); - var exists1 = hash_table1.FindTag(hash, tag, ref firstBucket1, ref bucket1, ref slot1, ref entry1); - var exists2 = hash_table2.FindTag(hash, tag, ref firstBucket2, ref bucket2, ref slot2, ref entry2); + var exists1 = hash_table1.FindTag(ref hei1); + var exists2 = hash_table2.FindTag(ref hei2); Assert.AreEqual(exists2, exists1); if (exists1) { - Assert.AreEqual(entry2.word, entry1.word); + Assert.AreEqual(hei2.entry.word, hei1.entry.word); } } diff --git a/cs/test/DisposeTests.cs b/cs/test/DisposeTests.cs index bea0fd7d4..8abc6d8e6 100644 --- a/cs/test/DisposeTests.cs +++ b/cs/test/DisposeTests.cs @@ -9,6 +9,9 @@ using FASTER.core; using NUnit.Framework; using static FASTER.test.TestUtils; +using static FASTER.core.Utility; + +#pragma warning disable IDE0060 // Remove unused parameter; used for Setup only namespace FASTER.test.Dispose { @@ -35,18 +38,25 @@ public void Setup() objlog = Devices.CreateLogDevice(MethodTestDir + "/ObjectFASTERTests.obj.log", deleteOnClose: true); LogSettings logSettings = new () { LogDevice = log, ObjectLogDevice = objlog, MutableFraction = 0.1, MemorySizeBits = 15, PageSizeBits = 10 }; + var lockingMode = LockingMode.None; foreach (var arg in TestContext.CurrentContext.Test.Arguments) { if (arg is ReadCopyDestination dest) { if (dest == ReadCopyDestination.ReadCache) logSettings.ReadCacheSettings = new() { PageSizeBits = 12, MemorySizeBits = 22 }; - break; + continue; + } + if (arg is LockingMode lm) + { + lockingMode = lm; + continue; } } fht = new FasterKV(128, logSettings: logSettings, comparer: new MyKeyComparer(), - serializerSettings: new SerializerSettings { keySerializer = () => new MyKeySerializer(), valueSerializer = () => new MyValueSerializer() } + serializerSettings: new SerializerSettings { keySerializer = () => new MyKeySerializer(), valueSerializer = () => new MyValueSerializer() }, + lockingMode: lockingMode // Warning: LockingMode.Standard will deadlock with X locks as both keys map to the same LockCode ); } @@ -107,40 +117,50 @@ internal DisposeFunctions(DisposeTests tester, bool sut, bool splice = false) void WaitForEvent() 
{ + Assert.IsTrue(tester.fht.epoch.ThisInstanceProtected(), "This should only be called from IFunctions methods, which are under epoch protection"); if (isSUT) { MyKey key = new() { key = TestKey }; - tester.fht.FindKey(ref key, out var entry); + tester.fht.FindHashBucketEntryForKey(ref key, out var entry); var address = entry.Address; if (isSplice) { - // There should be one readcache entry for this test. - Assert.IsTrue(new HashBucketEntry() { word = entry.Address }.ReadCache); - Assert.GreaterOrEqual(address, tester.fht.ReadCache.HeadAddress); + // Get the tail entry for this key's hash chain; there should be exactly one readcache entry for this test. + Assert.IsTrue(entry.ReadCache, "Expected readcache entry in WaitForEvent pt 1"); + Assert.GreaterOrEqual(entry.AbsoluteAddress, tester.fht.ReadCache.HeadAddress); var physicalAddress = tester.fht.readcache.GetPhysicalAddress(entry.AbsoluteAddress); ref RecordInfo recordInfo = ref tester.fht.readcache.GetInfo(physicalAddress); address = recordInfo.PreviousAddress; - // There should be only one readcache entry for this test. - Assert.IsFalse(new HashBucketEntry() { word = address }.ReadCache); + // There should be only one readcache entry for this test. The address we just got may have been kTempInvalidAddress, + // and if not then it should have been a pre-FlushAndEvict()ed record. + Assert.IsFalse(IsReadCache(address)); + + // Retry will have already inserted something post-FlushAndEvict. + Assert.IsTrue(isRetry || address < tester.fht.hlog.HeadAddress); } tester.otherGate.Release(); tester.sutGate.Wait(); + tester.fht.FindHashBucketEntryForKey(ref key, out entry); + // There's a little race where the SUT session could still beat the other session to the CAS if (!isRetry) { - if (!isSplice) + if (isSplice) { - while (entry.Address == address) + // If this is not Standard locking, then we use detach-and-reattach logic on the hash chain. That happens after SingleWriter, + // so 'other' thread may still be in progress . Wait for it. + while (!entry.ReadCache) { + Assert.IsFalse(tester.fht.LockTable.IsEnabled, "Standard locking should have spliced directly"); Thread.Yield(); - tester.fht.FindKey(ref key, out entry); + tester.fht.FindHashBucketEntryForKey(ref key, out entry); } - } - else - { - Assert.GreaterOrEqual(address, tester.fht.ReadCache.HeadAddress); + + // We're the thread awaiting the splice, so wait until the address in the last readcache record changes. + Assert.IsTrue(entry.ReadCache, "Expected readcache entry in WaitForEvent pt 2"); + Assert.GreaterOrEqual(entry.AbsoluteAddress, tester.fht.ReadCache.HeadAddress); var physicalAddress = tester.fht.readcache.GetPhysicalAddress(entry.AbsoluteAddress); ref RecordInfo recordInfo = ref tester.fht.readcache.GetInfo(physicalAddress); while (recordInfo.PreviousAddress == address) @@ -148,6 +168,17 @@ void WaitForEvent() // Wait for the splice to happen Thread.Yield(); } + Assert.IsFalse(IsReadCache(recordInfo.PreviousAddress)); + Assert.IsTrue(recordInfo.PreviousAddress >= tester.fht.hlog.HeadAddress); + } + else + { + // We're not the splice thread, so wait until the address in the hash entry changes. 
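[Editor's note] Stepping back to the ComponentRecoveryTests hunk above: the new HashEntryInfo struct folds FindTag's former firstBucket/bucket/slot/entry ref parameters into a single value. A rough sketch of the new shape (hash_table, key, and newValue are assumed from the surrounding test; this is internal test-level API, not public surface):

// HashEntryInfo carries the hash plus the located bucket, slot, and entry together.
HashEntryInfo hei = new(Utility.GetHashCode(key));
if (hash_table.FindTag(ref hei))
{
    // The slot update that previously threaded separate bucket/slot/entry locals:
    hash_table.UpdateSlot(hei.bucket, hei.slot, hei.entry.word, newValue, out long foundWord);
}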
+ while (entry.Address == address) + { + Thread.Yield(); + tester.fht.FindHashBucketEntryForKey(ref key, out entry); + } } } isRetry = true; // the next call will be from RETRY_NOW @@ -332,7 +363,7 @@ void DoFlush(FlushMode flushMode) [Test] [Category("FasterKV")] [Category("Smoke")] - public void DisposeSingleWriter2Threads() + public void DisposeSingleWriter2Threads([Values(LockingMode.Ephemeral, LockingMode.None)] LockingMode lockingMode) { var functions1 = new DisposeFunctions(this, sut: true); var functions2 = new DisposeFunctions(this, sut: false); @@ -369,7 +400,8 @@ void DoUpsert(DisposeFunctions functions) [Test] [Category("FasterKV")] [Category("Smoke")] - public void DisposeInitialUpdater2Threads([Values(FlushMode.NoFlush, FlushMode.OnDisk)] FlushMode flushMode) + public void DisposeInitialUpdater2Threads([Values(FlushMode.NoFlush, FlushMode.OnDisk)] FlushMode flushMode, + [Values(LockingMode.Ephemeral, LockingMode.None)] LockingMode lockingMode) { var functions1 = new DisposeFunctions(this, sut: true); var functions2 = new DisposeFunctions(this, sut: false); @@ -407,7 +439,8 @@ void DoInsert(DisposeFunctions functions) [Test] [Category("FasterKV")] [Category("Smoke")] - public void DisposeCopyUpdater2Threads([Values(FlushMode.ReadOnly, FlushMode.OnDisk)] FlushMode flushMode) + public void DisposeCopyUpdater2Threads([Values(FlushMode.ReadOnly, FlushMode.OnDisk)] FlushMode flushMode, + [Values(LockingMode.Ephemeral, LockingMode.None)] LockingMode lockingMode) { var functions1 = new DisposeFunctions(this, sut: true); var functions2 = new DisposeFunctions(this, sut: false); @@ -446,24 +479,21 @@ void DoUpdate(DisposeFunctions functions) // The way this works for OnDisk is: // SUT sees that the address in the hash entry is below HeadAddress (because everything has been flushed to disk) + // SUT records InitialEntryAddress with the original hash entry address // SUT goes pending, gets to InternalContinuePendingRMW, calls CreateNewRecordRMW, which calls CopyUpdater // SUT (in CopyUpdater) signals Other, then blocks - // SUT has recorded prevHighestKeyHashAddress with the original hash entry address // Other calls InternalRMW and also sees that the address in the hash entry is below HeadAddress, so it goes pending // Other gets to InternalContinuePendingRMW, sees its key does not exist, and calls InitialUpdater, which signals SUT // Other returns from InternalContinuePendingRMW, which enqueues DeserializedFromDisk into functions2.handlerQueue // SUT is now unblocked and returns from CopyUpdater. CAS fails due to Other's insertion // SUT does the RETRY loop in InternalContinuePendingRMW - // This second loop iteration sees that prevHighestKeyHashAddress is less than the current hash table entry, so drops down to do InternalRMW. - // InternalRMW does TracebackForKeyMatch, which passes Other's inserted collision and goes below HeadAddress - // InternalRMW thus enqueues another pending IO - // InternalContinuePendingRMW returns, which enqueues DeserializedFromDisk into functions1.handlerQueue - // The final pending IO calls InternalContinuePendingRMW, which operates normally now as there is no conflict. - // InternalContinuePendingRMW returns, which enqueues another DeserializedFromDisk into functions1.handlerQueue + // This second loop iteration searches for the record in-memory down to InitialEntryAddress and does not find it. + // It verifies that the lower bound of the search guarantees we searched all in-memory records. 
+ // Therefore SUT calls CreateNewRecordRMW again, which succeeds. + // SUT returns from InternalContinuePendingRMW, which enqueues DeserializedFromDisk into functions1.handlerQueue Assert.AreEqual(DisposeHandler.CopyUpdater, functions1.handlerQueue.Dequeue()); if (flushMode == FlushMode.OnDisk) { - Assert.AreEqual(DisposeHandler.DeserializedFromDisk, functions1.handlerQueue.Dequeue()); Assert.AreEqual(DisposeHandler.DeserializedFromDisk, functions1.handlerQueue.Dequeue()); Assert.AreEqual(DisposeHandler.DeserializedFromDisk, functions2.handlerQueue.Dequeue()); } @@ -473,7 +503,8 @@ void DoUpdate(DisposeFunctions functions) [Test] [Category("FasterKV")] [Category("Smoke")] - public void DisposeSingleDeleter2Threads([Values(FlushMode.ReadOnly, FlushMode.OnDisk)] FlushMode flushMode) + public void DisposeSingleDeleter2Threads([Values(FlushMode.ReadOnly, FlushMode.OnDisk)] FlushMode flushMode, + [Values(LockingMode.Ephemeral, LockingMode.None)] LockingMode lockingMode) { var functions1 = new DisposeFunctions(this, sut: true); var functions2 = new DisposeFunctions(this, sut: false); @@ -492,9 +523,6 @@ public void DisposeSingleDeleter2Threads([Values(FlushMode.ReadOnly, FlushMode.O // Make it immutable so we don't simply set Tombstone. DoFlush(flushMode); - // This is necessary for FlushMode.ReadOnly to test the readonly range in Delete() (otherwise we can't test SingleDeleter there) - var luc = fht.NewSession(new DisposeFunctionsNoSync()).LockableUnsafeContext; - void DoDelete(DisposeFunctions functions) { using var innerSession = fht.NewSession(functions); @@ -522,7 +550,8 @@ void DoDelete(DisposeFunctions functions) [Test] [Category("FasterKV")] [Category("Smoke")] - public void PendingRead([Values] ReadCopyDestination copyDest) + public void PendingRead([Values] ReadCopyDestination copyDest, [Values(LockingMode.Ephemeral, LockingMode.None)] LockingMode lockingMode) + { DoPendingReadInsertTest(copyDest, initialReadCacheInsert: false); } @@ -530,7 +559,8 @@ public void PendingRead([Values] ReadCopyDestination copyDest) [Test] [Category("FasterKV")] [Category("Smoke")] - public void CopyToTailWithInitialReadCache([Values(ReadCopyDestination.ReadCache)] ReadCopyDestination copyDest) + public void CopyToTailWithInitialReadCache([Values(ReadCopyDestination.ReadCache)] ReadCopyDestination copyDest, + [Values(LockingMode.Ephemeral, LockingMode.None)] LockingMode lockingMode) { // We use the ReadCopyDestination.ReadCache parameter so Setup() knows to set up the readcache, but // for the actual test it is used only for setup; we execute CopyToTail. 
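[Editor's note] The rewritten comment block above traces the pending-RMW retry path (CopyUpdater, InitialEntryAddress, then a second CreateNewRecordRMW). From the caller's side all of that is hidden behind the pending-completion API; a hedged sketch of driving one such RMW to completion, with store and session setup assumed as in these tests:

long key = 42, input = 1, output = 0;
var status = session.RMW(ref key, ref input, ref output);
if (status.IsPending)
{
    // Blocks until the disk I/O and any RETRY iterations finish, then yields the single result.
    session.CompletePendingWithOutputs(out var completedOutputs, wait: true);
    (status, output) = TestUtils.GetSinglePendingResult(completedOutputs);
}
// status.Record.CopyUpdated / status.Record.InPlaceUpdated report which update path ran.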
@@ -578,7 +608,7 @@ void DoPendingReadInsertTest(ReadCopyDestination copyDest, bool initialReadCache [Test] [Category("FasterKV")] [Category("Smoke")] - public void DisposePendingRead2Threads([Values] ReadCopyDestination copyDest) + public void DisposePendingRead2Threads([Values] ReadCopyDestination copyDest, [Values] LockingMode lockingMode) { DoDisposePendingReadInsertTest2Threads(copyDest, initialReadCacheInsert: false); } @@ -586,7 +616,7 @@ public void DisposePendingRead2Threads([Values] ReadCopyDestination copyDest) [Test] [Category("FasterKV")] [Category("Smoke")] - public void DisposeCopyToTailWithInitialReadCache2Threads([Values(ReadCopyDestination.ReadCache)] ReadCopyDestination copyDest) + public void DisposeCopyToTailWithInitialReadCache2Threads([Values(ReadCopyDestination.ReadCache)] ReadCopyDestination copyDest, [Values] LockingMode lockingMode) { // We use the ReadCopyDestination.ReadCache parameter so Setup() knows to set up the readcache, but // for the actual test it is used only for setup; we execute CopyToTail. @@ -599,6 +629,8 @@ void DoDisposePendingReadInsertTest2Threads(ReadCopyDestination copyDest, bool i var functions2 = new DisposeFunctions(this, sut: false); MyKey key = new() { key = TestKey }; + MyKey collidingKey = new() { key = TestCollidingKey }; + MyValue collidingValue = new() { value = TestCollidingValue }; MyKey collidingKey2 = new() { key = TestCollidingKey2 }; MyValue collidingValue2 = new() { value = TestCollidingValue2 }; @@ -607,6 +639,7 @@ void DoDisposePendingReadInsertTest2Threads(ReadCopyDestination copyDest, bool i using var session = fht.NewSession(new DisposeFunctionsNoSync()); MyValue value = new() { value = TestInitialValue }; session.Upsert(ref key, ref value); + session.Upsert(ref collidingKey, ref collidingValue); if (initialReadCacheInsert) session.Upsert(ref collidingKey2, ref collidingValue2); } @@ -619,32 +652,38 @@ void DoDisposePendingReadInsertTest2Threads(ReadCopyDestination copyDest, bool i using var session = fht.NewSession(new DisposeFunctionsNoSync()); MyOutput output = new(); var status = session.Read(ref collidingKey2, ref output); + Assert.IsTrue(status.IsPending, status.ToString()); session.CompletePending(wait: true); } + // We use Read() only here (not Upsert()), so we have only read locks and thus do not self-deadlock with an XLock on the colliding bucket. 
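[Editor's note] The "Read() only" comment above leans on shared-versus-exclusive semantics: concurrent reads take only shared locks, which compose, so the two threads cannot deadlock on the colliding bucket. A tiny sketch of those semantics at the RecordInfo level, mirroring RecordInfoLockTest earlier (unsafe, test-level usage; return values annotated per the expected behavior):

RecordInfo recordInfo = default;
unsafe
{
    RecordInfo* ri = &recordInfo;
    bool s1 = ri->TryLockShared();    // true
    bool s2 = ri->TryLockShared();    // true: shared locks compose
    bool x = ri->TryLockExclusive();  // false while any shared lock is held
    ri->UnlockShared();
    ri->UnlockShared();
}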
void DoRead(DisposeFunctions functions) { + MyOutput output = new(); + MyInput input = new(); + ReadOptions readOptions = default; + if (copyDest == ReadCopyDestination.Tail) + readOptions.ReadFlags = ReadFlags.CopyReadsToTail; + using var session = fht.NewSession(functions); if (functions.isSUT) { - MyOutput output = new(); - MyInput input = new(); - ReadOptions readOptions = default; - if (copyDest == ReadCopyDestination.Tail) - readOptions.ReadFlags = ReadFlags.CopyReadsToTail; var status = session.Read(ref key, ref input, ref output, ref readOptions, out _); Assert.IsTrue(status.IsPending, status.ToString()); session.CompletePendingWithOutputs(out var completedOutputs, wait: true); (status, output) = GetSinglePendingResult(completedOutputs); + Assert.IsTrue(status.Found, status.ToString()); Assert.AreEqual(TestInitialValue, output.value.value); } else { - // Do an upsert here to cause the collision (it will blindly insert) otherGate.Wait(); - MyKey collidingKey = new() { key = TestCollidingKey }; - MyValue collidingValue = new() { value = TestCollidingValue }; - session.Upsert(ref collidingKey, ref collidingValue); + var status = session.Read(ref collidingKey, ref input, ref output, ref readOptions, out _); + Assert.IsTrue(status.IsPending, status.ToString()); + session.CompletePendingWithOutputs(out var completedOutputs, wait: true); + (status, output) = GetSinglePendingResult(completedOutputs); + Assert.IsTrue(status.Found, status.ToString()); + Assert.AreEqual(TestCollidingValue, output.value.value); } } @@ -655,7 +694,8 @@ void DoRead(DisposeFunctions functions) }; Task.WaitAll(tasks); - Assert.AreEqual(DisposeHandler.SingleWriter, functions1.handlerQueue.Dequeue()); + if (fht.LockTable.IsEnabled || !initialReadCacheInsert) // This allows true splice, so we generated a conflict. + Assert.AreEqual(DisposeHandler.SingleWriter, functions1.handlerQueue.Dequeue()); Assert.AreEqual(DisposeHandler.DeserializedFromDisk, functions1.handlerQueue.Dequeue()); Assert.IsEmpty(functions1.handlerQueue); } @@ -663,7 +703,7 @@ void DoRead(DisposeFunctions functions) [Test] [Category("FasterKV")] [Category("Smoke")] - public void DisposePendingReadWithNoInsertTest() + public void DisposePendingReadWithNoInsertTest([Values(LockingMode.Ephemeral, LockingMode.None)] LockingMode lockingMode) { var functions = new DisposeFunctionsNoSync(); @@ -690,7 +730,7 @@ public void DisposePendingReadWithNoInsertTest() [Test] [Category("FasterKV")] [Category("Smoke")] - public void DisposePendingRmwWithNoConflictTest() + public void DisposePendingRmwWithNoConflictTest([Values(LockingMode.Ephemeral, LockingMode.None)] LockingMode lockingMode) { var functions = new DisposeFunctionsNoSync(); @@ -716,4 +756,4 @@ public void DisposePendingRmwWithNoConflictTest() } } } -#endif \ No newline at end of file +#endif diff --git a/cs/test/EphemeralLockingTests.cs b/cs/test/EphemeralLockingTests.cs new file mode 100644 index 000000000..fa4f36625 --- /dev/null +++ b/cs/test/EphemeralLockingTests.cs @@ -0,0 +1,516 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. +using System; +using System.IO; +using FASTER.core; +using NUnit.Framework; +using FASTER.test.ReadCacheTests; +using System.Threading.Tasks; +using static FASTER.test.TestUtils; + +namespace FASTER.test.EphemeralLocking +{ + // Functions for ephemeral locking--locking only for the duration of a concurrent IFunctions call. 
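[Editor's note] The new EphemeralLockingTests file below exercises one value of the LockingMode enum that, throughout this change, replaces the old disableEphemeralLocking boolean on the FasterKV constructor. A sketch of the new construction, with the device and comparer set up as in the fixtures above and generic arguments assumed to be long/long:

var fht = new FasterKV<long, long>(
    1L << 20,
    new LogSettings { LogDevice = log, MemorySizeBits = 22, PageSizeBits = 12 },
    comparer: new LongFasterEqualityComparer(),
    lockingMode: LockingMode.Ephemeral);  // alternatives: LockingMode.Standard, LockingMode.None

Per the hunks in this change: Ephemeral locks only for the duration of each concurrent IFunctions callback, Standard supports the manual Lockable contexts via per-key lock codes, and None disables record locking entirely.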
+ internal class EphemeralLockingTestFunctions : SimpleFunctions + { + internal bool failInPlace; + + public override bool ConcurrentWriter(ref long key, ref long input, ref long src, ref long dst, ref long output, ref UpsertInfo upsertInfo) + => !failInPlace && base.ConcurrentWriter(ref key, ref input, ref src, ref dst, ref output, ref upsertInfo); + + public override bool InPlaceUpdater(ref long key, ref long input, ref long value, ref long output, ref RMWInfo rmwInfo) + => !failInPlace && base.InPlaceUpdater(ref key, ref input, ref value, ref output, ref rmwInfo); + } + + [TestFixture] + class EphemeralLockingTests + { + const int numRecords = 1000; + const int useNewKey = 1010; + const int useExistingKey = 200; + + const int valueMult = 1_000_000; + + EphemeralLockingTestFunctions functions; + LongFasterEqualityComparer comparer; + + private FasterKV fht; + private ClientSession session; + private IDevice log; + + [SetUp] + public void Setup() => Setup(forRecovery: false); + + public void Setup(bool forRecovery) + { + if (!forRecovery) + DeleteDirectory(MethodTestDir, wait: true); + log = Devices.CreateLogDevice(Path.Combine(MethodTestDir, "test.log"), deleteOnClose: false, recoverDevice: forRecovery); + + ReadCacheSettings readCacheSettings = default; + CheckpointSettings checkpointSettings = default; + foreach (var arg in TestContext.CurrentContext.Test.Arguments) + { + if (arg is ReadCopyDestination dest) + { + if (dest == ReadCopyDestination.ReadCache) + readCacheSettings = new() { PageSizeBits = 12, MemorySizeBits = 22 }; + continue; + } + if (arg is CheckpointType chktType) + { + checkpointSettings = new CheckpointSettings { CheckpointDir = MethodTestDir }; + continue; + } + } + + comparer = new LongFasterEqualityComparer(); + functions = new EphemeralLockingTestFunctions(); + + fht = new FasterKV(1L << 20, new LogSettings { LogDevice = log, ObjectLogDevice = null, PageSizeBits = 12, MemorySizeBits = 22, ReadCacheSettings = readCacheSettings }, + checkpointSettings: checkpointSettings, comparer: comparer, lockingMode: LockingMode.Ephemeral); + session = fht.For(functions).NewSession(); + } + + [TearDown] + public void TearDown() => TearDown(forRecovery: false); + + public void TearDown(bool forRecovery) + { + session?.Dispose(); + session = null; + fht?.Dispose(); + fht = null; + log?.Dispose(); + log = null; + + if (!forRecovery) + DeleteDirectory(MethodTestDir); + } + + void Populate() + { + for (int key = 0; key < numRecords; key++) + Assert.IsFalse(session.Upsert(key, key * valueMult).IsPending); + } + + void AssertIsNotLocked(long key) + { + // Check *both* hlog and readcache + OperationStackContext stackCtx = new(comparer.GetHashCode64(ref key)); + fht.FindTag(ref stackCtx.hei); + stackCtx.SetRecordSourceToHashEntry(fht.hlog); + + HashEntryInfo hei = new(fht.comparer.GetHashCode64(ref key)); + + if (fht.UseReadCache && fht.FindInReadCache(ref key, ref stackCtx, minAddress: Constants.kInvalidAddress)) + { + var recordInfo = fht.hlog.GetInfo(fht.hlog.GetPhysicalAddress(stackCtx.hei.AbsoluteAddress)); + Assert.IsFalse(recordInfo.IsLocked); + fht.SkipReadCache(ref stackCtx, out _); // Ignore refresh + } + if (fht.TryFindRecordInMainLog(ref key, ref stackCtx, fht.hlog.BeginAddress)) + { + var recordInfo = fht.hlog.GetInfo(fht.hlog.GetPhysicalAddress(hei.AbsoluteAddress)); + Assert.IsFalse(recordInfo.IsLocked); + } + } + + void PrepareRecordLocation(FlushMode recordLocation) + { + if (recordLocation == FlushMode.ReadOnly) + 
this.fht.Log.ShiftReadOnlyAddress(this.fht.Log.TailAddress, wait: true); + else if (recordLocation == FlushMode.OnDisk) + this.fht.Log.FlushAndEvict(wait: true); + } + + void AssertNoLocks() + { + long count = 0; + using (var iter = this.fht.Log.Scan(this.fht.Log.BeginAddress, this.fht.Log.TailAddress)) + { + while (iter.GetNext(out var recordInfo, out var key, out var value)) + { + ++count; + Assert.False(recordInfo.IsLocked, $"Unexpected Locked record for key {key}: {(recordInfo.IsLockedShared ? "S" : "")} {(recordInfo.IsLockedExclusive ? "X" : "")}"); + } + // Some tests delete records, so just make sure the scan found something. + Assert.Greater(count, 0); + } + + if (this.fht.UseReadCache) + { + using var iter = this.fht.ReadCache.Scan(this.fht.readcache.BeginAddress, this.fht.readcache.GetTailAddress()); + while (iter.GetNext(out var recordInfo, out var key, out var value)) + { + Assert.False(recordInfo.IsLocked, $"Unexpected Locked record for key {key}: {(recordInfo.IsLockedShared ? "S" : "")} {(recordInfo.IsLockedExclusive ? "X" : "")}"); + } + } + } + + [Test] + [Category(LockableUnsafeContextTestCategory)] + [Category(SmokeTestCategory)] + public void InMemorySimpleLockTest([Values] FlushMode flushMode, [Values(Phase.REST, Phase.INTERMEDIATE)] Phase phase, + [Values(UpdateOp.Upsert, UpdateOp.RMW)] UpdateOp updateOp) + { + Populate(); + PrepareRecordLocation(flushMode); + + // SetUp also reads this to determine whether to supply ReadCacheSettings. If ReadCache is specified it wins over CopyToTail. + var useRMW = updateOp == UpdateOp.RMW; + const int readKey24 = 24, readKey51 = 51; + long resultKey = readKey24 + readKey51; + long resultValue = -1; + long expectedResult = (readKey24 + readKey51) * valueMult; + Status status; + + AssertNoLocks(); + + // Read the source values to verify them; for FlushMode.OnDisk the records were evicted, so these reads will go PENDING (otherwise they may now be in the readcache). + status = session.Read(readKey24, out var readValue24); + if (flushMode == FlushMode.OnDisk) + { + if (status.IsPending) + { + session.CompletePendingWithOutputs(out var completedOutputs, wait: true); + Assert.True(completedOutputs.Next()); + readValue24 = completedOutputs.Current.Output; + Assert.False(completedOutputs.Next()); + completedOutputs.Dispose(); + } + } + else + { + Assert.IsFalse(status.IsPending, status.ToString()); + } + AssertIsNotLocked(readKey24); + Assert.AreEqual(24 * valueMult, readValue24); + + status = session.Read(readKey51, out var readValue51); + if (flushMode == FlushMode.OnDisk) + { + if (status.IsPending) + { + session.CompletePendingWithOutputs(out var completedOutputs, wait: true); + Assert.True(completedOutputs.Next()); + readValue51 = completedOutputs.Current.Output; + Assert.False(completedOutputs.Next()); + completedOutputs.Dispose(); + } + } + else + { + Assert.IsFalse(status.IsPending, status.ToString()); + } + AssertIsNotLocked(readKey51); + Assert.AreEqual(51 * valueMult, readValue51); + + // Set the phase to Phase.INTERMEDIATE to test the non-Phase.REST blocks + session.ctx.phase = phase; + long dummyInOut = 0; + status = useRMW + ? 
session.RMW(ref resultKey, ref expectedResult, ref resultValue, out RecordMetadata recordMetadata) + : session.Upsert(ref resultKey, ref dummyInOut, ref expectedResult, ref resultValue, out recordMetadata); + if (flushMode == FlushMode.OnDisk) + { + if (status.IsPending) + { + session.CompletePendingWithOutputs(out var completedOutputs, wait: true); + Assert.True(completedOutputs.Next()); + resultValue = completedOutputs.Current.Output; + Assert.AreEqual(expectedResult, resultValue); + Assert.False(completedOutputs.Next()); + completedOutputs.Dispose(); + } + } + else + { + Assert.IsFalse(status.IsPending, status.ToString()); + Assert.AreEqual(expectedResult, resultValue); + } + AssertIsNotLocked(resultKey); + + // Reread the destination to verify + status = session.Read(resultKey, out resultValue); + Assert.IsFalse(status.IsPending, status.ToString()); + Assert.AreEqual(expectedResult, resultValue); + AssertNoLocks(); + } + + [Test] + [Category(LockableUnsafeContextTestCategory)] + [Category(SmokeTestCategory)] + public void InMemoryDeleteTest([Values] ReadCopyDestination readCopyDestination, + [Values(FlushMode.NoFlush, FlushMode.ReadOnly)] FlushMode flushMode, [Values(Phase.REST, Phase.INTERMEDIATE)] Phase phase) + { + // Phase.INTERMEDIATE is to test the non-Phase.REST blocks + Populate(); + PrepareRecordLocation(flushMode); + + // SetUp also reads this to determine whether to supply ReadCacheSettings. If ReadCache is specified it wins over CopyToTail. + long resultKey = 75; + Status status; + + AssertNoLocks(); + + // Set the phase to Phase.INTERMEDIATE to test the non-Phase.REST blocks + session.ctx.phase = phase; + status = session.Delete(ref resultKey); + Assert.IsFalse(status.IsPending, status.ToString()); + AssertIsNotLocked(resultKey); + + // Reread the destination to verify it was deleted + status = session.Read(resultKey, out var _); + Assert.IsFalse(status.Found, status.ToString()); + AssertNoLocks(); + } + + [Test] + [Category(LockableUnsafeContextTestCategory)] + [Category(SmokeTestCategory)] + public void StressEphemeralLocks([Values(2, 8)] int numThreads) + { + Populate(); + + // Key range and operation counts for the random stress mix + const int baseKey = 42; + const int numKeys = 20; + const int numIncrement = 5; + const int numIterations = 1000; + + void runEphemeralLockOpThread(int tid) + { + Random rng = new(tid + 101); + + using var localSession = fht.For(new EphemeralLockingTestFunctions()).NewSession<EphemeralLockingTestFunctions>(); + var basicContext = localSession.BasicContext; + + for (var iteration = 0; iteration < numIterations; ++iteration) + { + for (var key = baseKey + rng.Next(numIncrement); key < baseKey + numKeys; key += rng.Next(1, numIncrement)) + { + var rand = rng.Next(100); + if (rand < 33) + basicContext.Read(key); + else if (rand < 66) + basicContext.Upsert(key, key * valueMult); + else + basicContext.RMW(key, key * valueMult); + } + } + } + + // Run a mix of Read, Upsert, and RMW operations on concurrent sessions + Task[] tasks = new Task[numThreads]; // Task rather than Thread for propagation of exceptions. 
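The trailing comment above marks a deliberate choice that recurs in every multi-threaded test in this diff: workers are started as `Task`s and joined with `Task.WaitAll`, so an exception on a worker (including a failed NUnit assertion) resurfaces at the join as an `AggregateException` instead of tearing down the process the way an unhandled exception on a raw `Thread` would. A standalone illustration of just that propagation behavior (plain .NET, nothing FASTER-specific):

```csharp
using System;
using System.Threading.Tasks;

class TaskPropagationSketch
{
    static void Main()
    {
        var tasks = new Task[2];
        for (int t = 0; t < tasks.Length; t++)
        {
            int tid = t; // capture a stable copy, as the tests above do
            tasks[t] = Task.Factory.StartNew(() =>
            {
                if (tid == 1)
                    throw new InvalidOperationException($"worker {tid} failed");
            });
        }

        try
        {
            Task.WaitAll(tasks);
        }
        catch (AggregateException ex)
        {
            // The worker's exception surfaces at the join instead of being lost.
            Console.WriteLine(ex.InnerExceptions[0].Message);
        }
    }
}
```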
+ for (int t = 0; t < numThreads; t++) + { + var tid = t; + tasks[t] = Task.Factory.StartNew(() => runEphemeralLockOpThread(tid)); + } + Task.WaitAll(tasks); + + AssertNoLocks(); + } + + void VerifyKeyIsSplicedInAndHasNoLocks(long expectedKey) + { + // Scan to the end of the readcache chain and verify we inserted the value. + var (_, pa) = ChainTests.SkipReadCacheChain(fht, expectedKey); + var storedKey = fht.hlog.GetKey(pa); + Assert.AreEqual(expectedKey, storedKey); + + // Verify we have no orphaned ephemeral lock. + ref RecordInfo recordInfo = ref fht.hlog.GetInfo(pa); + Assert.IsFalse(recordInfo.IsLocked); + } + + [Test] + [Category(LockableUnsafeContextTestCategory)] + [Category(SmokeTestCategory)] + public void CopyToCTTTest() + { + Populate(); + fht.Log.FlushAndEvict(wait: true); + + using var session = fht.NewSession(new SimpleFunctions<long, long>()); + long input = 0, output = 0, key = useExistingKey; + ReadOptions readOptions = new() { ReadFlags = ReadFlags.CopyReadsToTail }; + + var status = session.Read(ref key, ref input, ref output, ref readOptions, out _); + Assert.IsTrue(status.IsPending, status.ToString()); + session.CompletePending(wait: true); + + VerifyKeyIsSplicedInAndHasNoLocks(key); + } + + [Test] + [Category(LockableUnsafeContextTestCategory)] + [Category(SmokeTestCategory)] + public void VerifyCountsAfterFlushAndEvict() + { + PopulateAndEvict(immutable: true); + AssertNoLocks(); + fht.Log.FlushAndEvict(true); + AssertNoLocks(); + } + + void PopulateAndEvict(bool immutable = false) + { + Populate(); + + if (immutable) + fht.Log.ShiftReadOnlyAddress(fht.Log.TailAddress, wait: true); + else + fht.Log.FlushAndEvict(true); + } + + [Test] + [Category(LockableUnsafeContextTestCategory)] + [Category(SmokeTestCategory)] + public void VerifyNoLocksAfterUpsertToTailTest([Values] ChainTests.RecordRegion recordRegion) + { + PopulateAndEvict(recordRegion == ChainTests.RecordRegion.Immutable); + + using var session = fht.NewSession(new SimpleFunctions<long, long>()); + + int key = recordRegion == ChainTests.RecordRegion.Immutable || recordRegion == ChainTests.RecordRegion.OnDisk + ? useExistingKey : useNewKey; + var status = session.Upsert(key, key * valueMult); + Assert.IsTrue(status.Record.Created, status.ToString()); + + VerifyKeyIsSplicedInAndHasNoLocks(key); + } + + [Test] + [Category(LockableUnsafeContextTestCategory)] + [Category(SmokeTestCategory)] + public void VerifyNoLocksAfterRMWToTailTest([Values] ChainTests.RecordRegion recordRegion) + { + PopulateAndEvict(recordRegion == ChainTests.RecordRegion.Immutable); + + using var session = fht.NewSession(new SimpleFunctions<long, long>()); + + int key = recordRegion == ChainTests.RecordRegion.Immutable || recordRegion == ChainTests.RecordRegion.OnDisk + ? 
useExistingKey : useNewKey; + var status = session.RMW(key, key * valueMult); + if (recordRegion == ChainTests.RecordRegion.OnDisk) + { + Assert.IsTrue(status.IsPending, status.ToString()); + session.CompletePendingWithOutputs(out var completedOutputs, wait: true); + (status, _) = GetSinglePendingResult(completedOutputs); + Assert.IsTrue(status.Record.CopyUpdated, status.ToString()); + } + else if (recordRegion == ChainTests.RecordRegion.Immutable) + Assert.IsTrue(status.Record.CopyUpdated, status.ToString()); + else + Assert.IsTrue(status.Record.Created, status.ToString()); + + VerifyKeyIsSplicedInAndHasNoLocks(key); + } + + [Test] + [Category(LockableUnsafeContextTestCategory)] + [Category(SmokeTestCategory)] + public void VerifyNoLocksAfterDeleteToTailTest([Values] ChainTests.RecordRegion recordRegion) + { + PopulateAndEvict(recordRegion == ChainTests.RecordRegion.Immutable); + + using var session = fht.NewSession(new SimpleFunctions<long, long>()); + + long key = -1; + + if (recordRegion == ChainTests.RecordRegion.Immutable || recordRegion == ChainTests.RecordRegion.OnDisk) + { + key = useExistingKey; + var status = session.Delete(key); + + // Delete does not search outside the mutable region, so the key will not be found + Assert.IsTrue(!status.Found && status.Record.Created, status.ToString()); + + VerifyKeyIsSplicedInAndHasNoLocks(key); + } + else + { + key = useNewKey; + var status = session.Delete(key); + Assert.IsFalse(status.Found, status.ToString()); + + // This key was *not* inserted; Delete sees it does not exist so jumps out immediately. + Assert.IsFalse(fht.FindHashBucketEntryForKey(ref key, out _)); + } + } + + [Test] + [Category(LockableUnsafeContextTestCategory)] + [Category(SmokeTestCategory)] + public void VerifyNoLocksAfterReadOnlyToUpdateRecordTest([Values] UpdateOp updateOp) + { + Populate(); + this.fht.Log.ShiftReadOnlyAddress(this.fht.Log.TailAddress, wait: true); + + const int key = 42; + static int getValue(int key) => key + valueMult; + + var status = updateOp switch + { + UpdateOp.Upsert => session.Upsert(key, getValue(key)), + UpdateOp.RMW => session.RMW(key, getValue(key)), + UpdateOp.Delete => session.Delete(key), + _ => new(StatusCode.Error) + }; + Assert.IsFalse(status.IsFaulted, $"Unexpected UpdateOp {updateOp}, status {status}"); + if (updateOp == UpdateOp.RMW) + Assert.IsTrue(status.Record.CopyUpdated, status.ToString()); + else + Assert.IsTrue(status.Record.Created, status.ToString()); + + AssertNoLocks(); + } + + [Test] + [Category(LockableUnsafeContextTestCategory)] + [Category(SmokeTestCategory)] + public void FailInPlaceAndSealTest([Values(UpdateOp.Upsert, UpdateOp.RMW)] UpdateOp updateOp) + { + Populate(); + + functions.failInPlace = true; + + const int key = 42; + static int getValue(int key) => key + valueMult; + + var status = updateOp switch + { + UpdateOp.Upsert => session.Upsert(key, getValue(key)), + UpdateOp.RMW => session.RMW(key, getValue(key)), + _ => new(StatusCode.Error) + }; + Assert.IsFalse(status.IsFaulted, $"Unexpected UpdateOp {updateOp}, status {status}"); + if (updateOp == UpdateOp.RMW) + Assert.IsTrue(status.Record.CopyUpdated, status.ToString()); + else + Assert.IsTrue(status.Record.Created, status.ToString()); + + long output; + (status, output) = session.Read(key); + Assert.IsTrue(status.Found, status.ToString()); + Assert.AreEqual(getValue(key), output); + + AssertNoLocks(); + } + } +} diff --git a/cs/test/ExpirationTests.cs b/cs/test/ExpirationTests.cs index 137e4e3fc..519faee46 100644 --- a/cs/test/ExpirationTests.cs +++ 
b/cs/test/ExpirationTests.cs @@ -455,7 +455,10 @@ public override bool SingleReader(ref int key, ref ExpirationInput input, ref VL { output.AddFunc(Funcs.SingleReader); if (IsExpired(key, value.field1)) + { + readInfo.Action = ReadAction.Expire; return false; + } output.retrievedValue = value.field1; return true; } @@ -464,7 +467,10 @@ public override bool ConcurrentReader(ref int key, ref ExpirationInput input, re { output.AddFunc(Funcs.ConcurrentReader); if (IsExpired(key, value.field1)) + { + readInfo.Action = ReadAction.Expire; return false; + } output.retrievedValue = value.field1; return true; } @@ -662,7 +668,7 @@ public void PassiveExpireTest([Values] FlushMode flushMode, [Values(Phase.REST, MaybeEvict(flushMode); IncrementValue(TestOp.PassiveExpire, flushMode); session.ctx.phase = phase; - GetRecord(ModifyKey, new(StatusCode.NotFound), flushMode); + GetRecord(ModifyKey, new(StatusCode.NotFound | StatusCode.Expired), flushMode); } [Test] @@ -689,7 +695,10 @@ public void ExpireDeleteTest([Values] FlushMode flushMode, [Values(Phase.REST, P Assert.AreEqual(ExpirationResult.ExpireDelete, output.result); // Verify it's not there - GetRecord(key, new(StatusCode.NotFound), flushMode); + if (flushMode == FlushMode.NoFlush) + GetRecord(key, new(StatusCode.NotFound), flushMode); // Expiration was IPU-deletion + else + GetRecord(key, new(StatusCode.NotFound | StatusCode.Expired), flushMode); } [Test] diff --git a/cs/test/LockTableTests.cs b/cs/test/LockTableTests.cs deleted file mode 100644 index e953e3e8c..000000000 --- a/cs/test/LockTableTests.cs +++ /dev/null @@ -1,491 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT license. - -using FASTER.core; -using FASTER.core.Utilities; -using NUnit.Framework; -using System; -using System.Collections.Generic; -using System.Threading; -using System.Threading.Tasks; -using static FASTER.test.TestUtils; - -namespace FASTER.test.LockTable -{ - internal class SingleBucketComparer : IFasterEqualityComparer - { - public bool Equals(ref long k1, ref long k2) => k1 == k2; - - public long GetHashCode64(ref long k) => 42L; - } - - [TestFixture] - internal class LockTableTests - { - LockTable lockTable; - long SingleBucketKey = 1; // We use a single bucket here for most tests so this lets us use 'ref' easily - - [SetUp] - public void Setup() - { - DeleteDirectory(MethodTestDir); - lockTable = new(Constants.kDefaultLockTableSize, new SingleBucketComparer(), keyLen: null); - } - - [TearDown] - public void TearDown() - { - lockTable.Dispose(); - lockTable = null; - } - - void TryLock(long key, LockType lockType, bool ephemeral, int expectedCurrentReadLocks, bool expectedLockResult, bool expectedGotLock) - { - var hash = lockTable.functions.GetHashCode64(ref key); - - // Check for existing lock - var found = lockTable.TryGet(ref key, out var existingRecordInfo); - Assert.AreEqual(expectedCurrentReadLocks > 0, found); - if (found) - Assert.AreEqual(expectedCurrentReadLocks, existingRecordInfo.NumLockedShared); - - bool gotLock; - if (ephemeral) - Assert.AreEqual(expectedLockResult, lockTable.TryLockEphemeral(ref key, hash, lockType, out gotLock)); - else - { - // All manual locks start out tentative; if there is already a lock there, they increment it non-tentatively - Assert.AreEqual(expectedLockResult, gotLock = lockTable.TryLockManual(ref key, hash, lockType, out bool isTentative)); - Assert.AreEqual(expectedCurrentReadLocks == 0, isTentative); - if (isTentative) - lockTable.ClearTentativeBit(ref key, hash); - } - - 
Assert.AreEqual(expectedGotLock, gotLock); - if (expectedGotLock) - Assert.IsTrue(lockTable.HasEntries(hash)); - } - - void Unlock(long key, LockType lockType) => lockTable.Unlock(ref key, lockTable.functions.GetHashCode64(ref key), lockType); - - ref InMemKVBucket, RecordInfo, LockTable.LockTableFunctions> GetBucket(ref long key) - { - _ = lockTable.kv.GetBucket(ref key, out var bucketIndex); // Compiler won't allow a direct 'ref' return for some reason - return ref lockTable.kv.buckets[bucketIndex]; - } - - static InMemKVChunk, RecordInfo, LockTable.LockTableFunctions> GetFirstChunk( - ref InMemKVBucket, RecordInfo, LockTable.LockTableFunctions> bucket) - { - var chunk = bucket.LastOverflowChunk; - while (chunk is not null && chunk.prev is not null) - chunk = chunk.prev; - return chunk; - } - - public enum RemovalType { Unlock, Transfer }; - - internal static bool LockTableHasEntries(LockTable lockTable) - { - foreach (var bucket in lockTable.kv.buckets) - { - if (bucket.HasEntries) - return true; - } - return false; - } - - internal static int LockTableEntryCount(LockTable lockTable) - { - int count = 0; - foreach (var bucket in lockTable.kv.buckets) - { - var localBucket = bucket; // can't ref iteration variable - count += LockTableBucketCount(ref localBucket); - } - return count; - } - - internal static int LockTableBucketCount(ref InMemKVBucket, RecordInfo, LockTable.LockTableFunctions> bucket) - { - if (bucket.InitialEntry.IsDefault) - return 0; - if (!bucket.HasOverflow) - return 1; - - int count = bucket.LastActiveChunkEntryIndex + 1 /* 0-based */ + 1 /* initialEntry */; - for (var chunk = bucket.LastOverflowChunk.prev; chunk is not null; chunk = chunk.prev) - { - for (var iEntry = 0; iEntry < InMemKV.kChunkSize; ++iEntry) - { - Assert.IsFalse(chunk[iEntry].IsDefault); - ++count; - } - } - return count; - } - - [Test] - [Category(LockTestCategory), Category(LockTableTestCategory), Category(SmokeTestCategory)] - public void EphemeralLockTest() - { - // No entries - ref var initialEntry = ref GetBucket(ref SingleBucketKey).InitialEntry; - Assert.IsTrue(initialEntry.IsDefault); - long key = 1; - TryLock(key, LockType.Shared, ephemeral: true, expectedCurrentReadLocks: 0, expectedLockResult: true, expectedGotLock:false); - Assert.IsTrue(initialEntry.IsDefault); - Assert.IsFalse(lockTable.IsActive); - Assert.IsFalse(lockTable.TryGet(ref key, out _)); - - // Add a non-ephemeral lock - TryLock(key, LockType.Shared, ephemeral: false, expectedCurrentReadLocks: 0, expectedLockResult: true, expectedGotLock: true); - Assert.IsFalse(initialEntry.IsDefault); - Assert.IsTrue(lockTable.IsActive); - Assert.AreEqual(1, initialEntry.value.NumLockedShared); - Assert.IsTrue(lockTable.TryGet(ref key, out _)); - - // Now the ephemeral lock with the same key should lock it - TryLock(key, LockType.Shared, ephemeral: true, expectedCurrentReadLocks: 1, expectedLockResult: true, expectedGotLock: true); - Assert.IsFalse(initialEntry.IsDefault); - Assert.IsTrue(lockTable.IsActive); - Assert.AreEqual(2, initialEntry.value.NumLockedShared); - - // An ephemeral lock with a different key should not add a lock - key = 2; - TryLock(key, LockType.Shared, ephemeral: true, expectedCurrentReadLocks: 0, expectedLockResult: true, expectedGotLock: false); - Assert.IsFalse(lockTable.TryGet(ref key, out _)); - } - - [Test] - [Category(LockTestCategory), Category(LockTableTestCategory), Category(SmokeTestCategory)] - public void SingleEntryTest() - { - TryLock(1, LockType.Shared, ephemeral: false, expectedCurrentReadLocks: 0, 
expectedLockResult: true, expectedGotLock: true); - ref var bucket = ref GetBucket(ref SingleBucketKey); - ref var initialEntry = ref bucket.InitialEntry; - Assert.IsFalse(initialEntry.IsDefault); - Assert.IsFalse(bucket.HasOverflow); - Assert.IsTrue(lockTable.IsActive); - Unlock(1, LockType.Shared); - Assert.IsTrue(initialEntry.IsDefault); - Assert.IsFalse(bucket.HasOverflow); - Assert.IsFalse(lockTable.IsActive); - } - - [Test] - [Category(LockTestCategory), Category(LockTableTestCategory), Category(SmokeTestCategory)] - public void ThreeEntryTest() - { - Assert.IsFalse(lockTable.IsActive); - - TryLock(1, LockType.Shared, ephemeral: false, expectedCurrentReadLocks: 0, expectedLockResult: true, expectedGotLock: true); - Assert.IsTrue(lockTable.HasEntries(ref SingleBucketKey)); - ref var bucket = ref GetBucket(ref SingleBucketKey); - ref var initialEntry = ref bucket.InitialEntry; - Assert.IsFalse(initialEntry.IsDefault); - Assert.IsFalse(bucket.HasOverflow); - Assert.IsTrue(lockTable.IsActive); - - // Verify the same key is locked. - TryLock(1, LockType.Shared, ephemeral: false, expectedCurrentReadLocks: 1, expectedLockResult: true, expectedGotLock: true); - Assert.IsFalse(bucket.HasOverflow); - Assert.AreEqual(2, initialEntry.value.NumLockedShared); - - Unlock(1, LockType.Shared); - Assert.IsFalse(bucket.HasOverflow); - Assert.AreEqual(1, initialEntry.value.NumLockedShared); - Assert.AreEqual(0, lockTable.kv.FreeListCount); - - TryLock(2, LockType.Shared, ephemeral: false, expectedCurrentReadLocks: 0, expectedLockResult: true, expectedGotLock: true); - Assert.IsTrue(bucket.HasOverflow); - var chunk1 = GetFirstChunk(ref bucket); - ref var chunkEntry1 = ref chunk1[0]; - ref var chunkEntry2 = ref chunk1[1]; - Assert.IsFalse(chunkEntry1.IsDefault); - Assert.AreEqual(2, chunkEntry1.heapKey.Get()); - Assert.IsTrue(chunkEntry2.IsDefault); - - // The last entry on the chunk should be default. - ref var lastEntryOnChunk = ref chunk1[InMemKV.kChunkSize - 1]; - Assert.IsTrue(lastEntryOnChunk.IsDefault); - - TryLock(3, LockType.Shared, ephemeral: false, expectedCurrentReadLocks: 0, expectedLockResult: true, expectedGotLock: true); - Assert.IsTrue(bucket.HasOverflow); - Assert.IsFalse(chunkEntry2.IsDefault); - Assert.AreEqual(3, chunkEntry2.heapKey.Get()); - - // Unlock chunkEntry2; there is no record after it, so it should become empty, but chunkEntry1 is still there. - Unlock(3, LockType.Shared); - Assert.IsTrue(bucket.HasOverflow); - Assert.AreEqual(0, lockTable.kv.FreeListCount); - Assert.IsFalse(chunkEntry1.IsDefault); - Assert.IsTrue(chunkEntry2.IsDefault); - - // Unlock chunkEntry1; there is no record after it, so the page should be freed. 
- Unlock(2, LockType.Shared); - Assert.IsFalse(initialEntry.IsDefault); - Assert.IsFalse(bucket.HasOverflow); - Assert.AreEqual(1, lockTable.kv.FreeListCount); - - Unlock(1, LockType.Shared); - Assert.IsTrue(initialEntry.IsDefault); - Assert.IsFalse(bucket.HasOverflow); - Assert.AreEqual(1, lockTable.kv.FreeListCount); - Assert.IsFalse(lockTable.IsActive); - } - - [Test] - [Category(LockTestCategory), Category(LockTableTestCategory), Category(SmokeTestCategory)] - public void UnlockMidChunkEntryTest() - { - for (long key = 1; key <= 5; ++key) - TryLock(key, LockType.Shared, ephemeral: false, expectedCurrentReadLocks: 0, expectedLockResult: true, expectedGotLock: true); - ref var bucket = ref GetBucket(ref SingleBucketKey); - ref var initialEntry = ref bucket.InitialEntry; - Assert.AreEqual(1, initialEntry.heapKey.Get()); - Assert.IsTrue(bucket.HasOverflow); - var chunk1 = GetFirstChunk(ref bucket); - ref var chunkEntry1 = ref chunk1[0]; - Assert.AreEqual(2, chunkEntry1.heapKey.Get()); - ref var chunkEntry2 = ref chunk1[1]; - Assert.AreEqual(3, chunkEntry2.heapKey.Get()); - ref var chunkEntry3 = ref chunk1[2]; - Assert.AreEqual(4, chunkEntry3.heapKey.Get()); - ref var chunkEntry4 = ref chunk1[3]; - Assert.AreEqual(5, chunkEntry4.heapKey.Get()); - - // Unlock chunkEntry3, which will Compact chunkEntry4 into it. - Unlock(4, LockType.Shared); - Assert.AreEqual(5, chunkEntry3.heapKey.Get()); - Assert.IsTrue(chunkEntry4.IsDefault); - - // Unlock chunkEntry1, which will Compact chunkEntry3 into it. - Unlock(2, LockType.Shared); - Assert.AreEqual(5, chunkEntry1.heapKey.Get()); - Assert.IsTrue(chunkEntry3.IsDefault); - - // Unlock chunkEntry2, which will not Compact because there is nothing after it. - Unlock(3, LockType.Shared); - Assert.IsTrue(chunkEntry2.IsDefault); - Assert.IsTrue(chunkEntry3.IsDefault); - Assert.IsTrue(chunkEntry4.IsDefault); - - // Unlock the initial entry, which will Compact chunkEntry1 into it and free the chunk. - Unlock(1, LockType.Shared); - Assert.AreEqual(5, initialEntry.heapKey.Get()); - Assert.AreEqual(1, lockTable.kv.FreeListCount); - Assert.IsTrue(lockTable.IsActive); - - // Remove the final entry. - Unlock(5, LockType.Shared); - Assert.IsTrue(initialEntry.IsDefault); - Assert.IsFalse(bucket.HasOverflow); - Assert.AreEqual(1, lockTable.kv.FreeListCount); - Assert.IsFalse(lockTable.IsActive); - } - - [Test] - [Category(LockTestCategory), Category(LockTableTestCategory), Category(SmokeTestCategory)] - public void UnlockLastChunkEntryTest() - { - // Fill the initial entry and the chunk. - for (long key = 1; key <= InMemKV.kChunkSize + 1; ++key) - TryLock(key, LockType.Shared, ephemeral: false, expectedCurrentReadLocks: 0, expectedLockResult: true, expectedGotLock: true); - - ref var bucket = ref GetBucket(ref SingleBucketKey); - ref var initialEntry = ref bucket.InitialEntry; - var chunk1 = GetFirstChunk(ref bucket); - var lastKey = InMemKV.kChunkSize + 1; - ref var lastEntry = ref chunk1[InMemKV.kChunkSize - 1]; - Assert.AreEqual(lastKey, lastEntry.heapKey.Get()); - - Unlock(lastKey, LockType.Shared); - Assert.IsTrue(lastEntry.IsDefault); - Assert.AreEqual(0, lockTable.kv.FreeListCount); - Assert.IsTrue(lockTable.IsActive); - } - - [Test] - [Category(LockTestCategory), Category(LockTableTestCategory), Category(SmokeTestCategory)] - public void CompactFromSecondChunkTest() - { - // Fill the initial entry and the chunk, and the first entry of the second chunk. 
- for (long key = 1; key <= InMemKV.kChunkSize + 2; ++key) - TryLock(key, LockType.Shared, ephemeral: false, expectedCurrentReadLocks: 0, expectedLockResult: true, expectedGotLock: true); - - ref var bucket = ref GetBucket(ref SingleBucketKey); - ref var initialEntry = ref bucket.InitialEntry; - var chunk1 = GetFirstChunk(ref bucket); - Assert.AreSame(chunk1.next, bucket.LastOverflowChunk); - var lastKey = InMemKV.kChunkSize + 1; - ref var lastEntry = ref chunk1[InMemKV.kChunkSize - 1]; - Assert.AreEqual(lastKey, lastEntry.heapKey.Get()); - - // Unlock the last entry on the first chunk; there should be only one entry in the second - // chunk, so this will cause it to be Compacted into the first chunk and the second chunk freed. - Unlock(lastKey, LockType.Shared); - Assert.IsFalse(lastEntry.IsDefault); - Assert.AreEqual(1, lockTable.kv.FreeListCount); - Assert.IsTrue(lockTable.IsActive); - Assert.AreSame(chunk1, bucket.LastOverflowChunk); - } - - [Test] - [Category(LockTestCategory), Category(LockTableTestCategory), Category(SmokeTestCategory)] - public void UnlockOnlyEntryOnSecondChunkTest() - { - // Fill the initial entry and the chunk, and the first entry of the second chunk. - long lastKey = InMemKV.kChunkSize + 2; - for (long key = 1; key <= lastKey; ++key) - TryLock(key, LockType.Shared, ephemeral: false, expectedCurrentReadLocks: 0, expectedLockResult: true, expectedGotLock: true); - - ref var bucket = ref GetBucket(ref SingleBucketKey); - ref var initialEntry = ref bucket.InitialEntry; - var chunk1 = GetFirstChunk(ref bucket); - var chunk2 = bucket.LastOverflowChunk; - Assert.AreNotSame(chunk1, chunk2); - Assert.AreSame(chunk1.next, chunk2); - Assert.IsNull(chunk1.prev); - Assert.AreSame(chunk2.prev, chunk1); - Assert.IsNull(chunk2.next); - ref var lastEntryOnFirstChunk = ref chunk1[InMemKV.kChunkSize - 1]; - - ref var firstEntryOnSecondChunk = ref chunk2[0]; - Assert.AreEqual(lastKey, firstEntryOnSecondChunk.heapKey.Get()); - Assert.IsTrue(chunk2[1].IsDefault); // Second entry on second chunk should be empty - - // Unlock the first entry on the second chunk; this should free the second chunk. 
- Unlock(lastKey, LockType.Shared); - Assert.AreEqual(1, lockTable.kv.FreeListCount); - Assert.IsTrue(lockTable.IsActive); - Assert.AreSame(chunk1, bucket.LastOverflowChunk); - } - - [Test] - [Category(LockTestCategory), Category(LockTableTestCategory), Category(SmokeTestCategory)] - public void ThreadedLockStressTest1Thread() - { - List tasks = new(); - var lastTid = 0; - AddThreads(tasks, ref lastTid, numThreads: 1, maxNumKeys: 5, lowKey: 1, highKey: 5, LockType.Exclusive); - Task.WaitAll(tasks.ToArray()); - } - - [Test] - [Category(LockTestCategory), Category(LockTableTestCategory), Category(SmokeTestCategory)] - public void ThreadedLockStressTestMultiThreadsNoContention([Values(3, 8)] int numThreads) - { - List tasks = new(); - var lastTid = 0; - for (var ii = 0; ii < numThreads; ++ii) - AddThreads(tasks, ref lastTid, numThreads: 1, maxNumKeys: 5, lowKey: 1 + 10 * ii, highKey: 5 + 10 * ii, LockType.Exclusive); - Task.WaitAll(tasks.ToArray()); - Assert.IsTrue(!lockTable.IsActive, "Expected lockTable to be inactive"); - Assert.AreEqual(0, LockTableEntryCount(lockTable), "Expected LockTableEntryCount to be 0"); - Assert.IsFalse(LockTableHasEntries(lockTable), "Expected LockTableHasEntries to be false"); - } - - [Test] - [Category(LockTestCategory), Category(LockTableTestCategory), Category(SmokeTestCategory)] - public void ThreadedLockStressTestMultiThreadsFullContention([Values(3, 8)] int numThreads, [Values] LockType lockType) - { - List tasks = new(); - var lastTid = 0; - AddThreads(tasks, ref lastTid, numThreads: numThreads, maxNumKeys: 5, lowKey: 1, highKey: 5, lockType); - Task.WaitAll(tasks.ToArray()); - Assert.IsTrue(!lockTable.IsActive, "Expected lockTable to be inactive"); - Assert.AreEqual(0, LockTableEntryCount(lockTable), "Expected LockTableEntryCount to be 0"); - Assert.IsFalse(LockTableHasEntries(lockTable), "Expected LockTableHasEntries to be false"); - } - - [Test] - [Category(LockTestCategory), Category(LockTableTestCategory), Category(SmokeTestCategory)] - public void ThreadedLockStressTestMultiThreadsRandomContention([Values(3, 8)] int numThreads, [Values] LockType lockType) - { - List tasks = new(); - var lastTid = 0; - AddThreads(tasks, ref lastTid, numThreads: numThreads, maxNumKeys: 5, lowKey: 1, highKey: 10 * (numThreads / 2), lockType); - Task.WaitAll(tasks.ToArray()); - Assert.IsTrue(!lockTable.IsActive, "Expected lockTable to be inactive"); - Assert.AreEqual(0, LockTableEntryCount(lockTable), "Expected LockTableEntryCount to be 0"); - Assert.IsFalse(LockTableHasEntries(lockTable), "Expected LockTableHasEntries to be false"); - } - - const int NumTestIterations = 15; - const int maxSleepMs = 5; - - internal struct ThreadStruct - { - internal long key; - internal long hash; - internal LockType lockType; - - public override string ToString() => $"key {key}, hash {hash}, {lockType}"; - } - - private void AddThreads(List tasks, ref int lastTid, int numThreads, int maxNumKeys, int lowKey, int highKey, LockType lockType) - { - void runThread(int tid) - { - Random rng = new(101 * tid); - - // maxNumKeys < 0 means use random number of keys - int numKeys = maxNumKeys < 0 ? 
rng.Next(1, -maxNumKeys) : maxNumKeys; - ThreadStruct[] threadStructs = new ThreadStruct[numKeys]; - - long getNextKey() - { - while (true) - { - var key = rng.Next(lowKey, highKey + 1); // +1 because the end # is not included - if (!Array.Exists(threadStructs, it => it.key == key )) - return key; - } - } - - for (var iteration = 0; iteration < NumTestIterations; ++iteration) - { - // Create key structs - for (var ii = 0; ii < numKeys; ++ii) - { - var key = getNextKey(); - threadStructs[ii] = new() // local var for debugging - { - key = key, - // LockType.None means split randomly between Shared and Exclusive - lockType = lockType == LockType.None ? (rng.Next(0, 100) > 50 ? LockType.Shared : LockType.Exclusive) : lockType, - hash = lockTable.functions.GetHashCode64(ref key) - }; - } - - // Sort and lock - Array.Sort(threadStructs, (x, y) => x.key.CompareTo(y.key)); - for (var ii = 0; ii < numKeys; ++ii) - { - bool isTentative; - while (!lockTable.TryLockManual(ref threadStructs[ii].key, threadStructs[ii].hash, threadStructs[ii].lockType, out isTentative)) - ; - if (isTentative) - Assert.IsTrue(lockTable.ClearTentativeBit(ref threadStructs[ii].key, threadStructs[ii].hash)); - } - - // Pretend to do work - Thread.Sleep(rng.Next(maxSleepMs)); - - // Unlock - for (var ii = 0; ii < numKeys; ++ii) - Assert.IsTrue(lockTable.Unlock(ref threadStructs[ii].key, threadStructs[ii].hash, threadStructs[ii].lockType)); - Array.Clear(threadStructs); - } - - } - - for (int t = 1; t <= numThreads; t++) - { - var tid = ++lastTid; - tasks.Add(Task.Factory.StartNew(() => runThread(tid))); - } - } - } -} diff --git a/cs/test/LockableUnsafeContextTests.cs b/cs/test/LockableUnsafeContextTests.cs index eb465cd12..7a470a329 100644 --- a/cs/test/LockableUnsafeContextTests.cs +++ b/cs/test/LockableUnsafeContextTests.cs @@ -7,7 +7,6 @@ using System.Linq; using System.Threading; using FASTER.core; -using FASTER.core.Utilities; using NUnit.Framework; using FASTER.test.ReadCacheTests; using System.Threading.Tasks; @@ -20,30 +19,30 @@ namespace FASTER.test.LockableUnsafeContext // Functions for the "Simple lock transaction" case, e.g.: // - Lock key1, key2, key3, keyResult // - Do some operation on value1, value2, value3 and write the result to valueResult - internal class LockableUnsafeFunctions : SimpleFunctions + internal class LockableUnsafeFunctions : SimpleFunctions { - internal long deletedRecordAddress; + internal long recordAddress; - public override void PostSingleDeleter(ref int key, ref DeleteInfo deleteInfo) + public override void PostSingleDeleter(ref long key, ref DeleteInfo deleteInfo) { - deletedRecordAddress = deleteInfo.Address; + recordAddress = deleteInfo.Address; } - public override bool ConcurrentDeleter(ref int key, ref int value, ref DeleteInfo deleteInfo) + public override bool ConcurrentDeleter(ref long key, ref long value, ref DeleteInfo deleteInfo) { - deletedRecordAddress = deleteInfo.Address; + recordAddress = deleteInfo.Address; return true; } } - internal class LockableUnsafeComparer : IFasterEqualityComparer + internal class LockableUnsafeComparer : IFasterEqualityComparer { internal int maxSleepMs; readonly Random rng = new(101); - public bool Equals(ref int k1, ref int k2) => k1 == k2; + public bool Equals(ref long k1, ref long k2) => k1 == k2; - public long GetHashCode64(ref int k) + public long GetHashCode64(ref long k) { if (maxSleepMs > 0) Thread.Sleep(rng.Next(maxSleepMs)); @@ -53,20 +52,101 @@ public long GetHashCode64(ref int k) public enum ResultLockTarget { MutableLock, 
LockTable } + internal struct BucketLockTracker + { + internal readonly Dictionary buckets; + + public BucketLockTracker() + { + buckets = new(); + } + + internal void Increment(FixedLengthLockableKeyStruct key) => Increment(ref key); // easier with 'foreach' because iteration vars can't be passed by 'ref' + internal void Increment(ref FixedLengthLockableKeyStruct key) + { + if (key.LockType == LockType.Exclusive) + IncrementX(ref key); + else + IncrementS(ref key); + } + internal void Decrement(FixedLengthLockableKeyStruct key) => Decrement(ref key); + internal void Decrement(ref FixedLengthLockableKeyStruct key) + { + if (key.LockType == LockType.Exclusive) + DecrementX(ref key); + else + DecrementS(ref key); + } + + internal void IncrementX(ref FixedLengthLockableKeyStruct key) => AddX(ref key, 1); + internal void DecrementX(ref FixedLengthLockableKeyStruct key) => AddX(ref key, -1); + internal void IncrementS(ref FixedLengthLockableKeyStruct key) => AddS(ref key, 1); + internal void DecrementS(ref FixedLengthLockableKeyStruct key) => AddS(ref key, -1); + + private void AddX(ref FixedLengthLockableKeyStruct key, int addend) + { + if (!buckets.TryGetValue(key.LockCode, out var counts)) + counts = default; + counts.x += addend; + Assert.GreaterOrEqual(counts.x, 0); + buckets[key.LockCode] = counts; + } + + private void AddS(ref FixedLengthLockableKeyStruct key, int addend) + { + if (!buckets.TryGetValue(key.LockCode, out var counts)) + counts = default; + counts.s += addend; + Assert.GreaterOrEqual(counts.s, 0); + buckets[key.LockCode] = counts; + } + + internal bool GetLockCounts(ref FixedLengthLockableKeyStruct key, out (int x, int s) counts) + { + if (!buckets.TryGetValue(key.LockCode, out counts)) + { + counts = default; + return false; + } + return true; + } + + internal (int x, int s) GetLockCounts() + { + var xx = 0; + var ss = 0; + foreach (var kvp in buckets) + { + xx += kvp.Value.x; + ss += kvp.Value.s; + } + return (xx, ss); + } + + internal void AssertNoLocks() + { + foreach (var kvp in buckets) + { + Assert.AreEqual(0, kvp.Value.x); + Assert.AreEqual(0, kvp.Value.s); + } + } + } + [TestFixture] class LockableUnsafeContextTests { const int numRecords = 1000; - const int transferToNewKey = 1010; - const int transferToExistingKey = 200; + const int useNewKey = 1010; + const int useExistingKey = 200; const int valueMult = 1_000_000; LockableUnsafeFunctions functions; LockableUnsafeComparer comparer; - private FasterKV fht; - private ClientSession session; + private FasterKV fht; + private ClientSession session; private IDevice log; [SetUp] @@ -100,9 +180,9 @@ public void Setup(bool forRecovery) comparer = new LockableUnsafeComparer(); functions = new LockableUnsafeFunctions(); - fht = new FasterKV(1L << 20, new LogSettings { LogDevice = log, ObjectLogDevice = null, PageSizeBits = 12, MemorySizeBits = 22, ReadCacheSettings = readCacheSettings }, + fht = new FasterKV(1L << 20, new LogSettings { LogDevice = log, ObjectLogDevice = null, PageSizeBits = 12, MemorySizeBits = 22, ReadCacheSettings = readCacheSettings }, checkpointSettings: checkpointSettings, comparer: comparer, - disableEphemeralLocking: false); + lockingMode: LockingMode.Standard); session = fht.For(functions).NewSession(); } @@ -130,65 +210,116 @@ void Populate() Assert.IsFalse(session.Upsert(key, key * valueMult).IsPending); } - static void AssertIsLocked(LockableUnsafeContext luContext, int key, bool xlock, bool slock) - { - var (isX, isS) = luContext.IsLocked(key); - Assert.AreEqual(xlock, isX, "xlock mismatch"); - 
Assert.AreEqual(slock, isS > 0, "slock mismatch"); - } + void AssertIsLocked(FixedLengthLockableKeyStruct key, bool xlock, bool slock) + => OverflowBucketLockTableTests.AssertLockCounts(fht, ref key, xlock, slock); + void AssertIsLocked(ref FixedLengthLockableKeyStruct key, bool xlock, bool slock) + => OverflowBucketLockTableTests.AssertLockCounts(fht, ref key, xlock, slock); + + void PrepareRecordLocation(FlushMode recordLocation) => PrepareRecordLocation(this.fht, recordLocation); - void PrepareRecordLocation(FlushMode recordLocation) + static void PrepareRecordLocation(FasterKV fht, FlushMode recordLocation) { if (recordLocation == FlushMode.ReadOnly) - this.fht.Log.ShiftReadOnlyAddress(this.fht.Log.TailAddress, wait: true); + fht.Log.ShiftReadOnlyAddress(fht.Log.TailAddress, wait: true); else if (recordLocation == FlushMode.OnDisk) - this.fht.Log.FlushAndEvict(wait: true); + fht.Log.FlushAndEvict(wait: true); } - static void ClearCountsOnError(ClientSession luContext) + static void ClearCountsOnError(ClientSession luContext) { // If we already have an exception, clear these counts so "Run" will not report them spuriously. luContext.sharedLockCount = 0; luContext.exclusiveLockCount = 0; } - static void ClearCountsOnError(ClientSession> luContext) + static void ClearCountsOnError(ClientSession> luContext) { // If we already have an exception, clear these counts so "Run" will not report them spuriously. luContext.sharedLockCount = 0; luContext.exclusiveLockCount = 0; } - static void ClearCountsOnError(ClientSession> luContext) + void PopulateHei(ref HashEntryInfo hei) => OverflowBucketLockTableTests.PopulateHei(fht, ref hei); + + void AssertTotalLockCounts(long expectedX, long expectedS) => OverflowBucketLockTableTests.AssertTotalLockCounts(fht, expectedX, expectedS); + + unsafe void AssertTotalLockCounts(ref BucketLockTracker blt) { - // If we already have an exception, clear these counts so "Run" will not report them spuriously. - luContext.sharedLockCount = 0; - luContext.exclusiveLockCount = 0; + var(expectedX, expectedS) = blt.GetLockCounts(); + AssertTotalLockCounts(expectedX, expectedS); + + foreach (var kvp in blt.buckets) + { + var hashBucket = fht.LockTable.GetBucket(kvp.Key); + Assert.AreEqual(kvp.Value.s, HashBucket.NumLatchedShared(hashBucket)); + Assert.AreEqual(kvp.Value.x == 1, HashBucket.IsLatchedExclusive(hashBucket)); + } + } + + void AssertNoLocks(ref BucketLockTracker blt) + { + blt.AssertNoLocks(); + AssertTotalLockCounts(0, 0); } - void EnsureNoLocks() + internal void AssertBucketLockCount(ref FixedLengthLockableKeyStruct key, long expectedX, long expectedS) => OverflowBucketLockTableTests.AssertBucketLockCount(fht, ref key, expectedX, expectedS); + + internal static IEnumerable EnumActionKeyIndices(FixedLengthLockableKeyStruct[] keys, LockOperationType lockOpType) { - using var iter = this.fht.Log.Scan(this.fht.Log.BeginAddress, this.fht.Log.TailAddress); - long count = 0; - while (iter.GetNext(out var recordInfo, out var key, out var value)) + // "Action" means the keys that will actually be locked or unlocked. + // See comments in LockableContext.DoInternalLockOp. Apps shouldn't need to do this; key sorting and enumeration + // should be a black-box to them, so this code is just for test. + if (lockOpType == LockOperationType.Lock) { - ++count; - Assert.False(recordInfo.IsLocked, $"Unexpected Locked record for key {key}: {(recordInfo.IsLockedShared ? "S" : "")} {(recordInfo.IsLockedExclusive ? 
"X" : "")}"); + for (int ii = 0; ii < keys.Length; ++ii) + { + if (ii == 0 || keys[ii].LockCode != keys[ii - 1].LockCode) + yield return ii; + } + yield break; } - // We delete some records so just make sure the test worked. - Assert.Greater(count, numRecords - 10); + // LockOperationType.Unlock + for (int ii = keys.Length - 1; ii >= 0; --ii) + { + if (ii == 0 || keys[ii].LockCode != keys[ii - 1].LockCode) + yield return ii; + } } - bool LockTableHasEntries() => LockTableTests.LockTableHasEntries(fht.LockTable); - int LockTableEntryCount() => LockTableTests.LockTableEntryCount(fht.LockTable); + [Test] + [Category(LockTestCategory), Category(LockTableTestCategory), Category(SmokeTestCategory)] + public void ManualLockCollidingHashCodes([Values] UseSingleBucketComparer /* justToSignalSetup */ _) + { + // GetBucketIndex does a mask of lower bits. + uint bucketIndex = 42; + long genHashCode(uint uniquifier) => ((long)uniquifier << 30) | bucketIndex; + + var lContext = session.LockableContext; + lContext.BeginLockable(); + + var keys = new[] + { + new FixedLengthLockableKeyStruct(101L, genHashCode(1), LockType.Exclusive, lContext), + new FixedLengthLockableKeyStruct(102L, genHashCode(2), LockType.Exclusive, lContext), + new FixedLengthLockableKeyStruct(103L, genHashCode(3), LockType.Exclusive, lContext), + }; + + for (var ii = 0; ii < keys.Length; ++ii) + Assert.AreEqual(bucketIndex, fht.LockTable.GetBucketIndex(keys[ii].LockCode), $"BucketIndex mismatch on key {ii}"); + + lContext.Lock(keys); + lContext.Unlock(keys); + + lContext.EndLockable(); + } [Test] [Category("FasterKV")] [Category("Smoke")] public async Task TestShiftHeadAddressLUC([Values] SyncMode syncMode) { - int input = default; + long input = default; const int RandSeed = 10; const int RandRange = numRecords; const int NumRecs = 200; @@ -201,54 +332,63 @@ public async Task TestShiftHeadAddressLUC([Values] SyncMode syncMode) luContext.BeginUnsafe(); luContext.BeginLockable(); + var keyVec = new FixedLengthLockableKeyStruct[1]; + try { for (int c = 0; c < NumRecs; c++) { - var key1 = r.Next(RandRange); - luContext.Lock(key1, LockType.Exclusive); - var value = key1 + numRecords; + keyVec[0] = new(r.Next(RandRange), LockType.Exclusive, luContext); + luContext.Lock(keyVec); + AssertBucketLockCount(ref keyVec[0], 1, 0); + + var value = keyVec[0].Key + numRecords; if (syncMode == SyncMode.Sync) { - luContext.Upsert(ref key1, ref value, Empty.Default, 0); + luContext.Upsert(ref keyVec[0].Key, ref value, Empty.Default, 0); } else { luContext.EndUnsafe(); - var status = (await luContext.UpsertAsync(ref key1, ref value)).Complete(); + var status = (await luContext.UpsertAsync(ref keyVec[0].Key, ref value)).Complete(); luContext.BeginUnsafe(); Assert.IsFalse(status.IsPending); } - luContext.Unlock(key1, LockType.Exclusive); + luContext.Unlock(keyVec); + AssertBucketLockCount(ref keyVec[0], 0, 0); } + AssertTotalLockCounts(0, 0); + r = new Random(RandSeed); sw.Restart(); for (int c = 0; c < NumRecs; c++) { - var key1 = r.Next(RandRange); - var value = key1 + numRecords; - int output = 0; + keyVec[0] = new(r.Next(RandRange), LockType.Shared, luContext); + var value = keyVec[0].Key + numRecords; + long output = 0; - luContext.Lock(key1, LockType.Shared); + luContext.Lock(keyVec); + AssertBucketLockCount(ref keyVec[0], 0, 1); Status status; if (syncMode == SyncMode.Sync || (c % 1 == 0)) // in .Async mode, half the ops should be sync to test CompletePendingAsync { - status = luContext.Read(ref key1, ref input, ref output, Empty.Default, 0); + 
status = luContext.Read(ref keyVec[0].Key, ref input, ref output, Empty.Default, 0); } else { luContext.EndUnsafe(); - (status, output) = (await luContext.ReadAsync(ref key1, ref input)).Complete(); + (status, output) = (await luContext.ReadAsync(ref keyVec[0].Key, ref input)).Complete(); luContext.BeginUnsafe(); } - luContext.Unlock(key1, LockType.Shared); - if (!status.IsPending) - { - Assert.AreEqual(value, output); - } + luContext.Unlock(keyVec); + AssertBucketLockCount(ref keyVec[0], 0, 0); + Assert.IsFalse(status.IsPending); } + + AssertTotalLockCounts(0, 0); + if (syncMode == SyncMode.Sync) { luContext.CompletePending(true); @@ -267,19 +407,26 @@ public async Task TestShiftHeadAddressLUC([Values] SyncMode syncMode) sw.Restart(); // Since we do random selection with replacement, we may not lock all keys--so need to track which we do - List lockKeys = new(); - - for (int c = 0; c < NumRecs; c++) + // Similarly, we need to track bucket counts. + BucketLockTracker blt = new(); + var lockKeys = Enumerable.Range(0, NumRecs).Select(ii => new FixedLengthLockableKeyStruct(r.Next(RandRange), LockType.Shared, luContext)).ToArray(); + luContext.SortLockCodes(lockKeys); + luContext.Lock(lockKeys); + + var expectedS = 0; + foreach (var idx in EnumActionKeyIndices(lockKeys, LockOperationType.Lock)) { - var key1 = r.Next(RandRange); - int output = 0; - luContext.Lock(key1, LockType.Shared); - lockKeys.Add(key1); - Status foundStatus = luContext.Read(ref key1, ref input, ref output, Empty.Default, 0); + ++expectedS; + long output = 0; + blt.IncrementS(ref lockKeys[idx]); + Status foundStatus = luContext.Read(ref lockKeys[idx].Key, ref input, ref output, Empty.Default, 0); Assert.IsTrue(foundStatus.IsPending); } - CompletedOutputIterator outputs; + // We did not lock all keys, only the "Action" ones - one lock per bucket, all shared in this test + AssertTotalLockCounts(0, expectedS); + + CompletedOutputIterator outputs; if (syncMode == SyncMode.Sync) { luContext.CompletePendingWithOutputs(out outputs, wait: true); @@ -291,8 +438,14 @@ public async Task TestShiftHeadAddressLUC([Values] SyncMode syncMode) luContext.BeginUnsafe(); } - foreach (var key in lockKeys) - luContext.Unlock(key, LockType.Shared); + foreach (var idx in EnumActionKeyIndices(lockKeys, LockOperationType.Unlock)) + { + luContext.Unlock(lockKeys, idx, 1); + blt.DecrementS(ref lockKeys[idx]); + } + + blt.AssertNoLocks(); + AssertTotalLockCounts(0, 0); int count = 0; while (outputs.Next()) @@ -301,7 +454,7 @@ public async Task TestShiftHeadAddressLUC([Values] SyncMode syncMode) Assert.AreEqual(outputs.Current.Key + numRecords, outputs.Current.Output); } outputs.Dispose(); - Assert.AreEqual(NumRecs, count); + Assert.AreEqual(expectedS, count); } finally { @@ -313,7 +466,7 @@ public async Task TestShiftHeadAddressLUC([Values] SyncMode syncMode) [Test] [Category(LockableUnsafeContextTestCategory)] [Category(SmokeTestCategory)] - public void InMemorySimpleLockTxnTest([Values] ResultLockTarget resultLockTarget, [Values] ReadCopyDestination readCopyDestination, + public void InMemorySimpleLockTxnTest([Values] ResultLockTarget resultLockTarget, [Values] FlushMode flushMode, [Values(Phase.REST, Phase.INTERMEDIATE)] Phase phase, [Values(UpdateOp.Upsert, UpdateOp.RMW)] UpdateOp updateOp) { @@ -321,102 +474,116 @@ public void InMemorySimpleLockTxnTest([Values] ResultLockTarget resultLockTarget PrepareRecordLocation(flushMode); // SetUp also reads this to determine whether to supply ReadCacheSettings. 
If ReadCache is specified it wins over CopyToTail. - bool useReadCache = readCopyDestination == ReadCopyDestination.ReadCache && flushMode == FlushMode.OnDisk; var useRMW = updateOp == UpdateOp.RMW; const int readKey24 = 24, readKey51 = 51; - int resultKey = resultLockTarget == ResultLockTarget.LockTable ? numRecords + 1 : readKey24 + readKey51; - int resultValue = -1; - int expectedResult = (readKey24 + readKey51) * valueMult; + long resultKey = resultLockTarget == ResultLockTarget.LockTable ? numRecords + 1 : readKey24 + readKey51; + long resultValue; + long expectedResult = (readKey24 + readKey51) * valueMult; Status status; - Dictionary locks = new(); + BucketLockTracker blt = new(); var luContext = session.LockableUnsafeContext; luContext.BeginUnsafe(); luContext.BeginLockable(); + + var keys = new[] + { + new FixedLengthLockableKeyStruct(readKey24, LockType.Shared, luContext), // Source, shared + new FixedLengthLockableKeyStruct(readKey51, LockType.Shared, luContext), // Source, shared + new FixedLengthLockableKeyStruct(resultKey, LockType.Exclusive, luContext), // Destination, exclusive + }; + luContext.SortLockCodes(keys); + try { - { // key scope - // Get initial source values - int key = readKey24; - luContext.Lock(key, LockType.Shared); - AssertIsLocked(luContext, key, xlock: false, slock: true); - locks[key] = LockType.Shared; - - key = readKey51; - luContext.Lock(key, LockType.Shared); - locks[key] = LockType.Shared; - AssertIsLocked(luContext, key, xlock: false, slock: true); - - // Lock destination value. - luContext.Lock(resultKey, LockType.Exclusive); - locks[resultKey] = LockType.Exclusive; - AssertIsLocked(luContext, resultKey, xlock: true, slock: false); - - // Re-get source values, to verify (e.g. they may be in readcache now). - // We just locked this above, but for FlushMode.OnDisk it will be in the LockTable and will still be PENDING. - status = luContext.Read(readKey24, out var readValue24); - if (flushMode == FlushMode.OnDisk) - { - if (status.IsPending) - { - luContext.CompletePendingWithOutputs(out var completedOutputs, wait: true); - Assert.True(completedOutputs.Next()); - readValue24 = completedOutputs.Current.Output; - Assert.False(completedOutputs.Next()); - completedOutputs.Dispose(); - } - } - else - { - Assert.IsFalse(status.IsPending, status.ToString()); - } + luContext.Lock(keys); - status = luContext.Read(readKey51, out var readValue51); - if (flushMode == FlushMode.OnDisk) - { - if (status.IsPending) - { - luContext.CompletePendingWithOutputs(out var completedOutputs, wait: true); - Assert.True(completedOutputs.Next()); - readValue51 = completedOutputs.Current.Output; - Assert.False(completedOutputs.Next()); - completedOutputs.Dispose(); - } - } + // Verify locks. Note that while we do not increment lock counts for multiple keys (each bucket gets a single lock per thread, + // shared or exclusive), each key mapping to that bucket will report 'locked'. + foreach (var key in keys) + { + if (key.Key == resultKey) + AssertIsLocked(key, xlock: true, slock: false); else + AssertIsLocked(key, xlock: false, slock: true); + } + + // Use blt because the counts are not 1:1 with keys if there are multiple keys in the same bucket + foreach (var idx in EnumActionKeyIndices(keys, LockOperationType.Lock)) + blt.Increment(ref keys[idx]); + AssertTotalLockCounts(ref blt); + + // Re-get source values, to verify (e.g. they may be in readcache now). + // We just locked this above, but for FlushMode.OnDisk it will be in the LockTable and will still be PENDING. 
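For `FlushMode.OnDisk` the record was evicted, so the `Read` that follows returns a pending status and is drained with `CompletePendingWithOutputs`. Since that drain sequence recurs throughout this diff, here is a sketch of it as a reusable helper; the generic parameters on `ClientSession` and the `IFunctions` constraint are reconstructed for the long/long stores these tests use and should be read as assumptions, not as the PR's code:

```csharp
using FASTER.core;
using NUnit.Framework;

static class PendingReadHelper
{
    // Completes one pending operation and returns its output, asserting that
    // exactly one operation was outstanding -- the pattern used inline below.
    internal static long GetSinglePendingOutput<TFunctions>(
        ClientSession<long, long, long, long, Empty, TFunctions> session)
        where TFunctions : IFunctions<long, long, long, long, Empty>
    {
        session.CompletePendingWithOutputs(out var completedOutputs, wait: true);
        Assert.True(completedOutputs.Next());
        long output = completedOutputs.Current.Output;
        Assert.False(completedOutputs.Next());
        completedOutputs.Dispose();
        return output;
    }
}
```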
+ status = luContext.Read(readKey24, out var readValue24); + if (flushMode == FlushMode.OnDisk) + { + if (status.IsPending) { - Assert.IsFalse(status.IsPending, status.ToString()); + luContext.CompletePendingWithOutputs(out var completedOutputs, wait: true); + Assert.True(completedOutputs.Next()); + readValue24 = completedOutputs.Current.Output; + Assert.AreEqual(24 * valueMult, readValue24); + Assert.False(completedOutputs.Next()); + completedOutputs.Dispose(); } + } + else + { + Assert.IsFalse(status.IsPending, status.ToString()); + } - // Set the phase to Phase.INTERMEDIATE to test the non-Phase.REST blocks - session.ctx.phase = phase; - int dummyInOut = 0; - status = useRMW - ? luContext.RMW(ref resultKey, ref expectedResult, ref dummyInOut, out RecordMetadata recordMetadata) - : luContext.Upsert(ref resultKey, ref dummyInOut, ref expectedResult, ref dummyInOut, out recordMetadata); - if (flushMode == FlushMode.OnDisk) + status = luContext.Read(readKey51, out var readValue51); + if (flushMode == FlushMode.OnDisk) + { + if (status.IsPending) { - if (status.IsPending) - { - luContext.CompletePendingWithOutputs(out var completedOutputs, wait: true); - Assert.True(completedOutputs.Next()); - resultValue = completedOutputs.Current.Output; - Assert.False(completedOutputs.Next()); - completedOutputs.Dispose(); - } + luContext.CompletePendingWithOutputs(out var completedOutputs, wait: true); + Assert.True(completedOutputs.Next()); + readValue51 = completedOutputs.Current.Output; + Assert.AreEqual(51 * valueMult, readValue51); + Assert.False(completedOutputs.Next()); + completedOutputs.Dispose(); } - else + } + else + { + Assert.IsFalse(status.IsPending, status.ToString()); + } + + // Set the phase to Phase.INTERMEDIATE to test the non-Phase.REST blocks + session.ctx.phase = phase; + long dummyInOut = 0; + status = useRMW + ? 
luContext.RMW(ref resultKey, ref expectedResult, ref dummyInOut, out RecordMetadata recordMetadata) + : luContext.Upsert(ref resultKey, ref dummyInOut, ref expectedResult, ref dummyInOut, out recordMetadata); + if (flushMode == FlushMode.OnDisk) + { + if (status.IsPending) { - Assert.IsFalse(status.IsPending, status.ToString()); + luContext.CompletePendingWithOutputs(out var completedOutputs, wait: true); + Assert.True(completedOutputs.Next()); + resultValue = completedOutputs.Current.Output; + Assert.AreEqual(expectedResult, resultValue); + Assert.False(completedOutputs.Next()); + completedOutputs.Dispose(); } - - // Reread the destination to verify - status = luContext.Read(resultKey, out resultValue); + } + else + { Assert.IsFalse(status.IsPending, status.ToString()); - Assert.AreEqual(expectedResult, resultValue); } - foreach (var key in locks.Keys.OrderBy(key => -key)) - luContext.Unlock(key, locks[key]); + + // Reread the destination to verify + status = luContext.Read(resultKey, out resultValue); + Assert.IsFalse(status.IsPending, status.ToString()); + Assert.AreEqual(expectedResult, resultValue); + + luContext.Unlock(keys); + + foreach (var idx in EnumActionKeyIndices(keys, LockOperationType.Lock)) + blt.Decrement(ref keys[idx]); + AssertNoLocks(ref blt); } catch (Exception) { @@ -433,7 +600,7 @@ public void InMemorySimpleLockTxnTest([Values] ResultLockTarget resultLockTarget status = session.Read(resultKey, out resultValue); Assert.IsFalse(status.IsPending, status.ToString()); Assert.AreEqual(expectedResult, resultValue); - EnsureNoLocks(); + AssertTotalLockCounts(0, 0); } [Test] @@ -447,21 +614,46 @@ public void InMemoryLongLockTest([Values] ResultLockTarget resultLockTarget, [Va bool initialDestWillBeLockTable = resultLockTarget == ResultLockTarget.LockTable || flushMode == FlushMode.OnDisk; const int readKey24 = 24, readKey51 = 51, valueMult2 = 10; - int resultKey = initialDestWillBeLockTable ? numRecords + 1 : readKey24 + readKey51; - int resultValue; + long resultKey = initialDestWillBeLockTable ? numRecords + 1 : readKey24 + readKey51; + long resultValue; int expectedResult = (readKey24 + readKey51) * valueMult * valueMult2; var useRMW = updateOp == UpdateOp.RMW; Status status; + BucketLockTracker blt = new(); var luContext = session.LockableUnsafeContext; luContext.BeginUnsafe(); luContext.BeginLockable(); + var keys = new[] + { + new FixedLengthLockableKeyStruct(readKey24, LockType.Shared, luContext), // Source, shared + new FixedLengthLockableKeyStruct(readKey51, LockType.Shared, luContext), // Source, shared + new FixedLengthLockableKeyStruct(resultKey, LockType.Exclusive, luContext), // Destination, exclusive + }; + + luContext.SortLockCodes(keys); + + var buckets = keys.Select(key => fht.LockTable.GetBucketIndex(key.LockCode)).ToArray(); + try { - luContext.Lock(readKey24, LockType.Shared); - luContext.Lock(readKey51, LockType.Shared); - luContext.Lock(resultKey, LockType.Exclusive); + luContext.Lock(keys); + + // Verify locks. Note that while we do not increment lock counts for multiple keys (each bucket gets a single lock per thread, + // shared or exclusive), each key mapping to that bucket will report 'locked'. 
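The comment above captures the crux of this PR's locking redesign: manual locks now live in the hash index's own buckets (the `OverflowBucketLockTable` exercised by `OverflowBucketLockTableTests`), so a latch protects a bucket rather than a single key, and `SortLockCodes` orders keys by lock code so every session latches buckets in the same order, which is what avoids deadlock. A fragment illustrating the collision behavior, reusing the `genHashCode` trick from `ManualLockCollidingHashCodes` earlier in this file; `fht` and `luContext` come from the surrounding fixture, and the `<long>` type argument on `FixedLengthLockableKeyStruct` is an assumption:

```csharp
// Two distinct keys whose hash codes share their low-order bits map to the
// same bucket, so one bucket latch makes *both* keys report "locked".
uint bucketIndex = 42;
long genHashCode(uint uniquifier) => ((long)uniquifier << 30) | bucketIndex;

var keyA = new FixedLengthLockableKeyStruct<long>(101L, genHashCode(1), LockType.Shared, luContext);
var keyB = new FixedLengthLockableKeyStruct<long>(102L, genHashCode(2), LockType.Shared, luContext);

// Different keys, different hash codes, same bucket index:
Assert.AreEqual(fht.LockTable.GetBucketIndex(keyA.LockCode),
                fht.LockTable.GetBucketIndex(keyB.LockCode));

// This is why the tests track expected counts per bucket (BucketLockTracker)
// rather than per key.
```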
+ foreach (var key in keys) + { + if (key.Key == resultKey) + AssertIsLocked(key, xlock: true, slock: false); + else + AssertIsLocked(key, xlock: false, slock: true); + } + + // Use blt because the counts are not 1:1 with keys if there are multiple keys in the same bucket + foreach (var idx in EnumActionKeyIndices(keys, LockOperationType.Lock)) + blt.Increment(ref keys[idx]); + AssertTotalLockCounts(ref blt); status = luContext.Read(readKey24, out var readValue24); if (flushMode == FlushMode.OnDisk) @@ -475,7 +667,7 @@ public void InMemoryLongLockTest([Values] ResultLockTarget resultLockTarget, [Va Assert.IsFalse(status.IsPending, status.ToString()); Assert.AreEqual(readKey24 * valueMult, readValue24); - // We just locked this above, but for FlushMode.OnDisk it will be in the LockTable and will still be PENDING. + // We just locked this above, but for FlushMode.OnDisk it will still be PENDING. status = luContext.Read(readKey51, out var readValue51); if (flushMode == FlushMode.OnDisk) { @@ -516,9 +708,11 @@ public void InMemoryLongLockTest([Values] ResultLockTarget resultLockTarget, [Va Assert.IsFalse(status.IsPending, status.ToString()); Assert.AreEqual(expectedResult, resultValue); - luContext.Unlock(resultKey, LockType.Exclusive); - luContext.Unlock(readKey51, LockType.Shared); - luContext.Unlock(readKey24, LockType.Shared); + luContext.Unlock(keys); + + foreach (var idx in EnumActionKeyIndices(keys, LockOperationType.Lock)) + blt.Decrement(ref keys[idx]); + AssertNoLocks(ref blt); } catch (Exception) { @@ -535,37 +729,41 @@ public void InMemoryLongLockTest([Values] ResultLockTarget resultLockTarget, [Va status = session.Read(resultKey, out resultValue); Assert.IsFalse(status.IsPending, status.ToString()); Assert.AreEqual(expectedResult, resultValue); - EnsureNoLocks(); + AssertTotalLockCounts(0, 0); } [Test] [Category(LockableUnsafeContextTestCategory)] [Category(SmokeTestCategory)] +#pragma warning disable IDE0060 // Remove unused parameter: readCopyDestination is used by Setup public void InMemoryDeleteTest([Values] ResultLockTarget resultLockTarget, [Values] ReadCopyDestination readCopyDestination, [Values(FlushMode.NoFlush, FlushMode.ReadOnly)] FlushMode flushMode, [Values(Phase.REST, Phase.INTERMEDIATE)] Phase phase) +#pragma warning restore IDE0060 // Remove unused parameter { // Phase.INTERMEDIATE is to test the non-Phase.REST blocks Populate(); PrepareRecordLocation(flushMode); - Dictionary locks = new(); + BucketLockTracker blt = new(); // SetUp also reads this to determine whether to supply ReadCacheSettings. If ReadCache is specified it wins over CopyToTail. - bool useReadCache = readCopyDestination == ReadCopyDestination.ReadCache && flushMode == FlushMode.OnDisk; - bool initialDestWillBeLockTable = resultLockTarget == ResultLockTarget.LockTable || flushMode == FlushMode.OnDisk; - int resultKey = resultLockTarget == ResultLockTarget.LockTable ? numRecords + 1 : 75; + long resultKey = resultLockTarget == ResultLockTarget.LockTable ? numRecords + 1 : 75; Status status; var luContext = session.LockableUnsafeContext; luContext.BeginUnsafe(); luContext.BeginLockable(); + var keyVec = new[] { new FixedLengthLockableKeyStruct(resultKey, LockType.Exclusive, luContext) }; + try { // Lock destination value. 
- luContext.Lock(resultKey, LockType.Exclusive); - locks[resultKey] = LockType.Exclusive; - AssertIsLocked(luContext, resultKey, xlock: true, slock: false); + luContext.Lock(keyVec); + AssertIsLocked(ref keyVec[0], xlock: true, slock: false); + + blt.Increment(ref keyVec[0]); + AssertTotalLockCounts(ref blt); // Set the phase to Phase.INTERMEDIATE to test the non-Phase.REST blocks session.ctx.phase = phase; @@ -576,8 +774,10 @@ public void InMemoryDeleteTest([Values] ResultLockTarget resultLockTarget, [Valu status = luContext.Read(resultKey, out var _); Assert.IsFalse(status.Found, status.ToString()); - foreach (var key in locks.Keys.OrderBy(key => key)) - luContext.Unlock(key, locks[key]); + luContext.Unlock(keyVec); + blt.Decrement(ref keyVec[0]); + + AssertNoLocks(ref blt); } catch (Exception) { @@ -593,13 +793,13 @@ public void InMemoryDeleteTest([Values] ResultLockTarget resultLockTarget, [Valu // Verify reading the destination from the full session. status = session.Read(resultKey, out var _); Assert.IsFalse(status.Found, status.ToString()); - EnsureNoLocks(); + AssertTotalLockCounts(0, 0); } [Test] [Category(LockableUnsafeContextTestCategory)] [Category(SmokeTestCategory)] - public void StressLocks([Values(1, 8)] int numLockThreads, [Values(1, 8)] int numOpThreads) + public void StressManualLocks([Values(1, 8)] int numLockThreads, [Values(0, 1, 8)] int numOpThreads) { Populate(); @@ -609,9 +809,16 @@ public void StressLocks([Values(1, 8)] int numLockThreads, [Values(1, 8)] int nu const int numIncrement = 5; const int numIterations = 1000; - void runLockThread(int tid) + IEnumerable enumKeys(Random rng) + { + for (var key = baseKey + rng.Next(numIncrement); key < baseKey + numKeys; key += rng.Next(1, numIncrement)) + yield return key; + } + + void runManualLockThread(int tid) { - Dictionary locks = new(); + BucketLockTracker blt = new(); + Random rng = new(tid + 101); using var localSession = fht.For(new LockableUnsafeFunctions()).NewSession(); @@ -619,41 +826,45 @@ void runLockThread(int tid) luContext.BeginUnsafe(); luContext.BeginLockable(); - for (var iteration = 0; iteration < numIterations; ++iteration) + IEnumerable> enumKeysToLock() { - for (var key = baseKey + rng.Next(numIncrement); key < baseKey + numKeys; key += rng.Next(1, numIncrement)) + foreach (var key in enumKeys(rng)) { var lockType = rng.Next(100) < 60 ? 
LockType.Shared : LockType.Exclusive; - luContext.Lock(key, lockType); - locks[key] = lockType; + yield return new(key, lockType, luContext); } + } - foreach (var key in locks.Keys.OrderBy(key => key)) - luContext.Unlock(key, locks[key]); - locks.Clear(); + for (var iteration = 0; iteration < numIterations; ++iteration) + { + var keys = enumKeysToLock().ToArray(); + FixedLengthLockableKeyStruct.Sort(keys, luContext); + luContext.Lock(keys); + luContext.Unlock(keys); } luContext.EndLockable(); luContext.EndUnsafe(); } - void runOpThread(int tid) + void runLTransientLockOpThread(int tid) { Random rng = new(tid + 101); using var localSession = fht.For(new LockableUnsafeFunctions()).NewSession(); + var basicContext = localSession.BasicContext; for (var iteration = 0; iteration < numIterations; ++iteration) { - for (var key = baseKey + rng.Next(numIncrement); key < baseKey + numKeys; key += rng.Next(1, numIncrement)) + foreach (var key in enumKeys(rng)) { var rand = rng.Next(100); if (rand < 33) - localSession.Read(key); + basicContext.Read(key); else if (rand < 66) - localSession.Upsert(key, key * valueMult); + basicContext.Upsert(key, key * valueMult); else - localSession.RMW(key, key * valueMult); + basicContext.RMW(key, key * valueMult); } } } @@ -665,73 +876,70 @@ void runOpThread(int tid) { var tid = t; if (t <= numLockThreads) - tasks[t] = Task.Factory.StartNew(() => runLockThread(tid)); + tasks[t] = Task.Factory.StartNew(() => runManualLockThread(tid)); else - tasks[t] = Task.Factory.StartNew(() => runOpThread(tid)); + tasks[t] = Task.Factory.StartNew(() => runLTransientLockOpThread(tid)); } Task.WaitAll(tasks); - EnsureNoLocks(); + AssertTotalLockCounts(0, 0); } - void AddLockTableEntry(LockableUnsafeContext> luContext, int key, bool immutable) + FixedLengthLockableKeyStruct AddLockTableEntry(LockableUnsafeContext> luContext, long key) { - luContext.Lock(key, LockType.Exclusive); - var found = fht.LockTable.TryGet(ref key, out RecordInfo recordInfo); + var keyVec = new[] { new FixedLengthLockableKeyStruct(key, LockType.Exclusive, luContext) }; + luContext.Lock(keyVec); - // Immutable locks in the ReadOnly region; it does NOT create a LockTable entry - if (immutable) - { - Assert.IsFalse(found); - return; - } - Assert.IsTrue(found); - Assert.IsTrue(recordInfo.IsLockedExclusive); + HashEntryInfo hei = new(comparer.GetHashCode64(ref key)); + PopulateHei(ref hei); + + var lockState = fht.LockTable.GetLockState(ref key, ref hei); + + Assert.IsTrue(lockState.IsFound); + Assert.IsTrue(lockState.IsLockedExclusive); + return keyVec[0]; } - void VerifyAndUnlockSplicedInKey(LockableUnsafeContext> luContext, int expectedKey) + void VerifyAndUnlockSplicedInKey(LockableUnsafeContext> luContext, long expectedKey) { // Scan to the end of the readcache chain and verify we inserted the value. var (_, pa) = ChainTests.SkipReadCacheChain(fht, expectedKey); var storedKey = fht.hlog.GetKey(pa); Assert.AreEqual(expectedKey, storedKey); - // This is called after we've transferred from LockTable to log. - Assert.False(fht.LockTable.TryGet(ref expectedKey, out _)); - - // Verify we've transferred the expected locks. - ref RecordInfo recordInfo = ref fht.hlog.GetInfo(pa); - Assert.IsTrue(recordInfo.IsLockedExclusive); - Assert.IsFalse(recordInfo.IsLockedShared); - - // Now unlock it; we're done. 
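AddLockTableEntry above also shows the probe pattern the rewritten tests use to inspect latch state directly: build a HashEntryInfo from the key's hash, populate it with FindOrCreateTag, then query LockTable.GetLockState. Extracted into a stand-alone helper, the pattern looks roughly like this (a sketch assuming a long-keyed store and the internals access these tests have):

    // Sketch: reports whether the bucket that 'key' maps to is latched exclusively.
    static bool IsBucketLockedExclusive(FasterKV<long, long> fht, long key)
    {
        HashEntryInfo hei = new(fht.comparer.GetHashCode64(ref key));
        fht.FindOrCreateTag(ref hei, fht.Log.BeginAddress);   // populate the bucket info
        var lockState = fht.LockTable.GetLockState(ref key, ref hei);
        return lockState.IsFound && lockState.IsLockedExclusive;
    }

Note that, as with the tests, this reads bucket-level state: any other key mapping to the same bucket would report the same answer.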
- luContext.Unlock(expectedKey, LockType.Exclusive); + var keyVec = new[] { new FixedLengthLockableKeyStruct(expectedKey, LockType.Exclusive, luContext) }; + luContext.Unlock(keyVec); } [Test] [Category(LockableUnsafeContextTestCategory)] [Category(SmokeTestCategory)] - public void TransferFromLockTableToCTTTest() + public void VerifyLocksAfterReadAndCTTTest() { Populate(); fht.Log.FlushAndEvict(wait: true); - using var session = fht.NewSession(new SimpleFunctions()); + using var session = fht.NewSession(new SimpleFunctions()); var luContext = session.LockableUnsafeContext; - int input = 0, output = 0, key = transferToExistingKey; + long input = 0, output = 0, key = 24; ReadOptions readOptions = new() { ReadFlags = ReadFlags.CopyReadsToTail}; + BucketLockTracker blt = new(); luContext.BeginUnsafe(); luContext.BeginLockable(); try { - AddLockTableEntry(luContext, key, immutable: false); + var keyStruct = AddLockTableEntry(luContext, key); + blt.Increment(ref keyStruct); + AssertTotalLockCounts(ref blt); var status = luContext.Read(ref key, ref input, ref output, ref readOptions, out _); Assert.IsTrue(status.IsPending, status.ToString()); luContext.CompletePending(wait: true); VerifyAndUnlockSplicedInKey(luContext, key); + blt.Decrement(ref keyStruct); + AssertNoLocks(ref blt); } catch (Exception) { @@ -748,28 +956,32 @@ public void TransferFromLockTableToCTTTest() [Test] [Category(LockableUnsafeContextTestCategory)] [Category(SmokeTestCategory)] - public void TransferFromEvictionToLockTable() + public void VerifyCountsAfterFlushAndEvict() { Populate(); - using var session = fht.NewSession(new SimpleFunctions()); + using var session = fht.NewSession(new SimpleFunctions()); var luContext = session.LockableUnsafeContext; - int key = transferToExistingKey; + BucketLockTracker blt = new(); + long key = 24; luContext.BeginUnsafe(); luContext.BeginLockable(); try { - luContext.Lock(ref key, LockType.Exclusive); + var keyVec = new[] { new FixedLengthLockableKeyStruct(key, LockType.Exclusive, luContext) }; + luContext.Lock(keyVec); + blt.Increment(ref keyVec[0]); + AssertTotalLockCounts(ref blt); - // Force the eviction which should transfer to lock table. fht.Log.FlushAndEvict(wait: true); + AssertTotalLockCounts(1, 0); - // Verify the lock table entry. 
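The removed assertions here, and the AssertTotalLockCounts(1, 0) that replaces them, capture the central change: locks now live on hash buckets rather than on records or a separate lock table, so FlushAndEvict has nothing to transfer. Condensed, the sequence this test verifies is roughly the following (a sketch assuming a long/long store and an active LockableUnsafeContext, as above):

    // A manual lock taken before eviction is still held afterward, because the
    // exclusive latch is on the hash bucket, not on the evicted record.
    var keyVec = new[] { new FixedLengthLockableKeyStruct<long>(24, LockType.Exclusive, luContext) };
    luContext.Lock(keyVec);

    fht.Log.FlushAndEvict(wait: true);   // record leaves memory...
    AssertTotalLockCounts(1, 0);         // ...but the bucket latch remains

    luContext.Unlock(keyVec);
    AssertTotalLockCounts(0, 0);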
- Assert.IsTrue(fht.LockTable.IsActive, "Lock Table should be active"); - Assert.IsTrue(fht.LockTable.ContainsKey(ref key, fht.Comparer.GetHashCode64(ref key))); + luContext.Unlock(keyVec); + blt.Decrement(ref keyVec[0]); - luContext.Unlock(ref key, LockType.Exclusive); + blt.AssertNoLocks(); + AssertNoLocks(ref blt); } catch (Exception) { @@ -796,34 +1008,30 @@ void PopulateAndEvict(bool immutable = false) [Test] [Category(LockableUnsafeContextTestCategory)] [Category(SmokeTestCategory)] - public void TransferFromLockTableToUpsertTest([Values] ChainTests.RecordRegion recordRegion) + public void VerifyCountAfterUpsertToTailTest([Values] ChainTests.RecordRegion recordRegion) { PopulateAndEvict(recordRegion == ChainTests.RecordRegion.Immutable); - using var session = fht.NewSession(new SimpleFunctions()); + using var session = fht.NewSession(new SimpleFunctions()); var luContext = session.LockableUnsafeContext; + BucketLockTracker blt = new(); luContext.BeginUnsafe(); luContext.BeginLockable(); - int key = -1; + FixedLengthLockableKeyStruct keyStruct = default; try { if (recordRegion == ChainTests.RecordRegion.Immutable || recordRegion == ChainTests.RecordRegion.OnDisk) - { - key = transferToExistingKey; - AddLockTableEntry(luContext, key, recordRegion == ChainTests.RecordRegion.Immutable); - var status = luContext.Upsert(key, key * valueMult); - Assert.IsTrue(status.Record.Created, status.ToString()); - } + keyStruct = AddLockTableEntry(luContext, useExistingKey); else - { - key = transferToNewKey; - AddLockTableEntry(luContext, key, immutable: false); - var status = luContext.Upsert(key, key * valueMult); - Assert.IsTrue(status.Record.Created, status.ToString()); - } + keyStruct = AddLockTableEntry(luContext, useNewKey); + blt.Increment(ref keyStruct); + var status = luContext.Upsert(keyStruct.Key, keyStruct.Key * valueMult); + Assert.IsTrue(status.Record.Created, status.ToString()); - VerifyAndUnlockSplicedInKey(luContext, key); + VerifyAndUnlockSplicedInKey(luContext, keyStruct.Key); + blt.Decrement(ref keyStruct); + AssertNoLocks(ref blt); } catch (Exception) { @@ -840,34 +1048,37 @@ public void TransferFromLockTableToUpsertTest([Values] ChainTests.RecordRegion r [Test] [Category(LockableUnsafeContextTestCategory)] [Category(SmokeTestCategory)] - public void TransferFromLockTableToRMWTest([Values] ChainTests.RecordRegion recordRegion) + public void VerifyCountAfterRMWToTailTest([Values] ChainTests.RecordRegion recordRegion) { PopulateAndEvict(recordRegion == ChainTests.RecordRegion.Immutable); - using var session = fht.NewSession(new SimpleFunctions()); + using var session = fht.NewSession(new SimpleFunctions()); var luContext = session.LockableUnsafeContext; + BucketLockTracker blt = new(); luContext.BeginUnsafe(); luContext.BeginLockable(); - int key = -1; + FixedLengthLockableKeyStruct keyStruct = default; try { if (recordRegion == ChainTests.RecordRegion.Immutable || recordRegion == ChainTests.RecordRegion.OnDisk) { - key = transferToExistingKey; - AddLockTableEntry(luContext, key, recordRegion == ChainTests.RecordRegion.Immutable); - var status = luContext.RMW(key, key * valueMult); + keyStruct = AddLockTableEntry(luContext, useExistingKey); + var status = luContext.RMW(keyStruct.Key, keyStruct.Key * valueMult); Assert.IsTrue(recordRegion == ChainTests.RecordRegion.OnDisk ? 
status.IsPending : status.Found); luContext.CompletePending(wait: true); } else - { - key = transferToNewKey; - AddLockTableEntry(luContext, key, immutable: false); - var status = luContext.RMW(key, key * valueMult); + { + keyStruct = AddLockTableEntry(luContext, useNewKey); + var status = luContext.RMW(keyStruct.Key, keyStruct.Key * valueMult); Assert.IsFalse(status.Found, status.ToString()); } - VerifyAndUnlockSplicedInKey(luContext, key); + blt.Increment(ref keyStruct); + + VerifyAndUnlockSplicedInKey(luContext, keyStruct.Key); + blt.Decrement(ref keyStruct); + AssertNoLocks(ref blt); } catch (Exception) { @@ -884,39 +1095,39 @@ public void TransferFromLockTableToRMWTest([Values] ChainTests.RecordRegion reco [Test] [Category(LockableUnsafeContextTestCategory)] [Category(SmokeTestCategory)] - public void TransferFromLockTableToDeleteTest([Values] ChainTests.RecordRegion recordRegion) + public void VerifyCountAfterDeleteToTailTest([Values] ChainTests.RecordRegion recordRegion) { PopulateAndEvict(recordRegion == ChainTests.RecordRegion.Immutable); - using var session = fht.NewSession(new SimpleFunctions()); + using var session = fht.NewSession(new SimpleFunctions()); var luContext = session.LockableUnsafeContext; + BucketLockTracker blt = new(); luContext.BeginUnsafe(); luContext.BeginLockable(); - int key = -1; + FixedLengthLockableKeyStruct keyStruct = default; try { if (recordRegion == ChainTests.RecordRegion.Immutable || recordRegion == ChainTests.RecordRegion.OnDisk) { - key = transferToExistingKey; - AddLockTableEntry(luContext, key, recordRegion == ChainTests.RecordRegion.Immutable); - var status = luContext.Delete(key); + keyStruct = AddLockTableEntry(luContext, useExistingKey); + blt.Increment(ref keyStruct); + var status = luContext.Delete(keyStruct.Key); // Delete does not search outside mutable region so the key will not be found Assert.IsTrue(!status.Found && status.Record.Created, status.ToString()); - - VerifyAndUnlockSplicedInKey(luContext, key); } else { - key = transferToNewKey; - AddLockTableEntry(luContext, key, immutable: false); - var status = luContext.Delete(key); + keyStruct = AddLockTableEntry(luContext, useNewKey); + blt.Increment(ref keyStruct); + var status = luContext.Delete(keyStruct.Key); Assert.IsFalse(status.Found, status.ToString()); - - // The mutable portion of this test does not transfer because the key is not found - luContext.Unlock(key, LockType.Exclusive); } + + VerifyAndUnlockSplicedInKey(luContext, keyStruct.Key); + blt.Decrement(ref keyStruct); + AssertNoLocks(ref blt); } catch (Exception) { @@ -936,38 +1147,42 @@ public void TransferFromLockTableToDeleteTest([Values] ChainTests.RecordRegion r public void LockAndUnlockInLockTableOnlyTest() { // For this, just don't load anything, and it will happen in lock table. - using var session = fht.NewSession(new SimpleFunctions()); + using var session = fht.NewSession(new SimpleFunctions()); var luContext = session.LockableUnsafeContext; + BucketLockTracker blt = new(); + + FixedLengthLockableKeyStruct createKey(long key) => new(key, (key & 1) == 0 ? LockType.Exclusive : LockType.Shared, luContext); - Dictionary locks = new(); var rng = new Random(101); - foreach (var key in Enumerable.Range(0, numRecords).Select(ii => rng.Next(numRecords))) - locks[key] = (key & 1) == 0 ? 
LockType.Exclusive : LockType.Shared; + var keyVec = Enumerable.Range(0, numRecords).Select(ii => createKey(rng.Next(numRecords))).ToArray(); luContext.BeginUnsafe(); luContext.BeginLockable(); try { + fht.LockTable.SortLockCodes(keyVec); + luContext.Lock(keyVec); + foreach (var idx in EnumActionKeyIndices(keyVec, LockOperationType.Lock)) + blt.Increment(ref keyVec[idx]); + AssertTotalLockCounts(ref blt); - // For this single-threaded test, the locking does not really have to be in order, but for consistency do it. - foreach (var key in locks.Keys.OrderBy(k => k)) - luContext.Lock(key, locks[key]); - - Assert.IsTrue(LockTableHasEntries()); - Assert.AreEqual(locks.Count, LockTableEntryCount()); - - foreach (var key in locks.Keys.OrderBy(k => -k)) + foreach (var idx in EnumActionKeyIndices(keyVec, LockOperationType.Lock)) { - var localKey = key; // can't ref the iteration variable - var found = fht.LockTable.TryGet(ref localKey, out RecordInfo recordInfo); - Assert.IsTrue(found); - var lockType = locks[key]; - Assert.AreEqual(lockType == LockType.Exclusive, recordInfo.IsLockedExclusive); - Assert.AreEqual(lockType != LockType.Exclusive, recordInfo.IsLockedShared); - - luContext.Unlock(key, lockType); - Assert.IsFalse(fht.LockTable.TryGet(ref localKey, out _)); + ref var key = ref keyVec[idx]; + HashEntryInfo hei = new(key.KeyHash); + PopulateHei(ref hei); + var lockState = fht.LockTable.GetLockState(ref key.Key, ref hei); + Assert.IsTrue(lockState.IsFound); + Assert.AreEqual(key.LockType == LockType.Exclusive, lockState.IsLockedExclusive); + if (key.LockType == LockType.Shared) + Assert.IsTrue(lockState.IsLocked); // Could be either shared or exclusive; we only lock the bucket once per Lock() call + + luContext.Unlock(keyVec, idx, 1); + blt.Decrement(ref key); } + + blt.AssertNoLocks(); + AssertTotalLockCounts(0, 0); } catch (Exception) { @@ -979,35 +1194,33 @@ public void LockAndUnlockInLockTableOnlyTest() luContext.EndLockable(); luContext.EndUnsafe(); } - - Assert.IsFalse(LockTableHasEntries()); - Assert.AreEqual(0, LockTableEntryCount()); } [Test] [Category(LockableUnsafeContextTestCategory)] [Category(SmokeTestCategory)] - public void TransferFromReadOnlyToUpdateRecordTest([Values] UpdateOp updateOp) + public void VerifyCountAfterReadOnlyToUpdateRecordTest([Values] UpdateOp updateOp) { Populate(); this.fht.Log.ShiftReadOnlyAddress(this.fht.Log.TailAddress, wait: true); - const int key = 42; - static int getValue(int key) => key + valueMult; + static long getValue(long key) => key + valueMult; var luContext = session.LockableUnsafeContext; luContext.BeginUnsafe(); luContext.BeginLockable(); + var keyVec = new[] { new FixedLengthLockableKeyStruct(42, LockType.Exclusive, luContext) }; + try { - luContext.Lock(key, LockType.Exclusive); + luContext.Lock(keyVec); var status = updateOp switch { - UpdateOp.Upsert => luContext.Upsert(key, getValue(key)), - UpdateOp.RMW => luContext.RMW(key, getValue(key)), - UpdateOp.Delete => luContext.Delete(key), + UpdateOp.Upsert => luContext.Upsert(keyVec[0].Key, getValue(keyVec[0].Key)), + UpdateOp.RMW => luContext.RMW(keyVec[0].Key, getValue(keyVec[0].Key)), + UpdateOp.Delete => luContext.Delete(keyVec[0].Key), _ => new(StatusCode.Error) }; Assert.IsFalse(status.IsFaulted, $"Unexpected UpdateOp {updateOp}, status {status}"); @@ -1016,11 +1229,10 @@ public void TransferFromReadOnlyToUpdateRecordTest([Values] UpdateOp updateOp) else Assert.IsTrue(status.Record.Created, status.ToString()); - var (xlock, slock) = luContext.IsLocked(key); - 
Assert.IsTrue(xlock); - Assert.AreEqual(0, slock); + OverflowBucketLockTableTests.AssertLockCounts(fht, keyVec[0].Key, true, 0); - luContext.Unlock(key, LockType.Exclusive); + luContext.Unlock(keyVec); + OverflowBucketLockTableTests.AssertLockCounts(fht, keyVec[0].Key, false, 0); } catch (Exception) { @@ -1040,7 +1252,7 @@ public void LockNewRecordThenUpdateAndUnlockTest([Values] UpdateOp updateOp) { const int numNewRecords = 100; - using var session = fht.NewSession(new SimpleFunctions()); + using var session = fht.NewSession(new SimpleFunctions()); var luContext = session.LockableUnsafeContext; int getValue(int key) => key + valueMult; @@ -1055,10 +1267,13 @@ public void LockNewRecordThenUpdateAndUnlockTest([Values] UpdateOp updateOp) // Now populate the main area of the log. Populate(); + BucketLockTracker blt = new(); luContext.BeginUnsafe(); luContext.BeginLockable(); + var keyVec = new FixedLengthLockableKeyStruct[1]; + try { // We don't sleep in this test @@ -1066,18 +1281,15 @@ public void LockNewRecordThenUpdateAndUnlockTest([Values] UpdateOp updateOp) for (var key = numRecords; key < numRecords + numNewRecords; ++key) { - luContext.Lock(key, LockType.Exclusive); + keyVec[0] = new(key, LockType.Exclusive, luContext); + luContext.Lock(keyVec); for (var iter = 0; iter < 2; ++iter) { - var (xlock, slockCount) = luContext.IsLocked(key); - Assert.IsTrue(xlock, $"Expected xlock; iter {iter}, key {key}"); - Assert.AreEqual(0, slockCount, $"Unexpected slock; iter {iter}, key {key}, count {slockCount}"); + OverflowBucketLockTableTests.AssertLockCounts(fht, key, true, 0); updater(key, iter); } - luContext.Unlock(key, LockType.Exclusive); - - // There should be no entries in the locktable now; they should all be on the RecordInfo. - Assert.IsFalse(LockTableHasEntries(), $"key {key}, count {LockTableEntryCount()}"); + luContext.Unlock(keyVec); + OverflowBucketLockTableTests.AssertLockCounts(fht, key, false, 0); } } catch (Exception) @@ -1140,12 +1352,12 @@ public void LockNewRecordThenUnlockThenUpdateTest([Values] UpdateOp updateOp) if (TestContext.CurrentContext.CurrentRepeatCount > 0) Debug.WriteLine($"*** Current test iteration: {TestContext.CurrentContext.CurrentRepeatCount + 1} ***"); - const int numNewRecords = 100; + const int numNewRecords = 50; - using var lockSession = fht.NewSession(new SimpleFunctions()); + using var lockSession = fht.NewSession(new SimpleFunctions()); var lockLuContext = lockSession.LockableUnsafeContext; - using var updateSession = fht.NewSession(new SimpleFunctions()); + using var updateSession = fht.NewSession(new SimpleFunctions()); var basicContext = updateSession.BasicContext; int getValue(int key) => key + valueMult; @@ -1171,6 +1383,8 @@ public void LockNewRecordThenUnlockThenUpdateTest([Values] UpdateOp updateOp) int maxSleepMs = 10; Random lockRng = new(101), updateRng = new(107); + var lockKeyVec = new FixedLengthLockableKeyStruct[1]; + try { for (var key = numRecords; key < numRecords + numNewRecords; ++key) @@ -1178,12 +1392,11 @@ public void LockNewRecordThenUnlockThenUpdateTest([Values] UpdateOp updateOp) for (var iter = 0; iter < 2; ++iter) { // Use Task instead of Thread because this propagates exceptions (such as Assert.* failures) back to this thread. - // BasicContext's ephemeral lock will wait for the lock/unlock combo to complete, or the lock/unlock will wait for basicContext to finish if it wins. 
+ // BasicContext's transient lock will wait for the lock/unlock combo to complete, or the lock/unlock will wait for basicContext to finish if it wins. Task.WaitAll(Task.Run(() => locker(key)), Task.Run(() => updater(key, iter))); } - // There should be no entries in the locktable now; they should all be on the RecordInfo. - Assert.IsFalse(LockTableHasEntries(), $"key {key}, count {LockTableEntryCount()}"); + AssertBucketLockCount(ref lockKeyVec[0], 0, 0); } } catch (Exception) @@ -1199,6 +1412,7 @@ public void LockNewRecordThenUnlockThenUpdateTest([Values] UpdateOp updateOp) void locker(int key) { + lockKeyVec[0] = new(key, LockType.Exclusive, lockLuContext); try { // Begin/EndLockable are called outside this function; we could not EndLockable in here as the lock lifetime is beyond that. @@ -1209,11 +1423,11 @@ void locker(int key) lastLockerKeys[1] = key; Thread.Sleep(lockRng.Next(maxSleepMs)); lastLockerKeys[2] = key; - lockLuContext.Lock(key, LockType.Exclusive); + lockLuContext.Lock(lockKeyVec); lastLockerKeys[3] = key; Thread.Sleep(lockRng.Next(maxSleepMs)); lastLockerKeys[4] = key; - lockLuContext.Unlock(key, LockType.Exclusive); + lockLuContext.Unlock(lockKeyVec); lastLockerKeys[5] = key; } catch (Exception) @@ -1279,7 +1493,7 @@ public void MultiSharedLockTest() { Populate(); - using var session = fht.NewSession(new SimpleFunctions()); + using var session = fht.NewSession(new SimpleFunctions()); var luContext = session.LockableUnsafeContext; const int key = 42; @@ -1287,24 +1501,25 @@ public void MultiSharedLockTest() luContext.BeginUnsafe(); luContext.BeginLockable(); + + var keyVec = new FixedLengthLockableKeyStruct[1]; + try { - for (var ii = 0; ii < maxLocks; ++ii) { - luContext.Lock(key, LockType.Shared); - var (xlock, slockCount) = luContext.IsLocked(key); - Assert.IsFalse(xlock); - Assert.AreEqual(ii + 1, slockCount); + keyVec[0] = new(key, LockType.Shared, luContext); + luContext.Lock(keyVec); + OverflowBucketLockTableTests.AssertLockCounts(fht, key, false, ii + 1); } for (var ii = 0; ii < maxLocks; ++ii) { - luContext.Unlock(key, LockType.Shared); - var (xlock, slockCount) = luContext.IsLocked(key); - Assert.IsFalse(xlock); - Assert.AreEqual(maxLocks - ii - 1, slockCount); + keyVec[0] = new(key, LockType.Shared, luContext); + luContext.Unlock(keyVec); + OverflowBucketLockTableTests.AssertLockCounts(fht, key, false, maxLocks - ii - 1); } + OverflowBucketLockTableTests.AssertLockCounts(fht, key, false, 0); } catch (Exception) { @@ -1317,350 +1532,5 @@ public void MultiSharedLockTest() luContext.EndUnsafe(); } } - - [Test] - [Category(LockableUnsafeContextTestCategory)] - [Category(SmokeTestCategory)] - public void EvictFromMainLogToLockTableTest() - { - Populate(); - - using var session = fht.NewSession(new SimpleFunctions()); - var luContext = session.LockableUnsafeContext; - - Dictionary locks = new(); - var rng = new Random(101); - foreach (var key in Enumerable.Range(0, numRecords / 5).Select(ii => rng.Next(numRecords))) - locks[key] = (key & 1) == 0 ? LockType.Exclusive : LockType.Shared; - - luContext.BeginUnsafe(); - luContext.BeginLockable(); - - try - { - // For this single-threaded test, the locking does not really have to be in order, but for consistency do it. - foreach (var key in locks.Keys.OrderBy(k => k)) - luContext.Lock(key, locks[key]); - - // All locking should have been done in main log. - Assert.IsFalse(LockTableHasEntries()); - Assert.AreEqual(0, LockTableEntryCount()); - - // Now evict main log which should transfer records to the LockTable. 
- fht.Log.FlushAndEvict(wait: true); - - Assert.IsTrue(LockTableHasEntries()); - Assert.AreEqual(locks.Count, LockTableEntryCount()); - - // Verify LockTable - foreach (var key in locks.Keys.OrderBy(k => -k)) - { - int localKey = key; - var found = fht.LockTable.TryGet(ref localKey, out RecordInfo recordInfo); - Assert.IsTrue(found); - var lockType = locks[key]; - Assert.AreEqual(lockType == LockType.Exclusive, recordInfo.IsLockedExclusive); - Assert.AreEqual(lockType != LockType.Exclusive, recordInfo.IsLockedShared); - - // Just a little more testing of Read/CTT transferring from LockTable - int input = 0, output = 0; - ReadOptions readOptions = new() { ReadFlags = ReadFlags.CopyReadsToTail}; - var status = luContext.Read(ref localKey, ref input, ref output, ref readOptions, out _); - Assert.IsTrue(status.IsPending, status.ToString()); - luContext.CompletePending(wait: true); - - Assert.IsFalse(fht.LockTable.TryGet(ref localKey, out _)); - var (isLockedExclusive, numLockedShared) = luContext.IsLocked(localKey); - Assert.AreEqual(lockType == LockType.Exclusive, isLockedExclusive); - Assert.AreEqual(lockType != LockType.Exclusive, numLockedShared > 0); - - luContext.Unlock(key, lockType); - (isLockedExclusive, numLockedShared) = luContext.IsLocked(localKey); - Assert.IsFalse(isLockedExclusive); - Assert.AreEqual(0, numLockedShared); - } - } - catch (Exception) - { - ClearCountsOnError(session); - throw; - } - finally - { - luContext.EndLockable(); - luContext.EndUnsafe(); - } - - Assert.IsFalse(LockTableHasEntries()); - Assert.AreEqual(0, LockTableEntryCount()); - } - - [Test] - [Category(LockableUnsafeContextTestCategory)] - [Category(CheckpointRestoreCategory)] - [Ignore("Should not hold LUC while calling sync checkpoint")] - public async ValueTask CheckpointRecoverTest([Values] CheckpointType checkpointType, [Values] SyncMode syncMode) - { - Populate(); - - Dictionary locks = new(); - var rng = new Random(101); - foreach (var key in Enumerable.Range(0, numRecords / 5).Select(ii => rng.Next(numRecords))) - locks[key] = (key & 1) == 0 ? LockType.Exclusive : LockType.Shared; - - Guid fullCheckpointToken; - bool success = true; - { - using var session = fht.NewSession(new SimpleFunctions()); - var luContext = session.LockableUnsafeContext; - - try - { - // We must retain this BeginLockable across the checkpoint, because we can't call EndLockable with locks held. - luContext.BeginUnsafe(); - luContext.BeginLockable(); - - // For this single-threaded test, the locking does not really have to be in order, but for consistency do it. 
- foreach (var key in locks.Keys.OrderBy(k => k)) - luContext.Lock(key, locks[key]); - } - catch (Exception) - { - ClearCountsOnError(session); - luContext.EndLockable(); - throw; - } - finally - { - luContext.EndUnsafe(); - } - - this.fht.Log.ShiftReadOnlyAddress(this.fht.Log.TailAddress, wait: true); - - if (syncMode == SyncMode.Sync) - { - this.fht.TryInitiateFullCheckpoint(out fullCheckpointToken, checkpointType); - await this.fht.CompleteCheckpointAsync(); - } - else - (success, fullCheckpointToken) = await fht.TakeFullCheckpointAsync(checkpointType); - Assert.IsTrue(success); - - try - { - luContext.BeginUnsafe(); - foreach (var key in locks.Keys.OrderBy(k => -k)) - luContext.Unlock(key, locks[key]); - } - catch (Exception) - { - ClearCountsOnError(session); - throw; - } - finally - { - luContext.EndLockable(); - luContext.EndUnsafe(); - } - } - - TearDown(forRecovery: true); - Setup(forRecovery: true); - - if (syncMode == SyncMode.Sync) - this.fht.Recover(fullCheckpointToken); - else - await this.fht.RecoverAsync(fullCheckpointToken); - - { - var luContext = this.session.LockableUnsafeContext; - luContext.BeginUnsafe(); - - try - { - foreach (var key in locks.Keys.OrderBy(k => k)) - { - var (exclusive, numShared) = luContext.IsLocked(key); - Assert.IsFalse(exclusive, $"key: {key}"); - Assert.AreEqual(0, numShared, $"key: {key}"); - } - } - catch (Exception) - { - ClearCountsOnError(session); - throw; - } - finally - { - luContext.EndUnsafe(); - } - } - } - - const int numSecondaryReaderKeys = 1500; - const int checkpointFreq = 250; - - [Test] - [Category(LockableUnsafeContextTestCategory)] - [Category(CheckpointRestoreCategory)] - [Ignore("Should not hold LUC while calling sync checkpoint")] - async public Task SecondaryReaderTest([Values] SyncMode syncMode) - { - // This test is taken from the SecondaryReaderStore sample - - var path = MethodTestDir; - DeleteDirectory(path, wait: true); - - var log = Devices.CreateLogDevice(path + "hlog.log", deleteOnClose: true); - - var primaryStore = new FasterKV - (1L << 10, - logSettings: new LogSettings { LogDevice = log, MutableFraction = 1, PageSizeBits = 10, MemorySizeBits = 20 }, - checkpointSettings: new CheckpointSettings { CheckpointDir = path } - ); - - var secondaryStore = new FasterKV - (1L << 10, - logSettings: new LogSettings { LogDevice = log, MutableFraction = 1, PageSizeBits = 10, MemorySizeBits = 20 }, - checkpointSettings: new CheckpointSettings { CheckpointDir = path } - ); - - // Use Task instead of Thread because this propagates exceptions (such as Assert.* failures) back to this thread. 
- await Task.WhenAll(Task.Run(() => PrimaryWriter(primaryStore, syncMode)), - Task.Run(() => SecondaryReader(secondaryStore, syncMode))); - - log.Dispose(); - DeleteDirectory(path, wait: true); - } - - async static Task PrimaryWriter(FasterKV primaryStore, SyncMode syncMode) - { - using var s1 = primaryStore.NewSession(new SimpleFunctions()); - var luc1 = s1.LockableUnsafeContext; - - // Upserting keys at primary starting from key 0 - for (long key = 0; key < numSecondaryReaderKeys; key++) - { - if (key > 0 && key % checkpointFreq == 0) - { - // Checkpointing primary until key {key - 1} - if (syncMode == SyncMode.Sync) - { - primaryStore.TryInitiateHybridLogCheckpoint(out _, CheckpointType.Snapshot); - await primaryStore.CompleteCheckpointAsync().ConfigureAwait(false); - } - else - { - var (success, _) = await primaryStore.TakeHybridLogCheckpointAsync(CheckpointType.Snapshot).ConfigureAwait(false); - Assert.IsTrue(success); - } - Thread.Sleep(10); - } - - var status = s1.Upsert(ref key, ref key); - Assert.IsTrue(status.Record.Created, status.ToString()); - - try - { - luc1.BeginUnsafe(); - luc1.BeginLockable(); - luc1.Lock(key, LockType.Shared); - } - catch (Exception) - { - ClearCountsOnError(s1); - throw; - } - finally - { - luc1.EndLockable(); - luc1.EndUnsafe(); - } - } - - // Checkpointing primary until key {numSecondaryReaderOps - 1} - await primaryStore.TakeHybridLogCheckpointAsync(CheckpointType.Snapshot).ConfigureAwait(false); - - try - { - luc1.BeginUnsafe(); - luc1.BeginLockable(); - - // Unlock everything before we Dispose() luc1 - for (long kk = 0; kk < numSecondaryReaderKeys; kk++) - { - luc1.Unlock(kk, LockType.Shared); - } - } - catch (Exception) - { - ClearCountsOnError(s1); - throw; - } - finally - { - luc1.EndLockable(); - luc1.EndUnsafe(); - } - } - - async static Task SecondaryReader(FasterKV secondaryStore, SyncMode syncMode) - { - using var s1 = secondaryStore.NewSession(new SimpleFunctions()); - var luc1 = s1.LockableUnsafeContext; - - long key = 0, output = 0; - while (true) - { - try - { - // read-only recovery, no writing back undos - if (syncMode == SyncMode.Sync) - secondaryStore.Recover(undoNextVersion: false); - else - await secondaryStore.RecoverAsync(undoNextVersion: false).ConfigureAwait(false); - } - catch (FasterException) - { - // Nothing to recover to at secondary, retrying - Thread.Sleep(500); - continue; - } - - luc1.BeginUnsafe(); - luc1.BeginLockable(); - try - { - while (true) - { - var status = luc1.Read(ref key, ref output); - if (!status.Found) - { - // Key {key} not found at secondary; performing recovery to catch up - Thread.Sleep(500); - break; - } - Assert.AreEqual(key, output); - var (xlock, slock) = luc1.IsLocked(key); - Assert.IsFalse(xlock); - Assert.AreEqual(0, slock); - - key++; - if (key == numSecondaryReaderKeys) - return; - } - } - catch (Exception) - { - ClearCountsOnError(s1); - throw; - } - finally - { - luc1.EndLockable(); - luc1.EndUnsafe(); - } - } - } } } diff --git a/cs/test/MiscFASTERTests.cs b/cs/test/MiscFASTERTests.cs index bc9a1a0b4..2933b17b0 100644 --- a/cs/test/MiscFASTERTests.cs +++ b/cs/test/MiscFASTERTests.cs @@ -126,7 +126,7 @@ public void ShouldCreateNewRecordIfConcurrentWriterReturnsFalse() var status = session.Upsert(ref key, ref input, ref value, ref output, out RecordMetadata recordMetadata1); Assert.IsTrue(!status.Found && status.Record.Created, status.ToString()); - // ConcurrentWriter returns false, so we create a new record (and leave the old one sealed). 
+ // ConcurrentWriter returns false, so we create a new record. value = new ValueStruct() { vfield1 = 1001, vfield2 = 2002 }; status = session.Upsert(ref key, ref input, ref value, ref output, out RecordMetadata recordMetadata2); Assert.IsTrue(!status.Found && status.Record.Created, status.ToString()); @@ -136,15 +136,13 @@ public void ShouldCreateNewRecordIfConcurrentWriterReturnsFalse() var recordCount = 0; using (var iterator = fht.Log.Scan(fht.Log.BeginAddress, fht.Log.TailAddress)) { - // We seal before copying and leave it sealed after copying, so we only get one record. + // We should get both the old and the new records. while (iterator.GetNext(out var info)) - { recordCount++; - } } Assert.AreEqual(1, copyOnWrite.ConcurrentWriterCallCount); - Assert.AreEqual(1, recordCount); + Assert.AreEqual(2, recordCount); } finally { diff --git a/cs/test/ModifiedBitTests.cs b/cs/test/ModifiedBitTests.cs index 1bd904755..4a6d9b646 100644 --- a/cs/test/ModifiedBitTests.cs +++ b/cs/test/ModifiedBitTests.cs @@ -4,10 +4,11 @@ using System; using System.IO; using FASTER.core; +using FASTER.test.LockTable; using NUnit.Framework; using static FASTER.test.TestUtils; -namespace FASTER.test.ModifiedTests +namespace FASTER.test.ModifiedBit { internal class ModifiedBitTestComparer : IFasterEqualityComparer { @@ -34,7 +35,7 @@ public void Setup() { log = Devices.CreateLogDevice(Path.Combine(MethodTestDir, "test.log"), deleteOnClose: false); comparer = new ModifiedBitTestComparer(); - fht = new FasterKV(1L << 20, new LogSettings { LogDevice = log, ObjectLogDevice = null, PageSizeBits = 12, MemorySizeBits = 22 }, comparer: comparer, disableEphemeralLocking: false); + fht = new FasterKV(1L << 20, new LogSettings { LogDevice = log, ObjectLogDevice = null, PageSizeBits = 12, MemorySizeBits = 22 }, comparer: comparer, lockingMode: LockingMode.Standard); session = fht.For(new SimpleFunctions()).NewSession>(); } @@ -55,35 +56,29 @@ void Populate() Assert.IsFalse(session.Upsert(key, key * valueMult).IsPending); } - static void AssertLockandModified(LockableUnsafeContext> luContext, int key, bool xlock, bool slock, bool modified = false) + void AssertLockandModified(LockableUnsafeContext> luContext, int key, bool xlock, bool slock, bool modified = false) { - var (isX, isS) = luContext.IsLocked(key); + OverflowBucketLockTableTests.AssertLockCounts(fht, ref key, xlock, slock); var isM = luContext.IsModified(key); - Assert.AreEqual(xlock, isX, "xlock mismatch"); - Assert.AreEqual(slock, isS > 0, "slock mismatch"); Assert.AreEqual(modified, isM, "modified mismatch"); } - static void AssertLockandModified(LockableContext> luContext, int key, bool xlock, bool slock, bool modified = false) + void AssertLockandModified(LockableContext> luContext, int key, bool xlock, bool slock, bool modified = false) { - var (isX, isS) = luContext.IsLocked(key); + OverflowBucketLockTableTests.AssertLockCounts(fht, ref key, xlock, slock); var isM = luContext.IsModified(key); - Assert.AreEqual(xlock, isX, "xlock mismatch"); - Assert.AreEqual(slock, isS > 0, "slock mismatch"); Assert.AreEqual(modified, isM, "modified mismatch"); } - static void AssertLockandModified(ClientSession> session, int key, bool xlock, bool slock, bool modified = false) + void AssertLockandModified(ClientSession> session, int key, bool xlock, bool slock, bool modified = false) { var luContext = session.LockableUnsafeContext; luContext.BeginUnsafe(); - luContext.BeginLockable(); - var (isX, isS) = luContext.IsLocked(key); + + 
OverflowBucketLockTableTests.AssertLockCounts(fht, ref key, xlock, slock); var isM = luContext.IsModified(key); - Assert.AreEqual(xlock, isX, "xlock mismatch"); - Assert.AreEqual(slock, isS > 0, "slock mismatch"); Assert.AreEqual(modified, isM, "Modified mismatch"); - luContext.EndLockable(); + luContext.EndUnsafe(); } @@ -96,22 +91,26 @@ public void LockAndNotModify() int key = r.Next(numRecords); session.ResetModified(key); - var LC = session.LockableContext; - LC.BeginLockable(); - AssertLockandModified(LC, key, xlock: false, slock: false, modified: false); + var lContext = session.LockableContext; + lContext.BeginLockable(); + AssertLockandModified(lContext, key, xlock: false, slock: false, modified: false); + + var keyVec = new[] { new FixedLengthLockableKeyStruct(key, LockType.Exclusive, lContext) }; + + lContext.Lock(keyVec); + AssertLockandModified(lContext, key, xlock: true, slock: false, modified: false); - LC.Lock(key, LockType.Exclusive); - AssertLockandModified(LC, key, xlock: true, slock: false, modified: false); + lContext.Unlock(keyVec); + AssertLockandModified(lContext, key, xlock: false, slock: false, modified: false); - LC.Unlock(key, LockType.Exclusive); - AssertLockandModified(LC, key, xlock: false, slock: false, modified: false); + keyVec[0].LockType = LockType.Shared; - LC.Lock(key, LockType.Shared); - AssertLockandModified(LC, key, xlock: false, slock: true, modified: false); + lContext.Lock(keyVec); + AssertLockandModified(lContext, key, xlock: false, slock: true, modified: false); - LC.Unlock(key, LockType.Shared); - AssertLockandModified(LC, key, xlock: false, slock: false, modified: false); - LC.EndLockable(); + lContext.Unlock(keyVec); + AssertLockandModified(lContext, key, xlock: false, slock: false, modified: false); + lContext.EndLockable(); } [Test] @@ -198,7 +197,10 @@ public void ModifyLUC([Values(true, false)] bool flushToDisk, [Values] UpdateOp luContext.BeginUnsafe(); luContext.BeginLockable(); - luContext.Lock(key, LockType.Exclusive); + + var keyVec = new[] { new FixedLengthLockableKeyStruct(key, LockType.Exclusive, luContext) }; + + luContext.Lock(keyVec); switch (updateOp) { @@ -229,14 +231,15 @@ public void ModifyLUC([Values(true, false)] bool flushToDisk, [Values] UpdateOp } } - luContext.Unlock(key, LockType.Exclusive); + luContext.Unlock(keyVec); if (flushToDisk) { - luContext.Lock(key, LockType.Shared); + keyVec[0].LockType = LockType.Shared; + luContext.Lock(keyVec); (status, var _) = luContext.Read(key); Assert.AreEqual(updateOp != UpdateOp.Delete, status.Found, status.ToString()); - luContext.Unlock(key, LockType.Shared); + luContext.Unlock(keyVec); } AssertLockandModified(luContext, key, xlock: false, slock: false, modified: updateOp != UpdateOp.Delete); @@ -306,10 +309,13 @@ public void ModifyLC([Values(true, false)] bool flushToDisk, [Values] UpdateOp u int key = numRecords - 500; int value = 14; session.ResetModified(key); - var LC = session.LockableContext; - LC.BeginLockable(); - AssertLockandModified(LC, key, xlock: false, slock: false, modified: false); - LC.Lock(key, LockType.Exclusive); + var lContext = session.LockableContext; + lContext.BeginLockable(); + AssertLockandModified(lContext, key, xlock: false, slock: false, modified: false); + + var keyVec = new[] { new FixedLengthLockableKeyStruct(key, LockType.Exclusive, lContext) }; + + lContext.Lock(keyVec); if (flushToDisk) this.fht.Log.FlushAndEvict(wait: true); @@ -319,13 +325,13 @@ public void ModifyLC([Values(true, false)] bool flushToDisk, [Values] UpdateOp u switch 
(updateOp) { case UpdateOp.Upsert: - status = LC.Upsert(key, value); + status = lContext.Upsert(key, value); break; case UpdateOp.RMW: - status = LC.RMW(key, value); + status = lContext.RMW(key, value); break; case UpdateOp.Delete: - status = LC.Delete(key); + status = lContext.Delete(key); break; default: break; @@ -337,7 +343,7 @@ public void ModifyLC([Values(true, false)] bool flushToDisk, [Values] UpdateOp u { case UpdateOp.RMW: Assert.IsTrue(status.IsPending, status.ToString()); - LC.CompletePending(wait: true); + lContext.CompletePending(wait: true); break; default: Assert.IsTrue(status.NotFound); @@ -345,18 +351,19 @@ public void ModifyLC([Values(true, false)] bool flushToDisk, [Values] UpdateOp u } } - LC.Unlock(key, LockType.Exclusive); + lContext.Unlock(keyVec); if (flushToDisk) { - LC.Lock(key, LockType.Shared); - (status, var _) = LC.Read(key); + keyVec[0].LockType = LockType.Shared; + lContext.Lock(keyVec); + (status, var _) = lContext.Read(key); Assert.AreEqual(updateOp != UpdateOp.Delete, status.Found, status.ToString()); - LC.Unlock(key, LockType.Shared); + lContext.Unlock(keyVec); } - AssertLockandModified(LC, key, xlock: false, slock: false, modified: updateOp != UpdateOp.Delete); - LC.EndLockable(); + AssertLockandModified(lContext, key, xlock: false, slock: false, modified: updateOp != UpdateOp.Delete); + lContext.EndLockable(); } [Test] @@ -375,7 +382,9 @@ public void CopyToTailTest() luContext.BeginLockable(); AssertLockandModified(luContext, key, xlock: false, slock: false, modified: true); - luContext.Lock(key, LockType.Shared); + var keyVec = new[] { new FixedLengthLockableKeyStruct(key, LockType.Shared, luContext) }; + + luContext.Lock(keyVec); AssertLockandModified(luContext, key, xlock: false, slock: true, modified: true); // Check Read Copy to Tail resets the modified @@ -383,46 +392,22 @@ public void CopyToTailTest() Assert.IsTrue(status.IsPending, status.ToString()); luContext.CompletePending(wait: true); - luContext.Unlock(key, LockType.Shared); + luContext.Unlock(keyVec); AssertLockandModified(luContext, key, xlock: false, slock: false, modified: true); // Check Read Copy to Tail resets the modified on locked key key += 10; - luContext.Lock(key, LockType.Exclusive); + keyVec[0] = new(key, LockType.Exclusive, luContext); + luContext.Lock(keyVec); status = luContext.Read(ref key, ref input, ref output, ref readOptions, out _); Assert.IsTrue(status.IsPending, status.ToString()); luContext.CompletePending(wait: true); AssertLockandModified(luContext, key, xlock: true, slock: false, modified: true); - luContext.Unlock(key, LockType.Exclusive); + luContext.Unlock(keyVec); AssertLockandModified(luContext, key, xlock: false, slock: false, modified: true); luContext.EndLockable(); luContext.EndUnsafe(); } - - [Test] - [Category(ModifiedBitTestCategory), Category(SmokeTestCategory)] - public void ReadFlagsResetModifiedBit([Values] FlushMode flushMode) - { - Populate(); - - int input = 0, output = 0, key = numRecords / 2; - AssertLockandModified(session, key, xlock: false, slock: false, modified: true); - - if (flushMode == FlushMode.ReadOnly) - this.fht.hlog.ShiftReadOnlyAddress(fht.Log.TailAddress); - else if (flushMode == FlushMode.OnDisk) - this.fht.Log.FlushAndEvict(wait: true); - - ReadOptions readOptions = new() { ReadFlags = ReadFlags.CopyReadsToTail | ReadFlags.ResetModifiedBit }; - - // Check that reading the record clears the modified bit, even if it went through CopyToTail - var status = session.Read(ref key, ref input, ref output, ref readOptions, out _); 
- Assert.AreEqual(flushMode == FlushMode.OnDisk, status.IsPending, status.ToString()); - if (status.IsPending) - session.CompletePending(wait: true); - - AssertLockandModified(session, key, xlock: false, slock: false, modified: false); - } } } diff --git a/cs/test/OverflowBucketLockTableTests.cs b/cs/test/OverflowBucketLockTableTests.cs new file mode 100644 index 000000000..3c74d90d3 --- /dev/null +++ b/cs/test/OverflowBucketLockTableTests.cs @@ -0,0 +1,432 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. +using FASTER.core; +using NUnit.Framework; +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Threading; +using System.Threading.Tasks; +using static FASTER.test.TestUtils; + +namespace FASTER.test.LockTable +{ + internal class SingleBucketComparer : IFasterEqualityComparer + { + public bool Equals(ref long k1, ref long k2) => k1 == k2; + + public long GetHashCode64(ref long k) => 42L; + } + + // Used to signal Setup to use the SingleBucketComparer + public enum UseSingleBucketComparer { UseSingleBucket } + + [TestFixture] + internal class OverflowBucketLockTableTests + { + IFasterEqualityComparer comparer = new LongFasterEqualityComparer(); + long SingleBucketKey = 1; // We use a single bucket here for most tests so this lets us use 'ref' easily + + // For OverflowBucketLockTable, we need an instance of FasterKV + private FasterKV fht; + private IDevice log; + + [SetUp] + public void Setup() + { + DeleteDirectory(MethodTestDir); + + log = Devices.CreateLogDevice(Path.Combine(MethodTestDir, "test.log"), deleteOnClose: false, recoverDevice: false); + + foreach (var arg in TestContext.CurrentContext.Test.Arguments) + { + if (arg is UseSingleBucketComparer) + { + comparer = new SingleBucketComparer(); + break; + } + } + comparer ??= new LongFasterEqualityComparer(); + + fht = new FasterKV(1L << 20, new LogSettings { LogDevice = log, ObjectLogDevice = null, PageSizeBits = 12, MemorySizeBits = 22 }, + comparer: comparer, lockingMode: LockingMode.Standard); + } + + [TearDown] + public void TearDown() + { + fht?.Dispose(); + fht = default; + log?.Dispose(); + log = default; + comparer = default; + DeleteDirectory(MethodTestDir); + } + + void TryLock(long key, LockType lockType, bool transient, int expectedCurrentReadLocks, bool expectedLockResult) + { + HashEntryInfo hei = new(comparer.GetHashCode64(ref key)); + PopulateHei(ref hei); + + // Check for existing lock + var lockState = fht.LockTable.GetLockState(ref key, ref hei); + Assert.AreEqual(expectedCurrentReadLocks, lockState.NumLockedShared); + + if (transient) + Assert.AreEqual(expectedLockResult, fht.LockTable.TryLockTransient(ref key, ref hei, lockType)); + else + Assert.AreEqual(expectedLockResult, fht.LockTable.TryLockManual(ref key, ref hei, lockType)); + } + + void Unlock(long key, LockType lockType) + { + HashEntryInfo hei = new(comparer.GetHashCode64(ref key)); + PopulateHei(ref hei); + fht.LockTable.Unlock(ref key, ref hei, lockType); + } + + internal void PopulateHei(ref HashEntryInfo hei) => PopulateHei(fht, ref hei); + + internal static void PopulateHei(FasterKV fht, ref HashEntryInfo hei) => fht.FindOrCreateTag(ref hei, fht.Log.BeginAddress); + + internal void AssertLockCounts(ref HashEntryInfo hei, bool expectedX, long expectedS) + { + var lockState = fht.LockTable.GetLockState(ref SingleBucketKey, ref hei); + Assert.AreEqual(expectedX, lockState.IsLockedExclusive); + Assert.AreEqual(expectedS, lockState.NumLockedShared); + 
} + + internal static void AssertLockCounts(FasterKV fht, TKey key, bool expectedX, int expectedS) + => AssertLockCounts(fht, ref key, expectedX, expectedS); + + internal static void AssertLockCounts(FasterKV fht, ref TKey key, bool expectedX, int expectedS) + { + HashEntryInfo hei = new(fht.comparer.GetHashCode64(ref key)); + PopulateHei(fht, ref hei); + var lockState = fht.LockTable.GetLockState(ref key, ref hei); + Assert.AreEqual(expectedX, lockState.IsLockedExclusive, "XLock mismatch"); + Assert.AreEqual(expectedS, lockState.NumLockedShared, "SLock mismatch"); + } + + internal static void AssertLockCounts(FasterKV fht, ref TKey key, bool expectedX, bool expectedS) + { + FixedLengthLockableKeyStruct keyStruct = new () + { + Key = key, + KeyHash = fht.comparer.GetHashCode64(ref key), + LockType = LockType.None, // Not used for this call + }; + keyStruct.LockCode = fht.LockTable.GetLockCode(ref key, keyStruct.KeyHash); + AssertLockCounts(fht, ref keyStruct, expectedX, expectedS); + } + + + internal static void AssertLockCounts(FasterKV fht, ref FixedLengthLockableKeyStruct key, bool expectedX, bool expectedS) + { + HashEntryInfo hei = new(key.KeyHash); + PopulateHei(fht, ref hei); + var lockState = fht.LockTable.GetLockState(ref key.Key, ref hei); + Assert.AreEqual(expectedX, lockState.IsLockedExclusive, "XLock mismatch"); + Assert.AreEqual(expectedS, lockState.NumLockedShared > 0, "SLock mismatch"); + } + + internal unsafe void AssertTotalLockCounts(long expectedX, long expectedS) + => AssertTotalLockCounts(fht, expectedX, expectedS); + + internal static unsafe void AssertTotalLockCounts(FasterKV fht, long expectedX, long expectedS) + { + HashBucket* buckets = fht.state[fht.resizeInfo.version].tableAligned; + var count = fht.LockTable.NumBuckets; + long xcount = 0, scount = 0; + for (var ii = 0; ii < count; ++ii) + { + if (HashBucket.IsLatchedExclusive(buckets + ii)) + ++xcount; + scount += HashBucket.NumLatchedShared(buckets + ii); + } + Assert.AreEqual(expectedX, xcount); + Assert.AreEqual(expectedS, scount); + } + + internal void AssertBucketLockCount(ref FixedLengthLockableKeyStruct key, long expectedX, long expectedS) => AssertBucketLockCount(fht, ref key, expectedX, expectedS); + + internal unsafe static void AssertBucketLockCount(FasterKV fht, ref FixedLengthLockableKeyStruct key, long expectedX, long expectedS) + { + var bucketIndex = fht.LockTable.GetBucketIndex(key.LockCode); + var bucket = fht.state[fht.resizeInfo.version].tableAligned + bucketIndex; + Assert.AreEqual(expectedX == 1, HashBucket.IsLatchedExclusive(bucket)); + Assert.AreEqual(expectedS, HashBucket.NumLatchedShared(bucket)); + } + + [Test] + [Category(LockTestCategory), Category(LockTableTestCategory), Category(SmokeTestCategory)] + public void SingleKeyTest([Values] UseSingleBucketComparer /* justToSignalSetup */ _) + { + HashEntryInfo hei = new(comparer.GetHashCode64(ref SingleBucketKey)); + PopulateHei(ref hei); + AssertLockCounts(ref hei, false, 0); + + // No entries + long key = 1; + TryLock(key, LockType.Shared, transient: true, expectedCurrentReadLocks: 0, expectedLockResult: true); + AssertLockCounts(ref hei, false, 1); + + // Add a non-transient lock + TryLock(key, LockType.Shared, transient: false, expectedCurrentReadLocks: 1, expectedLockResult: true); + AssertLockCounts(ref hei, false, 2); + + // Now both transient and manual x locks with the same key should fail + TryLock(key, LockType.Exclusive, transient: true, expectedCurrentReadLocks: 2, expectedLockResult: false); + AssertLockCounts(ref hei, 
false, 2); + TryLock(key, LockType.Exclusive, transient: false, expectedCurrentReadLocks: 2, expectedLockResult: false); + AssertLockCounts(ref hei, false, 2); + + // Now unlock + Unlock(key, LockType.Shared); + AssertLockCounts(ref hei, false, 1); + Unlock(key, LockType.Shared); + AssertLockCounts(ref hei, false, 0); + + // Now exclusive should succeed + TryLock(key, LockType.Exclusive, transient: false, expectedCurrentReadLocks: 0, expectedLockResult: true); + AssertLockCounts(ref hei, true, 0); + Unlock(key, LockType.Exclusive); + AssertLockCounts(ref hei, false, 0); + } + + [Test] + [Category(LockTestCategory), Category(LockTableTestCategory), Category(SmokeTestCategory)] + public void ThreeKeyTest([Values] UseSingleBucketComparer /* justToSignalSetup */ _) + { + HashEntryInfo hei = new(comparer.GetHashCode64(ref SingleBucketKey)); + PopulateHei(ref hei); + AssertLockCounts(ref hei, false, 0); + + TryLock(1, LockType.Shared, transient: false, expectedCurrentReadLocks: 0, expectedLockResult: true); + AssertLockCounts(ref hei, false, 1); + + TryLock(2, LockType.Shared, transient: false, expectedCurrentReadLocks: 1, expectedLockResult: true); + AssertLockCounts(ref hei, false, 2); + + TryLock(3, LockType.Shared, transient: false, expectedCurrentReadLocks: 2, expectedLockResult: true); + AssertLockCounts(ref hei, false, 3); + + // Exclusive lock should fail + TryLock(4, LockType.Exclusive, transient: false, expectedCurrentReadLocks: 3, expectedLockResult: false); + AssertLockCounts(ref hei, false, 3); + + // Now unlock + Unlock(3, LockType.Shared); + AssertLockCounts(ref hei, false, 2); + Unlock(2, LockType.Shared); + AssertLockCounts(ref hei, false, 1); + Unlock(1, LockType.Shared); + AssertLockCounts(ref hei, false, 0); + + // Now exclusive should succeed + TryLock(4, LockType.Exclusive, transient: false, expectedCurrentReadLocks: 0, expectedLockResult: true); + AssertLockCounts(ref hei, true, 0); + Unlock(4, LockType.Exclusive); + AssertLockCounts(ref hei, false, 0); + } + + [Test] + [Category(LockTestCategory), Category(LockTableTestCategory), Category(SmokeTestCategory)] + public void ThreadedLockStressTest1Thread() + { + List tasks = new(); + var lastTid = 0; + AddThreads(tasks, ref lastTid, numThreads: 1, maxNumKeys: 5, lowKey: 1, highKey: 5, LockType.Exclusive); + Task.WaitAll(tasks.ToArray()); + } + + [Test] + [Category(LockTestCategory), Category(LockTableTestCategory), Category(SmokeTestCategory)] + public void ThreadedLockStressTestMultiThreadsNoContention([Values(3, 8)] int numThreads) + { + List tasks = new(); + var lastTid = 0; + for (var ii = 0; ii < numThreads; ++ii) + AddThreads(tasks, ref lastTid, numThreads: 1, maxNumKeys: 5, lowKey: 1 + 10 * ii, highKey: 5 + 10 * ii, LockType.Exclusive); + Task.WaitAll(tasks.ToArray()); + AssertTotalLockCounts(0, 0); + } + + [Test] + [Category(LockTestCategory), Category(LockTableTestCategory), Category(SmokeTestCategory)] + public void ThreadedLockStressTestMultiThreadsFullContention([Values(3, 8)] int numThreads, [Values] LockType lockType) + { + List tasks = new(); + var lastTid = 0; + AddThreads(tasks, ref lastTid, numThreads: numThreads, maxNumKeys: 5, lowKey: 1, highKey: 5, lockType); + Task.WaitAll(tasks.ToArray()); + AssertTotalLockCounts(0, 0); + } + + [Test] + [Category(LockTestCategory), Category(LockTableTestCategory), Category(SmokeTestCategory)] + public void ThreadedLockStressTestMultiThreadsRandomContention([Values(3, 8)] int numThreads, [Values] LockType lockType) + { + List tasks = new(); + var lastTid = 0; + 
+
+    [Test]
+    [Category(LockTestCategory), Category(LockTableTestCategory), Category(SmokeTestCategory)]
+    public void ThreadedLockStressTestMultiThreadsRandomContention([Values(3, 8)] int numThreads, [Values] LockType lockType)
+    {
+        List<Task> tasks = new();
+        var lastTid = 0;
+        AddThreads(tasks, ref lastTid, numThreads: numThreads, maxNumKeys: 5, lowKey: 1, highKey: 10 * (numThreads / 2), lockType);
+        Task.WaitAll(tasks.ToArray());
+        AssertTotalLockCounts(0, 0);
+    }
+
+    FixedLengthLockableKeyStruct<long>[] CreateKeys(Random rng, int numKeys, int numRecords)
+    {
+        FixedLengthLockableKeyStruct<long> createKey()
+        {
+            long key = rng.Next(numKeys);
+            var keyHash = comparer.GetHashCode64(ref key);
+            return new()
+            {
+                Key = key,
+                // Split randomly between Shared and Exclusive, ~25% exclusive.
+                LockType = rng.Next(0, 100) < 25 ? LockType.Exclusive : LockType.Shared,
+                KeyHash = keyHash,
+                LockCode = fht.LockTable.GetLockCode(ref key, keyHash)
+            };
+        }
+        return Enumerable.Range(0, numRecords).Select(ii => createKey()).ToArray();
+    }
+
+    void AssertSorted(FixedLengthLockableKeyStruct<long>[] keys, int count)
+    {
+        long prevCode = default;
+        long lastXcode = default;
+        LockType lastLockType = default;
+
+        for (var ii = 0; ii < count; ++ii)
+        {
+            ref var key = ref keys[ii];
+            if (ii == 0)
+            {
+                prevCode = key.LockCode;
+                lastXcode = key.LockType == LockType.Exclusive ? key.LockCode : -2;
+                lastLockType = key.LockType;
+                continue;
+            }
+
+            Assert.GreaterOrEqual(fht.LockTable.CompareLockCodes(key, keys[ii - 1]), 0);
+            if (key.LockCode != prevCode)
+            {
+                // When the lock code changes, the BucketIndex must be strictly increasing.
+                Assert.Greater(fht.LockTable.GetBucketIndex(key.LockCode), fht.LockTable.GetBucketIndex(prevCode));
+                lastXcode = key.LockType == LockType.Exclusive ? key.LockCode : -2;
+            }
+            else
+            {
+                // A run of identical lock codes must start with its exclusive locks, followed by any number of
+                // shared locks (enumeration will take only the first).
+                Assert.AreEqual(lastXcode, key.LockCode);
+                if (key.LockType == LockType.Exclusive)
+                    Assert.AreNotEqual(LockType.Shared, lastLockType);
+                lastLockType = key.LockType;
+            }
+        }
+    }
+
+    [Test]
+    [Category(LockTestCategory), Category(LockTableTestCategory), Category(SmokeTestCategory)]
+    public void FullArraySortTest()
+    {
+        var keys = CreateKeys(new Random(101), 100, 1000);
+        fht.LockTable.SortLockCodes(keys);
+        AssertSorted(keys, keys.Length);
+    }
+
+    [Test]
+    [Category(LockTestCategory), Category(LockTableTestCategory), Category(SmokeTestCategory)]
+    public void PartialArraySortTest()
+    {
+        var numRecords = 1000;
+        var keys = CreateKeys(new Random(101), 100, numRecords);
+        const int count = 800;
+
+        // Make the later elements invalid.
+        for (var ii = count; ii < numRecords; ++ii)
+            keys[ii].LockCode = -ii;
+
+        fht.LockTable.SortLockCodes(keys, 0, count);
+        AssertSorted(keys, count);
+
+        // Verify the later elements were untouched.
+        for (var ii = count; ii < numRecords; ++ii)
+            Assert.AreEqual(-ii, keys[ii].LockCode);
+    }
+
+    const int NumTestIterations = 15;
+    const int maxSleepMs = 5;
+
+    private void AddThreads(List<Task> tasks, ref int lastTid, int numThreads, int maxNumKeys, int lowKey, int highKey, LockType lockType)
+    {
+        void runThread(int tid)
+        {
+            Random rng = new(101 * tid);
+
+            // maxNumKeys < 0 means use a random number of keys
+            int numKeys = maxNumKeys < 0 ? rng.Next(1, -maxNumKeys) : maxNumKeys;
+            FixedLengthLockableKeyStruct<long>[] threadStructs = new FixedLengthLockableKeyStruct<long>[numKeys];
+
+            long getNextKey()
+            {
+                while (true)
+                {
+                    var key = rng.Next(lowKey, highKey + 1);    // +1 because the upper bound is exclusive
+                    if (!Array.Exists(threadStructs, it => it.Key == key))
+                        return key;
+                }
+            }
+
+            for (var iteration = 0; iteration < NumTestIterations; ++iteration)
+            {
+                // Create key structs
+                for (var ii = 0; ii < numKeys; ++ii)
+                {
+                    var key = getNextKey();
+                    threadStructs[ii] = new()   // local var for debugging
+                    {
+                        Key = key,
+                        // LockType.None means split randomly between Shared and Exclusive
+                        LockType = lockType == LockType.None ? (rng.Next(0, 100) > 50 ? LockType.Shared : LockType.Exclusive) : lockType,
+                        KeyHash = comparer.GetHashCode64(ref key),
+                    };
+                    threadStructs[ii].LockCode = fht.LockTable.GetLockCode(ref key, threadStructs[ii].KeyHash);
+                }
+
+                // Sort and lock
+                fht.LockTable.SortLockCodes(threadStructs);
+                for (var ii = 0; ii < numKeys; ++ii)
+                {
+                    HashEntryInfo hei = new(threadStructs[ii].KeyHash);
+                    PopulateHei(ref hei);
+                    while (!fht.LockTable.TryLockManual(ref threadStructs[ii].Key, ref hei, threadStructs[ii].LockType))
+                        ;
+                }
+
+                // Pretend to do work
+                Thread.Sleep(rng.Next(maxSleepMs));
+
+                // Unlock
+                for (var ii = 0; ii < numKeys; ++ii)
+                {
+                    HashEntryInfo hei = new(threadStructs[ii].KeyHash);
+                    PopulateHei(ref hei);
+                    fht.LockTable.Unlock(ref threadStructs[ii].Key, ref hei, threadStructs[ii].LockType);
+                }
+                Array.Clear(threadStructs);
+            }
+        }
+
+        for (int t = 1; t <= numThreads; t++)
+        {
+            var tid = ++lastTid;
+            tasks.Add(Task.Factory.StartNew(() => runThread(tid)));
+        }
+    }
+    }
+}
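Note: the stress tests above always sort keys with `SortLockCodes` before locking, so concurrently running threads acquire locks in the same global order and cannot deadlock. A minimal sketch of the same discipline through the public `LockableUnsafeContext` API used elsewhere in this PR (assumes a `FasterKV<long, long>` named `fht` opened with `LockingMode.Standard`; the key values are illustrative):

```csharp
using var session = fht.NewSession(new SimpleFunctions<long, long>());
var luContext = session.LockableUnsafeContext;

var keys = new[]
{
    new FixedLengthLockableKeyStruct<long>(17, LockType.Shared, luContext),
    new FixedLengthLockableKeyStruct<long>(42, LockType.Exclusive, luContext),
};

luContext.BeginUnsafe();
luContext.BeginLockable();
try
{
    luContext.SortLockCodes(keys);  // every thread acquires in the same global order
    luContext.Lock(keys);
    // ... work on the locked keys ...
    luContext.Unlock(keys);
}
finally
{
    luContext.EndLockable();
    luContext.EndUnsafe();
}
```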
diff --git a/cs/test/ReadAddressTests.cs b/cs/test/ReadAddressTests.cs
index ee997f46c..6cfc75395 100644
--- a/cs/test/ReadAddressTests.cs
+++ b/cs/test/ReadAddressTests.cs
@@ -3,6 +3,7 @@
using System;
using FASTER.core;
+using static FASTER.test.TestUtils;
using NUnit.Framework;
using System.Threading.Tasks;
@@ -145,7 +146,7 @@ private class TestStore : IDisposable
    internal long[] InsertAddresses = new long[numKeys];

-    internal TestStore(bool useReadCache, ReadFlags readFlags, bool flush)
+    internal TestStore(bool useReadCache, ReadFlags readFlags, bool flush, LockingMode lockingMode)
    {
        this.testDir = TestUtils.MethodTestDir;
        TestUtils.DeleteDirectory(this.testDir, wait:true);
@@ -169,7 +170,7 @@ internal TestStore(bool useReadCache, ReadFlags readFlags, bool flush)
            checkpointSettings: new CheckpointSettings { CheckpointDir = $"{this.testDir}/CheckpointDir" },
            serializerSettings: null,
            comparer: new Key.Comparer(),
-            disableEphemeralLocking: true
+            lockingMode: lockingMode
            );
    }
@@ -255,19 +256,19 @@ public void Dispose()
        this.fkv = null;
        this.logDevice?.Dispose();
        this.logDevice = null;
-        TestUtils.DeleteDirectory(this.testDir);
+        DeleteDirectory(this.testDir);
    }
}

// readCache and copyReadsToTail are mutually exclusive and orthogonal to populating by RMW vs. Upsert.
-[TestCase(UseReadCache.NoReadCache, ReadFlags.None, false, false)]
-[TestCase(UseReadCache.NoReadCache, ReadFlags.CopyReadsToTail | ReadFlags.CopyFromDeviceOnly, true, true)]
-[TestCase(UseReadCache.ReadCache, ReadFlags.None, false, true)]
+[TestCase(UseReadCache.NoReadCache, ReadFlags.None, false, false, LockingMode.None)]
+[TestCase(UseReadCache.NoReadCache, ReadFlags.CopyReadsToTail | ReadFlags.CopyFromDeviceOnly, true, true, LockingMode.Standard)]
+[TestCase(UseReadCache.ReadCache, ReadFlags.None, false, true, LockingMode.Standard)]
[Category("FasterKV"), Category("Read")]
-public void VersionedReadSyncTests(UseReadCache urc, ReadFlags readFlags, bool useRMW, bool flush)
+public void VersionedReadSyncTests(UseReadCache urc, ReadFlags readFlags, bool useRMW, bool flush, [Values] LockingMode lockingMode)
{
    var useReadCache = urc == UseReadCache.ReadCache;
-    using var testStore = new TestStore(useReadCache, readFlags, flush);
+    using var testStore = new TestStore(useReadCache, readFlags, flush, lockingMode);
    testStore.Populate(useRMW, useAsync:false).GetAwaiter().GetResult();
    using var session = testStore.fkv.For(new Functions()).NewSession<Functions>();
@@ -288,7 +289,7 @@ public void VersionedReadSyncTests(UseReadCache urc, ReadFlags readFlags, bool u
    {
        // This will wait for each retrieved record; not recommended for performance-critical code or when retrieving multiple records unless necessary.
        session.CompletePendingWithOutputs(out var completedOutputs, wait: true);
-        (status, output) = TestUtils.GetSinglePendingResult(completedOutputs, out recordMetadata);
+        (status, output) = GetSinglePendingResult(completedOutputs, out recordMetadata);
    }
    if (!testStore.ProcessChainRecord(status, recordMetadata, lap, ref output))
        break;
@@ -298,14 +299,14 @@
}

// readCache and copyReadsToTail are mutually exclusive and orthogonal to populating by RMW vs. Upsert.
-[TestCase(UseReadCache.NoReadCache, ReadFlags.None, false, false)]
-[TestCase(UseReadCache.NoReadCache, ReadFlags.CopyReadsToTail | ReadFlags.CopyFromDeviceOnly, true, true)]
-[TestCase(UseReadCache.ReadCache, ReadFlags.None, false, true)]
+[TestCase(UseReadCache.NoReadCache, ReadFlags.None, false, false, LockingMode.None)]
+[TestCase(UseReadCache.NoReadCache, ReadFlags.CopyReadsToTail | ReadFlags.CopyFromDeviceOnly, true, true, LockingMode.Standard)]
+[TestCase(UseReadCache.ReadCache, ReadFlags.None, false, true, LockingMode.Standard)]
[Category("FasterKV"), Category("Read")]
-public async Task VersionedReadAsyncTests(UseReadCache urc, ReadFlags readFlags, bool useRMW, bool flush)
+public async Task VersionedReadAsyncTests(UseReadCache urc, ReadFlags readFlags, bool useRMW, bool flush, [Values] LockingMode lockingMode)
{
    var useReadCache = urc == UseReadCache.ReadCache;
-    using var testStore = new TestStore(useReadCache, readFlags, flush);
+    using var testStore = new TestStore(useReadCache, readFlags, flush, lockingMode);
    await testStore.Populate(useRMW, useAsync: true);
    using var session = testStore.fkv.For(new Functions()).NewSession<Functions>();
@@ -330,14 +331,14 @@ public async Task VersionedReadAsyncTests(UseReadCache urc, ReadFlags readFlags,
}

// readCache and copyReadsToTail are mutually exclusive and orthogonal to populating by RMW vs. Upsert.
-[TestCase(UseReadCache.NoReadCache, ReadFlags.None, false, false)]
-[TestCase(UseReadCache.NoReadCache, ReadFlags.CopyReadsToTail | ReadFlags.CopyFromDeviceOnly, true, true)]
-[TestCase(UseReadCache.ReadCache, ReadFlags.None, false, true)]
+[TestCase(UseReadCache.NoReadCache, ReadFlags.None, false, false, LockingMode.None)]
+[TestCase(UseReadCache.NoReadCache, ReadFlags.CopyReadsToTail | ReadFlags.CopyFromDeviceOnly, true, true, LockingMode.Standard)]
+[TestCase(UseReadCache.ReadCache, ReadFlags.None, false, true, LockingMode.Standard)]
[Category("FasterKV"), Category("Read")]
-public void ReadAtAddressSyncTests(UseReadCache urc, ReadFlags readFlags, bool useRMW, bool flush)
+public void ReadAtAddressSyncTests(UseReadCache urc, ReadFlags readFlags, bool useRMW, bool flush, [Values] LockingMode lockingMode)
{
    var useReadCache = urc == UseReadCache.ReadCache;
-    using var testStore = new TestStore(useReadCache, readFlags, flush);
+    using var testStore = new TestStore(useReadCache, readFlags, flush, lockingMode);
    testStore.Populate(useRMW, useAsync: false).GetAwaiter().GetResult();
    using var session = testStore.fkv.For(new Functions()).NewSession<Functions>();
@@ -357,7 +358,7 @@ public void ReadAtAddressSyncTests(UseReadCache urc, ReadFlags readFlags, bool u
    {
        // This will wait for each retrieved record; not recommended for performance-critical code or when retrieving multiple records unless necessary.
        session.CompletePendingWithOutputs(out var completedOutputs, wait: true);
-        (status, output) = TestUtils.GetSinglePendingResult(completedOutputs, out recordMetadata);
+        (status, output) = GetSinglePendingResult(completedOutputs, out recordMetadata);
    }
    if (!testStore.ProcessChainRecord(status, recordMetadata, lap, ref output))
@@ -373,7 +374,7 @@ public void ReadAtAddressSyncTests(UseReadCache urc, ReadFlags readFlags, bool u
    {
        // This will wait for each retrieved record; not recommended for performance-critical code or when retrieving multiple records unless necessary.
        session.CompletePendingWithOutputs(out var completedOutputs, wait: true);
-        (status, output) = TestUtils.GetSinglePendingResult(completedOutputs, out recordMetadata);
+        (status, output) = GetSinglePendingResult(completedOutputs, out recordMetadata);
    }
    Assert.AreEqual(saveOutput, output);
@@ -385,14 +386,14 @@
}

// readCache and copyReadsToTail are mutually exclusive and orthogonal to populating by RMW vs. Upsert.
-[TestCase(UseReadCache.NoReadCache, ReadFlags.None, false, false)]
-[TestCase(UseReadCache.NoReadCache, ReadFlags.CopyReadsToTail | ReadFlags.CopyFromDeviceOnly, true, true)]
-[TestCase(UseReadCache.ReadCache, ReadFlags.None, false, true)]
+[TestCase(UseReadCache.NoReadCache, ReadFlags.None, false, false, LockingMode.None)]
+[TestCase(UseReadCache.NoReadCache, ReadFlags.CopyReadsToTail | ReadFlags.CopyFromDeviceOnly, true, true, LockingMode.Standard)]
+[TestCase(UseReadCache.ReadCache, ReadFlags.None, false, true, LockingMode.Standard)]
[Category("FasterKV"), Category("Read")]
-public async Task ReadAtAddressAsyncTests(UseReadCache urc, ReadFlags readFlags, bool useRMW, bool flush)
+public async Task ReadAtAddressAsyncTests(UseReadCache urc, ReadFlags readFlags, bool useRMW, bool flush, [Values] LockingMode lockingMode)
{
    var useReadCache = urc == UseReadCache.ReadCache;
-    using var testStore = new TestStore(useReadCache, readFlags, flush);
+    using var testStore = new TestStore(useReadCache, readFlags, flush, lockingMode);
    await testStore.Populate(useRMW, useAsync: true);
    using var session = testStore.fkv.For(new Functions()).NewSession<Functions>();
@@ -430,14 +431,14 @@ public async Task ReadAtAddressAsyncTests(UseReadCache urc, ReadFlags readFlags,
}

// Similar to the other tests, but exercises the overload where ReadFlags.None is set -- we probably don't need every combination here, but it doesn't hurt.
-[TestCase(UseReadCache.NoReadCache, ReadFlags.None, false, false)]
-[TestCase(UseReadCache.NoReadCache, ReadFlags.CopyReadsToTail | ReadFlags.CopyFromDeviceOnly, true, true)]
-[TestCase(UseReadCache.ReadCache, ReadFlags.None, false, true)]
+[TestCase(UseReadCache.NoReadCache, ReadFlags.None, false, false, LockingMode.None)]
+[TestCase(UseReadCache.NoReadCache, ReadFlags.CopyReadsToTail | ReadFlags.CopyFromDeviceOnly, true, true, LockingMode.Standard)]
+[TestCase(UseReadCache.ReadCache, ReadFlags.None, false, true, LockingMode.Standard)]
[Category("FasterKV"), Category("Read")]
-public async Task ReadAtAddressAsyncReadFlagsNoneTests(UseReadCache urc, ReadFlags readFlags, bool useRMW, bool flush)
+public async Task ReadAtAddressAsyncReadFlagsNoneTests(UseReadCache urc, ReadFlags readFlags, bool useRMW, bool flush, [Values] LockingMode lockingMode)
{
    var useReadCache = urc == UseReadCache.ReadCache;
-    using var testStore = new TestStore(useReadCache, readFlags, flush);
+    using var testStore = new TestStore(useReadCache, readFlags, flush, lockingMode);
    await testStore.Populate(useRMW, useAsync: true);
    using var session = testStore.fkv.For(new Functions()).NewSession<Functions>();
@@ -475,14 +476,14 @@ public async Task ReadAtAddressAsyncReadFlagsNoneTests(UseReadFla
}

// Similar to the other tests, but exercises the overload where ReadFlags.SkipReadCache is set.
-[TestCase(UseReadCache.NoReadCache, ReadFlags.None, false, false)]
-[TestCase(UseReadCache.NoReadCache, ReadFlags.CopyReadsToTail | ReadFlags.CopyFromDeviceOnly, true, true)]
-[TestCase(UseReadCache.ReadCache, ReadFlags.None, false, true)]
+[TestCase(UseReadCache.NoReadCache, ReadFlags.None, false, false, LockingMode.None)]
+[TestCase(UseReadCache.NoReadCache, ReadFlags.CopyReadsToTail | ReadFlags.CopyFromDeviceOnly, true, true, LockingMode.Standard)]
+[TestCase(UseReadCache.ReadCache, ReadFlags.None, false, true, LockingMode.Standard)]
[Category("FasterKV"), Category("Read")]
-public async Task ReadAtAddressAsyncReadFlagsSkipCacheTests(UseReadCache urc, ReadFlags readFlags, bool useRMW, bool flush)
+public async Task ReadAtAddressAsyncReadFlagsSkipCacheTests(UseReadCache urc, ReadFlags readFlags, bool useRMW, bool flush, [Values] LockingMode lockingMode)
{
    var useReadCache = urc == UseReadCache.ReadCache;
-    using var testStore = new TestStore(useReadCache, readFlags, flush);
+    using var testStore = new TestStore(useReadCache, readFlags, flush, lockingMode);
    await testStore.Populate(useRMW, useAsync: true);
    using var session = testStore.fkv.For(new Functions()).NewSession<Functions>();
@@ -521,14 +522,14 @@ public async Task ReadAtAddressAsyncReadFlagsSkipCacheTests(UseReadCache urc, Re
}

// readCache and copyReadsToTail are mutually exclusive and orthogonal to populating by RMW vs. Upsert.
-[TestCase(UseReadCache.NoReadCache, ReadFlags.None, false, false)]
-[TestCase(UseReadCache.NoReadCache, ReadFlags.CopyReadsToTail | ReadFlags.CopyFromDeviceOnly, true, true)]
-[TestCase(UseReadCache.ReadCache, ReadFlags.None, false, true)]
+[TestCase(UseReadCache.NoReadCache, ReadFlags.None, false, false, LockingMode.None)]
+[TestCase(UseReadCache.NoReadCache, ReadFlags.CopyReadsToTail | ReadFlags.CopyFromDeviceOnly, true, true, LockingMode.Standard)]
+[TestCase(UseReadCache.ReadCache, ReadFlags.None, false, true, LockingMode.Standard)]
[Category("FasterKV"), Category("Read")]
-public void ReadNoKeySyncTests(UseReadCache urc, ReadFlags readFlags, bool useRMW, bool flush) // readCache and copyReadsToTail are mutually exclusive and orthogonal to populating by RMW vs. Upsert.
+public void ReadNoKeySyncTests(UseReadCache urc, ReadFlags readFlags, bool useRMW, bool flush, [Values] LockingMode lockingMode) // readCache and copyReadsToTail are mutually exclusive and orthogonal to populating by RMW vs. Upsert.
{
    var useReadCache = urc == UseReadCache.ReadCache;
-    using var testStore = new TestStore(useReadCache, readFlags, flush);
+    using var testStore = new TestStore(useReadCache, readFlags, flush, lockingMode);
    testStore.Populate(useRMW, useAsync: false).GetAwaiter().GetResult();
    using var session = testStore.fkv.For(new Functions()).NewSession<Functions>();
@@ -553,7 +554,7 @@ public void ReadNoKeySyncTests(UseReadCache urc, ReadFlags readFlags, bool useRM
    {
        // This will wait for each retrieved record; not recommended for performance-critical code or when retrieving multiple records unless necessary.
        session.CompletePendingWithOutputs(out var completedOutputs, wait: true);
-        (status, output) = TestUtils.GetSinglePendingResult(completedOutputs);
+        (status, output) = GetSinglePendingResult(completedOutputs);
    }
    TestStore.ProcessNoKeyRecord(status, ref output, keyOrdinal);
@@ -564,14 +565,14 @@
}

// readCache and copyReadsToTail are mutually exclusive and orthogonal to populating by RMW vs. Upsert.
-[TestCase(UseReadCache.NoReadCache, ReadFlags.None, false, false)]
-[TestCase(UseReadCache.NoReadCache, ReadFlags.CopyReadsToTail | ReadFlags.CopyFromDeviceOnly, true, true)]
-[TestCase(UseReadCache.ReadCache, ReadFlags.None, false, true)]
+[TestCase(UseReadCache.NoReadCache, ReadFlags.None, false, false, LockingMode.None)]
+[TestCase(UseReadCache.NoReadCache, ReadFlags.CopyReadsToTail | ReadFlags.CopyFromDeviceOnly, true, true, LockingMode.Standard)]
+[TestCase(UseReadCache.ReadCache, ReadFlags.None, false, true, LockingMode.Standard)]
[Category("FasterKV"), Category("Read")]
-public async Task ReadNoKeyAsyncTests(UseReadCache urc, ReadFlags readFlags, bool useRMW, bool flush)
+public async Task ReadNoKeyAsyncTests(UseReadCache urc, ReadFlags readFlags, bool useRMW, bool flush, LockingMode lockingMode)
{
    var useReadCache = urc == UseReadCache.ReadCache;
-    using var testStore = new TestStore(useReadCache, readFlags, flush);
+    using var testStore = new TestStore(useReadCache, readFlags, flush, lockingMode);
    await testStore.Populate(useRMW, useAsync: true);
    using var session = testStore.fkv.For(new Functions()).NewSession<Functions>();
@@ -674,7 +675,7 @@ public void ReadFlagsMergeTest()
}

[TestFixture]
-public class ReadMinAddressTests
+class ReadMinAddressTests
{
    const int numOps = 500;
@@ -685,12 +686,23 @@ public class ReadMinAddressTests
    [SetUp]
    public void Setup()
    {
-        TestUtils.DeleteDirectory(TestUtils.MethodTestDir, wait: true);
+        DeleteDirectory(MethodTestDir, wait: true);

-        log = Devices.CreateLogDevice(TestUtils.MethodTestDir + "/SimpleRecoveryTest1.log", deleteOnClose: true);
+        log = Devices.CreateLogDevice(MethodTestDir + "/SimpleRecoveryTest1.log", deleteOnClose: true);
+
+        var lockingMode = LockingMode.Standard;
+        foreach (var arg in TestContext.CurrentContext.Test.Arguments)
+        {
+            if (arg is LockingMode locking_mode)
+            {
+                lockingMode = locking_mode;
+                break;
+            }
+        }

        fht = new FasterKV<long, long>(128,
-            logSettings: new LogSettings { LogDevice = log, MutableFraction = 0.1, MemorySizeBits = 29 }
+            logSettings: new LogSettings { LogDevice = log, MutableFraction = 0.1, MemorySizeBits = 29 },
+            lockingMode: lockingMode
            );

        session = fht.NewSession(new SimpleFunctions<long, long>());
@@ -706,12 +718,12 @@ public void TearDown()
        log?.Dispose();
        log = null;

-        TestUtils.DeleteDirectory(TestUtils.MethodTestDir);
+        DeleteDirectory(MethodTestDir);
    }

    [Test]
    [Category("FasterKV"), Category("Read")]
-    public async ValueTask ReadMinAddressTest([Values] bool isAsync)
+    public async ValueTask ReadMinAddressTest([Values] SyncMode syncMode, [Values] LockingMode lockingMode)
    {
        long minAddress = core.Constants.kInvalidAddress;
        var pivotKey = numOps / 2;
@@ -733,7 +745,7 @@ async ValueTask ReadMin(long key, Status expectedStatus)
        Status status;
        long output = 0;
        ReadOptions readOptions = new() { StopAddress = minAddress };
-        if (isAsync)
+        if (syncMode == SyncMode.Async)
            (status, output) = (await session.ReadAsync(ref key, ref input, ref readOptions)).Complete();
        else
        {
@@ -741,7 +753,7 @@ async ValueTask ReadMin(long key, Status expectedStatus)
            if (status.IsPending)
            {
                Assert.IsTrue(session.CompletePendingWithOutputs(out var completedOutputs, wait: true));
-                (status, output) = TestUtils.GetSinglePendingResult(completedOutputs);
+                (status, output) = GetSinglePendingResult(completedOutputs);
            }
        }
        Assert.AreEqual(expectedStatus, status);
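The `ReadMinAddressTest` above bounds a read via `ReadOptions.StopAddress`. A minimal sketch of that pattern outside the test fixture (assumes an open `session` on a `FasterKV<long, long>` and a previously captured `minAddress`; the key value is illustrative):

```csharp
long key = 42, input = 0, output = 0;
ReadOptions readOptions = new() { StopAddress = minAddress };
var status = session.Read(ref key, ref input, ref output, ref readOptions, out _);
if (status.IsPending)
{
    session.CompletePendingWithOutputs(out var completedOutputs, wait: true);
    (status, output) = GetSinglePendingResult(completedOutputs);
}
// Records at addresses below minAddress are reported NotFound rather than read from disk.
```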
diff --git a/cs/test/ReadCacheChainTests.cs b/cs/test/ReadCacheChainTests.cs
index b98e6fe48..eec111ea2 100644
--- a/cs/test/ReadCacheChainTests.cs
+++ b/cs/test/ReadCacheChainTests.cs
@@ -10,40 +10,44 @@
using System.Threading.Tasks;
using static FASTER.test.TestUtils;
using FASTER.test.LockTable;
+using FASTER.test.LockableUnsafeContext;
+
+#pragma warning disable IDE0060 // Remove unused parameter (used for Setup)

namespace FASTER.test.ReadCacheTests
{
    class ChainTests
    {
-        private FasterKV<int, int> fht;
+        private FasterKV<long, long> fht;
        private IDevice log;
-        const int lowChainKey = 40;
-        const int midChainKey = lowChainKey + chainLen * (mod / 2);
-        const int highChainKey = lowChainKey + chainLen * (mod - 1);
+        const long lowChainKey = 40;
+        const long midChainKey = lowChainKey + chainLen * (mod / 2);
+        const long highChainKey = lowChainKey + chainLen * (mod - 1);
        const int mod = 10;
        const int chainLen = 10;
        const int valueAdd = 1_000_000;

        // -1 so highChainKey is first in the chain.
-        const int numKeys = highChainKey + mod - 1;
+        const long numKeys = highChainKey + mod - 1;

        // Insert into chain.
-        const int spliceInNewKey = highChainKey + mod * 2;
-        const int spliceInExistingKey = highChainKey - mod;
-        const int immutableSplitKey = numKeys / 2;
+        const long spliceInNewKey = highChainKey + mod * 2;
+        const long spliceInExistingKey = highChainKey - mod;
+        const long immutableSplitKey = numKeys / 2;

        // This is the record after the first readcache record we insert; it lets us limit the range to ReadCacheEvict
        // so we get outsplicing rather than successively overwriting the hash table entry on ReadCacheEvict.
        long readCacheBelowMidChainKeyEvictionAddress;

-        internal class ChainComparer : IFasterEqualityComparer<int>
+        internal class ChainComparer : IFasterEqualityComparer<long>
        {
-            int mod;
+            readonly int mod;
+
            internal ChainComparer(int mod) => this.mod = mod;

-            public bool Equals(ref int k1, ref int k2) => k1 == k2;
+            public bool Equals(ref long k1, ref long k2) => k1 == k2;

-            public long GetHashCode64(ref int k) => k % mod;
+            public long GetHashCode64(ref long k) => k % mod;
        }

        [SetUp]
@@ -52,9 +56,20 @@ public void Setup()
        DeleteDirectory(MethodTestDir, wait: true);
        var readCacheSettings = new ReadCacheSettings { MemorySizeBits = 15, PageSizeBits = 9 };
        log = Devices.CreateLogDevice(MethodTestDir + "/NativeReadCacheTests.log", deleteOnClose: true);
-        fht = new FasterKV<int, int>
+
+        var lockingMode = LockingMode.None;
+        foreach (var arg in TestContext.CurrentContext.Test.Arguments)
+        {
+            if (arg is LockingMode lm)
+            {
+                lockingMode = lm;
+                continue;
+            }
+        }
+
+        fht = new FasterKV<long, long>
            (1L << 20, new LogSettings { LogDevice = log, MemorySizeBits = 15, PageSizeBits = 10, ReadCacheSettings = readCacheSettings },
-            comparer: new ChainComparer(mod));
+            comparer: new ChainComparer(mod), lockingMode: lockingMode);
    }

    [TearDown]
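Several `Setup` methods in this PR repeat the same scan of the current test's arguments for a `LockingMode`. A hypothetical helper (not part of this PR) that would consolidate that logic, using NUnit's `TestContext` API:

```csharp
internal static LockingMode GetLockingModeArg(LockingMode defaultMode)
{
    // NUnit exposes the current test's parameter values as object[].
    foreach (var arg in TestContext.CurrentContext.Test.Arguments)
    {
        if (arg is LockingMode lockingMode)
            return lockingMode;
    }
    return defaultMode;
}
```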
@@ -71,7 +86,7 @@ public enum RecordRegion { Immutable, OnDisk, Mutable };
    void PopulateAndEvict(RecordRegion recordRegion = RecordRegion.OnDisk)
    {
-        using var session = fht.NewSession(new SimpleFunctions<int, int>());
+        using var session = fht.NewSession(new SimpleFunctions<long, long>());

        if (recordRegion != RecordRegion.Immutable)
        {
@@ -89,7 +104,7 @@ void PopulateAndEvict(RecordRegion recordRegion = RecordRegion.OnDisk)
        session.CompletePending(true);
        fht.Log.FlushAndEvict(true);

-        for (int key = immutableSplitKey; key < numKeys; key++)
+        for (long key = immutableSplitKey; key < numKeys; key++)
            session.Upsert(key, key + valueAdd);
        session.CompletePending(true);
        fht.Log.ShiftReadOnlyAddress(fht.Log.TailAddress, wait: true);
@@ -97,12 +112,12 @@
    void CreateChain(RecordRegion recordRegion = RecordRegion.OnDisk)
    {
-        using var session = fht.NewSession(new SimpleFunctions<int, int>());
-        int output = -1;
-        bool expectPending(int key) => recordRegion == RecordRegion.OnDisk || (recordRegion == RecordRegion.Immutable && key < immutableSplitKey);
+        using var session = fht.NewSession(new SimpleFunctions<long, long>());
+        long output = -1;
+        bool expectPending(long key) => recordRegion == RecordRegion.OnDisk || (recordRegion == RecordRegion.Immutable && key < immutableSplitKey);

        // Pass1: PENDING reads and populate the cache
-        for (var ii = 0; ii < chainLen; ++ii)
+        for (long ii = 0; ii < chainLen; ++ii)
        {
            var key = lowChainKey + ii * mod;
            var status = session.Read(key, out _);
@@ -144,10 +159,10 @@ void CreateChain(RecordRegion recordRegion = RecordRegion.OnDisk)
        }
    }

-    unsafe bool GetRecordInInMemoryHashChain(int key, out bool isReadCache)
+    unsafe bool GetRecordInInMemoryHashChain(long key, out bool isReadCache)
    {
        // returns whether the key was found before we'd go pending
-        var (la, pa) = GetHashChain(fht, key, out int recordKey, out bool invalid, out isReadCache);
+        var (la, pa) = GetHashChain(fht, key, out long recordKey, out bool invalid, out isReadCache);
        while (isReadCache || la >= fht.hlog.HeadAddress)
        {
            if (recordKey == key && !invalid)
@@ -157,10 +172,10 @@ unsafe bool GetRecordInInMemoryHashChain(int key, out bool isReadCache)
        return false;
    }

-    internal bool FindRecordInReadCache(int key, out bool invalid, out long logicalAddress, out long physicalAddress)
+    internal bool FindRecordInReadCache(long key, out bool invalid, out long logicalAddress, out long physicalAddress)
    {
        // returns whether the key was found before we'd go pending
-        (logicalAddress, physicalAddress) = GetHashChain(fht, key, out int recordKey, out invalid, out bool isReadCache);
+        (logicalAddress, physicalAddress) = GetHashChain(fht, key, out long recordKey, out invalid, out bool isReadCache);
        while (isReadCache)
        {
            if (recordKey == key)
@@ -170,9 +185,9 @@ internal bool FindRecordInReadCache(int key, out bool invalid, out long logicalA
        return false;
    }

-    internal static (long logicalAddress, long physicalAddress) GetHashChain(FasterKV<int, int> fht, int key, out int recordKey, out bool invalid, out bool isReadCache)
+    internal static (long logicalAddress, long physicalAddress) GetHashChain(FasterKV<long, long> fht, long key, out long recordKey, out bool invalid, out bool isReadCache)
    {
-        var tagExists = fht.FindKey(ref key, out var entry);
+        var tagExists = fht.FindHashBucketEntryForKey(ref key, out var entry);
        Assert.IsTrue(tagExists);

        isReadCache = entry.ReadCache;
@@ -184,10 +199,10 @@ internal static (long logicalAddress, long physicalAddress) GetHashChain(FasterK
        return (entry.Address, pa);
    }
-    (long logicalAddress, long physicalAddress) NextInChain(long physicalAddress, out int recordKey, out bool invalid, ref bool isReadCache)
+    (long logicalAddress, long physicalAddress) NextInChain(long physicalAddress, out long recordKey, out bool invalid, ref bool isReadCache)
        => NextInChain(fht, physicalAddress, out recordKey, out invalid, ref isReadCache);

-    internal static (long logicalAddress, long physicalAddress) NextInChain(FasterKV<int, int> fht, long physicalAddress, out int recordKey, out bool invalid, ref bool isReadCache)
+    internal static (long logicalAddress, long physicalAddress) NextInChain(FasterKV<long, long> fht, long physicalAddress, out long recordKey, out bool invalid, ref bool isReadCache)
    {
        var log = isReadCache ? fht.readcache : fht.hlog;
        var info = log.GetInfo(physicalAddress);
@@ -202,11 +217,11 @@ internal static (long logicalAddress, long physicalAddress) NextInChain(FasterKV
        return (la, pa);
    }

-    (long logicalAddress, long physicalAddress) ScanReadCacheChain(int[] omitted = null, bool evicted = false, bool deleted = false)
+    (long logicalAddress, long physicalAddress) ScanReadCacheChain(long[] omitted = null, bool evicted = false, bool deleted = false)
    {
-        omitted ??= Array.Empty<int>();
+        omitted ??= Array.Empty<long>();

-        var (la, pa) = GetHashChain(fht, lowChainKey, out int actualKey, out bool invalid, out bool isReadCache);
+        var (la, pa) = GetHashChain(fht, lowChainKey, out long actualKey, out bool invalid, out bool isReadCache);
        for (var expectedKey = highChainKey; expectedKey >= lowChainKey; expectedKey -= mod)
        {
            // We evict from readcache only to just below midChainKey
@@ -232,10 +247,10 @@ internal static (long logicalAddress, long physicalAddress) NextInChain(FasterKV
        return (la, pa);
    }

-    (long logicalAddress, long physicalAddress) SkipReadCacheChain(int key)
+    (long logicalAddress, long physicalAddress) SkipReadCacheChain(long key)
        => SkipReadCacheChain(fht, key);

-    internal static (long logicalAddress, long physicalAddress) SkipReadCacheChain(FasterKV<int, int> fht, int key)
+    internal static (long logicalAddress, long physicalAddress) SkipReadCacheChain(FasterKV<long, long> fht, long key)
    {
        var (la, pa) = GetHashChain(fht, key, out _, out _, out bool isReadCache);
        while (isReadCache)
@@ -243,7 +258,7 @@ internal static (long logicalAddress, long physicalAddress) SkipReadCacheChain(F
        return (la, pa);
    }

-    void VerifySplicedInKey(int expectedKey)
+    void VerifySplicedInKey(long expectedKey)
    {
        // Scan to the end of the readcache chain and verify we inserted the value.
        var (_, pa) = SkipReadCacheChain(expectedKey);
@@ -251,15 +266,14 @@ void VerifySplicedInKey(int expectedKey)
        Assert.AreEqual(expectedKey, storedKey);
    }
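For readers unfamiliar with the chain layout: read-cache records are prepended to a bucket's hash chain, so skipping them is just a walk until the chain leaves the read cache. Spelled out as a sketch using the helpers above (key is illustrative):

```csharp
var (la, pa) = GetHashChain(fht, key, out long recordKey, out bool invalid, out bool isReadCache);
while (isReadCache)   // read-cache records sit at the front of the chain
    (la, pa) = NextInChain(fht, pa, out recordKey, out invalid, ref isReadCache);
// (la, pa) now addresses the first main-log record in this hash slot's chain.
```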
-    static void ClearCountsOnError(ClientSession<int, int, int, int, Empty, SimpleFunctions<int, int>> luContext)
+    static void ClearCountsOnError(ClientSession<long, long, long, long, Empty, SimpleFunctions<long, long>> luContext)
    {
        // If we already have an exception, clear these counts so "Run" will not report them spuriously.
        luContext.sharedLockCount = 0;
        luContext.exclusiveLockCount = 0;
    }

-    bool LockTableHasEntries() => LockTableTests.LockTableHasEntries(fht.LockTable);
-    int LockTableEntryCount() => LockTableTests.LockTableEntryCount(fht.LockTable);
+    void AssertTotalLockCounts(long expectedX, long expectedS) => OverflowBucketLockTableTests.AssertTotalLockCounts(fht, expectedX, expectedS);

    [Test]
    [Category(FasterKVTestCategory)]
@@ -277,13 +291,13 @@ public void ChainVerificationTest()
    [Category(FasterKVTestCategory)]
    [Category(ReadCacheTestCategory)]
    [Category(SmokeTestCategory)]
-    public void DeleteCacheRecordTest()
+    public void DeleteCacheRecordTest([Values] LockingMode lockingMode)
    {
        PopulateAndEvict();
        CreateChain();
-        using var session = fht.NewSession(new SimpleFunctions<int, int>());
+        using var session = fht.NewSession(new SimpleFunctions<long, long>());

-        void doTest(int key)
+        void doTest(long key)
        {
            var status = session.Delete(key);
            Assert.IsTrue(!status.Found && status.Record.Created, status.ToString());
@@ -305,13 +319,13 @@ void doTest(int key)
    [Category(FasterKVTestCategory)]
    [Category(ReadCacheTestCategory)]
    [Category(SmokeTestCategory)]
-    public void DeleteHalfOfAllCacheRecordsTest()
+    public void DeleteHalfOfAllCacheRecordsTest([Values] LockingMode lockingMode)
    {
        PopulateAndEvict();
        CreateChain();
-        using var session = fht.NewSession(new SimpleFunctions<int, int>());
+        using var session = fht.NewSession(new SimpleFunctions<long, long>());

-        void doTest(int key)
+        void doTest(long key)
        {
            var status = session.Delete(key);
            Assert.IsTrue(!status.Found && status.Record.Created, status.ToString());
@@ -357,7 +371,7 @@ void doTest(int key)
    [Category(FasterKVTestCategory)]
    [Category(ReadCacheTestCategory)]
    [Category(SmokeTestCategory)]
-    public void UpsertCacheRecordTest()
+    public void UpsertCacheRecordTest([Values] LockingMode lockingMode)
    {
        DoUpdateTest(useRMW: false);
    }
@@ -366,7 +380,7 @@ public void UpsertCacheRecordTest()
    [Category(FasterKVTestCategory)]
    [Category(ReadCacheTestCategory)]
    [Category(SmokeTestCategory)]
-    public void RMWCacheRecordTest()
+    public void RMWCacheRecordTest([Values] LockingMode lockingMode)
    {
        DoUpdateTest(useRMW: true);
    }
@@ -375,9 +389,9 @@ void DoUpdateTest(bool useRMW)
    {
        PopulateAndEvict();
        CreateChain();
-        using var session = fht.NewSession(new SimpleFunctions<int, int>());
+        using var session = fht.NewSession(new SimpleFunctions<long, long>());

-        void doTest(int key)
+        void doTest(long key)
        {
            var status = session.Read(key, out var value);
            Assert.IsTrue(status.Found, status.ToString());
@@ -415,13 +429,13 @@ void doTest(int key)
    [Category(FasterKVTestCategory)]
    [Category(ReadCacheTestCategory)]
    [Category(SmokeTestCategory)]
-    public void SpliceInFromCTTTest()
+    public void SpliceInFromCTTTest([Values] LockingMode lockingMode)
    {
        PopulateAndEvict();
        CreateChain();

-        using var session = fht.NewSession(new SimpleFunctions<int, int>());
-        int input = 0, output = 0, key = lowChainKey - mod; // key must be in evicted region for this test
+        using var session = fht.NewSession(new SimpleFunctions<long, long>());
+        long input = 0, output = 0, key = lowChainKey - mod; // key must be in evicted region for this test
        ReadOptions readOptions = new() { ReadFlags = ReadFlags.CopyReadsToTail };

        var status = session.Read(ref key, ref input, ref output, ref readOptions, out _);
@@ -435,13 +449,13 @@ public void SpliceInFromCTTTest()
    [Category(FasterKVTestCategory)]
    [Category(ReadCacheTestCategory)]
    [Category(SmokeTestCategory)]
-    public void SpliceInFromUpsertTest([Values] RecordRegion recordRegion)
+    public void SpliceInFromUpsertTest([Values] RecordRegion recordRegion, [Values] LockingMode lockingMode)
    {
        PopulateAndEvict(recordRegion);
        CreateChain(recordRegion);

-        using var session = fht.NewSession(new SimpleFunctions<int, int>());
-        int key = -1;
+        using var session = fht.NewSession(new SimpleFunctions<long, long>());
+        long key = -1;

        if (recordRegion == RecordRegion.Immutable || recordRegion == RecordRegion.OnDisk)
        {
@@ -463,13 +477,13 @@ public void SpliceInFromUpsertTest([Values] RecordRegion recordRegion)
    [Category(FasterKVTestCategory)]
    [Category(ReadCacheTestCategory)]
    [Category(SmokeTestCategory)]
-    public void SpliceInFromRMWTest([Values] RecordRegion recordRegion)
+    public void SpliceInFromRMWTest([Values] RecordRegion recordRegion, [Values] LockingMode lockingMode)
    {
        PopulateAndEvict(recordRegion);
        CreateChain(recordRegion);

-        using var session = fht.NewSession(new SimpleFunctions<int, int>());
-        int key = -1, output = -1;
+        using var session = fht.NewSession(new SimpleFunctions<long, long>());
+        long key = -1, output = -1;

        if (recordRegion == RecordRegion.Immutable || recordRegion == RecordRegion.OnDisk)
        {
@@ -510,13 +524,13 @@ public void SpliceInFromRMWTest([Values] RecordRegion recordRegion)
    [Category(FasterKVTestCategory)]
    [Category(ReadCacheTestCategory)]
    [Category(SmokeTestCategory)]
-    public void SpliceInFromDeleteTest([Values] RecordRegion recordRegion)
+    public void SpliceInFromDeleteTest([Values] RecordRegion recordRegion, [Values] LockingMode lockingMode)
    {
        PopulateAndEvict(recordRegion);
        CreateChain(recordRegion);

-        using var session = fht.NewSession(new SimpleFunctions<int, int>());
-        int key = -1;
+        using var session = fht.NewSession(new SimpleFunctions<long, long>());
+        long key = -1;

        if (recordRegion == RecordRegion.Immutable || recordRegion == RecordRegion.OnDisk)
        {
@@ -538,19 +552,19 @@
    [Category(FasterKVTestCategory)]
    [Category(ReadCacheTestCategory)]
    [Category(SmokeTestCategory)]
-    public void EvictFromReadCacheToLockTableTest()
+    public void VerifyLockCountsAfterReadCacheEvict([Values(LockingMode.Standard)] LockingMode lockingMode)
    {
        PopulateAndEvict();
        CreateChain();

-        using var session = fht.NewSession(new SimpleFunctions<int, int>());
+        using var session = fht.NewSession(new SimpleFunctions<long, long>());
        var luContext = session.LockableUnsafeContext;

-        Dictionary<int, LockType> locks = new()
+        var keys = new[]
        {
-            { lowChainKey, LockType.Exclusive },
-            { midChainKey, LockType.Shared },
-            { highChainKey, LockType.Exclusive }
+            new FixedLengthLockableKeyStruct<long>(lowChainKey, LockType.Exclusive, luContext),
+            new FixedLengthLockableKeyStruct<long>(midChainKey, LockType.Shared, luContext),
+            new FixedLengthLockableKeyStruct<long>(highChainKey, LockType.Exclusive, luContext)
        };

        luContext.BeginUnsafe();
@@ -558,108 +572,40 @@ public void EvictFromReadCacheToLockTableTest()
        try
        {
-            // For this single-threaded test, the locking does not really have to be in order, but for consistency do it.
-            foreach (var key in locks.Keys.OrderBy(k => k))
-                luContext.Lock(key, locks[key]);
+            luContext.SortLockCodes(keys);

-            fht.ReadCache.FlushAndEvict(wait: true);
-
-            Assert.IsTrue(LockTableHasEntries());
-            Assert.AreEqual(locks.Count, LockTableEntryCount());
-
-            foreach (var key in locks.Keys)
-            {
-                var localKey = key; // can't ref the iteration variable
-                var found = fht.LockTable.TryGet(ref localKey, out RecordInfo recordInfo);
-                Assert.IsTrue(found);
-                var lockType = locks[key];
-                Assert.AreEqual(lockType == LockType.Exclusive, recordInfo.IsLockedExclusive);
-                Assert.AreEqual(lockType != LockType.Exclusive, recordInfo.IsLockedShared);
-
-                luContext.Unlock(key, lockType);
-                Assert.IsFalse(fht.LockTable.TryGet(ref localKey, out recordInfo));
-            }
-        }
-        catch (Exception)
-        {
-            ClearCountsOnError(session);
-            throw;
-        }
-        finally
-        {
-            luContext.EndLockable();
-            luContext.EndUnsafe();
-        }
-
-        Assert.IsFalse(LockTableHasEntries());
-        Assert.AreEqual(0, LockTableEntryCount());
-    }
-
-    [Test]
-    [Category(FasterKVTestCategory)]
-    [Category(ReadCacheTestCategory)]
-    [Category(SmokeTestCategory)]
-    public void TransferFromLockTableToReadCacheTest()
-    {
-        PopulateAndEvict();
-
-        // DO NOT create the chain here; do that below. Here, we create records in the lock table and THEN we create
-        // the chain, resulting in transfer of the locked records.
-        //CreateChain();
-
-        using var session = fht.NewSession(new SimpleFunctions<int, int>());
-        var luContext = session.LockableUnsafeContext;
-
-        Dictionary<int, LockType> locks = new()
-        {
-            { lowChainKey, LockType.Exclusive },
-            { midChainKey, LockType.Shared },
-            { highChainKey, LockType.Exclusive }
-        };
-
-        luContext.BeginUnsafe();
-        luContext.BeginLockable();
-
-        try
-        {
            // For this single-threaded test, the locking does not really have to be in order, but for consistency do it.
-            foreach (var key in locks.Keys.OrderBy(k => k))
+            luContext.Lock(keys);

            fht.ReadCache.FlushAndEvict(wait: true);

-            // Verify the locks have been evicted to the lockTable
-            Assert.IsTrue(LockTableHasEntries());
-            Assert.AreEqual(locks.Count, LockTableEntryCount());
-
-            foreach (var key in locks.Keys)
+            int xlocks = 0, slocks = 0;
+            foreach (var idx in LockableUnsafeContextTests.EnumActionKeyIndices(keys, LockOperationType.Unlock))
            {
-                var localKey = key; // can't ref the iteration variable
-                var found = fht.LockTable.TryGet(ref localKey, out RecordInfo recordInfo);
-                Assert.IsTrue(found);
-                var lockType = locks[key];
-                Assert.AreEqual(lockType == LockType.Exclusive, recordInfo.IsLockedExclusive);
-                Assert.AreEqual(lockType != LockType.Exclusive, recordInfo.IsLockedShared);
+                if (keys[idx].LockType == LockType.Exclusive)
+                    ++xlocks;
+                else
+                    ++slocks;
            }
+            AssertTotalLockCounts(xlocks, slocks);

-            fht.Log.FlushAndEvict(wait: true);
-
-            // Create the readcache entries, which will transfer the locks from the locktable to the readcache
-            foreach (var key in locks.Keys)
+            foreach (var idx in LockableUnsafeContextTests.EnumActionKeyIndices(keys, LockOperationType.Unlock))
            {
-                var status = luContext.Read(key, out _);
-                Assert.IsTrue(status.IsPending, status.ToString());
-                luContext.CompletePending(wait: true);
-
-                var lockType = locks[key];
-                var (exclusive, sharedCount) = luContext.IsLocked(key);
-                Assert.AreEqual(lockType == LockType.Exclusive, exclusive);
-                Assert.AreEqual(lockType != LockType.Exclusive, sharedCount > 0);
-
-                luContext.Unlock(key, lockType);
-                var localKey = key; // can't ref the iteration variable
-                Assert.IsFalse(fht.LockTable.TryGet(ref localKey, out _));
+                ref var key = ref keys[idx];
+                HashEntryInfo hei = new(fht.comparer.GetHashCode64(ref key.Key));
+                OverflowBucketLockTableTests.PopulateHei(fht, ref hei);
+
+                var lockState = fht.LockTable.GetLockState(ref key.Key, ref hei);
+                Assert.IsTrue(lockState.IsFound);
+                Assert.AreEqual(key.LockType == LockType.Exclusive, lockState.IsLockedExclusive);
+                Assert.AreEqual(key.LockType != LockType.Exclusive, lockState.NumLockedShared > 0);
+
+                luContext.Unlock(keys, idx, 1);
+                lockState = fht.LockTable.GetLockState(ref key.Key, ref hei);
+                Assert.IsFalse(lockState.IsLockedExclusive);
+                Assert.AreEqual(0, lockState.NumLockedShared);
            }
+            AssertTotalLockCounts(0, 0);
        }
        catch (Exception)
        {
@@ -672,8 +618,7 @@
            luContext.EndLockable();
            luContext.EndUnsafe();
        }

-        Assert.IsFalse(LockTableHasEntries());
-        Assert.AreEqual(0, LockTableEntryCount());
+        AssertTotalLockCounts(0, 0);
    }
}

@@ -718,7 +663,7 @@ public void Setup()
        this.log ??= Devices.CreateLogDevice(filename, deleteOnClose: true);

        // Make the main log small enough that we force the readcache
-        var readCacheSettings = new ReadCacheSettings { MemorySizeBits = 15, PageSizeBits = 9 };
+        ReadCacheSettings readCacheSettings = new() { MemorySizeBits = 15, PageSizeBits = 9 };
        var logSettings = new LogSettings { LogDevice = log, MemorySizeBits = 15, PageSizeBits = 10, ReadCacheSettings = readCacheSettings };

        ModuloRange modRange = ModuloRange.None;
@@ -731,7 +676,8 @@
            }
        }

-        fht = new FasterKV<long, long>(1L << 20, logSettings, comparer: new LongComparerModulo(modRange));
+        fht = new FasterKV<long, long>(1L << 20, logSettings, comparer: new LongComparerModulo(modRange),
+            lockingMode: LockingMode.Ephemeral);
    }

    [TearDown]
@@ -843,7 +789,7 @@ unsafe void runReadThread(int tid)
        (status, output) = GetSinglePendingResult(completedOutputs, out var recordMetadata);
        Assert.AreEqual(recordMetadata.Address == Constants.kInvalidAddress, status.Record.CopiedToReadCache, $"key {ii}: {status}");
    }
-    Assert.IsTrue(status.Found, $"key {key}, status {status}");
+    Assert.IsTrue(status.Found, $"key {key}, status {status}, wasPending {wasPending}");
    Assert.AreEqual(ii, output % valueAdd);
}
}
@@ -873,7 +819,7 @@ unsafe void runUpdateThread(int tid)
        // Assert.IsTrue(status.Record.CopyUpdated, $"Expected Record.CopyUpdated but was: {status}");
    }
    if (updateOp == UpdateOp.RMW)   // Upsert will not try to find records below HeadAddress, but it may find them in-memory
-        Assert.IsTrue(status.Found, $"key {key}, status {status}");
+        Assert.IsTrue(status.Found, $"key {key}, status {status}, wasPending {wasPending}");
    Assert.AreEqual(ii + valueAdd * tid, output);
}
}
@@ -946,7 +892,8 @@ public void Setup()
        }
    }

-    fht = new FasterKV<SpanByte, SpanByte>(1L << 20, logSettings, comparer: new SpanByteComparerModulo(modRange));
+    fht = new FasterKV<SpanByte, SpanByte>(1L << 20, logSettings, comparer: new SpanByteComparerModulo(modRange),
+        lockingMode: LockingMode.Standard);
}

[TearDown]
diff --git a/cs/test/RecoveryChecks.cs b/cs/test/RecoveryChecks.cs
index 64000b433..934c26d74 100644
--- a/cs/test/RecoveryChecks.cs
+++ b/cs/test/RecoveryChecks.cs
@@ -477,7 +477,8 @@ public async ValueTask RecoveryCheck5([Values] CheckpointType checkpointType, [V
    using var fht1 = new FasterKV<long, long>
        (size,
        logSettings: new LogSettings { LogDevice = log, MutableFraction = 1, PageSizeBits = 10, MemorySizeBits = 14, ReadCacheSettings = useReadCache ? new ReadCacheSettings() : null },
-        checkpointSettings: new CheckpointSettings { CheckpointDir = path }
+        checkpointSettings: new CheckpointSettings { CheckpointDir = path },
+        lockingMode: LockingMode.Ephemeral
        );

    using var s1 = fht1.NewSession(new MyFunctions());
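For reference, the store-construction pattern these tests now share. Sizes here are illustrative; `LockingMode` selects between `None`, `Ephemeral`, and `Standard` as exercised throughout this PR:

```csharp
var fht = new FasterKV<long, long>(
    1L << 20,
    logSettings: new LogSettings { LogDevice = log, MemorySizeBits = 15, PageSizeBits = 10 },
    lockingMode: LockingMode.Ephemeral);
```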
diff --git a/cs/test/ReproReadCacheTest.cs b/cs/test/ReproReadCacheTest.cs
index 8027af8a0..c751084fe 100644
--- a/cs/test/ReproReadCacheTest.cs
+++ b/cs/test/ReproReadCacheTest.cs
@@ -51,9 +51,9 @@ public void Setup()
    DeleteDirectory(MethodTestDir, wait: true);

    ReadCacheSettings readCacheSettings = default;
-    bool disableEphemeralLocking = false;
    string filename = MethodTestDir + "/BasicFasterTests.log";

+    var lockingMode = LockingMode.None;
    foreach (var arg in TestContext.CurrentContext.Test.Arguments)
    {
        if (arg is ReadCacheMode rcm)
@@ -67,9 +67,9 @@ public void Setup()
            };
            continue;
        }
-        if (arg is EphemeralLockingMode elm)
+        if (arg is LockingMode lm)
        {
-            disableEphemeralLocking = elm == EphemeralLockingMode.NoEphemeralLocking;
+            lockingMode = lm;
            continue;
        }
        if (arg is DeviceType deviceType)
@@ -88,7 +88,7 @@ public void Setup()
        MemorySizeBits = 15,
        PageSizeBits = 12,
        ReadCacheSettings = readCacheSettings,
-    }, disableEphemeralLocking: disableEphemeralLocking);
+    }, lockingMode: lockingMode);
}

[TearDown]
@@ -106,8 +106,8 @@ public void TearDown()
[Category(ReadCacheTestCategory)]
[Category(StressTestCategory)]
//[Repeat(300)]
-public unsafe void RandomReadCacheTest([Values(1, 2, 4, 8)] int numThreads, [Values] KeyContentionMode keyContentionMode,
-    [Values] EphemeralLockingMode ephemeralLockingMode, [Values] ReadCacheMode readCacheMode,
+public unsafe void RandomReadCacheTest([Values(1, 2, 8)] int numThreads, [Values] KeyContentionMode keyContentionMode,
+    [Values] LockingMode lockingMode, [Values] ReadCacheMode readCacheMode,
#if WINDOWS
    [Values(DeviceType.LSD
#else
@@ -160,7 +160,7 @@ void LocalRun(int startKey, int endKey)
        LocalRead(sessionContext, r.Next(startKey, endKey));
}

-const int MaxKeys = 24000;
+const int MaxKeys = 8000;

{
    // Write the values first (single-threaded, all keys)
    var session = fht.For(new Functions()).NewSession<Functions>();
diff --git a/cs/test/TestUtils.cs b/cs/test/TestUtils.cs
index 6655d4ea0..36190a57c 100644
--- a/cs/test/TestUtils.cs
+++ b/cs/test/TestUtils.cs
@@ -198,8 +198,6 @@ public enum KeyEquality { Equal, NotEqual }

public enum ReadCacheMode { UseReadCache, NoReadCache }

-public enum EphemeralLockingMode { UseEphemeralLocking, NoEphemeralLocking };
-
public enum KeyContentionMode { Contention, NoContention };

public enum BatchMode { Batch, NoBatch };
@@ -236,17 +234,12 @@ internal async static ValueTask DoTwoThreadRandomKeyTest(int count, Action
    }
}

-internal unsafe static bool FindKey<Key, Value>(this FasterKV<Key, Value> fht, ref Key key, out HashBucketEntry entry)
+internal static unsafe bool FindHashBucketEntryForKey<Key, Value>(this FasterKV<Key, Value> fht, ref Key key, out HashBucketEntry entry)
{
-    var bucket = default(HashBucket*);
-    var firstBucket = default(HashBucket*);
-    int slot = default;
-    entry = default;
-
-    var hash = fht.Comparer.GetHashCode64(ref key);
-    var tag = (ushort)((ulong)hash >> Constants.kHashTagShift);
-
-    return fht.FindTag(hash, tag, ref firstBucket, ref bucket, ref slot, ref entry);
+    HashEntryInfo hei = new(fht.Comparer.GetHashCode64(ref key));
+    var success = fht.FindTag(ref hei);
+    entry = hei.entry;
+    return success;
}
}
}
diff --git a/docs/_docs/23-fasterkv-tuning.md b/docs/_docs/23-fasterkv-tuning.md
index ebbd308af..4d2d06273 100644
--- a/docs/_docs/23-fasterkv-tuning.md
+++ b/docs/_docs/23-fasterkv-tuning.md
@@ -58,7 +58,7 @@ is useful when reads are infrequent, but will be followed by an update, or subse
causes any reads from disk to be copied to the tail of log. `CopyReadsToTail.FromReadOnly`
causes any reads from either disk or the read-only region of memory to be copied to the tail of log. The latter is helpful
when you do not want particularly hot items to "escape" to disk only to be immediately brought back
to the tail of main memory. It is also useful when you
-use FASTER as a memory-only cache (with `NullDevice`) as in [this](https://github.com/microsoft/FASTER/tree/master/cs/samples/MemOnlyCache) sample.
+use FASTER as a memory-only cache (with `NullDevice`) as in [this](https://github.com/microsoft/FASTER/tree/master/cs/samples/ResizableCacheStore) sample.

* `ReadCacheSettings`:
This setting is used to enable a read cache, separately from the main FASTER log. If you need to frequently read
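A minimal sketch of the memory-only configuration the paragraph above refers to (configuration values here are illustrative, not taken from the sample):

```csharp
// NullDevice discards all writes, so records never leave main memory.
var log = new NullDevice();
var cache = new FasterKV<long, long>(
    1L << 20,
    new LogSettings { LogDevice = log, ObjectLogDevice = log, MemorySizeBits = 25, PageSizeBits = 14 });
```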
@@ -116,8 +116,8 @@ correspond exactly to the total log memory utilization.

One can accurately track the total memory used by FASTER, including heap objects, using a cache size
tracker that lets `IFunctions` notify it of record additions and deletions, and by subscribing to
-evictions from the head of the in-memory log. Details are shown in the [MemOnlySample](https://github.com/microsoft/FASTER/tree/master/cs/samples/MemOnlyCache)
-sample, where we show how to implement such a cache size [tracker](https://github.com/microsoft/FASTER/blob/master/cs/samples/MemOnlyCache/CacheSizeTracker.cs)
+evictions from the head of the in-memory log. Details are shown in the [ResizableCacheStore](https://github.com/microsoft/FASTER/tree/master/cs/samples/ResizableCacheStore)
+sample, where we show how to implement such a cache size [tracker](https://github.com/microsoft/FASTER/blob/master/cs/samples/ResizableCacheStore/CacheSizeTracker.cs)
to:
1. Track FASTER's total memory usage (including the heap objects, log, hash table, and overflow buckets) accurately.
2. Set a target memory usage and tune `EmptyPageCount` to achieve this target memory utilization.
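A hedged sketch of the two mechanisms this list refers to. `SizeTracker` is a hypothetical stand-in for the sample's `CacheSizeTracker`, and the eviction-subscription and `EmptyPageCount` calls are assumptions based on the description above, not a definitive API reference:

```csharp
using System;
using FASTER.core;

// Hypothetical tracker; the sample's CacheSizeTracker is more elaborate.
class SizeTracker : IObserver<IFasterScanIterator<long, long>>
{
    public void OnNext(IFasterScanIterator<long, long> iter)
    {
        // Deduct evicted records from the tracked total as they fall off the head of the log.
        while (iter.GetNext(out _, out long key, out long value))
            Console.WriteLine($"evicted {key}={value}");
    }
    public void OnCompleted() { }
    public void OnError(Exception e) { }
}

// Wiring it up (store is a FasterKV<long, long>):
//   store.Log.SubscribeEvictions(new SizeTracker());
//   store.Log.EmptyPageCount = 4;   // fewer usable pages => lower memory target
```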