From 16edd83aa895951640135e77b3d3402ace6fdfdd Mon Sep 17 00:00:00 2001 From: Badrish Chandramouli Date: Tue, 17 Sep 2019 10:32:16 -0700 Subject: [PATCH 01/36] Initial checkin --- cs/FASTER.sln | 9 + cs/playground/FasterLogSample/App.config | 6 + .../FasterLogSample/FasterLogSample.csproj | 38 +++ cs/playground/FasterLogSample/Program.cs | 99 ++++++++ .../Properties/AssemblyInfo.cs | 22 ++ cs/src/core/Device/Devices.cs | 9 +- cs/src/core/Device/LocalStorageDevice.cs | 21 +- .../core/Device/ManagedLocalStorageDevice.cs | 23 +- cs/src/core/Device/StorageDeviceBase.cs | 2 +- cs/src/core/Epochs/FastThreadLocal.cs | 50 ++-- cs/src/core/Epochs/LightEpoch.cs | 61 +++-- cs/src/core/FASTER.core.csproj | 1 + cs/src/core/Index/FasterLog/FasterLog.cs | 197 +++++++++++++++ .../core/Index/FasterLog/FasterLogIterator.cs | 224 ++++++++++++++++++ 14 files changed, 708 insertions(+), 54 deletions(-) create mode 100644 cs/playground/FasterLogSample/App.config create mode 100644 cs/playground/FasterLogSample/FasterLogSample.csproj create mode 100644 cs/playground/FasterLogSample/Program.cs create mode 100644 cs/playground/FasterLogSample/Properties/AssemblyInfo.cs create mode 100644 cs/src/core/Index/FasterLog/FasterLog.cs create mode 100644 cs/src/core/Index/FasterLog/FasterLogIterator.cs diff --git a/cs/FASTER.sln b/cs/FASTER.sln index af8a268e5..c1f9b7a42 100644 --- a/cs/FASTER.sln +++ b/cs/FASTER.sln @@ -44,6 +44,8 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "devices", "devices", "{A6B1 EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "FASTER.devices.AzureStorageDevice", "src\devices\AzureStorageDevice\FASTER.devices.AzureStorageDevice.csproj", "{E571E686-01A0-44D5-BFF5-B7678284258B}" EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "FasterLogSample", "playground\FasterLogSample\FasterLogSample.csproj", "{25C5C6B6-4A8A-46DD-88C1-EB247033FE58}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -156,6 +158,12 @@ Global {E571E686-01A0-44D5-BFF5-B7678284258B}.Release|Any CPU.Build.0 = Release|Any CPU {E571E686-01A0-44D5-BFF5-B7678284258B}.Release|x64.ActiveCfg = Release|x64 {E571E686-01A0-44D5-BFF5-B7678284258B}.Release|x64.Build.0 = Release|x64 + {25C5C6B6-4A8A-46DD-88C1-EB247033FE58}.Debug|Any CPU.ActiveCfg = Debug|x64 + {25C5C6B6-4A8A-46DD-88C1-EB247033FE58}.Debug|x64.ActiveCfg = Debug|x64 + {25C5C6B6-4A8A-46DD-88C1-EB247033FE58}.Debug|x64.Build.0 = Debug|x64 + {25C5C6B6-4A8A-46DD-88C1-EB247033FE58}.Release|Any CPU.ActiveCfg = Release|x64 + {25C5C6B6-4A8A-46DD-88C1-EB247033FE58}.Release|x64.ActiveCfg = Release|x64 + {25C5C6B6-4A8A-46DD-88C1-EB247033FE58}.Release|x64.Build.0 = Release|x64 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE @@ -176,6 +184,7 @@ Global {7EBB5ADF-D9EA-4B8B-AAE7-C48A98EBF780} = {E6026D6A-01C5-4582-B2C1-64751490DABE} {A6B14415-D316-4955-BE5F-725BB2DEBEBE} = {28800357-C8CE-4CD0-A2AD-D4A910ABB496} {E571E686-01A0-44D5-BFF5-B7678284258B} = {A6B14415-D316-4955-BE5F-725BB2DEBEBE} + {25C5C6B6-4A8A-46DD-88C1-EB247033FE58} = {E6026D6A-01C5-4582-B2C1-64751490DABE} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {A0750637-2CCB-4139-B25E-F2CE740DCFAC} diff --git a/cs/playground/FasterLogSample/App.config b/cs/playground/FasterLogSample/App.config new file mode 100644 index 000000000..d69a9b153 --- /dev/null +++ b/cs/playground/FasterLogSample/App.config @@ -0,0 +1,6 @@ + + + + + + diff --git 
a/cs/playground/FasterLogSample/FasterLogSample.csproj b/cs/playground/FasterLogSample/FasterLogSample.csproj new file mode 100644 index 000000000..7b8c2eee1 --- /dev/null +++ b/cs/playground/FasterLogSample/FasterLogSample.csproj @@ -0,0 +1,38 @@ + + + + net46 + x64 + win7-x64 + + + + Exe + true + StructSample + prompt + PackageReference + true + + + + TRACE;DEBUG + full + true + bin\x64\Debug\ + + + TRACE + pdbonly + true + bin\x64\Release\ + + + + + + + + + + \ No newline at end of file diff --git a/cs/playground/FasterLogSample/Program.cs b/cs/playground/FasterLogSample/Program.cs new file mode 100644 index 000000000..1b4dd542c --- /dev/null +++ b/cs/playground/FasterLogSample/Program.cs @@ -0,0 +1,99 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +using FASTER.core; +using FASTER.core.log; +using System; +using System.Diagnostics; +using System.Diagnostics.Eventing.Reader; +using System.IO; +using System.Runtime.CompilerServices; +using System.Threading; + +namespace FasterLogSample +{ + public class Program + { + const int entryLength = 100; + static FasterLog log; + + static void ReportThread() + { + long lastTime = 0; + long lastValue = log.TailAddress; + Stopwatch sw = new Stopwatch(); + sw.Start(); + + while (true) + { + Thread.Sleep(10000); + var nowTime = sw.ElapsedMilliseconds; + var nowValue = log.TailAddress; + + Console.WriteLine("Throughput: {0} MB/sec", + (nowValue - lastValue) / (1000*(nowTime - lastTime))); + lastTime = nowTime; + lastValue = nowValue; + } + } + + static void AppendThread() + { + byte[] entry = new byte[entryLength]; + for (int i = 0; i < entryLength; i++) + entry[i] = (byte)i; + + while (true) + { + log.Append(entry); + } + } + + static void ScanThread() + { + Thread.Sleep(5000); + + byte[] entry = new byte[entryLength]; + for (int i = 0; i < entryLength; i++) + entry[i] = (byte)i; + var entrySpan = new Span(entry); + + + long lastAddress = 0; + Span result; + using (var iter = log.Scan(0, long.MaxValue)) + { + while (true) + { + while (!iter.GetNext(out result)) + Thread.Sleep(1000); + if (!result.SequenceEqual(entrySpan)) + { + throw new Exception("Invalid entry found"); + } + + if (iter.CurrentAddress - lastAddress > 500000000) + { + log.TruncateUntil(iter.CurrentAddress); + lastAddress = iter.CurrentAddress; + } + } + } + } + + static void Main(string[] args) + { + var device = Devices.CreateLogDevice("E:\\logs\\hlog.log"); + log = new FasterLog(new FasterLogSettings { LogDevice = device, MemorySizeBits = 26 }); + + new Thread(new ThreadStart(AppendThread)).Start(); + //new Thread(new ThreadStart(AppendThread)).Start(); + //new Thread(new ThreadStart(AppendThread)).Start(); + //new Thread(new ThreadStart(AppendThread)).Start(); + // new Thread(new ThreadStart(ScanThread)).Start(); + new Thread(new ThreadStart(ReportThread)).Start(); + + Thread.Sleep(500*1000); + } + } +} diff --git a/cs/playground/FasterLogSample/Properties/AssemblyInfo.cs b/cs/playground/FasterLogSample/Properties/AssemblyInfo.cs new file mode 100644 index 000000000..5e08438c2 --- /dev/null +++ b/cs/playground/FasterLogSample/Properties/AssemblyInfo.cs @@ -0,0 +1,22 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +using System.Reflection; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +// General Information about an assembly is controlled through the following +// set of attributes. 
Change these attribute values to modify the information +// associated with an assembly. +[assembly: AssemblyDescription("")] +[assembly: AssemblyCopyright("Copyright © 2017")] +[assembly: AssemblyTrademark("")] +[assembly: AssemblyCulture("")] + +// Setting ComVisible to false makes the types in this assembly not visible +// to COM components. If you need to access a type in this assembly from +// COM, set the ComVisible attribute to true on that type. +[assembly: ComVisible(false)] + +// The following GUID is for the ID of the typelib if this project is exposed to COM +[assembly: Guid("17bdd0a5-98e5-464a-8a00-050d9ff4c562")] diff --git a/cs/src/core/Device/Devices.cs b/cs/src/core/Device/Devices.cs index 14a975445..4bfa0a14f 100644 --- a/cs/src/core/Device/Devices.cs +++ b/cs/src/core/Device/Devices.cs @@ -26,9 +26,10 @@ public static class Devices /// Path to file that will store the log (empty for null device) /// Whether we try to preallocate the file on creation /// Delete files on close - /// + /// The maximal number of bytes this storage device can accommondate, or CAPACITY_UNSPECIFIED if there is no such limit + /// Whether to recover device metadata from existing files /// Device instance - public static IDevice CreateLogDevice(string logPath, bool preallocateFile = true, bool deleteOnClose = false, long capacity = CAPACITY_UNSPECIFIED) + public static IDevice CreateLogDevice(string logPath, bool preallocateFile = true, bool deleteOnClose = false, long capacity = CAPACITY_UNSPECIFIED, bool recoverDevice = false) { if (string.IsNullOrWhiteSpace(logPath)) return new NullDevice(); @@ -38,12 +39,12 @@ public static IDevice CreateLogDevice(string logPath, bool preallocateFile = tru #if DOTNETCORE if (!RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) { - logDevice = new ManagedLocalStorageDevice(logPath, preallocateFile, deleteOnClose, capacity); + logDevice = new ManagedLocalStorageDevice(logPath, preallocateFile, deleteOnClose, capacity, recoverDevice); } else #endif { - logDevice = new LocalStorageDevice(logPath, preallocateFile, deleteOnClose, capacity: capacity); + logDevice = new LocalStorageDevice(logPath, preallocateFile, deleteOnClose, true, capacity, recoverDevice); } return logDevice; } diff --git a/cs/src/core/Device/LocalStorageDevice.cs b/cs/src/core/Device/LocalStorageDevice.cs index c034390fa..aa6d0aa9d 100644 --- a/cs/src/core/Device/LocalStorageDevice.cs +++ b/cs/src/core/Device/LocalStorageDevice.cs @@ -4,6 +4,7 @@ using Microsoft.Win32.SafeHandles; using System; using System.Collections.Concurrent; +using System.Collections.Generic; using System.Diagnostics; using System.IO; using System.Runtime.InteropServices; @@ -24,16 +25,18 @@ public class LocalStorageDevice : StorageDeviceBase /// /// Constructor /// - /// + /// File name (or prefix) with path /// /// /// /// The maximum number of bytes this storage device can accommondate, or CAPACITY_UNSPECIFIED if there is no such limit + /// Whether to recover device metadata from existing files public LocalStorageDevice(string filename, bool preallocateFile = false, bool deleteOnClose = false, bool disableFileBuffering = true, - long capacity = Devices.CAPACITY_UNSPECIFIED) + long capacity = Devices.CAPACITY_UNSPECIFIED, + bool recoverDevice = false) : base(filename, GetSectorSize(filename), capacity) { @@ -42,7 +45,8 @@ public LocalStorageDevice(string filename, this.deleteOnClose = deleteOnClose; this.disableFileBuffering = disableFileBuffering; logHandles = new SafeConcurrentDictionary(); - RecoverFiles(); + if 
(recoverDevice) + RecoverFiles(); } private void RecoverFiles() @@ -53,14 +57,19 @@ private void RecoverFiles() string bareName = fi.Name; - int prevSegmentId = -1; + List segids = new List(); foreach (FileInfo item in di.GetFiles(bareName + "*")) { - int segmentId = Int32.Parse(item.Name.Replace(bareName, "").Replace(".", "")); + segids.Add(Int32.Parse(item.Name.Replace(bareName, "").Replace(".", ""))); + } + segids.Sort(); + + int prevSegmentId = -1; + foreach (int segmentId in segids) + { if (segmentId != prevSegmentId + 1) { startSegment = segmentId; - } else { diff --git a/cs/src/core/Device/ManagedLocalStorageDevice.cs b/cs/src/core/Device/ManagedLocalStorageDevice.cs index 35ffebeb9..255cd8132 100644 --- a/cs/src/core/Device/ManagedLocalStorageDevice.cs +++ b/cs/src/core/Device/ManagedLocalStorageDevice.cs @@ -4,6 +4,7 @@ using Microsoft.Win32.SafeHandles; using System; using System.Collections.Concurrent; +using System.Collections.Generic; using System.Diagnostics; using System.IO; using System.Runtime.InteropServices; @@ -24,11 +25,12 @@ public class ManagedLocalStorageDevice : StorageDeviceBase /// /// /// - /// + /// File name (or prefix) with path /// /// - /// The maximal number of bytes this storage device can accommondate, or CAPACITY_UNSPECIFIED if there is no such limit - public ManagedLocalStorageDevice(string filename, bool preallocateFile = false, bool deleteOnClose = false, long capacity = Devices.CAPACITY_UNSPECIFIED) + /// The maximal number of bytes this storage device can accommondate, or CAPACITY_UNSPECIFIED if there is no such limit + /// Whether to recover device metadata from existing files + public ManagedLocalStorageDevice(string filename, bool preallocateFile = false, bool deleteOnClose = false, long capacity = Devices.CAPACITY_UNSPECIFIED, bool recoverDevice = false) : base(filename, GetSectorSize(filename), capacity) { pool = new SectorAlignedBufferPool(1, 1); @@ -36,7 +38,8 @@ public ManagedLocalStorageDevice(string filename, bool preallocateFile = false, this.preallocateFile = preallocateFile; this.deleteOnClose = deleteOnClose; logHandles = new ConcurrentDictionary(); - RecoverFiles(); + if (recoverDevice) + RecoverFiles(); } @@ -48,14 +51,19 @@ private void RecoverFiles() string bareName = fi.Name; - int prevSegmentId = -1; + List segids = new List(); foreach (FileInfo item in di.GetFiles(bareName + "*")) { - int segmentId = Int32.Parse(item.Name.Replace(bareName, "").Replace(".", "")); + segids.Add(Int32.Parse(item.Name.Replace(bareName, "").Replace(".", ""))); + } + segids.Sort(); + + int prevSegmentId = -1; + foreach (int segmentId in segids) + { if (segmentId != prevSegmentId + 1) { startSegment = segmentId; - } else { @@ -68,6 +76,7 @@ private void RecoverFiles() + class ReadCallbackWrapper { readonly IOCompletionCallback callback; diff --git a/cs/src/core/Device/StorageDeviceBase.cs b/cs/src/core/Device/StorageDeviceBase.cs index 1c84d708c..7ab14ab82 100644 --- a/cs/src/core/Device/StorageDeviceBase.cs +++ b/cs/src/core/Device/StorageDeviceBase.cs @@ -233,7 +233,7 @@ public void TruncateUntilSegment(int toSegment) public virtual void TruncateUntilAddressAsync(long toAddress, AsyncCallback callback, IAsyncResult result) { // Truncate only up to segment boundary if address is not aligned - TruncateUntilSegmentAsync((int)toAddress >> segmentSizeBits, callback, result); + TruncateUntilSegmentAsync((int)(toAddress >> segmentSizeBits), callback, result); } /// diff --git a/cs/src/core/Epochs/FastThreadLocal.cs b/cs/src/core/Epochs/FastThreadLocal.cs 
index e9f53656f..7bc2e2e30 100644 --- a/cs/src/core/Epochs/FastThreadLocal.cs +++ b/cs/src/core/Epochs/FastThreadLocal.cs @@ -2,6 +2,7 @@ // Licensed under the MIT license. using System; +using System.Net; using System.Threading; namespace FASTER.core @@ -16,18 +17,25 @@ internal class FastThreadLocal private const int kMaxInstances = 128; [ThreadStatic] - private static T[] values; + private static T[] tl_values; + [ThreadStatic] + private static int[] tl_iid; + + private readonly int offset; + private readonly int iid; - private readonly int id; private static readonly int[] instances = new int[kMaxInstances]; + private static int instanceId = 0; public FastThreadLocal() { + iid = Interlocked.Increment(ref instanceId); + for (int i = 0; i < kMaxInstances; i++) { - if (0 == Interlocked.CompareExchange(ref instances[i], 1, 0)) + if (0 == Interlocked.CompareExchange(ref instances[i], iid, 0)) { - id = i; + offset = i; return; } } @@ -36,22 +44,22 @@ public FastThreadLocal() public void InitializeThread() { - if (values == null) - values = new T[kMaxInstances]; + if (tl_values == null) + { + tl_values = new T[kMaxInstances]; + tl_iid = new int[kMaxInstances]; + } + if (tl_iid[offset] != iid) + { + tl_iid[offset] = iid; + tl_values[offset] = default(T); + } } public void DisposeThread() { - Value = default(T); - - // Dispose values only if there are no other - // instances active for this thread - for (int i = 0; i < kMaxInstances; i++) - { - if ((instances[i] == 1) && (i != id)) - return; - } - values = null; + tl_values[offset] = default(T); + tl_iid[offset] = 0; } /// @@ -59,15 +67,15 @@ public void DisposeThread() /// public void Dispose() { - instances[id] = 0; + instances[offset] = 0; } public T Value { - get => values[id]; - set => values[id] = value; + get => tl_values[offset]; + set => tl_values[offset] = value; } - public bool IsInitializedForThread => values != null; + public bool IsInitializedForThread => (tl_values != null) && (iid == tl_iid[offset]); } -} +} \ No newline at end of file diff --git a/cs/src/core/Epochs/LightEpoch.cs b/cs/src/core/Epochs/LightEpoch.cs index 2cd7f6232..cd47f0aaf 100644 --- a/cs/src/core/Epochs/LightEpoch.cs +++ b/cs/src/core/Epochs/LightEpoch.cs @@ -52,7 +52,14 @@ public unsafe class LightEpoch /// /// A thread's entry in the epoch table. 
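/// Holds the index of the slot this thread currently owns in the shared epoch table (kInvalidIndex when none is held).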
/// - private FastThreadLocal threadEntryIndex; + [ThreadStatic] + private static int threadEntryIndex; + + /// + /// Number of instances using this entry + /// + [ThreadStatic] + private static int threadEntryIndexCount; /// /// Global current epoch value @@ -79,7 +86,7 @@ public LightEpoch(int size = kTableSize) /// unsafe void Initialize(int size) { - threadEntryIndex = new FastThreadLocal(); + // threadEntryIndex = new FastThreadLocal(); numEntries = size; // Over-allocate to do cache-line alignment @@ -112,7 +119,7 @@ public void Dispose() CurrentEpoch = 1; SafeToReclaimEpoch = 0; - threadEntryIndex.Dispose(); + // threadEntryIndex.Dispose(); } /// @@ -121,7 +128,7 @@ public void Dispose() /// Result of the check public bool IsProtected() { - return threadEntryIndex.IsInitializedForThread && kInvalidIndex != threadEntryIndex.Value; + return kInvalidIndex != threadEntryIndex; } /// @@ -131,7 +138,7 @@ public bool IsProtected() [MethodImpl(MethodImplOptions.AggressiveInlining)] public int ProtectAndDrain() { - int entry = threadEntryIndex.Value; + int entry = threadEntryIndex; (*(tableAligned + entry)).localCurrentEpoch = CurrentEpoch; @@ -175,8 +182,9 @@ private void Drain(int nextEpoch) /// public void Acquire() { - threadEntryIndex.InitializeThread(); - threadEntryIndex.Value = ReserveEntryForThread(); + if (threadEntryIndex == kInvalidIndex) + threadEntryIndex = ReserveEntryForThread(); + threadEntryIndexCount++; } @@ -185,16 +193,39 @@ public void Acquire() /// public void Release() { - int entry = threadEntryIndex.Value; + int entry = threadEntryIndex; if (kInvalidIndex == entry) { return; } - threadEntryIndex.Value = kInvalidIndex; - threadEntryIndex.DisposeThread(); - (*(tableAligned + entry)).localCurrentEpoch = 0; - (*(tableAligned + entry)).threadId = 0; + threadEntryIndexCount--; + if (threadEntryIndexCount == 0) + { + threadEntryIndex = kInvalidIndex; + (*(tableAligned + entry)).localCurrentEpoch = 0; + (*(tableAligned + entry)).threadId = 0; + } + } + + /// + /// Thread suspends its epoch entry + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void Suspend() + { + (*(tableAligned + threadEntryIndex)).localCurrentEpoch = int.MaxValue; + } + + /// + /// Thread resumes its epoch entry + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void Resume() + { + if (threadEntryIndex == kInvalidIndex) + Acquire(); + ProtectAndDrain(); } /// @@ -388,7 +419,7 @@ private struct EpochActionPair [MethodImpl(MethodImplOptions.AggressiveInlining)] public bool MarkAndCheckIsComplete(int markerIdx, int version) { - int entry = threadEntryIndex.Value; + int entry = threadEntryIndex; if (kInvalidIndex == entry) { Debug.WriteLine("New Thread entered during CPR"); @@ -404,7 +435,7 @@ public bool MarkAndCheckIsComplete(int markerIdx, int version) int fc_version = (*(tableAligned + index)).markers[markerIdx]; if (0 != entry_epoch) { - if (fc_version != version) + if (fc_version != version && entry_epoch < int.MaxValue) { return false; } @@ -413,4 +444,4 @@ public bool MarkAndCheckIsComplete(int markerIdx, int version) return true; } } -} +} \ No newline at end of file diff --git a/cs/src/core/FASTER.core.csproj b/cs/src/core/FASTER.core.csproj index 1d68a5425..dca9cd6a5 100644 --- a/cs/src/core/FASTER.core.csproj +++ b/cs/src/core/FASTER.core.csproj @@ -35,6 +35,7 @@ + \ No newline at end of file diff --git a/cs/src/core/Index/FasterLog/FasterLog.cs b/cs/src/core/Index/FasterLog/FasterLog.cs new file mode 100644 index 000000000..a34376ea1 --- /dev/null +++ 
b/cs/src/core/Index/FasterLog/FasterLog.cs @@ -0,0 +1,197 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#pragma warning disable 0162 + +using System; +using System.Collections.Concurrent; +using System.Diagnostics; +using System.Runtime.CompilerServices; +using System.Threading; +using FASTER.core; + +namespace FASTER.core.log +{ + public class FasterLogSettings + { + /// + /// Device used for log + /// + public IDevice LogDevice = new NullDevice(); + + /// + /// Size of a segment (group of pages), in bits + /// + public int PageSizeBits = 22; + + /// + /// Size of a segment (group of pages), in bits + /// + public int SegmentSizeBits = 30; + + /// + /// Total size of in-memory part of log, in bits + /// + public int MemorySizeBits = 34; + + internal LogSettings GetLogSettings() + { + return new LogSettings + { + LogDevice = LogDevice, + PageSizeBits = PageSizeBits, + SegmentSizeBits = SegmentSizeBits, + MemorySizeBits = MemorySizeBits, + CopyReadsToTail = false, + MutableFraction = 0, + ObjectLogDevice = null, + ReadCacheSettings = null + }; + } + } + + /// + /// FASTER log + /// + public class FasterLog + { + private readonly BlittableAllocator allocator; + private readonly LightEpoch epoch; + + /// + /// Beginning address of log + /// + public long BeginAddress => allocator.BeginAddress; + + /// + /// Tail address of log + /// + public long TailAddress => allocator.GetTailAddress(); + + /// + /// Flushed until address + /// + public long FlushedUntilAddress => allocator.FlushedUntilAddress; + + /// + /// Create new log instance + /// + /// + public FasterLog(FasterLogSettings logSettings) + { + this.epoch = new LightEpoch(); + allocator = new BlittableAllocator(logSettings.GetLogSettings(), null, null, epoch); + allocator.Initialize(); + } + + /// + /// Append entry to log + /// + /// + /// Logical address of added entry + public unsafe long Append(Span entry) + { + epoch.Resume(); + var length = entry.Length; + BlockAllocate(4 + length, out long logicalAddress); + var physicalAddress = allocator.GetPhysicalAddress(logicalAddress); + *(int*)physicalAddress = length; + fixed (byte* bp = &entry.GetPinnableReference()) + Buffer.MemoryCopy(bp, (void*)(4 + physicalAddress), length, length); + epoch.Suspend(); + return logicalAddress; + } + + /// + /// Append entry to log + /// + /// + /// Logical address of added entry + public unsafe long Append(byte[] entry) + { + + epoch.Resume(); + var length = entry.Length; + BlockAllocate(4 + length, out long logicalAddress); + var physicalAddress = allocator.GetPhysicalAddress(logicalAddress); + *(int*)physicalAddress = length; + fixed (byte* bp = entry) + Buffer.MemoryCopy(bp, (void*)(4 + physicalAddress), length, length); + epoch.Suspend(); + return logicalAddress; + } + + /// + /// Flush the log until tail + /// + public long Flush(bool spinWait = false) + { + epoch.Resume(); + allocator.ShiftReadOnlyToTail(out long tailAddress); + epoch.Suspend(); + if (spinWait) + { + while (allocator.FlushedUntilAddress < tailAddress) + Thread.Yield(); + } + return tailAddress; + } + + /// + /// Truncate the log until, but not including, untilAddress + /// + /// + public void TruncateUntil(long untilAddress) + { + epoch.Resume(); + allocator.ShiftBeginAddress(untilAddress); + epoch.Suspend(); + } + + /// + /// Iterator interface for scanning FASTER log + /// + /// + /// + /// + /// + public FasterLogScanIterator Scan(long beginAddress, long endAddress, ScanBufferingMode scanBufferingMode = 
ScanBufferingMode.DoublePageBuffering) + { + return new FasterLogScanIterator(allocator, beginAddress, endAddress, scanBufferingMode, epoch); + } + + /// + /// Dispose this thread's epoch entry. Use when you manage your own + /// threads and want to recycle a thread-local epoch entry. + /// + public void DisposeThread() + { + epoch.Release(); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private void BlockAllocate(int recordSize, out long logicalAddress) + { + logicalAddress = allocator.Allocate(recordSize); + if (logicalAddress >= 0) return; + + while (logicalAddress < 0 && -logicalAddress >= allocator.ReadOnlyAddress) + { + epoch.ProtectAndDrain(); + allocator.CheckForAllocateComplete(ref logicalAddress); + if (logicalAddress < 0) + { + Thread.Sleep(10); + } + } + + logicalAddress = logicalAddress < 0 ? -logicalAddress : logicalAddress; + + if (logicalAddress < allocator.ReadOnlyAddress) + { + Debug.WriteLine("Allocated address is read-only, retrying"); + BlockAllocate(recordSize, out logicalAddress); + } + } + } +} diff --git a/cs/src/core/Index/FasterLog/FasterLogIterator.cs b/cs/src/core/Index/FasterLog/FasterLogIterator.cs new file mode 100644 index 000000000..1fccba3ea --- /dev/null +++ b/cs/src/core/Index/FasterLog/FasterLogIterator.cs @@ -0,0 +1,224 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +using System; +using System.Threading; +using System.Diagnostics; + +namespace FASTER.core.log +{ + /// + /// Scan iterator for hybrid log + /// + public class FasterLogScanIterator : IDisposable + { + private readonly int frameSize; + private readonly BlittableAllocator allocator; + private readonly long beginAddress, endAddress; + private readonly BlittableFrame frame; + private readonly CountdownEvent[] loaded; + + private bool first = true; + private long currentAddress, nextAddress; + private long currentPhysicalAddress; + private LightEpoch epoch; + + /// + /// Current address + /// + public long CurrentAddress => currentAddress; + + /// + /// Constructor + /// + /// + /// + /// + /// + public unsafe FasterLogScanIterator(BlittableAllocator hlog, long beginAddress, long endAddress, ScanBufferingMode scanBufferingMode, LightEpoch epoch) + { + this.allocator = hlog; + this.epoch = epoch; + + if (beginAddress == 0) + beginAddress = hlog.GetFirstValidLogicalAddress(0); + + this.beginAddress = beginAddress; + this.endAddress = endAddress; + currentAddress = -1; + nextAddress = beginAddress; + + if (scanBufferingMode == ScanBufferingMode.SinglePageBuffering) + frameSize = 1; + else if (scanBufferingMode == ScanBufferingMode.DoublePageBuffering) + frameSize = 2; + else if (scanBufferingMode == ScanBufferingMode.NoBuffering) + { + frameSize = 0; + return; + } + + frame = new BlittableFrame(frameSize, hlog.PageSize, hlog.GetDeviceSectorSize()); + loaded = new CountdownEvent[frameSize]; + + // Only load addresses flushed to disk + if (nextAddress < hlog.HeadAddress) + { + var frameNumber = (nextAddress >> hlog.LogPageSizeBits) % frameSize; + hlog.AsyncReadPagesFromDeviceToFrame + (nextAddress >> hlog.LogPageSizeBits, + 1, endAddress, AsyncReadPagesCallback, Empty.Default, + frame, out loaded[frameNumber]); + } + } + + /// + /// Get next record in iterator + /// + /// + /// + public unsafe bool GetNext(out Span entry) + { + currentAddress = nextAddress; + while (true) + { + // Check for boundary conditions + if ((currentAddress >= endAddress) || (currentAddress >= allocator.ReadOnlyAddress)) + { + entry = default(Span); + 
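// No entry available: the iterator has reached endAddress or the mutable region (ReadOnlyAddress).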
return false; + } + + if (currentAddress < allocator.BeginAddress) + { + throw new Exception("Iterator address is less than log BeginAddress " + allocator.BeginAddress); + } + + if (frameSize == 0 && currentAddress < allocator.HeadAddress) + { + throw new Exception("Iterator address is less than log HeadAddress in memory-scan mode"); + } + + var currentPage = currentAddress >> allocator.LogPageSizeBits; + var offset = currentAddress & allocator.PageSizeMask; + + var headAddress = allocator.HeadAddress; + var physicalAddress = default(long); + + if (currentAddress < headAddress) + { + BufferAndLoad(currentAddress, currentPage, currentPage % frameSize); + physicalAddress = frame.GetPhysicalAddress(currentPage % frameSize, offset); + } + else + { + epoch.Resume(); + headAddress = allocator.HeadAddress; + if (currentAddress < headAddress) // rare case + { + epoch.Suspend(); + continue; + } + + physicalAddress = allocator.GetPhysicalAddress(currentAddress); + } + + // Check if record fits on page, if not skip to next page + int length = *(int*)physicalAddress; + int recordSize = 4; + if (length > 0) + recordSize += length; + if ((currentAddress & allocator.PageSizeMask) + recordSize > allocator.PageSize) + { + if (currentAddress >= headAddress) + epoch.Suspend(); + currentAddress = (1 + (currentAddress >> allocator.LogPageSizeBits)) << allocator.LogPageSizeBits; + continue; + } + + if (length == 0) + { + if (currentAddress >= headAddress) + epoch.Suspend(); + currentAddress += recordSize; + continue; + } + + entry = new Span((void*)(physicalAddress + 4), length); + if (currentAddress >= headAddress) + { + // Have to copy out bytes within epoch protection in + // this case because this is a shared buffer + var _entry = new byte[length]; + entry.CopyTo(_entry); + entry = _entry; + epoch.Suspend(); + } + currentPhysicalAddress = physicalAddress; + nextAddress = currentAddress + recordSize; + return true; + } + } + + /// + /// Dispose the iterator + /// + public void Dispose() + { + frame?.Dispose(); + } + + private unsafe void BufferAndLoad(long currentAddress, long currentPage, long currentFrame) + { + if (first || (currentAddress & allocator.PageSizeMask) == 0) + { + // Prefetch pages based on buffering mode + if (frameSize == 1) + { + if (!first) + { + allocator.AsyncReadPagesFromDeviceToFrame(currentAddress >> allocator.LogPageSizeBits, 1, endAddress, AsyncReadPagesCallback, Empty.Default, frame, out loaded[currentFrame]); + } + } + else + { + var endPage = endAddress >> allocator.LogPageSizeBits; + if ((endPage > currentPage) && + ((endPage > currentPage + 1) || ((endAddress & allocator.PageSizeMask) != 0))) + { + allocator.AsyncReadPagesFromDeviceToFrame(1 + (currentAddress >> allocator.LogPageSizeBits), 1, endAddress, AsyncReadPagesCallback, Empty.Default, frame, out loaded[(currentPage + 1) % frameSize]); + } + } + first = false; + } + loaded[currentFrame].Wait(); + } + + private unsafe void AsyncReadPagesCallback(uint errorCode, uint numBytes, NativeOverlapped* overlap) + { + if (errorCode != 0) + { + Trace.TraceError("OverlappedStream GetQueuedCompletionStatus error: {0}", errorCode); + } + + var result = (PageAsyncReadResult)Overlapped.Unpack(overlap).AsyncResult; + + if (result.freeBuffer1 != null) + { + allocator.PopulatePage(result.freeBuffer1.GetValidPointer(), result.freeBuffer1.required_bytes, result.page); + result.freeBuffer1.Return(); + result.freeBuffer1 = null; + } + + if (result.handle != null) + { + result.handle.Signal(); + } + + Interlocked.MemoryBarrier(); + 
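// Make the callback's writes visible before releasing the native overlapped structure.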
Overlapped.Free(overlap); + } + } +} + + From 3077f5295ae5696940967d64daaa3b6057442dc1 Mon Sep 17 00:00:00 2001 From: Badrish Chandramouli Date: Tue, 17 Sep 2019 11:25:43 -0700 Subject: [PATCH 02/36] Updates. --- cs/playground/FasterLogSample/Program.cs | 16 ++++++++------- cs/src/core/Index/FasterLog/FasterLog.cs | 26 ++++++++++++++++++++++-- 2 files changed, 33 insertions(+), 9 deletions(-) diff --git a/cs/playground/FasterLogSample/Program.cs b/cs/playground/FasterLogSample/Program.cs index 1b4dd542c..1ee028215 100644 --- a/cs/playground/FasterLogSample/Program.cs +++ b/cs/playground/FasterLogSample/Program.cs @@ -26,7 +26,7 @@ static void ReportThread() while (true) { - Thread.Sleep(10000); + Thread.Sleep(5000); var nowTime = sw.ElapsedMilliseconds; var nowValue = log.TailAddress; @@ -51,6 +51,8 @@ static void AppendThread() static void ScanThread() { + Random r = new Random(); + Thread.Sleep(5000); byte[] entry = new byte[entryLength]; @@ -72,6 +74,9 @@ static void ScanThread() throw new Exception("Invalid entry found"); } + if (r.Next(100) < 10) + log.Append(result); + if (iter.CurrentAddress - lastAddress > 500000000) { log.TruncateUntil(iter.CurrentAddress); @@ -83,14 +88,11 @@ static void ScanThread() static void Main(string[] args) { - var device = Devices.CreateLogDevice("E:\\logs\\hlog.log"); - log = new FasterLog(new FasterLogSettings { LogDevice = device, MemorySizeBits = 26 }); + var device = Devices.CreateLogDevice("D:\\logs\\hlog.log"); + log = new FasterLog(new FasterLogSettings { LogDevice = device, MemorySizeBits = 29, PageSizeBits = 25 }); new Thread(new ThreadStart(AppendThread)).Start(); - //new Thread(new ThreadStart(AppendThread)).Start(); - //new Thread(new ThreadStart(AppendThread)).Start(); - //new Thread(new ThreadStart(AppendThread)).Start(); - // new Thread(new ThreadStart(ScanThread)).Start(); + new Thread(new ThreadStart(ScanThread)).Start(); new Thread(new ThreadStart(ReportThread)).Start(); Thread.Sleep(500*1000); diff --git a/cs/src/core/Index/FasterLog/FasterLog.cs b/cs/src/core/Index/FasterLog/FasterLog.cs index a34376ea1..2004cc942 100644 --- a/cs/src/core/Index/FasterLog/FasterLog.cs +++ b/cs/src/core/Index/FasterLog/FasterLog.cs @@ -5,6 +5,7 @@ using System; using System.Collections.Concurrent; +using System.Collections.Generic; using System.Diagnostics; using System.Runtime.CompilerServices; using System.Threading; @@ -56,7 +57,7 @@ internal LogSettings GetLogSettings() public class FasterLog { private readonly BlittableAllocator allocator; - private readonly LightEpoch epoch; + public readonly LightEpoch epoch; /// /// Beginning address of log @@ -109,7 +110,6 @@ public unsafe long Append(Span entry) /// Logical address of added entry public unsafe long Append(byte[] entry) { - epoch.Resume(); var length = entry.Length; BlockAllocate(4 + length, out long logicalAddress); @@ -121,6 +121,28 @@ public unsafe long Append(byte[] entry) return logicalAddress; } + /// + /// Append batch of entries to log + /// + /// + /// Logical address of last added entry + public unsafe long Append(List entries) + { + long logicalAddress = 0; + epoch.Resume(); + foreach (var entry in entries) + { + var length = entry.Length; + BlockAllocate(4 + length, out logicalAddress); + var physicalAddress = allocator.GetPhysicalAddress(logicalAddress); + *(int*)physicalAddress = length; + fixed (byte* bp = entry) + Buffer.MemoryCopy(bp, (void*)(4 + physicalAddress), length, length); + } + epoch.Suspend(); + return logicalAddress; + } + /// /// Flush the log until tail /// From 
853b3eaf23d933dac30fb25a690aeeca2a59e23e Mon Sep 17 00:00:00 2001 From: Badrish Chandramouli Date: Tue, 17 Sep 2019 13:36:42 -0700 Subject: [PATCH 03/36] Updates --- cs/playground/FasterLogSample/Program.cs | 2 +- cs/src/core/Index/FasterLog/FasterLog.cs | 39 +---------- .../core/Index/FasterLog/FasterLogIterator.cs | 67 +++++++++---------- .../core/Index/FasterLog/FasterLogSettings.cs | 48 +++++++++++++ 4 files changed, 81 insertions(+), 75 deletions(-) create mode 100644 cs/src/core/Index/FasterLog/FasterLogSettings.cs diff --git a/cs/playground/FasterLogSample/Program.cs b/cs/playground/FasterLogSample/Program.cs index 1ee028215..3fdd4d617 100644 --- a/cs/playground/FasterLogSample/Program.cs +++ b/cs/playground/FasterLogSample/Program.cs @@ -89,7 +89,7 @@ static void ScanThread() static void Main(string[] args) { var device = Devices.CreateLogDevice("D:\\logs\\hlog.log"); - log = new FasterLog(new FasterLogSettings { LogDevice = device, MemorySizeBits = 29, PageSizeBits = 25 }); + log = new FasterLog(new FasterLogSettings { LogDevice = device }); new Thread(new ThreadStart(AppendThread)).Start(); new Thread(new ThreadStart(ScanThread)).Start(); diff --git a/cs/src/core/Index/FasterLog/FasterLog.cs b/cs/src/core/Index/FasterLog/FasterLog.cs index 2004cc942..01d6326ff 100644 --- a/cs/src/core/Index/FasterLog/FasterLog.cs +++ b/cs/src/core/Index/FasterLog/FasterLog.cs @@ -13,43 +13,6 @@ namespace FASTER.core.log { - public class FasterLogSettings - { - /// - /// Device used for log - /// - public IDevice LogDevice = new NullDevice(); - - /// - /// Size of a segment (group of pages), in bits - /// - public int PageSizeBits = 22; - - /// - /// Size of a segment (group of pages), in bits - /// - public int SegmentSizeBits = 30; - - /// - /// Total size of in-memory part of log, in bits - /// - public int MemorySizeBits = 34; - - internal LogSettings GetLogSettings() - { - return new LogSettings - { - LogDevice = LogDevice, - PageSizeBits = PageSizeBits, - SegmentSizeBits = SegmentSizeBits, - MemorySizeBits = MemorySizeBits, - CopyReadsToTail = false, - MutableFraction = 0, - ObjectLogDevice = null, - ReadCacheSettings = null - }; - } - } /// /// FASTER log @@ -57,7 +20,7 @@ internal LogSettings GetLogSettings() public class FasterLog { private readonly BlittableAllocator allocator; - public readonly LightEpoch epoch; + private readonly LightEpoch epoch; /// /// Beginning address of log diff --git a/cs/src/core/Index/FasterLog/FasterLogIterator.cs b/cs/src/core/Index/FasterLog/FasterLogIterator.cs index 1fccba3ea..c97e82c9f 100644 --- a/cs/src/core/Index/FasterLog/FasterLogIterator.cs +++ b/cs/src/core/Index/FasterLog/FasterLogIterator.cs @@ -14,14 +14,14 @@ public class FasterLogScanIterator : IDisposable { private readonly int frameSize; private readonly BlittableAllocator allocator; - private readonly long beginAddress, endAddress; + private readonly long endAddress; private readonly BlittableFrame frame; private readonly CountdownEvent[] loaded; + private readonly long[] loadedPage; + private readonly LightEpoch epoch; - private bool first = true; private long currentAddress, nextAddress; - private long currentPhysicalAddress; - private LightEpoch epoch; + /// /// Current address @@ -35,6 +35,7 @@ public class FasterLogScanIterator : IDisposable /// /// /// + /// public unsafe FasterLogScanIterator(BlittableAllocator hlog, long beginAddress, long endAddress, ScanBufferingMode scanBufferingMode, LightEpoch epoch) { this.allocator = hlog; @@ -43,7 +44,6 @@ public unsafe 
FasterLogScanIterator(BlittableAllocator hlog, long b if (beginAddress == 0) beginAddress = hlog.GetFirstValidLogicalAddress(0); - this.beginAddress = beginAddress; this.endAddress = endAddress; currentAddress = -1; nextAddress = beginAddress; @@ -60,16 +60,10 @@ public unsafe FasterLogScanIterator(BlittableAllocator hlog, long b frame = new BlittableFrame(frameSize, hlog.PageSize, hlog.GetDeviceSectorSize()); loaded = new CountdownEvent[frameSize]; + loadedPage = new long[frameSize]; + for (int i = 0; i < frameSize; i++) + loadedPage[i] = -1; - // Only load addresses flushed to disk - if (nextAddress < hlog.HeadAddress) - { - var frameNumber = (nextAddress >> hlog.LogPageSizeBits) % frameSize; - hlog.AsyncReadPagesFromDeviceToFrame - (nextAddress >> hlog.LogPageSizeBits, - 1, endAddress, AsyncReadPagesCallback, Empty.Default, - frame, out loaded[frameNumber]); - } } /// @@ -83,16 +77,18 @@ public unsafe bool GetNext(out Span entry) while (true) { // Check for boundary conditions + if (currentAddress < allocator.BeginAddress) + { + Debug.WriteLine("Iterator address is less than log BeginAddress " + allocator.BeginAddress + ", adjusting iterator address"); + currentAddress = allocator.BeginAddress; + } + if ((currentAddress >= endAddress) || (currentAddress >= allocator.ReadOnlyAddress)) { entry = default(Span); return false; } - if (currentAddress < allocator.BeginAddress) - { - throw new Exception("Iterator address is less than log BeginAddress " + allocator.BeginAddress); - } if (frameSize == 0 && currentAddress < allocator.HeadAddress) { @@ -154,7 +150,6 @@ public unsafe bool GetNext(out Span entry) entry = _entry; epoch.Suspend(); } - currentPhysicalAddress = physicalAddress; nextAddress = currentAddress + recordSize; return true; } @@ -170,26 +165,26 @@ public void Dispose() private unsafe void BufferAndLoad(long currentAddress, long currentPage, long currentFrame) { - if (first || (currentAddress & allocator.PageSizeMask) == 0) + if (loadedPage[currentFrame] != currentPage) { - // Prefetch pages based on buffering mode - if (frameSize == 1) - { - if (!first) - { - allocator.AsyncReadPagesFromDeviceToFrame(currentAddress >> allocator.LogPageSizeBits, 1, endAddress, AsyncReadPagesCallback, Empty.Default, frame, out loaded[currentFrame]); - } - } - else + if (loadedPage[currentFrame] != -1) + loaded[currentFrame].Wait(); // Ensure we have completed ongoing load + allocator.AsyncReadPagesFromDeviceToFrame(currentAddress >> allocator.LogPageSizeBits, 1, endAddress, AsyncReadPagesCallback, Empty.Default, frame, out loaded[currentFrame]); + loadedPage[currentFrame] = currentAddress >> allocator.LogPageSizeBits; + } + + if (frameSize == 2) + { + currentPage++; + currentFrame = (currentFrame + 1) % frameSize; + + if (loadedPage[currentFrame] != currentPage) { - var endPage = endAddress >> allocator.LogPageSizeBits; - if ((endPage > currentPage) && - ((endPage > currentPage + 1) || ((endAddress & allocator.PageSizeMask) != 0))) - { - allocator.AsyncReadPagesFromDeviceToFrame(1 + (currentAddress >> allocator.LogPageSizeBits), 1, endAddress, AsyncReadPagesCallback, Empty.Default, frame, out loaded[(currentPage + 1) % frameSize]); - } + if (loadedPage[currentFrame] != -1) + loaded[currentFrame].Wait(); // Ensure we have completed ongoing load + allocator.AsyncReadPagesFromDeviceToFrame(1 + (currentAddress >> allocator.LogPageSizeBits), 1, endAddress, AsyncReadPagesCallback, Empty.Default, frame, out loaded[currentFrame]); + loadedPage[currentFrame] = 1 + (currentAddress >> 
allocator.LogPageSizeBits); } - first = false; } loaded[currentFrame].Wait(); } diff --git a/cs/src/core/Index/FasterLog/FasterLogSettings.cs b/cs/src/core/Index/FasterLog/FasterLogSettings.cs new file mode 100644 index 000000000..dc758b57b --- /dev/null +++ b/cs/src/core/Index/FasterLog/FasterLogSettings.cs @@ -0,0 +1,48 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#pragma warning disable 0162 + +namespace FASTER.core.log +{ + /// + /// FASTER Log Settings + /// + public class FasterLogSettings + { + /// + /// Device used for log + /// + public IDevice LogDevice = new NullDevice(); + + /// + /// Size of a segment (group of pages), in bits + /// + public int PageSizeBits = 22; + + /// + /// Size of a segment (group of pages), in bits + /// + public int SegmentSizeBits = 30; + + /// + /// Total size of in-memory part of log, in bits + /// + public int MemorySizeBits = 26; + + internal LogSettings GetLogSettings() + { + return new LogSettings + { + LogDevice = LogDevice, + PageSizeBits = PageSizeBits, + SegmentSizeBits = SegmentSizeBits, + MemorySizeBits = MemorySizeBits, + CopyReadsToTail = false, + MutableFraction = 0, + ObjectLogDevice = null, + ReadCacheSettings = null + }; + } + } +} From 6315a14b06239e2ffc0afd4da407f8c612a0fe71 Mon Sep 17 00:00:00 2001 From: Badrish Chandramouli Date: Wed, 18 Sep 2019 11:39:07 -0700 Subject: [PATCH 04/36] Cleaned up epochs, improved fine grain scalability. --- cs/playground/FasterLogSample/Program.cs | 8 +- cs/src/core/Epochs/LightEpoch.cs | 86 ++++++++++--------- cs/src/core/Index/FasterLog/FasterLog.cs | 33 +++++-- .../core/Index/FasterLog/FasterLogIterator.cs | 4 +- .../core/Index/FasterLog/FasterLogSettings.cs | 2 +- cs/test/FasterLogTests.cs | 66 ++++++++++++++ cs/test/SimpleRecoveryTest.cs | 5 +- 7 files changed, 145 insertions(+), 59 deletions(-) create mode 100644 cs/test/FasterLogTests.cs diff --git a/cs/playground/FasterLogSample/Program.cs b/cs/playground/FasterLogSample/Program.cs index 3fdd4d617..631848f44 100644 --- a/cs/playground/FasterLogSample/Program.cs +++ b/cs/playground/FasterLogSample/Program.cs @@ -1,20 +1,16 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT license. -using FASTER.core; -using FASTER.core.log; using System; using System.Diagnostics; -using System.Diagnostics.Eventing.Reader; -using System.IO; -using System.Runtime.CompilerServices; using System.Threading; +using FASTER.core; namespace FasterLogSample { public class Program { - const int entryLength = 100; + const int entryLength = 96; static FasterLog log; static void ReportThread() diff --git a/cs/src/core/Epochs/LightEpoch.cs b/cs/src/core/Epochs/LightEpoch.cs index cd47f0aaf..8434f8964 100644 --- a/cs/src/core/Epochs/LightEpoch.cs +++ b/cs/src/core/Epochs/LightEpoch.cs @@ -9,9 +9,8 @@ namespace FASTER.core { - /// - /// + /// Epoch protection /// public unsafe class LightEpoch { @@ -37,6 +36,10 @@ public unsafe class LightEpoch private GCHandle tableHandle; private Entry* tableAligned; + private static Entry[] threadIndex; + private static GCHandle threadIndexHandle; + private static Entry* threadIndexAligned; + /// /// List of action, epoch pairs containing actions to performed /// when an epoch becomes safe to reclaim. 
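/// Each entry pairs a deferred action with the epoch at which it was enqueued; Drain runs an action once SafeToReclaimEpoch reaches that epoch.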
@@ -44,11 +47,6 @@ public unsafe class LightEpoch private int drainCount = 0; private readonly EpochActionPair[] drainList = new EpochActionPair[kDrainListSize]; - /// - /// Number of entries in the epoch table - /// - private int numEntries; - /// /// A thread's entry in the epoch table. /// @@ -61,6 +59,9 @@ public unsafe class LightEpoch [ThreadStatic] private static int threadEntryIndexCount; + [ThreadStatic] + static int threadId; + /// /// Global current epoch value /// @@ -72,25 +73,28 @@ public unsafe class LightEpoch public int SafeToReclaimEpoch; /// - /// Instantiate the epoch table + /// Static constructor to setup shared cache-aligned space + /// to store per-entry count of instances using that entry /// - /// - public LightEpoch(int size = kTableSize) + static LightEpoch() { - Initialize(size); + // Over-allocate to do cache-line alignment + threadIndex = new Entry[kTableSize + 2]; + threadIndexHandle = GCHandle.Alloc(threadIndex, GCHandleType.Pinned); + long p = (long)threadIndexHandle.AddrOfPinnedObject(); + + // Force the pointer to align to 64-byte boundaries + long p2 = (p + (Constants.kCacheLineBytes - 1)) & ~(Constants.kCacheLineBytes - 1); + threadIndexAligned = (Entry*)p2; } /// - /// Initialize the epoch table + /// Instantiate the epoch table /// - /// - unsafe void Initialize(int size) + public LightEpoch() { - // threadEntryIndex = new FastThreadLocal(); - numEntries = size; - // Over-allocate to do cache-line alignment - tableRaw = new Entry[size + 2]; + tableRaw = new Entry[kTableSize + 2]; tableHandle = GCHandle.Alloc(tableRaw, GCHandleType.Pinned); long p = (long)tableHandle.AddrOfPinnedObject(); @@ -114,12 +118,8 @@ public void Dispose() tableHandle.Free(); tableAligned = null; tableRaw = null; - - numEntries = 0; CurrentEpoch = 1; SafeToReclaimEpoch = 0; - - // threadEntryIndex.Dispose(); } /// @@ -140,6 +140,7 @@ public int ProtectAndDrain() { int entry = threadEntryIndex; + (*(tableAligned + entry)).threadId = threadEntryIndex; (*(tableAligned + entry)).localCurrentEpoch = CurrentEpoch; if (drainCount > 0) @@ -180,6 +181,7 @@ private void Drain(int nextEpoch) /// /// Thread acquires its epoch entry /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] public void Acquire() { if (threadEntryIndex == kInvalidIndex) @@ -191,20 +193,18 @@ public void Acquire() /// /// Thread releases its epoch entry /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] public void Release() { int entry = threadEntryIndex; - if (kInvalidIndex == entry) - { - return; - } + (*(tableAligned + entry)).localCurrentEpoch = 0; + (*(tableAligned + entry)).threadId = 0; threadEntryIndexCount--; if (threadEntryIndexCount == 0) { + (threadIndexAligned + threadEntryIndex)->threadId = 0; threadEntryIndex = kInvalidIndex; - (*(tableAligned + entry)).localCurrentEpoch = 0; - (*(tableAligned + entry)).threadId = 0; } } @@ -214,7 +214,7 @@ public void Release() [MethodImpl(MethodImplOptions.AggressiveInlining)] public void Suspend() { - (*(tableAligned + threadEntryIndex)).localCurrentEpoch = int.MaxValue; + Release(); } /// @@ -223,8 +223,7 @@ public void Suspend() [MethodImpl(MethodImplOptions.AggressiveInlining)] public void Resume() { - if (threadEntryIndex == kInvalidIndex) - Acquire(); + Acquire(); ProtectAndDrain(); } @@ -307,7 +306,7 @@ private int ComputeNewSafeToReclaimEpoch(int currentEpoch) { int oldestOngoingCall = currentEpoch; - for (int index = 1; index <= numEntries; ++index) + for (int index = 1; index <= kTableSize; ++index) { int entry_epoch = (*(tableAligned + 
index)).localCurrentEpoch; if (0 != entry_epoch) @@ -332,20 +331,20 @@ private int ComputeNewSafeToReclaimEpoch(int currentEpoch) /// Start index /// Thread id /// Reserved entry - private int ReserveEntry(int startIndex, int threadId) + private static int ReserveEntry(int startIndex, int threadId) { int current_iteration = 0; for (; ; ) { // Reserve an entry in the table. - for (int i = 0; i < numEntries; ++i) + for (int i = 0; i < kTableSize; ++i) { - int index_to_test = 1 + ((startIndex + i) & (numEntries - 1)); - if (0 == (*(tableAligned + index_to_test)).threadId) + int index_to_test = 1 + ((startIndex + i) & (kTableSize - 1)); + if (0 == (threadIndexAligned + index_to_test)->threadId) { bool success = (0 == Interlocked.CompareExchange( - ref (*(tableAligned + index_to_test)).threadId, + ref (threadIndexAligned+index_to_test)->threadId, threadId, 0)); if (success) @@ -356,7 +355,7 @@ private int ReserveEntry(int startIndex, int threadId) ++current_iteration; } - if (current_iteration > (numEntries * 3)) + if (current_iteration > (kTableSize * 10)) { throw new Exception("Unable to reserve an epoch entry, try increasing the epoch table size (kTableSize)"); } @@ -368,10 +367,13 @@ private int ReserveEntry(int startIndex, int threadId) /// once for a thread. /// /// Reserved entry - private int ReserveEntryForThread() + private static int ReserveEntryForThread() { - // for portability(run on non-windows platform) - int threadId = Environment.OSVersion.Platform == PlatformID.Win32NT ? (int)Native32.GetCurrentThreadId() : Thread.CurrentThread.ManagedThreadId; + if (threadId == 0) // run once per thread for performance + { + // For portability(run on non-windows platform) + threadId = Environment.OSVersion.Platform == PlatformID.Win32NT ? (int)Native32.GetCurrentThreadId() : Thread.CurrentThread.ManagedThreadId; + } int startIndex = Utility.Murmur3(threadId); return ReserveEntry(startIndex, threadId); } @@ -429,7 +431,7 @@ public bool MarkAndCheckIsComplete(int markerIdx, int version) (*(tableAligned + entry)).markers[markerIdx] = version; // check if all threads have reported complete - for (int index = 1; index <= numEntries; ++index) + for (int index = 1; index <= kTableSize; ++index) { int entry_epoch = (*(tableAligned + index)).localCurrentEpoch; int fc_version = (*(tableAligned + index)).markers[markerIdx]; diff --git a/cs/src/core/Index/FasterLog/FasterLog.cs b/cs/src/core/Index/FasterLog/FasterLog.cs index 01d6326ff..5435008ed 100644 --- a/cs/src/core/Index/FasterLog/FasterLog.cs +++ b/cs/src/core/Index/FasterLog/FasterLog.cs @@ -9,15 +9,14 @@ using System.Diagnostics; using System.Runtime.CompilerServices; using System.Threading; -using FASTER.core; -namespace FASTER.core.log +namespace FASTER.core { /// /// FASTER log /// - public class FasterLog + public class FasterLog : IDisposable { private readonly BlittableAllocator allocator; private readonly LightEpoch epoch; @@ -43,11 +42,20 @@ public class FasterLog /// public FasterLog(FasterLogSettings logSettings) { - this.epoch = new LightEpoch(); + epoch = new LightEpoch(); allocator = new BlittableAllocator(logSettings.GetLogSettings(), null, null, epoch); allocator.Initialize(); } + /// + /// Dispose + /// + public void Dispose() + { + allocator.Dispose(); + epoch.Dispose(); + } + /// /// Append entry to log /// @@ -146,10 +154,21 @@ public FasterLogScanIterator Scan(long beginAddress, long endAddress, ScanBuffer } /// - /// Dispose this thread's epoch entry. 
Use when you manage your own - /// threads and want to recycle a thread-local epoch entry. + /// Create and pin epoch entry for this thread - use with ReleaseThread + /// if you manage the thread. + /// DO NOT USE WITH ASYNC CODE + /// + public void AcquireThread() + { + epoch.Acquire(); + } + + /// + /// Dispose epoch entry for this thread. Use with AcquireThread + /// if you manage the thread. + /// DO NOT USE WITH ASYNC CODE /// - public void DisposeThread() + public void ReleaseThread() { epoch.Release(); } diff --git a/cs/src/core/Index/FasterLog/FasterLogIterator.cs b/cs/src/core/Index/FasterLog/FasterLogIterator.cs index c97e82c9f..b09db13c3 100644 --- a/cs/src/core/Index/FasterLog/FasterLogIterator.cs +++ b/cs/src/core/Index/FasterLog/FasterLogIterator.cs @@ -5,7 +5,7 @@ using System.Threading; using System.Diagnostics; -namespace FASTER.core.log +namespace FASTER.core { /// /// Scan iterator for hybrid log @@ -36,7 +36,7 @@ public class FasterLogScanIterator : IDisposable /// /// /// - public unsafe FasterLogScanIterator(BlittableAllocator hlog, long beginAddress, long endAddress, ScanBufferingMode scanBufferingMode, LightEpoch epoch) + internal unsafe FasterLogScanIterator(BlittableAllocator hlog, long beginAddress, long endAddress, ScanBufferingMode scanBufferingMode, LightEpoch epoch) { this.allocator = hlog; this.epoch = epoch; diff --git a/cs/src/core/Index/FasterLog/FasterLogSettings.cs b/cs/src/core/Index/FasterLog/FasterLogSettings.cs index dc758b57b..1b70aa656 100644 --- a/cs/src/core/Index/FasterLog/FasterLogSettings.cs +++ b/cs/src/core/Index/FasterLog/FasterLogSettings.cs @@ -3,7 +3,7 @@ #pragma warning disable 0162 -namespace FASTER.core.log +namespace FASTER.core { /// /// FASTER Log Settings diff --git a/cs/test/FasterLogTests.cs b/cs/test/FasterLogTests.cs new file mode 100644 index 000000000..60542c9be --- /dev/null +++ b/cs/test/FasterLogTests.cs @@ -0,0 +1,66 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. 
+ +using System; +using System.Linq; +using FASTER.core; +using NUnit.Framework; + +namespace FASTER.test +{ + + [TestFixture] + internal class FasterLogTests + { + const int entryLength = 100; + const int numEntries = 1000000; + private FasterLog log; + private IDevice device; + + + [SetUp] + public void Setup() + { + device = Devices.CreateLogDevice(TestContext.CurrentContext.TestDirectory + "\\fasterlog.log", deleteOnClose: true); + } + + [TearDown] + public void TearDown() + { + device.Close(); + } + + [Test] + public void FasterLogTest1() + { + log = new FasterLog(new FasterLogSettings { LogDevice = device }); + log.AcquireThread(); + + byte[] entry = new byte[entryLength]; + for (int i = 0; i < entryLength; i++) + entry[i] = (byte)i; + + for (int i = 0; i < numEntries; i++) + { + log.Append(entry); + } + log.Flush(true); + + using (var iter = log.Scan(0, long.MaxValue)) + { + int count = 0; + while (iter.GetNext(out Span result)) + { + count++; + Assert.IsTrue(result.SequenceEqual(entry)); + if (count % 100 == 0) + log.TruncateUntil(iter.CurrentAddress); + } + Assert.IsTrue(count == numEntries); + } + + log.ReleaseThread(); + log.Dispose(); + } + } +} diff --git a/cs/test/SimpleRecoveryTest.cs b/cs/test/SimpleRecoveryTest.cs index 4ced1f870..0f5d41689 100644 --- a/cs/test/SimpleRecoveryTest.cs +++ b/cs/test/SimpleRecoveryTest.cs @@ -63,10 +63,13 @@ public void SimpleRecoveryTest1() } fht1.TakeFullCheckpoint(out Guid token); fht1.CompleteCheckpoint(true); + + fht2.StartSession(); + fht1.StopSession(); fht2.Recover(token); - fht2.StartSession(); + for (int key = 0; key < numOps; key++) { var status = fht2.Read(ref inputArray[key], ref inputArg, ref output, Empty.Default, 0); From 19d5d82641965df3b203b2b9e141ba0594051977 Mon Sep 17 00:00:00 2001 From: Badrish Chandramouli Date: Wed, 18 Sep 2019 11:39:40 -0700 Subject: [PATCH 05/36] Fixing test change --- cs/test/SimpleRecoveryTest.cs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/cs/test/SimpleRecoveryTest.cs b/cs/test/SimpleRecoveryTest.cs index 0f5d41689..4ced1f870 100644 --- a/cs/test/SimpleRecoveryTest.cs +++ b/cs/test/SimpleRecoveryTest.cs @@ -63,13 +63,10 @@ public void SimpleRecoveryTest1() } fht1.TakeFullCheckpoint(out Guid token); fht1.CompleteCheckpoint(true); - - fht2.StartSession(); - fht1.StopSession(); fht2.Recover(token); - + fht2.StartSession(); for (int key = 0; key < numOps; key++) { var status = fht2.Read(ref inputArray[key], ref inputArg, ref output, Empty.Default, 0); From 88d72698482233e4ed7db0737513380933e5c9ce Mon Sep 17 00:00:00 2001 From: Badrish Chandramouli Date: Wed, 18 Sep 2019 20:01:49 -0700 Subject: [PATCH 06/36] Added commit and recovery support. 
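This enables a simple group-commit loop; a minimal sketch, using names from
the diff below (the 100 ms interval is illustrative, and FlushAndCommit is
assumed to take the same spinWait flag as Flush):

    // Periodically make all appended entries durable. With spinWait set,
    // the call returns only after the log is flushed through the tail
    // observed on entry and the commit metadata has been persisted.
    while (true)
    {
        Thread.Sleep(100);
        log.FlushAndCommit(true);
    }

Commit metadata is written through the new ILogCommitManager abstraction;
LocalLogCommitManager is the default, file-based implementation.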
--- cs/playground/FasterLogSample/Program.cs | 10 + cs/src/core/Allocator/AllocatorBase.cs | 22 ++- cs/src/core/Allocator/BlittableAllocator.cs | 15 +- cs/src/core/Allocator/GenericAllocator.cs | 6 +- .../Allocator/VarLenBlittableAllocator.cs | 13 +- cs/src/core/Index/FasterLog/FasterLog.cs | 72 ++++++- .../core/Index/FasterLog/FasterLogSettings.cs | 101 ++++++++++ .../core/Index/FasterLog/ILogCommitManager.cs | 25 +++ .../Index/FasterLog/LocalLogCommitManager.cs | 62 ++++++ cs/src/core/Index/Recovery/Recovery.cs | 177 ++++++++++-------- cs/test/FasterLogTests.cs | 2 +- 11 files changed, 402 insertions(+), 103 deletions(-) create mode 100644 cs/src/core/Index/FasterLog/ILogCommitManager.cs create mode 100644 cs/src/core/Index/FasterLog/LocalLogCommitManager.cs diff --git a/cs/playground/FasterLogSample/Program.cs b/cs/playground/FasterLogSample/Program.cs index 631848f44..0ca4005b4 100644 --- a/cs/playground/FasterLogSample/Program.cs +++ b/cs/playground/FasterLogSample/Program.cs @@ -33,6 +33,15 @@ static void ReportThread() } } + static void CommitThread() + { + while (true) + { + Thread.Sleep(100); + log.FlushAndCommit(true); + } + } + static void AppendThread() { byte[] entry = new byte[entryLength]; @@ -90,6 +99,7 @@ static void Main(string[] args) new Thread(new ThreadStart(AppendThread)).Start(); new Thread(new ThreadStart(ScanThread)).Start(); new Thread(new ThreadStart(ReportThread)).Start(); + new Thread(new ThreadStart(CommitThread)).Start(); Thread.Sleep(500*1000); } diff --git a/cs/src/core/Allocator/AllocatorBase.cs b/cs/src/core/Allocator/AllocatorBase.cs index 371cfad69..6365b7198 100644 --- a/cs/src/core/Allocator/AllocatorBase.cs +++ b/cs/src/core/Allocator/AllocatorBase.cs @@ -49,7 +49,7 @@ internal struct PageOffset /// /// /// - public unsafe abstract class AllocatorBase : IDisposable + public unsafe abstract partial class AllocatorBase : IDisposable where Key : new() where Value : new() { @@ -221,6 +221,11 @@ public unsafe abstract class AllocatorBase : IDisposable /// protected readonly Action EvictCallback = null; + /// + /// Flush callback + /// + protected readonly Action FlushCallback = null; + /// /// Observer for records entering read-only region /// @@ -380,7 +385,8 @@ public unsafe abstract class AllocatorBase : IDisposable /// Clear page /// /// Page number to be cleared - protected abstract void ClearPage(long page); + /// Offset to clear from (if partial clear) + protected abstract void ClearPage(long page, int offset = 0); /// /// Write page (async) /// @@ -469,13 +475,15 @@ public unsafe abstract class AllocatorBase : IDisposable /// /// /// - public AllocatorBase(LogSettings settings, IFasterEqualityComparer comparer, Action evictCallback, LightEpoch epoch) + /// + public AllocatorBase(LogSettings settings, IFasterEqualityComparer comparer, Action evictCallback, LightEpoch epoch, Action flushCallback) { if (evictCallback != null) { ReadCache = true; EvictCallback = evictCallback; } + FlushCallback = flushCallback; this.comparer = comparer; if (epoch == null) @@ -1100,7 +1108,10 @@ protected void ShiftFlushedUntilAddress() if (update) { - Utility.MonotonicUpdate(ref FlushedUntilAddress, currentFlushedUntilAddress, out long oldFlushedUntilAddress); + if (Utility.MonotonicUpdate(ref FlushedUntilAddress, currentFlushedUntilAddress, out long oldFlushedUntilAddress)) + { + FlushCallback?.Invoke(FlushedUntilAddress); + } } } @@ -1146,6 +1157,9 @@ public void RecoveryReset(long tailAddress, long headAddress, long beginAddress) 
PageStatusIndicator[pageIndex].PageFlushCloseStatus.PageCloseStatus = PMMCloseStatus.Open; PageStatusIndicator[pageIndex].PageFlushCloseStatus.PageFlushStatus = PMMFlushStatus.Flushed; + // clear the last page starting from tail address + ClearPage(pageIndex, (int)GetOffsetInPage(tailAddress)); + // Printing debug info Debug.WriteLine("******* Recovered HybridLog Stats *******"); Debug.WriteLine("Head Address: {0}", HeadAddress); diff --git a/cs/src/core/Allocator/BlittableAllocator.cs b/cs/src/core/Allocator/BlittableAllocator.cs index e3339ff6b..6e80ccb78 100644 --- a/cs/src/core/Allocator/BlittableAllocator.cs +++ b/cs/src/core/Allocator/BlittableAllocator.cs @@ -31,8 +31,8 @@ public unsafe sealed class BlittableAllocator : AllocatorBase comparer, Action evictCallback = null, LightEpoch epoch = null) - : base(settings, comparer, evictCallback, epoch) + public BlittableAllocator(LogSettings settings, IFasterEqualityComparer comparer, Action evictCallback = null, LightEpoch epoch = null, Action flushCallback = null) + : base(settings, comparer, evictCallback, epoch, flushCallback) { values = new byte[BufferSize][]; handles = new GCHandle[BufferSize]; @@ -198,9 +198,16 @@ public override long GetFirstValidLogicalAddress(long page) return page << LogPageSizeBits; } - protected override void ClearPage(long page) + protected override void ClearPage(long page, int offset) { - Array.Clear(values[page % BufferSize], 0, values[page % BufferSize].Length); + if (offset == 0) + Array.Clear(values[page % BufferSize], offset, values[page % BufferSize].Length - offset); + else + { + // Adjust array offset for cache alignment + offset += (int)(pointers[page % BufferSize] - (long)handles[page % BufferSize].AddrOfPinnedObject()); + Array.Clear(values[page % BufferSize], offset, values[page % BufferSize].Length - offset); + } } /// diff --git a/cs/src/core/Allocator/GenericAllocator.cs b/cs/src/core/Allocator/GenericAllocator.cs index ccea80368..0b6b7983d 100644 --- a/cs/src/core/Allocator/GenericAllocator.cs +++ b/cs/src/core/Allocator/GenericAllocator.cs @@ -42,7 +42,7 @@ public unsafe sealed class GenericAllocator : AllocatorBase(); public GenericAllocator(LogSettings settings, SerializerSettings serializerSettings, IFasterEqualityComparer comparer, Action evictCallback = null, LightEpoch epoch = null) - : base(settings, comparer, evictCallback, epoch) + : base(settings, comparer, evictCallback, epoch, null) { SerializerSettings = serializerSettings; @@ -254,9 +254,9 @@ protected override void WriteAsyncToDevice - protected override void ClearPage(long page) + protected override void ClearPage(long page, int offset) { - Array.Clear(values[page % BufferSize], 0, values[page % BufferSize].Length); + Array.Clear(values[page % BufferSize], offset / recordSize, values[page % BufferSize].Length - offset / recordSize); // Close segments var thisCloseSegment = page >> (LogSegmentSizeBits - LogPageSizeBits); diff --git a/cs/src/core/Allocator/VarLenBlittableAllocator.cs b/cs/src/core/Allocator/VarLenBlittableAllocator.cs index a7587f685..b5057d8cd 100644 --- a/cs/src/core/Allocator/VarLenBlittableAllocator.cs +++ b/cs/src/core/Allocator/VarLenBlittableAllocator.cs @@ -34,7 +34,7 @@ public unsafe sealed class VariableLengthBlittableAllocator : Alloca internal readonly IVariableLengthStruct ValueLength; public VariableLengthBlittableAllocator(LogSettings settings, VariableLengthStructSettings vlSettings, IFasterEqualityComparer comparer, Action evictCallback = null, LightEpoch epoch = null) - : base(settings, 
comparer, evictCallback, epoch) + : base(settings, comparer, evictCallback, epoch, null) { values = new byte[BufferSize][]; handles = new GCHandle[BufferSize]; @@ -282,9 +282,16 @@ public override long GetFirstValidLogicalAddress(long page) return page << LogPageSizeBits; } - protected override void ClearPage(long page) + protected override void ClearPage(long page, int offset) { - Array.Clear(values[page % BufferSize], 0, values[page % BufferSize].Length); + if (offset == 0) + Array.Clear(values[page % BufferSize], offset, values[page % BufferSize].Length - offset); + else + { + // Adjust array offset for cache alignment + offset += (int)(pointers[page % BufferSize] - (long)handles[page % BufferSize].AddrOfPinnedObject()); + Array.Clear(values[page % BufferSize], offset, values[page % BufferSize].Length - offset); + } } /// diff --git a/cs/src/core/Index/FasterLog/FasterLog.cs b/cs/src/core/Index/FasterLog/FasterLog.cs index 5435008ed..90c65041c 100644 --- a/cs/src/core/Index/FasterLog/FasterLog.cs +++ b/cs/src/core/Index/FasterLog/FasterLog.cs @@ -7,6 +7,7 @@ using System.Collections.Concurrent; using System.Collections.Generic; using System.Diagnostics; +using System.IO; using System.Runtime.CompilerServices; using System.Threading; @@ -32,19 +33,76 @@ public class FasterLog : IDisposable public long TailAddress => allocator.GetTailAddress(); /// - /// Flushed until address + /// Log flushed until address /// public long FlushedUntilAddress => allocator.FlushedUntilAddress; + /// + /// Log commit until address + /// + public long CommitUntilAddress; + + private ILogCommitManager logCommitManager; + /// /// Create new log instance /// /// public FasterLog(FasterLogSettings logSettings) { + logCommitManager = logSettings.LogCommitManager ?? + new LocalLogCommitManager(logSettings.LogCommitFile ?? 
+ logSettings.LogDevice.FileName + ".commit"); + epoch = new LightEpoch(); - allocator = new BlittableAllocator(logSettings.GetLogSettings(), null, null, epoch); + allocator = new BlittableAllocator( + logSettings.GetLogSettings(), null, + null, epoch, e => Commit(e)); allocator.Initialize(); + Restore(); + } + + /// + /// Commit log + /// + private void Commit(long flushAddress) + { + epoch.Resume(); + FasterLogRecoveryInfo info = new FasterLogRecoveryInfo(); + info.FlushedUntilAddress = allocator.FlushedUntilAddress; + info.BeginAddress = allocator.BeginAddress; + epoch.Suspend(); + + // We can only allow serial monotonic synchronous commit + lock (this) + { + if (flushAddress > CommitUntilAddress) + { + logCommitManager.Commit(info.ToByteArray()); + CommitUntilAddress = flushAddress; + info.DebugPrint(); + } + } + } + + /// + /// Restore log + /// + private void Restore() + { + FasterLogRecoveryInfo info = new FasterLogRecoveryInfo(); + var commitInfo = logCommitManager.GetCommitMetadata(); + + if (commitInfo == null) return; + + using (var r = new BinaryReader(new MemoryStream(commitInfo))) + { + info.Initialize(r); + } + + allocator.RestoreHybridLog(info.FlushedUntilAddress, + info.FlushedUntilAddress - allocator.GetOffsetInPage(info.FlushedUntilAddress), + info.BeginAddress); } /// @@ -117,16 +175,20 @@ public unsafe long Append(List entries) /// /// Flush the log until tail /// - public long Flush(bool spinWait = false) + public long FlushAndCommit(bool spinWait = false) { epoch.Resume(); allocator.ShiftReadOnlyToTail(out long tailAddress); - epoch.Suspend(); + if (spinWait) { - while (allocator.FlushedUntilAddress < tailAddress) + while (CommitUntilAddress < tailAddress) + { + epoch.ProtectAndDrain(); Thread.Yield(); + } } + epoch.Suspend(); return tailAddress; } diff --git a/cs/src/core/Index/FasterLog/FasterLogSettings.cs b/cs/src/core/Index/FasterLog/FasterLogSettings.cs index 1b70aa656..5969a034e 100644 --- a/cs/src/core/Index/FasterLog/FasterLogSettings.cs +++ b/cs/src/core/Index/FasterLog/FasterLogSettings.cs @@ -3,6 +3,10 @@ #pragma warning disable 0162 +using System; +using System.Diagnostics; +using System.IO; + namespace FASTER.core { /// @@ -30,6 +34,18 @@ public class FasterLogSettings /// public int MemorySizeBits = 26; + /// + /// Log commit manager + /// + public ILogCommitManager LogCommitManager = null; + + /// + /// Use specified directory for storing and retrieving checkpoints + /// This is a shortcut to providing the following: + /// FasterLogSettings.LogCommitManager = new LocalLogCommitManager(LogCommitFile) + /// + public string LogCommitFile = null; + internal LogSettings GetLogSettings() { return new LogSettings @@ -45,4 +61,89 @@ internal LogSettings GetLogSettings() }; } } + + /// + /// Recovery info for FASTER Log + /// + internal struct FasterLogRecoveryInfo + { + /// + /// Begin address + /// + public long BeginAddress; + + /// + /// Flushed logical address + /// + public long FlushedUntilAddress; + + + /// + /// Initialize + /// + public void Initialize() + { + BeginAddress = 0; + FlushedUntilAddress = 0; + } + + /// + /// Initialize from stream + /// + /// + public void Initialize(BinaryReader reader) + { + BeginAddress = reader.ReadInt64(); + FlushedUntilAddress = reader.ReadInt64(); + } + + /// + /// Recover info from token + /// + /// + /// + internal void Recover(ILogCommitManager logCommitManager) + { + var metadata = logCommitManager.GetCommitMetadata(); + if (metadata == null) + throw new Exception("Invalid log commit metadata during 
recovery"); + + Initialize(new BinaryReader(new MemoryStream(metadata))); + } + + /// + /// Reset + /// + public void Reset() + { + Initialize(); + } + + /// + /// Write info to byte array + /// + public byte[] ToByteArray() + { + using (var ms = new MemoryStream()) + { + using (var writer = new BinaryWriter(ms)) + { + writer.Write(BeginAddress); + writer.Write(FlushedUntilAddress); + } + return ms.ToArray(); + } + } + + /// + /// Print checkpoint info for debugging purposes + /// + public void DebugPrint() + { + Debug.WriteLine("******** Log Commit Info ********"); + + Debug.WriteLine("BeginAddress: {0}", BeginAddress); + Debug.WriteLine("FlushedUntilAddress: {0}", FlushedUntilAddress); + } + } } diff --git a/cs/src/core/Index/FasterLog/ILogCommitManager.cs b/cs/src/core/Index/FasterLog/ILogCommitManager.cs new file mode 100644 index 000000000..7c5f37de9 --- /dev/null +++ b/cs/src/core/Index/FasterLog/ILogCommitManager.cs @@ -0,0 +1,25 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +using System.IO; + +namespace FASTER.core +{ + /// + /// Log commit manager + /// + public interface ILogCommitManager + { + /// + /// Perform (synchronous) commit with specified metadata + /// + /// + void Commit(byte[] commitMetadata); + + /// + /// Return prior commit metadata during recovery + /// + /// + byte[] GetCommitMetadata(); + } +} \ No newline at end of file diff --git a/cs/src/core/Index/FasterLog/LocalLogCommitManager.cs b/cs/src/core/Index/FasterLog/LocalLogCommitManager.cs new file mode 100644 index 000000000..d7bdba2a0 --- /dev/null +++ b/cs/src/core/Index/FasterLog/LocalLogCommitManager.cs @@ -0,0 +1,62 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. 
+ +using System.IO; + +namespace FASTER.core +{ + /// + /// Implementation of checkpoint interface for local file storage + /// + public class LocalLogCommitManager : ILogCommitManager + { + private string CommitFile; + + /// + /// Create new instance of local checkpoint manager at given base directory + /// + /// + public LocalLogCommitManager(string CommitFile) + { + this.CommitFile = CommitFile; + } + + /// + /// Commit log + /// + /// + public void Commit(byte[] commitMetadata) + { + // Two phase to ensure we write metadata in single Write operation + using (var ms = new MemoryStream()) + { + using (var writer = new BinaryWriter(ms)) + { + writer.Write(commitMetadata.Length); + writer.Write(commitMetadata); + } + using (var writer = new BinaryWriter(new FileStream(CommitFile, FileMode.OpenOrCreate))) + { + writer.Write(ms.ToArray()); + writer.Flush(); + } + } + } + + /// + /// Retrieve commit metadata + /// + /// Metadata, or null if invalid + public byte[] GetCommitMetadata() + { + if (!File.Exists(CommitFile)) + return null; + + using (var reader = new BinaryReader(new FileStream(CommitFile, FileMode.Open))) + { + var len = reader.ReadInt32(); + return reader.ReadBytes(len); + } + } + } +} \ No newline at end of file diff --git a/cs/src/core/Index/Recovery/Recovery.cs b/cs/src/core/Index/Recovery/Recovery.cs index e3ffcffad..7ddd5db3f 100644 --- a/cs/src/core/Index/Recovery/Recovery.cs +++ b/cs/src/core/Index/Recovery/Recovery.cs @@ -89,7 +89,7 @@ private void InternalRecover(Guid indexToken, Guid hybridLogToken) recoveredHLCInfo.info.DebugPrint(); // Check if the two checkpoints are compatible for recovery - if(!IsCompatible(recoveredICInfo.info, recoveredHLCInfo.info)) + if (!IsCompatible(recoveredICInfo.info, recoveredHLCInfo.info)) { throw new Exception("Cannot recover from (" + indexToken.ToString() + "," + hybridLogToken.ToString() + ") checkpoint pair!\n"); } @@ -118,69 +118,15 @@ private void InternalRecover(Guid indexToken, Guid hybridLogToken) { RecoverHybridLogFromSnapshotFile(recoveredICInfo.info, recoveredHLCInfo.info); } - + // Read appropriate hybrid log pages into memory - RestoreHybridLog(recoveredHLCInfo.info.finalLogicalAddress, recoveredHLCInfo.info.headAddress, recoveredHLCInfo.info.beginAddress); + hlog.RestoreHybridLog(recoveredHLCInfo.info.finalLogicalAddress, recoveredHLCInfo.info.headAddress, recoveredHLCInfo.info.beginAddress); // Recover session information _recoveredSessions = recoveredHLCInfo.info.continueTokens; } - private void RestoreHybridLog(long untilAddress, long headAddress, long beginAddress) - { - Debug.Assert(beginAddress <= headAddress); - Debug.Assert(headAddress <= untilAddress); - - // Special cases: we do not load any records into memory - if ( - (beginAddress == untilAddress) || // Empty log - ((headAddress == untilAddress) && (hlog.GetOffsetInPage(headAddress) == 0)) // Empty in-memory page - ) - { - hlog.AllocatePage(hlog.GetPageIndexForAddress(headAddress)); - } - else - { - var tailPage = hlog.GetPage(untilAddress); - var headPage = hlog.GetPage(headAddress); - - var recoveryStatus = new RecoveryStatus(hlog.GetCapacityNumPages(), headPage, tailPage, untilAddress); - for (int i = 0; i < recoveryStatus.capacity; i++) - { - recoveryStatus.readStatus[i] = ReadStatus.Done; - } - - var numPages = 0; - for (var page = headPage; page <= tailPage; page++) - { - var pageIndex = hlog.GetPageIndexForPage(page); - recoveryStatus.readStatus[pageIndex] = ReadStatus.Pending; - numPages++; - } - - hlog.AsyncReadPagesFromDevice(headPage, numPages, 
untilAddress, AsyncReadPagesCallbackForRecovery, recoveryStatus); - - var done = false; - while (!done) - { - done = true; - for (long page = headPage; page <= tailPage; page++) - { - int pageIndex = hlog.GetPageIndexForPage(page); - if (recoveryStatus.readStatus[pageIndex] == ReadStatus.Pending) - { - done = false; - break; - } - } - } - } - - hlog.RecoveryReset(untilAddress, headAddress, beginAddress); - } - - private void RecoverHybridLog(IndexRecoveryInfo indexRecoveryInfo, HybridLogRecoveryInfo recoveryInfo) { @@ -202,8 +148,8 @@ private void RecoverHybridLog(IndexRecoveryInfo indexRecoveryInfo, int numPagesToReadFirst = Math.Min(capacity, totalPagesToRead); // Issue request to read pages as much as possible - hlog.AsyncReadPagesFromDevice(startPage, numPagesToReadFirst, untilAddress, AsyncReadPagesCallbackForRecovery, recoveryStatus); - + hlog.AsyncReadPagesFromDevice(startPage, numPagesToReadFirst, untilAddress, hlog.AsyncReadPagesCallbackForRecovery, recoveryStatus); + for (long page = startPage; page < endPage; page++) { // Ensure page has been read into memory @@ -227,7 +173,7 @@ private void RecoverHybridLog(IndexRecoveryInfo indexRecoveryInfo, { pageUntilAddress = hlog.GetOffsetInPage(untilAddress); } - + var physicalAddress = hlog.GetPhysicalAddress(startLogicalAddress); RecoverFromPage(fromAddress, pageFromAddress, pageUntilAddress, startLogicalAddress, physicalAddress, recoveryInfo.version); @@ -292,7 +238,7 @@ private void RecoverHybridLogFromSnapshotFile( int numPagesToReadFirst = Math.Min(capacity, totalPagesToRead); hlog.AsyncReadPagesFromDevice(startPage, numPagesToReadFirst, untilAddress, - AsyncReadPagesCallbackForRecovery, + hlog.AsyncReadPagesCallbackForRecovery, recoveryStatus, recoveryStatus.recoveryDevicePageOffset, recoveryStatus.recoveryDevice, recoveryStatus.objectLogRecoveryDevice); @@ -430,26 +376,6 @@ private void RecoverFromPage(long startRecoveryAddress, } } - private void AsyncReadPagesCallbackForRecovery(uint errorCode, uint numBytes, NativeOverlapped* overlap) - { - if (errorCode != 0) - { - Trace.TraceError("OverlappedStream GetQueuedCompletionStatus error: {0}", errorCode); - } - - // Set the page status to flushed - var result = (PageAsyncReadResult)Overlapped.Unpack(overlap).AsyncResult; - - if (result.freeBuffer1 != null) - { - hlog.PopulatePage(result.freeBuffer1.GetValidPointer(), result.freeBuffer1.required_bytes, result.page); - result.freeBuffer1.Return(); - } - int index = hlog.GetPageIndexForPage(result.page); - result.context.readStatus[index] = ReadStatus.Done; - Interlocked.MemoryBarrier(); - Overlapped.Free(overlap); - } private void AsyncFlushPageCallbackForRecovery(uint errorCode, uint numBytes, NativeOverlapped* overlap) { @@ -470,11 +396,11 @@ private void AsyncFlushPageCallbackForRecovery(uint errorCode, uint numBytes, Na long readPage = result.page + result.context.capacity; if (FoldOverSnapshot) { - hlog.AsyncReadPagesFromDevice(readPage, 1, result.context.untilAddress, AsyncReadPagesCallbackForRecovery, result.context); + hlog.AsyncReadPagesFromDevice(readPage, 1, result.context.untilAddress, hlog.AsyncReadPagesCallbackForRecovery, result.context); } else { - hlog.AsyncReadPagesFromDevice(readPage, 1, result.context.untilAddress, AsyncReadPagesCallbackForRecovery, + hlog.AsyncReadPagesFromDevice(readPage, 1, result.context.untilAddress, hlog.AsyncReadPagesCallbackForRecovery, result.context, result.context.recoveryDevicePageOffset, result.context.recoveryDevice, result.context.objectLogRecoveryDevice); @@ -485,4 +411,89 @@ 
private void AsyncFlushPageCallbackForRecovery(uint errorCode, uint numBytes, Na Overlapped.Free(overlap); } } + + public unsafe abstract partial class AllocatorBase : IDisposable + where Key : new() + where Value : new() + { + /// + /// Restore log + /// + /// + /// + /// + public void RestoreHybridLog(long untilAddress, long headAddress, long beginAddress) + { + Debug.Assert(beginAddress <= headAddress); + Debug.Assert(headAddress <= untilAddress); + + // Special cases: we do not load any records into memory + if ( + (beginAddress == untilAddress) || // Empty log + ((headAddress == untilAddress) && (GetOffsetInPage(headAddress) == 0)) // Empty in-memory page + ) + { + AllocatePage(GetPageIndexForAddress(headAddress)); + } + else + { + var tailPage = GetPage(untilAddress); + var headPage = GetPage(headAddress); + + var recoveryStatus = new RecoveryStatus(GetCapacityNumPages(), headPage, tailPage, untilAddress); + for (int i = 0; i < recoveryStatus.capacity; i++) + { + recoveryStatus.readStatus[i] = ReadStatus.Done; + } + + var numPages = 0; + for (var page = headPage; page <= tailPage; page++) + { + var pageIndex = GetPageIndexForPage(page); + recoveryStatus.readStatus[pageIndex] = ReadStatus.Pending; + numPages++; + } + + AsyncReadPagesFromDevice(headPage, numPages, untilAddress, AsyncReadPagesCallbackForRecovery, recoveryStatus); + + var done = false; + while (!done) + { + done = true; + for (long page = headPage; page <= tailPage; page++) + { + int pageIndex = GetPageIndexForPage(page); + if (recoveryStatus.readStatus[pageIndex] == ReadStatus.Pending) + { + done = false; + break; + } + } + } + } + + RecoveryReset(untilAddress, headAddress, beginAddress); + } + + internal void AsyncReadPagesCallbackForRecovery(uint errorCode, uint numBytes, NativeOverlapped* overlap) + { + if (errorCode != 0) + { + Trace.TraceError("OverlappedStream GetQueuedCompletionStatus error: {0}", errorCode); + } + + // Set the page status to flushed + var result = (PageAsyncReadResult)Overlapped.Unpack(overlap).AsyncResult; + + if (result.freeBuffer1 != null) + { + PopulatePage(result.freeBuffer1.GetValidPointer(), result.freeBuffer1.required_bytes, result.page); + result.freeBuffer1.Return(); + } + int index = GetPageIndexForPage(result.page); + result.context.readStatus[index] = ReadStatus.Done; + Interlocked.MemoryBarrier(); + Overlapped.Free(overlap); + } + } } diff --git a/cs/test/FasterLogTests.cs b/cs/test/FasterLogTests.cs index 60542c9be..8059006bf 100644 --- a/cs/test/FasterLogTests.cs +++ b/cs/test/FasterLogTests.cs @@ -44,7 +44,7 @@ public void FasterLogTest1() { log.Append(entry); } - log.Flush(true); + log.FlushAndCommit(true); using (var iter = log.Scan(0, long.MaxValue)) { From ddcc3382dd7318691b83efd9fbb9582ad9bf21df Mon Sep 17 00:00:00 2001 From: Badrish Chandramouli Date: Thu, 19 Sep 2019 10:48:53 -0700 Subject: [PATCH 07/36] Added TryAppend so users can implement log throttling. 
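Throttling sketch (illustrative only; the entry size and 10ms back-off are arbitrary choices, matching the commented example in the sample program). TryAppend returns false instead of blocking when the allocator cannot yet admit the entry, letting callers implement their own back-off; a Span<byte> overload has the same contract:

    byte[] entry = new byte[100];
    long logicalAddress;
    while (!log.TryAppend(entry, out logicalAddress))
    {
        // Allocator is still flushing or closing pages; back off briefly.
        Thread.Sleep(10);
    }
    // On success, logicalAddress holds the address of the appended entry.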
--- cs/playground/FasterLogSample/Program.cs | 12 ++ cs/src/core/Index/FasterLog/FasterLog.cs | 154 ++++++++++++------ .../core/Index/FasterLog/ILogCommitManager.cs | 5 +- .../Index/FasterLog/LocalLogCommitManager.cs | 7 +- 4 files changed, 125 insertions(+), 53 deletions(-) diff --git a/cs/playground/FasterLogSample/Program.cs b/cs/playground/FasterLogSample/Program.cs index 0ca4005b4..6ba53ffff 100644 --- a/cs/playground/FasterLogSample/Program.cs +++ b/cs/playground/FasterLogSample/Program.cs @@ -51,6 +51,14 @@ static void AppendThread() while (true) { log.Append(entry); + + // We also support a Span-based version of Append + + // We also support TryAppend to allow throttling/back-off: + // while (!log.TryAppend(entry, out long logicalAddress)) + // { + // Thread.Sleep(10); + // } } } @@ -97,6 +105,10 @@ static void Main(string[] args) log = new FasterLog(new FasterLogSettings { LogDevice = device }); new Thread(new ThreadStart(AppendThread)).Start(); + + // Can have multiple append threads if needed + // new Thread(new ThreadStart(AppendThread)).Start(); + new Thread(new ThreadStart(ScanThread)).Start(); new Thread(new ThreadStart(ReportThread)).Start(); new Thread(new ThreadStart(CommitThread)).Start(); diff --git a/cs/src/core/Index/FasterLog/FasterLog.cs b/cs/src/core/Index/FasterLog/FasterLog.cs index 90c65041c..fc963cb3d 100644 --- a/cs/src/core/Index/FasterLog/FasterLog.cs +++ b/cs/src/core/Index/FasterLog/FasterLog.cs @@ -21,6 +21,7 @@ public class FasterLog : IDisposable { private readonly BlittableAllocator allocator; private readonly LightEpoch epoch; + private ILogCommitManager logCommitManager; /// /// Beginning address of log @@ -40,9 +41,7 @@ public class FasterLog : IDisposable /// /// Log commit until address /// - public long CommitUntilAddress; - - private ILogCommitManager logCommitManager; + public long CommittedUntilAddress; /// /// Create new log instance @@ -62,49 +61,6 @@ public FasterLog(FasterLogSettings logSettings) Restore(); } - /// - /// Commit log - /// - private void Commit(long flushAddress) - { - epoch.Resume(); - FasterLogRecoveryInfo info = new FasterLogRecoveryInfo(); - info.FlushedUntilAddress = allocator.FlushedUntilAddress; - info.BeginAddress = allocator.BeginAddress; - epoch.Suspend(); - - // We can only allow serial monotonic synchronous commit - lock (this) - { - if (flushAddress > CommitUntilAddress) - { - logCommitManager.Commit(info.ToByteArray()); - CommitUntilAddress = flushAddress; - info.DebugPrint(); - } - } - } - - /// - /// Restore log - /// - private void Restore() - { - FasterLogRecoveryInfo info = new FasterLogRecoveryInfo(); - var commitInfo = logCommitManager.GetCommitMetadata(); - - if (commitInfo == null) return; - - using (var r = new BinaryReader(new MemoryStream(commitInfo))) - { - info.Initialize(r); - } - - allocator.RestoreHybridLog(info.FlushedUntilAddress, - info.FlushedUntilAddress - allocator.GetOffsetInPage(info.FlushedUntilAddress), - info.BeginAddress); - } - /// /// Dispose /// @@ -150,6 +106,60 @@ public unsafe long Append(byte[] entry) return logicalAddress; } + /// + /// Try to append entry to log + /// + /// Entry to be appended to log + /// Logical address of added entry + /// Whether the append succeeded + public unsafe bool TryAppend(byte[] entry, out long logicalAddress) + { + epoch.Resume(); + logicalAddress = 0; + long tail = -allocator.GetTailAddress(); + allocator.CheckForAllocateComplete(ref tail); + if (tail < 0) + { + epoch.Suspend(); + return false; + } + var length = entry.Length; + 
BlockAllocate(4 + length, out logicalAddress); + var physicalAddress = allocator.GetPhysicalAddress(logicalAddress); + *(int*)physicalAddress = length; + fixed (byte* bp = entry) + Buffer.MemoryCopy(bp, (void*)(4 + physicalAddress), length, length); + epoch.Suspend(); + return true; + } + + /// + /// Try to append entry to log + /// + /// Entry to be appended to log + /// Logical address of added entry + /// Whether the append succeeded + public unsafe bool TryAppend(Span entry, out long logicalAddress) + { + epoch.Resume(); + logicalAddress = 0; + long tail = -allocator.GetTailAddress(); + allocator.CheckForAllocateComplete(ref tail); + if (tail < 0) + { + epoch.Suspend(); + return false; + } + var length = entry.Length; + BlockAllocate(4 + length, out logicalAddress); + var physicalAddress = allocator.GetPhysicalAddress(logicalAddress); + *(int*)physicalAddress = length; + fixed (byte* bp = &entry.GetPinnableReference()) + Buffer.MemoryCopy(bp, (void*)(4 + physicalAddress), length, length); + epoch.Suspend(); + return true; + } + /// /// Append batch of entries to log /// @@ -182,7 +192,7 @@ public long FlushAndCommit(bool spinWait = false) if (spinWait) { - while (CommitUntilAddress < tailAddress) + while (CommittedUntilAddress < tailAddress) { epoch.ProtectAndDrain(); Thread.Yield(); @@ -204,7 +214,7 @@ public void TruncateUntil(long untilAddress) } /// - /// Iterator interface for scanning FASTER log + /// Pull-based iterator interface for scanning FASTER log /// /// /// @@ -235,6 +245,11 @@ public void ReleaseThread() epoch.Release(); } + /// + /// Block allocate + /// + /// + /// [MethodImpl(MethodImplOptions.AggressiveInlining)] private void BlockAllocate(int recordSize, out long logicalAddress) { @@ -259,5 +274,48 @@ private void BlockAllocate(int recordSize, out long logicalAddress) BlockAllocate(recordSize, out logicalAddress); } } + + /// + /// Commit log + /// + private void Commit(long flushAddress) + { + epoch.Resume(); + FasterLogRecoveryInfo info = new FasterLogRecoveryInfo(); + info.FlushedUntilAddress = allocator.FlushedUntilAddress; + info.BeginAddress = allocator.BeginAddress; + epoch.Suspend(); + + // We can only allow serial monotonic synchronous commit + lock (this) + { + if (flushAddress > CommittedUntilAddress) + { + logCommitManager.Commit(flushAddress, info.ToByteArray()); + CommittedUntilAddress = flushAddress; + info.DebugPrint(); + } + } + } + + /// + /// Restore log + /// + private void Restore() + { + FasterLogRecoveryInfo info = new FasterLogRecoveryInfo(); + var commitInfo = logCommitManager.GetCommitMetadata(); + + if (commitInfo == null) return; + + using (var r = new BinaryReader(new MemoryStream(commitInfo))) + { + info.Initialize(r); + } + + allocator.RestoreHybridLog(info.FlushedUntilAddress, + info.FlushedUntilAddress - allocator.GetOffsetInPage(info.FlushedUntilAddress), + info.BeginAddress); + } } } diff --git a/cs/src/core/Index/FasterLog/ILogCommitManager.cs b/cs/src/core/Index/FasterLog/ILogCommitManager.cs index 7c5f37de9..892273815 100644 --- a/cs/src/core/Index/FasterLog/ILogCommitManager.cs +++ b/cs/src/core/Index/FasterLog/ILogCommitManager.cs @@ -13,8 +13,9 @@ public interface ILogCommitManager /// /// Perform (synchronous) commit with specified metadata /// - /// - void Commit(byte[] commitMetadata); + /// Address committed until (for information only, not necessary to persist) + /// Commit metadata + void Commit(long address, byte[] commitMetadata); /// /// Return prior commit metadata during recovery diff --git 
a/cs/src/core/Index/FasterLog/LocalLogCommitManager.cs b/cs/src/core/Index/FasterLog/LocalLogCommitManager.cs index d7bdba2a0..761984d68 100644 --- a/cs/src/core/Index/FasterLog/LocalLogCommitManager.cs +++ b/cs/src/core/Index/FasterLog/LocalLogCommitManager.cs @@ -22,10 +22,11 @@ public LocalLogCommitManager(string CommitFile) } /// - /// Commit log + /// Perform (synchronous) commit with specified metadata /// - /// - public void Commit(byte[] commitMetadata) + /// Address committed until (for information only, not necessary to persist) + /// Commit metadata + public void Commit(long address, byte[] commitMetadata) { // Two phase to ensure we write metadata in single Write operation using (var ms = new MemoryStream()) From 2cd85e3904c5849cb5a914622f3617d8e86287d2 Mon Sep 17 00:00:00 2001 From: Badrish Chandramouli Date: Thu, 26 Sep 2019 00:09:50 -0400 Subject: [PATCH 08/36] Fasterlog lowmem (#178) Adding support for low memory footprint (4 pages) Added support for odd-sized payloads in presence of holes in log Fixed concurrency issue that occurs with low num of pages Improved max throughput by eliminating a 10ms sleep in BlockAllocate Misc cleanup of logic to track flush and close addresses in log --- cs/playground/FasterLogSample/Program.cs | 16 +- cs/src/core/Allocator/AllocatorBase.cs | 278 +++++++++--------- cs/src/core/Allocator/BlittableAllocator.cs | 4 - cs/src/core/Allocator/GenericAllocator.cs | 7 +- .../Allocator/VarLenBlittableAllocator.cs | 8 +- cs/src/core/Index/FASTER/FASTERImpl.cs | 45 +-- cs/src/core/Index/FasterLog/FasterLog.cs | 23 +- .../core/Index/FasterLog/FasterLogIterator.cs | 6 +- .../core/Index/FasterLog/FasterLogSettings.cs | 9 +- cs/src/core/Index/Recovery/Recovery.cs | 3 +- 10 files changed, 196 insertions(+), 203 deletions(-) diff --git a/cs/playground/FasterLogSample/Program.cs b/cs/playground/FasterLogSample/Program.cs index 6ba53ffff..cb377cf6e 100644 --- a/cs/playground/FasterLogSample/Program.cs +++ b/cs/playground/FasterLogSample/Program.cs @@ -51,7 +51,7 @@ static void AppendThread() while (true) { log.Append(entry); - + // We also support a Span-based version of Append // We also support TryAppend to allow throttling/back-off: @@ -84,7 +84,7 @@ static void ScanThread() Thread.Sleep(1000); if (!result.SequenceEqual(entrySpan)) { - throw new Exception("Invalid entry found"); + throw new Exception("Invalid entry found at offset " + FindDiff(result, entrySpan)); } if (r.Next(100) < 10) @@ -99,6 +99,18 @@ static void ScanThread() } } + static int FindDiff(Span b1, Span b2) + { + for (int i=0; i : IDisposable /// /// HeadOffset lag (from tail) /// - protected const int HeadOffsetLagNumPages = 4; + protected const int HeadOffsetLagNumPages = 2; /// /// HeadOffset lag (from tail) for ReadCache /// - protected const int ReadCacheHeadOffsetLagNumPages = 1; + protected const int ReadCacheHeadOffsetLagNumPages = 2; /// /// HeadOffset lag size /// @@ -168,6 +157,11 @@ public unsafe abstract partial class AllocatorBase : IDisposable /// public long FlushedUntilAddress; + /// + /// Flushed until address + /// + public long ClosedUntilAddress; + /// /// Begin address /// @@ -190,7 +184,7 @@ public unsafe abstract partial class AllocatorBase : IDisposable /// /// Index in circular buffer, of the current tail page /// - private volatile int TailPageIndex; + private volatile int TailPageCache; // Array that indicates the status of each buffer page internal readonly FullPageStatus[] PageStatusIndicator; @@ -521,9 +515,9 @@ public AllocatorBase(LogSettings settings, 
IFasterEqualityComparer comparer SegmentSize = 1 << LogSegmentSizeBits; SegmentBufferSize = 1 + (LogTotalSizeBytes / SegmentSize < 1 ? 1 : (int)(LogTotalSizeBytes / SegmentSize)); - if (BufferSize < 16) + if (BufferSize < 4) { - throw new Exception("HLOG buffer must be at least 16 pages"); + throw new Exception("HLOG buffer must be at least 4 pages"); } PageStatusIndicator = new FullPageStatus[BufferSize]; @@ -555,13 +549,14 @@ protected void Initialize(long firstValidAddress) ReadOnlyAddress = firstValidAddress; SafeHeadAddress = firstValidAddress; HeadAddress = firstValidAddress; + ClosedUntilAddress = firstValidAddress; FlushedUntilAddress = firstValidAddress; BeginAddress = firstValidAddress; TailPageOffset.Page = (int)(firstValidAddress >> LogPageSizeBits); TailPageOffset.Offset = (int)(firstValidAddress & PageSizeMask); - TailPageIndex = 0; + TailPageCache = 0; } /// @@ -623,6 +618,11 @@ public long GetSegmentSize() public long GetTailAddress() { var local = TailPageOffset; + if (local.Offset >= PageSize) + { + local.Page++; + local.Offset = 0; + } return ((long)local.Page << LogPageSizeBits) | (uint)local.Offset; } @@ -778,18 +778,14 @@ public long Allocate(int numSlots = 1) long address = (((long)page) << LogPageSizeBits) | ((long)offset); - // Check if TailPageIndex is appropriate and allocated! - int pageIndex = page % BufferSize; - - if (TailPageIndex == pageIndex) + // Check for TailPageCache hit + if (TailPageCache == page) { return (address); } - //Invert the address if either the previous page is not flushed or if it is null - if ((PageStatusIndicator[pageIndex].PageFlushCloseStatus.PageFlushStatus != PMMFlushStatus.Flushed) || - (PageStatusIndicator[pageIndex].PageFlushCloseStatus.PageCloseStatus != PMMCloseStatus.Closed) || - (!IsAllocated(pageIndex))) + // Negate the address if page not ready to be used + if (CannotAllocate(page)) { address = -address; } @@ -799,13 +795,11 @@ public long Allocate(int numSlots = 1) { if (address >= 0) { - TailPageIndex = pageIndex; + TailPageCache = page; Interlocked.MemoryBarrier(); } - long newPage = page + 1; - int newPageIndex = (int)((page + 1) % BufferSize); - + int newPageIndex = (page + 1) % BufferSize; long tailAddress = (address < 0 ? 
-address : address); PageAlignedShiftReadOnlyAddress(tailAddress); PageAlignedShiftHeadAddress(tailAddress); @@ -814,9 +808,14 @@ public long Allocate(int numSlots = 1) { AllocatePage(newPageIndex); } + + // We refreshed epoch, so address may have + // become read-only; re-check + if (tailAddress < ReadOnlyAddress) + return Allocate(numSlots); } - return (address); + return address; } /// @@ -837,22 +836,17 @@ public void CheckForAllocateComplete(ref long address) p.Page = (int)((-address) >> LogPageSizeBits); p.Offset = (int)((-address) & PageSizeMask); - //Check write cache - int pageIndex = p.Page % BufferSize; - if (TailPageIndex == pageIndex) + // Check write cache + if (TailPageCache == p.Page) { address = -address; return; } - //Check if we can move the head offset - long currentTailAddress = GetTailAddress(); - PageAlignedShiftHeadAddress(currentTailAddress); + PageAlignedShiftHeadAddress(GetTailAddress()); - //Check if I can allocate pageIndex at all - if ((PageStatusIndicator[pageIndex].PageFlushCloseStatus.PageFlushStatus != PMMFlushStatus.Flushed) || - (PageStatusIndicator[pageIndex].PageFlushCloseStatus.PageCloseStatus != PMMCloseStatus.Closed) || - (!IsAllocated(pageIndex))) + // Check if we can allocate pageIndex + if (CannotAllocate(p.Page)) { return; } @@ -861,11 +855,19 @@ public void CheckForAllocateComplete(ref long address) address = -address; if (p.Offset == 0) { - TailPageIndex = pageIndex; + TailPageCache = p.Page; } return; } + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private bool CannotAllocate(int page) + { + return + (page >= BufferSize + (ClosedUntilAddress >> LogPageSizeBits) - 1) || + !IsAllocated(page % BufferSize); + } + /// /// Used by applications to make the current state of the database immutable quickly /// @@ -942,22 +944,8 @@ public void OnPagesMarkedReadOnly(long newSafeReadOnlyAddress, bool waitForPendi if (Utility.MonotonicUpdate(ref SafeReadOnlyAddress, newSafeReadOnlyAddress, out long oldSafeReadOnlyAddress)) { Debug.WriteLine("SafeReadOnly shifted from {0:X} to {1:X}", oldSafeReadOnlyAddress, newSafeReadOnlyAddress); - long startPage = oldSafeReadOnlyAddress >> LogPageSizeBits; - - long endPage = (newSafeReadOnlyAddress >> LogPageSizeBits); - OnReadOnlyObserver?.OnNext(Scan(oldSafeReadOnlyAddress, newSafeReadOnlyAddress, ScanBufferingMode.NoBuffering)); - - int numPages = (int)(endPage - startPage); - if (numPages > 10) - { - new Thread( - () => AsyncFlushPages(oldSafeReadOnlyAddress, newSafeReadOnlyAddress)).Start(); - } - else - { - AsyncFlushPages(oldSafeReadOnlyAddress, newSafeReadOnlyAddress); - } + AsyncFlushPages(oldSafeReadOnlyAddress, newSafeReadOnlyAddress); } } @@ -980,39 +968,42 @@ public void OnPagesClosed(long newSafeHeadAddress) return; } - int closePage = (int)((closePageAddress >> LogPageSizeBits) % BufferSize); + int closePage = (int)(closePageAddress >> LogPageSizeBits); + int closePageIndex = closePage % BufferSize; - if (!IsAllocated(closePage)) + if (!IsAllocated(closePageIndex)) + AllocatePage(closePageIndex); + else + ClearPage(closePage); + Utility.MonotonicUpdate(ref PageStatusIndicator[closePageIndex].LastClosedUntilAddress, closePageAddress + PageSize, out _); + ShiftClosedUntilAddress(); + if (ClosedUntilAddress > FlushedUntilAddress) { - AllocatePage(closePage); + throw new Exception($"Closed address {ClosedUntilAddress} exceeds flushed address {FlushedUntilAddress}"); } - - while (true) - { - var oldStatus = PageStatusIndicator[closePage].PageFlushCloseStatus; - if (oldStatus.PageFlushStatus == 
PMMFlushStatus.Flushed) - { - ClearPage(closePageAddress >> LogPageSizeBits); - } - else - { - throw new Exception("Error: page should already be flushed at this point"); - } - - var newStatus = oldStatus; - newStatus.PageCloseStatus = PMMCloseStatus.Closed; - if (oldStatus.value == Interlocked.CompareExchange(ref PageStatusIndicator[closePage].PageFlushCloseStatus.value, newStatus.value, oldStatus.value)) - { - break; - } - } - - // Necessary to propagate this change to other threads - Interlocked.MemoryBarrier(); } } } + private void DebugPrintAddresses(long closePageAddress) + { + var _flush = FlushedUntilAddress; + var _readonly = ReadOnlyAddress; + var _safereadonly = SafeReadOnlyAddress; + var _tail = GetTailAddress(); + var _head = HeadAddress; + var _safehead = SafeHeadAddress; + + Console.WriteLine("ClosePageAddress: {0}.{1}", GetPage(closePageAddress), GetOffsetInPage(closePageAddress)); + Console.WriteLine("FlushedUntil: {0}.{1}", GetPage(_flush), GetOffsetInPage(_flush)); + Console.WriteLine("Tail: {0}.{1}", GetPage(_tail), GetOffsetInPage(_tail)); + Console.WriteLine("Head: {0}.{1}", GetPage(_head), GetOffsetInPage(_head)); + Console.WriteLine("SafeHead: {0}.{1}", GetPage(_safehead), GetOffsetInPage(_safehead)); + Console.WriteLine("ReadOnly: {0}.{1}", GetPage(_readonly), GetOffsetInPage(_readonly)); + Console.WriteLine("SafeReadOnly: {0}.{1}", GetPage(_safereadonly), GetOffsetInPage(_safereadonly)); + Console.WriteLine("TailPageCache: {0}", TailPageCache); + } + /// /// Called every time a new tail page is allocated. Here the read-only is /// shifted only to page boundaries unlike ShiftReadOnlyToTail where shifting @@ -1097,13 +1088,13 @@ protected void ShiftFlushedUntilAddress() long page = GetPage(currentFlushedUntilAddress); bool update = false; - long pageLastFlushedAddress = Interlocked.Read(ref PageStatusIndicator[(int)(page % BufferSize)].LastFlushedUntilAddress); - while (pageLastFlushedAddress >= currentFlushedUntilAddress) + long pageLastFlushedAddress = PageStatusIndicator[page % BufferSize].LastFlushedUntilAddress; + while (pageLastFlushedAddress >= currentFlushedUntilAddress && currentFlushedUntilAddress >= (page << LogPageSizeBits)) { currentFlushedUntilAddress = pageLastFlushedAddress; update = true; page++; - pageLastFlushedAddress = Interlocked.Read(ref PageStatusIndicator[(int)(page % BufferSize)].LastFlushedUntilAddress); + pageLastFlushedAddress = PageStatusIndicator[page % BufferSize].LastFlushedUntilAddress; } if (update) @@ -1115,6 +1106,30 @@ protected void ShiftFlushedUntilAddress() } } + /// + /// Shift ClosedUntil address + /// + protected void ShiftClosedUntilAddress() + { + long currentClosedUntilAddress = ClosedUntilAddress; + long page = GetPage(currentClosedUntilAddress); + + bool update = false; + long pageLastClosedAddress = PageStatusIndicator[page % BufferSize].LastClosedUntilAddress; + while (pageLastClosedAddress >= currentClosedUntilAddress && currentClosedUntilAddress >= (page << LogPageSizeBits)) + { + currentClosedUntilAddress = pageLastClosedAddress; + update = true; + page++; + pageLastClosedAddress = PageStatusIndicator[(int)(page % BufferSize)].LastClosedUntilAddress; + } + + if (update) + { + Utility.MonotonicUpdate(ref ClosedUntilAddress, currentClosedUntilAddress, out long oldClosedUntilAddress); + } + } + /// /// Reset for recovery /// @@ -1127,35 +1142,25 @@ public void RecoveryReset(long tailAddress, long headAddress, long beginAddress) long offsetInPage = GetOffsetInPage(tailAddress); TailPageOffset.Page = (int)tailPage; 
TailPageOffset.Offset = (int)offsetInPage; - TailPageIndex = GetPageIndexForPage(TailPageOffset.Page); + TailPageCache = TailPageOffset.Page; // allocate next page as well - this is an invariant in the allocator! var pageIndex = (TailPageOffset.Page % BufferSize); var nextPageIndex = (pageIndex + 1) % BufferSize; if (tailAddress > 0) - AllocatePage(nextPageIndex); + if (!IsAllocated(nextPageIndex)) + AllocatePage(nextPageIndex); BeginAddress = beginAddress; HeadAddress = headAddress; SafeHeadAddress = headAddress; + ClosedUntilAddress = headAddress; FlushedUntilAddress = tailAddress; ReadOnlyAddress = tailAddress; SafeReadOnlyAddress = tailAddress; - // ensure appropriate page status for all pages in memory - // note: they must have been read in previously during recovery - var addr = GetStartLogicalAddress(GetPage(headAddress)); - for (; addr < tailAddress; addr += PageSize) - { - pageIndex = GetPageIndexForAddress(addr); - PageStatusIndicator[pageIndex].PageFlushCloseStatus.PageCloseStatus = PMMCloseStatus.Closed; - PageStatusIndicator[pageIndex].PageFlushCloseStatus.PageFlushStatus = PMMFlushStatus.Flushed; - } - // for the last page which contains tailoffset, it must be open pageIndex = GetPageIndexForAddress(tailAddress); - PageStatusIndicator[pageIndex].PageFlushCloseStatus.PageCloseStatus = PMMCloseStatus.Open; - PageStatusIndicator[pageIndex].PageFlushCloseStatus.PageFlushStatus = PMMFlushStatus.Flushed; // clear the last page starting from tail address ClearPage(pageIndex, (int)GetOffsetInPage(tailAddress)); @@ -1307,12 +1312,25 @@ public void AsyncFlushPages(long fromAddress, long untilAddress) long startPage = fromAddress >> LogPageSizeBits; long endPage = untilAddress >> LogPageSizeBits; int numPages = (int)(endPage - startPage); + + long offsetInStartPage = GetOffsetInPage(fromAddress); long offsetInEndPage = GetOffsetInPage(untilAddress); + + + // Extra (partial) page being flushed if (offsetInEndPage > 0) - { numPages++; - } + // Partial page starting point, need to wait until the + // ongoing adjacent flush is completed to ensure correctness + if (offsetInStartPage > 0) + { + while (FlushedUntilAddress < fromAddress) + { + epoch.ProtectAndDrain(); + Thread.Yield(); + } + } /* Request asynchronous writes to the device. If waitForPendingFlushComplete * is set, then a CountDownEvent is set in the callback handle. @@ -1325,41 +1343,24 @@ public void AsyncFlushPages(long fromAddress, long untilAddress) var asyncResult = new PageAsyncFlushResult { page = flushPage, - count = 1 + count = 1, + partial = false, + fromAddress = pageStartAddress, + untilAddress = pageEndAddress }; - if (pageEndAddress > untilAddress || pageStartAddress < fromAddress) + if ( + ((fromAddress > pageStartAddress) && (fromAddress < pageEndAddress)) || + ((untilAddress > pageStartAddress) && (untilAddress < pageEndAddress)) + ) { asyncResult.partial = true; - asyncResult.fromAddress = pageStartAddress; - asyncResult.untilAddress = pageEndAddress; - if (pageEndAddress > untilAddress) + if (untilAddress < pageEndAddress) asyncResult.untilAddress = untilAddress; - if (pageStartAddress < fromAddress) + if (fromAddress > pageStartAddress) asyncResult.fromAddress = fromAddress; - - - // Are we flushing until the end of page? 
- if (untilAddress >= pageEndAddress) - { - // Set status to in-progress - PageStatusIndicator[flushPage % BufferSize].PageFlushCloseStatus - = new FlushCloseStatus { PageFlushStatus = PMMFlushStatus.InProgress, PageCloseStatus = PMMCloseStatus.Open }; - } } - else - { - asyncResult.partial = false; - asyncResult.fromAddress = pageStartAddress; - asyncResult.untilAddress = pageEndAddress; - - // Set status to in-progress - PageStatusIndicator[flushPage % BufferSize].PageFlushCloseStatus - = new FlushCloseStatus { PageFlushStatus = PMMFlushStatus.InProgress, PageCloseStatus = PMMCloseStatus.Open }; - } - - Interlocked.Exchange(ref PageStatusIndicator[flushPage % BufferSize].LastFlushedUntilAddress, -1); WriteAsync(flushPage, AsyncFlushPageCallback, asyncResult); } @@ -1527,24 +1528,7 @@ private void AsyncFlushPageCallback(uint errorCode, uint numBytes, NativeOverlap if (Interlocked.Decrement(ref result.count) == 0) { - if (!result.partial || (result.untilAddress >= ((result.page + 1) << LogPageSizeBits))) - { - while (true) - { - var oldStatus = PageStatusIndicator[result.page % BufferSize].PageFlushCloseStatus; - if (oldStatus.PageCloseStatus == PMMCloseStatus.Closed) - { - throw new Exception("Error: page should not be closed at this point"); - } - var newStatus = oldStatus; - newStatus.PageFlushStatus = PMMFlushStatus.Flushed; - if (oldStatus.value == Interlocked.CompareExchange(ref PageStatusIndicator[result.page % BufferSize].PageFlushCloseStatus.value, newStatus.value, oldStatus.value)) - { - break; - } - } - } - Interlocked.Exchange(ref PageStatusIndicator[result.page % BufferSize].LastFlushedUntilAddress, result.untilAddress); + Utility.MonotonicUpdate(ref PageStatusIndicator[result.page % BufferSize].LastFlushedUntilAddress, result.untilAddress, out long old); ShiftFlushedUntilAddress(); result.Free(); } diff --git a/cs/src/core/Allocator/BlittableAllocator.cs b/cs/src/core/Allocator/BlittableAllocator.cs index 6e80ccb78..9c37ffb3e 100644 --- a/cs/src/core/Allocator/BlittableAllocator.cs +++ b/cs/src/core/Allocator/BlittableAllocator.cs @@ -131,10 +131,6 @@ internal override void AllocatePage(int index) long p = (long)handles[index].AddrOfPinnedObject(); pointers[index] = (p + (sectorSize - 1)) & ~(sectorSize - 1); values[index] = tmp; - - PageStatusIndicator[index].PageFlushCloseStatus.PageFlushStatus = PMMFlushStatus.Flushed; - PageStatusIndicator[index].PageFlushCloseStatus.PageCloseStatus = PMMCloseStatus.Closed; - Interlocked.MemoryBarrier(); } [MethodImpl(MethodImplOptions.AggressiveInlining)] diff --git a/cs/src/core/Allocator/GenericAllocator.cs b/cs/src/core/Allocator/GenericAllocator.cs index 0b6b7983d..1442c8e81 100644 --- a/cs/src/core/Allocator/GenericAllocator.cs +++ b/cs/src/core/Allocator/GenericAllocator.cs @@ -41,8 +41,8 @@ public unsafe sealed class GenericAllocator : AllocatorBase(); private readonly bool valueBlittable = Utility.IsBlittable(); - public GenericAllocator(LogSettings settings, SerializerSettings serializerSettings, IFasterEqualityComparer comparer, Action evictCallback = null, LightEpoch epoch = null) - : base(settings, comparer, evictCallback, epoch, null) + public GenericAllocator(LogSettings settings, SerializerSettings serializerSettings, IFasterEqualityComparer comparer, Action evictCallback = null, LightEpoch epoch = null, Action flushCallback = null) + : base(settings, comparer, evictCallback, epoch, flushCallback) { SerializerSettings = serializerSettings; @@ -200,9 +200,6 @@ internal override void DeleteFromMemory() internal override void 
AllocatePage(int index) { values[index] = AllocatePage(); - PageStatusIndicator[index].PageFlushCloseStatus.PageFlushStatus = PMMFlushStatus.Flushed; - PageStatusIndicator[index].PageFlushCloseStatus.PageCloseStatus = PMMCloseStatus.Closed; - Interlocked.MemoryBarrier(); } internal Record[] AllocatePage() diff --git a/cs/src/core/Allocator/VarLenBlittableAllocator.cs b/cs/src/core/Allocator/VarLenBlittableAllocator.cs index b5057d8cd..3b59a0050 100644 --- a/cs/src/core/Allocator/VarLenBlittableAllocator.cs +++ b/cs/src/core/Allocator/VarLenBlittableAllocator.cs @@ -33,8 +33,8 @@ public unsafe sealed class VariableLengthBlittableAllocator : Alloca internal readonly IVariableLengthStruct KeyLength; internal readonly IVariableLengthStruct ValueLength; - public VariableLengthBlittableAllocator(LogSettings settings, VariableLengthStructSettings vlSettings, IFasterEqualityComparer comparer, Action evictCallback = null, LightEpoch epoch = null) - : base(settings, comparer, evictCallback, epoch, null) + public VariableLengthBlittableAllocator(LogSettings settings, VariableLengthStructSettings vlSettings, IFasterEqualityComparer comparer, Action evictCallback = null, LightEpoch epoch = null, Action flushCallback = null) + : base(settings, comparer, evictCallback, epoch, flushCallback) { values = new byte[BufferSize][]; handles = new GCHandle[BufferSize]; @@ -215,10 +215,6 @@ internal override void AllocatePage(int index) long p = (long)handles[index].AddrOfPinnedObject(); pointers[index] = (p + (sectorSize - 1)) & ~(sectorSize - 1); values[index] = tmp; - - PageStatusIndicator[index].PageFlushCloseStatus.PageFlushStatus = PMMFlushStatus.Flushed; - PageStatusIndicator[index].PageFlushCloseStatus.PageCloseStatus = PMMCloseStatus.Closed; - Interlocked.MemoryBarrier(); } [MethodImpl(MethodImplOptions.AggressiveInlining)] diff --git a/cs/src/core/Index/FASTER/FASTERImpl.cs b/cs/src/core/Index/FASTER/FASTERImpl.cs index 4311777ce..32fc159d4 100644 --- a/cs/src/core/Index/FASTER/FASTERImpl.cs +++ b/cs/src/core/Index/FASTER/FASTERImpl.cs @@ -1896,7 +1896,7 @@ private void BlockAllocate(int recordSize, out long logicalAddress) hlog.CheckForAllocateComplete(ref logicalAddress); if (logicalAddress < 0) { - Thread.Sleep(10); + Thread.Yield(); } } @@ -1923,7 +1923,7 @@ private void BlockAllocateReadCache(int recordSize, out long logicalAddress) readcache.CheckForAllocateComplete(ref logicalAddress); if (logicalAddress < 0) { - Thread.Sleep(10); + Thread.Yield(); } } @@ -2323,29 +2323,32 @@ private void ReadCacheEvict(long fromHeadAddress, long toHeadAddress) { physicalAddress = readcache.GetPhysicalAddress(logicalAddress); var recordSize = readcache.GetRecordSize(physicalAddress); - ref Key key = ref readcache.GetKey(physicalAddress); ref RecordInfo info = ref readcache.GetInfo(physicalAddress); - entry.word = info.PreviousAddress; - if (!entry.ReadCache) + if (!info.Invalid) { - var hash = comparer.GetHashCode64(ref key); - var tag = (ushort)((ulong)hash >> Constants.kHashTagShift); - - entry = default(HashBucketEntry); - var tagExists = FindTag(hash, tag, ref bucket, ref slot, ref entry); - while (tagExists && entry.ReadCache) + ref Key key = ref readcache.GetKey(physicalAddress); + entry.word = info.PreviousAddress; + if (!entry.ReadCache) { - var updatedEntry = default(HashBucketEntry); - updatedEntry.Tag = tag; - updatedEntry.Address = info.PreviousAddress; - updatedEntry.Pending = entry.Pending; - updatedEntry.Tentative = false; - - if (entry.word == Interlocked.CompareExchange - (ref 
bucket->bucket_entries[slot], updatedEntry.word, entry.word)) - break; + var hash = comparer.GetHashCode64(ref key); + var tag = (ushort)((ulong)hash >> Constants.kHashTagShift); + + entry = default(HashBucketEntry); + var tagExists = FindTag(hash, tag, ref bucket, ref slot, ref entry); + while (tagExists && entry.ReadCache) + { + var updatedEntry = default(HashBucketEntry); + updatedEntry.Tag = tag; + updatedEntry.Address = info.PreviousAddress; + updatedEntry.Pending = entry.Pending; + updatedEntry.Tentative = false; + + if (entry.word == Interlocked.CompareExchange + (ref bucket->bucket_entries[slot], updatedEntry.word, entry.word)) + break; - tagExists = FindTag(hash, tag, ref bucket, ref slot, ref entry); + tagExists = FindTag(hash, tag, ref bucket, ref slot, ref entry); + } } } logicalAddress += recordSize; diff --git a/cs/src/core/Index/FasterLog/FasterLog.cs b/cs/src/core/Index/FasterLog/FasterLog.cs index fc963cb3d..9ebf1af80 100644 --- a/cs/src/core/Index/FasterLog/FasterLog.cs +++ b/cs/src/core/Index/FasterLog/FasterLog.cs @@ -79,7 +79,8 @@ public unsafe long Append(Span entry) { epoch.Resume(); var length = entry.Length; - BlockAllocate(4 + length, out long logicalAddress); + var alignedLength = (length + 3) & ~3; // round up to multiple of 4 + BlockAllocate(4 + alignedLength, out long logicalAddress); var physicalAddress = allocator.GetPhysicalAddress(logicalAddress); *(int*)physicalAddress = length; fixed (byte* bp = &entry.GetPinnableReference()) @@ -97,7 +98,8 @@ public unsafe long Append(byte[] entry) { epoch.Resume(); var length = entry.Length; - BlockAllocate(4 + length, out long logicalAddress); + var alignedLength = (length + 3) & ~3; // round up to multiple of 4 + BlockAllocate(4 + alignedLength, out long logicalAddress); var physicalAddress = allocator.GetPhysicalAddress(logicalAddress); *(int*)physicalAddress = length; fixed (byte* bp = entry) @@ -124,7 +126,8 @@ public unsafe bool TryAppend(byte[] entry, out long logicalAddress) return false; } var length = entry.Length; - BlockAllocate(4 + length, out logicalAddress); + var alignedLength = (length + 3) & ~3; // round up to multiple of 4 + BlockAllocate(4 + alignedLength, out logicalAddress); var physicalAddress = allocator.GetPhysicalAddress(logicalAddress); *(int*)physicalAddress = length; fixed (byte* bp = entry) @@ -151,7 +154,8 @@ public unsafe bool TryAppend(Span entry, out long logicalAddress) return false; } var length = entry.Length; - BlockAllocate(4 + length, out logicalAddress); + var alignedLength = (length + 3) & ~3; // round up to multiple of 4 + BlockAllocate(4 + alignedLength, out logicalAddress); var physicalAddress = allocator.GetPhysicalAddress(logicalAddress); *(int*)physicalAddress = length; fixed (byte* bp = &entry.GetPinnableReference()) @@ -172,7 +176,8 @@ public unsafe long Append(List entries) foreach (var entry in entries) { var length = entry.Length; - BlockAllocate(4 + length, out logicalAddress); + var alignedLength = (length + 3) & ~3; // round up to multiple of 4 + BlockAllocate(4 + alignedLength, out logicalAddress); var physicalAddress = allocator.GetPhysicalAddress(logicalAddress); *(int*)physicalAddress = length; fixed (byte* bp = entry) @@ -262,7 +267,7 @@ private void BlockAllocate(int recordSize, out long logicalAddress) allocator.CheckForAllocateComplete(ref logicalAddress); if (logicalAddress < 0) { - Thread.Sleep(10); + Thread.Yield(); } } @@ -280,11 +285,9 @@ private void BlockAllocate(int recordSize, out long logicalAddress) /// private void Commit(long flushAddress) { - 
epoch.Resume(); FasterLogRecoveryInfo info = new FasterLogRecoveryInfo(); - info.FlushedUntilAddress = allocator.FlushedUntilAddress; + info.FlushedUntilAddress = flushAddress; info.BeginAddress = allocator.BeginAddress; - epoch.Suspend(); // We can only allow serial monotonic synchronous commit lock (this) @@ -293,7 +296,7 @@ private void Commit(long flushAddress) { logCommitManager.Commit(flushAddress, info.ToByteArray()); CommittedUntilAddress = flushAddress; - info.DebugPrint(); + // info.DebugPrint(); } } } diff --git a/cs/src/core/Index/FasterLog/FasterLogIterator.cs b/cs/src/core/Index/FasterLog/FasterLogIterator.cs index b09db13c3..e61c5131d 100644 --- a/cs/src/core/Index/FasterLog/FasterLogIterator.cs +++ b/cs/src/core/Index/FasterLog/FasterLogIterator.cs @@ -121,9 +121,9 @@ public unsafe bool GetNext(out Span entry) // Check if record fits on page, if not skip to next page int length = *(int*)physicalAddress; - int recordSize = 4; - if (length > 0) - recordSize += length; + int alignedLength = (length + 3) & ~3; // round up to multiple of 4 + + int recordSize = 4 + alignedLength; if ((currentAddress & allocator.PageSizeMask) + recordSize > allocator.PageSize) { if (currentAddress >= headAddress) diff --git a/cs/src/core/Index/FasterLog/FasterLogSettings.cs b/cs/src/core/Index/FasterLog/FasterLogSettings.cs index 5969a034e..d6294cb32 100644 --- a/cs/src/core/Index/FasterLog/FasterLogSettings.cs +++ b/cs/src/core/Index/FasterLog/FasterLogSettings.cs @@ -25,14 +25,15 @@ public class FasterLogSettings public int PageSizeBits = 22; /// - /// Size of a segment (group of pages), in bits + /// Total size of in-memory part of log, in bits + /// Num pages = 2^(MemorySizeBits-PageSizeBits) /// - public int SegmentSizeBits = 30; + public int MemorySizeBits = 24; /// - /// Total size of in-memory part of log, in bits + /// Size of a segment (group of pages), in bits /// - public int MemorySizeBits = 26; + public int SegmentSizeBits = 30; /// /// Log commit manager diff --git a/cs/src/core/Index/Recovery/Recovery.cs b/cs/src/core/Index/Recovery/Recovery.cs index 7ddd5db3f..4ae0388ac 100644 --- a/cs/src/core/Index/Recovery/Recovery.cs +++ b/cs/src/core/Index/Recovery/Recovery.cs @@ -433,7 +433,8 @@ public void RestoreHybridLog(long untilAddress, long headAddress, long beginAddr ((headAddress == untilAddress) && (GetOffsetInPage(headAddress) == 0)) // Empty in-memory page ) { - AllocatePage(GetPageIndexForAddress(headAddress)); + if (!IsAllocated(GetPageIndexForAddress(headAddress))) + AllocatePage(GetPageIndexForAddress(headAddress)); } else { From ec2a3b598d5d07e15c9decf54b477ef5ff4a2328 Mon Sep 17 00:00:00 2001 From: Badrish Chandramouli Date: Mon, 30 Sep 2019 10:44:28 -0700 Subject: [PATCH 09/36] Fasterlog TryAppend (#179) Adding truly non-blocking TryAppend functionality. See sample for how this is used. 
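Retry contract sketch for the reworked TryAppend (mirrors the updated sample; the Thread.Yield is one possible back-off, the sample itself uses a bare spin). The logical address is now passed by ref: supply a starting value of 0, and pass the value from a failed attempt back in unchanged on each retry:

    byte[] entry = new byte[996];
    long logicalAddress = 0;  // contract: must start at 0
    while (!log.TryAppend(entry, ref logicalAddress))
    {
        // TryAppend now returns immediately rather than spinning
        // internally; retries must reuse the same logicalAddress.
        Thread.Yield();
    }

Underneath, this rests on the new AllocatorBase.TryAllocate, which never spins: it may return 0 when it cannot hand out space yet, or a negative address when a slot was reserved on a page that is not yet ready for use.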
--- cs/playground/FasterLogSample/Program.cs | 17 ++- cs/src/core/Allocator/AllocatorBase.cs | 89 ++++++++++++- cs/src/core/Index/Common/LogSettings.cs | 6 +- cs/src/core/Index/FASTER/FASTER.cs | 6 +- cs/src/core/Index/FasterLog/FasterLog.cs | 160 ++++++++++++++++------- 5 files changed, 216 insertions(+), 62 deletions(-) diff --git a/cs/playground/FasterLogSample/Program.cs b/cs/playground/FasterLogSample/Program.cs index cb377cf6e..a6365a805 100644 --- a/cs/playground/FasterLogSample/Program.cs +++ b/cs/playground/FasterLogSample/Program.cs @@ -10,7 +10,7 @@ namespace FasterLogSample { public class Program { - const int entryLength = 96; + const int entryLength = 996; static FasterLog log; static void ReportThread() @@ -50,15 +50,18 @@ static void AppendThread() while (true) { + // Sync append log.Append(entry); - // We also support a Span-based version of Append + // We also support a Span-based variant of Append - // We also support TryAppend to allow throttling/back-off: - // while (!log.TryAppend(entry, out long logicalAddress)) - // { - // Thread.Sleep(10); - // } + // We also support TryAppend to allow throttling/back-off + // (expect this to be slightly slower than the sync version) + // Make sure you supply a "starting" logical address of 0 + // Retries must send back the current logical address. + // + // long logicalAddress = 0; + // while (!log.TryAppend(entry, ref logicalAddress)) ; } } diff --git a/cs/src/core/Allocator/AllocatorBase.cs b/cs/src/core/Allocator/AllocatorBase.cs index 8718266d1..ecedcf2c6 100644 --- a/cs/src/core/Allocator/AllocatorBase.cs +++ b/cs/src/core/Allocator/AllocatorBase.cs @@ -818,6 +818,90 @@ public long Allocate(int numSlots = 1) return address; } + /// + /// Try allocate, no thread spinning allowed + /// May return 0 in case of inability + /// May also return negative address + /// + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public long TryAllocate(int numSlots = 1) + { + PageOffset localTailPageOffset = default(PageOffset); + + // Necessary to check because threads keep retrying and we do not + // want to overflow offset more than once per thread + if (TailPageOffset.Offset > PageSize) + return 0; + + // Determine insertion index. + // ReSharper disable once CSharpWarnings::CS0420 +#pragma warning disable 420 + localTailPageOffset.PageAndOffset = Interlocked.Add(ref TailPageOffset.PageAndOffset, numSlots); +#pragma warning restore 420 + + int page = localTailPageOffset.Page; + int offset = localTailPageOffset.Offset - numSlots; + + #region HANDLE PAGE OVERFLOW + if (localTailPageOffset.Offset > PageSize) + { + if (offset > PageSize) + { + return 0; + } + + // The thread that "makes" the offset incorrect + // is the one that is elected to fix it and + // shift read-only/head. + localTailPageOffset.Page++; + localTailPageOffset.Offset = 0; + TailPageOffset = localTailPageOffset; + + long shiftAddress = ((long)(page + 1)) << LogPageSizeBits; + PageAlignedShiftReadOnlyAddress(shiftAddress); + PageAlignedShiftHeadAddress(shiftAddress); + + return 0; + } + #endregion + + long address = (((long)page) << LogPageSizeBits) | ((long)offset); + + // Check for TailPageCache hit + if (TailPageCache == page) + { + return address; + } + + // Address has been allocated. Negate the address + // if page is not ready to be used. 
+ if (CannotAllocate(page)) + { + address = -address; + } + + // Update the read-only so that we can get more space for the tail + if (offset == 0) + { + if (address >= 0) + { + TailPageCache = page; + Interlocked.MemoryBarrier(); + } + + // Allocate next page in advance, if needed + int newPageIndex = (page + 1) % BufferSize; + if ((!IsAllocated(newPageIndex))) + { + AllocatePage(newPageIndex); + } + } + + return address; + } + /// /// If allocator cannot allocate new memory as the head has not shifted or the previous page /// is not yet closed, it allocates but returns the negative address. @@ -827,11 +911,6 @@ public long Allocate(int numSlots = 1) [MethodImpl(MethodImplOptions.AggressiveInlining)] public void CheckForAllocateComplete(ref long address) { - if (address >= 0) - { - throw new Exception("Address already allocated!"); - } - PageOffset p = default(PageOffset); p.Page = (int)((-address) >> LogPageSizeBits); p.Offset = (int)((-address) & PageSizeMask); diff --git a/cs/src/core/Index/Common/LogSettings.cs b/cs/src/core/Index/Common/LogSettings.cs index c0943dfe8..f6b59e337 100644 --- a/cs/src/core/Index/Common/LogSettings.cs +++ b/cs/src/core/Index/Common/LogSettings.cs @@ -176,8 +176,10 @@ public class ReadCacheSettings public int MemorySizeBits = 34; /// - /// Fraction of log used for second chance copy to tail + /// Fraction of log head (in memory) used for second chance + /// copy to tail. This is (1 - MutableFraction) for the + /// underlying log /// - public double SecondChanceFraction = 0.9; + public double SecondChanceFraction = 0.1; } } diff --git a/cs/src/core/Index/FASTER/FASTER.cs b/cs/src/core/Index/FASTER/FASTER.cs index 32c7a5a5d..e222a3585 100644 --- a/cs/src/core/Index/FASTER/FASTER.cs +++ b/cs/src/core/Index/FASTER/FASTER.cs @@ -135,7 +135,7 @@ public FasterKV(long size, Functions functions, LogSettings logSettings, Checkpo PageSizeBits = logSettings.ReadCacheSettings.PageSizeBits, MemorySizeBits = logSettings.ReadCacheSettings.MemorySizeBits, SegmentSizeBits = logSettings.ReadCacheSettings.MemorySizeBits, - MutableFraction = logSettings.ReadCacheSettings.SecondChanceFraction + MutableFraction = 1 - logSettings.ReadCacheSettings.SecondChanceFraction }, variableLengthStructSettings, this.comparer, ReadCacheEvict, epoch); readcache.Initialize(); ReadCache = new LogAccessor(this, readcache); @@ -153,7 +153,7 @@ public FasterKV(long size, Functions functions, LogSettings logSettings, Checkpo PageSizeBits = logSettings.ReadCacheSettings.PageSizeBits, MemorySizeBits = logSettings.ReadCacheSettings.MemorySizeBits, SegmentSizeBits = logSettings.ReadCacheSettings.MemorySizeBits, - MutableFraction = logSettings.ReadCacheSettings.SecondChanceFraction + MutableFraction = 1 - logSettings.ReadCacheSettings.SecondChanceFraction }, this.comparer, ReadCacheEvict, epoch); readcache.Initialize(); ReadCache = new LogAccessor(this, readcache); @@ -174,7 +174,7 @@ public FasterKV(long size, Functions functions, LogSettings logSettings, Checkpo PageSizeBits = logSettings.ReadCacheSettings.PageSizeBits, MemorySizeBits = logSettings.ReadCacheSettings.MemorySizeBits, SegmentSizeBits = logSettings.ReadCacheSettings.MemorySizeBits, - MutableFraction = logSettings.ReadCacheSettings.SecondChanceFraction + MutableFraction = 1 - logSettings.ReadCacheSettings.SecondChanceFraction }, serializerSettings, this.comparer, ReadCacheEvict, epoch); readcache.Initialize(); ReadCache = new LogAccessor(this, readcache); diff --git a/cs/src/core/Index/FasterLog/FasterLog.cs 
b/cs/src/core/Index/FasterLog/FasterLog.cs index 9ebf1af80..979a9d803 100644 --- a/cs/src/core/Index/FasterLog/FasterLog.cs +++ b/cs/src/core/Index/FasterLog/FasterLog.cs @@ -109,82 +109,73 @@ public unsafe long Append(byte[] entry) } /// - /// Try to append entry to log + /// Try to append entry to log. If is returns true, we are + /// done. If it returns false with negative address, user + /// needs to call TryCompleteAppend to finalize the append. + /// See TryCompleteAppend for more info. /// /// Entry to be appended to log /// Logical address of added entry /// Whether the append succeeded - public unsafe bool TryAppend(byte[] entry, out long logicalAddress) + public unsafe bool TryAppend(byte[] entry, ref long logicalAddress) { + if (logicalAddress < 0) + return TryCompleteAppend(entry, ref logicalAddress); + epoch.Resume(); - logicalAddress = 0; - long tail = -allocator.GetTailAddress(); - allocator.CheckForAllocateComplete(ref tail); - if (tail < 0) - { + + var length = entry.Length; + var alignedLength = (length + 3) & ~3; // round up to multiple of 4 + + logicalAddress = allocator.TryAllocate(4 + alignedLength); + if (logicalAddress <= 0) + { epoch.Suspend(); return false; } - var length = entry.Length; - var alignedLength = (length + 3) & ~3; // round up to multiple of 4 - BlockAllocate(4 + alignedLength, out logicalAddress); + var physicalAddress = allocator.GetPhysicalAddress(logicalAddress); *(int*)physicalAddress = length; fixed (byte* bp = entry) Buffer.MemoryCopy(bp, (void*)(4 + physicalAddress), length, length); + epoch.Suspend(); return true; } /// - /// Try to append entry to log + /// Try to append entry to log. If is returns true, we are + /// done. If it returns false with negative address, user + /// needs to call TryCompleteAppend to finalize the append. + /// See TryCompleteAppend for more info. 
/// /// Entry to be appended to log /// Logical address of added entry /// Whether the append succeeded - public unsafe bool TryAppend(Span entry, out long logicalAddress) + public unsafe bool TryAppend(Span entry, ref long logicalAddress) { + if (logicalAddress < 0) + return TryCompleteAppend(entry, ref logicalAddress); + epoch.Resume(); - logicalAddress = 0; - long tail = -allocator.GetTailAddress(); - allocator.CheckForAllocateComplete(ref tail); - if (tail < 0) + + var length = entry.Length; + var alignedLength = (length + 3) & ~3; // round up to multiple of 4 + + logicalAddress = allocator.TryAllocate(4 + alignedLength); + if (logicalAddress <= 0) { epoch.Suspend(); return false; } - var length = entry.Length; - var alignedLength = (length + 3) & ~3; // round up to multiple of 4 - BlockAllocate(4 + alignedLength, out logicalAddress); + var physicalAddress = allocator.GetPhysicalAddress(logicalAddress); *(int*)physicalAddress = length; fixed (byte* bp = &entry.GetPinnableReference()) Buffer.MemoryCopy(bp, (void*)(4 + physicalAddress), length, length); - epoch.Suspend(); - return true; - } - /// - /// Append batch of entries to log - /// - /// - /// Logical address of last added entry - public unsafe long Append(List entries) - { - long logicalAddress = 0; - epoch.Resume(); - foreach (var entry in entries) - { - var length = entry.Length; - var alignedLength = (length + 3) & ~3; // round up to multiple of 4 - BlockAllocate(4 + alignedLength, out logicalAddress); - var physicalAddress = allocator.GetPhysicalAddress(logicalAddress); - *(int*)physicalAddress = length; - fixed (byte* bp = entry) - Buffer.MemoryCopy(bp, (void*)(4 + physicalAddress), length, length); - } epoch.Suspend(); - return logicalAddress; + return true; } /// @@ -280,6 +271,84 @@ private void BlockAllocate(int recordSize, out long logicalAddress) } } + /// + /// Try to complete partial allocation. Succeeds when address + /// turns positive. If failed with negative address, try the + /// operation. If failed with zero address, user needs to start + /// afresh with a new TryAppend operation. + /// + /// + /// + /// Whether operation succeeded + private unsafe bool TryCompleteAppend(byte[] entry, ref long logicalAddress) + { + epoch.Resume(); + + allocator.CheckForAllocateComplete(ref logicalAddress); + + if (logicalAddress < 0) + { + epoch.Suspend(); + return false; + } + + if (logicalAddress < allocator.ReadOnlyAddress) + { + logicalAddress = 0; + epoch.Suspend(); + return false; + } + + var length = entry.Length; + + var physicalAddress = allocator.GetPhysicalAddress(logicalAddress); + *(int*)physicalAddress = length; + fixed (byte* bp = entry) + Buffer.MemoryCopy(bp, (void*)(4 + physicalAddress), length, length); + + epoch.Suspend(); + return true; + } + + /// + /// Try to complete partial allocation. Succeeds when address + /// turns positive. If failed with negative address, try the + /// operation. If failed with zero address, user needs to start + /// afresh with a new TryAppend operation. 
+ /// + /// + /// + /// Whether operation succeeded + private unsafe bool TryCompleteAppend(Span entry, ref long logicalAddress) + { + epoch.Resume(); + + allocator.CheckForAllocateComplete(ref logicalAddress); + + if (logicalAddress < 0) + { + epoch.Suspend(); + return false; + } + + if (logicalAddress < allocator.ReadOnlyAddress) + { + logicalAddress = 0; + epoch.Suspend(); + return false; + } + + var length = entry.Length; + + var physicalAddress = allocator.GetPhysicalAddress(logicalAddress); + *(int*)physicalAddress = length; + fixed (byte* bp = &entry.GetPinnableReference()) + Buffer.MemoryCopy(bp, (void*)(4 + physicalAddress), length, length); + + epoch.Suspend(); + return true; + } + /// /// Commit log /// @@ -316,9 +385,10 @@ private void Restore() info.Initialize(r); } - allocator.RestoreHybridLog(info.FlushedUntilAddress, - info.FlushedUntilAddress - allocator.GetOffsetInPage(info.FlushedUntilAddress), - info.BeginAddress); + var headAddress = info.FlushedUntilAddress - allocator.GetOffsetInPage(info.FlushedUntilAddress); + if (headAddress == 0) headAddress = Constants.kFirstValidAddress; + + allocator.RestoreHybridLog(info.FlushedUntilAddress, headAddress, info.BeginAddress); } } } From bb4e357d7b79b123f15fb8e4380a1250b2c36938 Mon Sep 17 00:00:00 2001 From: Badrish Chandramouli Date: Mon, 30 Sep 2019 11:29:58 -0700 Subject: [PATCH 10/36] minor fix --- cs/src/core/Index/FasterLog/FasterLog.cs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cs/src/core/Index/FasterLog/FasterLog.cs b/cs/src/core/Index/FasterLog/FasterLog.cs index fc963cb3d..b33612bc2 100644 --- a/cs/src/core/Index/FasterLog/FasterLog.cs +++ b/cs/src/core/Index/FasterLog/FasterLog.cs @@ -54,6 +54,7 @@ public FasterLog(FasterLogSettings logSettings) logSettings.LogDevice.FileName + ".commit"); epoch = new LightEpoch(); + CommittedUntilAddress = Constants.kFirstValidAddress; allocator = new BlittableAllocator( logSettings.GetLogSettings(), null, null, epoch, e => Commit(e)); @@ -316,6 +317,7 @@ private void Restore() allocator.RestoreHybridLog(info.FlushedUntilAddress, info.FlushedUntilAddress - allocator.GetOffsetInPage(info.FlushedUntilAddress), info.BeginAddress); + CommittedUntilAddress = info.FlushedUntilAddress; } } } From b06d11253e4bd065b67c8ffbfc04ecaddbf74091 Mon Sep 17 00:00:00 2001 From: Badrish Chandramouli Date: Thu, 3 Oct 2019 14:55:04 -0700 Subject: [PATCH 11/36] Fasterlog async (#180) * Added support for TryAppend. Removed List-based batch support. * Added non-blocking TryAppend * Added span variant * Fix definition of SecondChanceFraction for read cache, to be 1 - MutableFraction of the log. * Added async FlushAndCommit * Added batched version by separating out in-memory append and wait for commit - gives better perf as the first operation is usually sync * Tweak async sample to get back to 2GB/sec * Other updates: 1) Allocations can handle thousands of parallel tasks 2) Removed concept of negative address - allocations are always over available pages 3) Improved scan interface to allow user memory pooling 4) Exposed commit task 5) Cleaned up sample * Added check for entry fitting on single page * Added batch interface (sync and async) to log append. 
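The batched pattern noted above (separating the in-memory append from the wait for commit) looks roughly like the following in caller code; this is a sketch assuming a FasterLog instance `log` and a byte[] payload `entry`, mirroring the updated sample in this commit:

    int count = 0;
    while (true)
    {
        // Append to memory only; this usually completes synchronously,
        // which is why the batched form performs better.
        await log.AppendToMemoryAsync(entry);

        // Periodically wait for all prior appends to commit to storage.
        if (++count % 100 == 0)
            await log.WaitForCommitAsync();
    }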
--- .../FasterLogSample/FasterLogSample.csproj | 2 +- cs/playground/FasterLogSample/Program.cs | 170 ++++++-- cs/src/core/Allocator/AllocatorBase.cs | 68 ++-- cs/src/core/Epochs/LightEpoch.cs | 1 + cs/src/core/FASTER.core.csproj | 1 + cs/src/core/Index/FasterLog/FasterLog.cs | 384 +++++++++++------- .../core/Index/FasterLog/FasterLogIterator.cs | 68 +++- cs/src/core/Index/FasterLog/ISpanBatch.cs | 26 ++ 8 files changed, 474 insertions(+), 246 deletions(-) create mode 100644 cs/src/core/Index/FasterLog/ISpanBatch.cs diff --git a/cs/playground/FasterLogSample/FasterLogSample.csproj b/cs/playground/FasterLogSample/FasterLogSample.csproj index 7b8c2eee1..f0c1def2b 100644 --- a/cs/playground/FasterLogSample/FasterLogSample.csproj +++ b/cs/playground/FasterLogSample/FasterLogSample.csproj @@ -1,7 +1,7 @@  - net46 + netcoreapp2.2 x64 win7-x64 diff --git a/cs/playground/FasterLogSample/Program.cs b/cs/playground/FasterLogSample/Program.cs index a6365a805..3b13b1413 100644 --- a/cs/playground/FasterLogSample/Program.cs +++ b/cs/playground/FasterLogSample/Program.cs @@ -4,13 +4,17 @@ using System; using System.Diagnostics; using System.Threading; +using System.Threading.Tasks; using FASTER.core; namespace FasterLogSample { public class Program { - const int entryLength = 996; + // Entry length can be between 1 and ((1 << FasterLogSettings.PageSizeBits) - 4) + const int entryLength = 1 << 10; + static readonly byte[] staticEntry = new byte[entryLength]; + static readonly SpanBatch spanBatch = new SpanBatch(10); static FasterLog log; static void ReportThread() @@ -26,8 +30,8 @@ static void ReportThread() var nowTime = sw.ElapsedMilliseconds; var nowValue = log.TailAddress; - Console.WriteLine("Throughput: {0} MB/sec", - (nowValue - lastValue) / (1000*(nowTime - lastTime))); + Console.WriteLine("Throughput: {0} MB/sec, Tail: {1}", + (nowValue - lastValue) / (1000 * (nowTime - lastTime)), nowValue); lastTime = nowTime; lastValue = nowValue; } @@ -37,31 +41,29 @@ static void CommitThread() { while (true) { - Thread.Sleep(100); + Thread.Sleep(5); log.FlushAndCommit(true); + + // Async version + // await Task.Delay(5); + // await log.FlushAndCommitAsync(); } } static void AppendThread() { - byte[] entry = new byte[entryLength]; - for (int i = 0; i < entryLength; i++) - entry[i] = (byte)i; - while (true) { - // Sync append - log.Append(entry); - - // We also support a Span-based variant of Append - - // We also support TryAppend to allow throttling/back-off - // (expect this to be slightly slower than the sync version) - // Make sure you supply a "starting" logical address of 0 - // Retries must send back the current logical address. 
- // - // long logicalAddress = 0; - // while (!log.TryAppend(entry, ref logicalAddress)) ; + // TryAppend - can be used with throttling/back-off + // Accepts byte[] and Span + while (!log.TryAppend(staticEntry, out _)) ; + + // Synchronous blocking append + // Accepts byte[] and Span + // log.Append(entry); + + // Batched append - batch must fit on one page + // while (!log.TryAppend(spanBatch, out _)) ; } } @@ -73,9 +75,11 @@ static void ScanThread() byte[] entry = new byte[entryLength]; for (int i = 0; i < entryLength; i++) + { entry[i] = (byte)i; - var entrySpan = new Span(entry); + } + var entrySpan = new Span(entry); long lastAddress = 0; Span result; @@ -84,16 +88,25 @@ static void ScanThread() while (true) { while (!iter.GetNext(out result)) + { Thread.Sleep(1000); + } + if (!result.SequenceEqual(entrySpan)) { - throw new Exception("Invalid entry found at offset " + FindDiff(result, entrySpan)); + if (result.Length != entrySpan.Length) + throw new Exception("Invalid entry found, expected length " + entrySpan.Length + ", actual length " + result.Length); + else + throw new Exception("Invalid entry found at offset " + FindDiff(result, entrySpan)); } + // Re-insert entry with small probability if (r.Next(100) < 10) + { log.Append(result); + } - if (iter.CurrentAddress - lastAddress > 500000000) + if (iter.CurrentAddress - lastAddress > 500_000_000) { log.TruncateUntil(iter.CurrentAddress); lastAddress = iter.CurrentAddress; @@ -102,9 +115,9 @@ static void ScanThread() } } - static int FindDiff(Span b1, Span b2) + private static int FindDiff(Span b1, Span b2) { - for (int i=0; i b1, Span b2) return 0; } + /// + /// Main program entry point + /// + /// static void Main(string[] args) { + bool sync = true; var device = Devices.CreateLogDevice("D:\\logs\\hlog.log"); log = new FasterLog(new FasterLogSettings { LogDevice = device }); - new Thread(new ThreadStart(AppendThread)).Start(); - - // Can have multiple append threads if needed - // new Thread(new ThreadStart(AppendThread)).Start(); - - new Thread(new ThreadStart(ScanThread)).Start(); - new Thread(new ThreadStart(ReportThread)).Start(); - new Thread(new ThreadStart(CommitThread)).Start(); + // Populate entry being inserted + for (int i = 0; i < entryLength; i++) + { + staticEntry[i] = (byte)i; + } + + if (sync) + { + // Append thread: create as many as needed + new Thread(new ThreadStart(AppendThread)).Start(); + + // Threads for scan, reporting, commit + var t1 = new Thread(new ThreadStart(ScanThread)); + var t2 = new Thread(new ThreadStart(ReportThread)); + var t3 = new Thread(new ThreadStart(CommitThread)); + t1.Start(); t2.Start(); t3.Start(); + t1.Join(); t2.Join(); t3.Join(); + } + else + { + // Async version of demo: expect lower performance + // particularly for small payload sizes + + const int NumParallelTasks = 10_000; + ThreadPool.SetMinThreads(2 * Environment.ProcessorCount, 2 * Environment.ProcessorCount); + TaskScheduler.UnobservedTaskException += (object sender, UnobservedTaskExceptionEventArgs e) => + { + Console.WriteLine($"Unobserved task exception: {e.Exception}"); + e.SetObserved(); + }; + + Task[] tasks = new Task[NumParallelTasks]; + for (int i = 0; i < NumParallelTasks; i++) + { + int local = i; + tasks[i] = Task.Run(() => AppendAsync(local)); + } - Thread.Sleep(500*1000); + // Threads for scan, reporting, commit + var t1 = new Thread(new ThreadStart(ScanThread)); + var t2 = new Thread(new ThreadStart(ReportThread)); + var t3 = new Thread(new ThreadStart(CommitThread)); + t1.Start(); t2.Start(); t3.Start(); + 
t1.Join(); t2.Join(); t3.Join(); + + Task.WaitAll(tasks); + } } + + static async Task AppendAsync(int id) + { + bool batched = false; + + await Task.Yield(); + + if (!batched) + { + // Unbatched version - append each item with commit + // Needs high parallelism (NumParallelTasks) for perf + while (true) + { + try + { + await log.AppendAsync(staticEntry); + } + catch (Exception ex) + { + Console.WriteLine($"{nameof(AppendAsync)}({id}): {ex}"); + } + } + } + else + { + // Batched version - we append many entries to memory, + // then wait for commit periodically + int count = 0; + while (true) + { + await log.AppendToMemoryAsync(staticEntry); + if (count++ % 100 == 0) + { + await log.WaitForCommitAsync(); + } + } + } + } + + private struct SpanBatch : ISpanBatch + { + private readonly int batchSize; + public SpanBatch(int batchSize) => this.batchSize = batchSize; + public Span Get(int index) => staticEntry; + public int TotalEntries() => batchSize; + } + } } diff --git a/cs/src/core/Allocator/AllocatorBase.cs b/cs/src/core/Allocator/AllocatorBase.cs index ecedcf2c6..efe897c58 100644 --- a/cs/src/core/Allocator/AllocatorBase.cs +++ b/cs/src/core/Allocator/AllocatorBase.cs @@ -785,7 +785,7 @@ public long Allocate(int numSlots = 1) } // Negate the address if page not ready to be used - if (CannotAllocate(page)) + if (CannotAllocateNext(page)) { address = -address; } @@ -820,14 +820,16 @@ public long Allocate(int numSlots = 1) /// /// Try allocate, no thread spinning allowed - /// May return 0 in case of inability - /// May also return negative address + /// May return 0 in case of inability to allocate /// /// /// [MethodImpl(MethodImplOptions.AggressiveInlining)] public long TryAllocate(int numSlots = 1) { + if (numSlots > PageSize) + throw new Exception("Entry does not fit on page"); + PageOffset localTailPageOffset = default(PageOffset); // Necessary to check because threads keep retrying and we do not @@ -855,51 +857,36 @@ public long TryAllocate(int numSlots = 1) // The thread that "makes" the offset incorrect // is the one that is elected to fix it and // shift read-only/head. - localTailPageOffset.Page++; - localTailPageOffset.Offset = 0; - TailPageOffset = localTailPageOffset; - long shiftAddress = ((long)(page + 1)) << LogPageSizeBits; + long shiftAddress = ((long)(localTailPageOffset.Page + 1)) << LogPageSizeBits; PageAlignedShiftReadOnlyAddress(shiftAddress); PageAlignedShiftHeadAddress(shiftAddress); - return 0; - } - #endregion - - long address = (((long)page) << LogPageSizeBits) | ((long)offset); - - // Check for TailPageCache hit - if (TailPageCache == page) - { - return address; - } - - // Address has been allocated. Negate the address - // if page is not ready to be used. 
- if (CannotAllocate(page)) - { - address = -address; - } - - // Update the read-only so that we can get more space for the tail - if (offset == 0) - { - if (address >= 0) + if (CannotAllocate(localTailPageOffset.Page + 1)) { - TailPageCache = page; - Interlocked.MemoryBarrier(); + // We should not allocate the next page; reset to end of page + // so that next attempt can retry + localTailPageOffset.Offset = PageSize; + Interlocked.Exchange(ref TailPageOffset.PageAndOffset, localTailPageOffset.PageAndOffset); + return 0; } // Allocate next page in advance, if needed - int newPageIndex = (page + 1) % BufferSize; - if ((!IsAllocated(newPageIndex))) + int nextPageIndex = (localTailPageOffset.Page + 2) % BufferSize; + if ((!IsAllocated(nextPageIndex))) { - AllocatePage(newPageIndex); + AllocatePage(nextPageIndex); } + + localTailPageOffset.Page++; + localTailPageOffset.Offset = 0; + TailPageOffset = localTailPageOffset; + + return 0; } + #endregion - return address; + return (((long)page) << LogPageSizeBits) | ((long)offset); } /// @@ -925,7 +912,7 @@ public void CheckForAllocateComplete(ref long address) PageAlignedShiftHeadAddress(GetTailAddress()); // Check if we can allocate pageIndex - if (CannotAllocate(p.Page)) + if (CannotAllocateNext(p.Page)) { return; } @@ -940,13 +927,18 @@ public void CheckForAllocateComplete(ref long address) } [MethodImpl(MethodImplOptions.AggressiveInlining)] - private bool CannotAllocate(int page) + private bool CannotAllocateNext(int page) { return (page >= BufferSize + (ClosedUntilAddress >> LogPageSizeBits) - 1) || !IsAllocated(page % BufferSize); } + private bool CannotAllocate(int page) + { + return + (page >= BufferSize + (ClosedUntilAddress >> LogPageSizeBits)); + } /// /// Used by applications to make the current state of the database immutable quickly /// diff --git a/cs/src/core/Epochs/LightEpoch.cs b/cs/src/core/Epochs/LightEpoch.cs index 8434f8964..5d7b789e4 100644 --- a/cs/src/core/Epochs/LightEpoch.cs +++ b/cs/src/core/Epochs/LightEpoch.cs @@ -283,6 +283,7 @@ public int BumpCurrentEpoch(Action onDrain) if (++i == kDrainListSize) { + ProtectAndDrain(); i = 0; if (++j == 500) { diff --git a/cs/src/core/FASTER.core.csproj b/cs/src/core/FASTER.core.csproj index dca9cd6a5..ae1a7e8fb 100644 --- a/cs/src/core/FASTER.core.csproj +++ b/cs/src/core/FASTER.core.csproj @@ -37,5 +37,6 @@ + \ No newline at end of file diff --git a/cs/src/core/Index/FasterLog/FasterLog.cs b/cs/src/core/Index/FasterLog/FasterLog.cs index 54eb5f9d3..fc0b96227 100644 --- a/cs/src/core/Index/FasterLog/FasterLog.cs +++ b/cs/src/core/Index/FasterLog/FasterLog.cs @@ -4,12 +4,10 @@ #pragma warning disable 0162 using System; -using System.Collections.Concurrent; -using System.Collections.Generic; -using System.Diagnostics; using System.IO; using System.Runtime.CompilerServices; using System.Threading; +using System.Threading.Tasks; namespace FASTER.core { @@ -22,6 +20,7 @@ public class FasterLog : IDisposable private readonly BlittableAllocator allocator; private readonly LightEpoch epoch; private ILogCommitManager logCommitManager; + private TaskCompletionSource commitTcs = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); /// /// Beginning address of log @@ -43,6 +42,11 @@ public class FasterLog : IDisposable /// public long CommittedUntilAddress; + /// + /// Task notifying commit completions + /// + public Task CommitTask => commitTcs.Task; + /// /// Create new log instance /// @@ -76,17 +80,10 @@ public void Dispose() /// /// /// Logical address of added 
entry - public unsafe long Append(Span entry) + public long Append(Span entry) { - epoch.Resume(); - var length = entry.Length; - var alignedLength = (length + 3) & ~3; // round up to multiple of 4 - BlockAllocate(4 + alignedLength, out long logicalAddress); - var physicalAddress = allocator.GetPhysicalAddress(logicalAddress); - *(int*)physicalAddress = length; - fixed (byte* bp = &entry.GetPinnableReference()) - Buffer.MemoryCopy(bp, (void*)(4 + physicalAddress), length, length); - epoch.Suspend(); + long logicalAddress; + while (!TryAppend(entry, out logicalAddress)) ; return logicalAddress; } @@ -95,41 +92,29 @@ public unsafe long Append(Span entry) /// /// /// Logical address of added entry - public unsafe long Append(byte[] entry) + public long Append(byte[] entry) { - epoch.Resume(); - var length = entry.Length; - var alignedLength = (length + 3) & ~3; // round up to multiple of 4 - BlockAllocate(4 + alignedLength, out long logicalAddress); - var physicalAddress = allocator.GetPhysicalAddress(logicalAddress); - *(int*)physicalAddress = length; - fixed (byte* bp = entry) - Buffer.MemoryCopy(bp, (void*)(4 + physicalAddress), length, length); - epoch.Suspend(); + long logicalAddress; + while (!TryAppend(entry, out logicalAddress)) ; return logicalAddress; } /// - /// Try to append entry to log. If is returns true, we are - /// done. If it returns false with negative address, user - /// needs to call TryCompleteAppend to finalize the append. - /// See TryCompleteAppend for more info. + /// Try to append entry to log. If it returns true, we are + /// done. If it returns false, we need to retry. /// /// Entry to be appended to log /// Logical address of added entry /// Whether the append succeeded - public unsafe bool TryAppend(byte[] entry, ref long logicalAddress) + public unsafe bool TryAppend(byte[] entry, out long logicalAddress) { - if (logicalAddress < 0) - return TryCompleteAppend(entry, ref logicalAddress); + logicalAddress = 0; epoch.Resume(); var length = entry.Length; - var alignedLength = (length + 3) & ~3; // round up to multiple of 4 - - logicalAddress = allocator.TryAllocate(4 + alignedLength); - if (logicalAddress <= 0) + logicalAddress = allocator.TryAllocate(4 + Align(length)); + if (logicalAddress == 0) { epoch.Suspend(); return false; @@ -145,26 +130,21 @@ public unsafe bool TryAppend(byte[] entry, ref long logicalAddress) } /// - /// Try to append entry to log. If is returns true, we are - /// done. If it returns false with negative address, user - /// needs to call TryCompleteAppend to finalize the append. - /// See TryCompleteAppend for more info. + /// Try to append entry to log. If it returns true, we are + /// done. If it returns false, we need to retry. 
/// /// Entry to be appended to log /// Logical address of added entry /// Whether the append succeeded - public unsafe bool TryAppend(Span entry, ref long logicalAddress) + public unsafe bool TryAppend(Span entry, out long logicalAddress) { - if (logicalAddress < 0) - return TryCompleteAppend(entry, ref logicalAddress); + logicalAddress = 0; epoch.Resume(); var length = entry.Length; - var alignedLength = (length + 3) & ~3; // round up to multiple of 4 - - logicalAddress = allocator.TryAllocate(4 + alignedLength); - if (logicalAddress <= 0) + logicalAddress = allocator.TryAllocate(4 + Align(length)); + if (logicalAddress == 0) { epoch.Suspend(); return false; @@ -179,9 +159,135 @@ public unsafe bool TryAppend(Span entry, ref long logicalAddress) return true; } + /// + /// Try to append batch of entries as a single atomic unit. Entire batch + /// needs to fit on one page. + /// + /// Batch to be appended to log + /// Logical address of first added entry + /// Whether the append succeeded + public bool TryAppend(ISpanBatch spanBatch, out long logicalAddress) + { + return TryAppend(spanBatch, out logicalAddress, out _); + } + + /// + /// Append entry to log (async) - completes after entry is flushed to storage + /// + /// + /// + public async ValueTask AppendAsync(byte[] entry) + { + long logicalAddress; + + // Phase 1: wait for commit to memory + while (true) + { + var task = CommitTask; + if (TryAppend(entry, out logicalAddress)) + break; + await task; + } + + // Phase 2: wait for commit/flush to storage + while (true) + { + var task = CommitTask; + if (CommittedUntilAddress < logicalAddress + 4 + entry.Length) + { + await task; + } + else + break; + } + + return logicalAddress; + } + + /// + /// Append batch of entries to log (async) - completes after batch is flushed to storage + /// + /// + /// + public async ValueTask AppendAsync(ISpanBatch spanBatch) + { + long logicalAddress; + int allocatedLength; + + // Phase 1: wait for commit to memory + while (true) + { + var task = CommitTask; + if (TryAppend(spanBatch, out logicalAddress, out allocatedLength)) + break; + await task; + } + + // Phase 2: wait for commit/flush to storage + while (true) + { + var task = CommitTask; + if (CommittedUntilAddress < logicalAddress + allocatedLength) + { + await task; + } + else + break; + } + + return logicalAddress; + } + + /// + /// Append entry to log in memory (async) - completes after entry is appended + /// to memory, not necessarily committed to storage. + /// + /// + /// + public async ValueTask AppendToMemoryAsync(byte[] entry) + { + long logicalAddress; + + while (true) + { + var task = CommitTask; + if (TryAppend(entry, out logicalAddress)) + break; + await task; + } + + return logicalAddress; + } + + /// + /// Wait for all prior appends (in memory) to commit to storage. Does not + /// itself issue a commit, just waits for commit. So you should ensure that + /// someone else causes the commit to happen. + /// + /// Address until which we should wait for commit, default 0 for tail of log + /// + public async ValueTask WaitForCommitAsync(long untilAddress = 0) + { + var tailAddress = untilAddress; + if (tailAddress == 0) tailAddress = allocator.GetTailAddress(); + + while (true) + { + var task = CommitTask; + if (CommittedUntilAddress < tailAddress) + { + await task; + } + else + break; + } + } + /// /// Flush the log until tail /// + /// If true, wait until flush completes. Otherwise, issue flush and return. 
+ /// public long FlushAndCommit(bool spinWait = false) { epoch.Resume(); @@ -199,6 +305,27 @@ public long FlushAndCommit(bool spinWait = false) return tailAddress; } + /// + /// Async flush log until tail + /// + /// + public async ValueTask FlushAndCommitAsync() + { + var tailAddress = FlushAndCommit(); + + while (true) + { + var task = CommitTask; + if (CommittedUntilAddress < tailAddress) + { + await task; + } + else + break; + } + return tailAddress; + } + /// /// Truncate the log until, but not including, untilAddress /// @@ -213,13 +340,14 @@ public void TruncateUntil(long untilAddress) /// /// Pull-based iterator interface for scanning FASTER log /// - /// - /// - /// + /// Begin address for scan + /// End address for scan (or long.MaxValue for tailing) + /// Delegate to provide user memory where data gets copied to + /// Use single or double buffering /// - public FasterLogScanIterator Scan(long beginAddress, long endAddress, ScanBufferingMode scanBufferingMode = ScanBufferingMode.DoublePageBuffering) + public FasterLogScanIterator Scan(long beginAddress, long endAddress, GetMemory getMemory = null, ScanBufferingMode scanBufferingMode = ScanBufferingMode.DoublePageBuffering) { - return new FasterLogScanIterator(allocator, beginAddress, endAddress, scanBufferingMode, epoch); + return new FasterLogScanIterator(this, allocator, beginAddress, endAddress, getMemory, scanBufferingMode, epoch); } /// @@ -242,112 +370,10 @@ public void ReleaseThread() epoch.Release(); } - /// - /// Block allocate - /// - /// - /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - private void BlockAllocate(int recordSize, out long logicalAddress) + private int Align(int length) { - logicalAddress = allocator.Allocate(recordSize); - if (logicalAddress >= 0) return; - - while (logicalAddress < 0 && -logicalAddress >= allocator.ReadOnlyAddress) - { - epoch.ProtectAndDrain(); - allocator.CheckForAllocateComplete(ref logicalAddress); - if (logicalAddress < 0) - { - Thread.Yield(); - } - } - - logicalAddress = logicalAddress < 0 ? -logicalAddress : logicalAddress; - - if (logicalAddress < allocator.ReadOnlyAddress) - { - Debug.WriteLine("Allocated address is read-only, retrying"); - BlockAllocate(recordSize, out logicalAddress); - } - } - - /// - /// Try to complete partial allocation. Succeeds when address - /// turns positive. If failed with negative address, try the - /// operation. If failed with zero address, user needs to start - /// afresh with a new TryAppend operation. - /// - /// - /// - /// Whether operation succeeded - private unsafe bool TryCompleteAppend(byte[] entry, ref long logicalAddress) - { - epoch.Resume(); - - allocator.CheckForAllocateComplete(ref logicalAddress); - - if (logicalAddress < 0) - { - epoch.Suspend(); - return false; - } - - if (logicalAddress < allocator.ReadOnlyAddress) - { - logicalAddress = 0; - epoch.Suspend(); - return false; - } - - var length = entry.Length; - - var physicalAddress = allocator.GetPhysicalAddress(logicalAddress); - *(int*)physicalAddress = length; - fixed (byte* bp = entry) - Buffer.MemoryCopy(bp, (void*)(4 + physicalAddress), length, length); - - epoch.Suspend(); - return true; - } - - /// - /// Try to complete partial allocation. Succeeds when address - /// turns positive. If failed with negative address, try the - /// operation. If failed with zero address, user needs to start - /// afresh with a new TryAppend operation. 
- /// - /// - /// - /// Whether operation succeeded - private unsafe bool TryCompleteAppend(Span entry, ref long logicalAddress) - { - epoch.Resume(); - - allocator.CheckForAllocateComplete(ref logicalAddress); - - if (logicalAddress < 0) - { - epoch.Suspend(); - return false; - } - - if (logicalAddress < allocator.ReadOnlyAddress) - { - logicalAddress = 0; - epoch.Suspend(); - return false; - } - - var length = entry.Length; - - var physicalAddress = allocator.GetPhysicalAddress(logicalAddress); - *(int*)physicalAddress = length; - fixed (byte* bp = &entry.GetPinnableReference()) - Buffer.MemoryCopy(bp, (void*)(4 + physicalAddress), length, length); - - epoch.Suspend(); - return true; + return (length + 3) & ~3; } /// @@ -359,6 +385,9 @@ private void Commit(long flushAddress) info.FlushedUntilAddress = flushAddress; info.BeginAddress = allocator.BeginAddress; + var _newCommitTask = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); + TaskCompletionSource _commitTask; + // We can only allow serial monotonic synchronous commit lock (this) { @@ -368,7 +397,11 @@ private void Commit(long flushAddress) CommittedUntilAddress = flushAddress; // info.DebugPrint(); } + + _commitTask = commitTcs; + commitTcs = _newCommitTask; } + _commitTask.SetResult(flushAddress); } /// @@ -392,5 +425,48 @@ private void Restore() allocator.RestoreHybridLog(info.FlushedUntilAddress, headAddress, info.BeginAddress); CommittedUntilAddress = info.FlushedUntilAddress; } + + /// + /// Try to append batch of entries as a single atomic unit. Entire batch + /// needs to fit on one page. + /// + /// Batch to be appended to log + /// Logical address of first added entry + /// Actual allocated length + /// Whether the append succeeded + private unsafe bool TryAppend(ISpanBatch spanBatch, out long logicalAddress, out int allocatedLength) + { + logicalAddress = 0; + + int totalEntries = spanBatch.TotalEntries(); + allocatedLength = 0; + for (int i = 0; i < totalEntries; i++) + { + allocatedLength += Align(spanBatch.Get(i).Length) + 4; + } + + epoch.Resume(); + + logicalAddress = allocator.TryAllocate(allocatedLength); + if (logicalAddress == 0) + { + epoch.Suspend(); + return false; + } + + var physicalAddress = allocator.GetPhysicalAddress(logicalAddress); + for (int i = 0; i < totalEntries; i++) + { + var span = spanBatch.Get(i); + var entryLength = span.Length; + *(int*)physicalAddress = entryLength; + fixed (byte* bp = &span.GetPinnableReference()) + Buffer.MemoryCopy(bp, (void*)(4 + physicalAddress), entryLength, entryLength); + physicalAddress += Align(entryLength) + 4; + } + + epoch.Suspend(); + return true; + } } } diff --git a/cs/src/core/Index/FasterLog/FasterLogIterator.cs b/cs/src/core/Index/FasterLog/FasterLogIterator.cs index e61c5131d..526e4180d 100644 --- a/cs/src/core/Index/FasterLog/FasterLogIterator.cs +++ b/cs/src/core/Index/FasterLog/FasterLogIterator.cs @@ -4,22 +4,31 @@ using System; using System.Threading; using System.Diagnostics; +using System.Runtime.CompilerServices; namespace FASTER.core { + /// + /// Delegate for getting memory from user + /// + /// + /// + public delegate Span GetMemory(int length); + /// /// Scan iterator for hybrid log /// public class FasterLogScanIterator : IDisposable { private readonly int frameSize; + private readonly FasterLog fasterLog; private readonly BlittableAllocator allocator; private readonly long endAddress; private readonly BlittableFrame frame; private readonly CountdownEvent[] loaded; private readonly long[] loadedPage; private 
readonly LightEpoch epoch; - + private readonly GetMemory getMemory; private long currentAddress, nextAddress; @@ -31,14 +40,18 @@ public class FasterLogScanIterator : IDisposable /// /// Constructor /// + /// /// /// /// /// /// - internal unsafe FasterLogScanIterator(BlittableAllocator hlog, long beginAddress, long endAddress, ScanBufferingMode scanBufferingMode, LightEpoch epoch) + /// + internal unsafe FasterLogScanIterator(FasterLog fasterLog, BlittableAllocator hlog, long beginAddress, long endAddress, GetMemory getMemory, ScanBufferingMode scanBufferingMode, LightEpoch epoch) { + this.fasterLog = fasterLog; this.allocator = hlog; + this.getMemory = getMemory; this.epoch = epoch; if (beginAddress == 0) @@ -83,7 +96,7 @@ public unsafe bool GetNext(out Span entry) currentAddress = allocator.BeginAddress; } - if ((currentAddress >= endAddress) || (currentAddress >= allocator.ReadOnlyAddress)) + if ((currentAddress >= endAddress) || (currentAddress >= fasterLog.CommittedUntilAddress)) { entry = default(Span); return false; @@ -121,36 +134,46 @@ public unsafe bool GetNext(out Span entry) // Check if record fits on page, if not skip to next page int length = *(int*)physicalAddress; - int alignedLength = (length + 3) & ~3; // round up to multiple of 4 + int recordSize = 4 + Align(length); - int recordSize = 4 + alignedLength; if ((currentAddress & allocator.PageSizeMask) + recordSize > allocator.PageSize) + throw new Exception(); + + if (length == 0) // we are at end of page, skip to next { + // If record if (currentAddress >= headAddress) epoch.Suspend(); currentAddress = (1 + (currentAddress >> allocator.LogPageSizeBits)) << allocator.LogPageSizeBits; continue; } - if (length == 0) + if (getMemory != null) { - if (currentAddress >= headAddress) - epoch.Suspend(); - currentAddress += recordSize; - continue; + // Use user delegate to allocate memory + entry = getMemory(length); + if (entry.Length != length) + throw new Exception("Span provided has invalid length"); + } + else + { + // We allocate a byte array from heap + entry = new Span(new byte[length]); } - entry = new Span((void*)(physicalAddress + 4), length); + fixed (byte* bp = &entry.GetPinnableReference()) + Buffer.MemoryCopy((void*)(4 + physicalAddress), bp, length, length); + if (currentAddress >= headAddress) - { - // Have to copy out bytes within epoch protection in - // this case because this is a shared buffer - var _entry = new byte[length]; - entry.CopyTo(_entry); - entry = _entry; epoch.Suspend(); - } - nextAddress = currentAddress + recordSize; + + Debug.Assert((currentAddress & allocator.PageSizeMask) + recordSize <= allocator.PageSize); + + if ((currentAddress & allocator.PageSizeMask) + recordSize == allocator.PageSize) + nextAddress = (1 + (currentAddress >> allocator.LogPageSizeBits)) << allocator.LogPageSizeBits; + else + nextAddress = currentAddress + recordSize; + return true; } } @@ -213,6 +236,13 @@ private unsafe void AsyncReadPagesCallback(uint errorCode, uint numBytes, Native Interlocked.MemoryBarrier(); Overlapped.Free(overlap); } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private int Align(int length) + { + return (length + 3) & ~3; + } + } } diff --git a/cs/src/core/Index/FasterLog/ISpanBatch.cs b/cs/src/core/Index/FasterLog/ISpanBatch.cs new file mode 100644 index 000000000..30c791626 --- /dev/null +++ b/cs/src/core/Index/FasterLog/ISpanBatch.cs @@ -0,0 +1,26 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. 
+ +using System; + +namespace FASTER.core +{ + /// + /// Interface to provide a batch of Span[byte] data to FASTER + /// + public interface ISpanBatch + { + /// + /// Number of entries in provided batch + /// + /// Number of entries + int TotalEntries(); + + /// + /// Retrieve batch entry at specified index + /// + /// Index + /// + Span Get(int index); + } +} From 944504bca226183ad2028a3f2c56686034f64aa5 Mon Sep 17 00:00:00 2001 From: Badrish Chandramouli Date: Thu, 3 Oct 2019 18:18:10 -0700 Subject: [PATCH 12/36] Added tailing iterator WaitAsync to wait for iteration to proceed. --- .../core/Index/FasterLog/FasterLogIterator.cs | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/cs/src/core/Index/FasterLog/FasterLogIterator.cs b/cs/src/core/Index/FasterLog/FasterLogIterator.cs index 526e4180d..45bc570b8 100644 --- a/cs/src/core/Index/FasterLog/FasterLogIterator.cs +++ b/cs/src/core/Index/FasterLog/FasterLogIterator.cs @@ -5,6 +5,7 @@ using System.Threading; using System.Diagnostics; using System.Runtime.CompilerServices; +using System.Threading.Tasks; namespace FASTER.core { @@ -79,6 +80,22 @@ internal unsafe FasterLogScanIterator(FasterLog fasterLog, BlittableAllocator + /// Wait for iteration to be ready to continue + /// + /// + public async void WaitAsync() + { + while (true) + { + var commitTask = fasterLog.CommitTask; + if (nextAddress >= fasterLog.CommittedUntilAddress) + await commitTask; + else + break; + } + } + /// /// Get next record in iterator /// From 80a2aeb207a6b85b529aeb023e9d6e34888c6cf9 Mon Sep 17 00:00:00 2001 From: Badrish Chandramouli Date: Fri, 4 Oct 2019 10:03:47 -0700 Subject: [PATCH 13/36] Convert Span to ReadOnlySpan for appends --- cs/playground/FasterLogSample/Program.cs | 12 ++++----- cs/src/core/Index/FasterLog/FasterLog.cs | 26 +++++++++---------- .../{ISpanBatch.cs => IReadOnlySpanBatch.cs} | 6 ++--- 3 files changed, 22 insertions(+), 22 deletions(-) rename cs/src/core/Index/FasterLog/{ISpanBatch.cs => IReadOnlySpanBatch.cs} (77%) diff --git a/cs/playground/FasterLogSample/Program.cs b/cs/playground/FasterLogSample/Program.cs index 3b13b1413..6ccd84abe 100644 --- a/cs/playground/FasterLogSample/Program.cs +++ b/cs/playground/FasterLogSample/Program.cs @@ -14,7 +14,7 @@ public class Program // Entry length can be between 1 and ((1 << FasterLogSettings.PageSizeBits) - 4) const int entryLength = 1 << 10; static readonly byte[] staticEntry = new byte[entryLength]; - static readonly SpanBatch spanBatch = new SpanBatch(10); + static readonly ReadOnlySpanBatch spanBatch = new ReadOnlySpanBatch(10); static FasterLog log; static void ReportThread() @@ -55,11 +55,11 @@ static void AppendThread() while (true) { // TryAppend - can be used with throttling/back-off - // Accepts byte[] and Span + // Accepts byte[] and ReadOnlySpan while (!log.TryAppend(staticEntry, out _)) ; // Synchronous blocking append - // Accepts byte[] and Span + // Accepts byte[] and ReadOnlySpan // log.Append(entry); // Batched append - batch must fit on one page @@ -224,11 +224,11 @@ static async Task AppendAsync(int id) } } - private struct SpanBatch : ISpanBatch + private struct ReadOnlySpanBatch : IReadOnlySpanBatch { private readonly int batchSize; - public SpanBatch(int batchSize) => this.batchSize = batchSize; - public Span Get(int index) => staticEntry; + public ReadOnlySpanBatch(int batchSize) => this.batchSize = batchSize; + public ReadOnlySpan Get(int index) => staticEntry; public int TotalEntries() => batchSize; } diff --git 
a/cs/src/core/Index/FasterLog/FasterLog.cs b/cs/src/core/Index/FasterLog/FasterLog.cs index fc0b96227..f381689d9 100644 --- a/cs/src/core/Index/FasterLog/FasterLog.cs +++ b/cs/src/core/Index/FasterLog/FasterLog.cs @@ -80,7 +80,7 @@ public void Dispose() /// /// /// Logical address of added entry - public long Append(Span entry) + public long Append(ReadOnlySpan entry) { long logicalAddress; while (!TryAppend(entry, out logicalAddress)) ; @@ -136,7 +136,7 @@ public unsafe bool TryAppend(byte[] entry, out long logicalAddress) /// Entry to be appended to log /// Logical address of added entry /// Whether the append succeeded - public unsafe bool TryAppend(Span entry, out long logicalAddress) + public unsafe bool TryAppend(ReadOnlySpan entry, out long logicalAddress) { logicalAddress = 0; @@ -163,12 +163,12 @@ public unsafe bool TryAppend(Span entry, out long logicalAddress) /// Try to append batch of entries as a single atomic unit. Entire batch /// needs to fit on one page. /// - /// Batch to be appended to log + /// Batch to be appended to log /// Logical address of first added entry /// Whether the append succeeded - public bool TryAppend(ISpanBatch spanBatch, out long logicalAddress) + public bool TryAppend(IReadOnlySpanBatch readOnlySpanBatch, out long logicalAddress) { - return TryAppend(spanBatch, out logicalAddress, out _); + return TryAppend(readOnlySpanBatch, out logicalAddress, out _); } /// @@ -207,9 +207,9 @@ public async ValueTask AppendAsync(byte[] entry) /// /// Append batch of entries to log (async) - completes after batch is flushed to storage /// - /// + /// /// - public async ValueTask AppendAsync(ISpanBatch spanBatch) + public async ValueTask AppendAsync(IReadOnlySpanBatch readOnlySpanBatch) { long logicalAddress; int allocatedLength; @@ -218,7 +218,7 @@ public async ValueTask AppendAsync(ISpanBatch spanBatch) while (true) { var task = CommitTask; - if (TryAppend(spanBatch, out logicalAddress, out allocatedLength)) + if (TryAppend(readOnlySpanBatch, out logicalAddress, out allocatedLength)) break; await task; } @@ -430,19 +430,19 @@ private void Restore() /// Try to append batch of entries as a single atomic unit. Entire batch /// needs to fit on one page. 
/// - /// Batch to be appended to log + /// Batch to be appended to log /// Logical address of first added entry /// Actual allocated length /// Whether the append succeeded - private unsafe bool TryAppend(ISpanBatch spanBatch, out long logicalAddress, out int allocatedLength) + private unsafe bool TryAppend(IReadOnlySpanBatch readOnlySpanBatch, out long logicalAddress, out int allocatedLength) { logicalAddress = 0; - int totalEntries = spanBatch.TotalEntries(); + int totalEntries = readOnlySpanBatch.TotalEntries(); allocatedLength = 0; for (int i = 0; i < totalEntries; i++) { - allocatedLength += Align(spanBatch.Get(i).Length) + 4; + allocatedLength += Align(readOnlySpanBatch.Get(i).Length) + 4; } epoch.Resume(); @@ -457,7 +457,7 @@ private unsafe bool TryAppend(ISpanBatch spanBatch, out long logicalAddress, out var physicalAddress = allocator.GetPhysicalAddress(logicalAddress); for (int i = 0; i < totalEntries; i++) { - var span = spanBatch.Get(i); + var span = readOnlySpanBatch.Get(i); var entryLength = span.Length; *(int*)physicalAddress = entryLength; fixed (byte* bp = &span.GetPinnableReference()) diff --git a/cs/src/core/Index/FasterLog/ISpanBatch.cs b/cs/src/core/Index/FasterLog/IReadOnlySpanBatch.cs similarity index 77% rename from cs/src/core/Index/FasterLog/ISpanBatch.cs rename to cs/src/core/Index/FasterLog/IReadOnlySpanBatch.cs index 30c791626..15f61b13d 100644 --- a/cs/src/core/Index/FasterLog/ISpanBatch.cs +++ b/cs/src/core/Index/FasterLog/IReadOnlySpanBatch.cs @@ -6,9 +6,9 @@ namespace FASTER.core { /// - /// Interface to provide a batch of Span[byte] data to FASTER + /// Interface to provide a batch of ReadOnlySpan[byte] data to FASTER /// - public interface ISpanBatch + public interface IReadOnlySpanBatch { /// /// Number of entries in provided batch @@ -21,6 +21,6 @@ public interface ISpanBatch /// /// Index /// - Span Get(int index); + ReadOnlySpan Get(int index); } } From 005069485bbb5fa17d0851cd2c28fa7456e20790 Mon Sep 17 00:00:00 2001 From: Badrish Chandramouli Date: Fri, 4 Oct 2019 12:39:48 -0700 Subject: [PATCH 14/36] Added MemoryPool/IMemoryOwner variant of iterator --- cs/playground/FasterLogSample/Program.cs | 4 +- .../core/Index/FasterLog/FasterLogIterator.cs | 200 +++++++++++------- 2 files changed, 125 insertions(+), 79 deletions(-) diff --git a/cs/playground/FasterLogSample/Program.cs b/cs/playground/FasterLogSample/Program.cs index 6ccd84abe..8922a995b 100644 --- a/cs/playground/FasterLogSample/Program.cs +++ b/cs/playground/FasterLogSample/Program.cs @@ -87,9 +87,9 @@ static void ScanThread() { while (true) { - while (!iter.GetNext(out result)) + while (!iter.GetNext(out result, out int length)) { - Thread.Sleep(1000); + iter.WaitAsync().GetAwaiter().GetResult(); } if (!result.SequenceEqual(entrySpan)) diff --git a/cs/src/core/Index/FasterLog/FasterLogIterator.cs b/cs/src/core/Index/FasterLog/FasterLogIterator.cs index 45bc570b8..1ca1e1f0b 100644 --- a/cs/src/core/Index/FasterLog/FasterLogIterator.cs +++ b/cs/src/core/Index/FasterLog/FasterLogIterator.cs @@ -6,15 +6,16 @@ using System.Diagnostics; using System.Runtime.CompilerServices; using System.Threading.Tasks; +using System.Buffers; namespace FASTER.core { /// /// Delegate for getting memory from user /// - /// + /// Minimum length of returned span /// - public delegate Span GetMemory(int length); + public delegate Span GetMemory(int minLength); /// /// Scan iterator for hybrid log @@ -84,7 +85,7 @@ internal unsafe FasterLogScanIterator(FasterLog fasterLog, BlittableAllocator /// - public async void 
WaitAsync() + public async ValueTask WaitAsync() { while (true) { @@ -99,100 +100,60 @@ public async void WaitAsync() /// /// Get next record in iterator /// - /// + /// Copy of entry, if found + /// Actual length of entry /// - public unsafe bool GetNext(out Span entry) + public unsafe bool GetNext(out Span entry, out int entryLength) { - currentAddress = nextAddress; - while (true) + if (GetNextInternal(out long physicalAddress, out entryLength, out bool epochTaken)) { - // Check for boundary conditions - if (currentAddress < allocator.BeginAddress) - { - Debug.WriteLine("Iterator address is less than log BeginAddress " + allocator.BeginAddress + ", adjusting iterator address"); - currentAddress = allocator.BeginAddress; - } - - if ((currentAddress >= endAddress) || (currentAddress >= fasterLog.CommittedUntilAddress)) - { - entry = default(Span); - return false; - } - - - if (frameSize == 0 && currentAddress < allocator.HeadAddress) - { - throw new Exception("Iterator address is less than log HeadAddress in memory-scan mode"); - } - - var currentPage = currentAddress >> allocator.LogPageSizeBits; - var offset = currentAddress & allocator.PageSizeMask; - - var headAddress = allocator.HeadAddress; - var physicalAddress = default(long); - - if (currentAddress < headAddress) - { - BufferAndLoad(currentAddress, currentPage, currentPage % frameSize); - physicalAddress = frame.GetPhysicalAddress(currentPage % frameSize, offset); - } - else - { - epoch.Resume(); - headAddress = allocator.HeadAddress; - if (currentAddress < headAddress) // rare case - { - epoch.Suspend(); - continue; - } - - physicalAddress = allocator.GetPhysicalAddress(currentAddress); - } - - // Check if record fits on page, if not skip to next page - int length = *(int*)physicalAddress; - int recordSize = 4 + Align(length); - - if ((currentAddress & allocator.PageSizeMask) + recordSize > allocator.PageSize) - throw new Exception(); - - if (length == 0) // we are at end of page, skip to next - { - // If record - if (currentAddress >= headAddress) - epoch.Suspend(); - currentAddress = (1 + (currentAddress >> allocator.LogPageSizeBits)) << allocator.LogPageSizeBits; - continue; - } - if (getMemory != null) { // Use user delegate to allocate memory - entry = getMemory(length); - if (entry.Length != length) + entry = getMemory(entryLength); + if (entry.Length < entryLength) throw new Exception("Span provided has invalid length"); } else { // We allocate a byte array from heap - entry = new Span(new byte[length]); + entry = new Span(new byte[entryLength]); } fixed (byte* bp = &entry.GetPinnableReference()) - Buffer.MemoryCopy((void*)(4 + physicalAddress), bp, length, length); + Buffer.MemoryCopy((void*)(4 + physicalAddress), bp, entryLength, entryLength); - if (currentAddress >= headAddress) + if (epochTaken) epoch.Suspend(); - Debug.Assert((currentAddress & allocator.PageSizeMask) + recordSize <= allocator.PageSize); + return true; + } + entry = default; + return false; + } + + /// + /// GetNext supporting memory pools + /// + /// + /// + /// + /// + public unsafe bool GetNext(MemoryPool pool, out IMemoryOwner entry, out int entryLength) + { + if (GetNextInternal(out long physicalAddress, out entryLength, out bool epochTaken)) + { + entry = pool.Rent(entryLength); + Buffer.MemoryCopy((void*)(4 + physicalAddress), (void*)((byte*)entry.Memory.Pin().Pointer + 4), entryLength, entryLength); + + if (epochTaken) + epoch.Suspend(); - if ((currentAddress & allocator.PageSizeMask) + recordSize == allocator.PageSize) - nextAddress = (1 + 
(currentAddress >> allocator.LogPageSizeBits)) << allocator.LogPageSizeBits; - else - nextAddress = currentAddress + recordSize; - return true; } + entry = default; + entryLength = default; + return false; } /// @@ -260,6 +221,91 @@ private int Align(int length) return (length + 3) & ~3; } + /// + /// Retrieve physical address of next iterator value + /// (under epoch protection if it is from main page buffer) + /// + /// + /// + /// + /// + private unsafe bool GetNextInternal(out long physicalAddress, out int entryLength, out bool epochTaken) + { + physicalAddress = 0; + entryLength = 0; + epochTaken = false; + + currentAddress = nextAddress; + while (true) + { + // Check for boundary conditions + if (currentAddress < allocator.BeginAddress) + { + Debug.WriteLine("Iterator address is less than log BeginAddress " + allocator.BeginAddress + ", adjusting iterator address"); + currentAddress = allocator.BeginAddress; + } + + if ((currentAddress >= endAddress) || (currentAddress >= fasterLog.CommittedUntilAddress)) + { + return false; + } + + if (frameSize == 0 && currentAddress < allocator.HeadAddress) + { + throw new Exception("Iterator address is less than log HeadAddress in memory-scan mode"); + } + + var currentPage = currentAddress >> allocator.LogPageSizeBits; + var offset = currentAddress & allocator.PageSizeMask; + + var headAddress = allocator.HeadAddress; + + if (currentAddress < headAddress) + { + BufferAndLoad(currentAddress, currentPage, currentPage % frameSize); + physicalAddress = frame.GetPhysicalAddress(currentPage % frameSize, offset); + } + else + { + epoch.Resume(); + headAddress = allocator.HeadAddress; + if (currentAddress < headAddress) // rare case + { + epoch.Suspend(); + continue; + } + + physicalAddress = allocator.GetPhysicalAddress(currentAddress); + } + + // Check if record fits on page, if not skip to next page + entryLength = *(int*)physicalAddress; + int recordSize = 4 + Align(entryLength); + + if ((currentAddress & allocator.PageSizeMask) + recordSize > allocator.PageSize) + throw new Exception("Entry does not fit on page"); + + if (entryLength == 0) // we are at end of page, skip to next + { + // Release epoch protection if we had acquired it (in-memory read) + if (currentAddress >= headAddress) + epoch.Suspend(); + currentAddress = (1 + (currentAddress >> allocator.LogPageSizeBits)) << allocator.LogPageSizeBits; + continue; + } + + Debug.Assert((currentAddress & allocator.PageSizeMask) + recordSize <= allocator.PageSize); + + if ((currentAddress & allocator.PageSizeMask) + recordSize == allocator.PageSize) + nextAddress = (1 + (currentAddress >> allocator.LogPageSizeBits)) << allocator.LogPageSizeBits; + else + nextAddress = currentAddress + recordSize; + + epochTaken = currentAddress >= headAddress; + return true; + } + } + } } From 127e908998ea90b657020731263390172e87328d Mon Sep 17 00:00:00 2001 From: Badrish Chandramouli Date: Fri, 4 Oct 2019 13:11:29 -0700 Subject: [PATCH 15/36] Updates --- cs/playground/FasterLogSample/Program.cs | 24 +++++++++++++++---- .../core/Index/FasterLog/FasterLogIterator.cs | 3 ++- cs/test/FasterLogTests.cs | 2 +- 3 files changed, 22 insertions(+), 7 deletions(-) diff --git a/cs/playground/FasterLogSample/Program.cs b/cs/playground/FasterLogSample/Program.cs index 8922a995b..b9f8e013d 100644 --- a/cs/playground/FasterLogSample/Program.cs +++ b/cs/playground/FasterLogSample/Program.cs @@ -16,24 +16,37 @@ public class Program static readonly byte[] staticEntry = new byte[entryLength]; static readonly ReadOnlySpanBatch spanBatch = new ReadOnlySpanBatch(10); static FasterLog log; + static FasterLogScanIterator
iter; static void ReportThread() { long lastTime = 0; long lastValue = log.TailAddress; + long lastIterValue = log.BeginAddress; + Stopwatch sw = new Stopwatch(); sw.Start(); while (true) { Thread.Sleep(5000); + var nowTime = sw.ElapsedMilliseconds; var nowValue = log.TailAddress; - Console.WriteLine("Throughput: {0} MB/sec, Tail: {1}", + Console.WriteLine("Append Throughput: {0} MB/sec, Tail: {1}", (nowValue - lastValue) / (1000 * (nowTime - lastTime)), nowValue); - lastTime = nowTime; lastValue = nowValue; + + if (iter != null) + { + var nowIterValue = iter.CurrentAddress; + Console.WriteLine("Scan Throughput: {0} MB/sec, Iter pos: {1}", + (nowIterValue - lastIterValue) / (1000 * (nowTime - lastTime)), nowIterValue); + lastIterValue = nowIterValue; + } + + lastTime = nowTime; } } @@ -71,8 +84,6 @@ static void ScanThread() { Random r = new Random(); - Thread.Sleep(5000); - byte[] entry = new byte[entryLength]; for (int i = 0; i < entryLength; i++) { @@ -83,7 +94,7 @@ static void ScanThread() long lastAddress = 0; Span result; - using (var iter = log.Scan(0, long.MaxValue)) + using (iter = log.Scan(log.BeginAddress, long.MaxValue)) { while (true) { @@ -92,6 +103,9 @@ static void ScanThread() iter.WaitAsync().GetAwaiter().GetResult(); } + // Memory pool variant: + // iter.GetNext(pool, out IMemoryOwner resultMem, out int length)) + if (!result.SequenceEqual(entrySpan)) { if (result.Length != entrySpan.Length) diff --git a/cs/src/core/Index/FasterLog/FasterLogIterator.cs b/cs/src/core/Index/FasterLog/FasterLogIterator.cs index 1ca1e1f0b..5bacd1476 100644 --- a/cs/src/core/Index/FasterLog/FasterLogIterator.cs +++ b/cs/src/core/Index/FasterLog/FasterLogIterator.cs @@ -144,7 +144,8 @@ public unsafe bool GetNext(MemoryPool pool, out IMemoryOwner entry, if (GetNextInternal(out long physicalAddress, out entryLength, out bool epochTaken)) { entry = pool.Rent(entryLength); - Buffer.MemoryCopy((void*)(4 + physicalAddress), (void*)((byte*)entry.Memory.Pin().Pointer + 4), entryLength, entryLength); + using (var handle = entry.Memory.Pin()) + Buffer.MemoryCopy((void*)(4 + physicalAddress), handle.Pointer, entryLength, entryLength); if (epochTaken) epoch.Suspend(); diff --git a/cs/test/FasterLogTests.cs b/cs/test/FasterLogTests.cs index 8059006bf..725460929 100644 --- a/cs/test/FasterLogTests.cs +++ b/cs/test/FasterLogTests.cs @@ -49,7 +49,7 @@ public void FasterLogTest1() using (var iter = log.Scan(0, long.MaxValue)) { int count = 0; - while (iter.GetNext(out Span result)) + while (iter.GetNext(out Span result, out int length)) { count++; Assert.IsTrue(result.SequenceEqual(entry)); From 6dc7af610dbe467b29b3740313a12526161c025d Mon Sep 17 00:00:00 2001 From: Badrish Chandramouli Date: Fri, 4 Oct 2019 13:20:47 -0700 Subject: [PATCH 16/36] Updated way to pin pooled memory --- cs/src/core/Index/FasterLog/FasterLogIterator.cs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cs/src/core/Index/FasterLog/FasterLogIterator.cs b/cs/src/core/Index/FasterLog/FasterLogIterator.cs index 5bacd1476..4af3c4c58 100644 --- a/cs/src/core/Index/FasterLog/FasterLogIterator.cs +++ b/cs/src/core/Index/FasterLog/FasterLogIterator.cs @@ -144,8 +144,9 @@ public unsafe bool GetNext(MemoryPool pool, out IMemoryOwner entry, if (GetNextInternal(out long physicalAddress, out entryLength, out bool epochTaken)) { entry = pool.Rent(entryLength); - using (var handle = entry.Memory.Pin()) - Buffer.MemoryCopy((void*)(4 + physicalAddress), handle.Pointer, entryLength, entryLength); + + fixed (byte* bp = 
&entry.Memory.Span.GetPinnableReference()) + Buffer.MemoryCopy((void*)(4 + physicalAddress), bp, entryLength, entryLength); if (epochTaken) epoch.Suspend(); From ff2744877482376199fbff18a554fefd53cb8a6b Mon Sep 17 00:00:00 2001 From: Badrish Chandramouli Date: Fri, 4 Oct 2019 16:16:36 -0700 Subject: [PATCH 17/36] Update azure-pipelines.yml --- azure-pipelines.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 21fd70bb7..4af941e9b 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -5,7 +5,7 @@ variables: jobs: - job: 'csharpWindows' pool: - vmImage: vs2017-win2016 + vmImage: windows-latest displayName: 'C# (Windows)' strategy: From 8e42a74ca8c50a1a375baf311b088de29a245eec Mon Sep 17 00:00:00 2001 From: Badrish Chandramouli Date: Sat, 5 Oct 2019 16:51:19 -0700 Subject: [PATCH 18/36] Support minimum buffer size of just 1 page! --- cs/test/FasterLogTests.cs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/cs/test/FasterLogTests.cs b/cs/test/FasterLogTests.cs index 725460929..1c109d81d 100644 --- a/cs/test/FasterLogTests.cs +++ b/cs/test/FasterLogTests.cs @@ -2,6 +2,7 @@ // Licensed under the MIT license. using System; +using System.IO; using System.Linq; using FASTER.core; using NUnit.Framework; @@ -17,10 +18,11 @@ internal class FasterLogTests private FasterLog log; private IDevice device; - [SetUp] public void Setup() { + if (File.Exists(TestContext.CurrentContext.TestDirectory + "\\fasterlog.log.commit")) + File.Delete(TestContext.CurrentContext.TestDirectory + "\\fasterlog.log.commit"); device = Devices.CreateLogDevice(TestContext.CurrentContext.TestDirectory + "\\fasterlog.log", deleteOnClose: true); } @@ -28,6 +30,8 @@ public void Setup() public void TearDown() { device.Close(); + if (File.Exists(TestContext.CurrentContext.TestDirectory + "\\fasterlog.log.commit")) + File.Delete(TestContext.CurrentContext.TestDirectory + "\\fasterlog.log.commit"); } [Test] From c55de3f6b9f441180a687b6bb7f4935e5f05f053 Mon Sep 17 00:00:00 2001 From: Badrish Chandramouli Date: Mon, 7 Oct 2019 14:13:16 -0700 Subject: [PATCH 19/36] Actually checking in support for 1 page in memory, added initial draft of disposing task --- cs/src/core/Allocator/AllocatorBase.cs | 208 ++---------------- cs/src/core/Index/FASTER/FASTERImpl.cs | 40 +--- cs/src/core/Index/FasterLog/FasterLog.cs | 4 +- .../core/Index/FasterLog/FasterLogSettings.cs | 6 +- 4 files changed, 27 insertions(+), 231 deletions(-) diff --git a/cs/src/core/Allocator/AllocatorBase.cs b/cs/src/core/Allocator/AllocatorBase.cs index efe897c58..8053410f0 100644 --- a/cs/src/core/Allocator/AllocatorBase.cs +++ b/cs/src/core/Allocator/AllocatorBase.cs @@ -105,16 +105,8 @@ public unsafe abstract partial class AllocatorBase : IDisposable /// /// HeadOffset lag (from tail) /// - protected const int HeadOffsetLagNumPages = 2; + protected readonly bool HeadOffsetExtraLag; - /// - /// HeadOffset lag (from tail) for ReadCache - /// - protected const int ReadCacheHeadOffsetLagNumPages = 2; - /// - /// HeadOffset lag size - /// - protected readonly int HeadOffsetLagSize; /// /// HeadOffset lag address /// protected long HeadOffsetLagAddress; @@ -181,10 +173,6 @@ public unsafe abstract partial class AllocatorBase : IDisposable #endregion #region Private page metadata - /// - /// Index in circular buffer, of the current tail page - /// - private volatile int TailPageCache; // Array that indicates the status of each buffer page internal readonly FullPageStatus[] PageStatusIndicator; @@ -503,8 +491,10 @@ public
AllocatorBase(LogSettings settings, IFasterEqualityComparer comparer BufferSizeMask = BufferSize - 1; // HeadOffset lag (from tail). - HeadOffsetLagSize = BufferSize - (ReadCache ? ReadCacheHeadOffsetLagNumPages : HeadOffsetLagNumPages); - HeadOffsetLagAddress = (long)HeadOffsetLagSize << LogPageSizeBits; + var headOffsetLagSize = BufferSize - 1; // (ReadCache ? ReadCacheHeadOffsetLagNumPages : HeadOffsetLagNumPages); + if (BufferSize > 1 && HeadOffsetExtraLag) headOffsetLagSize--; + + HeadOffsetLagAddress = (long)headOffsetLagSize << LogPageSizeBits; // ReadOnlyOffset lag (from tail). This should not exceed HeadOffset lag. LogMutableFraction = settings.MutableFraction; @@ -515,9 +505,12 @@ public AllocatorBase(LogSettings settings, IFasterEqualityComparer comparer SegmentSize = 1 << LogSegmentSizeBits; SegmentBufferSize = 1 + (LogTotalSizeBytes / SegmentSize < 1 ? 1 : (int)(LogTotalSizeBytes / SegmentSize)); - if (BufferSize < 4) + if (SegmentSize < PageSize) + throw new Exception("Segment size must be at least as large as page size"); + + if (BufferSize < 1) { - throw new Exception("HLOG buffer must be at least 4 pages"); + throw new Exception("Log buffer must be at least 1 page in size"); } PageStatusIndicator = new FullPageStatus[BufferSize]; @@ -542,8 +535,11 @@ protected void Initialize(long firstValidAddress) AllocatePage(tailPageIndex); // Allocate next page as well - if (firstValidAddress > 0) - AllocatePage(tailPageIndex + 1); + int nextPageIndex = (int)(tailPage + 1) % BufferSize; + if ((!IsAllocated(nextPageIndex))) + { + AllocatePage(nextPageIndex); + } SafeReadOnlyAddress = firstValidAddress; ReadOnlyAddress = firstValidAddress; @@ -555,8 +551,6 @@ protected void Initialize(long firstValidAddress) TailPageOffset.Page = (int)(firstValidAddress >> LogPageSizeBits); TailPageOffset.Offset = (int)(firstValidAddress & PageSizeMask); - - TailPageCache = 0; } /// @@ -685,15 +679,6 @@ public long GetOffsetInPage(long address) return address & PageSizeMask; } - /// - /// Get offset lag in pages - /// - /// - public long GetHeadOffsetLagInPages() - { - return HeadOffsetLagSize; - } - /// /// Get sector size for main hlog device /// @@ -703,121 +688,6 @@ public int GetDeviceSectorSize() return sectorSize; } - /// - /// Key function used to allocate memory for a specified number of items - /// - /// - /// - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public long Allocate(int numSlots = 1) - { - PageOffset localTailPageOffset = default(PageOffset); - - // Determine insertion index. - // ReSharper disable once CSharpWarnings::CS0420 -#pragma warning disable 420 - localTailPageOffset.PageAndOffset = Interlocked.Add(ref TailPageOffset.PageAndOffset, numSlots); -#pragma warning restore 420 - - int page = localTailPageOffset.Page; - int offset = localTailPageOffset.Offset - numSlots; - #region HANDLE PAGE OVERFLOW - /* To prove correctness of the following modifications - * done to TailPageOffset and the allocation itself, - * we should use the fact that only one thread will have any - * of the following cases since it is a counter and we spin-wait - * until the tail is folded onto next page accordingly.
- */ - if (localTailPageOffset.Offset >= PageSize) - { - if (offset >= PageSize) - { - //The tail offset value was more than page size before atomic add - //We consider that a failed attempt and retry again - var spin = new SpinWait(); - do - { - //Just to give some more time to the thread - // that is handling this overflow - while (TailPageOffset.Offset >= PageSize) - { - spin.SpinOnce(); - } - - // ReSharper disable once CSharpWarnings::CS0420 -#pragma warning disable 420 - localTailPageOffset.PageAndOffset = Interlocked.Add(ref TailPageOffset.PageAndOffset, numSlots); -#pragma warning restore 420 - - page = localTailPageOffset.Page; - offset = localTailPageOffset.Offset - numSlots; - } while (offset >= PageSize); - } - - - if (localTailPageOffset.Offset == PageSize) - { - //Folding over at page boundary - localTailPageOffset.Page++; - localTailPageOffset.Offset = 0; - TailPageOffset = localTailPageOffset; - } - else if (localTailPageOffset.Offset >= PageSize) - { - //Overflows not allowed. We allot same space in next page. - localTailPageOffset.Page++; - localTailPageOffset.Offset = numSlots; - TailPageOffset = localTailPageOffset; - - page = localTailPageOffset.Page; - offset = 0; - } - } - #endregion - - long address = (((long)page) << LogPageSizeBits) | ((long)offset); - - // Check for TailPageCache hit - if (TailPageCache == page) - { - return (address); - } - - // Negate the address if page not ready to be used - if (CannotAllocateNext(page)) - { - address = -address; - } - - // Update the read-only so that we can get more space for the tail - if (offset == 0) - { - if (address >= 0) - { - TailPageCache = page; - Interlocked.MemoryBarrier(); - } - - int newPageIndex = (page + 1) % BufferSize; - long tailAddress = (address < 0 ? -address : address); - PageAlignedShiftReadOnlyAddress(tailAddress); - PageAlignedShiftHeadAddress(tailAddress); - - if ((!IsAllocated(newPageIndex))) - { - AllocatePage(newPageIndex); - } - - // We refreshed epoch, so address may have - // become read-only; re-check - if (tailAddress < ReadOnlyAddress) - return Allocate(numSlots); - } - - return address; - } - /// /// Try allocate, no thread spinning allowed /// May return 0 in case of inability to allocate @@ -889,56 +759,12 @@ public long TryAllocate(int numSlots = 1) return (((long)page) << LogPageSizeBits) | ((long)offset); } - /// - /// If allocator cannot allocate new memory as the head has not shifted or the previous page - /// is not yet closed, it allocates but returns the negative address. 
- /// This function is invoked to check if the address previously allocated has become valid to be used - /// - /// - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public void CheckForAllocateComplete(ref long address) - { - PageOffset p = default(PageOffset); - p.Page = (int)((-address) >> LogPageSizeBits); - p.Offset = (int)((-address) & PageSizeMask); - - // Check write cache - if (TailPageCache == p.Page) - { - address = -address; - return; - } - - PageAlignedShiftHeadAddress(GetTailAddress()); - - // Check if we can allocate pageIndex - if (CannotAllocateNext(p.Page)) - { - return; - } - - //correct values and set write cache - address = -address; - if (p.Offset == 0) - { - TailPageCache = p.Page; - } - return; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private bool CannotAllocateNext(int page) - { - return - (page >= BufferSize + (ClosedUntilAddress >> LogPageSizeBits) - 1) || - !IsAllocated(page % BufferSize); - } - private bool CannotAllocate(int page) { return (page >= BufferSize + (ClosedUntilAddress >> LogPageSizeBits)); } + /// /// Used by applications to make the current state of the database immutable quickly /// @@ -1072,7 +898,6 @@ private void DebugPrintAddresses(long closePageAddress) Console.WriteLine("SafeHead: {0}.{1}", GetPage(_safehead), GetOffsetInPage(_safehead)); Console.WriteLine("ReadOnly: {0}.{1}", GetPage(_readonly), GetOffsetInPage(_readonly)); Console.WriteLine("SafeReadOnly: {0}.{1}", GetPage(_safereadonly), GetOffsetInPage(_safereadonly)); - Console.WriteLine("TailPageCache: {0}", TailPageCache); } /// @@ -1213,7 +1038,6 @@ public void RecoveryReset(long tailAddress, long headAddress, long beginAddress) long offsetInPage = GetOffsetInPage(tailAddress); TailPageOffset.Page = (int)tailPage; TailPageOffset.Offset = (int)offsetInPage; - TailPageCache = TailPageOffset.Page; // allocate next page as well - this is an invariant in the allocator! var pageIndex = (TailPageOffset.Page % BufferSize); diff --git a/cs/src/core/Index/FASTER/FASTERImpl.cs b/cs/src/core/Index/FASTER/FASTERImpl.cs index 32fc159d4..52a025375 100644 --- a/cs/src/core/Index/FASTER/FASTERImpl.cs +++ b/cs/src/core/Index/FASTER/FASTERImpl.cs @@ -1887,52 +1887,20 @@ private void HeavyEnter(long hash) [MethodImpl(MethodImplOptions.AggressiveInlining)] private void BlockAllocate(int recordSize, out long logicalAddress) { - logicalAddress = hlog.Allocate(recordSize); - if (logicalAddress >= 0) return; - - while (logicalAddress < 0 && -logicalAddress >= hlog.ReadOnlyAddress) + while ((logicalAddress = hlog.TryAllocate(recordSize)) == 0) { InternalRefresh(); - hlog.CheckForAllocateComplete(ref logicalAddress); - if (logicalAddress < 0) - { - Thread.Yield(); - } - } - - logicalAddress = logicalAddress < 0 ? -logicalAddress : logicalAddress; - - if (logicalAddress < hlog.ReadOnlyAddress) - { - Debug.WriteLine("Allocated address is read-only, retrying"); - BlockAllocate(recordSize, out logicalAddress); + Thread.Yield(); } } - [MethodImpl(MethodImplOptions.AggressiveInlining)] private void BlockAllocateReadCache(int recordSize, out long logicalAddress) { - logicalAddress = readcache.Allocate(recordSize); - if (logicalAddress >= 0) - return; - - while (logicalAddress < 0 && -logicalAddress >= readcache.ReadOnlyAddress) + while ((logicalAddress = readcache.TryAllocate(recordSize)) == 0) { InternalRefresh(); - readcache.CheckForAllocateComplete(ref logicalAddress); - if (logicalAddress < 0) - { - Thread.Yield(); - } - } - - logicalAddress = logicalAddress < 0 ? 
-logicalAddress : logicalAddress; - - if (logicalAddress < readcache.ReadOnlyAddress) - { - Debug.WriteLine("Allocated address is read-only, retrying"); - BlockAllocateReadCache(recordSize, out logicalAddress); + Thread.Yield(); } } diff --git a/cs/src/core/Index/FasterLog/FasterLog.cs b/cs/src/core/Index/FasterLog/FasterLog.cs index f381689d9..29c382d80 100644 --- a/cs/src/core/Index/FasterLog/FasterLog.cs +++ b/cs/src/core/Index/FasterLog/FasterLog.cs @@ -73,6 +73,7 @@ public void Dispose() { allocator.Dispose(); epoch.Dispose(); + commitTcs.TrySetException(new ObjectDisposedException("Log has been disposed")); } /// @@ -399,7 +400,8 @@ private void Commit(long flushAddress) } _commitTask = commitTcs; - commitTcs = _newCommitTask; + if (commitTcs.Task.Status != TaskStatus.Faulted) + commitTcs = _newCommitTask; } _commitTask.SetResult(flushAddress); } diff --git a/cs/src/core/Index/FasterLog/FasterLogSettings.cs b/cs/src/core/Index/FasterLog/FasterLogSettings.cs index d6294cb32..00d6e73f1 100644 --- a/cs/src/core/Index/FasterLog/FasterLogSettings.cs +++ b/cs/src/core/Index/FasterLog/FasterLogSettings.cs @@ -20,18 +20,20 @@ public class FasterLogSettings public IDevice LogDevice = new NullDevice(); /// - /// Size of a segment (group of pages), in bits + /// Size of a page, in bits /// public int PageSizeBits = 22; /// /// Total size of in-memory part of log, in bits + /// Should be at least one page long /// Num pages = 2^(MemorySizeBits-PageSizeBits) /// - public int MemorySizeBits = 24; + public int MemorySizeBits = 23; /// /// Size of a segment (group of pages), in bits + /// This is the granularity of files on disk /// public int SegmentSizeBits = 30; From db68ae061b3518361fdaecb72a76e67f327bdfe6 Mon Sep 17 00:00:00 2001 From: Badrish Chandramouli Date: Mon, 7 Oct 2019 15:37:04 -0700 Subject: [PATCH 20/36] Added a test --- cs/test/FasterLogTests.cs | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/cs/test/FasterLogTests.cs b/cs/test/FasterLogTests.cs index 1c109d81d..d55c90838 100644 --- a/cs/test/FasterLogTests.cs +++ b/cs/test/FasterLogTests.cs @@ -4,6 +4,7 @@ using System; using System.IO; using System.Linq; +using System.Threading.Tasks; using FASTER.core; using NUnit.Framework; @@ -66,5 +67,43 @@ public void FasterLogTest1() log.ReleaseThread(); log.Dispose(); } + + [Test] + public async Task FasterLogTest2() + { + log = new FasterLog(new FasterLogSettings { LogDevice = device }); + byte[] data1 = new byte[10000]; + for (int i = 00; i < 10000; i++) data1[i] = (byte)i; + + using (var iter = log.Scan(0, long.MaxValue, scanBufferingMode: ScanBufferingMode.SinglePageBuffering)) + { + int i = 0; + while (i++ < 500) + { + var waitingReader = iter.WaitAsync(); + Assert.IsTrue(!waitingReader.IsCompleted); + + while (!log.TryAppend(data1, out _)) ; + Assert.IsFalse(waitingReader.IsCompleted); + + await log.FlushAndCommitAsync(); + while (!waitingReader.IsCompleted) ; + Assert.IsTrue(waitingReader.IsCompleted); + + var result = GetNext(iter); + Assert.IsTrue(result.SequenceEqual(data1)); + + var next = iter.GetNext(out _, out _); + Assert.IsFalse(next); + } + } + log.Dispose(); + } + + private byte[] GetNext(FasterLogScanIterator iter) + { + iter.GetNext(out Span entry, out _); + return entry.ToArray(); + } } } From 5caea66562e9117861be2ef49efc1328cc7f2939 Mon Sep 17 00:00:00 2001 From: Badrish Chandramouli Date: Mon, 7 Oct 2019 16:37:33 -0700 Subject: [PATCH 21/36] Improved sample, changed GetMemory to use byte[] instead of Span --- 
cs/playground/FasterLogSample/Program.cs | 253 +++++++++--------- .../core/Index/FasterLog/FasterLogIterator.cs | 10 +- cs/test/FasterLogTests.cs | 11 +- 3 files changed, 134 insertions(+), 140 deletions(-) diff --git a/cs/playground/FasterLogSample/Program.cs b/cs/playground/FasterLogSample/Program.cs index b9f8e013d..bc356f5d2 100644 --- a/cs/playground/FasterLogSample/Program.cs +++ b/cs/playground/FasterLogSample/Program.cs @@ -14,133 +14,9 @@ public class Program // Entry length can be between 1 and ((1 << FasterLogSettings.PageSizeBits) - 4) const int entryLength = 1 << 10; static readonly byte[] staticEntry = new byte[entryLength]; - static readonly ReadOnlySpanBatch spanBatch = new ReadOnlySpanBatch(10); static FasterLog log; static FasterLogScanIterator iter; - static void ReportThread() - { - long lastTime = 0; - long lastValue = log.TailAddress; - long lastIterValue = log.BeginAddress; - - Stopwatch sw = new Stopwatch(); - sw.Start(); - - while (true) - { - Thread.Sleep(5000); - - var nowTime = sw.ElapsedMilliseconds; - var nowValue = log.TailAddress; - - Console.WriteLine("Append Throughput: {0} MB/sec, Tail: {1}", - (nowValue - lastValue) / (1000 * (nowTime - lastTime)), nowValue); - lastValue = nowValue; - - if (iter != null) - { - var nowIterValue = iter.CurrentAddress; - Console.WriteLine("Scan Throughput: {0} MB/sec, Iter pos: {1}", - (nowIterValue - lastIterValue) / (1000 * (nowTime - lastTime)), nowIterValue); - lastIterValue = nowIterValue; - } - - lastTime = nowTime; - } - } - - static void CommitThread() - { - while (true) - { - Thread.Sleep(5); - log.FlushAndCommit(true); - - // Async version - // await Task.Delay(5); - // await log.FlushAndCommitAsync(); - } - } - - static void AppendThread() - { - while (true) - { - // TryAppend - can be used with throttling/back-off - // Accepts byte[] and ReadOnlySpan - while (!log.TryAppend(staticEntry, out _)) ; - - // Synchronous blocking append - // Accepts byte[] and ReadOnlySpan - // log.Append(entry); - - // Batched append - batch must fit on one page - // while (!log.TryAppend(spanBatch, out _)) ; - } - } - - static void ScanThread() - { - Random r = new Random(); - - byte[] entry = new byte[entryLength]; - for (int i = 0; i < entryLength; i++) - { - entry[i] = (byte)i; - } - - var entrySpan = new Span(entry); - - long lastAddress = 0; - Span result; - using (iter = log.Scan(log.BeginAddress, long.MaxValue)) - { - while (true) - { - while (!iter.GetNext(out result, out int length)) - { - iter.WaitAsync().GetAwaiter().GetResult(); - } - - // Memory pool variant: - // iter.GetNext(pool, out IMemoryOwner resultMem, out int length)) - - if (!result.SequenceEqual(entrySpan)) - { - if (result.Length != entrySpan.Length) - throw new Exception("Invalid entry found, expected length " + entrySpan.Length + ", actual length " + result.Length); - else - throw new Exception("Invalid entry found at offset " + FindDiff(result, entrySpan)); - } - - // Re-insert entry with small probability - if (r.Next(100) < 10) - { - log.Append(result); - } - - if (iter.CurrentAddress - lastAddress > 500_000_000) - { - log.TruncateUntil(iter.CurrentAddress); - lastAddress = iter.CurrentAddress; - } - } - } - } - - private static int FindDiff(Span b1, Span b2) - { - for (int i = 0; i < b1.Length; i++) - { - if (b1[i] != b2[i]) - { - return i; - } - } - return 0; - } - /// /// Main program entry point /// @@ -200,6 +76,27 @@ static void Main(string[] args) } } + + static void AppendThread() + { + while (true) + { + // TryAppend - can be used with 
throttling/back-off + // Accepts byte[] and ReadOnlySpan + while (!log.TryAppend(staticEntry, out _)) ; + + // Synchronous blocking append + // Accepts byte[] and ReadOnlySpan + // log.Append(entry); + + // Batched append - batch must fit on one page + // while (!log.TryAppend(spanBatch, out _)) ; + } + } + + /// + /// Async version of append + /// static async Task AppendAsync(int id) { bool batched = false; @@ -208,7 +105,7 @@ static async Task AppendAsync(int id) if (!batched) { - // Unbatched version - append each item with commit + // Single commit version - append each item with commit // Needs high parallelism (NumParallelTasks) for perf while (true) { @@ -224,7 +121,7 @@ static async Task AppendAsync(int id) } else { - // Batched version - we append many entries to memory, + // Group-commit version - we append many entries to memory, // then wait for commit periodically int count = 0; while (true) @@ -238,6 +135,109 @@ static async Task AppendAsync(int id) } } + static void ScanThread() + { + Random r = new Random(); + + long lastAddress = 0; + byte[] result; + using (iter = log.Scan(log.BeginAddress, long.MaxValue)) + { + while (true) + { + while (!iter.GetNext(out result, out int length)) + { + iter.WaitAsync().GetAwaiter().GetResult(); + } + + // Memory pool variant: + // iter.GetNext(pool, out IMemoryOwner resultMem, out int length)) + + if (Different(result, staticEntry, out int location)) + { + if (result.Length != staticEntry.Length) + throw new Exception("Invalid entry found, expected length " + staticEntry.Length + ", actual length " + result.Length); + else + throw new Exception("Invalid entry found at offset " + location); + } + + // Re-insert entry with small probability + if (r.Next(100) < 10) + { + log.Append(result); + } + + if (iter.CurrentAddress - lastAddress > 500_000_000) + { + log.TruncateUntil(iter.CurrentAddress); + lastAddress = iter.CurrentAddress; + } + } + } + } + + static void ReportThread() + { + long lastTime = 0; + long lastValue = log.TailAddress; + long lastIterValue = log.BeginAddress; + + Stopwatch sw = new Stopwatch(); + sw.Start(); + + while (true) + { + Thread.Sleep(5000); + + var nowTime = sw.ElapsedMilliseconds; + var nowValue = log.TailAddress; + + Console.WriteLine("Append Throughput: {0} MB/sec, Tail: {1}", + (nowValue - lastValue) / (1000 * (nowTime - lastTime)), nowValue); + lastValue = nowValue; + + if (iter != null) + { + var nowIterValue = iter.CurrentAddress; + Console.WriteLine("Scan Throughput: {0} MB/sec, Iter pos: {1}", + (nowIterValue - lastIterValue) / (1000 * (nowTime - lastTime)), nowIterValue); + lastIterValue = nowIterValue; + } + + lastTime = nowTime; + } + } + + static void CommitThread() + { + while (true) + { + Thread.Sleep(5); + log.FlushAndCommit(true); + + // Async version + // await Task.Delay(5); + // await log.FlushAndCommitAsync(); + } + } + + private static bool Different(byte[] b1, byte[] b2, out int location) + { + location = 0; + if (b1.Length != b2.Length) return true; + for (location = 0; location < b1.Length; location++) + { + if (b1[location] != b2[location]) + { + return true; + } + } + return false; + } + + // For batch append API + static readonly ReadOnlySpanBatch spanBatch = new ReadOnlySpanBatch(10); + private struct ReadOnlySpanBatch : IReadOnlySpanBatch { private readonly int batchSize; @@ -245,6 +245,5 @@ private struct ReadOnlySpanBatch : IReadOnlySpanBatch public ReadOnlySpan Get(int index) => staticEntry; public int TotalEntries() => batchSize; } - } } diff --git 
a/cs/src/core/Index/FasterLog/FasterLogIterator.cs b/cs/src/core/Index/FasterLog/FasterLogIterator.cs index 4af3c4c58..c6c39e44c 100644 --- a/cs/src/core/Index/FasterLog/FasterLogIterator.cs +++ b/cs/src/core/Index/FasterLog/FasterLogIterator.cs @@ -15,7 +15,7 @@ namespace FASTER.core /// /// Minimum length of returned span /// - public delegate Span GetMemory(int minLength); + public delegate byte[] GetMemory(int minLength); /// /// Scan iterator for hybrid log @@ -103,7 +103,7 @@ public async ValueTask WaitAsync() /// Copy of entry, if found /// Actual length of entry /// - public unsafe bool GetNext(out Span entry, out int entryLength) + public unsafe bool GetNext(out byte[] entry, out int entryLength) { if (GetNextInternal(out long physicalAddress, out entryLength, out bool epochTaken)) { @@ -112,15 +112,15 @@ public unsafe bool GetNext(out Span entry, out int entryLength) // Use user delegate to allocate memory entry = getMemory(entryLength); if (entry.Length < entryLength) - throw new Exception("Span provided has invalid length"); + throw new Exception("Byte array provided has invalid length"); } else { // We allocate a byte array from heap - entry = new Span(new byte[entryLength]); + entry = new byte[entryLength]; } - fixed (byte* bp = &entry.GetPinnableReference()) + fixed (byte* bp = entry) Buffer.MemoryCopy((void*)(4 + physicalAddress), bp, entryLength, entryLength); if (epochTaken) diff --git a/cs/test/FasterLogTests.cs b/cs/test/FasterLogTests.cs index d55c90838..184f42a2e 100644 --- a/cs/test/FasterLogTests.cs +++ b/cs/test/FasterLogTests.cs @@ -54,7 +54,7 @@ public void FasterLogTest1() using (var iter = log.Scan(0, long.MaxValue)) { int count = 0; - while (iter.GetNext(out Span result, out int length)) + while (iter.GetNext(out byte[] result, out int length)) { count++; Assert.IsTrue(result.SequenceEqual(entry)); @@ -90,7 +90,8 @@ public async Task FasterLogTest2() while (!waitingReader.IsCompleted) ; Assert.IsTrue(waitingReader.IsCompleted); - var result = GetNext(iter); + var curr = iter.GetNext(out byte[] result, out _); + Assert.IsTrue(curr); Assert.IsTrue(result.SequenceEqual(data1)); var next = iter.GetNext(out _, out _); @@ -99,11 +100,5 @@ public async Task FasterLogTest2() } log.Dispose(); } - - private byte[] GetNext(FasterLogScanIterator iter) - { - iter.GetNext(out Span entry, out _); - return entry.ToArray(); - } } } From 0f33d4a35300045d89e1d72b2f9ce24816749b04 Mon Sep 17 00:00:00 2001 From: Badrish Chandramouli Date: Mon, 7 Oct 2019 18:01:04 -0700 Subject: [PATCH 22/36] Update next address of iterator if GetNext fails early. 
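Without this fix, when GetNext returned false because the iterator had caught up with the committed region, nextAddress was left stale; a caller that awaited WaitAsync() and retried could then resume scanning from the wrong position. Setting nextAddress = currentAddress on the early-return path records the correct resume point.

A minimal sketch of the tailing-consumer loop this enables (illustrative only; assumes an async context, and Process() is a hypothetical placeholder for application logic, not part of this change):

    using (var iter = log.Scan(0, long.MaxValue))
    {
        while (true)
        {
            while (!iter.GetNext(out byte[] entry, out int length))
            {
                // Safe to wait and retry: the iterator now remembers its
                // resume point even when GetNext returns false early.
                await iter.WaitAsync();
            }
            Process(entry, length); // hypothetical consumer callback
        }
    }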
--- .../core/Index/FasterLog/FasterLogIterator.cs | 1 + cs/test/FasterLogTests.cs | 41 ++++++++++++++++++- 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/cs/src/core/Index/FasterLog/FasterLogIterator.cs b/cs/src/core/Index/FasterLog/FasterLogIterator.cs index c6c39e44c..b52a4c2f1 100644 --- a/cs/src/core/Index/FasterLog/FasterLogIterator.cs +++ b/cs/src/core/Index/FasterLog/FasterLogIterator.cs @@ -249,6 +249,7 @@ private unsafe bool GetNextInternal(out long physicalAddress, out int entryLengt if ((currentAddress >= endAddress) || (currentAddress >= fasterLog.CommittedUntilAddress)) { + nextAddress = currentAddress; return false; } diff --git a/cs/test/FasterLogTests.cs b/cs/test/FasterLogTests.cs index 184f42a2e..358b6ec35 100644 --- a/cs/test/FasterLogTests.cs +++ b/cs/test/FasterLogTests.cs @@ -73,7 +73,7 @@ public async Task FasterLogTest2() { log = new FasterLog(new FasterLogSettings { LogDevice = device }); byte[] data1 = new byte[10000]; - for (int i = 00; i < 10000; i++) data1[i] = (byte)i; + for (int i = 0; i < 10000; i++) data1[i] = (byte)i; using (var iter = log.Scan(0, long.MaxValue, scanBufferingMode: ScanBufferingMode.SinglePageBuffering)) { @@ -100,5 +100,44 @@ public async Task FasterLogTest2() } log.Dispose(); } + + [Test] + public async Task FasterLogTest3() + { + log = new FasterLog(new FasterLogSettings { LogDevice = device, PageSizeBits = 14 }); + byte[] data1 = new byte[10000]; + for (int i = 0; i < 10000; i++) data1[i] = (byte)i; + + using (var iter = log.Scan(0, long.MaxValue, scanBufferingMode: ScanBufferingMode.SinglePageBuffering)) + { + var appendResult = log.TryAppend(data1, out _); + Assert.IsTrue(appendResult); + await log.FlushAndCommitAsync(); + await iter.WaitAsync(); + var iterResult = iter.GetNext(out byte[] entry, out _); + Assert.IsTrue(iterResult); + + appendResult = log.TryAppend(data1, out _); + Assert.IsFalse(appendResult); + await iter.WaitAsync(); + + // Should read the "hole" and return false + iterResult = iter.GetNext(out entry, out _); + Assert.IsFalse(iterResult); + + // Should wait for next item + var task = iter.WaitAsync(); + Assert.IsFalse(task.IsCompleted); + + appendResult = log.TryAppend(data1, out _); + Assert.IsTrue(appendResult); + await log.FlushAndCommitAsync(); + + await task; + iterResult = iter.GetNext(out entry, out _); + Assert.IsTrue(iterResult); + } + log.Dispose(); + } } } From 2e59b432278e652541ffe9860619b5b342d37b3a Mon Sep 17 00:00:00 2001 From: Badrish Chandramouli Date: Wed, 9 Oct 2019 13:30:31 -0700 Subject: [PATCH 23/36] Added random read functionality (ReadAsync) for FasterLog. Moved GetMemory to FasterLogSettings instead of Scan. Speed up TruncateUntil. Updated nuspec. 
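ReadAsync performs a random read of a single entry at a given logical address, without creating an iterator: it takes epoch protection, issues the device read, and re-issues the read at the exact length if the first attempt (4-byte header plus estimatedLength) was too short, so a good length estimate avoids the second I/O. TruncateUntil is faster because ShiftBeginAddress now takes epoch protection internally and truncates the underlying files only when the new begin address crosses a segment boundary.

A minimal usage sketch (illustrative only; address stands for a logical address previously returned by an enqueue or observed from an iterator):

    // Random read of one entry; the length estimate is optional and only
    // matters for avoiding a re-issued I/O when the entry is large.
    (byte[] entry, int length) = await log.ReadAsync(address, estimatedLength: 100);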
--- cs/benchmark/FASTER.benchmark.csproj | 2 +- .../FasterLogSample/FasterLogSample.csproj | 2 +- cs/playground/FasterLogSample/Program.cs | 73 +-- .../StructSampleCore/StructSampleCore.csproj | 2 +- cs/src/core/Allocator/AllocatorBase.cs | 55 ++- cs/src/core/Allocator/AsyncIOContext.cs | 16 + cs/src/core/FASTER.core.csproj | 3 +- cs/src/core/FASTER.core.debug.nuspec | 18 +- cs/src/core/FASTER.core.nuspec | 18 +- cs/src/core/Index/FasterLog/FasterLog.cs | 417 ++++++++++++++---- .../core/Index/FasterLog/FasterLogIterator.cs | 13 +- .../core/Index/FasterLog/FasterLogSettings.cs | 12 + .../FASTER.devices.AzureStorageDevice.csproj | 2 +- .../FASTER.devices.AzureStorageDevice.nuspec | 10 +- cs/test/FASTER.test.csproj | 8 +- cs/test/FasterLogTests.cs | 20 +- 16 files changed, 494 insertions(+), 177 deletions(-) diff --git a/cs/benchmark/FASTER.benchmark.csproj b/cs/benchmark/FASTER.benchmark.csproj index 89e99cf29..304f6a288 100644 --- a/cs/benchmark/FASTER.benchmark.csproj +++ b/cs/benchmark/FASTER.benchmark.csproj @@ -36,7 +36,7 @@ - + diff --git a/cs/playground/FasterLogSample/FasterLogSample.csproj b/cs/playground/FasterLogSample/FasterLogSample.csproj index f0c1def2b..54367d67c 100644 --- a/cs/playground/FasterLogSample/FasterLogSample.csproj +++ b/cs/playground/FasterLogSample/FasterLogSample.csproj @@ -3,7 +3,7 @@ netcoreapp2.2 x64 - win7-x64 + win7-x64;linux-x64 diff --git a/cs/playground/FasterLogSample/Program.cs b/cs/playground/FasterLogSample/Program.cs index bc356f5d2..66107ccd5 100644 --- a/cs/playground/FasterLogSample/Program.cs +++ b/cs/playground/FasterLogSample/Program.cs @@ -20,8 +20,7 @@ public class Program /// /// Main program entry point /// - /// - static void Main(string[] args) + static void Main() { bool sync = true; var device = Devices.CreateLogDevice("D:\\logs\\hlog.log"); @@ -35,8 +34,8 @@ static void Main(string[] args) if (sync) { - // Append thread: create as many as needed - new Thread(new ThreadStart(AppendThread)).Start(); + // Log writer thread: create as many as needed + new Thread(new ThreadStart(LogWriterThread)).Start(); // Threads for scan, reporting, commit var t1 = new Thread(new ThreadStart(ScanThread)); @@ -62,7 +61,7 @@ static void Main(string[] args) for (int i = 0; i < NumParallelTasks; i++) { int local = i; - tasks[i] = Task.Run(() => AppendAsync(local)); + tasks[i] = Task.Run(() => AsyncLogWriter(local)); } // Threads for scan, reporting, commit @@ -77,27 +76,29 @@ static void Main(string[] args) } - static void AppendThread() + static void LogWriterThread() { while (true) { - // TryAppend - can be used with throttling/back-off + // TryEnqueue - can be used with throttling/back-off // Accepts byte[] and ReadOnlySpan - while (!log.TryAppend(staticEntry, out _)) ; + while (!log.TryEnqueue(staticEntry, out _)) ; - // Synchronous blocking append + // Synchronous blocking enqueue // Accepts byte[] and ReadOnlySpan - // log.Append(entry); + // log.Enqueue(entry); - // Batched append - batch must fit on one page - // while (!log.TryAppend(spanBatch, out _)) ; + // Batched enqueue - batch must fit on one page + // Add this to class: + // static readonly ReadOnlySpanBatch spanBatch = new ReadOnlySpanBatch(10); + // while (!log.TryEnqueue(spanBatch, out _)) ; } } /// - /// Async version of append + /// Async version of enqueue /// - static async Task AppendAsync(int id) + static async Task AsyncLogWriter(int id) { bool batched = false; @@ -105,28 +106,30 @@ static async Task AppendAsync(int id) if (!batched) { - // Single commit version - append each 
item with commit + // Single commit version - append each item and wait for commit // Needs high parallelism (NumParallelTasks) for perf + // Needs separate commit thread to perform regular commit + // Otherwise we commit only at page boundaries while (true) { try { - await log.AppendAsync(staticEntry); + await log.EnqueueAndWaitForCommitAsync(staticEntry); } catch (Exception ex) { - Console.WriteLine($"{nameof(AppendAsync)}({id}): {ex}"); + Console.WriteLine($"{nameof(AsyncLogWriter)}({id}): {ex}"); } } } else { - // Group-commit version - we append many entries to memory, + // Batched version - we enqueue many entries to memory, // then wait for commit periodically int count = 0; while (true) { - await log.AppendToMemoryAsync(staticEntry); + await log.EnqueueAsync(staticEntry); if (count++ % 100 == 0) { await log.WaitForCommitAsync(); @@ -138,9 +141,21 @@ static async Task AppendAsync(int id) static void ScanThread() { Random r = new Random(); - - long lastAddress = 0; byte[] result; + + // First we demonstrate single random read into specified log offset + if (log.CommittedUntilAddress > log.BeginAddress) + { + (result, _) = log.ReadAsync(log.BeginAddress).GetAwaiter().GetResult(); + if (Different(result, staticEntry, out int location)) + { + if (result.Length != staticEntry.Length) + throw new Exception("Invalid entry found, expected length " + staticEntry.Length + ", actual length " + result.Length); + else + throw new Exception("Invalid entry found at offset " + location); + } + } + using (iter = log.Scan(log.BeginAddress, long.MaxValue)) { while (true) @@ -164,14 +179,10 @@ static void ScanThread() // Re-insert entry with small probability if (r.Next(100) < 10) { - log.Append(result); + log.Enqueue(result); } - if (iter.CurrentAddress - lastAddress > 500_000_000) - { - log.TruncateUntil(iter.CurrentAddress); - lastAddress = iter.CurrentAddress; - } + log.TruncateUntil(iter.NextAddress); } } } @@ -213,11 +224,10 @@ static void CommitThread() while (true) { Thread.Sleep(5); - log.FlushAndCommit(true); + log.Commit(true); // Async version - // await Task.Delay(5); - // await log.FlushAndCommitAsync(); + // await log.CommitAsync(); } } @@ -235,9 +245,6 @@ private static bool Different(byte[] b1, byte[] b2, out int location) return false; } - // For batch append API - static readonly ReadOnlySpanBatch spanBatch = new ReadOnlySpanBatch(10); - private struct ReadOnlySpanBatch : IReadOnlySpanBatch { private readonly int batchSize; diff --git a/cs/playground/StructSampleCore/StructSampleCore.csproj b/cs/playground/StructSampleCore/StructSampleCore.csproj index 06d9ad194..eb5638298 100644 --- a/cs/playground/StructSampleCore/StructSampleCore.csproj +++ b/cs/playground/StructSampleCore/StructSampleCore.csproj @@ -1,7 +1,7 @@  - netcoreapp2.0 + netcoreapp2.2 x64 win7-x64;linux-x64 true diff --git a/cs/src/core/Allocator/AllocatorBase.cs b/cs/src/core/Allocator/AllocatorBase.cs index 8053410f0..79a58fde4 100644 --- a/cs/src/core/Allocator/AllocatorBase.cs +++ b/cs/src/core/Allocator/AllocatorBase.cs @@ -801,24 +801,32 @@ public bool ShiftReadOnlyAddress(long newReadOnlyAddress) public void ShiftBeginAddress(long newBeginAddress) { // First update the begin address - Utility.MonotonicUpdate(ref BeginAddress, newBeginAddress, out long oldBeginAddress); + var b = Utility.MonotonicUpdate(ref BeginAddress, newBeginAddress, out long oldBeginAddress); + b = b && (oldBeginAddress >> LogSegmentSizeBits != newBeginAddress >> LogSegmentSizeBits); + // Then the head address var h = 
Utility.MonotonicUpdate(ref HeadAddress, newBeginAddress, out long old); + // Finally the read-only address var r = Utility.MonotonicUpdate(ref ReadOnlyAddress, newBeginAddress, out old); - // Clean up until begin address - epoch.BumpCurrentEpoch(() => + if (h || r || b) { - if (r) + epoch.Resume(); + // Clean up until begin address + epoch.BumpCurrentEpoch(() => { - Utility.MonotonicUpdate(ref SafeReadOnlyAddress, newBeginAddress, out long _old); - Utility.MonotonicUpdate(ref FlushedUntilAddress, newBeginAddress, out _old); - } - if (h) OnPagesClosed(newBeginAddress); + if (r) + { + Utility.MonotonicUpdate(ref SafeReadOnlyAddress, newBeginAddress, out long _old); + Utility.MonotonicUpdate(ref FlushedUntilAddress, newBeginAddress, out _old); + } + if (h) OnPagesClosed(newBeginAddress); - TruncateUntilAddress(newBeginAddress); - }); + if (b) TruncateUntilAddress(newBeginAddress); + }); + epoch.Suspend(); + } } /// @@ -1101,6 +1109,33 @@ public void RecoveryReset(long tailAddress, long headAddress, long beginAddress) asyncResult); } + /// + /// Read record to memory - simple version + /// + /// + /// + /// + /// + internal void AsyncReadRecordToMemory(long fromLogical, int numBytes, IOCompletionCallback callback, ref SimpleReadContext context) + { + ulong fileOffset = (ulong)(AlignedPageSizeBytes * (fromLogical >> LogPageSizeBits) + (fromLogical & PageSizeMask)); + ulong alignedFileOffset = (ulong)(((long)fileOffset / sectorSize) * sectorSize); + + uint alignedReadLength = (uint)((long)fileOffset + numBytes - (long)alignedFileOffset); + alignedReadLength = (uint)((alignedReadLength + (sectorSize - 1)) & ~(sectorSize - 1)); + + context.record = bufferPool.Get((int)alignedReadLength); + context.record.valid_offset = (int)(fileOffset - alignedFileOffset); + context.record.available_bytes = (int)(alignedReadLength - (fileOffset - alignedFileOffset)); + context.record.required_bytes = numBytes; + + device.ReadAsync(alignedFileOffset, + (IntPtr)context.record.aligned_pointer, + alignedReadLength, + callback, + context); + } + /// /// Read pages from specified device /// diff --git a/cs/src/core/Allocator/AsyncIOContext.cs b/cs/src/core/Allocator/AsyncIOContext.cs index 07c535593..f0ce5fae9 100644 --- a/cs/src/core/Allocator/AsyncIOContext.cs +++ b/cs/src/core/Allocator/AsyncIOContext.cs @@ -3,6 +3,7 @@ using System; using System.Collections.Concurrent; +using System.Threading; namespace FASTER.core { @@ -61,4 +62,19 @@ public void Dispose() record.Return(); } } + + internal class SimpleReadContext : IAsyncResult + { + public long logicalAddress; + public SectorAlignedMemory record; + public SemaphoreSlim completedRead; + + public object AsyncState => throw new NotImplementedException(); + + public WaitHandle AsyncWaitHandle => throw new NotImplementedException(); + + public bool CompletedSynchronously => throw new NotImplementedException(); + + public bool IsCompleted => throw new NotImplementedException(); + } } diff --git a/cs/src/core/FASTER.core.csproj b/cs/src/core/FASTER.core.csproj index ae1a7e8fb..dc808cfd7 100644 --- a/cs/src/core/FASTER.core.csproj +++ b/cs/src/core/FASTER.core.csproj @@ -36,7 +36,8 @@ - + + \ No newline at end of file diff --git a/cs/src/core/FASTER.core.debug.nuspec b/cs/src/core/FASTER.core.debug.nuspec index 3753d4d5d..48728a1b1 100644 --- a/cs/src/core/FASTER.core.debug.nuspec +++ b/cs/src/core/FASTER.core.debug.nuspec @@ -6,20 +6,26 @@ FASTER (Debug) Microsoft Microsoft - https://github.com/Microsoft/FASTER + https://github.com/microsoft/FASTER MIT true - Debug 
version. FASTER is a fast concurrent key-value store that also supports indexing of larger-than-memory data - See the project website at https://github.com/Microsoft/FASTER for more details + Debug version. FASTER is a fast concurrent key-value store and log for larger-than-memory data. + See the project website at https://github.com/microsoft/FASTER for more details. © Microsoft Corporation. All rights reserved. en-US - key-value store dictionary hashtable concurrent log persistent + key-value store dictionary hashtable concurrent log persistent commit write-ahead - + + + + - + + + + diff --git a/cs/src/core/FASTER.core.nuspec b/cs/src/core/FASTER.core.nuspec index c2d29ea49..294cb75a2 100644 --- a/cs/src/core/FASTER.core.nuspec +++ b/cs/src/core/FASTER.core.nuspec @@ -6,20 +6,26 @@ FASTER Microsoft Microsoft - https://github.com/Microsoft/FASTER + https://github.com/microsoft/FASTER MIT true - FASTER is a fast concurrent key-value store that also supports indexing of larger-than-memory data - See the project website at https://github.com/Microsoft/FASTER for more details + FASTER is a fast concurrent key-value store and log for larger-than-memory data. + See the project website at https://github.com/microsoft/FASTER for more details. © Microsoft Corporation. All rights reserved. en-US - key-value store dictionary hashtable concurrent log persistent + key-value store dictionary hashtable concurrent log persistent commit write-ahead - + + + + - + + + + diff --git a/cs/src/core/Index/FasterLog/FasterLog.cs b/cs/src/core/Index/FasterLog/FasterLog.cs index 29c382d80..4b9352831 100644 --- a/cs/src/core/Index/FasterLog/FasterLog.cs +++ b/cs/src/core/Index/FasterLog/FasterLog.cs @@ -4,6 +4,7 @@ #pragma warning disable 0162 using System; +using System.Diagnostics; using System.IO; using System.Runtime.CompilerServices; using System.Threading; @@ -19,7 +20,8 @@ public class FasterLog : IDisposable { private readonly BlittableAllocator allocator; private readonly LightEpoch epoch; - private ILogCommitManager logCommitManager; + private readonly ILogCommitManager logCommitManager; + private readonly GetMemory getMemory; private TaskCompletionSource commitTcs = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); /// @@ -38,7 +40,7 @@ public class FasterLog : IDisposable public long FlushedUntilAddress => allocator.FlushedUntilAddress; /// - /// Log commit until address + /// Log committed until address /// public long CommittedUntilAddress; @@ -57,6 +59,7 @@ public FasterLog(FasterLogSettings logSettings) new LocalLogCommitManager(logSettings.LogCommitFile ?? 
logSettings.LogDevice.FileName + ".commit"); + getMemory = logSettings.GetMemory; epoch = new LightEpoch(); CommittedUntilAddress = Constants.kFirstValidAddress; allocator = new BlittableAllocator( @@ -76,38 +79,53 @@ public void Dispose() commitTcs.TrySetException(new ObjectDisposedException("Log has been disposed")); } + #region Enqueue /// - /// Append entry to log + /// Enqueue entry to log (in memory) - no guarantee of flush/commit /// - /// + /// Entry to be enqueued to log /// Logical address of added entry - public long Append(ReadOnlySpan entry) + public long Enqueue(byte[] entry) { long logicalAddress; - while (!TryAppend(entry, out logicalAddress)) ; + while (!TryEnqueue(entry, out logicalAddress)) ; return logicalAddress; } /// - /// Append entry to log + /// Enqueue entry to log (in memory) - no guarantee of flush/commit /// - /// + /// Entry to be enqueued to log /// Logical address of added entry - public long Append(byte[] entry) + public long Enqueue(ReadOnlySpan entry) { long logicalAddress; - while (!TryAppend(entry, out logicalAddress)) ; + while (!TryEnqueue(entry, out logicalAddress)) ; return logicalAddress; } /// - /// Try to append entry to log. If it returns true, we are + /// Enqueue batch of entries to log (in memory) - no guarantee of flush/commit + /// + /// Batch of entries to be enqueued to log + /// Logical address of added entry + public long Enqueue(IReadOnlySpanBatch readOnlySpanBatch) + { + long logicalAddress; + while (!TryEnqueue(readOnlySpanBatch, out logicalAddress)) ; + return logicalAddress; + } + #endregion + + #region TryEnqueue + /// + /// Try to enqueue entry to log (in memory). If it returns true, we are /// done. If it returns false, we need to retry. /// - /// Entry to be appended to log + /// Entry to be enqueued to log /// Logical address of added entry /// Whether the append succeeded - public unsafe bool TryAppend(byte[] entry, out long logicalAddress) + public unsafe bool TryEnqueue(byte[] entry, out long logicalAddress) { logicalAddress = 0; @@ -137,7 +155,7 @@ public unsafe bool TryAppend(byte[] entry, out long logicalAddress) /// Entry to be appended to log /// Logical address of added entry /// Whether the append succeeded - public unsafe bool TryAppend(ReadOnlySpan entry, out long logicalAddress) + public unsafe bool TryEnqueue(ReadOnlySpan entry, out long logicalAddress) { logicalAddress = 0; @@ -161,109 +179,104 @@ public unsafe bool TryAppend(ReadOnlySpan entry, out long logicalAddress) } /// - /// Try to append batch of entries as a single atomic unit. Entire batch - /// needs to fit on one page. + /// Try to enqueue batch of entries as a single atomic unit (to memory). Entire + /// batch needs to fit on one log page. /// /// Batch to be appended to log /// Logical address of first added entry /// Whether the append succeeded - public bool TryAppend(IReadOnlySpanBatch readOnlySpanBatch, out long logicalAddress) + public bool TryEnqueue(IReadOnlySpanBatch readOnlySpanBatch, out long logicalAddress) { return TryAppend(readOnlySpanBatch, out logicalAddress, out _); } + #endregion + #region EnqueueAsync /// - /// Append entry to log (async) - completes after entry is flushed to storage + /// Enqueue entry to log in memory (async) - completes after entry is + /// appended to memory, NOT committed to storage. 
/// /// /// - public async ValueTask AppendAsync(byte[] entry) + public async ValueTask EnqueueAsync(byte[] entry) { long logicalAddress; - // Phase 1: wait for commit to memory while (true) { var task = CommitTask; - if (TryAppend(entry, out logicalAddress)) + if (TryEnqueue(entry, out logicalAddress)) break; await task; } - // Phase 2: wait for commit/flush to storage - while (true) - { - var task = CommitTask; - if (CommittedUntilAddress < logicalAddress + 4 + entry.Length) - { - await task; - } - else - break; - } - return logicalAddress; } /// - /// Append batch of entries to log (async) - completes after batch is flushed to storage + /// Enqueue entry to log in memory (async) - completes after entry is + /// appended to memory, NOT committed to storage. /// - /// + /// /// - public async ValueTask AppendAsync(IReadOnlySpanBatch readOnlySpanBatch) + public async ValueTask EnqueueAsync(ReadOnlyMemory entry) { long logicalAddress; - int allocatedLength; - // Phase 1: wait for commit to memory while (true) { var task = CommitTask; - if (TryAppend(readOnlySpanBatch, out logicalAddress, out allocatedLength)) + if (TryEnqueue(entry.Span, out logicalAddress)) break; await task; } - // Phase 2: wait for commit/flush to storage - while (true) - { - var task = CommitTask; - if (CommittedUntilAddress < logicalAddress + allocatedLength) - { - await task; - } - else - break; - } - return logicalAddress; } /// - /// Append entry to log in memory (async) - completes after entry is appended - /// to memory, not necessarily committed to storage. + /// Enqueue batch of entries to log in memory (async) - completes after entry is + /// appended to memory, NOT committed to storage. /// - /// + /// /// - public async ValueTask AppendToMemoryAsync(byte[] entry) + public async ValueTask EnqueueAsync(IReadOnlySpanBatch readOnlySpanBatch) { long logicalAddress; while (true) { var task = CommitTask; - if (TryAppend(entry, out logicalAddress)) + if (TryEnqueue(readOnlySpanBatch, out logicalAddress)) break; await task; } return logicalAddress; } + #endregion + + #region WaitForCommit and WaitForCommitAsync + + /// + /// Spin-wait for enqueues, until tail or specified address, to commit to + /// storage. Does NOT itself issue a commit, just waits for commit. So you should + /// ensure that someone else causes the commit to happen. + /// + /// Address until which we should wait for commit, default 0 for tail of log + /// + public void WaitForCommit(long untilAddress = 0) + { + var tailAddress = untilAddress; + if (tailAddress == 0) tailAddress = allocator.GetTailAddress(); + + while (CommittedUntilAddress < tailAddress) ; + } /// - /// Wait for all prior appends (in memory) to commit to storage. Does not - /// itself issue a commit, just waits for commit. So you should ensure that - /// someone else causes the commit to happen. + /// Wait for appends (in memory), until tail or specified address, to commit to + /// storage. Does NOT itself issue a commit, just waits for commit. So you should + /// ensure that someone else causes the commit to happen. /// /// Address until which we should wait for commit, default 0 for tail of log /// @@ -283,17 +296,20 @@ public async ValueTask WaitForCommitAsync(long untilAddress = 0) break; } } + #endregion + + #region Commit /// - /// Flush the log until tail + /// Issue commit request for log (until tail) /// - /// If true, wait until flush completes. Otherwise, issue flush and return. + /// If true, spin-wait until commit completes. 
Otherwise, issue commit and return immediately. /// - public long FlushAndCommit(bool spinWait = false) + public long Commit(bool spinWait = false) { epoch.Resume(); allocator.ShiftReadOnlyToTail(out long tailAddress); - + if (spinWait) { while (CommittedUntilAddress < tailAddress) @@ -307,12 +323,13 @@ public long FlushAndCommit(bool spinWait = false) } /// - /// Async flush log until tail + /// Async commit log (until tail), completes only when we + /// complete the commit /// /// - public async ValueTask FlushAndCommitAsync() + public async ValueTask CommitAsync() { - var tailAddress = FlushAndCommit(); + var tailAddress = Commit(); while (true) { @@ -325,7 +342,161 @@ public async ValueTask FlushAndCommitAsync() break; } return tailAddress; - } + } + + #endregion + + #region EnqueueAndWaitForCommit + + /// + /// Append entry to log - spin-waits until entry is committed to storage. + /// Does NOT itself issue flush! + /// + /// + /// + public long EnqueueAndWaitForCommit(byte[] entry) + { + long logicalAddress; + while (!TryEnqueue(entry, out logicalAddress)) ; + while (CommittedUntilAddress < logicalAddress + 4 + entry.Length) ; + return logicalAddress; + } + + /// + /// Append entry to log - spin-waits until entry is committed to storage. + /// Does NOT itself issue flush! + /// + /// + /// + public long EnqueueAndWaitForCommit(ReadOnlySpan entry) + { + long logicalAddress; + while (!TryEnqueue(entry, out logicalAddress)) ; + while (CommittedUntilAddress < logicalAddress + 4 + entry.Length) ; + return logicalAddress; + } + + /// + /// Append batch of entries to log - spin-waits until entry is committed to storage. + /// Does NOT itself issue flush! + /// + /// + /// + public long EnqueueAndWaitForCommit(IReadOnlySpanBatch readOnlySpanBatch) + { + long logicalAddress; + while (!TryEnqueue(readOnlySpanBatch, out logicalAddress)) ; + while (CommittedUntilAddress < logicalAddress + 1) ; + return logicalAddress; + } + + #endregion + + #region EnqueueAndWaitForCommitAsync + + /// + /// Append entry to log (async) - completes after entry is committed to storage. + /// Does NOT itself issue flush! + /// + /// + /// + public async ValueTask EnqueueAndWaitForCommitAsync(byte[] entry) + { + long logicalAddress; + + // Phase 1: wait for commit to memory + while (true) + { + var task = CommitTask; + if (TryEnqueue(entry, out logicalAddress)) + break; + await task; + } + + // Phase 2: wait for commit/flush to storage + while (true) + { + var task = CommitTask; + if (CommittedUntilAddress < logicalAddress + 4 + entry.Length) + { + await task; + } + else + break; + } + + return logicalAddress; + } + + /// + /// Append entry to log (async) - completes after entry is committed to storage. + /// Does NOT itself issue flush! + /// + /// + /// + public async ValueTask EnqueueAndWaitForCommitAsync(ReadOnlyMemory entry) + { + long logicalAddress; + + // Phase 1: wait for commit to memory + while (true) + { + var task = CommitTask; + if (TryEnqueue(entry.Span, out logicalAddress)) + break; + await task; + } + + // Phase 2: wait for commit/flush to storage + while (true) + { + var task = CommitTask; + if (CommittedUntilAddress < logicalAddress + 4 + entry.Length) + { + await task; + } + else + break; + } + + return logicalAddress; + } + + /// + /// Append batch of entries to log (async) - completes after batch is committed to storage. + /// Does NOT itself issue flush! 
+ /// + /// + /// + public async ValueTask EnqueueAndWaitForCommitAsync(IReadOnlySpanBatch readOnlySpanBatch) + { + long logicalAddress; + int allocatedLength; + + // Phase 1: wait for commit to memory + while (true) + { + var task = CommitTask; + if (TryAppend(readOnlySpanBatch, out logicalAddress, out allocatedLength)) + break; + await task; + } + + // Phase 2: wait for commit/flush to storage + while (true) + { + var task = CommitTask; + if (CommittedUntilAddress < logicalAddress + allocatedLength) + { + await task; + } + else + break; + } + + return logicalAddress; + } + #endregion /// /// Truncate the log until, but not including, untilAddress @@ -333,9 +504,7 @@ public async ValueTask FlushAndCommitAsync() /// public void TruncateUntil(long untilAddress) { - epoch.Resume(); allocator.ShiftBeginAddress(untilAddress); - epoch.Suspend(); } /// @@ -343,32 +512,39 @@ public void TruncateUntil(long untilAddress) /// /// Begin address for scan /// End address for scan (or long.MaxValue for tailing) - /// Delegate to provide user memory where data gets copied to /// Use single or double buffering /// - public FasterLogScanIterator Scan(long beginAddress, long endAddress, GetMemory getMemory = null, ScanBufferingMode scanBufferingMode = ScanBufferingMode.DoublePageBuffering) + public FasterLogScanIterator Scan(long beginAddress, long endAddress, ScanBufferingMode scanBufferingMode = ScanBufferingMode.DoublePageBuffering) { return new FasterLogScanIterator(this, allocator, beginAddress, endAddress, getMemory, scanBufferingMode, epoch); } /// - /// Create and pin epoch entry for this thread - use with ReleaseThread - /// if you manage the thread. - /// DO NOT USE WITH ASYNC CODE - /// - public void AcquireThread() - { - epoch.Acquire(); - } - - /// - /// Dispose epoch entry for this thread. Use with AcquireThread - /// if you manage the thread. 
- /// DO NOT USE WITH ASYNC CODE + /// Random read record from log, at given address /// - public void ReleaseThread() + /// Logical address to read from + /// Estimated length of entry, if known + /// + public async ValueTask<(byte[], int)> ReadAsync(long address, int estimatedLength = 0) { - epoch.Release(); + epoch.Resume(); + if (address >= CommittedUntilAddress || address < BeginAddress) + { + epoch.Suspend(); + return default; + } + var ctx = new SimpleReadContext + { + logicalAddress = address, + completedRead = new SemaphoreSlim(0) + }; + unsafe + { + allocator.AsyncReadRecordToMemory(address, 4 + estimatedLength, AsyncGetFromDiskCallback, ref ctx); + } + epoch.Suspend(); + await ctx.completedRead.WaitAsync(); + return GetRecordAndFree(ctx.record); } [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -382,9 +558,11 @@ private int Align(int length) /// private void Commit(long flushAddress) { - FasterLogRecoveryInfo info = new FasterLogRecoveryInfo(); - info.FlushedUntilAddress = flushAddress; - info.BeginAddress = allocator.BeginAddress; + FasterLogRecoveryInfo info = new FasterLogRecoveryInfo + { + FlushedUntilAddress = flushAddress, + BeginAddress = allocator.BeginAddress + }; var _newCommitTask = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); TaskCompletionSource _commitTask; @@ -470,5 +648,66 @@ private unsafe bool TryAppend(IReadOnlySpanBatch readOnlySpanBatch, out long log epoch.Suspend(); return true; } + + private unsafe void AsyncGetFromDiskCallback(uint errorCode, uint numBytes, NativeOverlapped* overlap) + { + var ctx = (SimpleReadContext)Overlapped.Unpack(overlap).AsyncResult; + + if (errorCode != 0) + { + Trace.TraceError("OverlappedStream GetQueuedCompletionStatus error: {0}", errorCode); + ctx.record.Return(); + ctx.record = null; + ctx.completedRead.Release(); + } + else + { + var record = ctx.record.GetValidPointer(); + var length = *(int*)record; + + if (length < 0 || length > allocator.PageSize) + { + Debug.WriteLine("Invalid record length found: " + length); + ctx.record.Return(); + ctx.record = null; + ctx.completedRead.Release(); + } + else + { + int requiredBytes = 4 + length; + if (ctx.record.available_bytes >= requiredBytes) + { + ctx.completedRead.Release(); + } + else + { + ctx.record.Return(); + allocator.AsyncReadRecordToMemory(ctx.logicalAddress, requiredBytes, AsyncGetFromDiskCallback, ref ctx); + } + } + } + Overlapped.Free(overlap); + } + + private (byte[], int) GetRecordAndFree(SectorAlignedMemory record) + { + if (record == null) + return (null, 0); + + byte[] result; + int length; + unsafe + { + var ptr = record.GetValidPointer(); + length = *(int*)ptr; + result = getMemory != null ? 
getMemory(length) : new byte[length]; + fixed (byte* bp = result) + { + Buffer.MemoryCopy(ptr + 4, bp, length, length); + } + } + record.Return(); + return (result, length); + } } } diff --git a/cs/src/core/Index/FasterLog/FasterLogIterator.cs b/cs/src/core/Index/FasterLog/FasterLogIterator.cs index b52a4c2f1..436785aed 100644 --- a/cs/src/core/Index/FasterLog/FasterLogIterator.cs +++ b/cs/src/core/Index/FasterLog/FasterLogIterator.cs @@ -10,13 +10,6 @@ namespace FASTER.core { - /// - /// Delegate for getting memory from user - /// - /// Minimum length of returned span - /// - public delegate byte[] GetMemory(int minLength); - /// /// Scan iterator for hybrid log /// @@ -33,12 +26,16 @@ public class FasterLogScanIterator : IDisposable private readonly GetMemory getMemory; private long currentAddress, nextAddress; - /// /// Current address /// public long CurrentAddress => currentAddress; + /// + /// Next address + /// + public long NextAddress => nextAddress; + /// /// Constructor /// diff --git a/cs/src/core/Index/FasterLog/FasterLogSettings.cs b/cs/src/core/Index/FasterLog/FasterLogSettings.cs index 00d6e73f1..f0ad92245 100644 --- a/cs/src/core/Index/FasterLog/FasterLogSettings.cs +++ b/cs/src/core/Index/FasterLog/FasterLogSettings.cs @@ -9,6 +9,13 @@ namespace FASTER.core { + /// + /// Delegate for getting memory from user + /// + /// Minimum length of returned byte array + /// + public delegate byte[] GetMemory(int minLength); + /// /// FASTER Log Settings /// @@ -49,6 +56,11 @@ public class FasterLogSettings /// public string LogCommitFile = null; + /// + /// User callback to allocate memory for read entries + /// + public GetMemory GetMemory = null; + internal LogSettings GetLogSettings() { return new LogSettings diff --git a/cs/src/devices/AzureStorageDevice/FASTER.devices.AzureStorageDevice.csproj b/cs/src/devices/AzureStorageDevice/FASTER.devices.AzureStorageDevice.csproj index de06d0e90..a1d1766eb 100644 --- a/cs/src/devices/AzureStorageDevice/FASTER.devices.AzureStorageDevice.csproj +++ b/cs/src/devices/AzureStorageDevice/FASTER.devices.AzureStorageDevice.csproj @@ -35,7 +35,7 @@ - + diff --git a/cs/src/devices/AzureStorageDevice/FASTER.devices.AzureStorageDevice.nuspec b/cs/src/devices/AzureStorageDevice/FASTER.devices.AzureStorageDevice.nuspec index 85c52a7f8..4c13700fa 100644 --- a/cs/src/devices/AzureStorageDevice/FASTER.devices.AzureStorageDevice.nuspec +++ b/cs/src/devices/AzureStorageDevice/FASTER.devices.AzureStorageDevice.nuspec @@ -6,21 +6,21 @@ Azure storage device for FASTER Microsoft Microsoft - https://github.com/Microsoft/FASTER + https://github.com/microsoft/FASTER MIT true - This is a FASTER device implementation for Azure Storage (page blobs). FASTER is a fast concurrent key-value store that also supports indexing of larger-than-memory data. - See the project website at https://github.com/Microsoft/FASTER for more details + This is a FASTER device implementation for Azure Storage (page blobs). FASTER is a fast concurrent key-value store and log for larger-than-memory data. + See the project website at https://github.com/microsoft/FASTER for more details © Microsoft Corporation. All rights reserved. 
en-US key-value store dictionary hashtable concurrent log persistent azure storage FASTER - + - + diff --git a/cs/test/FASTER.test.csproj b/cs/test/FASTER.test.csproj index 41d0226cc..f36620739 100644 --- a/cs/test/FASTER.test.csproj +++ b/cs/test/FASTER.test.csproj @@ -1,7 +1,7 @@  - net46;netcoreapp2.0 + net46;netcoreapp2.2 AnyCPU;x64 true @@ -38,9 +38,9 @@ 1701;1702;1591 - - - + + + diff --git a/cs/test/FasterLogTests.cs b/cs/test/FasterLogTests.cs index 358b6ec35..ccec889f2 100644 --- a/cs/test/FasterLogTests.cs +++ b/cs/test/FasterLogTests.cs @@ -39,7 +39,6 @@ public void TearDown() public void FasterLogTest1() { log = new FasterLog(new FasterLogSettings { LogDevice = device }); - log.AcquireThread(); byte[] entry = new byte[entryLength]; for (int i = 0; i < entryLength; i++) @@ -47,9 +46,9 @@ public void FasterLogTest1() for (int i = 0; i < numEntries; i++) { - log.Append(entry); + log.Enqueue(entry); } - log.FlushAndCommit(true); + log.Commit(true); using (var iter = log.Scan(0, long.MaxValue)) { @@ -64,7 +63,6 @@ public void FasterLogTest1() Assert.IsTrue(count == numEntries); } - log.ReleaseThread(); log.Dispose(); } @@ -83,10 +81,10 @@ public async Task FasterLogTest2() var waitingReader = iter.WaitAsync(); Assert.IsTrue(!waitingReader.IsCompleted); - while (!log.TryAppend(data1, out _)) ; + while (!log.TryEnqueue(data1, out _)) ; Assert.IsFalse(waitingReader.IsCompleted); - await log.FlushAndCommitAsync(); + await log.CommitAsync(); while (!waitingReader.IsCompleted) ; Assert.IsTrue(waitingReader.IsCompleted); @@ -110,14 +108,14 @@ public async Task FasterLogTest3() using (var iter = log.Scan(0, long.MaxValue, scanBufferingMode: ScanBufferingMode.SinglePageBuffering)) { - var appendResult = log.TryAppend(data1, out _); + var appendResult = log.TryEnqueue(data1, out _); Assert.IsTrue(appendResult); - await log.FlushAndCommitAsync(); + await log.CommitAsync(); await iter.WaitAsync(); var iterResult = iter.GetNext(out byte[] entry, out _); Assert.IsTrue(iterResult); - appendResult = log.TryAppend(data1, out _); + appendResult = log.TryEnqueue(data1, out _); Assert.IsFalse(appendResult); await iter.WaitAsync(); @@ -129,9 +127,9 @@ public async Task FasterLogTest3() var task = iter.WaitAsync(); Assert.IsFalse(task.IsCompleted); - appendResult = log.TryAppend(data1, out _); + appendResult = log.TryEnqueue(data1, out _); Assert.IsTrue(appendResult); - await log.FlushAndCommitAsync(); + await log.CommitAsync(); await task; iterResult = iter.GetNext(out entry, out _); From aa4fef31304ba6e4747aac522deb292eea9c9805 Mon Sep 17 00:00:00 2001 From: Badrish Chandramouli Date: Wed, 9 Oct 2019 16:08:26 -0700 Subject: [PATCH 24/36] Ensure begin addresses commit if needed, even when tail addresses do not change. Added CommittedBeginAddress metric. 
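A minimal usage sketch of the new behavior (hypothetical code, not part of the diff below; assumes an open FasterLog "log" and a FasterLogScanIterator "iter"): TruncateUntil advances the begin address in memory only, and the new CommittedBeginAddress catches up at the next commit, even when the tail address has not moved.

    // Hypothetical sketch of truncation plus commit of the begin address
    log.TruncateUntil(iter.NextAddress);  // in-memory: BeginAddress advances immediately
    // Here, log.CommittedBeginAddress may still trail log.BeginAddress
    await log.CommitAsync();              // persists begin address even if TailAddress is unchanged
    // Now log.CommittedBeginAddress == log.BeginAddress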
--- cs/src/core/Allocator/AllocatorBase.cs | 11 ++- cs/src/core/Index/FasterLog/FasterLog.cs | 96 ++++++++++++------- .../core/Index/FasterLog/ILogCommitManager.cs | 7 +- .../Index/FasterLog/LocalLogCommitManager.cs | 5 +- cs/test/FasterLogTests.cs | 36 +++++++ 5 files changed, 108 insertions(+), 47 deletions(-) diff --git a/cs/src/core/Allocator/AllocatorBase.cs b/cs/src/core/Allocator/AllocatorBase.cs index 79a58fde4..7fa2d009d 100644 --- a/cs/src/core/Allocator/AllocatorBase.cs +++ b/cs/src/core/Allocator/AllocatorBase.cs @@ -769,15 +769,17 @@ private bool CannotAllocate(int page) /// Used by applications to make the current state of the database immutable quickly /// /// - public void ShiftReadOnlyToTail(out long tailAddress) + public bool ShiftReadOnlyToTail(out long tailAddress) { tailAddress = GetTailAddress(); long localTailAddress = tailAddress; long currentReadOnlyOffset = ReadOnlyAddress; if (Utility.MonotonicUpdate(ref ReadOnlyAddress, tailAddress, out long oldReadOnlyOffset)) { - epoch.BumpCurrentEpoch(() => OnPagesMarkedReadOnly(localTailAddress, false)); + epoch.BumpCurrentEpoch(() => OnPagesMarkedReadOnly(localTailAddress)); + return true; } + return false; } /// @@ -788,7 +790,7 @@ public bool ShiftReadOnlyAddress(long newReadOnlyAddress) { if (Utility.MonotonicUpdate(ref ReadOnlyAddress, newReadOnlyAddress, out long oldReadOnlyOffset)) { - epoch.BumpCurrentEpoch(() => OnPagesMarkedReadOnly(newReadOnlyAddress, false)); + epoch.BumpCurrentEpoch(() => OnPagesMarkedReadOnly(newReadOnlyAddress)); return true; } return false; @@ -843,8 +845,7 @@ protected virtual void TruncateUntilAddress(long toAddress) /// Flush: send page to secondary store /// /// - /// - public void OnPagesMarkedReadOnly(long newSafeReadOnlyAddress, bool waitForPendingFlushComplete = false) + public void OnPagesMarkedReadOnly(long newSafeReadOnlyAddress) { if (Utility.MonotonicUpdate(ref SafeReadOnlyAddress, newSafeReadOnlyAddress, out long oldSafeReadOnlyAddress)) { diff --git a/cs/src/core/Index/FasterLog/FasterLog.cs b/cs/src/core/Index/FasterLog/FasterLog.cs index 4b9352831..2b5d52935 100644 --- a/cs/src/core/Index/FasterLog/FasterLog.cs +++ b/cs/src/core/Index/FasterLog/FasterLog.cs @@ -44,10 +44,15 @@ public class FasterLog : IDisposable /// public long CommittedUntilAddress; + /// + /// Log committed begin address + /// + public long CommittedBeginAddress; + /// /// Task notifying commit completions /// - public Task CommitTask => commitTcs.Task; + internal Task CommitTask => commitTcs.Task; /// /// Create new log instance @@ -62,9 +67,11 @@ public FasterLog(FasterLogSettings logSettings) getMemory = logSettings.GetMemory; epoch = new LightEpoch(); CommittedUntilAddress = Constants.kFirstValidAddress; + CommittedBeginAddress = Constants.kFirstValidAddress; + allocator = new BlittableAllocator( logSettings.GetLogSettings(), null, - null, epoch, e => Commit(e)); + null, epoch, e => CommitCallback(e)); allocator.Initialize(); Restore(); } @@ -305,21 +312,9 @@ public async ValueTask WaitForCommitAsync(long untilAddress = 0) /// /// If true, spin-wait until commit completes. Otherwise, issue commit and return immediately. 
/// - public long Commit(bool spinWait = false) + public void Commit(bool spinWait = false) { - epoch.Resume(); - allocator.ShiftReadOnlyToTail(out long tailAddress); - - if (spinWait) - { - while (CommittedUntilAddress < tailAddress) - { - epoch.ProtectAndDrain(); - Thread.Yield(); - } - } - epoch.Suspend(); - return tailAddress; + CommitInternal(spinWait); } /// @@ -327,9 +322,9 @@ public long Commit(bool spinWait = false) /// complete the commit /// /// - public async ValueTask CommitAsync() + public async ValueTask CommitAsync() { - var tailAddress = Commit(); + var tailAddress = CommitInternal(); while (true) { @@ -341,7 +336,6 @@ public async ValueTask CommitAsync() else break; } - return tailAddress; } #endregion @@ -556,32 +550,34 @@ private int Align(int length) /// /// Commit log /// - private void Commit(long flushAddress) + private void CommitCallback(long flushAddress) { - FasterLogRecoveryInfo info = new FasterLogRecoveryInfo - { - FlushedUntilAddress = flushAddress, - BeginAddress = allocator.BeginAddress - }; - - var _newCommitTask = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); - TaskCompletionSource _commitTask; + long beginAddress = allocator.BeginAddress; + TaskCompletionSource _commitTcs = default; // We can only allow serial monotonic synchronous commit lock (this) { - if (flushAddress > CommittedUntilAddress) + if ((beginAddress > CommittedBeginAddress) || (flushAddress > CommittedUntilAddress)) { - logCommitManager.Commit(flushAddress, info.ToByteArray()); - CommittedUntilAddress = flushAddress; - // info.DebugPrint(); - } + FasterLogRecoveryInfo info = new FasterLogRecoveryInfo + { + BeginAddress = beginAddress > CommittedBeginAddress ? beginAddress : CommittedBeginAddress, + FlushedUntilAddress = flushAddress > CommittedUntilAddress ? 
flushAddress : CommittedUntilAddress + }; - _commitTask = commitTcs; - if (commitTcs.Task.Status != TaskStatus.Faulted) - commitTcs = _newCommitTask; + logCommitManager.Commit(info.BeginAddress, info.FlushedUntilAddress, info.ToByteArray()); + CommittedBeginAddress = info.BeginAddress; + CommittedUntilAddress = info.FlushedUntilAddress; + + _commitTcs = commitTcs; + if (commitTcs.Task.Status != TaskStatus.Faulted) + { + commitTcs = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); + } + } } - _commitTask.SetResult(flushAddress); + _commitTcs?.SetResult(flushAddress); } /// @@ -604,6 +600,7 @@ private void Restore() allocator.RestoreHybridLog(info.FlushedUntilAddress, headAddress, info.BeginAddress); CommittedUntilAddress = info.FlushedUntilAddress; + CommittedBeginAddress = info.BeginAddress; } /// @@ -709,5 +706,30 @@ private unsafe void AsyncGetFromDiskCallback(uint errorCode, uint numBytes, Nati record.Return(); return (result, length); } + + private long CommitInternal(bool spinWait = false) + { + epoch.Resume(); + if (allocator.ShiftReadOnlyToTail(out long tailAddress)) + { + if (spinWait) + { + while (CommittedUntilAddress < tailAddress) + { + epoch.ProtectAndDrain(); + Thread.Yield(); + } + } + epoch.Suspend(); + } + else + { + // May need to commit begin address + epoch.Suspend(); + CommitCallback(CommittedUntilAddress); + } + + return tailAddress; + } } } diff --git a/cs/src/core/Index/FasterLog/ILogCommitManager.cs b/cs/src/core/Index/FasterLog/ILogCommitManager.cs index 892273815..f3282ede4 100644 --- a/cs/src/core/Index/FasterLog/ILogCommitManager.cs +++ b/cs/src/core/Index/FasterLog/ILogCommitManager.cs @@ -13,9 +13,10 @@ public interface ILogCommitManager /// /// Perform (synchronous) commit with specified metadata /// - /// Address committed until (for information only, not necessary to persist) - /// Commit metadata - void Commit(long address, byte[] commitMetadata); + /// Committed begin address (for information only, not necessary to persist) + /// Address committed until (for information only, not necessary to persist) + /// Commit metadata - should be persisted + void Commit(long beginAddress, long untilAddress, byte[] commitMetadata); /// /// Return prior commit metadata during recovery diff --git a/cs/src/core/Index/FasterLog/LocalLogCommitManager.cs b/cs/src/core/Index/FasterLog/LocalLogCommitManager.cs index 761984d68..f3cdc90a1 100644 --- a/cs/src/core/Index/FasterLog/LocalLogCommitManager.cs +++ b/cs/src/core/Index/FasterLog/LocalLogCommitManager.cs @@ -24,9 +24,10 @@ public LocalLogCommitManager(string CommitFile) /// /// Perform (synchronous) commit with specified metadata /// - /// Address committed until (for information only, not necessary to persist) + /// Committed begin address (for information only, not necessary to persist) + /// Address committed until (for information only, not necessary to persist) /// Commit metadata - public void Commit(long address, byte[] commitMetadata) + public void Commit(long beginAddress, long untilAddress, byte[] commitMetadata) { // Two phase to ensure we write metadata in single Write operation using (var ms = new MemoryStream()) diff --git a/cs/test/FasterLogTests.cs b/cs/test/FasterLogTests.cs index ccec889f2..e2668f0d0 100644 --- a/cs/test/FasterLogTests.cs +++ b/cs/test/FasterLogTests.cs @@ -137,5 +137,41 @@ public async Task FasterLogTest3() } log.Dispose(); } + + [Test] + public async Task FasterLogTest4() + { + log = new FasterLog(new FasterLogSettings { LogDevice = device, PageSizeBits = 
14 }); + byte[] data1 = new byte[100]; + for (int i = 0; i < 100; i++) data1[i] = (byte)i; + + for (int i=0; i<100; i++) + { + log.Enqueue(data1); + } + + Assert.IsTrue(log.CommittedUntilAddress == log.BeginAddress); + await log.CommitAsync(); + + Assert.IsTrue(log.CommittedUntilAddress == log.TailAddress); + Assert.IsTrue(log.CommittedBeginAddress == log.BeginAddress); + + using (var iter = log.Scan(0, long.MaxValue)) + { + // Should read the "hole" and return false + var iterResult = iter.GetNext(out byte[] entry, out _); + log.TruncateUntil(iter.NextAddress); + + Assert.IsTrue(log.CommittedUntilAddress == log.TailAddress); + Assert.IsTrue(log.CommittedBeginAddress < log.BeginAddress); + Assert.IsTrue(iter.NextAddress == log.BeginAddress); + + await log.CommitAsync(); + + Assert.IsTrue(log.CommittedUntilAddress == log.TailAddress); + Assert.IsTrue(log.CommittedBeginAddress == log.BeginAddress); + } + log.Dispose(); + } } } From dfd683f55e8a735334b69f05f83d06a67ba2f444 Mon Sep 17 00:00:00 2001 From: Badrish Chandramouli Date: Wed, 9 Oct 2019 16:30:58 -0700 Subject: [PATCH 25/36] changed test project target --- cs/test/FASTER.test.csproj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cs/test/FASTER.test.csproj b/cs/test/FASTER.test.csproj index f36620739..049443673 100644 --- a/cs/test/FASTER.test.csproj +++ b/cs/test/FASTER.test.csproj @@ -1,7 +1,7 @@  - net46;netcoreapp2.2 + net46;netcoreapp2.0 AnyCPU;x64 true From 8dbba0ae722add0ca61ee77d72208e70db1d0fcf Mon Sep 17 00:00:00 2001 From: Badrish Chandramouli Date: Wed, 9 Oct 2019 16:37:56 -0700 Subject: [PATCH 26/36] reverting test nuget version --- cs/test/FASTER.test.csproj | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cs/test/FASTER.test.csproj b/cs/test/FASTER.test.csproj index 049443673..41d0226cc 100644 --- a/cs/test/FASTER.test.csproj +++ b/cs/test/FASTER.test.csproj @@ -38,9 +38,9 @@ 1701;1702;1591 - - - + + + From 4d1c9ea7ae41961292cfbf5a30f2f04e37ebffba Mon Sep 17 00:00:00 2001 From: Badrish Chandramouli Date: Wed, 9 Oct 2019 19:48:20 -0700 Subject: [PATCH 27/36] Updated random read example --- cs/playground/FasterLogSample/Program.cs | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/cs/playground/FasterLogSample/Program.cs b/cs/playground/FasterLogSample/Program.cs index 66107ccd5..2a584458a 100644 --- a/cs/playground/FasterLogSample/Program.cs +++ b/cs/playground/FasterLogSample/Program.cs @@ -143,19 +143,6 @@ static void ScanThread() Random r = new Random(); byte[] result; - // First we demonstrate single random read into specified log offset - if (log.CommittedUntilAddress > log.BeginAddress) - { - (result, _) = log.ReadAsync(log.BeginAddress).GetAwaiter().GetResult(); - if (Different(result, staticEntry, out int location)) - { - if (result.Length != staticEntry.Length) - throw new Exception("Invalid entry found, expected length " + staticEntry.Length + ", actual length " + result.Length); - else - throw new Exception("Invalid entry found at offset " + location); - } - } - using (iter = log.Scan(log.BeginAddress, long.MaxValue)) { while (true) @@ -182,6 +169,9 @@ static void ScanThread() log.Enqueue(result); } + // Example of random read from given address + // (result, _) = log.ReadAsync(iter.CurrentAddress).GetAwaiter().GetResult(); + log.TruncateUntil(iter.NextAddress); } } From fd15349fc812fa02974a053e6972a1db10f9787b Mon Sep 17 00:00:00 2001 From: Badrish Chandramouli Date: Thu, 10 Oct 2019 14:15:02 -0700 Subject: [PATCH 28/36] Use 
TrySetResult instead of SetResult, since log closure moves the task to completed state. --- cs/src/core/Index/FasterLog/FasterLog.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cs/src/core/Index/FasterLog/FasterLog.cs b/cs/src/core/Index/FasterLog/FasterLog.cs index 2b5d52935..7cb8b5816 100644 --- a/cs/src/core/Index/FasterLog/FasterLog.cs +++ b/cs/src/core/Index/FasterLog/FasterLog.cs @@ -577,7 +577,7 @@ private void CommitCallback(long flushAddress) } } } - _commitTcs?.SetResult(flushAddress); + _commitTcs?.TrySetResult(flushAddress); } /// From 15c418b25cd949a16422e4c5716f4f04fbb96ecd Mon Sep 17 00:00:00 2001 From: Badrish Chandramouli Date: Tue, 15 Oct 2019 13:11:09 -0700 Subject: [PATCH 29/36] Added simple version/checksum to commit info. --- .../Index/FasterLog/FasterLogRecoveryInfo.cs | 114 ++++++++++++++++++ .../core/Index/FasterLog/FasterLogSettings.cs | 85 ------------- 2 files changed, 114 insertions(+), 85 deletions(-) create mode 100644 cs/src/core/Index/FasterLog/FasterLogRecoveryInfo.cs diff --git a/cs/src/core/Index/FasterLog/FasterLogRecoveryInfo.cs b/cs/src/core/Index/FasterLog/FasterLogRecoveryInfo.cs new file mode 100644 index 000000000..7a41976c0 --- /dev/null +++ b/cs/src/core/Index/FasterLog/FasterLogRecoveryInfo.cs @@ -0,0 +1,114 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#pragma warning disable 0162 + +using System; +using System.Diagnostics; +using System.IO; + +namespace FASTER.core +{ + /// + /// Recovery info for FASTER Log + /// + internal struct FasterLogRecoveryInfo + { + /// + /// Begin address + /// + public long BeginAddress; + + /// + /// Flushed logical address + /// + public long FlushedUntilAddress; + + + /// + /// Initialize + /// + public void Initialize() + { + BeginAddress = 0; + FlushedUntilAddress = 0; + } + + /// + /// Initialize from stream + /// + /// + public void Initialize(BinaryReader reader) + { + int version; + long checkSum; + try + { + version = reader.ReadInt32(); + checkSum = reader.ReadInt64(); + BeginAddress = reader.ReadInt64(); + FlushedUntilAddress = reader.ReadInt64(); + } + catch (Exception e) + { + throw new Exception("Unable to recover from previous commit. 
Inner exception: " + e.ToString()); + } + if (version != 0) + throw new Exception("Invalid version found during commit recovery"); + + if (checkSum != (BeginAddress ^ FlushedUntilAddress)) + throw new Exception("Invalid checksum found during commit recovery"); + } + + /// + /// Recover info from token + /// + /// + /// + internal void Recover(ILogCommitManager logCommitManager) + { + var metadata = logCommitManager.GetCommitMetadata(); + if (metadata == null) + throw new Exception("Invalid log commit metadata during recovery"); + + Initialize(new BinaryReader(new MemoryStream(metadata))); + } + + /// + /// Reset + /// + public void Reset() + { + Initialize(); + } + + /// + /// Write info to byte array + /// + public byte[] ToByteArray() + { + using (var ms = new MemoryStream()) + { + using (var writer = new BinaryWriter(ms)) + { + writer.Write(0); // version + writer.Write(BeginAddress ^ FlushedUntilAddress); // checksum + writer.Write(BeginAddress); + writer.Write(FlushedUntilAddress); + } + return ms.ToArray(); + } + } + + /// + /// Print checkpoint info for debugging purposes + /// + public void DebugPrint() + { + Debug.WriteLine("******** Log Commit Info ********"); + + Debug.WriteLine("BeginAddress: {0}", BeginAddress); + Debug.WriteLine("FlushedUntilAddress: {0}", FlushedUntilAddress); + } + } +} diff --git a/cs/src/core/Index/FasterLog/FasterLogSettings.cs b/cs/src/core/Index/FasterLog/FasterLogSettings.cs index f0ad92245..6285cf198 100644 --- a/cs/src/core/Index/FasterLog/FasterLogSettings.cs +++ b/cs/src/core/Index/FasterLog/FasterLogSettings.cs @@ -76,89 +76,4 @@ internal LogSettings GetLogSettings() }; } } - - /// - /// Recovery info for FASTER Log - /// - internal struct FasterLogRecoveryInfo - { - /// - /// Begin address - /// - public long BeginAddress; - - /// - /// Flushed logical address - /// - public long FlushedUntilAddress; - - - /// - /// Initialize - /// - public void Initialize() - { - BeginAddress = 0; - FlushedUntilAddress = 0; - } - - /// - /// Initialize from stream - /// - /// - public void Initialize(BinaryReader reader) - { - BeginAddress = reader.ReadInt64(); - FlushedUntilAddress = reader.ReadInt64(); - } - - /// - /// Recover info from token - /// - /// - /// - internal void Recover(ILogCommitManager logCommitManager) - { - var metadata = logCommitManager.GetCommitMetadata(); - if (metadata == null) - throw new Exception("Invalid log commit metadata during recovery"); - - Initialize(new BinaryReader(new MemoryStream(metadata))); - } - - /// - /// Reset - /// - public void Reset() - { - Initialize(); - } - - /// - /// Write info to byte array - /// - public byte[] ToByteArray() - { - using (var ms = new MemoryStream()) - { - using (var writer = new BinaryWriter(ms)) - { - writer.Write(BeginAddress); - writer.Write(FlushedUntilAddress); - } - return ms.ToArray(); - } - } - - /// - /// Print checkpoint info for debugging purposes - /// - public void DebugPrint() - { - Debug.WriteLine("******** Log Commit Info ********"); - - Debug.WriteLine("BeginAddress: {0}", BeginAddress); - Debug.WriteLine("FlushedUntilAddress: {0}", FlushedUntilAddress); - } - } } From 4751080b8bcec440841e4653a5f9ac3b25dae955 Mon Sep 17 00:00:00 2001 From: Badrish Chandramouli Date: Wed, 16 Oct 2019 09:51:48 -0700 Subject: [PATCH 30/36] Added opt-in support for per-entry 8-byte checksum (xor) in header of entry. 
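A minimal sketch of the opt-in (hypothetical code; the device path is illustrative): the checksum is enabled via the new LogChecksum field on FasterLogSettings, which widens the entry header from 4 bytes (length only) to 12 bytes (an 8-byte XOR checksum over the 4-byte length plus payload, followed by the length).

    // Hypothetical sketch: create a log with per-entry checksums enabled
    var device = Devices.CreateLogDevice("hlog.log");
    var log = new FasterLog(new FasterLogSettings
    {
        LogDevice = device,
        LogChecksum = LogChecksumType.PerEntry  // adds 8-byte XOR checksum to each entry header
    });

Checksums written this way are verified on random reads and during scans; a mismatch surfaces as an exception rather than silently returning corrupt data.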
--- cs/src/core/Index/FasterLog/FasterLog.cs | 102 ++++++++++++++---- .../core/Index/FasterLog/FasterLogIterator.cs | 52 ++++++--- .../core/Index/FasterLog/FasterLogSettings.cs | 20 ++++ cs/src/core/Utilities/Utility.cs | 38 ++++++- cs/test/FasterLogTests.cs | 16 +-- 5 files changed, 182 insertions(+), 46 deletions(-) diff --git a/cs/src/core/Index/FasterLog/FasterLog.cs b/cs/src/core/Index/FasterLog/FasterLog.cs index 7cb8b5816..25338e519 100644 --- a/cs/src/core/Index/FasterLog/FasterLog.cs +++ b/cs/src/core/Index/FasterLog/FasterLog.cs @@ -22,8 +22,10 @@ public class FasterLog : IDisposable private readonly LightEpoch epoch; private readonly ILogCommitManager logCommitManager; private readonly GetMemory getMemory; + private readonly int headerSize; + private readonly LogChecksumType logChecksum; private TaskCompletionSource commitTcs = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); - + /// /// Beginning address of log /// @@ -64,6 +66,9 @@ public FasterLog(FasterLogSettings logSettings) new LocalLogCommitManager(logSettings.LogCommitFile ?? logSettings.LogDevice.FileName + ".commit"); + // Reserve 8 byte checksum in header if requested + logChecksum = logSettings.LogChecksum; + headerSize = logChecksum == LogChecksumType.PerEntry ? 12 : 4; getMemory = logSettings.GetMemory; epoch = new LightEpoch(); CommittedUntilAddress = Constants.kFirstValidAddress; @@ -139,7 +144,7 @@ public unsafe bool TryEnqueue(byte[] entry, out long logicalAddress) epoch.Resume(); var length = entry.Length; - logicalAddress = allocator.TryAllocate(4 + Align(length)); + logicalAddress = allocator.TryAllocate(headerSize + Align(length)); if (logicalAddress == 0) { epoch.Suspend(); @@ -147,10 +152,9 @@ public unsafe bool TryEnqueue(byte[] entry, out long logicalAddress) } var physicalAddress = allocator.GetPhysicalAddress(logicalAddress); - *(int*)physicalAddress = length; fixed (byte* bp = entry) - Buffer.MemoryCopy(bp, (void*)(4 + physicalAddress), length, length); - + Buffer.MemoryCopy(bp, (void*)(headerSize + physicalAddress), length, length); + SetHeader(length, (byte*)physicalAddress); epoch.Suspend(); return true; } @@ -169,7 +173,7 @@ public unsafe bool TryEnqueue(ReadOnlySpan entry, out long logicalAddress) epoch.Resume(); var length = entry.Length; - logicalAddress = allocator.TryAllocate(4 + Align(length)); + logicalAddress = allocator.TryAllocate(headerSize + Align(length)); if (logicalAddress == 0) { epoch.Suspend(); @@ -177,10 +181,9 @@ public unsafe bool TryEnqueue(ReadOnlySpan entry, out long logicalAddress) } var physicalAddress = allocator.GetPhysicalAddress(logicalAddress); - *(int*)physicalAddress = length; fixed (byte* bp = &entry.GetPinnableReference()) - Buffer.MemoryCopy(bp, (void*)(4 + physicalAddress), length, length); - + Buffer.MemoryCopy(bp, (void*)(headerSize + physicalAddress), length, length); + SetHeader(length, (byte*)physicalAddress); epoch.Suspend(); return true; } @@ -352,7 +355,7 @@ public long EnqueueAndWaitForCommit(byte[] entry) { long logicalAddress; while (!TryEnqueue(entry, out logicalAddress)) ; - while (CommittedUntilAddress < logicalAddress + 4 + entry.Length) ; + while (CommittedUntilAddress < logicalAddress + 1) ; return logicalAddress; } @@ -366,7 +369,7 @@ public long EnqueueAndWaitForCommit(ReadOnlySpan entry) { long logicalAddress; while (!TryEnqueue(entry, out logicalAddress)) ; - while (CommittedUntilAddress < logicalAddress + 4 + entry.Length) ; + while (CommittedUntilAddress < logicalAddress + 1) ; return logicalAddress; } @@ 
-411,7 +414,7 @@ public async ValueTask EnqueueAndWaitForCommitAsync(byte[] entry) while (true) { var task = CommitTask; - if (CommittedUntilAddress < logicalAddress + 4 + entry.Length) + if (CommittedUntilAddress < logicalAddress + 1) { await task; } @@ -445,7 +448,7 @@ public async ValueTask EnqueueAndWaitForCommitAsync(ReadOnlyMemory e while (true) { var task = CommitTask; - if (CommittedUntilAddress < logicalAddress + 4 + entry.Length) + if (CommittedUntilAddress < logicalAddress + 1) { await task; } @@ -510,7 +513,7 @@ public void TruncateUntil(long untilAddress) /// public FasterLogScanIterator Scan(long beginAddress, long endAddress, ScanBufferingMode scanBufferingMode = ScanBufferingMode.DoublePageBuffering) { - return new FasterLogScanIterator(this, allocator, beginAddress, endAddress, getMemory, scanBufferingMode, epoch); + return new FasterLogScanIterator(this, allocator, beginAddress, endAddress, getMemory, scanBufferingMode, epoch, headerSize); } /// @@ -534,7 +537,7 @@ public FasterLogScanIterator Scan(long beginAddress, long endAddress, ScanBuffer }; unsafe { - allocator.AsyncReadRecordToMemory(address, 4 + estimatedLength, AsyncGetFromDiskCallback, ref ctx); + allocator.AsyncReadRecordToMemory(address, headerSize + estimatedLength, AsyncGetFromDiskCallback, ref ctx); } epoch.Suspend(); await ctx.completedRead.WaitAsync(); @@ -619,7 +622,7 @@ private unsafe bool TryAppend(IReadOnlySpanBatch readOnlySpanBatch, out long log allocatedLength = 0; for (int i = 0; i < totalEntries; i++) { - allocatedLength += Align(readOnlySpanBatch.Get(i).Length) + 4; + allocatedLength += Align(readOnlySpanBatch.Get(i).Length) + headerSize; } epoch.Resume(); @@ -636,10 +639,10 @@ private unsafe bool TryAppend(IReadOnlySpanBatch readOnlySpanBatch, out long log { var span = readOnlySpanBatch.Get(i); var entryLength = span.Length; - *(int*)physicalAddress = entryLength; fixed (byte* bp = &span.GetPinnableReference()) - Buffer.MemoryCopy(bp, (void*)(4 + physicalAddress), entryLength, entryLength); - physicalAddress += Align(entryLength) + 4; + Buffer.MemoryCopy(bp, (void*)(headerSize + physicalAddress), entryLength, entryLength); + SetHeader(entryLength, (byte*)physicalAddress); + physicalAddress += Align(entryLength) + headerSize; } epoch.Suspend(); @@ -660,7 +663,7 @@ private unsafe void AsyncGetFromDiskCallback(uint errorCode, uint numBytes, Nati else { var record = ctx.record.GetValidPointer(); - var length = *(int*)record; + var length = GetLength(record); if (length < 0 || length > allocator.PageSize) { @@ -671,7 +674,7 @@ private unsafe void AsyncGetFromDiskCallback(uint errorCode, uint numBytes, Nati } else { - int requiredBytes = 4 + length; + int requiredBytes = headerSize + length; if (ctx.record.available_bytes >= requiredBytes) { ctx.completedRead.Release(); @@ -696,11 +699,15 @@ private unsafe void AsyncGetFromDiskCallback(uint errorCode, uint numBytes, Nati unsafe { var ptr = record.GetValidPointer(); - length = *(int*)ptr; + length = GetLength(ptr); + if (!VerifyChecksum(ptr, length)) + { + throw new Exception("Checksum failed for read"); + } result = getMemory != null ? 
getMemory(length) : new byte[length]; fixed (byte* bp = result) { - Buffer.MemoryCopy(ptr + 4, bp, length, length); + Buffer.MemoryCopy(ptr + headerSize, bp, length, length); } } record.Return(); @@ -731,5 +738,54 @@ private long CommitInternal(bool spinWait = false) return tailAddress; } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal unsafe int GetLength(byte* ptr) + { + if (logChecksum == LogChecksumType.None) + return *(int*)ptr; + else if (logChecksum == LogChecksumType.PerEntry) + return *(int*)(ptr + 8); + return 0; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal unsafe bool VerifyChecksum(byte* ptr, int length) + { + if (logChecksum == LogChecksumType.PerEntry) + { + var cs = Utility.XorBytes(ptr + 8, length + 4); + if (cs != *(ulong*)ptr) + { + return false; + } + } + return true; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal unsafe ulong GetChecksum(byte* ptr) + { + if (logChecksum == LogChecksumType.PerEntry) + { + return *(ulong*)ptr; + } + return 0; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private unsafe void SetHeader(int length, byte* dest) + { + if (logChecksum == LogChecksumType.None) + { + *(int*)dest = length; + return; + } + else if (logChecksum == LogChecksumType.PerEntry) + { + *(int*)(dest + 8) = length; + *(ulong*)dest = Utility.XorBytes(dest + 8, length + 4); + } + } } } diff --git a/cs/src/core/Index/FasterLog/FasterLogIterator.cs b/cs/src/core/Index/FasterLog/FasterLogIterator.cs index 436785aed..c1b563914 100644 --- a/cs/src/core/Index/FasterLog/FasterLogIterator.cs +++ b/cs/src/core/Index/FasterLog/FasterLogIterator.cs @@ -24,6 +24,7 @@ public class FasterLogScanIterator : IDisposable private readonly long[] loadedPage; private readonly LightEpoch epoch; private readonly GetMemory getMemory; + private readonly int headerSize; private long currentAddress, nextAddress; /// @@ -45,13 +46,15 @@ public class FasterLogScanIterator : IDisposable /// /// /// + /// /// - internal unsafe FasterLogScanIterator(FasterLog fasterLog, BlittableAllocator hlog, long beginAddress, long endAddress, GetMemory getMemory, ScanBufferingMode scanBufferingMode, LightEpoch epoch) + internal unsafe FasterLogScanIterator(FasterLog fasterLog, BlittableAllocator hlog, long beginAddress, long endAddress, GetMemory getMemory, ScanBufferingMode scanBufferingMode, LightEpoch epoch, int headerSize) { this.fasterLog = fasterLog; this.allocator = hlog; this.getMemory = getMemory; this.epoch = epoch; + this.headerSize = headerSize; if (beginAddress == 0) beginAddress = hlog.GetFirstValidLogicalAddress(0); @@ -118,7 +121,7 @@ public unsafe bool GetNext(out byte[] entry, out int entryLength) } fixed (byte* bp = entry) - Buffer.MemoryCopy((void*)(4 + physicalAddress), bp, entryLength, entryLength); + Buffer.MemoryCopy((void*)(headerSize + physicalAddress), bp, entryLength, entryLength); if (epochTaken) epoch.Suspend(); @@ -143,7 +146,7 @@ public unsafe bool GetNext(MemoryPool pool, out IMemoryOwner entry, entry = pool.Rent(entryLength); fixed (byte* bp = &entry.Memory.Span.GetPinnableReference()) - Buffer.MemoryCopy((void*)(4 + physicalAddress), bp, entryLength, entryLength); + Buffer.MemoryCopy((void*)(headerSize + physicalAddress), bp, entryLength, entryLength); if (epochTaken) epoch.Suspend(); @@ -278,23 +281,46 @@ private unsafe bool GetNextInternal(out long physicalAddress, out int entryLengt physicalAddress = allocator.GetPhysicalAddress(currentAddress); } - // Check if record fits on page, if not skip to next 
page - entryLength = *(int*)physicalAddress; - int recordSize = 4 + Align(entryLength); + // Get and check entry length + entryLength = fasterLog.GetLength((byte*)physicalAddress); + if (entryLength == 0) + { + if (currentAddress >= headAddress) + epoch.Suspend(); - if ((currentAddress & allocator.PageSizeMask) + recordSize > allocator.PageSize) - throw new Exception(); + nextAddress = (1 + (currentAddress >> allocator.LogPageSizeBits)) << allocator.LogPageSizeBits; + if (0 != fasterLog.GetChecksum((byte*)physicalAddress)) + { + var curPage = currentAddress >> allocator.LogPageSizeBits; + throw new Exception("Invalid checksum found during scan, skipping page " + curPage); + } + else + { + // We are likely at end of page, skip to next + currentAddress = nextAddress; + continue; + } + } - if (entryLength == 0) // we are at end of page, skip to next + int recordSize = headerSize + Align(entryLength); + if ((currentAddress & allocator.PageSizeMask) + recordSize > allocator.PageSize) { - // If record if (currentAddress >= headAddress) epoch.Suspend(); - currentAddress = (1 + (currentAddress >> allocator.LogPageSizeBits)) << allocator.LogPageSizeBits; - continue; + nextAddress = (1 + (currentAddress >> allocator.LogPageSizeBits)) << allocator.LogPageSizeBits; + throw new Exception("Invalid length of record found: " + entryLength + ", skipping page"); } - Debug.Assert((currentAddress & allocator.PageSizeMask) + recordSize <= allocator.PageSize); + // Verify checksum if needed + if (currentAddress < headAddress) + { + if (!fasterLog.VerifyChecksum((byte*)physicalAddress, entryLength)) + { + var curPage = currentAddress >> allocator.LogPageSizeBits; + nextAddress = (1 + (currentAddress >> allocator.LogPageSizeBits)) << allocator.LogPageSizeBits; + throw new Exception("Invalid checksum found during scan, skipping page " + curPage); + } + } if ((currentAddress & allocator.PageSizeMask) + recordSize == allocator.PageSize) nextAddress = (1 + (currentAddress >> allocator.LogPageSizeBits)) << allocator.LogPageSizeBits; diff --git a/cs/src/core/Index/FasterLog/FasterLogSettings.cs b/cs/src/core/Index/FasterLog/FasterLogSettings.cs index 6285cf198..8f02aade6 100644 --- a/cs/src/core/Index/FasterLog/FasterLogSettings.cs +++ b/cs/src/core/Index/FasterLog/FasterLogSettings.cs @@ -16,6 +16,21 @@ namespace FASTER.core /// public delegate byte[] GetMemory(int minLength); + /// + /// Type of checksum to add to log + /// + public enum LogChecksumType + { + /// + /// No checksums + /// + None, + /// + /// Checksum per entry + /// + PerEntry + } + /// /// FASTER Log Settings /// @@ -61,6 +76,11 @@ public class FasterLogSettings /// public GetMemory GetMemory = null; + /// + /// Type of checksum to add to log + /// + public LogChecksumType LogChecksum = LogChecksumType.None; + internal LogSettings GetLogSettings() { return new LogSettings diff --git a/cs/src/core/Utilities/Utility.cs b/cs/src/core/Utilities/Utility.cs index 3909371d1..c250d1b81 100644 --- a/cs/src/core/Utilities/Utility.cs +++ b/cs/src/core/Utilities/Utility.cs @@ -121,7 +121,6 @@ public static long GetHashCode(long input) return (long)Rotr64((ulong)local_rand_hash, 45); } - /// /// Get 64-bit hash code for a byte array /// @@ -147,7 +146,42 @@ public static unsafe long HashBytes(byte* pbString, int len) return (long)Rotr64(magicno * hashState, 4); } - + + /// + /// Compute XOR of all provided bytes + /// + /// + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static unsafe ulong XorBytes(byte* src, int length) + { + ulong 
result = 0; + byte* curr = src; + byte* end = src + length; + while (curr + 4 * sizeof(ulong) <= end) + { + result ^= *(ulong*)curr; + result ^= *(1 + (ulong*)curr); + result ^= *(2 + (ulong*)curr); + result ^= *(3 + (ulong*)curr); + curr += 4 * sizeof(ulong); + } + while (curr + sizeof(ulong) <= end) + { + result ^= *(ulong*)curr; + curr += sizeof(ulong); + } + while (curr + 1 <= end) + { + result ^= *curr; + curr++; + } + + return result; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static ulong Rotr64(ulong x, int n) { diff --git a/cs/test/FasterLogTests.cs b/cs/test/FasterLogTests.cs index e2668f0d0..4d95e5548 100644 --- a/cs/test/FasterLogTests.cs +++ b/cs/test/FasterLogTests.cs @@ -36,9 +36,9 @@ public void TearDown() } [Test] - public void FasterLogTest1() + public void FasterLogTest1([Values]LogChecksumType logChecksum) { - log = new FasterLog(new FasterLogSettings { LogDevice = device }); + log = new FasterLog(new FasterLogSettings { LogDevice = device, LogChecksum = logChecksum }); byte[] entry = new byte[entryLength]; for (int i = 0; i < entryLength; i++) @@ -67,9 +67,9 @@ public void FasterLogTest1() } [Test] - public async Task FasterLogTest2() + public async Task FasterLogTest2([Values]LogChecksumType logChecksum) { - log = new FasterLog(new FasterLogSettings { LogDevice = device }); + log = new FasterLog(new FasterLogSettings { LogDevice = device, LogChecksum = logChecksum }); byte[] data1 = new byte[10000]; for (int i = 0; i < 10000; i++) data1[i] = (byte)i; @@ -100,9 +100,9 @@ public async Task FasterLogTest2() } [Test] - public async Task FasterLogTest3() + public async Task FasterLogTest3([Values]LogChecksumType logChecksum) { - log = new FasterLog(new FasterLogSettings { LogDevice = device, PageSizeBits = 14 }); + log = new FasterLog(new FasterLogSettings { LogDevice = device, PageSizeBits = 14, LogChecksum = logChecksum }); byte[] data1 = new byte[10000]; for (int i = 0; i < 10000; i++) data1[i] = (byte)i; @@ -139,9 +139,9 @@ public async Task FasterLogTest3() } [Test] - public async Task FasterLogTest4() + public async Task FasterLogTest4([Values]LogChecksumType logChecksum) { - log = new FasterLog(new FasterLogSettings { LogDevice = device, PageSizeBits = 14 }); + log = new FasterLog(new FasterLogSettings { LogDevice = device, PageSizeBits = 14, LogChecksum = logChecksum }); byte[] data1 = new byte[100]; for (int i = 0; i < 100; i++) data1[i] = (byte)i; From 70b4c72ca45f3b7275a9d547fc755f6ac6874b95 Mon Sep 17 00:00:00 2001 From: Badrish Chandramouli Date: Mon, 21 Oct 2019 14:47:48 -0700 Subject: [PATCH 31/36] Fixing issue with async enqueue. 
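For context, a hedged sketch of the pattern this fix enables (modeled on the FasterLogTest5 case added below; the shutdown flag is illustrative): EnqueueAsync now awaits the commit task only when NeedToWait indicates the tail has run a full buffer of pages ahead of the committed address, so an async producer must be paired with a concurrent committer to make progress.

    // Hypothetical sketch: pair EnqueueAsync with a committer thread
    bool running = true;
    var committer = new Thread(() => { while (running) { log.Commit(true); Thread.Sleep(1); } });
    committer.Start();
    long address = await log.EnqueueAsync(new byte[100]); // awaits only when in-memory pages are exhausted
    running = false;
    committer.Join();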
--- cs/src/core/Index/FasterLog/FasterLog.cs | 31 +++++++++++++++++++----- cs/test/FasterLogTests.cs | 31 ++++++++++++++++++++++++ 2 files changed, 56 insertions(+), 6 deletions(-) diff --git a/cs/src/core/Index/FasterLog/FasterLog.cs b/cs/src/core/Index/FasterLog/FasterLog.cs index 25338e519..8d75e7d52 100644 --- a/cs/src/core/Index/FasterLog/FasterLog.cs +++ b/cs/src/core/Index/FasterLog/FasterLog.cs @@ -217,7 +217,8 @@ public async ValueTask EnqueueAsync(byte[] entry) var task = CommitTask; if (TryEnqueue(entry, out logicalAddress)) break; - await task; + if (NeedToWait(CommittedUntilAddress, TailAddress)) + await task; } return logicalAddress; @@ -238,7 +239,8 @@ public async ValueTask EnqueueAsync(ReadOnlyMemory entry) var task = CommitTask; if (TryEnqueue(entry.Span, out logicalAddress)) break; - await task; + if (NeedToWait(CommittedUntilAddress, TailAddress)) + await task; } return logicalAddress; @@ -259,7 +261,8 @@ public async ValueTask EnqueueAsync(IReadOnlySpanBatch readOnlySpanBatch) var task = CommitTask; if (TryEnqueue(readOnlySpanBatch, out logicalAddress)) break; - await task; + if (NeedToWait(CommittedUntilAddress, TailAddress)) + await task; } return logicalAddress; @@ -407,7 +410,8 @@ public async ValueTask EnqueueAndWaitForCommitAsync(byte[] entry) var task = CommitTask; if (TryEnqueue(entry, out logicalAddress)) break; - await task; + if (NeedToWait(CommittedUntilAddress, TailAddress)) + await task; } // Phase 2: wait for commit/flush to storage @@ -441,7 +445,8 @@ public async ValueTask EnqueueAndWaitForCommitAsync(ReadOnlyMemory e var task = CommitTask; if (TryEnqueue(entry.Span, out logicalAddress)) break; - await task; + if (NeedToWait(CommittedUntilAddress, TailAddress)) + await task; } // Phase 2: wait for commit/flush to storage @@ -476,7 +481,8 @@ public async ValueTask EnqueueAndWaitForCommitAsync(IReadOnlySpanBatch rea var task = CommitTask; if (TryAppend(readOnlySpanBatch, out logicalAddress, out allocatedLength)) break; - await task; + if (NeedToWait(CommittedUntilAddress, TailAddress)) + await task; } // Phase 2: wait for commit/flush to storage @@ -787,5 +793,18 @@ private unsafe void SetHeader(int length, byte* dest) *(ulong*)dest = Utility.XorBytes(dest + 8, length + 4); } } + + /// + /// Do we need to await a commit to make forward progress? + /// + /// + /// + /// + private bool NeedToWait(long committedUntilAddress, long tailAddress) + { + return + allocator.GetPage(committedUntilAddress) <= + (allocator.GetPage(tailAddress) - allocator.BufferSize); + } } } diff --git a/cs/test/FasterLogTests.cs b/cs/test/FasterLogTests.cs index 4d95e5548..f044d7ad6 100644 --- a/cs/test/FasterLogTests.cs +++ b/cs/test/FasterLogTests.cs @@ -4,6 +4,7 @@ using System; using System.IO; using System.Linq; +using System.Threading; using System.Threading.Tasks; using FASTER.core; using NUnit.Framework; @@ -173,5 +174,35 @@ public async Task FasterLogTest4([Values]LogChecksumType logChecksum) } log.Dispose(); } + + [Test] + public async Task FasterLogTest5([Values]LogChecksumType logChecksum) + { + log = new FasterLog(new FasterLogSettings { LogDevice = device, PageSizeBits = 16, MemorySizeBits = 16, LogChecksum = logChecksum }); + + int headerSize = logChecksum == LogChecksumType.None ? 
4 : 12; + var commit = new Thread(() => { while (true) { log.Commit(true); } }); + + commit.Start(); + + // Entry size: 65536 (page size) - headerSize - 64 (log header at start of first page) + await log.EnqueueAndWaitForCommitAsync(new byte[65536 - headerSize - 64]); + + // Entry size: 65536 (page size) - headerSize + await log.EnqueueAndWaitForCommitAsync(new byte[65536 - headerSize]); + + // Entry size: 65536 (page size) - headerSize + await log.EnqueueAndWaitForCommitAsync(new byte[65536 - headerSize]); + + // Entry size: 65536 (page size) - headerSize + await log.EnqueueAndWaitForCommitAsync(new byte[65536 - headerSize]); + + // Entry size: 65536 (page size) - headerSize + await log.EnqueueAndWaitForCommitAsync(new byte[65536 - headerSize]); + + commit.Abort(); + + log.Dispose(); + } } } From 20a7536b58ea68059ff0c14171d993ace0ad6e34 Mon Sep 17 00:00:00 2001 From: Badrish Chandramouli Date: Mon, 21 Oct 2019 16:12:36 -0700 Subject: [PATCH 32/36] Fixed test case, since thread abort is not supported on some platforms. --- cs/test/FasterLogTests.cs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/cs/test/FasterLogTests.cs b/cs/test/FasterLogTests.cs index f044d7ad6..e1a020269 100644 --- a/cs/test/FasterLogTests.cs +++ b/cs/test/FasterLogTests.cs @@ -181,7 +181,8 @@ public async Task FasterLogTest5([Values]LogChecksumType logChecksum) log = new FasterLog(new FasterLogSettings { LogDevice = device, PageSizeBits = 16, MemorySizeBits = 16, LogChecksum = logChecksum }); int headerSize = logChecksum == LogChecksumType.None ? 4 : 12; - var commit = new Thread(() => { while (true) { log.Commit(true); } }); + bool _disposed = false; + var commit = new Thread(() => { while (!_disposed) { log.Commit(true); Thread.Sleep(1); } }); commit.Start(); @@ -200,8 +201,9 @@ public async Task FasterLogTest5([Values]LogChecksumType logChecksum) // Entry size: 65536 (page size) - headerSize await log.EnqueueAndWaitForCommitAsync(new byte[65536 - headerSize]); - commit.Abort(); + _disposed = true; + commit.Join(); log.Dispose(); } } From 64bbe14895456f4f80e8178158df984cd2b90cd0 Mon Sep 17 00:00:00 2001 From: Badrish Chandramouli Date: Fri, 25 Oct 2019 18:13:47 -0700 Subject: [PATCH 33/36] Fixing concurrency issue with contiguous partial flush requests. Removed spin-wait for adjacent flush completion.
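In this patch, a partial-page flush whose start is not page-aligned is parked in a per-page PendingFlushList and re-issued by the flush-completion callback, instead of spin-waiting for the adjacent flush to finish. The commit also introduces an AtomicOwner helper (its call sites are not shown in these hunks); a hedged conceptual demo of its semantics follows, since the type is internal to the allocator rather than public API:

    // Hypothetical demo: the first enqueuer becomes the owner and must
    // process the queue; ownership is released only when the count drains
    var owner = new AtomicOwner();
    bool isOwner = owner.Enqueue();     // true: queue was empty, caller now owns processing
    owner.Enqueue();                    // false: an owner already exists
    owner.Dequeue();                    // true: one item processed, work remains
    bool stillOwner = owner.Dequeue();  // false: count reached zero, ownership released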
--- cs/src/core/Allocator/AllocatorBase.cs | 44 +++++++++----- cs/src/core/Allocator/AtomicOwner.cs | 65 +++++++++++++++++++++ cs/src/core/Allocator/BlittableAllocator.cs | 5 -- cs/src/core/Allocator/PendingFlushList.cs | 56 ++++++++++++++++++ 4 files changed, 151 insertions(+), 19 deletions(-) create mode 100644 cs/src/core/Allocator/AtomicOwner.cs create mode 100644 cs/src/core/Allocator/PendingFlushList.cs diff --git a/cs/src/core/Allocator/AllocatorBase.cs b/cs/src/core/Allocator/AllocatorBase.cs index 7fa2d009d..1d9357db3 100644 --- a/cs/src/core/Allocator/AllocatorBase.cs +++ b/cs/src/core/Allocator/AllocatorBase.cs @@ -176,6 +176,7 @@ public unsafe abstract partial class AllocatorBase : IDisposable // Array that indicates the status of each buffer page internal readonly FullPageStatus[] PageStatusIndicator; + internal readonly PendingFlushList[] PendingFlush; /// /// Global address of the current tail (next element to be allocated from the circular buffer) @@ -514,6 +515,9 @@ public AllocatorBase(LogSettings settings, IFasterEqualityComparer comparer } PageStatusIndicator = new FullPageStatus[BufferSize]; + PendingFlush = new PendingFlushList[BufferSize]; + for (int i = 0; i < BufferSize; i++) + PendingFlush[i] = new PendingFlushList(); device = settings.LogDevice; sectorSize = (int)device.SectorSize; @@ -1245,24 +1249,12 @@ public void AsyncFlushPages(long fromAddress, long untilAddress) int numPages = (int)(endPage - startPage); long offsetInStartPage = GetOffsetInPage(fromAddress); - long offsetInEndPage = GetOffsetInPage(untilAddress); - + long offsetInEndPage = GetOffsetInPage(untilAddress); // Extra (partial) page being flushed if (offsetInEndPage > 0) numPages++; - // Partial page starting point, need to wait until the - // ongoing adjacent flush is completed to ensure correctness - if (offsetInStartPage > 0) - { - while (FlushedUntilAddress < fromAddress) - { - epoch.ProtectAndDrain(); - Thread.Yield(); - } - } - /* Request asynchronous writes to the device. If waitForPendingFlushComplete * is set, then a CountDownEvent is set in the callback handle. 
*/ @@ -1293,7 +1285,20 @@ public void AsyncFlushPages(long fromAddress, long untilAddress) asyncResult.fromAddress = fromAddress; } - WriteAsync(flushPage, AsyncFlushPageCallback, asyncResult); + // Partial page starting point, need to wait until the + // ongoing adjacent flush is completed to ensure correctness + if (GetOffsetInPage(asyncResult.fromAddress) > 0) + { + // Enqueue work in shared queue + var index = GetPageIndexForAddress(asyncResult.fromAddress); + PendingFlush[index].Add(asyncResult); + if (PendingFlush[index].RemoveAdjacent(FlushedUntilAddress, out PageAsyncFlushResult request)) + { + WriteAsync(request.fromAddress >> LogPageSizeBits, AsyncFlushPageCallback, request); + } + } + else + WriteAsync(flushPage, AsyncFlushPageCallback, asyncResult); } } @@ -1464,6 +1469,12 @@ private void AsyncFlushPageCallback(uint errorCode, uint numBytes, NativeOverlap result.Free(); } + var _flush = FlushedUntilAddress; + if (GetOffsetInPage(_flush) > 0 && PendingFlush[GetPage(_flush) % BufferSize].RemoveAdjacent(_flush, out PageAsyncFlushResult request)) + { + WriteAsync(request.fromAddress >> LogPageSizeBits, AsyncFlushPageCallback, request); + } + Overlapped.Free(overlap); } @@ -1508,5 +1519,10 @@ public virtual void ShallowCopy(ref Value src, ref Value dst) { dst = src; } + + private string PrettyPrint(long address) + { + return $"{GetPage(address)}:{GetOffsetInPage(address)}"; + } } } diff --git a/cs/src/core/Allocator/AtomicOwner.cs b/cs/src/core/Allocator/AtomicOwner.cs new file mode 100644 index 000000000..590d2476f --- /dev/null +++ b/cs/src/core/Allocator/AtomicOwner.cs @@ -0,0 +1,65 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +using System.Threading; +using System.Runtime.InteropServices; + +namespace FASTER.core +{ + [StructLayout(LayoutKind.Explicit)] + struct AtomicOwner + { + [FieldOffset(0)] + int owner; + [FieldOffset(4)] + int count; + [FieldOffset(0)] + long atomic; + + /// + /// Enqueue token + /// true: success + caller is new owner + /// false: success + someone else is owner + /// + /// + public bool Enqueue() + { + while (true) + { + var older = this; + var newer = older; + newer.count++; + if (older.owner == 0) + newer.owner = 1; + + if (Interlocked.CompareExchange(ref this.atomic, newer.atomic, older.atomic) == older.atomic) + { + return older.owner == 0; + } + } + } + + /// + /// Dequeue token + /// true: successful dequeue (caller is owner) + /// false: failed dequeue + /// + /// + public bool Dequeue() + { + while (true) + { + var older = this; + var newer = older; + newer.count--; + if (newer.count == 0) + newer.owner = 0; + + if (Interlocked.CompareExchange(ref this.atomic, newer.atomic, older.atomic) == older.atomic) + { + return newer.owner != 0; + } + } + } + } +} diff --git a/cs/src/core/Allocator/BlittableAllocator.cs b/cs/src/core/Allocator/BlittableAllocator.cs index 9c37ffb3e..b326c94a1 100644 --- a/cs/src/core/Allocator/BlittableAllocator.cs +++ b/cs/src/core/Allocator/BlittableAllocator.cs @@ -5,11 +5,6 @@ using System.Runtime.CompilerServices; using System.Threading; using System.Runtime.InteropServices; -using System.Collections.Concurrent; -using System.Collections.Generic; -using System.Linq.Expressions; -using System.IO; -using System.Diagnostics; #pragma warning disable CS1591 // Missing XML comment for publicly visible type or member diff --git a/cs/src/core/Allocator/PendingFlushList.cs b/cs/src/core/Allocator/PendingFlushList.cs new file mode 100644 index 000000000..0896481aa --- 
/dev/null +++ b/cs/src/core/Allocator/PendingFlushList.cs @@ -0,0 +1,56 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +using System; +using System.Threading; + +namespace FASTER.core +{ + class PendingFlushList + { + const int maxSize = 8; + const int maxRetries = 10; + public PageAsyncFlushResult[] list; + + public PendingFlushList() + { + list = new PageAsyncFlushResult[maxSize]; + } + + public void Add(PageAsyncFlushResult t) + { + int retries = 0; + do + { + for (int i = 0; i < maxSize; i++) + { + if (list[i] == default) + { + if (Interlocked.CompareExchange(ref list[i], t, default) == default) + { + return; + } + } + } + } while (retries++ < maxRetries); + throw new Exception("Unable to add item to list"); + } + + public bool RemoveAdjacent(long address, out PageAsyncFlushResult request) + { + for (int i=0; i Date: Tue, 29 Oct 2019 17:16:44 -0700 Subject: [PATCH 34/36] Fasterlog exceptions (#189) * Added storage exception handling, connecting to tasks. * Cleanup of error handling, control when exception is bubbled up to user. * Added yield in NeedToWait * Improved iterator support in case of exception --- cs/playground/FasterLogSample/Program.cs | 28 ++- cs/src/core/Allocator/AllocatorBase.cs | 48 +++- cs/src/core/Allocator/AtomicOwner.cs | 36 ++- cs/src/core/Allocator/BlittableAllocator.cs | 10 +- cs/src/core/Allocator/ErrorList.cs | 63 +++++ cs/src/core/Allocator/GenericAllocator.cs | 2 +- .../Allocator/VarLenBlittableAllocator.cs | 2 +- .../core/Device/ManagedLocalStorageDevice.cs | 40 +++- .../Index/FasterLog/CommitFailureException.cs | 25 ++ cs/src/core/Index/FasterLog/CommitInfo.cs | 51 ++++ cs/src/core/Index/FasterLog/FasterLog.cs | 223 +++++++++++++----- .../core/Index/FasterLog/FasterLogIterator.cs | 73 ++++-- cs/src/core/Utilities/PageAsyncResultTypes.cs | 1 + 13 files changed, 499 insertions(+), 103 deletions(-) create mode 100644 cs/src/core/Allocator/ErrorList.cs create mode 100644 cs/src/core/Index/FasterLog/CommitFailureException.cs create mode 100644 cs/src/core/Index/FasterLog/CommitInfo.cs diff --git a/cs/playground/FasterLogSample/Program.cs b/cs/playground/FasterLogSample/Program.cs index 2a584458a..447810b3e 100644 --- a/cs/playground/FasterLogSample/Program.cs +++ b/cs/playground/FasterLogSample/Program.cs @@ -41,8 +41,12 @@ static void Main() var t1 = new Thread(new ThreadStart(ScanThread)); var t2 = new Thread(new ThreadStart(ReportThread)); var t3 = new Thread(new ThreadStart(CommitThread)); - t1.Start(); t2.Start(); t3.Start(); - t1.Join(); t2.Join(); t3.Join(); + t1.Start(); + t2.Start(); + t3.Start(); + t1.Join(); + t2.Join(); + t3.Join(); } else { @@ -68,8 +72,12 @@ static void Main() var t1 = new Thread(new ThreadStart(ScanThread)); var t2 = new Thread(new ThreadStart(ReportThread)); var t3 = new Thread(new ThreadStart(CommitThread)); - t1.Start(); t2.Start(); t3.Start(); - t1.Join(); t2.Join(); t3.Join(); + t1.Start(); + t2.Start(); + t3.Start(); + t1.Join(); + t2.Join(); + t3.Join(); Task.WaitAll(tasks); } @@ -211,6 +219,7 @@ static void ReportThread() static void CommitThread() { + //Task prevCommitTask = null; while (true) { Thread.Sleep(5); @@ -218,6 +227,17 @@ static void CommitThread() // Async version // await log.CommitAsync(); + + // Async version that catches all commit failures in between + //try + //{ + // prevCommitTask = await log.CommitAsync(prevCommitTask); + //} + //catch (CommitFailureException e) + //{ + // Console.WriteLine(e); + // prevCommitTask = 
e.LinkedCommitInfo.NextTask; + //} } } diff --git a/cs/src/core/Allocator/AllocatorBase.cs b/cs/src/core/Allocator/AllocatorBase.cs index 1d9357db3..0bd293ccf 100644 --- a/cs/src/core/Allocator/AllocatorBase.cs +++ b/cs/src/core/Allocator/AllocatorBase.cs @@ -6,6 +6,7 @@ using System.Threading; using System.Runtime.InteropServices; using System.Diagnostics; +using System.Collections.Generic; namespace FASTER.core { @@ -207,7 +208,12 @@ public unsafe abstract partial class AllocatorBase<Key, Value> : IDisposable /// /// Flush callback /// - protected readonly Action<long> FlushCallback = null; + protected readonly Action<CommitInfo> FlushCallback = null; + + /// + /// Error handling + /// + private readonly ErrorList errorList = new ErrorList(); /// /// Observer for records entering read-only region /// @@ -459,7 +465,7 @@ public unsafe abstract partial class AllocatorBase<Key, Value> : IDisposable /// /// /// - public AllocatorBase(LogSettings settings, IFasterEqualityComparer<Key> comparer, Action<long, long> evictCallback, LightEpoch epoch, Action<long> flushCallback) + public AllocatorBase(LogSettings settings, IFasterEqualityComparer<Key> comparer, Action<long, long> evictCallback, LightEpoch epoch, Action<CommitInfo> flushCallback) { if (evictCallback != null) { @@ -1010,7 +1016,24 @@ protected void ShiftFlushedUntilAddress() { if (Utility.MonotonicUpdate(ref FlushedUntilAddress, currentFlushedUntilAddress, out long oldFlushedUntilAddress)) { - FlushCallback?.Invoke(FlushedUntilAddress); + uint errorCode = 0; + if (errorList.Count > 0) + { + errorCode = errorList.CheckAndWait(oldFlushedUntilAddress, currentFlushedUntilAddress); + } + FlushCallback?.Invoke( + new CommitInfo + { + BeginAddress = BeginAddress, + FromAddress = oldFlushedUntilAddress, + UntilAddress = currentFlushedUntilAddress, + ErrorCode = errorCode + }); + + if (errorList.Count > 0) + { + errorList.RemoveUntil(currentFlushedUntilAddress); + } } } } @@ -1163,7 +1186,7 @@ public void AsyncReadPagesFromDevice<TContext>( IDevice logDevice = null, IDevice objectLogDevice = null) { AsyncReadPagesFromDevice(readPageStart, numPages, untilAddress, callback, context, - out CountdownEvent completed, devicePageOffset, logDevice, objectLogDevice); + out _, devicePageOffset, logDevice, objectLogDevice); } /// @@ -1446,6 +1469,8 @@ private void AsyncGetFromDiskCallback(uint errorCode, uint numBytes, NativeOverl Overlapped.Free(overlap); } + // static DateTime last = DateTime.Now; + /// /// IOCompletion callback for page flush /// @@ -1459,12 +1484,25 @@ private void AsyncFlushPageCallback(uint errorCode, uint numBytes, NativeOverlap Trace.TraceError("OverlappedStream GetQueuedCompletionStatus error: {0}", errorCode); } + /* + if (DateTime.Now - last > TimeSpan.FromSeconds(7)) + { + last = DateTime.Now; + errorCode = 1; + Console.WriteLine("Disk error"); + }*/ + + // Set the page status to flushed PageAsyncFlushResult<Empty> result = (PageAsyncFlushResult<Empty>)Overlapped.Unpack(overlap).AsyncResult; if (Interlocked.Decrement(ref result.count) == 0) { - Utility.MonotonicUpdate(ref PageStatusIndicator[result.page % BufferSize].LastFlushedUntilAddress, result.untilAddress, out long old); + if (errorCode != 0) + { + errorList.Add(result.fromAddress); + } + Utility.MonotonicUpdate(ref PageStatusIndicator[result.page % BufferSize].LastFlushedUntilAddress, result.untilAddress, out _); ShiftFlushedUntilAddress(); result.Free(); } diff --git a/cs/src/core/Allocator/AtomicOwner.cs b/cs/src/core/Allocator/AtomicOwner.cs index 590d2476f..ad7de3824 100644 --- a/cs/src/core/Allocator/AtomicOwner.cs +++ b/cs/src/core/Allocator/AtomicOwner.cs @@ -3,6 +3,7 @@ using 
System.Threading; using System.Runtime.InteropServices; +using System; namespace FASTER.core { @@ -40,8 +41,8 @@ public bool Enqueue() } /// - /// Dequeue token - /// true: successful dequeue (caller is owner) + /// Dequeue token (caller is/remains owner) + /// true: successful dequeue /// false: failed dequeue /// /// @@ -52,12 +53,37 @@ public bool Dequeue() var older = this; var newer = older; newer.count--; - if (newer.count == 0) - newer.owner = 0; if (Interlocked.CompareExchange(ref this.atomic, newer.atomic, older.atomic) == older.atomic) { - return newer.owner != 0; + return newer.count > 0; + } + } + } + + /// + /// Release queue ownership + /// true: successful release + /// false: failed release + /// + /// + public bool Release() + { + while (true) + { + var older = this; + var newer = older; + + if (newer.count > 0) + return false; + + if (newer.owner == 0) + throw new Exception("Invalid release by non-owner thread"); + newer.owner = 0; + + if (Interlocked.CompareExchange(ref this.atomic, newer.atomic, older.atomic) == older.atomic) + { + return true; } } } diff --git a/cs/src/core/Allocator/BlittableAllocator.cs b/cs/src/core/Allocator/BlittableAllocator.cs index b326c94a1..a164e71ac 100644 --- a/cs/src/core/Allocator/BlittableAllocator.cs +++ b/cs/src/core/Allocator/BlittableAllocator.cs @@ -26,7 +26,7 @@ public unsafe sealed class BlittableAllocator : AllocatorBase comparer, Action evictCallback = null, LightEpoch epoch = null, Action flushCallback = null) + public BlittableAllocator(LogSettings settings, IFasterEqualityComparer comparer, Action evictCallback = null, LightEpoch epoch = null, Action flushCallback = null) : base(settings, comparer, evictCallback, epoch, flushCallback) { values = new byte[BufferSize][]; @@ -335,6 +335,7 @@ public override IFasterScanIterator Scan(long beginAddress, long end /// /// /// + /// internal void AsyncReadPagesFromDeviceToFrame( long readPageStart, int numPages, @@ -344,7 +345,9 @@ internal void AsyncReadPagesFromDeviceToFrame( BlittableFrame frame, out CountdownEvent completed, long devicePageOffset = 0, - IDevice device = null, IDevice objectLogDevice = null) + IDevice device = null, + IDevice objectLogDevice = null, + CancellationTokenSource cts = null) { var usedDevice = device; IDevice usedObjlogDevice = objectLogDevice; @@ -371,7 +374,8 @@ internal void AsyncReadPagesFromDeviceToFrame( page = readPage, context = context, handle = completed, - frame = frame + frame = frame, + cts = cts }; ulong offsetInFile = (ulong)(AlignedPageSizeBytes * readPage); diff --git a/cs/src/core/Allocator/ErrorList.cs b/cs/src/core/Allocator/ErrorList.cs new file mode 100644 index 000000000..59d8b48ae --- /dev/null +++ b/cs/src/core/Allocator/ErrorList.cs @@ -0,0 +1,63 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. 
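+
+// ErrorList tracks the starting addresses of page flushes that failed with an
+// I/O error. ShiftFlushedUntilAddress consults it via CheckAndWait to derive
+// the error code reported for a newly flushed range (spinning until errors
+// below that range have been resolved), and calls RemoveUntil to prune
+// entries once the flushed-until address has moved past them.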
+ +using System.Collections.Generic; +using System.Threading; + +namespace FASTER.core +{ + class ErrorList + { + private readonly List<long> errorList; + + public ErrorList() => errorList = new List<long>(); + + public void Add(long address) + { + lock (errorList) + errorList.Add(address); + } + + public uint CheckAndWait(long oldFlushedUntilAddress, long currentFlushedUntilAddress) + { + bool done = false; + uint errorCode = 0; + while (!done) + { + done = true; + lock (errorList) + { + for (int i = 0; i < errorList.Count; i++) + { + if (errorList[i] >= oldFlushedUntilAddress && errorList[i] < currentFlushedUntilAddress) + { + errorCode = 1; + } + else if (errorList[i] < oldFlushedUntilAddress) + { + done = false; // spin barrier for other threads during exception + Thread.Yield(); + } + } + } + } + return errorCode; + } + + public void RemoveUntil(long currentFlushedUntilAddress) + { + lock (errorList) + { + // Iterate backwards so RemoveAt does not skip over entries + for (int i = errorList.Count - 1; i >= 0; i--) + { + if (errorList[i] < currentFlushedUntilAddress) + { + errorList.RemoveAt(i); + } + } + } + } + + public int Count => errorList.Count; + } +} diff --git a/cs/src/core/Allocator/GenericAllocator.cs b/cs/src/core/Allocator/GenericAllocator.cs index 1442c8e81..23b747b04 100644 --- a/cs/src/core/Allocator/GenericAllocator.cs +++ b/cs/src/core/Allocator/GenericAllocator.cs @@ -41,7 +41,7 @@ public unsafe sealed class GenericAllocator<Key, Value> : AllocatorBase<Key, Va private readonly bool keyBlittable = Utility.IsBlittable<Key>(); private readonly bool valueBlittable = Utility.IsBlittable<Value>(); - public GenericAllocator(LogSettings settings, SerializerSettings<Key, Value> serializerSettings, IFasterEqualityComparer<Key> comparer, Action<long, long> evictCallback = null, LightEpoch epoch = null, Action<long> flushCallback = null) + public GenericAllocator(LogSettings settings, SerializerSettings<Key, Value> serializerSettings, IFasterEqualityComparer<Key> comparer, Action<long, long> evictCallback = null, LightEpoch epoch = null, Action<CommitInfo> flushCallback = null) : base(settings, comparer, evictCallback, epoch, flushCallback) { SerializerSettings = serializerSettings; diff --git a/cs/src/core/Allocator/VarLenBlittableAllocator.cs b/cs/src/core/Allocator/VarLenBlittableAllocator.cs index 3b59a0050..1cecb248f 100644 --- a/cs/src/core/Allocator/VarLenBlittableAllocator.cs +++ b/cs/src/core/Allocator/VarLenBlittableAllocator.cs @@ -33,7 +33,7 @@ public unsafe sealed class VariableLengthBlittableAllocator<Key, Value> : Alloca internal readonly IVariableLengthStruct<Key> KeyLength; internal readonly IVariableLengthStruct<Value> ValueLength; - public VariableLengthBlittableAllocator(LogSettings settings, VariableLengthStructSettings<Key, Value> vlSettings, IFasterEqualityComparer<Key> comparer, Action<long, long> evictCallback = null, LightEpoch epoch = null, Action<long> flushCallback = null) + public VariableLengthBlittableAllocator(LogSettings settings, VariableLengthStructSettings<Key, Value> vlSettings, IFasterEqualityComparer<Key> comparer, Action<long, long> evictCallback = null, LightEpoch epoch = null, Action<CommitInfo> flushCallback = null) : base(settings, comparer, evictCallback, epoch, flushCallback) { values = new byte[BufferSize][]; diff --git a/cs/src/core/Device/ManagedLocalStorageDevice.cs b/cs/src/core/Device/ManagedLocalStorageDevice.cs index 255cd8132..034c30473 100644 --- a/cs/src/core/Device/ManagedLocalStorageDevice.cs +++ b/cs/src/core/Device/ManagedLocalStorageDevice.cs @@ -79,14 +79,16 @@ private void RecoverFiles() class ReadCallbackWrapper { + readonly Stream logHandle; readonly IOCompletionCallback callback; readonly IAsyncResult asyncResult; SectorAlignedMemory memory; readonly IntPtr destinationAddress; readonly uint readLength; - public 
ReadCallbackWrapper(IOCompletionCallback callback, IAsyncResult asyncResult, SectorAlignedMemory memory, IntPtr destinationAddress, uint readLength) + public ReadCallbackWrapper(Stream logHandle, IOCompletionCallback callback, IAsyncResult asyncResult, SectorAlignedMemory memory, IntPtr destinationAddress, uint readLength) { + this.logHandle = logHandle; this.callback = callback; this.asyncResult = asyncResult; this.memory = memory; @@ -96,34 +98,56 @@ public ReadCallbackWrapper(IOCompletionCallback callback, IAsyncResult asyncResu public unsafe void Callback(IAsyncResult result) { - fixed (void* source = memory.buffer) + uint errorCode = 0; + try { - Buffer.MemoryCopy(source, (void*)destinationAddress, readLength, readLength); + logHandle.EndRead(result); + fixed (void* source = memory.buffer) + { + Buffer.MemoryCopy(source, (void*)destinationAddress, readLength, readLength); + } } + catch + { + errorCode = 1; + } + memory.Return(); Overlapped ov = new Overlapped(0, 0, IntPtr.Zero, asyncResult); - callback(0, 0, ov.UnsafePack(callback, IntPtr.Zero)); + callback(errorCode, 0, ov.UnsafePack(callback, IntPtr.Zero)); } } class WriteCallbackWrapper { + readonly Stream logHandle; readonly IOCompletionCallback callback; readonly IAsyncResult asyncResult; SectorAlignedMemory memory; - public WriteCallbackWrapper(IOCompletionCallback callback, IAsyncResult asyncResult, SectorAlignedMemory memory) + public WriteCallbackWrapper(Stream logHandle, IOCompletionCallback callback, IAsyncResult asyncResult, SectorAlignedMemory memory) { this.callback = callback; this.asyncResult = asyncResult; this.memory = memory; + this.logHandle = logHandle; } public unsafe void Callback(IAsyncResult result) { + uint errorCode = 0; + try + { + logHandle.EndWrite(result); + } + catch + { + errorCode = 1; + } + memory.Return(); Overlapped ov = new Overlapped(0, 0, IntPtr.Zero, asyncResult); - callback(0, 0, ov.UnsafePack(callback, IntPtr.Zero)); + callback(errorCode, 0, ov.UnsafePack(callback, IntPtr.Zero)); } } @@ -146,7 +170,7 @@ public override unsafe void ReadAsync(int segmentId, ulong sourceAddress, var memory = pool.Get((int)readLength); logHandle.Seek((long)sourceAddress, SeekOrigin.Begin); logHandle.BeginRead(memory.buffer, 0, (int)readLength, - new ReadCallbackWrapper(callback, asyncResult, memory, destinationAddress, readLength).Callback, null); + new ReadCallbackWrapper(logHandle, callback, asyncResult, memory, destinationAddress, readLength).Callback, null); } /// @@ -174,7 +198,7 @@ public override unsafe void WriteAsync(IntPtr sourceAddress, } logHandle.Seek((long)destinationAddress, SeekOrigin.Begin); logHandle.BeginWrite(memory.buffer, 0, (int)numBytesToWrite, - new WriteCallbackWrapper(callback, asyncResult, memory).Callback, null); + new WriteCallbackWrapper(logHandle, callback, asyncResult, memory).Callback, null); } /// diff --git a/cs/src/core/Index/FasterLog/CommitFailureException.cs b/cs/src/core/Index/FasterLog/CommitFailureException.cs new file mode 100644 index 000000000..c6374806f --- /dev/null +++ b/cs/src/core/Index/FasterLog/CommitFailureException.cs @@ -0,0 +1,25 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. 
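+
+// CommitFailureException is surfaced through the commit TaskCompletionSource
+// when a flush covering a commit range fails. The attached LinkedCommitInfo
+// identifies the failed range and exposes the next task in the commit chain,
+// so a caller can observe the failure and keep waiting for later commits.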
+ +#pragma warning disable 0162 + +using System; +using System.Threading.Tasks; + +namespace FASTER.core +{ + /// + /// Exception thrown when commit fails + /// + public class CommitFailureException : Exception + { + /// + /// Commit info and next commit task in chain + /// + public LinkedCommitInfo LinkedCommitInfo { get; private set; } + + internal CommitFailureException(LinkedCommitInfo linkedCommitInfo, string message) + : base(message) + => LinkedCommitInfo = linkedCommitInfo; + } +} diff --git a/cs/src/core/Index/FasterLog/CommitInfo.cs b/cs/src/core/Index/FasterLog/CommitInfo.cs new file mode 100644 index 000000000..70401edcc --- /dev/null +++ b/cs/src/core/Index/FasterLog/CommitInfo.cs @@ -0,0 +1,51 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#pragma warning disable 0162 + +using System.Threading.Tasks; + +namespace FASTER.core +{ + /// + /// Info contained in task associated with commit + /// + public struct CommitInfo + { + /// + /// Begin address + /// + public long BeginAddress; + + /// + /// From address of commit range + /// + public long FromAddress; + + /// + /// Until address of commit range + /// + public long UntilAddress; + + /// + /// Error code (0 = success) + /// + public uint ErrorCode; + } + + /// + /// Linked list (chain) of commit info + /// + public struct LinkedCommitInfo + { + /// + /// Commit info + /// + public CommitInfo CommitInfo; + + /// + /// Next task in commit chain + /// + public Task NextTask; + } +} diff --git a/cs/src/core/Index/FasterLog/FasterLog.cs b/cs/src/core/Index/FasterLog/FasterLog.cs index 8d75e7d52..feb71d0ba 100644 --- a/cs/src/core/Index/FasterLog/FasterLog.cs +++ b/cs/src/core/Index/FasterLog/FasterLog.cs @@ -4,6 +4,7 @@ #pragma warning disable 0162 using System; +using System.Collections.Concurrent; using System.Diagnostics; using System.IO; using System.Runtime.CompilerServices; @@ -12,7 +13,6 @@ namespace FASTER.core { - /// /// FASTER log /// @@ -24,8 +24,9 @@ public class FasterLog : IDisposable private readonly GetMemory getMemory; private readonly int headerSize; private readonly LogChecksumType logChecksum; - private TaskCompletionSource commitTcs = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); - + private TaskCompletionSource commitTcs + = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); + /// /// Beginning address of log /// @@ -54,7 +55,7 @@ public class FasterLog : IDisposable /// /// Task notifying commit completions /// - internal Task CommitTask => commitTcs.Task; + internal Task CommitTask => commitTcs.Task; /// /// Create new log instance @@ -76,7 +77,7 @@ public FasterLog(FasterLogSettings logSettings) allocator = new BlittableAllocator( logSettings.GetLogSettings(), null, - null, epoch, e => CommitCallback(e)); + null, epoch, CommitCallback); allocator.Initialize(); Restore(); } @@ -218,7 +219,14 @@ public async ValueTask EnqueueAsync(byte[] entry) if (TryEnqueue(entry, out logicalAddress)) break; if (NeedToWait(CommittedUntilAddress, TailAddress)) - await task; + { + // Wait for *some* commit - failure can be ignored + try + { + await task; + } + catch { } + } } return logicalAddress; @@ -240,7 +248,14 @@ public async ValueTask EnqueueAsync(ReadOnlyMemory entry) if (TryEnqueue(entry.Span, out logicalAddress)) break; if (NeedToWait(CommittedUntilAddress, TailAddress)) - await task; + { + // Wait for *some* commit - failure can be ignored + try + { + await task; + } + catch { } + } } return 
logicalAddress; @@ -262,7 +277,14 @@ public async ValueTask EnqueueAsync(IReadOnlySpanBatch readOnlySpanBatch) if (TryEnqueue(readOnlySpanBatch, out logicalAddress)) break; if (NeedToWait(CommittedUntilAddress, TailAddress)) - await task; + { + // Wait for *some* commit - failure can be ignored + try + { + await task; + } + catch { } + } } return logicalAddress; @@ -295,16 +317,15 @@ public void WaitForCommit(long untilAddress = 0) /// public async ValueTask WaitForCommitAsync(long untilAddress = 0) { + var task = CommitTask; var tailAddress = untilAddress; if (tailAddress == 0) tailAddress = allocator.GetTailAddress(); while (true) { - var task = CommitTask; - if (CommittedUntilAddress < tailAddress) - { - await task; - } + var linkedCommitInfo = await task; + if (linkedCommitInfo.CommitInfo.UntilAddress < tailAddress) + task = linkedCommitInfo.NextTask; else break; } @@ -325,25 +346,46 @@ public void Commit(bool spinWait = false) /// /// Async commit log (until tail), completes only when we - /// complete the commit + /// complete the commit. Throws exception if this or any + /// ongoing commit fails. /// /// public async ValueTask CommitAsync() { + var task = CommitTask; var tailAddress = CommitInternal(); while (true) { - var task = CommitTask; - if (CommittedUntilAddress < tailAddress) - { - await task; - } + var linkedCommitInfo = await task; + if (linkedCommitInfo.CommitInfo.UntilAddress < tailAddress) + task = linkedCommitInfo.NextTask; else break; } } + /// + /// Async commit log (until tail), completes only when we + /// complete the commit. Throws exception if any commit + /// from prevCommitTask to current fails. + /// + /// + public async ValueTask> CommitAsync(Task prevCommitTask) + { + if (prevCommitTask == null) prevCommitTask = commitTcs.Task; + var tailAddress = CommitInternal(); + + while (true) + { + var linkedCommitInfo = await prevCommitTask; + if (linkedCommitInfo.CommitInfo.UntilAddress < tailAddress) + prevCommitTask = linkedCommitInfo.NextTask; + else + return linkedCommitInfo.NextTask; + } + } + #endregion #region EnqueueAndWaitForCommit @@ -403,25 +445,41 @@ public long EnqueueAndWaitForCommit(IReadOnlySpanBatch readOnlySpanBatch) public async ValueTask EnqueueAndWaitForCommitAsync(byte[] entry) { long logicalAddress; + Task task; // Phase 1: wait for commit to memory while (true) { - var task = CommitTask; + task = CommitTask; if (TryEnqueue(entry, out logicalAddress)) break; if (NeedToWait(CommittedUntilAddress, TailAddress)) - await task; + { + // Wait for *some* commit - failure can be ignored + try + { + await task; + } + catch { } + } } // Phase 2: wait for commit/flush to storage while (true) { - var task = CommitTask; - if (CommittedUntilAddress < logicalAddress + 1) + LinkedCommitInfo linkedCommitInfo; + try { - await task; + linkedCommitInfo = await task; } + catch (CommitFailureException e) + { + linkedCommitInfo = e.LinkedCommitInfo; + if (logicalAddress >= linkedCommitInfo.CommitInfo.FromAddress && logicalAddress < linkedCommitInfo.CommitInfo.UntilAddress) + throw e; + } + if (linkedCommitInfo.CommitInfo.UntilAddress < logicalAddress + 1) + task = linkedCommitInfo.NextTask; else break; } @@ -438,25 +496,41 @@ public async ValueTask EnqueueAndWaitForCommitAsync(byte[] entry) public async ValueTask EnqueueAndWaitForCommitAsync(ReadOnlyMemory entry) { long logicalAddress; + Task task; // Phase 1: wait for commit to memory while (true) { - var task = CommitTask; + task = CommitTask; if (TryEnqueue(entry.Span, out logicalAddress)) break; if 
(NeedToWait(CommittedUntilAddress, TailAddress)) - await task; + { + // Wait for *some* commit - failure can be ignored + try + { + await task; + } + catch { } + } } // Phase 2: wait for commit/flush to storage while (true) { - var task = CommitTask; - if (CommittedUntilAddress < logicalAddress + 1) + LinkedCommitInfo linkedCommitInfo; + try { - await task; + linkedCommitInfo = await task; } + catch (CommitFailureException e) + { + linkedCommitInfo = e.LinkedCommitInfo; + if (logicalAddress >= linkedCommitInfo.CommitInfo.FromAddress && logicalAddress < linkedCommitInfo.CommitInfo.UntilAddress) + throw e; + } + if (linkedCommitInfo.CommitInfo.UntilAddress < logicalAddress + 1) + task = linkedCommitInfo.NextTask; else break; } @@ -473,26 +547,41 @@ public async ValueTask EnqueueAndWaitForCommitAsync(ReadOnlyMemory e public async ValueTask EnqueueAndWaitForCommitAsync(IReadOnlySpanBatch readOnlySpanBatch) { long logicalAddress; - int allocatedLength; + Task task; // Phase 1: wait for commit to memory while (true) { - var task = CommitTask; - if (TryAppend(readOnlySpanBatch, out logicalAddress, out allocatedLength)) + task = CommitTask; + if (TryEnqueue(readOnlySpanBatch, out logicalAddress)) break; if (NeedToWait(CommittedUntilAddress, TailAddress)) - await task; + { + // Wait for *some* commit - failure can be ignored + try + { + await task; + } + catch { } + } } // Phase 2: wait for commit/flush to storage while (true) { - var task = CommitTask; - if (CommittedUntilAddress < logicalAddress + allocatedLength) + LinkedCommitInfo linkedCommitInfo; + try { - await task; + linkedCommitInfo = await task; } + catch (CommitFailureException e) + { + linkedCommitInfo = e.LinkedCommitInfo; + if (logicalAddress >= linkedCommitInfo.CommitInfo.FromAddress && logicalAddress < linkedCommitInfo.CommitInfo.UntilAddress) + throw e; + } + if (linkedCommitInfo.CommitInfo.UntilAddress < logicalAddress + 1) + task = linkedCommitInfo.NextTask; else break; } @@ -559,34 +648,48 @@ private int Align(int length) /// /// Commit log /// - private void CommitCallback(long flushAddress) + private void CommitCallback(CommitInfo commitInfo) { - long beginAddress = allocator.BeginAddress; - TaskCompletionSource _commitTcs = default; + TaskCompletionSource _commitTcs = default; // We can only allow serial monotonic synchronous commit lock (this) { - if ((beginAddress > CommittedBeginAddress) || (flushAddress > CommittedUntilAddress)) + if (CommittedBeginAddress > commitInfo.BeginAddress) + commitInfo.BeginAddress = CommittedBeginAddress; + if (CommittedUntilAddress > commitInfo.FromAddress) + commitInfo.FromAddress = CommittedUntilAddress; + if (CommittedUntilAddress > commitInfo.UntilAddress) + commitInfo.UntilAddress = CommittedUntilAddress; + + FasterLogRecoveryInfo info = new FasterLogRecoveryInfo { - FasterLogRecoveryInfo info = new FasterLogRecoveryInfo - { - BeginAddress = beginAddress > CommittedBeginAddress ? beginAddress : CommittedBeginAddress, - FlushedUntilAddress = flushAddress > CommittedUntilAddress ? 
flushAddress : CommittedUntilAddress - }; - - logCommitManager.Commit(info.BeginAddress, info.FlushedUntilAddress, info.ToByteArray()); - CommittedBeginAddress = info.BeginAddress; - CommittedUntilAddress = info.FlushedUntilAddress; - - _commitTcs = commitTcs; - if (commitTcs.Task.Status != TaskStatus.Faulted) - { - commitTcs = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); - } + BeginAddress = commitInfo.BeginAddress, + FlushedUntilAddress = commitInfo.UntilAddress + }; + + logCommitManager.Commit(info.BeginAddress, info.FlushedUntilAddress, info.ToByteArray()); + CommittedBeginAddress = info.BeginAddress; + CommittedUntilAddress = info.FlushedUntilAddress; + + _commitTcs = commitTcs; + // If task is not faulted, create new task + // If task is faulted due to commit exception, create new task + if (commitTcs.Task.Status != TaskStatus.Faulted || commitTcs.Task.Exception.InnerException as CommitFailureException != null) + { + commitTcs = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); } } - _commitTcs?.TrySetResult(flushAddress); + var lci = new LinkedCommitInfo + { + CommitInfo = commitInfo, + NextTask = commitTcs.Task + }; + + if (commitInfo.ErrorCode == 0) + _commitTcs?.TrySetResult(lci); + else + _commitTcs.TrySetException(new CommitFailureException(lci, $"Commit of address range [{commitInfo.FromAddress}-{commitInfo.UntilAddress}] failed with error code {commitInfo.ErrorCode}")); } /// @@ -739,7 +842,12 @@ private long CommitInternal(bool spinWait = false) { // May need to commit begin address epoch.Suspend(); - CommitCallback(CommittedUntilAddress); + var beginAddress = allocator.BeginAddress; + if (beginAddress > CommittedBeginAddress) + CommitCallback(new CommitInfo { BeginAddress = beginAddress, + FromAddress = CommittedUntilAddress, + UntilAddress = CommittedUntilAddress, + ErrorCode = 0 }); } return tailAddress; @@ -802,6 +910,7 @@ private unsafe void SetHeader(int length, byte* dest) /// private bool NeedToWait(long committedUntilAddress, long tailAddress) { + Thread.Yield(); return allocator.GetPage(committedUntilAddress) <= (allocator.GetPage(tailAddress) - allocator.BufferSize); diff --git a/cs/src/core/Index/FasterLog/FasterLogIterator.cs b/cs/src/core/Index/FasterLog/FasterLogIterator.cs index c1b563914..faf24b0f8 100644 --- a/cs/src/core/Index/FasterLog/FasterLogIterator.cs +++ b/cs/src/core/Index/FasterLog/FasterLogIterator.cs @@ -21,6 +21,7 @@ public class FasterLogScanIterator : IDisposable private readonly long endAddress; private readonly BlittableFrame frame; private readonly CountdownEvent[] loaded; + private readonly CancellationTokenSource[] loadedCancel; private readonly long[] loadedPage; private readonly LightEpoch epoch; private readonly GetMemory getMemory; @@ -75,10 +76,13 @@ internal unsafe FasterLogScanIterator(FasterLog fasterLog, BlittableAllocator @@ -91,7 +95,14 @@ public async ValueTask WaitAsync() { var commitTask = fasterLog.CommitTask; if (nextAddress >= fasterLog.CommittedUntilAddress) - await commitTask; + { + // Ignore commit exceptions + try + { + await commitTask; + } + catch { } + } else break; } @@ -171,47 +182,71 @@ private unsafe void BufferAndLoad(long currentAddress, long currentPage, long cu if (loadedPage[currentFrame] != currentPage) { if (loadedPage[currentFrame] != -1) - loaded[currentFrame].Wait(); // Ensure we have completed ongoing load - allocator.AsyncReadPagesFromDeviceToFrame(currentAddress >> allocator.LogPageSizeBits, 1, endAddress, AsyncReadPagesCallback, 
Empty.Default, frame, out loaded[currentFrame]); + { + WaitForFrameLoad(currentFrame); + } + + allocator.AsyncReadPagesFromDeviceToFrame(currentAddress >> allocator.LogPageSizeBits, 1, endAddress, AsyncReadPagesCallback, Empty.Default, frame, out loaded[currentFrame], 0, null, null, loadedCancel[currentFrame]); loadedPage[currentFrame] = currentAddress >> allocator.LogPageSizeBits; } if (frameSize == 2) { - currentPage++; - currentFrame = (currentFrame + 1) % frameSize; + var nextPage = currentPage + 1; + var nextFrame = (currentFrame + 1) % frameSize; - if (loadedPage[currentFrame] != currentPage) + if (loadedPage[nextFrame] != nextPage) { - if (loadedPage[currentFrame] != -1) - loaded[currentFrame].Wait(); // Ensure we have completed ongoing load - allocator.AsyncReadPagesFromDeviceToFrame(1 + (currentAddress >> allocator.LogPageSizeBits), 1, endAddress, AsyncReadPagesCallback, Empty.Default, frame, out loaded[currentFrame]); - loadedPage[currentFrame] = 1 + (currentAddress >> allocator.LogPageSizeBits); + if (loadedPage[nextFrame] != -1) + { + WaitForFrameLoad(nextFrame); + } + + allocator.AsyncReadPagesFromDeviceToFrame(1 + (currentAddress >> allocator.LogPageSizeBits), 1, endAddress, AsyncReadPagesCallback, Empty.Default, frame, out loaded[nextFrame], 0, null, null, loadedCancel[nextFrame]); + loadedPage[nextFrame] = 1 + (currentAddress >> allocator.LogPageSizeBits); } } - loaded[currentFrame].Wait(); + + WaitForFrameLoad(currentFrame); + } + + private void WaitForFrameLoad(long frame) + { + if (loaded[frame].IsSet) return; + + try + { + loaded[frame].Wait(loadedCancel[frame].Token); // Ensure we have completed ongoing load + } + catch (Exception e) + { + loadedPage[frame] = -1; + loadedCancel[frame] = new CancellationTokenSource(); + nextAddress = (1 + (currentAddress >> allocator.LogPageSizeBits)) << allocator.LogPageSizeBits; + throw new Exception("Page read from storage failed, skipping page. 
Inner exception: " + e.ToString()); + } } private unsafe void AsyncReadPagesCallback(uint errorCode, uint numBytes, NativeOverlapped* overlap) { + var result = (PageAsyncReadResult)Overlapped.Unpack(overlap).AsyncResult; + if (errorCode != 0) { Trace.TraceError("OverlappedStream GetQueuedCompletionStatus error: {0}", errorCode); + result.cts?.Cancel(); } - var result = (PageAsyncReadResult)Overlapped.Unpack(overlap).AsyncResult; - if (result.freeBuffer1 != null) { - allocator.PopulatePage(result.freeBuffer1.GetValidPointer(), result.freeBuffer1.required_bytes, result.page); + if (errorCode == 0) + allocator.PopulatePage(result.freeBuffer1.GetValidPointer(), result.freeBuffer1.required_bytes, result.page); result.freeBuffer1.Return(); result.freeBuffer1 = null; } - if (result.handle != null) - { - result.handle.Signal(); - } + if (errorCode == 0) + result.handle?.Signal(); Interlocked.MemoryBarrier(); Overlapped.Free(overlap); diff --git a/cs/src/core/Utilities/PageAsyncResultTypes.cs b/cs/src/core/Utilities/PageAsyncResultTypes.cs index 5a8792ce7..eb349ad3f 100644 --- a/cs/src/core/Utilities/PageAsyncResultTypes.cs +++ b/cs/src/core/Utilities/PageAsyncResultTypes.cs @@ -22,6 +22,7 @@ public class PageAsyncReadResult : IAsyncResult internal IOCompletionCallback callback; internal IDevice objlogDevice; internal object frame; + internal CancellationTokenSource cts; /* Used for iteration */ internal long resumePtr; From e940a0a66ba93fc61cc28786c043a0aa65b34ad8 Mon Sep 17 00:00:00 2001 From: Badrish Chandramouli Date: Wed, 30 Oct 2019 11:34:34 -0700 Subject: [PATCH 35/36] Added async iterator support --- .../FasterLogSample/FasterLogSample.csproj | 1 + cs/playground/FasterLogSample/Program.cs | 40 ++++++++++------- cs/src/core/FASTER.core.csproj | 7 +++ .../core/Index/FasterLog/FasterLogIterator.cs | 45 ++++++++++++++++++- 4 files changed, 75 insertions(+), 18 deletions(-) diff --git a/cs/playground/FasterLogSample/FasterLogSample.csproj b/cs/playground/FasterLogSample/FasterLogSample.csproj index 54367d67c..c9e13fb4b 100644 --- a/cs/playground/FasterLogSample/FasterLogSample.csproj +++ b/cs/playground/FasterLogSample/FasterLogSample.csproj @@ -3,6 +3,7 @@ netcoreapp2.2 x64 + preview win7-x64;linux-x64 diff --git a/cs/playground/FasterLogSample/Program.cs b/cs/playground/FasterLogSample/Program.cs index 2a584458a..8f2feef7f 100644 --- a/cs/playground/FasterLogSample/Program.cs +++ b/cs/playground/FasterLogSample/Program.cs @@ -38,11 +38,9 @@ static void Main() new Thread(new ThreadStart(LogWriterThread)).Start(); // Threads for scan, reporting, commit - var t1 = new Thread(new ThreadStart(ScanThread)); - var t2 = new Thread(new ThreadStart(ReportThread)); - var t3 = new Thread(new ThreadStart(CommitThread)); - t1.Start(); t2.Start(); t3.Start(); - t1.Join(); t2.Join(); t3.Join(); + new Thread(new ThreadStart(ScanThread)).Start(); + new Thread(new ThreadStart(ReportThread)).Start(); + new Thread(new ThreadStart(CommitThread)).Start(); } else { @@ -64,14 +62,14 @@ static void Main() tasks[i] = Task.Run(() => AsyncLogWriter(local)); } - // Threads for scan, reporting, commit - var t1 = new Thread(new ThreadStart(ScanThread)); - var t2 = new Thread(new ThreadStart(ReportThread)); - var t3 = new Thread(new ThreadStart(CommitThread)); - t1.Start(); t2.Start(); t3.Start(); - t1.Join(); t2.Join(); t3.Join(); + var scan = Task.Run(() => AsyncScan()); + + // Threads for reporting, commit + new Thread(new ThreadStart(ReportThread)).Start(); + new Thread(new ThreadStart(CommitThread)).Start(); 
Task.WaitAll(tasks); + Task.WaitAll(scan); } } @@ -149,6 +147,8 @@ static void ScanThread() { while (!iter.GetNext(out result, out int length)) { + // For finite end address, check if iteration ended + // if (iter.CurrentAddress >= endAddress) return; iter.WaitAsync().GetAwaiter().GetResult(); } @@ -156,12 +156,7 @@ static void ScanThread() // iter.GetNext(pool, out IMemoryOwner resultMem, out int length)) if (Different(result, staticEntry, out int location)) - { - if (result.Length != staticEntry.Length) - throw new Exception("Invalid entry found, expected length " + staticEntry.Length + ", actual length " + result.Length); - else - throw new Exception("Invalid entry found at offset " + location); - } + throw new Exception("Invalid entry found"); // Re-insert entry with small probability if (r.Next(100) < 10) @@ -177,6 +172,17 @@ static void ScanThread() } } + static async Task AsyncScan() + { + using (iter = log.Scan(log.BeginAddress, long.MaxValue)) + await foreach ((byte[] result, int length) in iter.GetAsyncEnumerable()) + { + if (Different(result, staticEntry, out int location)) + throw new Exception("Invalid entry found"); + log.TruncateUntil(iter.NextAddress); + } + } + static void ReportThread() { long lastTime = 0; diff --git a/cs/src/core/FASTER.core.csproj b/cs/src/core/FASTER.core.csproj index dc808cfd7..9d49b9163 100644 --- a/cs/src/core/FASTER.core.csproj +++ b/cs/src/core/FASTER.core.csproj @@ -3,6 +3,7 @@ netstandard2.0;net46 AnyCPU;x64 + preview @@ -40,4 +41,10 @@ + + + + 4.0.0 + + \ No newline at end of file diff --git a/cs/src/core/Index/FasterLog/FasterLogIterator.cs b/cs/src/core/Index/FasterLog/FasterLogIterator.cs index c1b563914..2eb7124db 100644 --- a/cs/src/core/Index/FasterLog/FasterLogIterator.cs +++ b/cs/src/core/Index/FasterLog/FasterLogIterator.cs @@ -7,6 +7,7 @@ using System.Runtime.CompilerServices; using System.Threading.Tasks; using System.Buffers; +using System.Collections.Generic; namespace FASTER.core { @@ -81,12 +82,54 @@ internal unsafe FasterLogScanIterator(FasterLog fasterLog, BlittableAllocator + /// Async enumerable for iterator + /// + /// Entry and entry length + public async IAsyncEnumerable<(byte[], int)> GetAsyncEnumerable() + { + while (true) + { + byte[] result; + int length; + while (!GetNext(out result, out length)) + { + if (currentAddress >= endAddress) + yield break; + await WaitAsync(); + } + yield return (result, length); + } + } + + /// + /// Async enumerable for iterator (memory pool based version) + /// + /// Entry and entry length + public async IAsyncEnumerable<(IMemoryOwner, int)> GetAsyncEnumerable(MemoryPool pool) + { + while (true) + { + IMemoryOwner result; + int length; + while (!GetNext(pool, out result, out length)) + { + if (currentAddress >= endAddress) + yield break; + await WaitAsync(); + } + yield return (result, length); + } + } +#endif + /// /// Wait for iteration to be ready to continue /// /// public async ValueTask WaitAsync() - { + { while (true) { var commitTask = fasterLog.CommitTask; From 8e175e085b180244532ac795e2dc53ac61e5dc16 Mon Sep 17 00:00:00 2001 From: Badrish Chandramouli Date: Wed, 30 Oct 2019 14:10:10 -0700 Subject: [PATCH 36/36] Added support for persistent/recoverable named iterators. 
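Named iterators persist their current address as part of commit metadata, so a log consumer can resume a scan from where it left off after a restart. A minimal usage sketch (the iterator name "consumer1" and the Process helper are hypothetical; iterator names are limited to 20 characters):

    using (var iter = log.Scan(log.BeginAddress, long.MaxValue, "consumer1"))
    {
        while (true)
        {
            while (!iter.GetNext(out byte[] entry, out int length))
                iter.WaitAsync().GetAwaiter().GetResult();
            Process(entry); // hypothetical application logic
            log.TruncateUntil(iter.NextAddress);
        }
    }

On a later run, calling Scan with the same name (recover defaults to true) resumes from the iterator's last committed CurrentAddress instead of beginAddress.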
--- cs/playground/FasterLogSample/Program.cs | 3 ++ cs/src/core/Index/FasterLog/FasterLog.cs | 40 ++++++++++++---- .../core/Index/FasterLog/FasterLogIterator.cs | 13 +++++- .../Index/FasterLog/FasterLogRecoveryInfo.cs | 46 +++++++++++++++++++ 4 files changed, 91 insertions(+), 11 deletions(-) diff --git a/cs/playground/FasterLogSample/Program.cs b/cs/playground/FasterLogSample/Program.cs index eb3f0cfbe..cdee54937 100644 --- a/cs/playground/FasterLogSample/Program.cs +++ b/cs/playground/FasterLogSample/Program.cs @@ -170,6 +170,9 @@ static void ScanThread() log.TruncateUntil(iter.NextAddress); } } + + // Example of recoverable (named) iterator: + // using (iter = log.Scan(log.BeginAddress, long.MaxValue, "foo")) } static async Task AsyncScan() diff --git a/cs/src/core/Index/FasterLog/FasterLog.cs b/cs/src/core/Index/FasterLog/FasterLog.cs index feb71d0ba..3a46cfb86 100644 --- a/cs/src/core/Index/FasterLog/FasterLog.cs +++ b/cs/src/core/Index/FasterLog/FasterLog.cs @@ -4,7 +4,7 @@ #pragma warning disable 0162 using System; -using System.Collections.Concurrent; +using System.Collections.Generic; using System.Diagnostics; using System.IO; using System.Runtime.CompilerServices; @@ -24,6 +24,7 @@ public class FasterLog : IDisposable private readonly GetMemory getMemory; private readonly int headerSize; private readonly LogChecksumType logChecksum; + private readonly Dictionary RecoveredIterators; private TaskCompletionSource commitTcs = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); @@ -79,7 +80,7 @@ public FasterLog(FasterLogSettings logSettings) logSettings.GetLogSettings(), null, null, epoch, CommitCallback); allocator.Initialize(); - Restore(); + Restore(out RecoveredIterators); } /// @@ -602,13 +603,30 @@ public void TruncateUntil(long untilAddress) /// /// Pull-based iterator interface for scanning FASTER log /// - /// Begin address for scan - /// End address for scan (or long.MaxValue for tailing) + /// Begin address for scan. + /// End address for scan (or long.MaxValue for tailing). + /// Name of iterator, if we need to persist/recover it (default null - do not persist). + /// Whether to recover named iterator from latest commit (if exists). If false, iterator starts from beginAddress. 
/// Use single or double buffering /// - public FasterLogScanIterator Scan(long beginAddress, long endAddress, ScanBufferingMode scanBufferingMode = ScanBufferingMode.DoublePageBuffering) + public FasterLogScanIterator Scan(long beginAddress, long endAddress, string name = null, bool recover = true, ScanBufferingMode scanBufferingMode = ScanBufferingMode.DoublePageBuffering) { - return new FasterLogScanIterator(this, allocator, beginAddress, endAddress, getMemory, scanBufferingMode, epoch, headerSize); + FasterLogScanIterator iter; + if (recover && name != null && RecoveredIterators != null && RecoveredIterators.ContainsKey(name)) + iter = new FasterLogScanIterator(this, allocator, RecoveredIterators[name], endAddress, getMemory, scanBufferingMode, epoch, headerSize, name); + else + iter = new FasterLogScanIterator(this, allocator, beginAddress, endAddress, getMemory, scanBufferingMode, epoch, headerSize, name); + + if (name != null) + { + if (name.Length > 20) + throw new Exception("Max length of iterator name is 20 characters"); + if (FasterLogScanIterator.PersistedIterators.ContainsKey(name)) + Debug.WriteLine("Iterator name exists, overwriting"); + FasterLogScanIterator.PersistedIterators[name] = iter; + } + + return iter; } /// @@ -667,6 +685,7 @@ private void CommitCallback(CommitInfo commitInfo) BeginAddress = commitInfo.BeginAddress, FlushedUntilAddress = commitInfo.UntilAddress }; + info.PopulateIterators(); logCommitManager.Commit(info.BeginAddress, info.FlushedUntilAddress, info.ToByteArray()); CommittedBeginAddress = info.BeginAddress; @@ -695,8 +714,9 @@ private void CommitCallback(CommitInfo commitInfo) /// /// Restore log /// - private void Restore() + private void Restore(out Dictionary recoveredIterators) { + recoveredIterators = null; FasterLogRecoveryInfo info = new FasterLogRecoveryInfo(); var commitInfo = logCommitManager.GetCommitMetadata(); @@ -710,6 +730,8 @@ private void Restore() var headAddress = info.FlushedUntilAddress - allocator.GetOffsetInPage(info.FlushedUntilAddress); if (headAddress == 0) headAddress = Constants.kFirstValidAddress; + recoveredIterators = info.Iterators; + allocator.RestoreHybridLog(info.FlushedUntilAddress, headAddress, info.BeginAddress); CommittedUntilAddress = info.FlushedUntilAddress; CommittedBeginAddress = info.BeginAddress; @@ -840,10 +862,10 @@ private long CommitInternal(bool spinWait = false) } else { - // May need to commit begin address + // May need to commit begin address and/or iterators epoch.Suspend(); var beginAddress = allocator.BeginAddress; - if (beginAddress > CommittedBeginAddress) + if (beginAddress > CommittedBeginAddress || FasterLogScanIterator.PersistedIterators.Count > 0) CommitCallback(new CommitInfo { BeginAddress = beginAddress, FromAddress = CommittedUntilAddress, UntilAddress = CommittedUntilAddress, diff --git a/cs/src/core/Index/FasterLog/FasterLogIterator.cs b/cs/src/core/Index/FasterLog/FasterLogIterator.cs index 52db3b3cc..385823a17 100644 --- a/cs/src/core/Index/FasterLog/FasterLogIterator.cs +++ b/cs/src/core/Index/FasterLog/FasterLogIterator.cs @@ -8,6 +8,7 @@ using System.Threading.Tasks; using System.Buffers; using System.Collections.Generic; +using System.Collections.Concurrent; namespace FASTER.core { @@ -17,6 +18,7 @@ namespace FASTER.core public class FasterLogScanIterator : IDisposable { private readonly int frameSize; + private readonly string name; private readonly FasterLog fasterLog; private readonly BlittableAllocator allocator; private readonly long endAddress; @@ -39,6 +41,9 @@ 
public class FasterLogScanIterator : IDisposable /// public long NextAddress => nextAddress; + internal static readonly ConcurrentDictionary PersistedIterators + = new ConcurrentDictionary(); + /// /// Constructor /// @@ -49,8 +54,9 @@ public class FasterLogScanIterator : IDisposable /// /// /// + /// /// - internal unsafe FasterLogScanIterator(FasterLog fasterLog, BlittableAllocator hlog, long beginAddress, long endAddress, GetMemory getMemory, ScanBufferingMode scanBufferingMode, LightEpoch epoch, int headerSize) + internal unsafe FasterLogScanIterator(FasterLog fasterLog, BlittableAllocator hlog, long beginAddress, long endAddress, GetMemory getMemory, ScanBufferingMode scanBufferingMode, LightEpoch epoch, int headerSize, string name) { this.fasterLog = fasterLog; this.allocator = hlog; @@ -61,8 +67,9 @@ internal unsafe FasterLogScanIterator(FasterLog fasterLog, BlittableAllocator pool, out IMemoryOwner entry, public void Dispose() { frame?.Dispose(); + if (name != null) + PersistedIterators.TryRemove(name, out _); } private unsafe void BufferAndLoad(long currentAddress, long currentPage, long currentFrame) diff --git a/cs/src/core/Index/FasterLog/FasterLogRecoveryInfo.cs b/cs/src/core/Index/FasterLog/FasterLogRecoveryInfo.cs index 7a41976c0..4dd46d452 100644 --- a/cs/src/core/Index/FasterLog/FasterLogRecoveryInfo.cs +++ b/cs/src/core/Index/FasterLog/FasterLogRecoveryInfo.cs @@ -4,6 +4,7 @@ #pragma warning disable 0162 using System; +using System.Collections.Generic; using System.Diagnostics; using System.IO; @@ -24,6 +25,10 @@ internal struct FasterLogRecoveryInfo /// public long FlushedUntilAddress; + /// + /// Persisted iterators + /// + public Dictionary Iterators; /// /// Initialize @@ -58,6 +63,22 @@ public void Initialize(BinaryReader reader) if (checkSum != (BeginAddress ^ FlushedUntilAddress)) throw new Exception("Invalid checksum found during commit recovery"); + + var count = 0; + try + { + count = reader.ReadInt32(); + } + catch { } + + if (count > 0) + { + Iterators = new Dictionary(); + for (int i = 0; i < count; i++) + { + Iterators.Add(reader.ReadString(), reader.ReadInt64()); + } + } } /// @@ -95,11 +116,36 @@ public byte[] ToByteArray() writer.Write(BeginAddress ^ FlushedUntilAddress); // checksum writer.Write(BeginAddress); writer.Write(FlushedUntilAddress); + if (Iterators?.Count > 0) + { + writer.Write(Iterators.Count); + foreach (var kvp in Iterators) + { + writer.Write(kvp.Key); + writer.Write(kvp.Value); + } + } } return ms.ToArray(); } } + /// + /// Take snapshot of persisted iterators + /// + public void PopulateIterators() + { + if (FasterLogScanIterator.PersistedIterators.Count > 0) + { + Iterators = new Dictionary(); + + foreach (var kvp in FasterLogScanIterator.PersistedIterators) + { + Iterators.Add(kvp.Key, kvp.Value.CurrentAddress); + } + } + } + /// /// Print checkpoint info for debugging purposes ///
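For reference, the commit metadata produced by ToByteArray above can be decoded as follows. This is a minimal sketch (the commitBytes input is hypothetical), mirroring Initialize: checksum, begin and flushed-until addresses, then an optional iterator table:

    using (var ms = new MemoryStream(commitBytes))
    using (var reader = new BinaryReader(ms))
    {
        long checkSum = reader.ReadInt64();
        long beginAddress = reader.ReadInt64();
        long flushedUntilAddress = reader.ReadInt64();
        if (checkSum != (beginAddress ^ flushedUntilAddress))
            throw new Exception("Invalid checksum found during commit recovery");

        if (ms.Position < ms.Length)
        {
            int count = reader.ReadInt32();
            for (int i = 0; i < count; i++)
            {
                string name = reader.ReadString(); // persisted iterator name
                long address = reader.ReadInt64(); // iterator's committed CurrentAddress
            }
        }
    }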